1
0
mirror of https://github.com/chylex/IntelliJ-IdeaVim.git synced 2025-02-26 05:46:00 +01:00

documenting regex code

This commit is contained in:
Emanuel Gestosa 2023-08-21 17:24:03 +01:00 committed by lippfi
parent e2c6c0539f
commit 823a52583c
15 changed files with 186 additions and 34 deletions

View File

@ -17,16 +17,17 @@ import com.maddyhome.idea.vim.regexp.parser.VimRegexParserResult
import com.maddyhome.idea.vim.regexp.parser.visitors.PatternVisitor
/**
* Represents a compiled Vim regular expression. Provides methods to
* Represents a compiled Vim pattern. Provides methods to
* match, replace and split strings in the editor with a pattern.
* To learn about Vim's pattern syntax see :help pattern
*
* @see :help /pattern
*/
public class VimRegex(pattern: String) {
private enum class CaseSensitivity { SMART_CASE, IGNORE_CASE, NO_IGNORE_CASE }
private val caseSensitivity: CaseSensitivity
/**
* The NFA representing the compiled regular expression
* The NFA representing the compiled pattern
*/
private val nfa: NFA
@ -49,7 +50,7 @@ public class VimRegex(pattern: String) {
/**
* Indicates whether the regular expression can find at least one match in the specified editor
* Indicates whether the pattern can find at least one match in the specified editor
*
* @param editor The editor where to look for the match in
*
@ -79,10 +80,12 @@ public class VimRegex(pattern: String) {
}
/**
* Returns the first match of a regular expression in the editor, beginning at the specified index.
* Returns the first match of a pattern in the editor, beginning at the specified index.
*
* @param editor The editor where to look for the match in
* @param startIndex The index to start the find
*
* @return The first match found in the editor
*/
public fun find(
editor: VimEditor,
@ -111,11 +114,13 @@ public class VimRegex(pattern: String) {
}
/**
* Returns a sequence of all occurrences of a regular expression within
* Returns a sequence of all occurrences of a pattern within
* the editor, beginning at the specified index
*
* @param editor The editor where to look for the match in
* @param startIndex The index to start the find
*
* @return All the matches found in the editor
*/
public fun findAll(
editor: VimEditor,
@ -146,11 +151,13 @@ public class VimRegex(pattern: String) {
}
/**
* Attempts to match a regular expression exactly at the specified
* Attempts to match a pattern exactly at the specified
* index in the editor text.
*
* @param editor The editor where to look for the match in
* @param index The index to start the match
*
* @return The match, either successful or not, found at the specified index
*/
public fun matchAt(
editor: VimEditor,
@ -163,6 +170,8 @@ public class VimRegex(pattern: String) {
* Attempts to match the entire editor against the pattern.
*
* @param editor The editor where to look for the match in
*
* @return The match, either successful or not, when matching against the entire editor
*/
public fun matchEntire(
editor: VimEditor
@ -178,9 +187,11 @@ public class VimRegex(pattern: String) {
}
/**
* Indicates whether the regular expression matches the entire editor.
* Indicates whether the pattern matches the entire editor.
*
* @param editor The editor where to look for the match in
*
* @return True if the entire editor matches, false otherwise
*/
public fun matches(
editor: VimEditor
@ -193,10 +204,12 @@ public class VimRegex(pattern: String) {
}
/**
* Checks if a regular expression matches a part of the editor
* Checks if a pattern matches a part of the editor
* starting exactly at the specified index.
*
* @param editor The editor where to look for the match in
*
* @return True if there is a successful match starting at the specified index, false otherwise
*/
public fun matchesAt(
editor: VimEditor,
@ -209,6 +222,15 @@ public class VimRegex(pattern: String) {
}
}
/**
* Simulates the internal NFA with the determined flags,
* started on a given index.
*
* @param editor The editor that is used for the simulation
* @param index The index where the simulation should start
*
* @return The resulting match result
*/
private fun simulateNFA(editor: VimEditor, index: Int = 0) : VimMatchResult {
val ignoreCase = when (caseSensitivity) {
CaseSensitivity.NO_IGNORE_CASE -> false

View File

@ -136,6 +136,7 @@ internal class NFA private constructor(
* and end, respectively, the capturing of a group.
*
* @param groupNumber The number of the capture group
* @param force Whether the state should force-end the capturing of the group
*/
internal fun capture(groupNumber: Int, force: Boolean = true) {
this.startState.startCapture.add(groupNumber)
@ -148,10 +149,10 @@ internal class NFA private constructor(
* may or may not consume input, and can be positive (simulation must
* succeed) or negative (simulation must fail).
*
* @param shouldConsume Whether the assertion should consume input
* @param isPositive Whether the assertion is positive or negative
* @param shouldConsume Whether the assertion should consume input.
* @param isPositive Whether the assertion is positive or negative.
*
* @return The NFA instance marked for assertion
* @return The NFA instance marked for assertion.
*/
internal fun assert(shouldConsume: Boolean, isPositive: Boolean = false) : NFA {
val newStart = NFAState()

View File

@ -8,6 +8,22 @@
package com.maddyhome.idea.vim.regexp.nfa
/**
* Represents an assertion.
*
* @param shouldConsume Whether the simulation should consume the input "consumed" by the assertion.
* @param isPositive True if the assertion is positive, false if negative.
* @param startState The state to jump to, to start the assertion
* @param endState The state where the assertion should end
* @param jumpTo The state that the simulation should jump to, to resume with normal
* simulation after the assertion.
*
* @see :help /@=
* @see :help /@!
* @see :help /@&lt;=
* @see :help /@&lt;!
* @see :help /@&gt;
*/
internal data class NFAAssertion(
val shouldConsume: Boolean,
val isPositive: Boolean,

View File

@ -11,39 +11,39 @@ package com.maddyhome.idea.vim.regexp.nfa
/**
* Represents a single state of a NFA.
*/
internal class NFAState (
internal class NFAState {
/**
* All the transitions from this state. Order matters.
* Transitions with higher priority should be in lower
* indexes. This is relevant for the implementation of
* lazy quantifiers.
*/
val transitions: ArrayList<NFATransition> = ArrayList(),
internal val transitions: ArrayList<NFATransition> = ArrayList()
/**
*
* If this is not null, then when simulation reaches this state,
* it has to check if this assertion is successful to continue.
*/
var assertion: NFAAssertion? = null,
internal var assertion: NFAAssertion? = null
/**
* Stores the numbers of the capture groups that start
* being captured on this state
*/
val startCapture: MutableList<Int> = ArrayList(),
internal val startCapture: MutableList<Int> = ArrayList()
/**
* Stores the number of the capture groups that stop
* being captured on this state
*/
val endCapture: MutableList<Int> = ArrayList(),
internal val endCapture: MutableList<Int> = ArrayList()
/**
* Stores the number of the capture groups that stop
* being captured on this state, even if that group
* had already been set to stop being captured
*/
val forceEndCapture: MutableList<Int> = ArrayList(),
) {
internal val forceEndCapture: MutableList<Int> = ArrayList()
/**
* Adds a new transition from this state. This transition

View File

@ -11,18 +11,15 @@ package com.maddyhome.idea.vim.regexp.nfa
import com.maddyhome.idea.vim.regexp.nfa.matcher.Matcher
/**
* Represents a transition of the NFA
* Represents a transition of the NFA.
*
* @param matcher The matcher that determines if the transition can
* be made, as well as information on how many characters
* are consumed by the transition.
*
* @param destState The destination state of the transition.
*/
internal data class NFATransition(
/**
* The matcher that determines whether this transition can
* be made, and that also provides information on how many
* characters are consumed by the transition.
*/
val matcher: Matcher,
/**
* The destination state of the transition.
*/
val destState: NFAState,
)

View File

@ -11,6 +11,9 @@ package com.maddyhome.idea.vim.regexp.nfa.matcher
import com.maddyhome.idea.vim.api.VimEditor
import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
/**
* Matcher used to check if index is at the end of a line.
*/
internal class EndOfLineMatcher : Matcher {
override fun matches(
editor: VimEditor,

View File

@ -11,6 +11,9 @@ package com.maddyhome.idea.vim.regexp.nfa.matcher
import com.maddyhome.idea.vim.api.VimEditor
import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
/**
* Matcher used to check if index is at the end of a word.
*/
internal class EndOfWordMatcher : Matcher {
override fun matches(
editor: VimEditor,

View File

@ -12,8 +12,7 @@ import com.maddyhome.idea.vim.api.VimEditor
import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
/**
* Matcher used to check if index is at the
* start of a file
* Matcher used to check if index is at the start of a file.
*/
internal class StartOfFileMatcher : Matcher{
override fun matches(

View File

@ -11,6 +11,9 @@ package com.maddyhome.idea.vim.regexp.nfa.matcher
import com.maddyhome.idea.vim.api.VimEditor
import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
/**
* Matcher used to check if index is at the start of a line.
*/
internal class StartOfLineMatcher : Matcher {
override fun matches(
editor: VimEditor,

View File

@ -11,6 +11,9 @@ package com.maddyhome.idea.vim.regexp.nfa.matcher
import com.maddyhome.idea.vim.api.VimEditor
import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
/**
* Matcher used to check if index is at the start of a word.
*/
internal class StartOfWordMatcher : Matcher {
override fun matches(
editor: VimEditor,

View File

@ -16,7 +16,19 @@ import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser
import org.antlr.v4.runtime.CharStreams
import org.antlr.v4.runtime.CommonTokenStream
/**
* Represents a parser of Vim's patterns.
* This is a singleton.
*/
internal object VimRegexParser {
/**
* Tries to parse a given pattern
*
* @param pattern The Vim pattern that is to be parsed
*
* @return The result, either successful or not, of trying to parse the pattern
*/
fun parse(pattern: String) : VimRegexParserResult {
return try {
val regexLexer = BailErrorLexer(CharStreams.fromString(pattern))
@ -30,6 +42,12 @@ internal object VimRegexParser {
}
}
/**
* Auxiliary function used to get the case sensitivity settings from the lexer.
* The lexer has an internal flag, ignoreCase, that is initially null; if it
* then comes across a \c, it sets this flag to true, and if it comes across a
* \C, sets it to false.
*/
private fun getCaseSensitivitySettings(lexer: RegexLexer) : CaseSensitivitySettings {
return when (lexer.ignoreCase) {
// explicitly compare with true and false, since it might be null

View File

@ -10,11 +10,35 @@ package com.maddyhome.idea.vim.regexp.parser
import org.antlr.v4.runtime.tree.ParseTree
/**
 * Outcome of trying to parse a string that represents a Vim
 * regular expression into a parse tree.
 *
 * See [Success] and [Failure] for the two variants declared here.
 */
internal sealed class VimRegexParserResult {

  /**
   * The pattern was parsed successfully.
   *
   * @property tree The parse tree of the parsed regular expression.
   * @property caseSensitivitySettings The value of the case sensitivity flag in the regular expression.
   */
  data class Success(
    val tree: ParseTree,
    val caseSensitivitySettings: CaseSensitivitySettings,
  ) : VimRegexParserResult()

  /**
   * The pattern could not be parsed.
   *
   * @property message A message explaining why parsing failed.
   */
  data class Failure(
    val message: String = "Invalid pattern",
  ) : VimRegexParserResult()
}
/**
* Represents the case sensitivity setting of a regular expression.
* IGNORE_CASE is for \c, NO_IGNORE_CASE for \C, and DEFAULT when
* none of these tokens are present.
*
* @see :help /ignorecase
*/
internal enum class CaseSensitivitySettings {
DEFAULT,
IGNORE_CASE,

View File

@ -11,6 +11,11 @@ package com.maddyhome.idea.vim.regexp.parser.visitors
import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser
import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor
/**
* A tree visitor for visiting nodes representing a collection.
*
* @see :help /collection
*/
internal class CollectionElementVisitor : RegexParserBaseVisitor<Pair<CollectionElement, Boolean>>() {
override fun visitSingleColElem(ctx: RegexParser.SingleColElemContext): Pair<CollectionElement, Boolean> {
@ -41,7 +46,23 @@ internal class CollectionElementVisitor : RegexParserBaseVisitor<Pair<Collection
}
}
/**
 * One element of a collection. An element is either a
 * single character or a range of characters.
 */
internal sealed class CollectionElement {

  /**
   * A collection element made of a single character.
   *
   * @property char The character of this element.
   */
  data class SingleCharacter(
    val char: Char,
  ) : CollectionElement()

  /**
   * A collection element made of a range of characters.
   *
   * @property start The character at which the range starts.
   * @property end The character at which the range ends.
   */
  data class CharacterRange(
    val start: Char,
    val end: Char,
  ) : CollectionElement()
}

View File

@ -13,6 +13,12 @@ import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor
import org.antlr.v4.runtime.Token
import org.antlr.v4.runtime.tree.TerminalNode
/**
* A tree visitor for visiting nodes representing a multi. It is used to identify
* what type of multi is being visited.
*
* @see :help /multi
*/
internal class MultiVisitor : RegexParserBaseVisitor<Multi>() {
override fun visitZeroOrMore(ctx: RegexParser.ZeroOrMoreContext): Multi {
@ -55,11 +61,22 @@ internal class MultiVisitor : RegexParserBaseVisitor<Multi>() {
}
}
/**
* Represents a multi.
*
* @see :help /multi
*/
internal sealed class Multi {
/**
* Delimits the number of times that a multi should
* make a certain atom repeat itself
*
* @param lowerBoundary The minimum number of times that the atom can repeat itself.
* @param upperBoundary The maximum number of times that the atom can repeat itself. This number can be infinite.
* @param isGreedy Whether this multi is greedy. A greedy multi always consumes as much input
* as it can, while a non-greedy, or lazy, multi consumes the least amount of input
* it can.
*/
internal data class RangeMulti(
val lowerBoundary: RangeBoundary.IntRangeBoundary,
@ -68,12 +85,21 @@ internal sealed class Multi {
) : Multi()
/**
* Used to represent an atomic group.
* Used to represent an atomic atom. Atoms that are atomic match
* as if they were a whole pattern.
*
* @see :help /\@>
*/
object AtomicMulti : Multi()
/**
* Used to represent an assertion multi
* Used to represent an assertion multi. These
* are also known as look-ahead and look-behind.
* They can be positive, meaning that they must match,
* or negative, meaning that they must not match.
*
* @param isPositive Whether the assertion is positive
* @param isAhead Whether it is a look-ahead
*/
internal data class AssertionMulti(
val isPositive: Boolean,
@ -81,6 +107,9 @@ internal sealed class Multi {
) : Multi()
}
/**
* Used to represent a boundary of a range multi
*/
internal sealed class RangeBoundary {
/**
* Represents an integer boundary

View File

@ -25,9 +25,22 @@ import com.maddyhome.idea.vim.regexp.nfa.matcher.StartOfWordMatcher
import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser
import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor
/**
* A tree visitor for converting a parsed Vim pattern into an internal
* NFA, which is then used to find matches in an editor.
* This is a singleton.
*/
internal object PatternVisitor : RegexParserBaseVisitor<NFA>() {
/**
* Tracks the number of capture groups visited
*/
private var groupCount: Int = 0
/**
* Maps tree nodes representing capture groups to their respective group number
*
*/
private val groupNumbers: HashMap<RegexParser.GroupingCaptureContext, Int> = HashMap()
override fun visitPattern(ctx: RegexParser.PatternContext): NFA {