mirror of
https://github.com/chylex/IntelliJ-IdeaVim.git
synced 2025-02-26 05:46:00 +01:00
documenting regex code
This commit is contained in:
parent
e2c6c0539f
commit
823a52583c
vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp
@ -17,16 +17,17 @@ import com.maddyhome.idea.vim.regexp.parser.VimRegexParserResult
|
||||
import com.maddyhome.idea.vim.regexp.parser.visitors.PatternVisitor
|
||||
|
||||
/**
|
||||
* Represents a compiled Vim regular expression. Provides methods to
|
||||
* Represents a compiled Vim pattern. Provides methods to
|
||||
* match, replace and split strings in the editor with a pattern.
|
||||
* To learn about Vim's pattern syntax see :help pattern
|
||||
*
|
||||
* @see :help /pattern
|
||||
*/
|
||||
public class VimRegex(pattern: String) {
|
||||
private enum class CaseSensitivity { SMART_CASE, IGNORE_CASE, NO_IGNORE_CASE }
|
||||
private val caseSensitivity: CaseSensitivity
|
||||
|
||||
/**
|
||||
* The NFA representing the compiled regular expression
|
||||
* The NFA representing the compiled pattern
|
||||
*/
|
||||
private val nfa: NFA
|
||||
|
||||
@ -49,7 +50,7 @@ public class VimRegex(pattern: String) {
|
||||
|
||||
|
||||
/**
|
||||
* Indicates whether the regular expression can find at least one match in the specified editor
|
||||
* Indicates whether the pattern can find at least one match in the specified editor
|
||||
*
|
||||
* @param editor The editor in which to look for the match
|
||||
*
|
||||
@ -79,10 +80,12 @@ public class VimRegex(pattern: String) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first match of a regular expression in the editor, beginning at the specified index.
|
||||
* Returns the first match of a pattern in the editor, beginning at the specified index.
|
||||
*
|
||||
* @param editor The editor in which to look for the match
|
||||
* @param startIndex The index at which to start the search
|
||||
*
|
||||
* @return The first match found in the editor
|
||||
*/
|
||||
public fun find(
|
||||
editor: VimEditor,
|
||||
@ -111,11 +114,13 @@ public class VimRegex(pattern: String) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a sequence of all occurrences of a regular expression within
|
||||
* Returns a sequence of all occurrences of a pattern within
|
||||
* the editor, beginning at the specified index
|
||||
*
|
||||
* @param editor The editor in which to look for the match
|
||||
* @param startIndex The index at which to start the search
|
||||
*
|
||||
* @return All the matches found in the editor
|
||||
*/
|
||||
public fun findAll(
|
||||
editor: VimEditor,
|
||||
@ -146,11 +151,13 @@ public class VimRegex(pattern: String) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to match a regular expression exactly at the specified
|
||||
* Attempts to match a pattern exactly at the specified
|
||||
* index in the editor text.
|
||||
*
|
||||
* @param editor The editor in which to look for the match
|
||||
* @param index The index to start the match
|
||||
*
|
||||
* @return The match, either successful or not, found at the specified index
|
||||
*/
|
||||
public fun matchAt(
|
||||
editor: VimEditor,
|
||||
@ -163,6 +170,8 @@ public class VimRegex(pattern: String) {
|
||||
* Attempts to match the entire editor against the pattern.
|
||||
*
|
||||
* @param editor The editor in which to look for the match
|
||||
*
|
||||
* @return The match, either successful or not, when matching against the entire editor
|
||||
*/
|
||||
public fun matchEntire(
|
||||
editor: VimEditor
|
||||
@ -178,9 +187,11 @@ public class VimRegex(pattern: String) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates whether the regular expression matches the entire editor.
|
||||
* Indicates whether the pattern matches the entire editor.
|
||||
*
|
||||
* @param editor The editor in which to look for the match
|
||||
*
|
||||
* @return True if the entire editor matches, false otherwise
|
||||
*/
|
||||
public fun matches(
|
||||
editor: VimEditor
|
||||
@ -193,10 +204,12 @@ public class VimRegex(pattern: String) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a regular expression matches a part of the editor
|
||||
* Checks if a pattern matches a part of the editor
|
||||
* starting exactly at the specified index.
|
||||
*
|
||||
* @param editor The editor in which to look for the match
|
||||
*
|
||||
* @return True if there is a successful match starting at the specified index, false otherwise
|
||||
*/
|
||||
public fun matchesAt(
|
||||
editor: VimEditor,
|
||||
@ -209,6 +222,15 @@ public class VimRegex(pattern: String) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simulates the internal NFA with the determined flags,
|
||||
* started on a given index.
|
||||
*
|
||||
* @param editor The editor that is used for the simulation
|
||||
* @param index The index where the simulation should start
|
||||
*
|
||||
* @return The resulting match result
|
||||
*/
|
||||
private fun simulateNFA(editor: VimEditor, index: Int = 0) : VimMatchResult {
|
||||
val ignoreCase = when (caseSensitivity) {
|
||||
CaseSensitivity.NO_IGNORE_CASE -> false
|
||||
|
@ -136,6 +136,7 @@ internal class NFA private constructor(
|
||||
* and end, respectively, the capturing of a group.
|
||||
*
|
||||
* @param groupNumber The number of the capture group
|
||||
* @param force Whether the state should force-end the capturing of the group
|
||||
*/
|
||||
internal fun capture(groupNumber: Int, force: Boolean = true) {
|
||||
this.startState.startCapture.add(groupNumber)
|
||||
@ -148,10 +149,10 @@ internal class NFA private constructor(
|
||||
* may or may not consume input, and can be positive (simulation must
|
||||
* succeed) or negative (simulation must fail).
|
||||
*
|
||||
* @param shouldConsume Whether the assertion should consume input
|
||||
* @param isPositive Whether the assertion is positive or negative
|
||||
* @param shouldConsume Whether the assertion should consume input.
|
||||
* @param isPositive Whether the assertion is positive or negative.
|
||||
*
|
||||
* @return The NFA instance marked for assertion
|
||||
* @return The NFA instance marked for assertion.
|
||||
*/
|
||||
internal fun assert(shouldConsume: Boolean, isPositive: Boolean = false) : NFA {
|
||||
val newStart = NFAState()
|
||||
|
@ -8,6 +8,22 @@
|
||||
|
||||
package com.maddyhome.idea.vim.regexp.nfa
|
||||
|
||||
/**
|
||||
* Represents an assertion.
|
||||
*
|
||||
* @param shouldConsume Whether the simulation should consume the input "consumed" by the assertion.
|
||||
* @param isPositive True if the assertion is positive, false if negative.
|
||||
* @param startState The state to jump to, to start the assertion
|
||||
* @param endState The state where the assertion should end
|
||||
* @param jumpTo The state that the simulation should jump to, to resume with normal
|
||||
* simulation after the assertion.
|
||||
*
|
||||
* @see :help /@=
|
||||
* @see :help /@!
|
||||
* @see :help /@<=
|
||||
* @see :help /@<!
|
||||
* @see :help /@>
|
||||
*/
|
||||
internal data class NFAAssertion(
|
||||
val shouldConsume: Boolean,
|
||||
val isPositive: Boolean,
|
||||
|
@ -11,39 +11,39 @@ package com.maddyhome.idea.vim.regexp.nfa
|
||||
/**
|
||||
* Represents a single state of a NFA.
|
||||
*/
|
||||
internal class NFAState (
|
||||
internal class NFAState {
|
||||
/**
|
||||
* All the transitions from this state. Order matters.
|
||||
* Transitions with higher priority should be in lower
|
||||
* indexes. This is relevant for the implementation of
|
||||
* lazy quantifiers.
|
||||
*/
|
||||
val transitions: ArrayList<NFATransition> = ArrayList(),
|
||||
internal val transitions: ArrayList<NFATransition> = ArrayList()
|
||||
|
||||
/**
|
||||
*
|
||||
* If this is not null, then when simulation reaches this state,
|
||||
* it has to check if this assertion is successful to continue.
|
||||
*/
|
||||
var assertion: NFAAssertion? = null,
|
||||
internal var assertion: NFAAssertion? = null
|
||||
|
||||
/**
|
||||
* Stores the numbers of the capture groups that start
|
||||
* being captured on this state
|
||||
*/
|
||||
val startCapture: MutableList<Int> = ArrayList(),
|
||||
internal val startCapture: MutableList<Int> = ArrayList()
|
||||
|
||||
/**
|
||||
* Stores the number of the capture groups that stop
|
||||
* being captured on this state
|
||||
*/
|
||||
val endCapture: MutableList<Int> = ArrayList(),
|
||||
internal val endCapture: MutableList<Int> = ArrayList()
|
||||
|
||||
/**
|
||||
* Stores the number of the capture groups that stop
|
||||
* being captured on this state, even if that group
|
||||
* had already been set to stop being captured
|
||||
*/
|
||||
val forceEndCapture: MutableList<Int> = ArrayList(),
|
||||
) {
|
||||
internal val forceEndCapture: MutableList<Int> = ArrayList()
|
||||
|
||||
/**
|
||||
* Adds a new transition from this state. This transition
|
||||
|
@ -11,18 +11,15 @@ package com.maddyhome.idea.vim.regexp.nfa
|
||||
import com.maddyhome.idea.vim.regexp.nfa.matcher.Matcher
|
||||
|
||||
/**
|
||||
* Represents a transition of the NFA
|
||||
* Represents a transition of the NFA.
|
||||
*
|
||||
* @param matcher The matcher that determines if the transition can
|
||||
* be made, as well as information on how many characters
|
||||
* are consumed by the transition.
|
||||
*
|
||||
* @param destState The destination state of the transition.
|
||||
*/
|
||||
internal data class NFATransition(
|
||||
/**
|
||||
* The matcher that determines if the transition can
|
||||
* be made, as well as information on how many characters
|
||||
* are consumed by the transition
|
||||
*/
|
||||
val matcher: Matcher,
|
||||
|
||||
/**
|
||||
* The destination state of the transition
|
||||
*/
|
||||
val destState: NFAState,
|
||||
)
|
@ -11,6 +11,9 @@ package com.maddyhome.idea.vim.regexp.nfa.matcher
|
||||
import com.maddyhome.idea.vim.api.VimEditor
|
||||
import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
|
||||
|
||||
/**
|
||||
* Matcher used to check if index is at the end of a line.
|
||||
*/
|
||||
internal class EndOfLineMatcher : Matcher {
|
||||
override fun matches(
|
||||
editor: VimEditor,
|
||||
|
@ -11,6 +11,9 @@ package com.maddyhome.idea.vim.regexp.nfa.matcher
|
||||
import com.maddyhome.idea.vim.api.VimEditor
|
||||
import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
|
||||
|
||||
/**
|
||||
* Matcher used to check if index is at the end of a word.
|
||||
*/
|
||||
internal class EndOfWordMatcher : Matcher {
|
||||
override fun matches(
|
||||
editor: VimEditor,
|
||||
|
@ -12,8 +12,7 @@ import com.maddyhome.idea.vim.api.VimEditor
|
||||
import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
|
||||
|
||||
/**
|
||||
* Matcher used to check if index is at the
|
||||
* start of a file
|
||||
* Matcher used to check if index is at the start of a file.
|
||||
*/
|
||||
internal class StartOfFileMatcher : Matcher{
|
||||
override fun matches(
|
||||
|
@ -11,6 +11,9 @@ package com.maddyhome.idea.vim.regexp.nfa.matcher
|
||||
import com.maddyhome.idea.vim.api.VimEditor
|
||||
import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
|
||||
|
||||
/**
|
||||
* Matcher used to check if index is at the start of a line.
|
||||
*/
|
||||
internal class StartOfLineMatcher : Matcher {
|
||||
override fun matches(
|
||||
editor: VimEditor,
|
||||
|
@ -11,6 +11,9 @@ package com.maddyhome.idea.vim.regexp.nfa.matcher
|
||||
import com.maddyhome.idea.vim.api.VimEditor
|
||||
import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
|
||||
|
||||
/**
|
||||
* Matcher used to check if index is at the start of a word.
|
||||
*/
|
||||
internal class StartOfWordMatcher : Matcher {
|
||||
override fun matches(
|
||||
editor: VimEditor,
|
||||
|
@ -16,7 +16,19 @@ import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser
|
||||
import org.antlr.v4.runtime.CharStreams
|
||||
import org.antlr.v4.runtime.CommonTokenStream
|
||||
|
||||
/**
|
||||
* Represents a parser of Vim's patterns.
|
||||
* This is a singleton.
|
||||
*/
|
||||
internal object VimRegexParser {
|
||||
|
||||
/**
|
||||
* Tries to parse a given pattern
|
||||
*
|
||||
* @param pattern The Vim pattern that is to be parsed
|
||||
*
|
||||
* @return The result, either successful or not, of trying to parse the pattern
|
||||
*/
|
||||
fun parse(pattern: String) : VimRegexParserResult {
|
||||
return try {
|
||||
val regexLexer = BailErrorLexer(CharStreams.fromString(pattern))
|
||||
@ -30,6 +42,12 @@ internal object VimRegexParser {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Auxiliary function used to get the case sensitivity settings from the lexer.
|
||||
* The lexer has an internal flag, ignoreCase, that is initially null; if it
|
||||
* then comes across a \c, it sets this flag to true, and if it comes across a
|
||||
* \C, sets it to false.
|
||||
*/
|
||||
private fun getCaseSensitivitySettings(lexer: RegexLexer) : CaseSensitivitySettings {
|
||||
return when (lexer.ignoreCase) {
|
||||
// explicitly compare with true and false, since it might be null
|
||||
|
@ -10,11 +10,35 @@ package com.maddyhome.idea.vim.regexp.parser
|
||||
|
||||
import org.antlr.v4.runtime.tree.ParseTree
|
||||
|
||||
/**
|
||||
* The result of trying to parse a string representing a Vim
|
||||
* regular expression into a parse tree
|
||||
*/
|
||||
internal sealed class VimRegexParserResult {
|
||||
|
||||
/**
|
||||
* Represents a successful parse
|
||||
*
|
||||
* @param tree The parse tree of the parsed regular expression
|
||||
* @param caseSensitivitySettings The value of the case sensitivity flag in the regular expression
|
||||
*/
|
||||
data class Success(val tree: ParseTree, val caseSensitivitySettings: CaseSensitivitySettings) : VimRegexParserResult()
|
||||
|
||||
/**
|
||||
* Represents an unsuccessful parse
|
||||
*
|
||||
* @param message A message explaining why parsing failed
|
||||
*/
|
||||
data class Failure(val message: String = "Invalid pattern") : VimRegexParserResult()
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents the case sensitivity setting of a regular expression.
|
||||
* IGNORE_CASE is for \c, NO_IGNORE_CASE for \C, and DEFAULT when
|
||||
* none of these tokens are present.
|
||||
*
|
||||
* @see :help /ignorecase
|
||||
*/
|
||||
internal enum class CaseSensitivitySettings {
|
||||
DEFAULT,
|
||||
IGNORE_CASE,
|
||||
|
@ -11,6 +11,11 @@ package com.maddyhome.idea.vim.regexp.parser.visitors
|
||||
import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser
|
||||
import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor
|
||||
|
||||
/**
|
||||
* A tree visitor for visiting nodes representing a collection.
|
||||
*
|
||||
* @see :help /collection
|
||||
*/
|
||||
internal class CollectionElementVisitor : RegexParserBaseVisitor<Pair<CollectionElement, Boolean>>() {
|
||||
|
||||
override fun visitSingleColElem(ctx: RegexParser.SingleColElemContext): Pair<CollectionElement, Boolean> {
|
||||
@ -41,7 +46,23 @@ internal class CollectionElementVisitor : RegexParserBaseVisitor<Pair<Collection
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a single element in a collection. This element can be
|
||||
* a single character, or a range of characters.
|
||||
*/
|
||||
internal sealed class CollectionElement {
|
||||
/**
|
||||
* Represents a single character collection element.
|
||||
*
|
||||
* @param char The character element.
|
||||
*/
|
||||
data class SingleCharacter(val char: Char) : CollectionElement()
|
||||
|
||||
/**
|
||||
* Represents a range of characters collection element.
|
||||
*
|
||||
* @param start The starting character of the range.
|
||||
* @param end The ending character of the range.
|
||||
*/
|
||||
data class CharacterRange(val start: Char, val end: Char) : CollectionElement()
|
||||
}
|
@ -13,6 +13,12 @@ import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor
|
||||
import org.antlr.v4.runtime.Token
|
||||
import org.antlr.v4.runtime.tree.TerminalNode
|
||||
|
||||
/**
|
||||
* A tree visitor for visiting nodes representing a multi. It is used to identify
|
||||
* what type of multi is being visited.
|
||||
*
|
||||
* @see :help /multi
|
||||
*/
|
||||
internal class MultiVisitor : RegexParserBaseVisitor<Multi>() {
|
||||
|
||||
override fun visitZeroOrMore(ctx: RegexParser.ZeroOrMoreContext): Multi {
|
||||
@ -55,11 +61,22 @@ internal class MultiVisitor : RegexParserBaseVisitor<Multi>() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a multi.
|
||||
*
|
||||
* @see :help multi
|
||||
*/
|
||||
internal sealed class Multi {
|
||||
|
||||
/**
|
||||
* Delimits the number of times that a multi should
|
||||
* make a certain atom repeat itself
|
||||
*
|
||||
* @param lowerBoundary The minimum number of times that the atom can repeat itself.
|
||||
* @param upperBoundary The maximum number of times that the atom can repeat itself. This number can be infinite.
|
||||
* @param isGreedy Whether this multi is greedy. A greedy multi always consumes as much input
|
||||
* as it can, while a non-greedy, or lazy, multi consumes the least amount of input
|
||||
* it can.
|
||||
*/
|
||||
internal data class RangeMulti(
|
||||
val lowerBoundary: RangeBoundary.IntRangeBoundary,
|
||||
@ -68,12 +85,21 @@ internal sealed class Multi {
|
||||
) : Multi()
|
||||
|
||||
/**
|
||||
* Used to represent an atomic group.
|
||||
* Used to represent an atomic atom. Atoms that are atomic match
|
||||
* as if they were a whole pattern.
|
||||
*
|
||||
* @see :help /\@>
|
||||
*/
|
||||
object AtomicMulti : Multi()
|
||||
|
||||
/**
|
||||
* Used to represent an assertion multi
|
||||
* Used to represent an assertion multi. These
|
||||
* are also known as look-ahead and look-behind.
|
||||
* They can be positive, meaning that they must match,
|
||||
* or negative, meaning that they must not match.
|
||||
*
|
||||
* @param isPositive Whether the assertion is positive
|
||||
* @param isAhead Whether it is a look-ahead
|
||||
*/
|
||||
internal data class AssertionMulti(
|
||||
val isPositive: Boolean,
|
||||
@ -81,6 +107,9 @@ internal sealed class Multi {
|
||||
) : Multi()
|
||||
}
|
||||
|
||||
/**
|
||||
* Used to represent a boundary of a range multi
|
||||
*/
|
||||
internal sealed class RangeBoundary {
|
||||
/**
|
||||
* Represents an integer boundary
|
||||
|
@ -25,9 +25,22 @@ import com.maddyhome.idea.vim.regexp.nfa.matcher.StartOfWordMatcher
|
||||
import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser
|
||||
import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor
|
||||
|
||||
/**
|
||||
* A tree visitor for converting a parsed Vim pattern into an internal
|
||||
* NFA, which is then used to find matches in an editor.
|
||||
* This is a singleton.
|
||||
*/
|
||||
internal object PatternVisitor : RegexParserBaseVisitor<NFA>() {
|
||||
|
||||
/**
|
||||
* Tracks the number of capture groups visited
|
||||
*/
|
||||
private var groupCount: Int = 0
|
||||
|
||||
/**
|
||||
* Maps tree nodes representing capture groups to their respective group number
|
||||
*
|
||||
*/
|
||||
private val groupNumbers: HashMap<RegexParser.GroupingCaptureContext, Int> = HashMap()
|
||||
|
||||
override fun visitPattern(ctx: RegexParser.PatternContext): NFA {
|
||||
|
Loading…
Reference in New Issue
Block a user