documenting regex code

2025-02-26 14:46:00 +01:00 · 2023-08-21 17:24:03 +01:00 · 2023-08-21 17:24:03 +01:00 · 823a52583c
commit 823a52583c
parent e2c6c0539f
15 changed files with 186 additions and 34 deletions
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegex.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/VimRegex.kt
@ -17,16 +17,17 @@ import com.maddyhome.idea.vim.regexp.parser.VimRegexParserResult
 import com.maddyhome.idea.vim.regexp.parser.visitors.PatternVisitor

 /**
- * Represents a compiled Vim regular expression. Provides methods to
+ * Represents a compiled Vim pattern. Provides methods to
 * match, replace and split strings in the editor with a pattern.
- * To learn about Vim's pattern syntax see :help pattern
+ *
+ * @see :help /pattern
 */
 public class VimRegex(pattern: String) {
  private enum class CaseSensitivity { SMART_CASE, IGNORE_CASE, NO_IGNORE_CASE }
  private val caseSensitivity: CaseSensitivity

  /**
-   * The NFA representing the compiled regular expression
+   * The NFA representing the compiled pattern
   */
  private val nfa: NFA

@ -49,7 +50,7 @@ public class VimRegex(pattern: String) {


  /**
-   * Indicates whether the regular expression can find at least one match in the specified editor
+   * Indicates whether the pattern can find at least one match in the specified editor
   *
   * @param editor The editor where to look for the match in
   *
@ -79,10 +80,12 @@ public class VimRegex(pattern: String) {
  }

  /**
-   * Returns the first match of a regular expression in the editor, beginning at the specified index.
+   * Returns the first match of a pattern in the editor, beginning at the specified index.
   *
   * @param editor     The editor where to look for the match in
   * @param startIndex The index to start the find
+   *
+   * @return The first match found in the editor
   */
  public fun find(
    editor: VimEditor,
@ -111,11 +114,13 @@ public class VimRegex(pattern: String) {
  }

  /**
-   * Returns a sequence of all occurrences of a regular expression within
+   * Returns a sequence of all occurrences of a pattern within
   * the editor, beginning at the specified index
   *
   * @param editor     The editor where to look for the match in
   * @param startIndex The index to start the find
+   *
+   * @return All the matches found in the editor
   */
  public fun findAll(
    editor: VimEditor,
@ -146,11 +151,13 @@ public class VimRegex(pattern: String) {
  }

  /**
-   * Attempts to match a regular expression exactly at the specified
+   * Attempts to match a pattern exactly at the specified
   * index in the editor text.
   *
   * @param editor The editor where to look for the match in
   * @param index  The index to start the match
+   *
+   * @return The match, either successful or not, found at the specified index
   */
  public fun matchAt(
    editor: VimEditor,
@ -163,6 +170,8 @@ public class VimRegex(pattern: String) {
   * Attempts to match the entire editor against the pattern.
   *
   * @param editor The editor where to look for the match in
+   *
+   * @return The match, either successful or not, when matching against the entire editor
   */
  public fun matchEntire(
    editor: VimEditor
@ -178,9 +187,11 @@ public class VimRegex(pattern: String) {
  }

  /**
-   * Indicates whether the regular expression matches the entire editor.
+   * Indicates whether the pattern matches the entire editor.
   *
   * @param editor The editor where to look for the match in
+   *
+   * @return True if the entire editor matches, false otherwise
   */
  public fun matches(
    editor: VimEditor
@ -193,10 +204,12 @@ public class VimRegex(pattern: String) {
  }

  /**
-   * Checks if a regular expression matches a part of the editor
+   * Checks if a pattern matches a part of the editor
   * starting exactly at the specified index.
   *
   * @param editor The editor where to look for the match in
+   *
+   * @return True if there is a successful match starting at the specified index, false otherwise
   */
  public fun matchesAt(
    editor: VimEditor,
@ -209,6 +222,15 @@ public class VimRegex(pattern: String) {
    }
  }

+  /**
+   * Simulates the internal NFA with the determined flags,
+   * starting at a given index.
+   *
+   * @param editor The editor that is used for the simulation
+   * @param index  The index where the simulation should start
+   *
+   * @return The match result produced by the simulation
+   */
  private fun simulateNFA(editor: VimEditor, index: Int = 0) : VimMatchResult {
    val ignoreCase = when (caseSensitivity) {
      CaseSensitivity.NO_IGNORE_CASE -> false
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/NFA.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/NFA.kt
@ -136,6 +136,7 @@ internal class NFA private constructor(
   * and end, respectfully, the capturing of a group.
   *
   * @param groupNumber The number of the capture group
+   * @param force       Whether the state should force-end the capturing of the group
   */
  internal fun capture(groupNumber: Int, force: Boolean = true) {
    this.startState.startCapture.add(groupNumber)
@ -148,10 +149,10 @@ internal class NFA private constructor(
   * may or may not consume input, and can be positive (simulation must
   * succeed) or negative (simulation must fail).
   *
-   * @param shouldConsume Whether the assertion should consume input
-   * @param isPositive Whether the assertion is positive or negative
+   * @param shouldConsume Whether the assertion should consume input.
+   * @param isPositive Whether the assertion is positive or negative.
   *
-   * @return The NFA instance marked for assertion
+   * @return The NFA instance marked for assertion.
   */
  internal fun assert(shouldConsume: Boolean, isPositive: Boolean = false) : NFA {
    val newStart = NFAState()
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/NFAAssertion.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/NFAAssertion.kt
@ -8,6 +8,22 @@

 package com.maddyhome.idea.vim.regexp.nfa

+/**
+ * Represents an assertion.
+ *
+ * @param shouldConsume Whether the simulation should consume the input "consumed" by the assertion.
+ * @param isPositive    True if the assertion is positive, false if negative.
+ * @param startState    The state to jump to, to start the assertion
+ * @param endState      The state where the assertion should end
+ * @param jumpTo        The state that the simulation should jump to, to resume with normal
+ * simulation after the assertion.
+ *
+ * @see :help /\@=
+ * @see :help /\@!
+ * @see :help /\@<=
+ * @see :help /\@<!
+ * @see :help /\@>
+ */
 internal data class NFAAssertion(
  val shouldConsume: Boolean,
  val isPositive: Boolean,
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/NFAState.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/NFAState.kt
@ -11,39 +11,39 @@ package com.maddyhome.idea.vim.regexp.nfa
 /**
 * Represents a single state of a NFA.
 */
-internal class NFAState (
+internal class NFAState {
  /**
   * All the transitions from this state. Order matters.
   * Transitions with higher priority should be in lower
   * indexes. This is relevant for the implementation of
   * lazy quantifiers.
   */
-  val transitions: ArrayList<NFATransition> = ArrayList(),
+  internal val transitions: ArrayList<NFATransition> = ArrayList()

  /**
-   *
+   * If this is not null, then when simulation reaches this state,
+   * it has to check if this assertion is successful to continue.
   */
-  var assertion: NFAAssertion? = null,
+  internal var assertion: NFAAssertion? = null

  /**
   * Stores the numbers of the capture groups that start
   * being captured on this state
   */
-  val startCapture: MutableList<Int> = ArrayList(),
+  internal val startCapture: MutableList<Int> = ArrayList()

  /**
   *  Stores the number of the capture groups that stop
   *  being captured on this state
   */
-  val endCapture: MutableList<Int> = ArrayList(),
+  internal val endCapture: MutableList<Int> = ArrayList()

  /**
   *  Stores the number of the capture groups that stop
   *  being captured on this state, even if that group
   *  had already been set to stop being captured
   */
-  val forceEndCapture: MutableList<Int> = ArrayList(),
-) {
+  internal val forceEndCapture: MutableList<Int> = ArrayList()

  /**
   * Adds a new transition from this state. This transition
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/NFATransition.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/NFATransition.kt
@ -11,18 +11,15 @@ package com.maddyhome.idea.vim.regexp.nfa
 import com.maddyhome.idea.vim.regexp.nfa.matcher.Matcher

 /**
- * Represents a transition of the NFA
+ * Represents a transition of the NFA.
+ *
+ * @param matcher The matcher that determines if the transition can
+ * be made, as well as information on how many characters
+ * are consumed by the transition.
+ *
+ * @param destState The destination state of the transition.
 */
 internal data class NFATransition(
-  /**
-   * The matcher that determines if the transition can
-   * be made, as well as information on how many characters
-   * are consumed by the transition
-   */
  val matcher: Matcher,
-
-  /**
-   * The destination state of the transition
-   */
  val destState: NFAState,
 )
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/matcher/EndOfLineMatcher.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/matcher/EndOfLineMatcher.kt
@ -11,6 +11,9 @@ package com.maddyhome.idea.vim.regexp.nfa.matcher
 import com.maddyhome.idea.vim.api.VimEditor
 import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection

+/**
+ * Matcher used to check if index is at the end of a line.
+ */
 internal class EndOfLineMatcher : Matcher {
  override fun matches(
    editor: VimEditor,
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/matcher/EndOfWordMatcher.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/matcher/EndOfWordMatcher.kt
@ -11,6 +11,9 @@ package com.maddyhome.idea.vim.regexp.nfa.matcher
 import com.maddyhome.idea.vim.api.VimEditor
 import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection

+/**
+ * Matcher used to check if index is at the end of a word.
+ */
 internal class EndOfWordMatcher : Matcher {
  override fun matches(
    editor: VimEditor,
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/matcher/StartOfFileMatcher.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/matcher/StartOfFileMatcher.kt
@ -12,8 +12,7 @@ import com.maddyhome.idea.vim.api.VimEditor
 import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection

 /**
- * Matcher used to check if index is at the
- * start of a file
+ * Matcher used to check if index is at the start of a file.
 */
 internal class StartOfFileMatcher : Matcher{
  override fun matches(
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/matcher/StartOfLineMatcher.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/matcher/StartOfLineMatcher.kt
@ -11,6 +11,9 @@ package com.maddyhome.idea.vim.regexp.nfa.matcher
 import com.maddyhome.idea.vim.api.VimEditor
 import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection

+/**
+ * Matcher used to check if index is at the start of a line.
+ */
 internal class StartOfLineMatcher : Matcher {
  override fun matches(
    editor: VimEditor,
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/matcher/StartOfWordMatcher.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/nfa/matcher/StartOfWordMatcher.kt
@ -11,6 +11,9 @@ package com.maddyhome.idea.vim.regexp.nfa.matcher
 import com.maddyhome.idea.vim.api.VimEditor
 import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection

+/**
+ * Matcher used to check if index is at the start of a word.
+ */
 internal class StartOfWordMatcher : Matcher {
  override fun matches(
    editor: VimEditor,
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/VimRegexParser.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/VimRegexParser.kt
@ -16,7 +16,19 @@ import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser
 import org.antlr.v4.runtime.CharStreams
 import org.antlr.v4.runtime.CommonTokenStream

+/**
+ * Represents a parser of Vim's patterns.
+ * This is a singleton.
+ */
 internal object VimRegexParser {
+
+  /**
+   * Tries to parse a given pattern
+   *
+   * @param pattern The Vim pattern that is to be parsed
+   *
+   * @return The result, either successful or not, of trying to parse the pattern
+   */
  fun parse(pattern: String) : VimRegexParserResult {
    return try {
      val regexLexer = BailErrorLexer(CharStreams.fromString(pattern))
@ -30,6 +42,12 @@ internal object VimRegexParser {
    }
  }

+  /**
+   * Auxiliary function used to get the case sensitivity settings from the lexer.
+   * The lexer has an internal flag, ignoreCase, that is initially null; if it
+   * then comes across a \c, it sets this flag to true, and if it comes across a
+   * \C, sets it to false.
+   */
  private fun getCaseSensitivitySettings(lexer: RegexLexer) : CaseSensitivitySettings {
    return when (lexer.ignoreCase) {
      // explicitly compare with true and false, since it might be null
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/VimRegexParserResult.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/VimRegexParserResult.kt
@ -10,11 +10,35 @@ package com.maddyhome.idea.vim.regexp.parser

 import org.antlr.v4.runtime.tree.ParseTree

+/**
+ * The result of trying to parse a string representing a Vim
+ * regular expression into a parse tree
+ */
 internal sealed class VimRegexParserResult {
+
+  /**
+   * Represents a successful parse
+   *
+   * @param tree                    The parse tree of the parsed regular expression
+   * @param caseSensitivitySettings The value of the case sensitivity flag in the regular expression
+   */
  data class Success(val tree: ParseTree, val caseSensitivitySettings: CaseSensitivitySettings) : VimRegexParserResult()
+
+  /**
+   * Represents an unsuccessful parse
+   *
+   * @param message A message explaining why parsing failed
+   */
  data class Failure(val message: String = "Invalid pattern") : VimRegexParserResult()
 }

+/**
+ * Represents the case sensitivity setting of a regular expression.
+ * IGNORE_CASE is for \c, NO_IGNORE_CASE for \C, and DEFAULT when
+ * neither of these tokens is present.
+ *
+ * @see :help /ignorecase
+ */
 internal enum class CaseSensitivitySettings {
  DEFAULT,
  IGNORE_CASE,
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/CollectionElementVisitor.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/CollectionElementVisitor.kt
@ -11,6 +11,11 @@ package com.maddyhome.idea.vim.regexp.parser.visitors
 import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser
 import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor

+/**
+ * A tree visitor for visiting nodes representing a collection.
+ *
+ * @see :help /collection
+ */
 internal class CollectionElementVisitor : RegexParserBaseVisitor<Pair<CollectionElement, Boolean>>() {

  override fun visitSingleColElem(ctx: RegexParser.SingleColElemContext): Pair<CollectionElement, Boolean> {
@ -41,7 +46,23 @@ internal class CollectionElementVisitor : RegexParserBaseVisitor<Pair<Collection
  }
 }

+/**
+ * Represents a single element in a collection. This element can be
+ * a single character, or a range of characters.
+ */
 internal sealed class CollectionElement {
+  /**
+   * Represents a single character collection element.
+   *
+   * @param char The character element.
+   */
  data class SingleCharacter(val char: Char) : CollectionElement()
+
+  /**
+   * Represents a range of characters collection element.
+   *
+   * @param start The starting character of the range.
+   * @param end   The ending character of the range.
+   */
  data class CharacterRange(val start: Char, val end: Char) : CollectionElement()
 }
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/MultiVisitor.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/MultiVisitor.kt
@ -13,6 +13,12 @@ import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor
 import org.antlr.v4.runtime.Token
 import org.antlr.v4.runtime.tree.TerminalNode

+/**
+ * A tree visitor for visiting nodes representing a multi. It is used to identify
+ * what type of multi is being visited.
+ *
+ * @see :help /multi
+ */
 internal class MultiVisitor : RegexParserBaseVisitor<Multi>() {

  override fun visitZeroOrMore(ctx: RegexParser.ZeroOrMoreContext): Multi {
@ -55,11 +61,22 @@ internal class MultiVisitor : RegexParserBaseVisitor<Multi>() {
  }
 }

+/**
+ * Represents a multi.
+ *
+ * @see :help /multi
+ */
 internal sealed class Multi {

  /**
   * Delimits the number of times that a multi should
   * make a certain atom repeat itself
+   *
+   * @param lowerBoundary The minimum number of times that the atom can repeat itself.
+   * @param upperBoundary The maximum number of times that the atom can repeat itself. This number can be infinite.
+   * @param isGreedy Whether this multi is greedy. A greedy multi always consumes as much input
+   * as it can, while a non-greedy, or lazy, multi consumes the least amount of input
+   * it can.
   */
  internal data class RangeMulti(
    val lowerBoundary: RangeBoundary.IntRangeBoundary,
@ -68,12 +85,21 @@ internal sealed class Multi {
    ) : Multi()

  /**
-   * Used to represent an atomic group.
+   * Used to represent an atomic atom. Atoms that are atomic match
+   * as if they were a whole pattern.
+   *
+   * @see :help /\@>
   */
  object AtomicMulti : Multi()

  /**
-   * Used to represent an assertion multi
+   * Used to represent an assertion multi. These
+   * are also known as look-ahead and look-behind.
+   * They can be positive, meaning that they must match,
+   * or negative, meaning that they must not match.
+   *
+   * @param isPositive Whether the assertion is positive
+   * @param isAhead    Whether it is a look-ahead
   */
  internal data class AssertionMulti(
    val isPositive: Boolean,
@ -81,6 +107,9 @@ internal sealed class Multi {
  ) : Multi()
 }

+/**
+ * Used to represent a boundary of a range multi
+ */
 internal sealed class RangeBoundary {
  /**
   * Represents an integer boundary
--- a/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/PatternVisitor.kt
+++ b/vim-engine/src/main/kotlin/com/maddyhome/idea/vim/regexp/parser/visitors/PatternVisitor.kt
@ -25,9 +25,22 @@ import com.maddyhome.idea.vim.regexp.nfa.matcher.StartOfWordMatcher
 import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser
 import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor

+/**
+ * A tree visitor for converting a parsed Vim pattern into an internal
+ * NFA, which is then used to find matches in an editor.
+ * This is a singleton.
+ */
 internal object PatternVisitor : RegexParserBaseVisitor<NFA>() {

+  /**
+   * Tracks the number of capture groups visited
+   */
  private var groupCount: Int = 0
+
+  /**
+   * Maps tree nodes representing capture groups to their respective group number
+   *
+   */
  private val groupNumbers: HashMap<RegexParser.GroupingCaptureContext, Int> = HashMap()

  override fun visitPattern(ctx: RegexParser.PatternContext): NFA {