1
0
mirror of https://github.com/chylex/IntelliJ-IdeaVim.git synced 2025-02-26 05:46:00 +01:00

parsing lazy quantifiers

This commit is contained in:
Emanuel Gestosa 2023-08-09 11:46:33 +01:00 committed by lippfi
parent 96baa4ffc6
commit bf94a3c68d
5 changed files with 52 additions and 21 deletions
vim-engine/src
main
antlr
kotlin/com/maddyhome/idea/vim/regexp/parser/visitors
test/kotlin/com/maddyhome/idea/vim/regexp

View File

@ -8,7 +8,7 @@ tokens {
CLASS_OCTAL, CLASS_NOT_OCTAL, CLASS_WORD, CLASS_NOT_WORD, CLASS_HEADWORD, CLASS_NOT_HEADWORD,
CLASS_ALPHA, CLASS_NOT_ALPHA, CLASS_LCASE, CLASS_NOT_LCASE, CLASS_UCASE, CLASS_NOT_UCASE,
CLASS_ESC, CLASS_TAB, CLASS_CR, CLASS_BS, CLASS_NL, COLLECTION_LITERAL_CHAR, CURSOR,
LEFT_PAREN_NOCAPTURE, START_MATCH, END_MATCH, DOTNL
LEFT_PAREN_NOCAPTURE, START_MATCH, END_MATCH, DOTNL, RANGE_START_LAZY
}
// ------------------------------------------------------------------------------------------------ //
@ -29,6 +29,7 @@ STAR_MAGIC: '*' -> type(STAR);
PLUS_MAGIC: '\\+' -> type(PLUS);
OPTIONAL_MAGIC: ('\\=' | '\\?') -> type(OPTIONAL);
RANGE_START_MAGIC: '\\{' -> pushMode(INSIDE_RANGE), type(RANGE_START);
RANGE_START_LAZY_MAGIC: '\\{-' -> pushMode(INSIDE_RANGE), type(RANGE_START_LAZY);
COLLECTION_START_MAGIC: '[' -> pushMode(INSIDE_COLLECTION), type(COLLECTION_START);
// zero-width tokens
@ -97,6 +98,7 @@ STAR_NOMAGIC: '\\*' -> type(STAR);
PLUS_NOMAGIC: '\\+' -> type(PLUS);
OPTIONAL_NOMAGIC: ('\\=' | '\\?') -> type(OPTIONAL);
RANGE_START_NOMAGIC: '\\{' -> pushMode(INSIDE_RANGE), type(RANGE_START);
RANGE_START_LAZY_NOMAGIC: '\\{-' -> pushMode(INSIDE_RANGE), type(RANGE_START_LAZY);
COLLECTION_START_NOMAGIC: '\\[' -> pushMode(INSIDE_COLLECTION), type(COLLECTION_START);
// zero-width tokens
@ -166,6 +168,7 @@ STAR_VMAGIC: '*' -> type(STAR);
PLUS_VMAGIC: '+' -> type(PLUS);
OPTIONAL_VMAGIC: ('=' | '?') -> type(OPTIONAL);
RANGE_START_VMAGIC: '{' -> pushMode(INSIDE_RANGE), type(RANGE_START);
RANGE_START_LAZY_VMAGIC: '{-' -> pushMode(INSIDE_RANGE), type(RANGE_START_LAZY);
COLLECTION_START_VMAGIC: '[' -> pushMode(INSIDE_COLLECTION), type(COLLECTION_START);
// zero-width tokens
@ -234,6 +237,7 @@ STAR_VNOMAGIC: '\\*' -> type(STAR);
PLUS_VNOMAGIC: '\\+' -> type(PLUS);
OPTIONAL_VNOMAGIC: ('\\=' | '\\?') -> type(OPTIONAL);
RANGE_START_VNOMAGIC: '\\{' -> pushMode(INSIDE_RANGE), type(RANGE_START);
RANGE_START_LAZY_VNOMAGIC: '\\{-' -> pushMode(INSIDE_RANGE), type(RANGE_START_LAZY);
COLLECTION_START_VNOMAGIC: '\\[' -> pushMode(INSIDE_COLLECTION), type(COLLECTION_START);
// zero-width tokens

View File

@ -71,7 +71,8 @@ multi : STAR #ZeroOrMore
* "a\{4}" matches a sequence of exactly 4 "a" characters;
* "a\{}" matches any sequence of "a" characters.
*/
range : RANGE_START lower_bound=INT? (COMMA upper_bound=INT?)? RANGE_END
range : RANGE_START lower_bound=INT? (COMMA upper_bound=INT?)? RANGE_END #RangeGreedy
| RANGE_START_LAZY lower_bound=INT? (COMMA upper_bound=INT?)? RANGE_END #RangeLazy
;
/**

View File

@ -10,26 +10,36 @@ package com.maddyhome.idea.vim.regexp.parser.visitors
import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser
import com.maddyhome.idea.vim.regexp.parser.generated.RegexParserBaseVisitor
import org.antlr.v4.runtime.Token
import org.antlr.v4.runtime.tree.TerminalNode
internal class MultiVisitor : RegexParserBaseVisitor<Pair<MultiDelimiter.IntMultiDelimiter, MultiDelimiter>>() {
internal class MultiVisitor : RegexParserBaseVisitor<MultiDelimiter>() {
override fun visitZeroOrMore(ctx: RegexParser.ZeroOrMoreContext): Pair<MultiDelimiter.IntMultiDelimiter, MultiDelimiter> {
return Pair(MultiDelimiter.IntMultiDelimiter(0), MultiDelimiter.InfiniteMultiDelimiter)
override fun visitZeroOrMore(ctx: RegexParser.ZeroOrMoreContext): MultiDelimiter {
return MultiDelimiter(MultiBoundary.IntMultiBoundary(0), MultiBoundary.InfiniteMultiBoundary, true)
}
override fun visitOneOrMore(ctx: RegexParser.OneOrMoreContext): Pair<MultiDelimiter.IntMultiDelimiter, MultiDelimiter> {
return Pair(MultiDelimiter.IntMultiDelimiter(1), MultiDelimiter.InfiniteMultiDelimiter)
override fun visitOneOrMore(ctx: RegexParser.OneOrMoreContext): MultiDelimiter {
return MultiDelimiter(MultiBoundary.IntMultiBoundary(1), MultiBoundary.InfiniteMultiBoundary, true)
}
override fun visitZeroOrOne(ctx: RegexParser.ZeroOrOneContext?): Pair<MultiDelimiter.IntMultiDelimiter, MultiDelimiter> {
return Pair(MultiDelimiter.IntMultiDelimiter(0), MultiDelimiter.IntMultiDelimiter(1))
override fun visitZeroOrOne(ctx: RegexParser.ZeroOrOneContext?): MultiDelimiter {
return MultiDelimiter(MultiBoundary.IntMultiBoundary(0), MultiBoundary.IntMultiBoundary(1), true)
}
override fun visitRange(ctx: RegexParser.RangeContext): Pair<MultiDelimiter.IntMultiDelimiter, MultiDelimiter> {
val lowerDelimiter = if (ctx.lower_bound == null) MultiDelimiter.IntMultiDelimiter(0) else MultiDelimiter.IntMultiDelimiter(ctx.lower_bound.text.toInt())
val upperDelimiter = if (ctx.COMMA() != null) if (ctx.upper_bound == null) MultiDelimiter.InfiniteMultiDelimiter else MultiDelimiter.IntMultiDelimiter(ctx.upper_bound.text.toInt())
else if (ctx.lower_bound == null) MultiDelimiter.InfiniteMultiDelimiter else lowerDelimiter
return Pair(lowerDelimiter, upperDelimiter)
/**
 * Visits the `#RangeGreedy` alternative of the `range` rule and converts it
 * into a greedy [MultiDelimiter].
 */
override fun visitRangeGreedy(ctx: RegexParser.RangeGreedyContext): MultiDelimiter =
  visitRange(
    lowerBoundToken = ctx.lower_bound,
    upperBoundToken = ctx.upper_bound,
    comma = ctx.COMMA(),
    isGreedy = true
  )
/**
 * Visits the `#RangeLazy` alternative of the `range` rule and converts it
 * into a non-greedy [MultiDelimiter].
 */
override fun visitRangeLazy(ctx: RegexParser.RangeLazyContext): MultiDelimiter =
  visitRange(
    lowerBoundToken = ctx.lower_bound,
    upperBoundToken = ctx.upper_bound,
    comma = ctx.COMMA(),
    isGreedy = false
  )
/**
 * Shared conversion for both range alternatives.
 *
 * The lower boundary is the parsed lower bound, or 0 when it is omitted.
 * The upper boundary is resolved as follows:
 * - a comma followed by a number: that number;
 * - a comma with no number after it: infinite;
 * - no comma and no lower bound: infinite;
 * - no comma but a lower bound: the same as the lower boundary (exact count).
 *
 * @param lowerBoundToken token holding the lower bound, or null if omitted
 * @param upperBoundToken token holding the upper bound, or null if omitted
 * @param comma the comma node of the range, or null if the range has no comma
 * @param isGreedy value stored in the resulting [MultiDelimiter]
 */
private fun visitRange(lowerBoundToken: Token?, upperBoundToken: Token?, comma: TerminalNode?, isGreedy: Boolean): MultiDelimiter {
  val minimum = MultiBoundary.IntMultiBoundary(lowerBoundToken?.let { it.text.toInt() } ?: 0)
  val maximum: MultiBoundary = when {
    comma != null ->
      upperBoundToken?.let { MultiBoundary.IntMultiBoundary(it.text.toInt()) }
        ?: MultiBoundary.InfiniteMultiBoundary
    lowerBoundToken == null -> MultiBoundary.InfiniteMultiBoundary
    else -> minimum
  }
  return MultiDelimiter(minimum, maximum, isGreedy)
}
}
@ -37,16 +47,22 @@ internal class MultiVisitor : RegexParserBaseVisitor<Pair<MultiDelimiter.IntMult
* Delimits the number of times that a multi should
* make a certain atom repeat itself
*/
internal sealed class MultiDelimiter {
internal data class MultiDelimiter(
// Lower limit on the number of repetitions; always a concrete integer.
val lowerBoundary: MultiBoundary.IntMultiBoundary,
// Upper limit on the number of repetitions; either an integer or MultiBoundary.InfiniteMultiBoundary.
val upperBoundary: MultiBoundary,
// true for greedy multis; false for lazy ones (set by MultiVisitor.visitRangeLazy).
val isGreedy: Boolean
)
internal sealed class MultiBoundary {
/**
* Represents an integer boundary
*
* @param i The boundary of the multi
*/
data class IntMultiDelimiter(val i: Int) : MultiDelimiter()
data class IntMultiBoundary(val i: Int) : MultiBoundary()
/**
* Represents an infinite boundary
*/
object InfiniteMultiDelimiter : MultiDelimiter()
}
object InfiniteMultiBoundary : MultiBoundary()
}

View File

@ -44,13 +44,13 @@ internal class PatternVisitor : RegexParserBaseVisitor<NFA>() {
val range = multiVisitor.visit(ctx.multi())
val prefixNFA = NFA.fromSingleState()
for (i in 0 until range.first.i)
for (i in 0 until range.lowerBoundary.i)
prefixNFA.concatenate(visit(ctx.atom()))
var suffixNFA = NFA.fromSingleState()
if (range.second is MultiDelimiter.InfiniteMultiDelimiter) suffixNFA = visit(ctx.atom()).closure()
if (range.upperBoundary is MultiBoundary.InfiniteMultiBoundary) suffixNFA = visit(ctx.atom()).closure()
else {
for (i in range.first.i until (range.second as MultiDelimiter.IntMultiDelimiter).i) {
for (i in range.lowerBoundary.i until (range.upperBoundary as MultiBoundary.IntMultiBoundary).i) {
suffixNFA.concatenate(visit(ctx.atom()))
suffixNFA.optional()
}

View File

@ -38,6 +38,11 @@ class RegexParserTest {
assertSuccess("\\{5}", RANGE)
}
// A lazy range (opened with `\{-`) with an omitted lower bound and an upper
// bound of 5 is expected to parse.
@Test
fun `range lazy`() {
assertSuccess("\\{-,5}", RANGE)
}
@Test
fun `range missing right bracket`() {
assertFailure("\\{5", RANGE)
@ -58,6 +63,11 @@ class RegexParserTest {
assertFailure("\\{2,g}", RANGE)
}
// After the lazy opener `\{-` the range rule only allows optional integers and
// a comma, so a second `-` is expected to make parsing fail.
@Test
fun `range lazy with extra dash`() {
assertFailure("\\{--2,5}", RANGE)
}
@Test
fun `collection a to z`() {
assertSuccess("[a-z]", COLLECTION)