1
0
mirror of https://github.com/chylex/IntelliJ-IdeaVim.git synced 2025-02-26 05:46:00 +01:00

ignore case tokens

This commit is contained in:
Emanuel Gestosa 2023-08-11 14:20:40 +01:00 committed by lippfi
parent f009687ddf
commit cfddcf1630
15 changed files with 118 additions and 55 deletions
vim-engine/src

View File

@ -17,6 +17,13 @@ tokens {
CLASS_NOT_LCASE_NL, CLASS_UCASE_NL, CLASS_NOT_UCASE_NL
}
@members {
// Case override requested by the pattern itself. Stays null until one of the
// case-related lexer rules fires, then holds true (ignore case) or false (match case).
// Each setter overwrites the previous value, so the last rule that fired wins.
public Boolean ignoreCase = null;
// Called from the actions of the IGNORE_CASE_* lexer rules.
void setIgnoreCase() { ignoreCase = true; }
// Called from the actions of the NO_IGNORE_CASE_* lexer rules.
void setNoIgnoreCase() { ignoreCase = false; }
}
// ------------------------------------------------------------------------------------------------ //
// //
// //
@ -45,6 +52,10 @@ CURSOR_MAGIC: '\\%#' -> type(CURSOR);
START_MATCH_MAGIC: '\\zs' -> type(START_MATCH);
END_MATCH_MAGIC: '\\ze' -> type(END_MATCH);
// case-related tokens
// These rules produce no token for the parser: the action records the requested
// case handling through setIgnoreCase() / setNoIgnoreCase() and the match is skipped.
IGNORE_CASE_MAGIC: '\\c' { setIgnoreCase(); } -> skip;
NO_IGNORE_CASE_MAGIC: '\\C' { setNoIgnoreCase(); } -> skip;
// character classes
CLASS_IDENTIFIER_MAGIC: '\\i' -> type(CLASS_IDENTIFIER);
CLASS_IDENTIFIER_D_MAGIC: '\\I' -> type(CLASS_IDENTIFIER_D);
@ -144,6 +155,10 @@ CURSOR_NOMAGIC: '\\%#' -> type(CURSOR);
START_MATCH_NOMAGIC: '\\zs' -> type(START_MATCH);
END_MATCH_NOMAGIC: '\\ze' -> type(END_MATCH);
// case-related tokens
// These rules produce no token for the parser: the action records the requested
// case handling through setIgnoreCase() / setNoIgnoreCase() and the match is skipped.
IGNORE_CASE_NOMAGIC: '\\c' { setIgnoreCase(); } -> skip;
NO_IGNORE_CASE_NOMAGIC: '\\C' { setNoIgnoreCase(); } -> skip;
// character classes
CLASS_IDENTIFIER_NOMAGIC: '\\i' -> type(CLASS_IDENTIFIER);
CLASS_IDENTIFIER_D_NOMAGIC: '\\I' -> type(CLASS_IDENTIFIER_D);
@ -244,6 +259,10 @@ CURSOR_VMAGIC: '%#' -> type(CURSOR);
START_MATCH_VMAGIC: '\\zs' -> type(START_MATCH);
END_MATCH_VMAGIC: '\\ze' -> type(END_MATCH);
// case-related tokens
// These rules produce no token for the parser: the action records the requested
// case handling through setIgnoreCase() / setNoIgnoreCase() and the match is skipped.
IGNORE_CASE_VMAGIC: '\\c' { setIgnoreCase(); } -> skip;
NO_IGNORE_CASE_VMAGIC: '\\C' { setNoIgnoreCase(); } -> skip;
// character classes
CLASS_IDENTIFIER_VMAGIC: '\\i' -> type(CLASS_IDENTIFIER);
CLASS_IDENTIFIER_D_VMAGIC: '\\I' -> type(CLASS_IDENTIFIER_D);
@ -343,6 +362,10 @@ CURSOR_VNOMAGIC: '\\%#' -> type(CURSOR);
START_MATCH_VNOMAGIC: '\\zs' -> type(START_MATCH);
END_MATCH_VNOMAGIC: '\\ze' -> type(END_MATCH);
// case-related tokens
// These rules produce no token for the parser: the action records the requested
// case handling through setIgnoreCase() / setNoIgnoreCase() and the match is skipped.
// The second rule is named NO_IGNORE_CASE_* to agree with its counterparts in the
// other lexer modes; it is skipped, so no parser rule refers to it by name.
IGNORE_CASE_VNOMAGIC: '\\c' { setIgnoreCase(); } -> skip;
NO_IGNORE_CASE_VNOMAGIC: '\\C' { setNoIgnoreCase(); } -> skip;
// character classes
CLASS_IDENTIFIER_VNOMAGIC: '\\i' -> type(CLASS_IDENTIFIER);
CLASS_IDENTIFIER_D_VNOMAGIC: '\\I' -> type(CLASS_IDENTIFIER_D);

View File

@ -118,11 +118,11 @@ char_class : (CLASS_IDENTIFIER | CLASS_IDENTIFIER_NL) #Identifier
| (CLASS_NOT_LCASE | CLASS_NOT_LCASE_NL) #NotLcase
| (CLASS_UCASE | CLASS_UCASE_NL) #Ucase
| (CLASS_NOT_UCASE | CLASS_NOT_UCASE_NL) #NotUcase
| CLASS_ESC #Esc
| CLASS_TAB #Tab
| CLASS_CR #CR
| CLASS_BS #BS
| CLASS_NL #NL
| CLASS_ESC #Esc
| CLASS_TAB #Tab
| CLASS_CR #CR
| CLASS_BS #BS
| CLASS_NL #NL
;
/**

View File

@ -12,7 +12,7 @@ import com.maddyhome.idea.vim.regexp.parser.generated.RegexParser
import org.antlr.v4.runtime.BailErrorStrategy
import org.antlr.v4.runtime.TokenStream
public class RegexParser(input: TokenStream?) : RegexParser(input) {
internal class RegexParser(input: TokenStream?) : RegexParser(input) {
/**
* Override default error handling strategy

View File

@ -12,7 +12,7 @@ import com.maddyhome.idea.vim.regexp.parser.generated.RegexLexer
import org.antlr.v4.runtime.CharStream
import org.antlr.v4.runtime.LexerNoViableAltException
public class BailErrorLexer(input: CharStream?) : RegexLexer(input) {
internal class BailErrorLexer(input: CharStream) : RegexLexer(input) {
/**
 * Aborts lexing as soon as the lexer reports an input it has no viable alternative for:
 * the error is rethrown wrapped in a [RuntimeException] rather than being recovered from.
 *
 * @param e The lexer error that triggered recovery
 */
override fun recover(e: LexerNoViableAltException?) {
throw RuntimeException(e)
}

View File

@ -23,6 +23,9 @@ import org.antlr.v4.runtime.CommonTokenStream
* To learn about Vim's pattern syntax see :help pattern
*/
public class VimRegex(pattern: String) {
/** The ways letter case can be treated while matching. */
private enum class CaseSensitivity { SMART_CASE, IGNORE_CASE, NO_IGNORE_CASE }
// TODO: check ignorecase options
/** Case handling applied by this regex; starts out case-sensitive. */
private var caseSensitivity = CaseSensitivity.NO_IGNORE_CASE
/**
* The NFA representing the compiled regular expression
@ -35,6 +38,7 @@ public class VimRegex(pattern: String) {
val parser = RegexParser(tokens)
val tree = parser.pattern()
val patternVisitor = PatternVisitor()
this.caseSensitivity = if (regexLexer.ignoreCase == true) CaseSensitivity.IGNORE_CASE else CaseSensitivity.NO_IGNORE_CASE
this.nfa = patternVisitor.visit(tree)
}
@ -48,7 +52,7 @@ public class VimRegex(pattern: String) {
public fun containsMatchIn(editor: VimEditor): Boolean {
var startIndex = 0
while (startIndex <= editor.text().length) {
val result = nfa.simulate(editor, startIndex)
val result = simulateNFA(editor, startIndex)
when (result) {
/**
* A match was found
@ -80,7 +84,7 @@ public class VimRegex(pattern: String) {
): VimMatchResult {
var index = startIndex
while (index <= editor.text().length) {
val result = nfa.simulate(editor, index)
val result = simulateNFA(editor, index)
when (result) {
/**
* A match was found
@ -114,7 +118,7 @@ public class VimRegex(pattern: String) {
var index = startIndex
val foundMatches: MutableList<VimMatchResult.Success> = emptyList<VimMatchResult.Success>().toMutableList()
while (index <= editor.text().length) {
val result = nfa.simulate(editor, index)
val result = simulateNFA(editor, index)
when (result) {
/**
* A match was found, add it to foundMatches and increment
@ -146,7 +150,7 @@ public class VimRegex(pattern: String) {
editor: VimEditor,
index: Int
): VimMatchResult {
return nfa.simulate(editor, index)
return simulateNFA(editor, index)
}
/**
@ -157,7 +161,7 @@ public class VimRegex(pattern: String) {
public fun matchEntire(
editor: VimEditor
): VimMatchResult {
val result = nfa.simulate(editor)
val result = simulateNFA(editor)
return when (result) {
is VimMatchResult.Failure -> result
is VimMatchResult.Success -> {
@ -175,7 +179,7 @@ public class VimRegex(pattern: String) {
public fun matches(
editor: VimEditor
): Boolean {
val result = nfa.simulate(editor)
val result = simulateNFA(editor)
return when (result) {
is VimMatchResult.Failure -> false
is VimMatchResult.Success -> result.range.last + 1 == editor.text().length
@ -192,10 +196,19 @@ public class VimRegex(pattern: String) {
editor: VimEditor,
index: Int
): Boolean {
val result = nfa.simulate(editor, index)
val result = simulateNFA(editor, index)
return when (result) {
is VimMatchResult.Failure -> false
is VimMatchResult.Success -> true
}
}
/**
 * Runs the compiled NFA on the editor text, translating this regex's
 * case-sensitivity setting into the boolean flag the NFA expects.
 *
 * @param editor The editor whose text is matched
 * @param index  The index in the text at which matching starts
 *
 * @return The result of the NFA simulation
 */
private fun simulateNFA(editor: VimEditor, index: Int = 0): VimMatchResult =
  nfa.simulate(
    editor,
    index,
    isCaseInsensitive = when (caseSensitivity) {
      CaseSensitivity.IGNORE_CASE -> true
      CaseSensitivity.NO_IGNORE_CASE -> false
      CaseSensitivity.SMART_CASE -> false // TODO
    }
  )
}

View File

@ -158,9 +158,9 @@ internal class NFA private constructor(
*
* @return The resulting match result
*/
internal fun simulate(editor: VimEditor, startIndex: Int = 0) : VimMatchResult {
internal fun simulate(editor: VimEditor, startIndex: Int = 0, isCaseInsensitive: Boolean = false) : VimMatchResult {
groups.groupCount = 0
if (simulate(editor, startIndex, startState)) {
if (simulate(editor, startIndex, startState, isCaseInsensitive)) {
return groups.get(0)?.let {
VimMatchResult.Success(
it.range,
@ -175,18 +175,19 @@ internal class NFA private constructor(
/**
* Simulates the NFA in a depth-first search fashion.
*
* @param editor The editor that is used for the simulation
* @param currentIndex The current index of the text in the simulation
* @param currentState The current NFA state in the simulation
* @param epsilonVisited Records the states that have been visited up to this point without consuming any input
* @param editor The editor that is used for the simulation
* @param currentIndex The current index of the text in the simulation
* @param currentState The current NFA state in the simulation
* @param isCaseInsensitive Whether the simulation should ignore case
* @param epsilonVisited Records the states that have been visited up to this point without consuming any input
*
* @return True if matching was successful, false otherwise
*/
private fun simulate(editor: VimEditor, currentIndex : Int = 0, currentState: NFAState = startState, epsilonVisited: HashSet<NFAState> = HashSet()) : Boolean {
private fun simulate(editor: VimEditor, currentIndex: Int, currentState: NFAState, isCaseInsensitive: Boolean, epsilonVisited: HashSet<NFAState> = HashSet()) : Boolean {
updateCaptureGroups(editor, currentIndex, currentState)
if (currentState.isAccept) return true
for (transition in currentState.transitions) {
val transitionMatcherResult = transition.matcher.matches(editor, currentIndex, groups)
val transitionMatcherResult = transition.matcher.matches(editor, currentIndex, groups, isCaseInsensitive)
if (transitionMatcherResult is MatcherResult.Success) {
var epsilonVisitedCopy = HashSet(epsilonVisited)
if (transitionMatcherResult.consumed == 0) {
@ -195,7 +196,7 @@ internal class NFA private constructor(
} else {
epsilonVisitedCopy = HashSet()
}
if (simulate(editor, currentIndex + transitionMatcherResult.consumed, transition.destState, epsilonVisitedCopy)) return true
if (simulate(editor, currentIndex + transitionMatcherResult.consumed, transition.destState, isCaseInsensitive, epsilonVisitedCopy)) return true
}
}
return false
@ -209,18 +210,9 @@ internal class NFA private constructor(
* @param state The current state in the simulation
*/
private fun updateCaptureGroups(editor: VimEditor, index: Int, state: NFAState) {
  // Every boundary attached to the state is recorded exactly once. The earlier form also
  // printed a debug line to stdout per boundary and then recorded each boundary a second time.
  for (groupNumber in state.startCapture) groups.setGroupStart(groupNumber, index)
  for (groupNumber in state.endCapture) groups.setGroupEnd(groupNumber, index, editor.text())
  for (groupNumber in state.forceEndCapture) groups.setForceGroupEnd(groupNumber, index, editor.text())
}
internal companion object {

View File

@ -17,15 +17,20 @@ import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
* @param groupNumber The number of the back-referenced captured group
*/
internal class BackreferenceMatcher(private val groupNumber: Int) : Matcher {
override fun matches(editor: VimEditor, index: Int, groups: VimMatchGroupCollection): MatcherResult {
override fun matches(editor: VimEditor, index: Int, groups: VimMatchGroupCollection, isCaseInsensitive: Boolean): MatcherResult {
if (groups.get(groupNumber) == null) {
// TODO: throw illegal backreference error
return MatcherResult.Failure
}
val capturedString = groups.get(groupNumber)!!.value
val capturedString = if (isCaseInsensitive) groups.get(groupNumber)!!.value.lowercase()
else groups.get(groupNumber)!!.value
if (editor.text().length - index < capturedString.length) return MatcherResult.Failure
return if (editor.text().substring(index until index + capturedString.length) == capturedString)
val editorString = if (isCaseInsensitive) editor.text().substring(index until index + capturedString.length).lowercase()
else editor.text().substring(index until index + capturedString.length)
return if (capturedString == editorString)
MatcherResult.Success(capturedString.length)
else
MatcherResult.Failure

View File

@ -15,8 +15,13 @@ import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
* Matcher used to match against single characters
*/
internal class CharacterMatcher(val char: Char) : Matcher {
  /**
   * Succeeds, consuming one character, when the character at [index] equals [char].
   * With [isCaseInsensitive] set, both characters are lower-cased before the comparison.
   * Fails when [index] is at or past the end of the text.
   */
  override fun matches(editor: VimEditor, index: Int, groups: VimMatchGroupCollection, isCaseInsensitive: Boolean): MatcherResult {
    // Read the text once instead of once per access.
    val text = editor.text()
    if (index >= text.length) return MatcherResult.Failure

    val targetChar = if (isCaseInsensitive) char.lowercaseChar() else char
    val editorChar = if (isCaseInsensitive) text[index].lowercaseChar() else text[index]
    return if (targetChar == editorChar) MatcherResult.Success(1) else MatcherResult.Failure
  }
}

View File

@ -25,14 +25,15 @@ internal class CollectionMatcher(
private val isNegated: Boolean = false,
private val includesEOL: Boolean = false
) : Matcher {
/**
 * Succeeds, consuming one character, when the character at [index] belongs to the
 * collection (or does not belong to it, for a negated collection).
 * With [isCaseInsensitive] set, membership is tested ignoring letter case.
 */
override fun matches(editor: VimEditor, index: Int, groups: VimMatchGroupCollection, isCaseInsensitive: Boolean): MatcherResult {
  val text = editor.text()
  if (index >= text.length) return MatcherResult.Failure

  val char = text[index]
  // A newline is decided by includesEOL alone; the collection contents are not consulted for it.
  if (char == '\n') return if (includesEOL) MatcherResult.Success(1) else MatcherResult.Failure

  val isInCollection = if (isCaseInsensitive) {
    // Compare lower-cased forms directly instead of building a lower-cased copy of chars per call.
    val lowered = char.lowercaseChar()
    chars.any { it.lowercaseChar() == lowered } || ranges.any { it.inRange(char, true) }
  } else {
    chars.contains(char) || ranges.any { it.inRange(char) }
  }
  return if (isInCollection != isNegated) MatcherResult.Success(1) else MatcherResult.Failure
}
@ -49,11 +50,13 @@ internal data class CollectionRange(val start: Char, val end: Char) {
/**
 * Determines whether a character is inside the range.
 *
 * When case is ignored, the character matches if it, its lower-case form or its
 * upper-case form lies inside the range as written. The range bounds themselves are
 * never case-converted: lower-casing them would shrink or empty mixed-case ranges
 * (for example `A-z` would stop containing `_`).
 *
 * @param char              The character to verify
 * @param isCaseInsensitive Whether case should be ignored
 *
 * @return whether char is inside the range
 */
internal fun inRange(char: Char, isCaseInsensitive: Boolean = false): Boolean {
  val range = start..end
  if (char in range) return true
  return isCaseInsensitive && (char.lowercaseChar() in range || char.uppercaseChar() in range)
}
}

View File

@ -17,7 +17,7 @@ import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
* on the given index
*/
internal class CursorMatcher : Matcher {
/**
 * Succeeds without consuming any character when one of the editor's carets is located
 * exactly at [index]. [isCaseInsensitive] is not consulted.
 */
override fun matches(editor: VimEditor, index: Int, groups: VimMatchGroupCollection, isCaseInsensitive: Boolean): MatcherResult {
  val wantedOffset = Offset(index)
  // any { } stops at the first hit and avoids building an intermediate list of offsets.
  return if (editor.carets().any { it.offset == wantedOffset }) MatcherResult.Success(0)
  else MatcherResult.Failure
}

View File

@ -15,7 +15,7 @@ import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
* Matcher that matches with any character
*/
internal class DotMatcher(private val includeNewLine: Boolean) : Matcher {
override fun matches(editor: VimEditor, index: Int, groups: VimMatchGroupCollection): MatcherResult {
override fun matches(editor: VimEditor, index: Int, groups: VimMatchGroupCollection, isCaseInsensitive: Boolean): MatcherResult {
return if (includeNewLine)
if (index < editor.text().length) MatcherResult.Success(1)
else MatcherResult.Failure

View File

@ -17,7 +17,7 @@ import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
* taken and without consuming any character.
*/
internal class EpsilonMatcher : Matcher {
  /**
   * Always succeeds and consumes nothing; none of the arguments are inspected.
   */
  override fun matches(editor: VimEditor, index: Int, groups: VimMatchGroupCollection, isCaseInsensitive: Boolean): MatcherResult {
    return MatcherResult.Success(0)
  }
}

View File

@ -20,11 +20,12 @@ internal interface Matcher {
/**
 * Determines whether the matcher should match.
 *
 * @param editor            The editor in its current state
 * @param index             The current index in the text of the editor
 * @param groups            The groups captured so far
 * @param isCaseInsensitive Whether the matcher should ignore case
 *
 * @return A result indicating either a failure to match, or success with the number of consumed characters
 */
fun matches(editor: VimEditor, index: Int, groups: VimMatchGroupCollection, isCaseInsensitive: Boolean): MatcherResult
}

View File

@ -17,7 +17,7 @@ import com.maddyhome.idea.vim.regexp.match.VimMatchGroupCollection
* @param predicate The predicate used to check if the character should be accepted
*/
internal class PredicateMatcher(val predicate: (Char) -> Boolean) : Matcher {
/**
 * Succeeds, consuming one character, when [index] is inside the text and the predicate
 * accepts the character found there. [isCaseInsensitive] is not consulted: the predicate
 * alone decides.
 */
override fun matches(editor: VimEditor, index: Int, groups: VimMatchGroupCollection, isCaseInsensitive: Boolean): MatcherResult {
  val text = editor.text()
  return if (index < text.length && predicate(text[index])) MatcherResult.Success(1)
  else MatcherResult.Failure
}

View File

@ -650,17 +650,38 @@ class NFATest {
)
}
// With case ignored, the all-lower-case pattern "ideavim" is expected to match
// the whole mixed-case text "IdeaVim" (indices 0 to 6).
@Test
fun `test case insensitive word`() {
assertCorrectRange(
"IdeaVim",
"ideavim",
0 until 7,
ignoreCase = true
)
}
// With case ignored, the lower-case range [a-z] is expected to accept the upper-case
// letters as well, so "[a-z]*" matches the whole text "IdeaVim" (indices 0 to 6).
@Test
fun `test case insensitive collection`() {
assertCorrectRange(
"IdeaVim",
"[a-z]*",
0 until 7,
ignoreCase = true
)
}
private fun assertCorrectRange(
text: CharSequence,
pattern: String,
expectedResultRange:
IntRange,
offset: Int = 0,
carets: List<Int> = emptyList()
carets: List<Int> = emptyList(),
ignoreCase: Boolean = false
) {
val editor = buildEditor(text, carets)
val nfa = buildNFA(pattern)
val result = nfa.simulate(editor, offset)
val result = nfa.simulate(editor, offset, isCaseInsensitive = ignoreCase)
when (result) {
is VimMatchResult.Failure -> fail("Expected to find match")
is VimMatchResult.Success -> assertEquals(expectedResultRange, result.range)