mirror of
https://github.com/chylex/Code-Statistics.git
synced 2024-11-21 22:42:45 +01:00
233 lines
8.6 KiB
C#
233 lines
8.6 KiB
C#
using System;
|
|
|
|
namespace CodeStatisticsCore.Handling.Utils{
|
|
/// <summary>
/// A small cursor-based scanner over a string of source code. The parser keeps a single cursor position and offers
/// helpers for skipping and reading characters, delimited blocks and identifiers. The character classes it relies on
/// are exposed as replaceable predicates so that callers can adapt it to a particular language.
/// <para/>
/// The cursor is allowed to move past the end of the code; in that state <see cref="IsEOF"/> is true and
/// <see cref="Char"/> yields the null character.
/// </summary>
public class CodeParser{
    /// <summary>The code being parsed. Never null.</summary>
    protected readonly string code;

    /// <summary>Cached length of <see cref="code"/>.</summary>
    protected readonly int length;

    /// <summary>Zero-based index of the current character. May be greater than or equal to <see cref="length"/>.</summary>
    protected int cursor;

    /// <summary>Decides whether a character counts as whitespace. Defaults to <see cref="char.IsWhiteSpace(char)"/>.</summary>
    public Predicate<char> IsWhiteSpace = char.IsWhiteSpace;

    /// <summary>
    /// Decides whether a character may begin an identifier. The default accepts letters, digits and underscores,
    /// i.e. the same set as <see cref="IsIdentifierPart"/>; replace it for languages with stricter rules.
    /// </summary>
    public Predicate<char> IsIdentifierStart = chr => char.IsLetterOrDigit(chr) || chr == '_';

    /// <summary>Decides whether a character may appear inside an identifier. The default accepts letters, digits and underscores.</summary>
    public Predicate<char> IsIdentifierPart = chr => char.IsLetterOrDigit(chr) || chr == '_';

    /// <summary>Final check applied to a fully read identifier by <see cref="ReadIdentifier"/>. The default accepts everything.</summary>
    public Predicate<string> IsValidIdentifier = str => true;

    /// <summary>The complete code this parser was created with, regardless of the cursor position.</summary>
    public string Contents { get { return code; } }

    /// <summary>The character under the cursor, or the null character when the cursor is at or past the end.</summary>
    public char Char { get { return cursor < length ? code[cursor] : '\0'; } }

    /// <summary>True when the cursor is at or past the end of the code.</summary>
    public bool IsEOF { get { return cursor >= length; } }

    /// <summary>
    /// Creates a parser over <paramref name="code"/> with the cursor at position zero.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="code"/> is null.</exception>
    public CodeParser(string code){
        if (code == null)throw new ArgumentNullException("code");

        this.code = code;
        this.length = code.Length;
    }

    /// <summary>
    /// Returns the part of the code between <paramref name="startIndex"/> (inclusive) and <paramref name="endIndex"/>
    /// (exclusive). Both indexes are clamped into the valid range and an empty or inverted range yields an empty string,
    /// so the method never throws for out-of-range input.
    /// </summary>
    protected string SubstrIndex(int startIndex, int endIndex){
        // Clamp the start to [0, length] (not length-1): a start at the very end must produce an empty string
        // instead of leaking the last character.
        startIndex = Math.Max(0, Math.Min(length, startIndex));
        // Clamp the end to [startIndex, length] so the computed substring length can never be negative.
        endIndex = Math.Max(startIndex, Math.Min(length, endIndex));
        return code.Substring(startIndex, endIndex-startIndex);
    }

    /// <summary>
    /// Clones all of the parser settings, optionally with different code contents, and resets the cursor position to 0.
    /// </summary>
    /// <param name="newCode">Contents of the new parser; when null, the contents of this parser are reused.</param>
    public virtual CodeParser Clone(string newCode = null){
        return new CodeParser(newCode ?? code){
            IsWhiteSpace = this.IsWhiteSpace,
            IsIdentifierStart = this.IsIdentifierStart,
            IsIdentifierPart = this.IsIdentifierPart,
            IsValidIdentifier = this.IsValidIdentifier
        };
    }

    /// <summary>
    /// Resets the cursor back to position zero and returns itself.
    /// </summary>
    public CodeParser Reset(){
        cursor = 0;
        return this;
    }

    /// <summary>
    /// Skips to the next character and returns it. Returns the null character if the cursor ends up at or past the end.
    /// </summary>
    public char Next(){
        ++cursor;
        return Char;
    }

    /// <summary>
    /// Skips a single character and returns itself. The cursor advances even when it is already at the end.
    /// </summary>
    public CodeParser Skip(){
        ++cursor;
        return this;
    }

    /// <summary>
    /// Skips all whitespace characters defined by <see cref="IsWhiteSpace"/> and places the cursor after the last found whitespace character.
    /// Returns itself.
    /// </summary>
    public CodeParser SkipSpaces(){
        while(cursor < length && IsWhiteSpace(code[cursor])){
            ++cursor;
        }

        return this;
    }

    /// <summary>
    /// Skips to the next matching character and returns itself. Caution: The skip may not succeed, so verify if the current character is valid if not certain.
    /// If the skip fails, the cursor will not move. The search starts at the current character, so the cursor also
    /// stays in place when it already points at <paramref name="chr"/>.
    /// </summary>
    public CodeParser SkipTo(char chr){
        // Only search real characters; the null character reported by Char at the end of the code is a sentinel
        // and must not count as a match.
        if (cursor < length){
            int found = code.IndexOf(chr, cursor);
            if (found != -1)cursor = found;
        }

        return this;
    }

    /// <summary>
    /// Skips a block enclosed by <paramref name="blockStart"/> and <paramref name="blockEnd"/>. If the current character does not match <paramref name="blockStart"/>,
    /// the parser will try skipping to the next <paramref name="blockStart"/> character first. If it fails, the cursor will not move, otherwise the cursor
    /// will be placed after the ending character of the block. Nested blocks are taken into account. If the block is never
    /// closed, the cursor ends up at the end of the code.
    /// </summary>
    public void SkipBlock(char blockStart, char blockEnd){
        TrySkipBlock(blockStart, blockEnd);
    }

    /// <summary>
    /// Shared implementation of <see cref="SkipBlock"/>. Returns true only if the closing character that balances the
    /// first <paramref name="blockStart"/> was found; the cursor is then placed right after it.
    /// </summary>
    private bool TrySkipBlock(char blockStart, char blockEnd){
        SkipTo(blockStart);
        if (Char != blockStart)return false;

        // Number of nested blocks currently open inside the outermost one.
        int blockDepth = 0;

        while(cursor < length){
            char next = Next();

            // The end character is tested first so that blocks whose start and end characters are identical
            // (such as quotes) are closed by the next occurrence.
            if (next == blockEnd){
                if (--blockDepth < 0){
                    ++cursor; // step over the closing character
                    return true;
                }
            }
            else if (next == blockStart){
                ++blockDepth;
            }
        }

        return false;
    }

    /// <summary>
    /// Skips a text that matches a string with tokens. Returns true if the string matched code at current position and moves the cursor after the match,
    /// otherwise returns false and the cursor does not move. <para/>
    /// The <paramref name="matchStr"/> parameter may consist of the following tokens: <para/>
    /// ^s --- Whitespace Character <para/>
    /// ^n --- Not An Identifier Part Character <para/>
    /// ^. --- Any Character <para/>
    /// Any other character following ^ never matches, and a ^ at the very end of <paramref name="matchStr"/> is matched literally.
    /// At the end of the code the current character is the null character, so ^n and ^. can still match there.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="matchStr"/> is null.</exception>
    public bool SkipIfMatch(string matchStr){
        if (matchStr == null)throw new ArgumentNullException("matchStr");

        int prevCursor = cursor;

        for(int index = 0; index < matchStr.Length; index++){
            char matchChr = matchStr[index];
            bool hasMatched;

            if (matchChr == '^' && index < matchStr.Length-1){
                // Two-character token: consume the token type as well.
                switch(matchStr[++index]){
                    case 's':
                        hasMatched = IsWhiteSpace(Char);
                        break;

                    case 'n':
                        hasMatched = !IsIdentifierPart(Char);
                        break;

                    case '.':
                        hasMatched = true;
                        break;

                    default:
                        hasMatched = false;
                        break;
                }
            }
            else{
                hasMatched = Char == matchChr;
            }

            if (!hasMatched){
                cursor = prevCursor;
                return false;
            }

            ++cursor;
        }

        return true;
    }

    /// <summary>
    /// Skips to the next matching character and returns a new instance of CodeParser with contents that were skipped (excluding the target character).
    /// If the skip does not succeed, an empty CodeParser will be returned and the cursor will not move.
    /// </summary>
    public CodeParser ReadTo(char chr){
        int indexStart = cursor;
        SkipTo(chr);
        return Clone(SubstrIndex(indexStart, cursor));
    }

    /// <summary>
    /// Skips to the next matching character and returns a new instance of CodeParser with contents that were skipped (excluding the target character).
    /// If the skip does not succeed, an empty CodeParser will be returned and the cursor will not move. If it succeeds, the parser will also skip the target character.
    /// </summary>
    public CodeParser ReadToSkip(char chr){
        CodeParser read = ReadTo(chr);

        // Guard against the end-of-code sentinel so a failed search for the null character does not move the cursor.
        if (!IsEOF && Char == chr)Skip();

        return read;
    }

    /// <summary>
    /// Skips a block enclosed by <paramref name="blockStart"/> and <paramref name="blockEnd"/> and returns a new instance of CodeParser with the contents of that
    /// block, excluding the enclosing characters. If the current character does not match <paramref name="blockStart"/>, the parser will try skipping to the
    /// next <paramref name="blockStart"/>, first. If it fails, the returned contents will be empty and the cursor will not move, otherwise the cursor will be
    /// placed after the ending character of the block. If the block is never closed, everything after the starting character
    /// is returned and the cursor is placed at the end of the code.
    /// </summary>
    public CodeParser ReadBlock(char blockStart, char blockEnd){
        SkipTo(blockStart);
        if (Char != blockStart)return Clone("");

        int indexStart = cursor;
        bool closed = TrySkipBlock(blockStart, blockEnd);

        // Only a closed block has an ending character to strip; an unterminated one keeps its last character.
        int indexEnd = closed ? cursor-1 : cursor;
        return Clone(SubstrIndex(indexStart+1, indexEnd));
    }

    /// <summary>
    /// Attempts to read an identifier from the current cursor position. If there is an identifier and it is valid, it is returned and the cursor moves after
    /// the identifier. If any step fails, the cursor does not move and <see cref="string.Empty"/> is returned.
    /// </summary>
    public string ReadIdentifier(){
        if (!IsIdentifierStart(Char))return string.Empty;

        int prevCursor = cursor;

        // Advance until the first character that cannot be part of an identifier, or until the end of the code.
        while(cursor < length){
            if (!IsIdentifierPart(Next()))break;
        }

        string identifier = SubstrIndex(prevCursor, cursor);

        if (!IsValidIdentifier(identifier)){
            cursor = prevCursor;
            return string.Empty;
        }

        return identifier;
    }
}
|
|
}
|