1
0
mirror of https://github.com/chylex/Code-Statistics.git synced 2024-11-21 22:42:45 +01:00
Code-Statistics/CodeStatisticsCore/Handling/Utils/CodeParser.cs

233 lines
8.6 KiB
C#

using System;
namespace CodeStatisticsCore.Handling.Utils{
/// <summary>
/// A cursor-based reader over a fixed string of code. The parser keeps a single cursor position and offers methods that
/// skip over or read characters, blocks and identifiers. Character classification is configurable through the public predicates.
/// </summary>
public class CodeParser{
    /// <summary>The code this parser reads from.</summary>
    protected readonly string code;

    /// <summary>Cached length of <see cref="code"/>.</summary>
    protected readonly int length;

    /// <summary>Current read position. It may be equal to or greater than <see cref="length"/>, which means end of code.</summary>
    protected int cursor;

    /// <summary>Decides which characters are treated as whitespace by <see cref="SkipSpaces"/> and by the <c>^s</c> token of <see cref="SkipIfMatch"/>.</summary>
    public Predicate<char> IsWhiteSpace = char.IsWhiteSpace;

    /// <summary>Decides whether a character may begin an identifier in <see cref="ReadIdentifier"/>. By default letters, digits and underscores are accepted.</summary>
    public Predicate<char> IsIdentifierStart = chr => char.IsLetterOrDigit(chr) || chr == '_';

    /// <summary>
    /// Decides whether a character may continue an identifier in <see cref="ReadIdentifier"/>; also used (negated) by the <c>^n</c> token of
    /// <see cref="SkipIfMatch"/>. By default letters, digits and underscores are accepted.
    /// </summary>
    public Predicate<char> IsIdentifierPart = chr => char.IsLetterOrDigit(chr) || chr == '_';

    /// <summary>Final check applied by <see cref="ReadIdentifier"/> to the complete identifier. By default every identifier is accepted.</summary>
    public Predicate<string> IsValidIdentifier = str => true;

    /// <summary>The entire code of this parser, regardless of the cursor position.</summary>
    public string Contents { get { return code; } }

    /// <summary>The character at the cursor, or <c>'\0'</c> if the cursor is at or past the end of the code.</summary>
    public char Char { get { return cursor < length ? code[cursor] : '\0'; } }

    /// <summary>True if the cursor is at or past the end of the code.</summary>
    public bool IsEOF { get { return cursor >= length; } }

    /// <summary>
    /// Creates a parser over <paramref name="code"/> with the cursor at position zero.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="code"/> is null.</exception>
    public CodeParser(string code){
        if (code == null)throw new ArgumentNullException("code");

        this.code = code;
        this.length = code.Length;
    }

    /// <summary>
    /// Returns the part of the code between <paramref name="startIndex"/> (inclusive) and <paramref name="endIndex"/> (exclusive).
    /// Both indexes are clamped into the code, and an end index that lies before the start index yields an empty string.
    /// </summary>
    protected string SubstrIndex(int startIndex, int endIndex){
        // The start is clamped to length (not length-1), so that a range starting at the end of the code is empty
        // instead of containing the last character.
        startIndex = Math.Max(0, Math.Min(length, startIndex));
        endIndex = Math.Max(startIndex, Math.Min(length, endIndex));
        return code.Substring(startIndex, endIndex-startIndex);
    }

    /// <summary>
    /// Clones all of the parser settings, optionally with different code contents, and resets the cursor position to 0.
    /// </summary>
    public virtual CodeParser Clone(string newCode = null){
        return new CodeParser(newCode ?? code){
            IsWhiteSpace = this.IsWhiteSpace,
            IsIdentifierStart = this.IsIdentifierStart,
            IsIdentifierPart = this.IsIdentifierPart,
            IsValidIdentifier = this.IsValidIdentifier
        };
    }

    /// <summary>
    /// Resets the cursor back to position zero and returns itself.
    /// </summary>
    public CodeParser Reset(){
        cursor = 0;
        return this;
    }

    /// <summary>
    /// Skips to the next character and returns it. Returns <c>'\0'</c> if the cursor moved to or past the end of the code.
    /// </summary>
    public char Next(){
        ++cursor;
        return Char;
    }

    /// <summary>
    /// Skips a single character and returns itself.
    /// </summary>
    public CodeParser Skip(){
        ++cursor;
        return this;
    }

    /// <summary>
    /// Skips all whitespace characters defined by <see cref="IsWhiteSpace"/> and places the cursor after the last found whitespace character.
    /// Returns itself.
    /// </summary>
    public CodeParser SkipSpaces(){
        while(cursor < length && IsWhiteSpace(Char)){
            ++cursor;
        }

        return this;
    }

    /// <summary>
    /// Skips to the next matching character and returns itself. Caution: The skip may not succeed, so verify if the current character is valid if not certain.
    /// If the skip fails, the cursor will not move.
    /// </summary>
    public CodeParser SkipTo(char chr){
        int prevCursor = cursor;

        while(cursor < length && Char != chr){
            ++cursor;
        }

        if (Char != chr)cursor = prevCursor;
        return this;
    }

    /// <summary>
    /// Skips a block enclosed by <paramref name="blockStart"/> and <paramref name="blockEnd"/>. If the current character does not match <paramref name="blockStart"/>,
    /// the parser will try skipping to the next <paramref name="blockStart"/> character first. If no starting character is found, the cursor will not move.
    /// If the block is closed, the cursor will be placed after the ending character of the block; if the block is never closed, the cursor ends up at the end of the code.
    /// </summary>
    public void SkipBlock(char blockStart, char blockEnd){
        TrySkipBlock(blockStart, blockEnd);
    }

    /// <summary>
    /// Performs the work of <see cref="SkipBlock"/> and reports whether the ending character of the block was actually found.
    /// </summary>
    private bool TrySkipBlock(char blockStart, char blockEnd){
        SkipTo(blockStart);
        if (Char != blockStart)return false;

        // Counts blocks nested inside the outermost one; the outermost block ends once the depth drops below zero.
        int blockDepth = 0;

        while(cursor < length){
            char next = Next();

            if (next == blockEnd){
                if (--blockDepth < 0){
                    Next(); // move past the ending character
                    return true;
                }
            }
            else if (next == blockStart){
                ++blockDepth;
            }
        }

        return false;
    }

    /// <summary>
    /// Skips a text that matches a string with tokens. Returns true if the string matched code at current position and moves the cursor after the match,
    /// otherwise returns false and the cursor does not move. <para/>
    /// The <paramref name="matchStr"/> parameter may consist of the following tokens: <para/>
    /// ^s --- Whitespace Character <para/>
    /// ^n --- Not An Identifier Part Character (tested against <c>'\0'</c> at the end of the code) <para/>
    /// ^. --- Any Character (never matches at the end of the code) <para/>
    /// Any other character following ^ never matches. A ^ at the very end of <paramref name="matchStr"/> is matched literally.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="matchStr"/> is null.</exception>
    public bool SkipIfMatch(string matchStr){
        if (matchStr == null)throw new ArgumentNullException("matchStr");

        int index = 0, prevCursor = cursor;

        while(index < matchStr.Length){
            char matchChr = matchStr[index];
            bool hasMatched;

            if (matchChr == '^' && index < matchStr.Length-1){
                switch(matchStr[++index]){
                    case 's':
                        hasMatched = IsWhiteSpace(Char);
                        break;

                    case 'n':
                        hasMatched = !IsIdentifierPart(Char);
                        break;

                    case '.':
                        // "any character" requires that there is a character left to consume
                        hasMatched = !IsEOF;
                        break;

                    default:
                        hasMatched = false;
                        break;
                }
            }
            else{
                hasMatched = Char == matchChr;
            }

            if (!hasMatched){
                cursor = prevCursor;
                return false;
            }

            ++cursor;
            ++index;
        }

        return true;
    }

    /// <summary>
    /// Skips to the next matching character and returns a new instance of CodeParser with contents that were skipped (excluding the target character).
    /// If the skip does not succeed, an empty CodeParser will be returned and the cursor will not move.
    /// </summary>
    public CodeParser ReadTo(char chr){
        int indexStart = cursor;
        SkipTo(chr);
        return Clone(SubstrIndex(indexStart, cursor));
    }

    /// <summary>
    /// Skips to the next matching character and returns a new instance of CodeParser with contents that were skipped (excluding the target character).
    /// If the skip does not succeed, an empty CodeParser will be returned and the cursor will not move. If it succeeds, the parser will also skip the target character.
    /// </summary>
    public CodeParser ReadToSkip(char chr){
        CodeParser read = ReadTo(chr);
        if (Char == chr)Skip();
        return read;
    }

    /// <summary>
    /// Skips a block enclosed by <paramref name="blockStart"/> and <paramref name="blockEnd"/> and returns a new instance of CodeParser with the contents of that
    /// block, excluding the enclosing characters. If the current character does not match <paramref name="blockStart"/>, the parser will try skipping to the
    /// next <paramref name="blockStart"/>, first. If it fails, the returned contents will be empty and the cursor will not move, otherwise the cursor will be
    /// placed after the ending character of the block. If the block is never closed, everything after the starting character is returned and the cursor
    /// ends up at the end of the code.
    /// </summary>
    public CodeParser ReadBlock(char blockStart, char blockEnd){
        SkipTo(blockStart);
        if (Char != blockStart)return Clone("");

        int indexStart = cursor;
        bool isClosed = TrySkipBlock(blockStart, blockEnd);

        // A closed block leaves the cursor right after its ending character, which must be excluded from the contents.
        // An unterminated block leaves the cursor at the end of the code, where there is no ending character to exclude.
        int indexEnd = isClosed ? cursor-1 : cursor;
        return Clone(SubstrIndex(indexStart+1, indexEnd));
    }

    /// <summary>
    /// Attempts to read an identifier from the current cursor position. If there is an identifier and it is valid, it is returned and the cursor moves after
    /// the identifier. If any step fails, the cursor does not move and <see cref="string.Empty"/> is returned.
    /// </summary>
    public string ReadIdentifier(){
        if (!IsIdentifierStart(Char))return string.Empty;

        int prevCursor = cursor;

        // Advance until the first character that cannot be a part of the identifier, or until the end of the code.
        while(cursor < length){
            if (!IsIdentifierPart(Next()))break;
        }

        string identifier = SubstrIndex(prevCursor, cursor);

        if (!IsValidIdentifier(identifier)){
            cursor = prevCursor;
            return string.Empty;
        }

        return identifier;
    }
}
}