1
0
Fork 0
Code-Statistics/LanguageJava/Handling/JavaCodeParser.cs

510 lines
18 KiB
C#

using System.Collections.Generic;
using System.Text;
using CodeStatisticsCore.Handling.Utils;
using LanguageJava.Elements;
using LanguageJava.Utils;
namespace LanguageJava.Handling{
public class JavaCodeParser : CodeParser{
/// <summary>
/// Handler signature for <see cref="AnnotationCallback"/>; receives the annotation that was just read.
/// </summary>
public delegate void OnAnnotationRead(Annotation annotation);
/// <summary>
/// Handler signature for <see cref="CodeBlockCallback"/>; receives a block parser created from the contents that were just read.
/// </summary>
public delegate void OnCodeBlockRead(JavaCodeBlockParser blockParser);
/// <summary>
/// Raised by <see cref="ReadAnnotation"/> every time an annotation is successfully read.
/// </summary>
public event OnAnnotationRead AnnotationCallback;
/// <summary>
/// Raised with the contents of code blocks (method bodies and initializers) and of field declarations while type contents are read.
/// </summary>
public event OnCodeBlockRead CodeBlockCallback;
/// <summary>
/// Creates a parser over <paramref name="code"/> and installs the character and identifier rules
/// supplied by <see cref="JavaCharacters"/>.
/// </summary>
/// <param name="code">Text the parser will read.</param>
public JavaCodeParser(string code) : base(code){
    // character classification
    this.IsWhiteSpace = JavaCharacters.IsWhiteSpace;

    // identifier rules
    this.IsIdentifierStart = JavaCharacters.IsIdentifierStart;
    this.IsIdentifierPart = JavaCharacters.IsIdentifierPart;
    this.IsValidIdentifier = JavaCharacters.IsNotReservedWord;
}
/// <summary>
/// Creates a new JavaCodeParser over <paramref name="newCode"/> that shares the annotation and code block
/// callbacks of this parser.
/// </summary>
/// <param name="newCode">Text for the new parser; an empty string is used when null.</param>
/// <returns>The newly created parser.</returns>
public override CodeParser Clone(string newCode = null){
    JavaCodeParser copy = new JavaCodeParser(newCode ?? string.Empty);

    // hand over the registered listeners so events raised by the copy reach the same handlers
    copy.AnnotationCallback = AnnotationCallback;
    copy.CodeBlockCallback = CodeBlockCallback;

    return copy;
}
/// <summary>
/// Skips any run of whitespace and semicolons, leaving the cursor after the last skipped character.
/// </summary>
/// <returns>This parser.</returns>
public JavaCodeParser SkipSpacesAndSemicolons(){
    while(true){
        int startedAt = cursor;

        if (SkipSpaces().Char == ';'){
            Skip();
        }

        // stop at the end of the code, or as soon as a pass did not move the cursor
        if (IsEOF || cursor == startedAt){
            return this;
        }
    }
}
/// <summary>
/// Moves the cursor forward to the next occurrence of <paramref name="chr"/> that is not nested inside
/// any of the bracket pairs ( ), [ ] or { }. If no such occurrence exists, or the brackets met on the way
/// are not balanced, the cursor is put back where it started.
/// </summary>
/// <param name="chr">Character to stop at.</param>
/// <returns>This parser.</returns>
public JavaCodeParser SkipToIfBalanced(char chr){
    int startCursor = cursor;
    Stack<char> expectedClosers = new Stack<char>(4);

    for(; cursor < length; ++cursor){
        char current = Char;

        // only a match outside of all brackets counts
        if (expectedClosers.Count == 0 && current == chr){
            break;
        }

        if (current == '('){
            expectedClosers.Push(')');
        }
        else if (current == '['){
            expectedClosers.Push(']');
        }
        else if (current == '{'){
            expectedClosers.Push('}');
        }
        else if (current == ')' || current == ']' || current == '}'){
            // a closing bracket has to match the most recently opened one
            if (expectedClosers.Count == 0 || expectedClosers.Pop() != current){
                cursor = startCursor;
                return this;
            }
        }
    }

    if (Char != chr || expectedClosers.Count > 0){
        cursor = startCursor;
    }

    return this;
}
/// <summary>
/// Runs <see cref="SkipToIfBalanced"/> and returns a new JavaCodeParser over the characters it skipped.
/// When the skip fails the cursor does not move, so the returned parser is created from an empty range.
/// </summary>
/// <param name="chr">Character to stop at.</param>
/// <returns>A cloned parser containing the text between the original and the new cursor position.</returns>
public JavaCodeParser ReadToIfBalanced(char chr){
    int from = cursor;
    SkipToIfBalanced(chr);
    int to = cursor;

    return (JavaCodeParser)Clone(SubstrIndex(from, to));
}
/// <summary>
/// Reads a full type name: one or more identifiers joined by dots. Generic arguments in angled brackets
/// are skipped and left out of the result, which allows names that refer to nested types of generic types.
/// When <paramref name="allowStarAtEnd"/> is true, the last segment may be a star, which has to be the last
/// character of the parsed code. <para/>
/// https://docs.oracle.com/javase/specs/jls/se8/html/jls-6.html#d5e7695
/// </summary>
/// <param name="allowStarAtEnd">Whether a trailing ".*" is accepted.</param>
/// <returns>The dotted name, or an empty string if no valid name could be read.</returns>
public string ReadFullTypeName(bool allowStarAtEnd = false){
    string part = ReadIdentifier();

    if (part.Length == 0){
        return string.Empty;
    }

    StringBuilder name = new StringBuilder();

    for(;;){
        name.Append(part);

        if (Char == '.'){
            name.Append('.');

            bool isStar = Skip().Char == '*' && allowStarAtEnd;

            if (!isStar){
                part = ReadIdentifier();

                if (part.Length == 0){
                    return string.Empty;
                }

                continue;
            }

            // the star is only valid as the very last character
            if (!Skip().IsEOF){
                return string.Empty;
            }

            part = "*";
            continue;
        }

        if (SkipSpaces().Char != '<'){
            break;
        }

        // generics contribute nothing to the name, but another dot and segment may follow them
        SkipBlock('<', '>');
        part = string.Empty;
    }

    return name.ToString();
}
/// <summary>
/// Reads one annotation at the cursor: the @ character, an optional run of spaces, the type name, and an
/// optional argument list in parentheses which is skipped and ignored. Raises <see cref="AnnotationCallback"/>
/// for every annotation that is read. If no annotation can be read, the cursor does not move. <para/>
/// https://docs.oracle.com/javase/specs/jls/se8/html/jls-9.7
/// </summary>
/// <returns>The annotation with its simple name, or null if the cursor is not at a valid annotation.</returns>
public Annotation? ReadAnnotation(){
    if (Char != '@')return null;

    int prevCursor = cursor;
    Skip().SkipSpaces(); // skip @ and spaces

    string simpleName = JavaParseUtils.FullToSimpleName(ReadFullTypeName()); // read type name

    if (simpleName.Length == 0){
        // Not an annotation after all. Put the @ back, otherwise a following ReadTypeDeclaration
        // could no longer match "@interface" and would see a plain "interface" instead.
        cursor = prevCursor;
        return null;
    }

    if (SkipSpaces().Char == '('){ // skip arguments and ignore
        SkipBlock('(', ')');
    }

    Annotation annotation = new Annotation(simpleName);

    if (AnnotationCallback != null)AnnotationCallback(annotation);
    return annotation;
}
/// <summary>
/// Collects all annotations that follow the cursor by handing <see cref="ReadAnnotation"/> to
/// <see cref="JavaParseUtils.ReadStructList"/>.
/// </summary>
/// <returns>The annotations that were read.</returns>
public List<Annotation> SkipReadAnnotationList(){
    List<Annotation> annotations = JavaParseUtils.ReadStructList(this, ReadAnnotation, 1);
    return annotations;
}
/// <summary>
/// Reads a package declaration at the cursor. Modifiers in front of the package keyword are not handled
/// here and have to be read separately. <para/>
/// https://docs.oracle.com/javase/specs/jls/se8/html/jls-7.html#jls-7.4
/// </summary>
/// <returns>The package name, or an empty string if the cursor is not at a package declaration.</returns>
public string ReadPackageDeclaration(){
    if (!SkipIfMatch("package^s")){
        return string.Empty;
    }

    // everything up to the semicolon is parsed on its own as the package name
    JavaCodeParser declaration = (JavaCodeParser)ReadToSkip(';');
    return declaration.ReadFullTypeName();
}
/// <summary>
/// Reads an import declaration at the cursor, including static and star imports. <para/>
/// https://docs.oracle.com/javase/specs/jls/se8/html/jls-7.html#jls-7.5
/// </summary>
/// <returns>The import, or null if the cursor is not at an import declaration or the imported name is invalid.</returns>
public Import? ReadImportDeclaration(){
    if (!SkipIfMatch("import^s")){
        return null;
    }

    bool isStatic = SkipIfMatch("static^s");

    // everything up to the semicolon is parsed on its own as the imported name
    JavaCodeParser declaration = (JavaCodeParser)ReadToSkip(';');
    string importedName = declaration.ReadFullTypeName(true);

    return importedName.Length == 0 ? (Import?)null : new Import(importedName, isStatic);
}
/// <summary>
/// Tries each keyword from <see cref="JavaModifiers.Strings"/> at the cursor and skips the first one that matches.
/// </summary>
/// <returns>The matched modifier from <see cref="Modifiers"/>, or null if none matched.</returns>
public Modifiers? ReadModifier(){
    foreach(string keyword in JavaModifiers.Strings){
        if (!SkipIfMatch(keyword+"^n")){
            continue;
        }

        // the match may have swallowed a non-whitespace character right after the keyword,
        // such as the bracket in static{}; step back so it is not lost
        bool swallowedNonSpace = cursor > 0 && !IsWhiteSpace(code[cursor-1]);

        if (swallowedNonSpace){
            --cursor;
        }

        return JavaModifiers.FromString(keyword);
    }

    return null;
}
/// <summary>
/// Collects all modifiers that follow the cursor by handing <see cref="ReadModifier"/> to
/// <see cref="JavaParseUtils.ReadStructList"/>.
/// </summary>
/// <returns>The modifiers that were read.</returns>
public List<Modifiers> SkipReadModifierList(){
    List<Modifiers> modifiers = JavaParseUtils.ReadStructList(this, ReadModifier, 2);
    return modifiers;
}
/// <summary>
/// Tries each keyword from <see cref="JavaPrimitives.Strings"/> at the cursor and skips the first one that matches.
/// </summary>
/// <returns>The matched primitive from <see cref="Primitives"/>, or null if none matched.</returns>
public Primitives? ReadPrimitive(){
    foreach(string keyword in JavaPrimitives.Strings){
        if (!SkipIfMatch(keyword+"^n")){
            continue;
        }

        // the match may have swallowed a non-whitespace character right after the keyword,
        // such as the start of an array or varargs suffix; step back so it is not lost
        bool swallowedNonSpace = cursor > 0 && !IsWhiteSpace(code[cursor-1]);

        if (swallowedNonSpace){
            --cursor;
        }

        return JavaPrimitives.FromString(keyword);
    }

    return null;
}
/// <summary>
/// Reads the member info at the cursor: first the list of annotations, then the list of modifiers.
/// </summary>
/// <returns>A <see cref="Member"/> built from both lists.</returns>
public Member SkipReadMemberInfo(){
    // the order matters, annotations are consumed before modifiers
    List<Annotation> annotations = SkipReadAnnotationList();
    List<Modifiers> modifiers = SkipReadModifierList();

    return new Member(annotations, modifiers);
}
/// <summary>
/// Skips spaces, and if a generics declaration in angled brackets follows, skips it along with the spaces after it.
/// </summary>
/// <returns>This parser.</returns>
public JavaCodeParser SkipGenerics(){
    SkipSpaces();

    if (Char != '<'){
        return this;
    }

    SkipBlock('<', '>');
    SkipSpaces();
    return this;
}
/// <summary>
/// Skips what may follow a type name: spaces, blocks in square brackets (arrays), blocks in angled brackets
/// (generics) and triple dots (varargs). Repeats for as long as another bracket block follows directly.
/// </summary>
/// <returns>This parser.</returns>
public JavaCodeParser SkipTypeArrayAndGenerics(){
    while(true){
        SkipSpaces();

        if (Char == '['){
            SkipBlock('[', ']');
        }

        if (Char == '<'){
            SkipBlock('<', '>');
        }

        if (Char == '.'){
            SkipIfMatch("...");
        }

        // go again only if another bracket block starts right at the cursor
        if (IsEOF){
            break;
        }

        if (Char != '[' && Char != '<'){
            break;
        }
    }

    return this;
}
/// <summary>
/// Reads the type, which can either be a method return/parameter type or a field type, and skips it.
/// Annotations in front of the type are skipped. For method parameters, the final keyword is skipped too,
/// whether it is written before or after the annotations. Otherwise a leading generics declaration is skipped.
/// Array brackets, generic arguments and varargs dots that follow the type are skipped as well.
/// </summary>
/// <param name="isMethodParameter">True when reading a method parameter, false for a method return type or field type.</param>
/// <returns>The void, primitive or object type that was read, or null if there is no type at the cursor.</returns>
public TypeOf? ReadTypeOf(bool isMethodParameter){
    // skip type annotations; after an annotation with arguments the cursor sits right behind the closing
    // parenthesis, so the spaces that follow have to be skipped before the next element can be matched
    while(ReadAnnotation().HasValue)SkipSpaces();

    if (isMethodParameter){
        // skip final keyword in method parameters, annotations are also allowed to follow it
        if (SkipIfMatch("final^s")){
            while(ReadAnnotation().HasValue)SkipSpaces();
        }
    }
    else SkipGenerics(); // skip method return type generics

    // void
    if (SkipIfMatch("void^s"))return TypeOf.Void();

    // primitive
    Primitives? primitive = ReadPrimitive();

    if (primitive.HasValue){
        SkipTypeArrayAndGenerics();
        return TypeOf.Primitive(primitive.Value);
    }

    // object name
    string typeName = ReadFullTypeName();

    if (typeName.Length > 0){
        SkipTypeArrayAndGenerics();
        return TypeOf.Object(JavaParseUtils.FullToSimpleName(typeName));
    }

    // nothing
    return null;
}
/// <summary>
/// Matches one of the type declaration keywords (class, interface, enum, @interface) at the cursor and skips it.
/// </summary>
/// <returns>The matching <see cref="Type.DeclarationType"/>, or null if none of the keywords is at the cursor.</returns>
public Type.DeclarationType? ReadTypeDeclaration(){
    if (SkipIfMatch("class^s")){
        return Type.DeclarationType.Class;
    }

    if (SkipIfMatch("interface^s")){
        return Type.DeclarationType.Interface;
    }

    if (SkipIfMatch("enum^s")){
        return Type.DeclarationType.Enum;
    }

    if (SkipIfMatch("@interface^s")){
        return Type.DeclarationType.Annotation;
    }

    return null;
}
/// <summary>
/// Reads a whole type declaration at the cursor: member info, declaration keyword, identifier, and the
/// block in curly brackets, whose contents are parsed into the returned Type.
/// </summary>
/// <returns>The type that was read, or null if the declaration keyword or the identifier is missing.</returns>
public Type ReadType(){
    Member memberInfo = SkipReadMemberInfo();

    Type.DeclarationType? declaration = ReadTypeDeclaration();

    if (!declaration.HasValue){
        return null;
    }

    string name = SkipSpaces().ReadIdentifier();

    if (name.Length == 0){
        return null;
    }

    Type result = new Type(declaration.Value, name, memberInfo);

    // the body is parsed by a separate parser that only contains the contents of the block
    JavaCodeParser body = (JavaCodeParser)SkipTo('{').ReadBlock('{', '}');
    body.ReadTypeContents(result);

    return result;
}
/// <summary>
/// Recursively reads all members of a Type, including all nested Types. Called on a cloned JavaCodeParser that only
/// contains contents of the Type block. Enum values are read first (for enums), followed by nested types,
/// initializer blocks, methods and fields. Members that cannot be parsed are skipped, up to a fixed limit.
/// </summary>
/// <param name="type">Type that receives the enum values, nested types, methods and fields found in the contents.</param>
private void ReadTypeContents(Type type){
    // limit of members that could not be parsed and were skipped, reading stops once it is reached
    const int maxSkippedMembers = 50;

    // enum values
    if (type.Declaration == Type.DeclarationType.Enum){
        Type.DataEnum enumData = type.GetData<Type.DataEnum>();

        // values end at the first balanced semicolon; if that read comes back empty, read them from this parser instead
        JavaCodeParser enumParser = ReadToIfBalanced(';');
        if (enumParser.Contents.Length == 0)enumParser = this;

        foreach(string enumValue in enumParser.ReadEnumValueList()){
            enumData.EnumValues.Add(enumValue);
        }

        if (Char == ';')Skip();
    }

    // members
    int skippedMembers = 0;

    while(!IsEOF && skippedMembers < maxSkippedMembers){
        Member memberInfo = SkipReadMemberInfo(); // skips spaces at the beginning and end

        // nested types
        Type.DeclarationType? declaration = ReadTypeDeclaration();

        if (declaration.HasValue){
            string identifier = SkipSpaces().ReadIdentifier();
            if (identifier.Length == 0)break; // error, break out

            Type nestedType = new Type(declaration.Value, identifier, memberInfo);
            ((JavaCodeParser)SkipTo('{').ReadBlock('{', '}')).ReadTypeContents(nestedType);
            SkipSpacesAndSemicolons();

            type.NestedTypes.Add(nestedType);
            continue;
        }

        // static / instance initializer
        if (Char == '{'){
            Method method = new Method(memberInfo.Modifiers.HasFlag(Modifiers.Static) ? "<clinit>" : "<init>", TypeOf.Void(), memberInfo);
            SkipProcessCodeBlock();

            type.GetData().Methods.Add(method);
            continue;
        }

        // fields and methods
        TypeOf? returnOrFieldType = ReadTypeOf(false);

        if (returnOrFieldType.HasValue){
            int prevCursor = cursor;
            string identifier = SkipSpaces().ReadIdentifier();

            // a type name equal to the name of the enclosing type, with no identifier after it, is a constructor
            if (identifier.Length == 0 && string.Equals(returnOrFieldType.Value.AsSimpleType(), type.Identifier)){ // constructor
                identifier = Method.ConstructorIdentifier;
                returnOrFieldType = TypeOf.Void();
            }

            if (identifier.Length == 0)break; // error, break out

            if (SkipSpaces().Char == '('){ // method
                List<TypeOf> parameterList = ((JavaCodeParser)ReadBlock('(', ')')).ReadMethodParameterList();

                if (type.Declaration == Type.DeclarationType.Annotation){
                    // annotation elements may declare a default value, which is skipped up to the semicolon
                    if (SkipSpaces().SkipIfMatch("default^n")){
                        memberInfo = new Member(memberInfo, memberInfo.Modifiers | Modifiers.Default);
                        SkipTo(';');
                    }
                }
                else{
                    // skip the list of thrown exception types
                    if (SkipSpaces().SkipIfMatch("throws^s")){
                        while(true){
                            ReadFullTypeName();

                            if (SkipSpaces().Char == ',')Skip().SkipSpaces();
                            else break;
                        }
                    }
                }

                if (Char == ';')Skip(); // no body
                else{
                    SkipProcessCodeBlock();
                    SkipSpacesAndSemicolons();
                }

                type.GetData().Methods.Add(new Method(identifier, returnOrFieldType.Value, parameterList, type.GetData().UpdateMethodInfo(memberInfo)));
            }
            else{ // field
                Type.TypeData data = type.GetData();

                // go back in front of the first identifier, a single declaration can contain several fields
                cursor = prevCursor;

                foreach(string fieldIdentifier in ReadToIfBalanced(';').ReadFieldIdentifierList()){
                    data.Fields.Add(new Field(fieldIdentifier, returnOrFieldType.Value, data.UpdateFieldInfo(memberInfo)));
                }

                Skip();
            }

            continue;
        }

        // extra checks before the skip
        if (Char == ';'){
            Skip();
            continue;
        }

        if (IsEOF)break;

        // skip
        if (skippedMembers == 0 && System.Diagnostics.Debugger.IsAttached){
            // Development aid for the first unparsable member. Only break into a debugger that is already
            // attached, because Debugger.Break() with no debugger attached interrupts regular runs.
            System.Diagnostics.Debugger.Break();
        }

        SkipBlock('{', '}');
        SkipSpacesAndSemicolons();
        ++skippedMembers;
    }
}
/// <summary>
/// Reads the names of all fields in a field declaration, which are separated by commas outside of any brackets.
/// If a listener exists for <see cref="CodeBlockCallback"/>, it is called once with the entire contents of this
/// parser, unless the parser holds nothing but spaces.
/// </summary>
/// <returns>The field names in the order they were declared.</returns>
private List<string> ReadFieldIdentifierList(){
    List<string> identifiers = new List<string>();

    if (SkipSpaces().IsEOF){
        return identifiers;
    }

    if (CodeBlockCallback != null){
        CodeBlockCallback(new JavaCodeBlockParser(Contents));
    }

    for(string identifier = ReadIdentifier(); identifier.Length != 0; identifier = ReadIdentifier()){
        identifiers.Add(identifier);

        // whatever follows the name is skipped up to the next top level comma
        if (SkipToIfBalanced(',').Char != ','){
            break;
        }

        Skip().SkipSpaces();
    }

    return identifiers;
}
/// <summary>
/// Reads the types of all parameters of a method. Called on a cloned JavaCodeParser that only contains
/// the contents between the parentheses.
/// </summary>
/// <returns>The parameter types in the order they were declared.</returns>
private List<TypeOf> ReadMethodParameterList(){
    List<TypeOf> parameters = new List<TypeOf>();

    if (SkipSpaces().IsEOF){
        return parameters;
    }

    while(true){
        TypeOf? parameterType = ReadTypeOf(true);

        if (!parameterType.HasValue){
            return parameters;
        }

        parameters.Add(parameterType.Value);

        // the parameter name is skipped together with everything else up to the next comma
        if (SkipTo(',').Char != ','){
            return parameters;
        }

        Skip().SkipSpaces();
    }
}
/// <summary>
/// Reads the names of all enum values, which are separated by commas outside of any brackets. Called on
/// a cloned JavaCodeParser that only contains the contents between the beginning of the Type and the first semicolon.
/// </summary>
/// <returns>The enum value names in the order they were declared.</returns>
private List<string> ReadEnumValueList(){
    List<string> values = new List<string>();

    if (SkipSpaces().IsEOF){
        return values;
    }

    for(string value = ReadIdentifier(); value.Length != 0; value = ReadIdentifier()){
        values.Add(value);

        // arguments and bodies of the value are skipped up to the next top level comma
        if (SkipToIfBalanced(',').Char != ','){
            break;
        }

        Skip().SkipSpaces();
    }

    return values;
}
/// <summary>
/// Moves past a block of code between { and }. If a listener exists for <see cref="CodeBlockCallback"/>, the block is
/// read with <see cref="CodeParser.ReadBlock"/> and its contents are passed to the listener wrapped in a
/// <see cref="JavaCodeBlockParser"/>. Otherwise the block is only skipped with <see cref="CodeParser.SkipBlock"/>.
/// </summary>
private void SkipProcessCodeBlock(){
    if (CodeBlockCallback != null){
        CodeBlockCallback(new JavaCodeBlockParser(ReadBlock('{', '}').Contents));
        return;
    }

    // nobody is listening, so the contents do not have to be extracted
    SkipBlock('{', '}');
}
}
}