From f970838ecfa1a35ff868beb80b28fe6c1cccd092 Mon Sep 17 00:00:00 2001 From: dfsek Date: Sat, 19 Dec 2020 20:04:58 -0700 Subject: [PATCH] Working parser/tokenizer --- .../dfsek/terra/api/structures/Argument.java | 4 - .../dfsek/terra/api/structures/Function.java | 11 --- .../dfsek/terra/api/structures/Parser.java | 4 - .../terra/api/structures/parser/Argument.java | 5 + .../terra/api/structures/parser/Function.java | 12 +++ .../structures/parser/FunctionBuilder.java | 11 +++ .../terra/api/structures/parser/Parser.java | 96 +++++++++++++++++++ .../parser/exceptions/ParseException.java | 19 ++++ .../api/structures/tokenizer/Lookahead.java | 8 ++ .../api/structures/tokenizer/Position.java | 5 + .../terra/api/structures/tokenizer/Token.java | 8 +- .../tokenizer/TokenizedStatement.java | 5 - .../api/structures/tokenizer/Tokenizer.java | 54 ++++------- .../structures/tokenizer/group/Brackets.java | 18 ---- .../api/structures/tokenizer/group/Group.java | 9 -- .../tokenizer/group/LineComment.java | 18 ---- .../tokenizer/group/Parentheses.java | 18 ---- .../structures/tokenizer/group/Quotes.java | 19 ---- .../src/test/java/structure/ParserTest.java | 73 ++++++++++++++ .../test/java/structure/TokenizerTest.java | 9 +- common/src/test/resources/test.tesf | 18 +--- 21 files changed, 263 insertions(+), 161 deletions(-) delete mode 100644 common/src/main/java/com/dfsek/terra/api/structures/Argument.java delete mode 100644 common/src/main/java/com/dfsek/terra/api/structures/Function.java delete mode 100644 common/src/main/java/com/dfsek/terra/api/structures/Parser.java create mode 100644 common/src/main/java/com/dfsek/terra/api/structures/parser/Argument.java create mode 100644 common/src/main/java/com/dfsek/terra/api/structures/parser/Function.java create mode 100644 common/src/main/java/com/dfsek/terra/api/structures/parser/FunctionBuilder.java create mode 100644 common/src/main/java/com/dfsek/terra/api/structures/parser/Parser.java create mode 100644 common/src/main/java/com/dfsek/terra/api/structures/parser/exceptions/ParseException.java delete mode 100644 common/src/main/java/com/dfsek/terra/api/structures/tokenizer/TokenizedStatement.java delete mode 100644 common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Brackets.java delete mode 100644 common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Group.java delete mode 100644 common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/LineComment.java delete mode 100644 common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Parentheses.java delete mode 100644 common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Quotes.java create mode 100644 common/src/test/java/structure/ParserTest.java diff --git a/common/src/main/java/com/dfsek/terra/api/structures/Argument.java b/common/src/main/java/com/dfsek/terra/api/structures/Argument.java deleted file mode 100644 index 98b7d0e63..000000000 --- a/common/src/main/java/com/dfsek/terra/api/structures/Argument.java +++ /dev/null @@ -1,4 +0,0 @@ -package com.dfsek.terra.api.structures; - -public interface Argument { -} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/Function.java b/common/src/main/java/com/dfsek/terra/api/structures/Function.java deleted file mode 100644 index 73c5e1a22..000000000 --- a/common/src/main/java/com/dfsek/terra/api/structures/Function.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.dfsek.terra.api.structures; - -import java.util.List; - -public interface Function { - void apply(); - - String name(); - - List getArguments(); -} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/Parser.java b/common/src/main/java/com/dfsek/terra/api/structures/Parser.java deleted file mode 100644 index 680abcc22..000000000 --- a/common/src/main/java/com/dfsek/terra/api/structures/Parser.java +++ /dev/null @@ -1,4 +0,0 @@ -package com.dfsek.terra.api.structures; - -public class Parser { -} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/parser/Argument.java b/common/src/main/java/com/dfsek/terra/api/structures/parser/Argument.java new file mode 100644 index 000000000..ffe9eb84d --- /dev/null +++ b/common/src/main/java/com/dfsek/terra/api/structures/parser/Argument.java @@ -0,0 +1,5 @@ +package com.dfsek.terra.api.structures.parser; + +public interface Argument { + T parse(String input); +} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/parser/Function.java b/common/src/main/java/com/dfsek/terra/api/structures/parser/Function.java new file mode 100644 index 000000000..ec4393f48 --- /dev/null +++ b/common/src/main/java/com/dfsek/terra/api/structures/parser/Function.java @@ -0,0 +1,12 @@ +package com.dfsek.terra.api.structures.parser; + +import com.dfsek.terra.api.math.vector.Location; +import com.dfsek.terra.api.platform.world.Chunk; + +public interface Function { + void apply(Location location); + + void apply(Location location, Chunk chunk); + + String name(); +} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/parser/FunctionBuilder.java b/common/src/main/java/com/dfsek/terra/api/structures/parser/FunctionBuilder.java new file mode 100644 index 000000000..8b24fbcf0 --- /dev/null +++ b/common/src/main/java/com/dfsek/terra/api/structures/parser/FunctionBuilder.java @@ -0,0 +1,11 @@ +package com.dfsek.terra.api.structures.parser; + +import com.dfsek.terra.api.structures.parser.exceptions.ParseException; + +import java.util.List; + +public interface FunctionBuilder { + T build(List argumentList) throws ParseException; + + List> getArguments(); +} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/parser/Parser.java b/common/src/main/java/com/dfsek/terra/api/structures/parser/Parser.java new file mode 100644 index 000000000..399212b99 --- /dev/null +++ b/common/src/main/java/com/dfsek/terra/api/structures/parser/Parser.java @@ -0,0 +1,96 @@ +package com.dfsek.terra.api.structures.parser; + +import com.dfsek.terra.api.structures.parser.exceptions.ParseException; +import com.dfsek.terra.api.structures.tokenizer.Token; +import com.dfsek.terra.api.structures.tokenizer.Tokenizer; +import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException; +import com.dfsek.terra.api.util.GlueList; +import com.google.common.collect.Sets; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +public class Parser { + private final String data; + private final Map> functions = new HashMap<>(); + Set allowedArguments = Sets.newHashSet(Token.Type.STRING, Token.Type.NUMBER, Token.Type.IDENTIFIER); + + public Parser(String data) { + this.data = data; + } + + public Parser addFunction(String name, FunctionBuilder functionBuilder) { + functions.put(name, functionBuilder); + return this; + } + + public List parse() throws ParseException { + Tokenizer tokenizer = new Tokenizer(data); + List builtFunctions = new GlueList<>(); + List functionBuilder = new GlueList<>(); + Token token = null; + while(tokenizer.hasNext()) { + try { + token = tokenizer.fetch(); + functionBuilder.add(token); + + if(token.getType().equals(Token.Type.STATEMENT_END)) { + Token identifier = functionBuilder.remove(0); + checkType(identifier, Token.Type.IDENTIFIER); // First token must be identifier + + if(!functions.containsKey(identifier.getContent())) + throw new ParseException("No such function " + identifier.getContent() + ": " + identifier.getStart()); + + checkType(functionBuilder.remove(0), Token.Type.BODY_BEGIN); // Second is body begin + + boolean expectingSeparator = false; + + List args = new GlueList<>(); + + while(!functionBuilder.get(0).getType().equals(Token.Type.BODY_END)) { + Token current = functionBuilder.remove(0); + if(expectingSeparator) { + checkType(current, Token.Type.SEPARATOR); + expectingSeparator = false; + } else { + if(!allowedArguments.contains(current.getType())) + throw new ParseException("Token type " + current.getType() + " not allowed in arguments: " + current.getStart()); + args.add(current); + expectingSeparator = true; + } + } + + functionBuilder.remove(0); // Remove body end + + for(Token t : args) System.out.println("TOKEN: " + t); + + checkType(functionBuilder.remove(0), Token.Type.STATEMENT_END); + + List arg = args.stream().map(Token::getContent).collect(Collectors.toList()); + + for(String s : arg) System.out.println("ARG: " + s); + FunctionBuilder builder = functions.get(identifier.getContent()); + if(arg.size() != builder.getArguments().size()) + throw new ParseException("Expected " + builder.getArguments().size() + " arguments, found " + arg.size() + ": " + identifier.getStart()); + + builtFunctions.add(functions.get(identifier.getContent()).build(arg)); + + functionBuilder.clear(); + } + } catch(TokenizerException e) { + throw new ParseException("Failed to tokenize input", e); + } + + } + if(token != null) checkType(token, Token.Type.STATEMENT_END); + return builtFunctions; + } + + private void checkType(Token token, Token.Type expected) throws ParseException { + if(!token.getType().equals(expected)) + throw new ParseException("Expected " + expected + " but found " + token.getType() + ": " + token.getStart()); + } +} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/parser/exceptions/ParseException.java b/common/src/main/java/com/dfsek/terra/api/structures/parser/exceptions/ParseException.java new file mode 100644 index 000000000..c8e3794d6 --- /dev/null +++ b/common/src/main/java/com/dfsek/terra/api/structures/parser/exceptions/ParseException.java @@ -0,0 +1,19 @@ +package com.dfsek.terra.api.structures.parser.exceptions; + +public class ParseException extends Exception { + public ParseException() { + super(); + } + + public ParseException(String message) { + super(message); + } + + public ParseException(String message, Throwable cause) { + super(message, cause); + } + + public ParseException(Throwable cause) { + super(cause); + } +} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Lookahead.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Lookahead.java index ebf881e9e..cfbec6d6b 100644 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Lookahead.java +++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Lookahead.java @@ -83,6 +83,14 @@ public class Lookahead { } else return buffer.get(ahead); } + public int getLine() { + return line; + } + + public int getIndex() { + return index; + } + /** * Consume an amount of characters * diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Position.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Position.java index cc0968365..2c0651409 100644 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Position.java +++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Position.java @@ -8,4 +8,9 @@ public class Position { this.line = line; this.index = index; } + + @Override + public String toString() { + return (line + 1) + ":" + (index + 1); + } } diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Token.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Token.java index 63c114025..e7502195a 100644 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Token.java +++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Token.java @@ -3,10 +3,12 @@ package com.dfsek.terra.api.structures.tokenizer; public class Token { private final String content; private final Type type; + private final Position start; - public Token(String content, Type type) { + public Token(String content, Type type, Position start) { this.content = content; this.type = type; + this.start = start; } public Type getType() { @@ -17,6 +19,10 @@ public class Token { return content; } + public Position getStart() { + return start; + } + @Override public String toString() { return type + ": '" + content + "'"; diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/TokenizedStatement.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/TokenizedStatement.java deleted file mode 100644 index d6b37a880..000000000 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/TokenizedStatement.java +++ /dev/null @@ -1,5 +0,0 @@ -package com.dfsek.terra.api.structures.tokenizer; - -public class TokenizedStatement { - -} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Tokenizer.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Tokenizer.java index 5f05136d1..f2bb0da99 100644 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Tokenizer.java +++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Tokenizer.java @@ -1,13 +1,11 @@ package com.dfsek.terra.api.structures.tokenizer; import com.dfsek.terra.api.structures.tokenizer.exceptions.EOFException; +import com.dfsek.terra.api.structures.tokenizer.exceptions.FormatException; import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException; -import com.dfsek.terra.api.structures.tokenizer.group.Group; -import com.dfsek.terra.api.util.GlueList; import com.google.common.collect.Sets; import java.io.StringReader; -import java.util.List; import java.util.Set; public class Tokenizer { @@ -20,46 +18,47 @@ public class Tokenizer { reader = new Lookahead(new StringReader(data + '\0')); } - public List tokenize() { - List tokens = new GlueList<>(); - while(reader.current().isEOF()) { - Char c = reader.current(); - } - - return tokens; + public boolean hasNext() { + while(!reader.current().isEOF() && reader.current().isWhitespace()) reader.consume(); // Consume whitespace. + return !reader.current().isEOF(); } public Token fetch() throws TokenizerException { - while(!reader.current().isEOF() && reader.current().isWhitespace()) reader.consume(); if(reader.current().isEOF()) return null; // EOF if(reader.matches("//", true)) skipLine(); // Skip line if comment - if(reader.matches("/*", true)) skipTo("*/"); + if(reader.matches("/*", true)) skipTo("*/"); // Skip multi line comment if(isNumberStart()) { StringBuilder num = new StringBuilder(); while(!reader.current().isEOF() && isNumberLike()) { num.append(reader.consume()); } - return new Token(num.toString(), Token.Type.NUMBER); + return new Token(num.toString(), Token.Type.NUMBER, new Position(reader.getLine(), reader.getIndex())); } if(reader.current().is('"')) { reader.consume(); // Consume first quote StringBuilder string = new StringBuilder(); - while(!reader.current().isEOF() && !reader.current().is('"')) { + while(!reader.current().is('"')) { + if(reader.current().isEOF()) + throw new FormatException("No end of string literal found. " + reader.getLine() + ":" + reader.getIndex()); string.append(reader.consume()); } reader.consume(); // Consume last quote - return new Token(string.toString(), Token.Type.STRING); + return new Token(string.toString(), Token.Type.STRING, new Position(reader.getLine(), reader.getIndex())); } - if(reader.current().is('(')) return new Token(reader.consume().toString(), Token.Type.BODY_BEGIN); - if(reader.current().is(')')) return new Token(reader.consume().toString(), Token.Type.BODY_END); - if(reader.current().is(';')) return new Token(reader.consume().toString(), Token.Type.STATEMENT_END); - if(reader.current().is(',')) return new Token(reader.consume().toString(), Token.Type.SEPARATOR); + if(reader.current().is('(')) + return new Token(reader.consume().toString(), Token.Type.BODY_BEGIN, new Position(reader.getLine(), reader.getIndex())); + if(reader.current().is(')')) + return new Token(reader.consume().toString(), Token.Type.BODY_END, new Position(reader.getLine(), reader.getIndex())); + if(reader.current().is(';')) + return new Token(reader.consume().toString(), Token.Type.STATEMENT_END, new Position(reader.getLine(), reader.getIndex())); + if(reader.current().is(',')) + return new Token(reader.consume().toString(), Token.Type.SEPARATOR, new Position(reader.getLine(), reader.getIndex())); StringBuilder token = new StringBuilder(); while(!reader.current().isEOF() && !isSyntaxSignificant(reader.current().getCharacter())) { @@ -67,7 +66,7 @@ public class Tokenizer { if(!c.isWhitespace()) token.append(c); } - return new Token(token.toString(), Token.Type.IDENTIFIER); + return new Token(token.toString(), Token.Type.IDENTIFIER, new Position(reader.getLine(), reader.getIndex())); } private boolean isNumberLike() { @@ -94,21 +93,6 @@ public class Tokenizer { throw new EOFException("No end of expression found."); } - /** - * Read to the end of a group, consuming all - * - * @param g - * @return - */ - private String readToEndOfGroup(Group g) { - StringBuilder builder = new StringBuilder(); - do { - Char current = reader.consume(); - - } while(reader.current().getCharacter() != g.getEnd()); - return builder.toString(); - } - public boolean isSyntaxSignificant(char c) { return syntaxSignificant.contains(c); } diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Brackets.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Brackets.java deleted file mode 100644 index ddaed62f1..000000000 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Brackets.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.dfsek.terra.api.structures.tokenizer.group; - -public class Brackets implements Group { - @Override - public char getBegin() { - return '['; - } - - @Override - public char getEnd() { - return ']'; - } - - @Override - public boolean ignoreInsideSyntax() { - return false; - } -} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Group.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Group.java deleted file mode 100644 index 20ab13f83..000000000 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Group.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.dfsek.terra.api.structures.tokenizer.group; - -public interface Group { - char getBegin(); - - char getEnd(); - - boolean ignoreInsideSyntax(); -} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/LineComment.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/LineComment.java deleted file mode 100644 index 7f458af71..000000000 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/LineComment.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.dfsek.terra.api.structures.tokenizer.group; - -public class LineComment implements Group { - @Override - public char getBegin() { - return '#'; - } - - @Override - public char getEnd() { - return '\n'; - } - - @Override - public boolean ignoreInsideSyntax() { - return true; - } -} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Parentheses.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Parentheses.java deleted file mode 100644 index 91af4aa00..000000000 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Parentheses.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.dfsek.terra.api.structures.tokenizer.group; - -public class Parentheses implements Group { - @Override - public char getBegin() { - return '('; - } - - @Override - public char getEnd() { - return ')'; - } - - @Override - public boolean ignoreInsideSyntax() { - return false; - } -} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Quotes.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Quotes.java deleted file mode 100644 index f092bc520..000000000 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Quotes.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.dfsek.terra.api.structures.tokenizer.group; - -public class Quotes implements Group { - - @Override - public char getBegin() { - return '"'; - } - - @Override - public char getEnd() { - return '"'; - } - - @Override - public boolean ignoreInsideSyntax() { - return true; - } -} diff --git a/common/src/test/java/structure/ParserTest.java b/common/src/test/java/structure/ParserTest.java new file mode 100644 index 000000000..dbc98b64c --- /dev/null +++ b/common/src/test/java/structure/ParserTest.java @@ -0,0 +1,73 @@ +package structure; + +import com.dfsek.terra.api.math.vector.Location; +import com.dfsek.terra.api.platform.world.Chunk; +import com.dfsek.terra.api.structures.parser.Argument; +import com.dfsek.terra.api.structures.parser.Function; +import com.dfsek.terra.api.structures.parser.FunctionBuilder; +import com.dfsek.terra.api.structures.parser.Parser; +import com.dfsek.terra.api.structures.parser.exceptions.ParseException; +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +public class ParserTest { + @Test + public void parse() throws IOException, ParseException { + Parser parser = new Parser(IOUtils.toString(getClass().getResourceAsStream("/test.tesf"))); + + parser.addFunction("test", new FunctionBuilder() { + @Override + public Test1 build(List argumentList) throws ParseException { + return new Test1(argumentList.get(0), Double.parseDouble(argumentList.get(1))); + } + + @Override + public List> getArguments() { + return Arrays.asList(id -> id, Double::parseDouble); + } + }); + + List functions = parser.parse(); + + for(Function f : functions) { + System.out.println(f); + } + } + + private static class Test1 implements Function { + private final String a; + private final double b; + + public Test1(String a, double b) { + this.a = a; + this.b = b; + } + + public String getA() { + return a; + } + + public double getB() { + return b; + } + + @Override + public void apply(Location location) { + + } + + @Override + public void apply(Location location, Chunk chunk) { + + } + + @Override + public String name() { + return null; + } + } +} diff --git a/common/src/test/java/structure/TokenizerTest.java b/common/src/test/java/structure/TokenizerTest.java index 37f071cad..73e0fd6e2 100644 --- a/common/src/test/java/structure/TokenizerTest.java +++ b/common/src/test/java/structure/TokenizerTest.java @@ -12,12 +12,15 @@ public class TokenizerTest { @Test public void tokens() throws IOException, TokenizerException { Tokenizer tokenizer = new Tokenizer(IOUtils.toString(getClass().getResourceAsStream("/test.tesf"))); + // Actual run + long l = System.nanoTime(); - for(int i = 0; i < 100; i++) { + + while(tokenizer.hasNext()) { Token t = tokenizer.fetch(); - if(t == null) break; System.out.println(t); - } + + System.out.println((double) (System.nanoTime() - l) / 1000000); } } diff --git a/common/src/test/resources/test.tesf b/common/src/test/resources/test.tesf index d170e2f2c..f1cc70dca 100644 --- a/common/src/test/resources/test.tesf +++ b/common/src/test/resources/test.tesf @@ -1,18 +1,4 @@ -FUNCTION(1, "hello"); +test("hello", 1); -; -) +test("ghgjhgjhgj", 3.4); -FUNCTION2(1.2, "he423llo"); - -// comment - -FUNCTION3(3.4, "35234523452345"); - -/* -block comment -fsfsdf -gsdfgsdfg -*/ - -FUNCTION(1, "hello"); \ No newline at end of file