mirror of
https://github.com/PolyhedralDev/Terra.git
synced 2026-04-08 16:56:07 +00:00
Working parser/tokenizer
This commit is contained in:
@@ -1,4 +0,0 @@
|
||||
package com.dfsek.terra.api.structures;
|
||||
|
||||
/**
 * Marker interface for a structure-script argument.
 */
public interface Argument {
}
|
||||
@@ -1,11 +0,0 @@
|
||||
package com.dfsek.terra.api.structures;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public interface Function {
|
||||
void apply();
|
||||
|
||||
String name();
|
||||
|
||||
List<Argument> getArguments();
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
package com.dfsek.terra.api.structures;
|
||||
|
||||
/**
 * Parses structure scripts. Currently an empty placeholder with no state or behavior.
 */
public class Parser {
}
|
||||
@@ -0,0 +1,5 @@
|
||||
package com.dfsek.terra.api.structures.parser;
|
||||
|
||||
/**
 * Parses a raw argument string into a typed value.
 *
 * @param <T> type of value this argument produces
 */
@FunctionalInterface // single abstract method; lets builders supply lambdas/method refs
public interface Argument<T> {
    /**
     * Parse raw argument text into a value of type {@code T}.
     *
     * @param input raw argument text
     * @return parsed value
     */
    T parse(String input);
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package com.dfsek.terra.api.structures.parser;
|
||||
|
||||
import com.dfsek.terra.api.math.vector.Location;
|
||||
import com.dfsek.terra.api.platform.world.Chunk;
|
||||
|
||||
public interface Function {
|
||||
void apply(Location location);
|
||||
|
||||
void apply(Location location, Chunk chunk);
|
||||
|
||||
String name();
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.dfsek.terra.api.structures.parser;
|
||||
|
||||
import com.dfsek.terra.api.structures.parser.exceptions.ParseException;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public interface FunctionBuilder<T extends Function> {
|
||||
T build(List<String> argumentList) throws ParseException;
|
||||
|
||||
List<Argument<?>> getArguments();
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
package com.dfsek.terra.api.structures.parser;
|
||||
|
||||
import com.dfsek.terra.api.structures.parser.exceptions.ParseException;
|
||||
import com.dfsek.terra.api.structures.tokenizer.Token;
|
||||
import com.dfsek.terra.api.structures.tokenizer.Tokenizer;
|
||||
import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException;
|
||||
import com.dfsek.terra.api.util.GlueList;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class Parser {
|
||||
private final String data;
|
||||
private final Map<String, FunctionBuilder<? extends Function>> functions = new HashMap<>();
|
||||
Set<Token.Type> allowedArguments = Sets.newHashSet(Token.Type.STRING, Token.Type.NUMBER, Token.Type.IDENTIFIER);
|
||||
|
||||
public Parser(String data) {
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
public Parser addFunction(String name, FunctionBuilder<? extends Function> functionBuilder) {
|
||||
functions.put(name, functionBuilder);
|
||||
return this;
|
||||
}
|
||||
|
||||
public List<Function> parse() throws ParseException {
|
||||
Tokenizer tokenizer = new Tokenizer(data);
|
||||
List<Function> builtFunctions = new GlueList<>();
|
||||
List<Token> functionBuilder = new GlueList<>();
|
||||
Token token = null;
|
||||
while(tokenizer.hasNext()) {
|
||||
try {
|
||||
token = tokenizer.fetch();
|
||||
functionBuilder.add(token);
|
||||
|
||||
if(token.getType().equals(Token.Type.STATEMENT_END)) {
|
||||
Token identifier = functionBuilder.remove(0);
|
||||
checkType(identifier, Token.Type.IDENTIFIER); // First token must be identifier
|
||||
|
||||
if(!functions.containsKey(identifier.getContent()))
|
||||
throw new ParseException("No such function " + identifier.getContent() + ": " + identifier.getStart());
|
||||
|
||||
checkType(functionBuilder.remove(0), Token.Type.BODY_BEGIN); // Second is body begin
|
||||
|
||||
boolean expectingSeparator = false;
|
||||
|
||||
List<Token> args = new GlueList<>();
|
||||
|
||||
while(!functionBuilder.get(0).getType().equals(Token.Type.BODY_END)) {
|
||||
Token current = functionBuilder.remove(0);
|
||||
if(expectingSeparator) {
|
||||
checkType(current, Token.Type.SEPARATOR);
|
||||
expectingSeparator = false;
|
||||
} else {
|
||||
if(!allowedArguments.contains(current.getType()))
|
||||
throw new ParseException("Token type " + current.getType() + " not allowed in arguments: " + current.getStart());
|
||||
args.add(current);
|
||||
expectingSeparator = true;
|
||||
}
|
||||
}
|
||||
|
||||
functionBuilder.remove(0); // Remove body end
|
||||
|
||||
for(Token t : args) System.out.println("TOKEN: " + t);
|
||||
|
||||
checkType(functionBuilder.remove(0), Token.Type.STATEMENT_END);
|
||||
|
||||
List<String> arg = args.stream().map(Token::getContent).collect(Collectors.toList());
|
||||
|
||||
for(String s : arg) System.out.println("ARG: " + s);
|
||||
FunctionBuilder<?> builder = functions.get(identifier.getContent());
|
||||
if(arg.size() != builder.getArguments().size())
|
||||
throw new ParseException("Expected " + builder.getArguments().size() + " arguments, found " + arg.size() + ": " + identifier.getStart());
|
||||
|
||||
builtFunctions.add(functions.get(identifier.getContent()).build(arg));
|
||||
|
||||
functionBuilder.clear();
|
||||
}
|
||||
} catch(TokenizerException e) {
|
||||
throw new ParseException("Failed to tokenize input", e);
|
||||
}
|
||||
|
||||
}
|
||||
if(token != null) checkType(token, Token.Type.STATEMENT_END);
|
||||
return builtFunctions;
|
||||
}
|
||||
|
||||
private void checkType(Token token, Token.Type expected) throws ParseException {
|
||||
if(!token.getType().equals(expected))
|
||||
throw new ParseException("Expected " + expected + " but found " + token.getType() + ": " + token.getStart());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.dfsek.terra.api.structures.parser.exceptions;
|
||||
|
||||
/**
 * Thrown when structure-script source cannot be parsed.
 */
public class ParseException extends Exception {
    // Exception is Serializable; pin the serial form explicitly.
    private static final long serialVersionUID = 1L;

    public ParseException() {
        super();
    }

    /**
     * @param message description of the parse failure, including source position where available
     */
    public ParseException(String message) {
        super(message);
    }

    /**
     * @param message description of the parse failure
     * @param cause   underlying failure (e.g. a tokenizer error)
     */
    public ParseException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param cause underlying failure
     */
    public ParseException(Throwable cause) {
        super(cause);
    }
}
|
||||
@@ -83,6 +83,14 @@ public class Lookahead {
|
||||
} else return buffer.get(ahead);
|
||||
}
|
||||
|
||||
public int getLine() {
|
||||
return line;
|
||||
}
|
||||
|
||||
public int getIndex() {
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume an amount of characters
|
||||
*
|
||||
|
||||
@@ -8,4 +8,9 @@ public class Position {
|
||||
this.line = line;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return (line + 1) + ":" + (index + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,10 +3,12 @@ package com.dfsek.terra.api.structures.tokenizer;
|
||||
public class Token {
|
||||
private final String content;
|
||||
private final Type type;
|
||||
private final Position start;
|
||||
|
||||
public Token(String content, Type type) {
|
||||
public Token(String content, Type type, Position start) {
|
||||
this.content = content;
|
||||
this.type = type;
|
||||
this.start = start;
|
||||
}
|
||||
|
||||
public Type getType() {
|
||||
@@ -17,6 +19,10 @@ public class Token {
|
||||
return content;
|
||||
}
|
||||
|
||||
public Position getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return type + ": '" + content + "'";
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer;
|
||||
|
||||
/**
 * Empty placeholder for a tokenized statement; carries no state or behavior.
 */
public class TokenizedStatement {
}
|
||||
@@ -1,13 +1,11 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer;
|
||||
|
||||
import com.dfsek.terra.api.structures.tokenizer.exceptions.EOFException;
|
||||
import com.dfsek.terra.api.structures.tokenizer.exceptions.FormatException;
|
||||
import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException;
|
||||
import com.dfsek.terra.api.structures.tokenizer.group.Group;
|
||||
import com.dfsek.terra.api.util.GlueList;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
public class Tokenizer {
|
||||
@@ -20,46 +18,47 @@ public class Tokenizer {
|
||||
reader = new Lookahead(new StringReader(data + '\0'));
|
||||
}
|
||||
|
||||
public List<TokenizedStatement> tokenize() {
|
||||
List<TokenizedStatement> tokens = new GlueList<>();
|
||||
while(reader.current().isEOF()) {
|
||||
Char c = reader.current();
|
||||
}
|
||||
|
||||
return tokens;
|
||||
public boolean hasNext() {
|
||||
while(!reader.current().isEOF() && reader.current().isWhitespace()) reader.consume(); // Consume whitespace.
|
||||
return !reader.current().isEOF();
|
||||
}
|
||||
|
||||
public Token fetch() throws TokenizerException {
|
||||
|
||||
while(!reader.current().isEOF() && reader.current().isWhitespace()) reader.consume();
|
||||
if(reader.current().isEOF()) return null; // EOF
|
||||
|
||||
if(reader.matches("//", true)) skipLine(); // Skip line if comment
|
||||
|
||||
if(reader.matches("/*", true)) skipTo("*/");
|
||||
if(reader.matches("/*", true)) skipTo("*/"); // Skip multi line comment
|
||||
|
||||
if(isNumberStart()) {
|
||||
StringBuilder num = new StringBuilder();
|
||||
while(!reader.current().isEOF() && isNumberLike()) {
|
||||
num.append(reader.consume());
|
||||
}
|
||||
return new Token(num.toString(), Token.Type.NUMBER);
|
||||
return new Token(num.toString(), Token.Type.NUMBER, new Position(reader.getLine(), reader.getIndex()));
|
||||
}
|
||||
|
||||
if(reader.current().is('"')) {
|
||||
reader.consume(); // Consume first quote
|
||||
StringBuilder string = new StringBuilder();
|
||||
while(!reader.current().isEOF() && !reader.current().is('"')) {
|
||||
while(!reader.current().is('"')) {
|
||||
if(reader.current().isEOF())
|
||||
throw new FormatException("No end of string literal found. " + reader.getLine() + ":" + reader.getIndex());
|
||||
string.append(reader.consume());
|
||||
}
|
||||
reader.consume(); // Consume last quote
|
||||
return new Token(string.toString(), Token.Type.STRING);
|
||||
return new Token(string.toString(), Token.Type.STRING, new Position(reader.getLine(), reader.getIndex()));
|
||||
}
|
||||
|
||||
if(reader.current().is('(')) return new Token(reader.consume().toString(), Token.Type.BODY_BEGIN);
|
||||
if(reader.current().is(')')) return new Token(reader.consume().toString(), Token.Type.BODY_END);
|
||||
if(reader.current().is(';')) return new Token(reader.consume().toString(), Token.Type.STATEMENT_END);
|
||||
if(reader.current().is(',')) return new Token(reader.consume().toString(), Token.Type.SEPARATOR);
|
||||
if(reader.current().is('('))
|
||||
return new Token(reader.consume().toString(), Token.Type.BODY_BEGIN, new Position(reader.getLine(), reader.getIndex()));
|
||||
if(reader.current().is(')'))
|
||||
return new Token(reader.consume().toString(), Token.Type.BODY_END, new Position(reader.getLine(), reader.getIndex()));
|
||||
if(reader.current().is(';'))
|
||||
return new Token(reader.consume().toString(), Token.Type.STATEMENT_END, new Position(reader.getLine(), reader.getIndex()));
|
||||
if(reader.current().is(','))
|
||||
return new Token(reader.consume().toString(), Token.Type.SEPARATOR, new Position(reader.getLine(), reader.getIndex()));
|
||||
|
||||
StringBuilder token = new StringBuilder();
|
||||
while(!reader.current().isEOF() && !isSyntaxSignificant(reader.current().getCharacter())) {
|
||||
@@ -67,7 +66,7 @@ public class Tokenizer {
|
||||
if(!c.isWhitespace()) token.append(c);
|
||||
}
|
||||
|
||||
return new Token(token.toString(), Token.Type.IDENTIFIER);
|
||||
return new Token(token.toString(), Token.Type.IDENTIFIER, new Position(reader.getLine(), reader.getIndex()));
|
||||
}
|
||||
|
||||
private boolean isNumberLike() {
|
||||
@@ -94,21 +93,6 @@ public class Tokenizer {
|
||||
throw new EOFException("No end of expression found.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Read to the end of a group, consuming all
|
||||
*
|
||||
* @param g
|
||||
* @return
|
||||
*/
|
||||
private String readToEndOfGroup(Group g) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
do {
|
||||
Char current = reader.consume();
|
||||
|
||||
} while(reader.current().getCharacter() != g.getEnd());
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public boolean isSyntaxSignificant(char c) {
|
||||
return syntaxSignificant.contains(c);
|
||||
}
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer.group;
|
||||
|
||||
public class Brackets implements Group {
|
||||
@Override
|
||||
public char getBegin() {
|
||||
return '[';
|
||||
}
|
||||
|
||||
@Override
|
||||
public char getEnd() {
|
||||
return ']';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean ignoreInsideSyntax() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer.group;
|
||||
|
||||
/**
 * A delimited region of tokenizer input with distinct opening and closing characters.
 */
public interface Group {
    /**
     * @return character that opens this group
     */
    char getBegin();

    /**
     * @return character that closes this group
     */
    char getEnd();

    /**
     * @return whether syntax-significant characters are ignored inside this group
     */
    boolean ignoreInsideSyntax();
}
|
||||
@@ -1,18 +0,0 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer.group;
|
||||
|
||||
public class LineComment implements Group {
|
||||
@Override
|
||||
public char getBegin() {
|
||||
return '#';
|
||||
}
|
||||
|
||||
@Override
|
||||
public char getEnd() {
|
||||
return '\n';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean ignoreInsideSyntax() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer.group;
|
||||
|
||||
public class Parentheses implements Group {
|
||||
@Override
|
||||
public char getBegin() {
|
||||
return '(';
|
||||
}
|
||||
|
||||
@Override
|
||||
public char getEnd() {
|
||||
return ')';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean ignoreInsideSyntax() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer.group;
|
||||
|
||||
public class Quotes implements Group {
|
||||
|
||||
@Override
|
||||
public char getBegin() {
|
||||
return '"';
|
||||
}
|
||||
|
||||
@Override
|
||||
public char getEnd() {
|
||||
return '"';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean ignoreInsideSyntax() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user