working tokenizer

2026-04-08 16:56:07 +00:00 · 2020-12-19 01:50:56 -07:00
parent 1ce884d1c7
commit 29e2746e72
29 changed files with 402 additions and 40 deletions
--- a/common/src/main/java/com/dfsek/terra/CommandHandler.java
+++ b/common/src/main/java/com/dfsek/terra/CommandHandler.java
@@ -0,0 +1,4 @@
+package com.dfsek.terra;
+
+/**
+ * Placeholder interface; it declares no members in this revision.
+ * NOTE(review): presumably the hook for platform command handling (inferred from the name only) -- confirm.
+ */
+public interface CommandHandler {
+}
--- a/common/src/main/java/com/dfsek/terra/api/platform/package-info.java
+++ b/common/src/main/java/com/dfsek/terra/api/platform/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * API for platform implementations. Mostly interfaces to be implemented by platform delegates.
+ */
+package com.dfsek.terra.api.platform;
--- a/common/src/main/java/com/dfsek/terra/api/structures/Argument.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/Argument.java
@@ -0,0 +1,4 @@
+package com.dfsek.terra.api.structures;
+
+/**
+ * Element type of the list returned by {@link Function#getArguments()}.
+ * Declares no members in this revision.
+ */
+public interface Argument {
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/Function.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/Function.java
@@ -0,0 +1,11 @@
+package com.dfsek.terra.api.structures;
+
+import java.util.List;
+
+/**
+ * A named, invocable unit that exposes a list of {@link Argument}s.
+ */
+public interface Function {
+    /**
+     * Invoke this function. Takes no parameters and returns nothing.
+     */
+    void apply();
+
+    /**
+     * @return Name of this function
+     */
+    String name();
+
+    /**
+     * @return Arguments associated with this function
+     */
+    List<Argument> getArguments();
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/Parser.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/Parser.java
@@ -0,0 +1,4 @@
+package com.dfsek.terra.api.structures;
+
+/**
+ * Placeholder class; it has no members in this revision.
+ * NOTE(review): presumably the future consumer of tokenizer output (inferred from the name only) -- confirm.
+ */
+public class Parser {
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Char.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Char.java
@@ -0,0 +1,56 @@
+package com.dfsek.terra.api.structures.tokenizer;
+
+/**
+ * Immutable wrapper around a single {@code char} together with the index and line values
+ * supplied at construction time. The NUL character ({@code '\0'}) is treated as end-of-file.
+ */
+public class Char {
+    private final char character;
+    private final int index;
+    private final int line;
+
+
+    /**
+     * @param character Wrapped character
+     * @param index     Index value to report from {@link #getIndex()}
+     * @param line      Line value to report from {@link #getLine()}
+     */
+    public Char(char character, int index, int line) {
+        this.line = line;
+        this.index = index;
+        this.character = character;
+    }
+
+    /**
+     * @return The wrapped character
+     */
+    public char getCharacter() {
+        return this.character;
+    }
+
+    /**
+     * @return The index given at construction
+     */
+    public int getIndex() {
+        return this.index;
+    }
+
+    /**
+     * @return The line given at construction
+     */
+    public int getLine() {
+        return this.line;
+    }
+
+    /**
+     * @return Whether the character is whitespace according to {@link Character#isWhitespace(char)}
+     */
+    public boolean isWhitespace() {
+        return Character.isWhitespace(getCharacter());
+    }
+
+    /**
+     * @return Whether the character is a line feed ({@code '\n'})
+     */
+    public boolean isNewLine() {
+        return '\n' == character;
+    }
+
+    /**
+     * @return Whether the character is a digit according to {@link Character#isDigit(char)}
+     */
+    public boolean isDigit() {
+        return Character.isDigit(getCharacter());
+    }
+
+    /**
+     * Check whether this character equals any of the candidates. A NUL candidate never matches,
+     * so an EOF character matches nothing.
+     *
+     * @param tests Candidate characters
+     * @return {@code true} if at least one non-NUL candidate equals this character
+     */
+    public boolean is(char... tests) {
+        for(int i = 0; i < tests.length; i++) {
+            char candidate = tests[i];
+            if(candidate == '\0') continue; // NUL is reserved for EOF and never matches.
+            if(candidate == character) return true;
+        }
+        return false;
+    }
+
+    /**
+     * @return Whether the character is the NUL end-of-file marker
+     */
+    public boolean isEOF() {
+        return '\0' == character;
+    }
+
+    /**
+     * @return The wrapped character as a one-character string
+     */
+    @Override
+    public String toString() {
+        return String.valueOf(character);
+    }
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Lookahead.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Lookahead.java
@@ -0,0 +1,113 @@
+package com.dfsek.terra.api.structures.tokenizer;
+
+import com.dfsek.terra.api.util.GlueList;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.List;
+
+/**
+ * Stream-like data structure that allows viewing future elements without consuming current.
+ * <p>
+ * Characters are read lazily from the underlying {@link Reader} and kept in a buffer until they
+ * are consumed. Each character is tagged with its position: lines start at 0, and the index is
+ * the 1-based offset of the character within its line.
+ */
+public class Lookahead {
+    private final List<Char> buffer = new GlueList<>();
+    private final Reader input;
+    private int index = 0;
+    private int line = 0;
+    private boolean end = false;
+
+    /**
+     * @param r Reader to pull characters from
+     */
+    public Lookahead(Reader r) {
+        this.input = r;
+    }
+
+    /**
+     * Get the current character without consuming it.
+     *
+     * @return current character, or {@code null} if the input is exhausted
+     */
+    public Char current() {
+        return next(0);
+    }
+
+
+    /**
+     * Consume and return one character.
+     *
+     * @return Character that was consumed, or {@code null} if the input is exhausted
+     */
+    public Char consume() {
+        Char consumed = current();
+        consume(1);
+        return consumed;
+    }
+
+    /**
+     * Fetch the next character from the reader and update position tracking.
+     *
+     * @return Next character, or {@code null} at end of input or if reading fails
+     */
+    private Char fetch() {
+        try {
+            int c = input.read();
+            if(c == -1) return null;
+            index++;
+            // Char's constructor order is (character, index, line).
+            Char fetched = new Char((char) c, index, line);
+            if(c == '\n') {
+                // The line feed belongs to the line it terminates; what follows starts a new line.
+                line++;
+                index = 0;
+            }
+            return fetched;
+        } catch(IOException e) {
+            // A read failure is reported and then treated like end of input.
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    /**
+     * Fetch a future character without consuming it.
+     *
+     * @param ahead Distance ahead to peek; 0 is the current character
+     * @return Character, or {@code null} if the input ends before that position
+     * @throws IllegalArgumentException if {@code ahead} is negative
+     */
+    public Char next(int ahead) {
+        if(ahead < 0) throw new IllegalArgumentException();
+
+        // Fill the buffer until it holds the requested position or the input runs out.
+        while(buffer.size() <= ahead && !end) {
+            Char item = fetch();
+            if(item != null) {
+                buffer.add(item);
+            } else end = true;
+        }
+
+        if(ahead >= buffer.size()) {
+            return null;
+        } else return buffer.get(ahead);
+    }
+
+    /**
+     * Consume an amount of characters
+     *
+     * @param amount Number of characters to consume
+     * @throws IllegalArgumentException if {@code amount} is negative
+     */
+    public void consume(int amount) {
+        if(amount < 0) throw new IllegalArgumentException();
+        while(amount-- > 0) {
+            if(!buffer.isEmpty()) buffer.remove(0); // Remove top item from buffer.
+            else {
+                if(end) return;
+                // Nothing buffered: read one character from the reader and discard it.
+                Char item = fetch();
+                if(item == null) end = true;
+            }
+        }
+    }
+
+    /**
+     * Check whether the upcoming characters spell out the given string.
+     *
+     * @param check   String to compare against; {@code null} never matches
+     * @param consume Whether to consume the matched characters on success
+     * @return {@code true} if every character of {@code check} matches the upcoming input
+     */
+    public boolean matches(String check, boolean consume) {
+        if(check == null) return false;
+
+        for(int i = 0; i < check.length(); i++) {
+            Char ahead = next(i);
+            // next() yields null once the input is exhausted; that can never be a match.
+            if(ahead == null || !ahead.is(check.charAt(i))) return false;
+        }
+
+        if(consume) consume(check.length()); // Consume string
+        return true;
+    }
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Position.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Position.java
@@ -0,0 +1,11 @@
+package com.dfsek.terra.api.structures.tokenizer;
+
+/**
+ * Immutable line/index pair.
+ */
+public class Position {
+    private final int line;
+    private final int index;
+
+    /**
+     * @param line  Line value
+     * @param index Index value
+     */
+    public Position(int line, int index) {
+        this.line = line;
+        this.index = index;
+    }
+
+    /**
+     * @return The line given at construction
+     */
+    public int getLine() {
+        return line;
+    }
+
+    /**
+     * @return The index given at construction
+     */
+    public int getIndex() {
+        return index;
+    }
+
+    @Override
+    public String toString() {
+        return line + ":" + index;
+    }
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Token.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Token.java
@@ -0,0 +1,28 @@
+package com.dfsek.terra.api.structures.tokenizer;
+
+/**
+ * A lexical token: a piece of text paired with the {@link Type} it was classified as.
+ */
+public class Token {
+    private final String content;
+    private final Type type;
+
+    /**
+     * @param content Text of the token
+     * @param type    Classification of the token
+     */
+    public Token(String content, Type type) {
+        this.type = type;
+        this.content = content;
+    }
+
+    /**
+     * @return Classification of this token
+     */
+    public Type getType() {
+        return this.type;
+    }
+
+    /**
+     * @return Text of this token
+     */
+    public String getContent() {
+        return this.content;
+    }
+
+    /**
+     * @return The type, a colon and the content in single quotes, e.g. {@code NUMBER: '42'}
+     */
+    @Override
+    public String toString() {
+        StringBuilder rendered = new StringBuilder();
+        rendered.append(type).append(": '").append(content).append("'");
+        return rendered.toString();
+    }
+
+    /**
+     * Kinds of token.
+     */
+    public enum Type {
+        IDENTIFIER,
+        NUMBER,
+        STRING,
+        BOOLEAN,
+        BODY_BEGIN,
+        BODY_END,
+        STATEMENT_END,
+        SEPARATOR
+    }
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/TokenizedStatement.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/TokenizedStatement.java
@@ -0,0 +1,5 @@
+package com.dfsek.terra.api.structures.tokenizer;
+
+/**
+ * Element type of the list returned by {@link Tokenizer#tokenize()}.
+ * Has no members in this revision.
+ */
+public class TokenizedStatement {
+
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Tokenizer.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Tokenizer.java
@@ -0,0 +1,116 @@
+package com.dfsek.terra.api.structures.tokenizer;
+
+import com.dfsek.terra.api.structures.tokenizer.exceptions.EOFException;
+import com.dfsek.terra.api.structures.tokenizer.exceptions.FormatException;
+import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException;
+import com.dfsek.terra.api.structures.tokenizer.group.Group;
+import com.dfsek.terra.api.util.GlueList;
+import com.google.common.collect.Sets;
+
+import java.io.StringReader;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Splits source text into {@link Token}s, one per call to {@link #fetch()}.
+ * <p>
+ * A NUL character is appended to the input as an end-of-input sentinel, so the current character
+ * can always be inspected as long as that sentinel has not been consumed. The methods in this
+ * class therefore take care never to consume it.
+ */
+public class Tokenizer {
+    private final Lookahead reader;
+
+    // Characters that end an identifier.
+    private final Set<Character> syntaxSignificant = Sets.newHashSet(';', '(', ')', '"', '[', ']', ',');
+
+
+    /**
+     * @param data Source text to tokenize
+     */
+    public Tokenizer(String data) {
+        reader = new Lookahead(new StringReader(data + '\0'));
+    }
+
+    /**
+     * Statement-level tokenization. Not implemented yet: always returns an empty list.
+     *
+     * @return An empty, mutable list
+     */
+    public List<TokenizedStatement> tokenize() {
+        List<TokenizedStatement> tokens = new GlueList<>();
+        // TODO: group the output of fetch() into statements.
+        // The former placeholder loop ran only while the reader was at EOF and never consumed
+        // anything, so it spun forever on empty input and did nothing on any other input.
+        return tokens;
+    }
+
+    /**
+     * Read the next token from the input.
+     *
+     * @return The next token, or {@code null} once the end of the input is reached
+     * @throws EOFException    if a block comment or string literal is not terminated
+     * @throws FormatException if a character is encountered that cannot start any token
+     */
+    public Token fetch() throws TokenizerException {
+        skipWhitespaceAndComments();
+        if(reader.current().isEOF()) return null; // EOF
+
+        if(isNumberStart()) {
+            StringBuilder num = new StringBuilder();
+            while(!reader.current().isEOF() && isNumberLike()) {
+                num.append(reader.consume());
+            }
+            return new Token(num.toString(), Token.Type.NUMBER);
+        }
+
+        if(reader.current().is('"')) {
+            reader.consume(); // Consume first quote
+            StringBuilder string = new StringBuilder();
+            while(!reader.current().isEOF() && !reader.current().is('"')) {
+                string.append(reader.consume());
+            }
+            // Consuming the sentinel here would leave the reader without a current character.
+            if(reader.current().isEOF()) throw new EOFException("No end of string literal found.");
+            reader.consume(); // Consume last quote
+            return new Token(string.toString(), Token.Type.STRING);
+        }
+
+        if(reader.current().is('(')) return new Token(reader.consume().toString(), Token.Type.BODY_BEGIN);
+        if(reader.current().is(')')) return new Token(reader.consume().toString(), Token.Type.BODY_END);
+        if(reader.current().is(';')) return new Token(reader.consume().toString(), Token.Type.STATEMENT_END);
+        if(reader.current().is(',')) return new Token(reader.consume().toString(), Token.Type.SEPARATOR);
+
+        // Identifier: runs until whitespace, a syntax-significant character, or EOF.
+        StringBuilder token = new StringBuilder();
+        while(!reader.current().isEOF()
+                && !reader.current().isWhitespace()
+                && !isSyntaxSignificant(reader.current().getCharacter())) {
+            token.append(reader.consume());
+        }
+
+        if(token.length() == 0) {
+            // Reached for syntax-significant characters that have no token type (e.g. '[' or ']').
+            // Returning an empty token without consuming would make callers loop forever.
+            throw new FormatException("Unexpected character: '" + reader.current() + "'");
+        }
+
+        return new Token(token.toString(), Token.Type.IDENTIFIER);
+    }
+
+    /**
+     * Skip any run of whitespace, line comments and block comments, leaving the reader at the
+     * first character of the next token (or at EOF).
+     *
+     * @throws EOFException if a block comment is not terminated
+     */
+    private void skipWhitespaceAndComments() throws EOFException {
+        while(true) {
+            while(!reader.current().isEOF() && reader.current().isWhitespace()) reader.consume();
+
+            if(reader.matches("//", true)) skipLine(); // Skip line if comment
+            else if(reader.matches("/*", true)) skipTo("*/");
+            else return;
+        }
+    }
+
+    /**
+     * @return Whether the current character may continue a number literal
+     */
+    private boolean isNumberLike() {
+        // NOTE(review): '-' is accepted anywhere inside a number, so "1-2" is read as one token -- confirm intent.
+        return reader.current().isDigit()
+                || reader.current().is('_', '.', '-', 'E');
+    }
+
+    /**
+     * @return Whether a number literal starts here: a digit, optionally preceded by '-', '.' or "-."
+     */
+    private boolean isNumberStart() {
+        return reader.current().isDigit()
+                || reader.current().is('-') && reader.next(1).isDigit()
+                || reader.current().is('-') && reader.next(1).is('.') && reader.next(2).isDigit()
+                || reader.current().is('.') && reader.next(1).isDigit();
+    }
+
+    /**
+     * Consume characters up to, but not including, the next line feed or EOF.
+     */
+    private void skipLine() {
+        while(!reader.current().isEOF() && !reader.current().isNewLine()) reader.consume();
+    }
+
+    /**
+     * Consume characters up to and including the next occurrence of the given string.
+     *
+     * @param s String that ends the skipped region
+     * @throws EOFException if the end of input is reached first
+     */
+    private void skipTo(String s) throws EOFException {
+        while(!reader.current().isEOF()) {
+            if(reader.matches(s, true)) return;
+            reader.consume();
+        }
+        throw new EOFException("No end of expression found.");
+    }
+
+    /**
+     * Read to the end of a group. Consumes at least one character (presumably the group's opening
+     * character -- TODO confirm) and stops in front of the group's end character or EOF.
+     *
+     * @param g Group whose end character terminates the read
+     * @return The characters that were consumed
+     */
+    private String readToEndOfGroup(Group g) {
+        StringBuilder builder = new StringBuilder();
+        if(reader.current().isEOF()) return builder.toString(); // Never consume the sentinel.
+        do {
+            builder.append(reader.consume());
+        } while(!reader.current().isEOF() && reader.current().getCharacter() != g.getEnd());
+        return builder.toString();
+    }
+
+    /**
+     * @param c Character to test
+     * @return Whether the character is one of {@code ; ( ) " [ ] ,}
+     */
+    public boolean isSyntaxSignificant(char c) {
+        return syntaxSignificant.contains(c);
+    }
+
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/EOFException.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/EOFException.java
@@ -0,0 +1,20 @@
+package com.dfsek.terra.api.structures.tokenizer.exceptions;
+
+/**
+ * {@link TokenizerException} raised when the end of input is reached unexpectedly.
+ */
+public class EOFException extends TokenizerException {
+
+    /**
+     * Create an exception with neither message nor cause.
+     */
+    public EOFException() {
+        super();
+    }
+
+    /**
+     * @param message Detail message
+     */
+    public EOFException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param reason Underlying cause
+     */
+    public EOFException(Throwable reason) {
+        super(reason);
+    }
+
+    /**
+     * @param message Detail message
+     * @param reason  Underlying cause
+     */
+    public EOFException(String message, Throwable reason) {
+        super(message, reason);
+    }
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/FormatException.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/FormatException.java
@@ -0,0 +1,20 @@
+package com.dfsek.terra.api.structures.tokenizer.exceptions;
+
+/**
+ * {@link TokenizerException} for input that is not well-formed.
+ */
+public class FormatException extends TokenizerException {
+
+    /**
+     * Create an exception with neither message nor cause.
+     */
+    public FormatException() {
+        super();
+    }
+
+    /**
+     * @param message Detail message
+     */
+    public FormatException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param reason Underlying cause
+     */
+    public FormatException(Throwable reason) {
+        super(reason);
+    }
+
+    /**
+     * @param message Detail message
+     * @param reason  Underlying cause
+     */
+    public FormatException(String message, Throwable reason) {
+        super(message, reason);
+    }
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/TokenizerException.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/TokenizerException.java
@@ -0,0 +1,19 @@
+package com.dfsek.terra.api.structures.tokenizer.exceptions;
+
+/**
+ * Checked base class for the exceptions in this package. The constructors forward
+ * directly to the matching {@link Exception} constructors.
+ */
+public abstract class TokenizerException extends Exception {
+    /**
+     * Create an exception with neither message nor cause.
+     */
+    public TokenizerException() {
+        super();
+    }
+
+    /**
+     * @param message Detail message
+     */
+    public TokenizerException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param reason Underlying cause
+     */
+    public TokenizerException(Throwable reason) {
+        super(reason);
+    }
+
+    /**
+     * @param message Detail message
+     * @param reason  Underlying cause
+     */
+    public TokenizerException(String message, Throwable reason) {
+        super(message, reason);
+    }
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Brackets.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Brackets.java
@@ -0,0 +1,18 @@
+package com.dfsek.terra.api.structures.tokenizer.group;
+
+/**
+ * {@link Group} delimited by square brackets.
+ */
+public class Brackets implements Group {
+    private static final char OPEN = '[';
+    private static final char CLOSE = ']';
+
+    /**
+     * @return {@code '['}
+     */
+    @Override
+    public char getBegin() {
+        return OPEN;
+    }
+
+    /**
+     * @return {@code ']'}
+     */
+    @Override
+    public char getEnd() {
+        return CLOSE;
+    }
+
+    /**
+     * @return Always {@code false}
+     */
+    @Override
+    public boolean ignoreInsideSyntax() {
+        return false;
+    }
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Group.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Group.java
@@ -0,0 +1,9 @@
+package com.dfsek.terra.api.structures.tokenizer.group;
+
+/**
+ * A delimited region of text, described by its opening and closing characters.
+ */
+public interface Group {
+    /**
+     * @return Character that opens the group
+     */
+    char getBegin();
+
+    /**
+     * @return Character that closes the group
+     */
+    char getEnd();
+
+    /**
+     * NOTE(review): presumably tells the tokenizer whether syntax characters inside the group
+     * are to be ignored; no caller is visible in this change -- confirm.
+     *
+     * @return Flag value for this group
+     */
+    boolean ignoreInsideSyntax();
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/LineComment.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/LineComment.java
@@ -0,0 +1,18 @@
+package com.dfsek.terra.api.structures.tokenizer.group;
+
+/**
+ * {@link Group} that starts at {@code '#'} and ends at a line feed.
+ * NOTE(review): the tokenizer in the parent package recognises "//" rather than '#' as the
+ * line-comment marker -- confirm which one is intended.
+ */
+public class LineComment implements Group {
+    private static final char OPEN = '#';
+    private static final char CLOSE = '\n';
+
+    /**
+     * @return {@code '#'}
+     */
+    @Override
+    public char getBegin() {
+        return OPEN;
+    }
+
+    /**
+     * @return The line feed character
+     */
+    @Override
+    public char getEnd() {
+        return CLOSE;
+    }
+
+    /**
+     * @return Always {@code true}
+     */
+    @Override
+    public boolean ignoreInsideSyntax() {
+        return true;
+    }
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Parentheses.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Parentheses.java
@@ -0,0 +1,18 @@
+package com.dfsek.terra.api.structures.tokenizer.group;
+
+/**
+ * {@link Group} delimited by round parentheses.
+ */
+public class Parentheses implements Group {
+    private static final char OPEN = '(';
+    private static final char CLOSE = ')';
+
+    /**
+     * @return {@code '('}
+     */
+    @Override
+    public char getBegin() {
+        return OPEN;
+    }
+
+    /**
+     * @return {@code ')'}
+     */
+    @Override
+    public char getEnd() {
+        return CLOSE;
+    }
+
+    /**
+     * @return Always {@code false}
+     */
+    @Override
+    public boolean ignoreInsideSyntax() {
+        return false;
+    }
+}
--- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Quotes.java
+++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/group/Quotes.java
@@ -0,0 +1,19 @@
+package com.dfsek.terra.api.structures.tokenizer.group;
+
+/**
+ * {@link Group} that both opens and closes with a double quote.
+ */
+public class Quotes implements Group {
+    private static final char QUOTE = '"';
+
+    /**
+     * @return The double quote character
+     */
+    @Override
+    public char getBegin() {
+        return QUOTE;
+    }
+
+    /**
+     * @return The double quote character
+     */
+    @Override
+    public char getEnd() {
+        return QUOTE;
+    }
+
+    /**
+     * @return Always {@code true}
+     */
+    @Override
+    public boolean ignoreInsideSyntax() {
+        return true;
+    }
+}
--- a/common/src/main/java/com/dfsek/terra/command/Command.java
+++ b/common/src/main/java/com/dfsek/terra/command/Command.java
@@ -0,0 +1,5 @@
+package com.dfsek.terra.command;
+
+/**
+ * Placeholder class; it has no members in this revision.
+ */
+public class Command {
+
+}