Working parser/tokenizer

This commit is contained in:
dfsek
2020-12-19 20:04:58 -07:00
parent 29e2746e72
commit f970838ecf
21 changed files with 263 additions and 161 deletions

View File

@@ -1,4 +0,0 @@
package com.dfsek.terra.api.structures;
public interface Argument {
}

View File

@@ -1,11 +0,0 @@
package com.dfsek.terra.api.structures;
import java.util.List;
public interface Function {
void apply();
String name();
List<Argument> getArguments();
}

View File

@@ -1,4 +0,0 @@
package com.dfsek.terra.api.structures;
public class Parser {
}

View File

@@ -0,0 +1,5 @@
package com.dfsek.terra.api.structures.parser;
public interface Argument<T> {
T parse(String input);
}
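
Each Argument converts one raw token string into a typed value, one per function parameter. A minimal sketch of a standalone implementation (the IntegerArgument name is illustrative, not part of this commit):

package com.dfsek.terra.api.structures.parser;

// Hypothetical example: parses a single argument token into an Integer.
public class IntegerArgument implements Argument<Integer> {
    @Override
    public Integer parse(String input) {
        return Integer.parseInt(input);
    }
}

Since Argument has a single abstract method, a lambda such as id -> id works just as well, as the test below demonstrates.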

View File

@@ -0,0 +1,12 @@
package com.dfsek.terra.api.structures.parser;
import com.dfsek.terra.api.math.vector.Location;
import com.dfsek.terra.api.platform.world.Chunk;
public interface Function {
void apply(Location location);
void apply(Location location, Chunk chunk);
String name();
}
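
A Function is the executable unit the parser produces: it can be applied at a Location, optionally with a target Chunk. As a sketch (the DebugFunction name is illustrative), a function that only logs where it was applied:

package com.dfsek.terra.api.structures.parser;

import com.dfsek.terra.api.math.vector.Location;
import com.dfsek.terra.api.platform.world.Chunk;

// Hypothetical example: a Function that logs its application site.
public class DebugFunction implements Function {
    @Override
    public void apply(Location location) {
        System.out.println("debug applied at " + location);
    }

    @Override
    public void apply(Location location, Chunk chunk) {
        apply(location); // This sketch ignores the chunk.
    }

    @Override
    public String name() {
        return "debug";
    }
}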

View File

@@ -0,0 +1,11 @@
package com.dfsek.terra.api.structures.parser;
import com.dfsek.terra.api.structures.parser.exceptions.ParseException;
import java.util.List;
public interface FunctionBuilder<T extends Function> {
T build(List<String> argumentList) throws ParseException;
List<Argument<?>> getArguments();
}
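
A FunctionBuilder pairs per-argument parsers with a factory that assembles the final Function from the raw argument strings. Note that in this commit the Parser uses getArguments() only for arity checking; build() receives the unparsed strings. A sketch wiring the hypothetical DebugFunction above to a single pass-through string argument:

package com.dfsek.terra.api.structures.parser;

import com.dfsek.terra.api.structures.parser.exceptions.ParseException;

import java.util.Collections;
import java.util.List;

// Hypothetical example: builds a DebugFunction taking one string argument.
public class DebugFunctionBuilder implements FunctionBuilder<DebugFunction> {
    @Override
    public DebugFunction build(List<String> argumentList) throws ParseException {
        return new DebugFunction();
    }

    @Override
    public List<Argument<?>> getArguments() {
        return Collections.singletonList(id -> id); // One string argument, passed through verbatim.
    }
}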

View File

@@ -0,0 +1,96 @@
package com.dfsek.terra.api.structures.parser;
import com.dfsek.terra.api.structures.parser.exceptions.ParseException;
import com.dfsek.terra.api.structures.tokenizer.Token;
import com.dfsek.terra.api.structures.tokenizer.Tokenizer;
import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException;
import com.dfsek.terra.api.util.GlueList;
import com.google.common.collect.Sets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
public class Parser {
private final String data;
private final Map<String, FunctionBuilder<? extends Function>> functions = new HashMap<>();
private final Set<Token.Type> allowedArguments = Sets.newHashSet(Token.Type.STRING, Token.Type.NUMBER, Token.Type.IDENTIFIER); // Token types permitted as function arguments.
public Parser(String data) {
this.data = data;
}
public Parser addFunction(String name, FunctionBuilder<? extends Function> functionBuilder) {
functions.put(name, functionBuilder);
return this;
}
public List<Function> parse() throws ParseException {
Tokenizer tokenizer = new Tokenizer(data);
List<Function> builtFunctions = new GlueList<>();
List<Token> functionBuilder = new GlueList<>();
Token token = null;
while(tokenizer.hasNext()) {
try {
token = tokenizer.fetch();
functionBuilder.add(token);
if(token.getType().equals(Token.Type.STATEMENT_END)) {
Token identifier = functionBuilder.remove(0);
checkType(identifier, Token.Type.IDENTIFIER); // First token must be identifier
if(!functions.containsKey(identifier.getContent()))
throw new ParseException("No such function " + identifier.getContent() + ": " + identifier.getStart());
checkType(functionBuilder.remove(0), Token.Type.BODY_BEGIN); // Second is body begin
boolean expectingSeparator = false;
List<Token> args = new GlueList<>();
while(!functionBuilder.get(0).getType().equals(Token.Type.BODY_END)) {
Token current = functionBuilder.remove(0);
if(expectingSeparator) {
checkType(current, Token.Type.SEPARATOR);
expectingSeparator = false;
} else {
if(!allowedArguments.contains(current.getType()))
throw new ParseException("Token type " + current.getType() + " not allowed in arguments: " + current.getStart());
args.add(current);
expectingSeparator = true;
}
}
functionBuilder.remove(0); // Remove body end
for(Token t : args) System.out.println("TOKEN: " + t); // Debug output.
checkType(functionBuilder.remove(0), Token.Type.STATEMENT_END);
List<String> arg = args.stream().map(Token::getContent).collect(Collectors.toList());
for(String s : arg) System.out.println("ARG: " + s); // Debug output.
FunctionBuilder<?> builder = functions.get(identifier.getContent());
if(arg.size() != builder.getArguments().size())
throw new ParseException("Expected " + builder.getArguments().size() + " arguments, found " + arg.size() + ": " + identifier.getStart());
builtFunctions.add(functions.get(identifier.getContent()).build(arg));
functionBuilder.clear();
}
} catch(TokenizerException e) {
throw new ParseException("Failed to tokenize input", e);
}
}
if(token != null) checkType(token, Token.Type.STATEMENT_END);
return builtFunctions;
}
private void checkType(Token token, Token.Type expected) throws ParseException {
if(!token.getType().equals(expected))
throw new ParseException("Expected " + expected + " but found " + token.getType() + ": " + token.getStart());
}
}
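
The accepted grammar is a flat sequence of statements of the form identifier(arg, arg, ...); where each argument is a STRING, NUMBER, or IDENTIFIER token. A minimal usage sketch built on the hypothetical DebugFunctionBuilder above:

Parser parser = new Parser("debug(\"hello\");")
        .addFunction("debug", new DebugFunctionBuilder()); // Hypothetical builder from the sketch above.
List<Function> functions = parser.parse(); // Yields one DebugFunction for the single statement.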

View File

@@ -0,0 +1,19 @@
package com.dfsek.terra.api.structures.parser.exceptions;
public class ParseException extends Exception {
public ParseException() {
super();
}
public ParseException(String message) {
super(message);
}
public ParseException(String message, Throwable cause) {
super(message, cause);
}
public ParseException(Throwable cause) {
super(cause);
}
}

View File

@@ -83,6 +83,14 @@ public class Lookahead {
} else return buffer.get(ahead);
}
public int getLine() {
return line;
}
public int getIndex() {
return index;
}
/**
* Consume an amount of characters
*

View File

@@ -8,4 +8,9 @@ public class Position {
this.line = line;
this.index = index;
}
@Override
public String toString() {
return (line + 1) + ":" + (index + 1);
}
}
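
Since both fields are zero-based internally, toString() shifts each by one so positions print in the familiar editor-style one-based form; for example, a token starting at line 0, index 4 prints as 1:5.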

View File

@@ -3,10 +3,12 @@ package com.dfsek.terra.api.structures.tokenizer;
public class Token {
private final String content;
private final Type type;
private final Position start;
public Token(String content, Type type) {
public Token(String content, Type type, Position start) {
this.content = content;
this.type = type;
this.start = start;
}
public Type getType() {
@@ -17,6 +19,10 @@ public class Token {
return content;
}
public Position getStart() {
return start;
}
@Override
public String toString() {
return type + ": '" + content + "'";

View File

@@ -1,5 +0,0 @@
package com.dfsek.terra.api.structures.tokenizer;
public class TokenizedStatement {
}

View File

@@ -1,13 +1,11 @@
package com.dfsek.terra.api.structures.tokenizer;
import com.dfsek.terra.api.structures.tokenizer.exceptions.EOFException;
import com.dfsek.terra.api.structures.tokenizer.exceptions.FormatException;
import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException;
import com.dfsek.terra.api.structures.tokenizer.group.Group;
import com.dfsek.terra.api.util.GlueList;
import com.google.common.collect.Sets;
import java.io.StringReader;
import java.util.List;
import java.util.Set;
public class Tokenizer {
@@ -20,46 +18,47 @@ public class Tokenizer {
reader = new Lookahead(new StringReader(data + '\0'));
}
public List<TokenizedStatement> tokenize() {
List<TokenizedStatement> tokens = new GlueList<>();
while(reader.current().isEOF()) {
Char c = reader.current();
}
return tokens;
}
public boolean hasNext() {
while(!reader.current().isEOF() && reader.current().isWhitespace()) reader.consume(); // Consume whitespace.
return !reader.current().isEOF();
}
public Token fetch() throws TokenizerException {
while(!reader.current().isEOF() && reader.current().isWhitespace()) reader.consume();
if(reader.current().isEOF()) return null; // EOF
if(reader.matches("//", true)) skipLine(); // Skip line if comment
if(reader.matches("/*", true)) skipTo("*/");
if(reader.matches("/*", true)) skipTo("*/"); // Skip multi line comment
if(isNumberStart()) {
StringBuilder num = new StringBuilder();
while(!reader.current().isEOF() && isNumberLike()) {
num.append(reader.consume());
}
return new Token(num.toString(), Token.Type.NUMBER);
return new Token(num.toString(), Token.Type.NUMBER, start);
}
if(reader.current().is('"')) {
reader.consume(); // Consume first quote
StringBuilder string = new StringBuilder();
while(!reader.current().isEOF() && !reader.current().is('"')) {
while(!reader.current().is('"')) {
if(reader.current().isEOF())
throw new FormatException("No end of string literal found: " + start);
string.append(reader.consume());
}
reader.consume(); // Consume last quote
return new Token(string.toString(), Token.Type.STRING);
return new Token(string.toString(), Token.Type.STRING, start);
}
if(reader.current().is('(')) return new Token(reader.consume().toString(), Token.Type.BODY_BEGIN);
if(reader.current().is(')')) return new Token(reader.consume().toString(), Token.Type.BODY_END);
if(reader.current().is(';')) return new Token(reader.consume().toString(), Token.Type.STATEMENT_END);
if(reader.current().is(',')) return new Token(reader.consume().toString(), Token.Type.SEPARATOR);
if(reader.current().is('('))
return new Token(reader.consume().toString(), Token.Type.BODY_BEGIN, start);
if(reader.current().is(')'))
return new Token(reader.consume().toString(), Token.Type.BODY_END, start);
if(reader.current().is(';'))
return new Token(reader.consume().toString(), Token.Type.STATEMENT_END, start);
if(reader.current().is(','))
return new Token(reader.consume().toString(), Token.Type.SEPARATOR, start);
StringBuilder token = new StringBuilder();
while(!reader.current().isEOF() && !isSyntaxSignificant(reader.current().getCharacter())) {
@@ -67,7 +66,7 @@ public class Tokenizer {
if(!c.isWhitespace()) token.append(c);
}
return new Token(token.toString(), Token.Type.IDENTIFIER);
return new Token(token.toString(), Token.Type.IDENTIFIER, start);
}
private boolean isNumberLike() {
@@ -94,21 +93,6 @@ public class Tokenizer {
throw new EOFException("No end of expression found.");
}
/**
* Read to the end of a group, consuming all
*
* @param g
* @return
*/
private String readToEndOfGroup(Group g) {
StringBuilder builder = new StringBuilder();
do {
Char current = reader.consume();
} while(reader.current().getCharacter() != g.getEnd());
return builder.toString();
}
public boolean isSyntaxSignificant(char c) {
return syntaxSignificant.contains(c);
}
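
Traced through fetch(), a statement such as test("hello", 1); yields the token stream IDENTIFIER 'test', BODY_BEGIN '(', STRING 'hello', SEPARATOR ',', NUMBER '1', BODY_END ')', STATEMENT_END ';', which is precisely the shape Parser#parse consumes above.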

View File

@@ -1,18 +0,0 @@
package com.dfsek.terra.api.structures.tokenizer.group;
public class Brackets implements Group {
@Override
public char getBegin() {
return '[';
}
@Override
public char getEnd() {
return ']';
}
@Override
public boolean ignoreInsideSyntax() {
return false;
}
}

View File

@@ -1,9 +0,0 @@
package com.dfsek.terra.api.structures.tokenizer.group;
public interface Group {
char getBegin();
char getEnd();
boolean ignoreInsideSyntax();
}

View File

@@ -1,18 +0,0 @@
package com.dfsek.terra.api.structures.tokenizer.group;
public class LineComment implements Group {
@Override
public char getBegin() {
return '#';
}
@Override
public char getEnd() {
return '\n';
}
@Override
public boolean ignoreInsideSyntax() {
return true;
}
}

View File

@@ -1,18 +0,0 @@
package com.dfsek.terra.api.structures.tokenizer.group;
public class Parentheses implements Group {
@Override
public char getBegin() {
return '(';
}
@Override
public char getEnd() {
return ')';
}
@Override
public boolean ignoreInsideSyntax() {
return false;
}
}

View File

@@ -1,19 +0,0 @@
package com.dfsek.terra.api.structures.tokenizer.group;
public class Quotes implements Group {
@Override
public char getBegin() {
return '"';
}
@Override
public char getEnd() {
return '"';
}
@Override
public boolean ignoreInsideSyntax() {
return true;
}
}

View File

@@ -0,0 +1,73 @@
package structure;
import com.dfsek.terra.api.math.vector.Location;
import com.dfsek.terra.api.platform.world.Chunk;
import com.dfsek.terra.api.structures.parser.Argument;
import com.dfsek.terra.api.structures.parser.Function;
import com.dfsek.terra.api.structures.parser.FunctionBuilder;
import com.dfsek.terra.api.structures.parser.Parser;
import com.dfsek.terra.api.structures.parser.exceptions.ParseException;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
public class ParserTest {
@Test
public void parse() throws IOException, ParseException {
Parser parser = new Parser(IOUtils.toString(getClass().getResourceAsStream("/test.tesf")));
parser.addFunction("test", new FunctionBuilder<Test1>() {
@Override
public Test1 build(List<String> argumentList) throws ParseException {
return new Test1(argumentList.get(0), Double.parseDouble(argumentList.get(1)));
}
@Override
public List<Argument<?>> getArguments() {
return Arrays.asList(id -> id, Double::parseDouble);
}
});
List<Function> functions = parser.parse();
for(Function f : functions) {
System.out.println(f);
}
}
private static class Test1 implements Function {
private final String a;
private final double b;
public Test1(String a, double b) {
this.a = a;
this.b = b;
}
public String getA() {
return a;
}
public double getB() {
return b;
}
@Override
public void apply(Location location) {
}
@Override
public void apply(Location location, Chunk chunk) {
}
@Override
public String name() {
return null;
}
}
}

View File

@@ -12,12 +12,15 @@ public class TokenizerTest {
@Test
public void tokens() throws IOException, TokenizerException {
String data = IOUtils.toString(getClass().getResourceAsStream("/test.tesf"));
// Actual run
long l = System.nanoTime();
for(int i = 0; i < 100; i++) {
Tokenizer tokenizer = new Tokenizer(data); // Fresh tokenizer each pass; a single instance is exhausted after one run.
while(tokenizer.hasNext()) {
Token t = tokenizer.fetch();
if(t == null) break;
System.out.println(t);
}
System.out.println((double) (System.nanoTime() - l) / 1000000); // Cumulative elapsed milliseconds.
}
}

View File

@@ -1,18 +1,4 @@
FUNCTION(1, "hello");
test("hello", 1);
;
)
test("ghgjhgjhgj", 3.4);
FUNCTION2(1.2, "he423llo");
// comment
FUNCTION3(3.4, "35234523452345");
/*
block comment
fsfsdf
gsdfgsdfg
*/
FUNCTION(1, "hello");