mirror of
https://github.com/PolyhedralDev/Terra.git
synced 2026-04-08 16:56:07 +00:00
Working parser/tokenizer
This commit is contained in:
@@ -1,4 +0,0 @@
|
||||
package com.dfsek.terra.api.structures;
|
||||
|
||||
/**
 * Marker interface for a structure-script argument.
 */
public interface Argument {
}
|
||||
@@ -1,11 +0,0 @@
|
||||
package com.dfsek.terra.api.structures;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public interface Function {
|
||||
void apply();
|
||||
|
||||
String name();
|
||||
|
||||
List<Argument> getArguments();
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
package com.dfsek.terra.api.structures;
|
||||
|
||||
/**
 * Parses structure scripts. Currently an empty placeholder with no state or behavior.
 */
public class Parser {
}
|
||||
@@ -0,0 +1,5 @@
|
||||
package com.dfsek.terra.api.structures.parser;
|
||||
|
||||
/**
 * Parses a raw argument string into a typed value.
 *
 * @param <T> type of value this argument produces
 */
@FunctionalInterface // single abstract method; lets builders supply lambdas/method refs
public interface Argument<T> {
    /**
     * Parse raw argument text into a value of type {@code T}.
     *
     * @param input raw argument text
     * @return parsed value
     */
    T parse(String input);
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package com.dfsek.terra.api.structures.parser;
|
||||
|
||||
import com.dfsek.terra.api.math.vector.Location;
|
||||
import com.dfsek.terra.api.platform.world.Chunk;
|
||||
|
||||
public interface Function {
|
||||
void apply(Location location);
|
||||
|
||||
void apply(Location location, Chunk chunk);
|
||||
|
||||
String name();
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.dfsek.terra.api.structures.parser;
|
||||
|
||||
import com.dfsek.terra.api.structures.parser.exceptions.ParseException;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public interface FunctionBuilder<T extends Function> {
|
||||
T build(List<String> argumentList) throws ParseException;
|
||||
|
||||
List<Argument<?>> getArguments();
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
package com.dfsek.terra.api.structures.parser;
|
||||
|
||||
import com.dfsek.terra.api.structures.parser.exceptions.ParseException;
|
||||
import com.dfsek.terra.api.structures.tokenizer.Token;
|
||||
import com.dfsek.terra.api.structures.tokenizer.Tokenizer;
|
||||
import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException;
|
||||
import com.dfsek.terra.api.util.GlueList;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class Parser {
|
||||
private final String data;
|
||||
private final Map<String, FunctionBuilder<? extends Function>> functions = new HashMap<>();
|
||||
Set<Token.Type> allowedArguments = Sets.newHashSet(Token.Type.STRING, Token.Type.NUMBER, Token.Type.IDENTIFIER);
|
||||
|
||||
public Parser(String data) {
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
public Parser addFunction(String name, FunctionBuilder<? extends Function> functionBuilder) {
|
||||
functions.put(name, functionBuilder);
|
||||
return this;
|
||||
}
|
||||
|
||||
public List<Function> parse() throws ParseException {
|
||||
Tokenizer tokenizer = new Tokenizer(data);
|
||||
List<Function> builtFunctions = new GlueList<>();
|
||||
List<Token> functionBuilder = new GlueList<>();
|
||||
Token token = null;
|
||||
while(tokenizer.hasNext()) {
|
||||
try {
|
||||
token = tokenizer.fetch();
|
||||
functionBuilder.add(token);
|
||||
|
||||
if(token.getType().equals(Token.Type.STATEMENT_END)) {
|
||||
Token identifier = functionBuilder.remove(0);
|
||||
checkType(identifier, Token.Type.IDENTIFIER); // First token must be identifier
|
||||
|
||||
if(!functions.containsKey(identifier.getContent()))
|
||||
throw new ParseException("No such function " + identifier.getContent() + ": " + identifier.getStart());
|
||||
|
||||
checkType(functionBuilder.remove(0), Token.Type.BODY_BEGIN); // Second is body begin
|
||||
|
||||
boolean expectingSeparator = false;
|
||||
|
||||
List<Token> args = new GlueList<>();
|
||||
|
||||
while(!functionBuilder.get(0).getType().equals(Token.Type.BODY_END)) {
|
||||
Token current = functionBuilder.remove(0);
|
||||
if(expectingSeparator) {
|
||||
checkType(current, Token.Type.SEPARATOR);
|
||||
expectingSeparator = false;
|
||||
} else {
|
||||
if(!allowedArguments.contains(current.getType()))
|
||||
throw new ParseException("Token type " + current.getType() + " not allowed in arguments: " + current.getStart());
|
||||
args.add(current);
|
||||
expectingSeparator = true;
|
||||
}
|
||||
}
|
||||
|
||||
functionBuilder.remove(0); // Remove body end
|
||||
|
||||
for(Token t : args) System.out.println("TOKEN: " + t);
|
||||
|
||||
checkType(functionBuilder.remove(0), Token.Type.STATEMENT_END);
|
||||
|
||||
List<String> arg = args.stream().map(Token::getContent).collect(Collectors.toList());
|
||||
|
||||
for(String s : arg) System.out.println("ARG: " + s);
|
||||
FunctionBuilder<?> builder = functions.get(identifier.getContent());
|
||||
if(arg.size() != builder.getArguments().size())
|
||||
throw new ParseException("Expected " + builder.getArguments().size() + " arguments, found " + arg.size() + ": " + identifier.getStart());
|
||||
|
||||
builtFunctions.add(functions.get(identifier.getContent()).build(arg));
|
||||
|
||||
functionBuilder.clear();
|
||||
}
|
||||
} catch(TokenizerException e) {
|
||||
throw new ParseException("Failed to tokenize input", e);
|
||||
}
|
||||
|
||||
}
|
||||
if(token != null) checkType(token, Token.Type.STATEMENT_END);
|
||||
return builtFunctions;
|
||||
}
|
||||
|
||||
private void checkType(Token token, Token.Type expected) throws ParseException {
|
||||
if(!token.getType().equals(expected))
|
||||
throw new ParseException("Expected " + expected + " but found " + token.getType() + ": " + token.getStart());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.dfsek.terra.api.structures.parser.exceptions;
|
||||
|
||||
/**
 * Thrown when structure-script source cannot be parsed.
 */
public class ParseException extends Exception {
    // Exception is Serializable; pin the serial form explicitly.
    private static final long serialVersionUID = 1L;

    public ParseException() {
        super();
    }

    /**
     * @param message description of the parse failure, including source position where available
     */
    public ParseException(String message) {
        super(message);
    }

    /**
     * @param message description of the parse failure
     * @param cause   underlying failure (e.g. a tokenizer error)
     */
    public ParseException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param cause underlying failure
     */
    public ParseException(Throwable cause) {
        super(cause);
    }
}
|
||||
@@ -83,6 +83,14 @@ public class Lookahead {
|
||||
} else return buffer.get(ahead);
|
||||
}
|
||||
|
||||
public int getLine() {
|
||||
return line;
|
||||
}
|
||||
|
||||
public int getIndex() {
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume an amount of characters
|
||||
*
|
||||
|
||||
@@ -8,4 +8,9 @@ public class Position {
|
||||
this.line = line;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return (line + 1) + ":" + (index + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,10 +3,12 @@ package com.dfsek.terra.api.structures.tokenizer;
|
||||
public class Token {
|
||||
private final String content;
|
||||
private final Type type;
|
||||
private final Position start;
|
||||
|
||||
public Token(String content, Type type) {
|
||||
public Token(String content, Type type, Position start) {
|
||||
this.content = content;
|
||||
this.type = type;
|
||||
this.start = start;
|
||||
}
|
||||
|
||||
public Type getType() {
|
||||
@@ -17,6 +19,10 @@ public class Token {
|
||||
return content;
|
||||
}
|
||||
|
||||
public Position getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return type + ": '" + content + "'";
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer;
|
||||
|
||||
/**
 * Empty placeholder for a tokenized statement; carries no state or behavior.
 */
public class TokenizedStatement {
}
|
||||
@@ -1,13 +1,11 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer;
|
||||
|
||||
import com.dfsek.terra.api.structures.tokenizer.exceptions.EOFException;
|
||||
import com.dfsek.terra.api.structures.tokenizer.exceptions.FormatException;
|
||||
import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException;
|
||||
import com.dfsek.terra.api.structures.tokenizer.group.Group;
|
||||
import com.dfsek.terra.api.util.GlueList;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
public class Tokenizer {
|
||||
@@ -20,46 +18,47 @@ public class Tokenizer {
|
||||
reader = new Lookahead(new StringReader(data + '\0'));
|
||||
}
|
||||
|
||||
public List<TokenizedStatement> tokenize() {
|
||||
List<TokenizedStatement> tokens = new GlueList<>();
|
||||
while(reader.current().isEOF()) {
|
||||
Char c = reader.current();
|
||||
}
|
||||
|
||||
return tokens;
|
||||
public boolean hasNext() {
|
||||
while(!reader.current().isEOF() && reader.current().isWhitespace()) reader.consume(); // Consume whitespace.
|
||||
return !reader.current().isEOF();
|
||||
}
|
||||
|
||||
public Token fetch() throws TokenizerException {
|
||||
|
||||
while(!reader.current().isEOF() && reader.current().isWhitespace()) reader.consume();
|
||||
if(reader.current().isEOF()) return null; // EOF
|
||||
|
||||
if(reader.matches("//", true)) skipLine(); // Skip line if comment
|
||||
|
||||
if(reader.matches("/*", true)) skipTo("*/");
|
||||
if(reader.matches("/*", true)) skipTo("*/"); // Skip multi line comment
|
||||
|
||||
if(isNumberStart()) {
|
||||
StringBuilder num = new StringBuilder();
|
||||
while(!reader.current().isEOF() && isNumberLike()) {
|
||||
num.append(reader.consume());
|
||||
}
|
||||
return new Token(num.toString(), Token.Type.NUMBER);
|
||||
return new Token(num.toString(), Token.Type.NUMBER, new Position(reader.getLine(), reader.getIndex()));
|
||||
}
|
||||
|
||||
if(reader.current().is('"')) {
|
||||
reader.consume(); // Consume first quote
|
||||
StringBuilder string = new StringBuilder();
|
||||
while(!reader.current().isEOF() && !reader.current().is('"')) {
|
||||
while(!reader.current().is('"')) {
|
||||
if(reader.current().isEOF())
|
||||
throw new FormatException("No end of string literal found. " + reader.getLine() + ":" + reader.getIndex());
|
||||
string.append(reader.consume());
|
||||
}
|
||||
reader.consume(); // Consume last quote
|
||||
return new Token(string.toString(), Token.Type.STRING);
|
||||
return new Token(string.toString(), Token.Type.STRING, new Position(reader.getLine(), reader.getIndex()));
|
||||
}
|
||||
|
||||
if(reader.current().is('(')) return new Token(reader.consume().toString(), Token.Type.BODY_BEGIN);
|
||||
if(reader.current().is(')')) return new Token(reader.consume().toString(), Token.Type.BODY_END);
|
||||
if(reader.current().is(';')) return new Token(reader.consume().toString(), Token.Type.STATEMENT_END);
|
||||
if(reader.current().is(',')) return new Token(reader.consume().toString(), Token.Type.SEPARATOR);
|
||||
if(reader.current().is('('))
|
||||
return new Token(reader.consume().toString(), Token.Type.BODY_BEGIN, new Position(reader.getLine(), reader.getIndex()));
|
||||
if(reader.current().is(')'))
|
||||
return new Token(reader.consume().toString(), Token.Type.BODY_END, new Position(reader.getLine(), reader.getIndex()));
|
||||
if(reader.current().is(';'))
|
||||
return new Token(reader.consume().toString(), Token.Type.STATEMENT_END, new Position(reader.getLine(), reader.getIndex()));
|
||||
if(reader.current().is(','))
|
||||
return new Token(reader.consume().toString(), Token.Type.SEPARATOR, new Position(reader.getLine(), reader.getIndex()));
|
||||
|
||||
StringBuilder token = new StringBuilder();
|
||||
while(!reader.current().isEOF() && !isSyntaxSignificant(reader.current().getCharacter())) {
|
||||
@@ -67,7 +66,7 @@ public class Tokenizer {
|
||||
if(!c.isWhitespace()) token.append(c);
|
||||
}
|
||||
|
||||
return new Token(token.toString(), Token.Type.IDENTIFIER);
|
||||
return new Token(token.toString(), Token.Type.IDENTIFIER, new Position(reader.getLine(), reader.getIndex()));
|
||||
}
|
||||
|
||||
private boolean isNumberLike() {
|
||||
@@ -94,21 +93,6 @@ public class Tokenizer {
|
||||
throw new EOFException("No end of expression found.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Read to the end of a group, consuming all
|
||||
*
|
||||
* @param g
|
||||
* @return
|
||||
*/
|
||||
private String readToEndOfGroup(Group g) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
do {
|
||||
Char current = reader.consume();
|
||||
|
||||
} while(reader.current().getCharacter() != g.getEnd());
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public boolean isSyntaxSignificant(char c) {
|
||||
return syntaxSignificant.contains(c);
|
||||
}
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer.group;
|
||||
|
||||
public class Brackets implements Group {
|
||||
@Override
|
||||
public char getBegin() {
|
||||
return '[';
|
||||
}
|
||||
|
||||
@Override
|
||||
public char getEnd() {
|
||||
return ']';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean ignoreInsideSyntax() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer.group;
|
||||
|
||||
/**
 * A delimited region of tokenizer input with distinct opening and closing characters.
 */
public interface Group {
    /**
     * @return character that opens this group
     */
    char getBegin();

    /**
     * @return character that closes this group
     */
    char getEnd();

    /**
     * @return whether syntax-significant characters are ignored inside this group
     */
    boolean ignoreInsideSyntax();
}
|
||||
@@ -1,18 +0,0 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer.group;
|
||||
|
||||
public class LineComment implements Group {
|
||||
@Override
|
||||
public char getBegin() {
|
||||
return '#';
|
||||
}
|
||||
|
||||
@Override
|
||||
public char getEnd() {
|
||||
return '\n';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean ignoreInsideSyntax() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer.group;
|
||||
|
||||
public class Parentheses implements Group {
|
||||
@Override
|
||||
public char getBegin() {
|
||||
return '(';
|
||||
}
|
||||
|
||||
@Override
|
||||
public char getEnd() {
|
||||
return ')';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean ignoreInsideSyntax() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
package com.dfsek.terra.api.structures.tokenizer.group;
|
||||
|
||||
public class Quotes implements Group {
|
||||
|
||||
@Override
|
||||
public char getBegin() {
|
||||
return '"';
|
||||
}
|
||||
|
||||
@Override
|
||||
public char getEnd() {
|
||||
return '"';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean ignoreInsideSyntax() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user