From 25ae2b3c9b3cb0916ae60fdca706056599db8e29 Mon Sep 17 00:00:00 2001 From: dfsek Date: Wed, 6 Jan 2021 01:11:13 -0700 Subject: [PATCH] drastically increase script loading speed via optimised token pipeline --- .../terra/api/structures/parser/Parser.java | 53 +++++---------- .../api/structures/parser/TokenHolder.java | 59 ----------------- .../api/structures/tokenizer/Tokenizer.java | 65 +++++++++++++++++-- .../tokenizer/exceptions/EOFException.java | 18 ++--- .../tokenizer/exceptions/FormatException.java | 18 ++--- .../exceptions/TokenizerException.java | 22 +++---- .../test/java/structure/TokenizerTest.java | 26 -------- 7 files changed, 95 insertions(+), 166 deletions(-) delete mode 100644 common/src/main/java/com/dfsek/terra/api/structures/parser/TokenHolder.java delete mode 100644 common/src/test/java/structure/TokenizerTest.java diff --git a/common/src/main/java/com/dfsek/terra/api/structures/parser/Parser.java b/common/src/main/java/com/dfsek/terra/api/structures/parser/Parser.java index 8b948bfee..885ad7f49 100644 --- a/common/src/main/java/com/dfsek/terra/api/structures/parser/Parser.java +++ b/common/src/main/java/com/dfsek/terra/api/structures/parser/Parser.java @@ -44,7 +44,6 @@ import com.dfsek.terra.api.structures.parser.lang.variables.Variable; import com.dfsek.terra.api.structures.tokenizer.Position; import com.dfsek.terra.api.structures.tokenizer.Token; import com.dfsek.terra.api.structures.tokenizer.Tokenizer; -import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException; import com.dfsek.terra.api.util.GlueList; import java.util.Collections; @@ -79,18 +78,7 @@ public class Parser { * @throws ParseException If parsing fails. 
*/ public Block parse() throws ParseException { - Tokenizer tokenizer = new Tokenizer(data); - - TokenHolder tokens = new TokenHolder(); - try { - Token t = tokenizer.fetch(); - while(t != null) { - tokens.add(t); - t = tokenizer.fetch(); - } - } catch(TokenizerException e) { - throw new ParseException("Failed to tokenize input", new Position(0, 0), e); - } + Tokenizer tokens = new Tokenizer(data); // Parse ID ParserUtil.checkType(tokens.consume(), Token.Type.ID); // First token must be ID @@ -99,21 +87,12 @@ public class Parser { ParserUtil.checkType(tokens.consume(), Token.Type.STATEMENT_END); this.id = idToken.getContent(); - // Check for dangling brackets - int blockLevel = 0; - for(Token t : tokens.getTokens()) { - if(t.getType().equals(Token.Type.BLOCK_BEGIN)) blockLevel++; - else if(t.getType().equals(Token.Type.BLOCK_END)) blockLevel--; - if(blockLevel < 0) throw new ParseException("Dangling closing brace", t.getPosition()); - } - if(blockLevel != 0) - throw new ParseException("Dangling opening brace", tokens.getTokens().get(tokens.getTokens().size() - 1).getPosition()); return parseBlock(tokens, new HashMap<>(), false); } - private Keyword parseLoopLike(TokenHolder tokens, Map> variableMap, boolean loop) throws ParseException { + private Keyword parseLoopLike(Tokenizer tokens, Map> variableMap, boolean loop) throws ParseException { Token identifier = tokens.consume(); ParserUtil.checkType(identifier, Token.Type.IF_STATEMENT, Token.Type.WHILE_LOOP, Token.Type.FOR_LOOP); @@ -132,7 +111,7 @@ public class Parser { } } - private WhileKeyword parseWhileLoop(TokenHolder tokens, Map> variableMap, Position start) throws ParseException { + private WhileKeyword parseWhileLoop(Tokenizer tokens, Map> variableMap, Position start) throws ParseException { Returnable first = parseExpression(tokens, true, variableMap); ParserUtil.checkReturnType(first, Returnable.ReturnType.BOOLEAN); @@ -141,7 +120,7 @@ public class Parser { return new 
WhileKeyword(parseStatementBlock(tokens, variableMap, true), (Returnable) first, start); // While loop } - private IfKeyword parseIfStatement(TokenHolder tokens, Map> variableMap, Position start, boolean loop) throws ParseException { + private IfKeyword parseIfStatement(Tokenizer tokens, Map> variableMap, Position start, boolean loop) throws ParseException { Returnable condition = parseExpression(tokens, true, variableMap); ParserUtil.checkReturnType(condition, Returnable.ReturnType.BOOLEAN); @@ -168,7 +147,7 @@ public class Parser { return new IfKeyword(statement, (Returnable) condition, elseIf, elseBlock, start); // If statement } - private Block parseStatementBlock(TokenHolder tokens, Map> variableMap, boolean loop) throws ParseException { + private Block parseStatementBlock(Tokenizer tokens, Map> variableMap, boolean loop) throws ParseException { if(tokens.get().getType().equals(Token.Type.BLOCK_BEGIN)) { ParserUtil.checkType(tokens.consume(), Token.Type.BLOCK_BEGIN); @@ -183,7 +162,7 @@ public class Parser { } } - private ForKeyword parseForLoop(TokenHolder tokens, Map> old, Position start) throws ParseException { + private ForKeyword parseForLoop(Tokenizer tokens, Map> old, Position start) throws ParseException { Map> variableMap = new HashMap<>(old); // New scope Token f = tokens.get(); ParserUtil.checkType(f, Token.Type.NUMBER_VARIABLE, Token.Type.STRING_VARIABLE, Token.Type.BOOLEAN_VARIABLE, Token.Type.IDENTIFIER); @@ -216,7 +195,7 @@ public class Parser { return new ForKeyword(parseStatementBlock(tokens, variableMap, true), initializer, (Returnable) conditional, incrementer, start); } - private Returnable parseExpression(TokenHolder tokens, boolean full, Map> variableMap) throws ParseException { + private Returnable parseExpression(Tokenizer tokens, boolean full, Map> variableMap) throws ParseException { boolean booleanInverted = false; // Check for boolean not operator boolean negate = false; if(tokens.get().getType().equals(Token.Type.BOOLEAN_NOT)) { @@ 
-259,7 +238,7 @@ public class Parser { return expression; } - private ConstantExpression parseConstantExpression(TokenHolder tokens) throws ParseException { + private ConstantExpression parseConstantExpression(Tokenizer tokens) throws ParseException { Token constantToken = tokens.consume(); Position position = constantToken.getPosition(); switch(constantToken.getType()) { @@ -275,7 +254,7 @@ public class Parser { } } - private Returnable parseGroup(TokenHolder tokens, Map> variableMap) throws ParseException { + private Returnable parseGroup(Tokenizer tokens, Map> variableMap) throws ParseException { ParserUtil.checkType(tokens.consume(), Token.Type.GROUP_BEGIN); Returnable expression = parseExpression(tokens, true, variableMap); // Parse inside of group as a separate expression ParserUtil.checkType(tokens.consume(), Token.Type.GROUP_END); @@ -283,7 +262,7 @@ public class Parser { } - private BinaryOperation parseBinaryOperation(Returnable left, TokenHolder tokens, Map> variableMap) throws ParseException { + private BinaryOperation parseBinaryOperation(Returnable left, Tokenizer tokens, Map> variableMap) throws ParseException { Token binaryOperator = tokens.consume(); ParserUtil.checkBinaryOperator(binaryOperator); @@ -337,7 +316,7 @@ public class Parser { } } - private Variable parseVariableDeclaration(TokenHolder tokens, Returnable.ReturnType type) throws ParseException { + private Variable parseVariableDeclaration(Tokenizer tokens, Returnable.ReturnType type) throws ParseException { ParserUtil.checkVarType(tokens.get(), type); // Check for type mismatch switch(type) { case NUMBER: @@ -350,7 +329,7 @@ public class Parser { throw new UnsupportedOperationException("Unsupported variable type: " + type); } - private Block parseBlock(TokenHolder tokens, Map> superVars, boolean loop) throws ParseException { + private Block parseBlock(Tokenizer tokens, Map> superVars, boolean loop) throws ParseException { List> parsedItems = new GlueList<>(); Map> parsedVariables = new 
HashMap<>(superVars); // New hashmap as to not mutate parent scope's declarations. @@ -366,7 +345,7 @@ public class Parser { return new Block(parsedItems, first.getPosition()); } - private Item parseItem(TokenHolder tokens, Map> variableMap, boolean loop) throws ParseException { + private Item parseItem(Tokenizer tokens, Map> variableMap, boolean loop) throws ParseException { Token token = tokens.get(); if(loop) ParserUtil.checkType(token, Token.Type.IDENTIFIER, Token.Type.IF_STATEMENT, Token.Type.WHILE_LOOP, Token.Type.FOR_LOOP, Token.Type.NUMBER_VARIABLE, Token.Type.STRING_VARIABLE, Token.Type.BOOLEAN_VARIABLE, Token.Type.RETURN, Token.Type.BREAK, Token.Type.CONTINUE, Token.Type.FAIL); @@ -401,7 +380,7 @@ public class Parser { else throw new UnsupportedOperationException("Unexpected token " + token.getType() + ": " + token.getPosition()); } - private Assignment parseAssignment(Variable variable, TokenHolder tokens, Map> variableMap) throws ParseException { + private Assignment parseAssignment(Variable variable, Tokenizer tokens, Map> variableMap) throws ParseException { Token name = tokens.get(); ParserUtil.checkType(tokens.consume(), Token.Type.IDENTIFIER); @@ -415,7 +394,7 @@ public class Parser { return new Assignment<>((Variable) variable, (Returnable) expression, name.getPosition()); } - private Function parseFunction(TokenHolder tokens, boolean fullStatement, Map> variableMap) throws ParseException { + private Function parseFunction(Tokenizer tokens, boolean fullStatement, Map> variableMap) throws ParseException { Token identifier = tokens.consume(); ParserUtil.checkType(identifier, Token.Type.IDENTIFIER); // First token must be identifier @@ -449,7 +428,7 @@ public class Parser { } - private List> getArgs(TokenHolder tokens, Map> variableMap) throws ParseException { + private List> getArgs(Tokenizer tokens, Map> variableMap) throws ParseException { List> args = new GlueList<>(); while(!tokens.get().getType().equals(Token.Type.GROUP_END)) { diff --git 
a/common/src/main/java/com/dfsek/terra/api/structures/parser/TokenHolder.java b/common/src/main/java/com/dfsek/terra/api/structures/parser/TokenHolder.java deleted file mode 100644 index b8bb90023..000000000 --- a/common/src/main/java/com/dfsek/terra/api/structures/parser/TokenHolder.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.dfsek.terra.api.structures.parser; - -import com.dfsek.terra.api.structures.parser.exceptions.ParseException; -import com.dfsek.terra.api.structures.tokenizer.Position; -import com.dfsek.terra.api.structures.tokenizer.Token; -import com.dfsek.terra.api.util.GlueList; - -import java.util.List; - -/** - * Data structure to hold tokens, where items are inserted at the top and removed from the bottom. - */ -public class TokenHolder { - private final List tokens = new GlueList<>(); - private Position last; - - /** - * Add a token to the top of the stack. - * - * @param token Token to add - */ - public void add(Token token) { - tokens.add(token); - } - - /** - * Get the token at the bottom of the stack. - * - * @return First token - * @throws ParseException If stack is empty - */ - public Token get() throws ParseException { - if(!hasNext()) throw new ParseException("Unexpected end of input", last); - Token token = tokens.get(0); - last = token.getPosition(); - return token; - } - - /** - * Consume (get and remove) the token at the bottom of the stack. 
- * - * @return First token - * @throws ParseException If stack is empty - */ - public Token consume() throws ParseException { - if(!hasNext()) throw new ParseException("Unexpected end of input", last); - Token token = tokens.remove(0); - last = token.getPosition(); - return token; - } - - public List getTokens() { - return tokens; - } - - public boolean hasNext() { - return tokens.size() > 0; - } -} diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Tokenizer.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Tokenizer.java index d856d4653..2fb3e4e1f 100644 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Tokenizer.java +++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/Tokenizer.java @@ -1,5 +1,6 @@ package com.dfsek.terra.api.structures.tokenizer; +import com.dfsek.terra.api.structures.parser.exceptions.ParseException; import com.dfsek.terra.api.structures.tokenizer.exceptions.EOFException; import com.dfsek.terra.api.structures.tokenizer.exceptions.FormatException; import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException; @@ -7,18 +8,67 @@ import com.google.common.collect.Sets; import java.io.StringReader; import java.util.Set; +import java.util.Stack; public class Tokenizer { - private final Lookahead reader; - public static final Set syntaxSignificant = Sets.newHashSet(';', '(', ')', '"', ',', '\\', '=', '{', '}', '+', '-', '*', '/', '>', '<', '!'); // Reserved chars + private final Lookahead reader; + private final Stack brackets = new Stack<>(); + private Token current; - - public Tokenizer(String data) { + public Tokenizer(String data) throws ParseException { reader = new Lookahead(new StringReader(data + '\0')); + current = fetchCheck(); } - public Token fetch() throws TokenizerException { + /** + * Get the first token. 
+ * + * @return First token + * @throws ParseException If token does not exist + */ + public Token get() throws ParseException { + if(!hasNext()) throw new ParseException("Unexpected end of input", new Position(reader.getLine(), reader.getIndex())); // current is null here, so report the reader's position instead of dereferencing it + return current; + } + + /** + * Consume (get and remove) the first token. + * + * @return First token + * @throws ParseException If token does not exist + */ + public Token consume() throws ParseException { + if(!hasNext()) throw new ParseException("Unexpected end of input", new Position(reader.getLine(), reader.getIndex())); // current is null here, so report the reader's position instead of dereferencing it + Token temp = current; + current = fetchCheck(); + return temp; + } + + /** + * Whether this {@code Tokenizer} contains additional tokens. + * + * @return {@code true} if more tokens are present, otherwise {@code false} + */ + public boolean hasNext() { + return !(current == null); + } + + private Token fetchCheck() throws ParseException { + Token fetch = fetch(); + if(fetch != null) { + if(fetch.getType().equals(Token.Type.BLOCK_BEGIN)) brackets.push(fetch); // Opening bracket + else if(fetch.getType().equals(Token.Type.BLOCK_END)) { + if(!brackets.isEmpty()) brackets.pop(); + else throw new ParseException("Dangling closing brace", fetch.getPosition()); // '}' with no matching '{' on the stack + } + } else if(!brackets.isEmpty()) { + throw new ParseException("Dangling opening brace", brackets.peek().getPosition()); // no more tokens while a '{' is still open + } + return fetch; + } + + private Token fetch() throws TokenizerException { while(!reader.current().isEOF() && reader.current().isWhitespace()) reader.consume(); while(reader.matches("//", true)) skipLine(); // Skip line if comment @@ -66,7 +116,7 @@ public class Tokenizer { continue; } else ignoreNext = false; if(reader.current().isEOF()) - throw new FormatException("No end of string literal found. " + reader.getLine() + ":" + reader.getIndex()); + throw new FormatException("No end of string literal found. 
", new Position(reader.getLine(), reader.getIndex())); string.append(reader.consume()); } reader.consume(); // Consume last quote @@ -166,6 +216,7 @@ public class Tokenizer { } private void skipTo(String s) throws EOFException { + Position begin = new Position(reader.getLine(), reader.getIndex()); while(!reader.current().isEOF()) { if(reader.matches(s, true)) { consumeWhitespace(); @@ -173,7 +224,7 @@ public class Tokenizer { } reader.consume(); } - throw new EOFException("No end of expression found."); + throw new EOFException("No end of expression found.", begin); } public boolean isSyntaxSignificant(char c) { diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/EOFException.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/EOFException.java index 2750b86f9..8602e6c2f 100644 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/EOFException.java +++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/EOFException.java @@ -1,20 +1,14 @@ package com.dfsek.terra.api.structures.tokenizer.exceptions; +import com.dfsek.terra.api.structures.tokenizer.Position; + public class EOFException extends TokenizerException { - public EOFException(String s) { - super(s); + public EOFException(String message, Position position) { + super(message, position); } - public EOFException() { - super(); - } - - public EOFException(String message, Throwable cause) { - super(message, cause); - } - - public EOFException(Throwable cause) { - super(cause); + public EOFException(String message, Position position, Throwable cause) { + super(message, position, cause); } } diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/FormatException.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/FormatException.java index c8b8414ca..039c8fe60 100644 --- 
a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/FormatException.java +++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/FormatException.java @@ -1,20 +1,14 @@ package com.dfsek.terra.api.structures.tokenizer.exceptions; +import com.dfsek.terra.api.structures.tokenizer.Position; + public class FormatException extends TokenizerException { - public FormatException(String s) { - super(s); + public FormatException(String message, Position position) { + super(message, position); } - public FormatException() { - super(); - } - - public FormatException(String message, Throwable cause) { - super(message, cause); - } - - public FormatException(Throwable cause) { - super(cause); + public FormatException(String message, Position position, Throwable cause) { + super(message, position, cause); } } diff --git a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/TokenizerException.java b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/TokenizerException.java index 282eb4886..994a65361 100644 --- a/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/TokenizerException.java +++ b/common/src/main/java/com/dfsek/terra/api/structures/tokenizer/exceptions/TokenizerException.java @@ -1,19 +1,15 @@ package com.dfsek.terra.api.structures.tokenizer.exceptions; -public abstract class TokenizerException extends Exception { - public TokenizerException(String s) { - super(s); +import com.dfsek.terra.api.structures.parser.exceptions.ParseException; +import com.dfsek.terra.api.structures.tokenizer.Position; + +public abstract class TokenizerException extends ParseException { + + public TokenizerException(String message, Position position) { + super(message, position); } - public TokenizerException() { - super(); - } - - public TokenizerException(String message, Throwable cause) { - super(message, cause); - } - - public TokenizerException(Throwable cause) { - super(cause); + 
public TokenizerException(String message, Position position, Throwable cause) { + super(message, position, cause); } } diff --git a/common/src/test/java/structure/TokenizerTest.java b/common/src/test/java/structure/TokenizerTest.java deleted file mode 100644 index 994171733..000000000 --- a/common/src/test/java/structure/TokenizerTest.java +++ /dev/null @@ -1,26 +0,0 @@ -package structure; - -import com.dfsek.terra.api.structures.tokenizer.Token; -import com.dfsek.terra.api.structures.tokenizer.Tokenizer; -import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException; -import org.apache.commons.io.IOUtils; -import org.junit.jupiter.api.Test; - -import java.io.IOException; - -public class TokenizerTest { - @Test - public void tokens() throws IOException, TokenizerException { - Tokenizer tokenizer = new Tokenizer(IOUtils.toString(getClass().getResourceAsStream("/test.tesf"))); - // Actual run - long l = System.nanoTime(); - - Token t = tokenizer.fetch(); - while(t != null) { - System.out.println(t); - t = tokenizer.fetch(); - } - - System.out.println((double) (System.nanoTime() - l) / 1000000); - } -}