working tokenizer

This commit is contained in:
dfsek
2020-12-19 01:50:56 -07:00
parent 1ce884d1c7
commit 29e2746e72
29 changed files with 402 additions and 40 deletions

View File

@@ -0,0 +1,4 @@
package com.dfsek.terra;
/**
 * Placeholder interface; it declares no members in this revision.
 * NOTE(review): presumably the contract for handling commands - confirm once members are added.
 */
public interface CommandHandler {
}

View File

@@ -0,0 +1,4 @@
/**
* API for platform implementations. Mostly interfaces to be implemented by platform delegates.
*/
package com.dfsek.terra.api.platform;

View File

@@ -0,0 +1,4 @@
package com.dfsek.terra.api.structures;
/**
 * Element type of the list returned by {@link Function#getArguments()}.
 * Declares no members in this revision.
 */
public interface Argument {
}

View File

@@ -0,0 +1,11 @@
package com.dfsek.terra.api.structures;
import java.util.List;
/**
 * A named operation with a list of {@link Argument}s.
 * NOTE(review): shares its simple name with {@code java.util.function.Function}; take care with imports.
 */
public interface Function {
/**
 * Runs the function. Takes no parameters and returns nothing.
 */
void apply();
/**
 * @return the name of this function
 */
String name();
/**
 * @return the arguments of this function (NOTE(review): nullability and mutability are not specified here)
 */
List<Argument> getArguments();
}

View File

@@ -0,0 +1,4 @@
package com.dfsek.terra.api.structures;
/**
 * Empty placeholder class; no parsing logic is implemented in this revision.
 */
public class Parser {
}

View File

@@ -1,4 +1,4 @@
package com.dfsek.terra.structure.v2.tokenizer;
package com.dfsek.terra.api.structures.tokenizer;
public class Char {
private final char character;
@@ -24,6 +24,18 @@ public class Char {
return line;
}
/**
 * Tests the wrapped character with {@link Character#isWhitespace(char)}.
 *
 * @return {@code true} if the wrapped character is whitespace
 */
public boolean isWhitespace() {
    final boolean whitespace = Character.isWhitespace(this.character);
    return whitespace;
}
/**
 * Tests whether the wrapped character is a line feed ({@code '\n'}).
 * A carriage return alone is not treated as a new line.
 *
 * @return {@code true} if the wrapped character is {@code '\n'}
 */
public boolean isNewLine() {
    return '\n' == this.character;
}
/**
 * Tests the wrapped character with {@link Character#isDigit(char)}.
 *
 * @return {@code true} if the wrapped character is a digit
 */
public boolean isDigit() {
    final boolean digit = Character.isDigit(this.character);
    return digit;
}
public boolean is(char... tests) {
for(char test : tests) {
if(test == character && test != '\0') {
@@ -32,4 +44,13 @@ public class Char {
}
return false;
}
/**
 * Tests whether the wrapped character is the NUL character ({@code '\0'}),
 * which is the value this class treats as end of input.
 *
 * @return {@code true} if the wrapped character is {@code '\0'}
 */
public boolean isEOF() {
    return '\0' == this.character;
}
/**
 * @return a one-character string holding the wrapped character
 */
@Override
public String toString() {
    return String.valueOf(this.character);
}
}

View File

@@ -1,6 +1,6 @@
package com.dfsek.terra.structure.v2.tokenizer;
package com.dfsek.terra.api.structures.tokenizer;
import org.polydev.gaea.util.GlueList;
import com.dfsek.terra.api.util.GlueList;
import java.io.IOException;
import java.io.Reader;
@@ -23,12 +23,13 @@ public class Lookahead {
/**
 * Get the current character without consuming it.
 * Equivalent to {@code next(0)}.
 *
 * @return current character
 */
public Char current() {
return next(0);
}
/**
* Consume and return one character.
*
@@ -98,4 +99,15 @@ public class Lookahead {
}
}
}
/**
 * Tests whether the upcoming characters spell out {@code check}, optionally consuming them.
 * Characters are compared one at a time with {@code next(offset).is(...)}; nothing is consumed
 * unless every character matched.
 *
 * @param check   text to look for; {@code null} never matches
 * @param consume whether to consume {@code check.length()} characters after a successful match
 * @return {@code true} if every character of {@code check} matched, {@code false} otherwise
 */
public boolean matches(String check, boolean consume) {
    if(check == null) {
        return false;
    }
    final int length = check.length();
    int offset = 0;
    while(offset < length) {
        Char peeked = next(offset);
        if(!peeked.is(check.charAt(offset))) {
            return false; // First mismatch ends the comparison; nothing has been consumed.
        }
        offset++;
    }
    if(consume) {
        consume(length); // Consume string
    }
    return true;
}
}

View File

@@ -1,4 +1,4 @@
package com.dfsek.terra.structure.v2.tokenizer;
package com.dfsek.terra.api.structures.tokenizer;
public class Position {
private final int line;

View File

@@ -0,0 +1,28 @@
package com.dfsek.terra.api.structures.tokenizer;
/**
 * Immutable pair of a token's text and the {@link Type} it was classified as.
 */
public class Token {
    private final String content;
    private final Type type;

    /**
     * @param content text of the token
     * @param type    category of the token
     */
    public Token(String content, Type type) {
        this.type = type;
        this.content = content;
    }

    /**
     * @return text of the token
     */
    public String getContent() {
        return this.content;
    }

    /**
     * @return category of the token
     */
    public Type getType() {
        return this.type;
    }

    /**
     * @return the type, a colon and the content wrapped in single quotes, e.g. {@code NUMBER: '1'}
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(this.type);
        text.append(": '");
        text.append(this.content);
        text.append("'");
        return text.toString();
    }

    /**
     * Token categories. Declaration order is kept stable because ordinals depend on it.
     */
    public enum Type {
        IDENTIFIER,
        NUMBER,
        STRING,
        BOOLEAN,
        BODY_BEGIN,
        BODY_END,
        STATEMENT_END,
        SEPARATOR
    }
}

View File

@@ -0,0 +1,5 @@
package com.dfsek.terra.api.structures.tokenizer;
/**
 * Empty placeholder class; it is the element type of the list returned by {@code Tokenizer#tokenize()}.
 * Declares no members in this revision.
 */
public class TokenizedStatement {
}

View File

@@ -0,0 +1,116 @@
package com.dfsek.terra.api.structures.tokenizer;
import com.dfsek.terra.api.structures.tokenizer.exceptions.EOFException;
import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException;
import com.dfsek.terra.api.structures.tokenizer.group.Group;
import com.dfsek.terra.api.util.GlueList;
import com.google.common.collect.Sets;
import java.io.StringReader;
import java.util.List;
import java.util.Set;
/**
 * Hand-written lexer that turns script text into {@link Token}s, one per {@link #fetch()} call.
 * <p>
 * A NUL character is appended to the input and serves as the end-of-input sentinel
 * (see {@code Char#isEOF()}). Whitespace, line comments and block comments between tokens are skipped.
 */
public class Tokenizer {
    private final Lookahead reader;
    private final Set<Character> syntaxSignificant = Sets.newHashSet(';', '(', ')', '"', '[', ']', ',');

    /**
     * @param data script text to tokenize
     */
    public Tokenizer(String data) {
        reader = new Lookahead(new StringReader(data + '\0'));
    }

    /**
     * Groups tokens into statements. Not implemented yet: always returns an empty list
     * and does not consume any input.
     * <p>
     * The previous body looped while the reader was AT end of input without consuming anything,
     * so it spun forever on empty input and was a no-op otherwise.
     *
     * @return a new, empty list
     */
    public List<TokenizedStatement> tokenize() {
        return new GlueList<>();
    }

    /**
     * Reads the next token, skipping any whitespace and comments in front of it.
     * <p>
     * NOTE(review): {@code '['} and {@code ']'} are registered as syntax-significant but have no token
     * type, so reaching one yields an empty IDENTIFIER without consuming it. Needs a design decision.
     *
     * @return the next token, or {@code null} once end of input is reached
     * @throws TokenizerException if a block comment or string literal is not terminated before end of input
     */
    public Token fetch() throws TokenizerException {
        skipIgnored();
        if(reader.current().isEOF()) return null; // EOF

        if(isNumberStart()) {
            StringBuilder num = new StringBuilder();
            while(!reader.current().isEOF() && isNumberLike()) {
                num.append(reader.consume());
            }
            return new Token(num.toString(), Token.Type.NUMBER);
        }

        if(reader.current().is('"')) {
            reader.consume(); // Consume first quote
            StringBuilder string = new StringBuilder();
            while(!reader.current().isEOF() && !reader.current().is('"')) {
                string.append(reader.consume());
            }
            // Without this check the EOF sentinel itself would be consumed as the "closing quote".
            if(reader.current().isEOF()) throw new EOFException("No end of string literal found.");
            reader.consume(); // Consume last quote
            return new Token(string.toString(), Token.Type.STRING);
        }

        if(reader.current().is('(')) return new Token(reader.consume().toString(), Token.Type.BODY_BEGIN);
        if(reader.current().is(')')) return new Token(reader.consume().toString(), Token.Type.BODY_END);
        if(reader.current().is(';')) return new Token(reader.consume().toString(), Token.Type.STATEMENT_END);
        if(reader.current().is(',')) return new Token(reader.consume().toString(), Token.Type.SEPARATOR);

        // Anything else is an identifier: read up to the next syntax-significant character.
        // Whitespace inside the run is consumed but dropped, so "FOO BAR(" yields "FOOBAR".
        StringBuilder token = new StringBuilder();
        while(!reader.current().isEOF() && !isSyntaxSignificant(reader.current().getCharacter())) {
            Char c = reader.consume();
            if(!c.isWhitespace()) token.append(c);
        }
        return new Token(token.toString(), Token.Type.IDENTIFIER);
    }

    /**
     * Skips whitespace, line comments and block comments until the reader sits on the first
     * character of a token or on end of input. Loops so that comments followed by more whitespace
     * or further comments are skipped as well.
     *
     * @throws EOFException if a block comment is not closed before end of input
     */
    private void skipIgnored() throws EOFException {
        while(true) {
            while(!reader.current().isEOF() && reader.current().isWhitespace()) reader.consume();
            if(reader.matches("//", true)) {
                skipLine(); // Skip line if comment
            } else if(reader.matches("/*", true)) {
                skipTo("*/");
            } else {
                return;
            }
        }
    }

    /**
     * @return whether the current character may continue a number: a digit or one of {@code _ . - E}
     */
    private boolean isNumberLike() {
        return reader.current().isDigit()
                || reader.current().is('_', '.', '-', 'E');
    }

    /**
     * @return whether a number starts at the current position: a digit, optionally preceded by
     * {@code -}, {@code .} or {@code -.}
     */
    private boolean isNumberStart() {
        return reader.current().isDigit()
                || reader.current().is('-') && reader.next(1).isDigit()
                || reader.current().is('-') && reader.next(1).is('.') && reader.next(2).isDigit()
                || reader.current().is('.') && reader.next(1).isDigit();
    }

    /**
     * Consumes characters up to, but not including, the next line feed or end of input.
     */
    private void skipLine() {
        while(!reader.current().isEOF() && !reader.current().isNewLine()) reader.consume();
    }

    /**
     * Consumes characters up to and including the next occurrence of {@code s}.
     *
     * @param s terminator to look for
     * @throws EOFException if end of input is reached before {@code s} is found
     */
    private void skipTo(String s) throws EOFException {
        while(!reader.current().isEOF()) {
            if(reader.matches(s, true)) return;
            reader.consume();
        }
        throw new EOFException("No end of expression found.");
    }

    /**
     * Reads to the end of a group. The character at the current position is always consumed first;
     * reading then continues until the group's end character or end of input is the current
     * character. That terminating character is left unconsumed.
     *
     * @param g group whose end character terminates the read
     * @return every character consumed by this call, in order
     */
    private String readToEndOfGroup(Group g) {
        StringBuilder builder = new StringBuilder();
        do {
            builder.append(reader.consume());
        } while(!reader.current().isEOF() && reader.current().getCharacter() != g.getEnd());
        return builder.toString();
    }

    /**
     * @param c character to test
     * @return whether {@code c} is one of {@code ; ( ) " [ ] ,}
     */
    public boolean isSyntaxSignificant(char c) {
        return syntaxSignificant.contains(c);
    }
}

View File

@@ -0,0 +1,20 @@
package com.dfsek.terra.api.structures.tokenizer.exceptions;
/**
 * Tokenizer failure raised when end of input is reached unexpectedly.
 * Offers the same four constructor shapes as {@link Exception}.
 */
public class EOFException extends TokenizerException {
    /** Creates an exception with neither message nor cause. */
    public EOFException() {
        super();
    }

    /**
     * @param s detail message
     */
    public EOFException(String s) {
        super(s);
    }

    /**
     * @param cause underlying cause
     */
    public EOFException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message detail message
     * @param cause   underlying cause
     */
    public EOFException(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@@ -0,0 +1,20 @@
package com.dfsek.terra.api.structures.tokenizer.exceptions;
/**
 * Tokenizer failure for input that is not well-formed.
 * Offers the same four constructor shapes as {@link Exception}.
 */
public class FormatException extends TokenizerException {
    /** Creates an exception with neither message nor cause. */
    public FormatException() {
        super();
    }

    /**
     * @param s detail message
     */
    public FormatException(String s) {
        super(s);
    }

    /**
     * @param cause underlying cause
     */
    public FormatException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message detail message
     * @param cause   underlying cause
     */
    public FormatException(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@@ -0,0 +1,19 @@
package com.dfsek.terra.api.structures.tokenizer.exceptions;
public abstract class TokenizerException extends Exception {
public TokenizerException(String s) {
super(s);
}
public TokenizerException() {
super();
}
public TokenizerException(String message, Throwable cause) {
super(message, cause);
}
public TokenizerException(Throwable cause) {
super(cause);
}
}

View File

@@ -0,0 +1,18 @@
package com.dfsek.terra.api.structures.tokenizer.group;
/**
 * {@link Group} delimited by square brackets.
 */
public class Brackets implements Group {
    private static final char OPEN = '[';
    private static final char CLOSE = ']';

    /**
     * @return {@code '['}
     */
    @Override
    public char getBegin() {
        return OPEN;
    }

    /**
     * @return {@code ']'}
     */
    @Override
    public char getEnd() {
        return CLOSE;
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean ignoreInsideSyntax() {
        return false;
    }
}

View File

@@ -0,0 +1,9 @@
package com.dfsek.terra.api.structures.tokenizer.group;
/**
 * A region of input delimited by a begin character and an end character.
 */
public interface Group {
/**
 * @return the character that opens the group
 */
char getBegin();
/**
 * @return the character that closes the group
 */
char getEnd();
/**
 * NOTE(review): presumably tells the tokenizer to treat the group's contents as opaque text
 * (the quote and line-comment groups return {@code true}, brackets and parentheses {@code false}) - confirm.
 *
 * @return whether syntax inside the group is ignored
 */
boolean ignoreInsideSyntax();
}

View File

@@ -0,0 +1,18 @@
package com.dfsek.terra.api.structures.tokenizer.group;
/**
 * {@link Group} that runs from {@code '#'} to the next line feed.
 * NOTE(review): {@code Tokenizer#fetch()} recognises line comments by a double slash, not {@code '#'};
 * confirm which marker is intended.
 */
public class LineComment implements Group {
    private static final char OPEN = '#';
    private static final char CLOSE = '\n';

    /**
     * @return {@code '#'}
     */
    @Override
    public char getBegin() {
        return OPEN;
    }

    /**
     * @return the line feed character
     */
    @Override
    public char getEnd() {
        return CLOSE;
    }

    /**
     * @return always {@code true}
     */
    @Override
    public boolean ignoreInsideSyntax() {
        return true;
    }
}

View File

@@ -0,0 +1,18 @@
package com.dfsek.terra.api.structures.tokenizer.group;
/**
 * {@link Group} delimited by round parentheses.
 */
public class Parentheses implements Group {
    private static final char OPEN = '(';
    private static final char CLOSE = ')';

    /**
     * @return {@code '('}
     */
    @Override
    public char getBegin() {
        return OPEN;
    }

    /**
     * @return {@code ')'}
     */
    @Override
    public char getEnd() {
        return CLOSE;
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean ignoreInsideSyntax() {
        return false;
    }
}

View File

@@ -0,0 +1,19 @@
package com.dfsek.terra.api.structures.tokenizer.group;
/**
 * {@link Group} delimited by double quotes; the begin and end characters are identical.
 */
public class Quotes implements Group {
    private static final char QUOTE = '"';

    /**
     * @return the double quote character
     */
    @Override
    public char getBegin() {
        return QUOTE;
    }

    /**
     * @return the double quote character
     */
    @Override
    public char getEnd() {
        return QUOTE;
    }

    /**
     * @return always {@code true}
     */
    @Override
    public boolean ignoreInsideSyntax() {
        return true;
    }
}

View File

@@ -0,0 +1,5 @@
package com.dfsek.terra.command;
/**
 * Empty placeholder class; it declares no members in this revision.
 */
public class Command {
}

View File

@@ -1,6 +1,6 @@
package structure;
import com.dfsek.terra.structure.v2.tokenizer.Lookahead;
import com.dfsek.terra.api.structures.tokenizer.Lookahead;
import org.junit.jupiter.api.Test;
import java.io.StringReader;

View File

@@ -0,0 +1,23 @@
package structure;
import com.dfsek.terra.api.structures.tokenizer.Token;
import com.dfsek.terra.api.structures.tokenizer.Tokenizer;
import com.dfsek.terra.api.structures.tokenizer.exceptions.TokenizerException;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.io.InputStream;
/**
 * Smoke test for {@link Tokenizer}: tokenizes the {@code /test.tesf} classpath resource and prints
 * each token. It makes no assertions; it only fails if tokenizing throws.
 */
public class TokenizerTest {
    /**
     * Prints up to 100 tokens from {@code /test.tesf}, stopping early when {@code fetch()} returns
     * {@code null} (end of input).
     *
     * @throws IOException        if the resource is missing or cannot be read
     * @throws TokenizerException if the tokenizer rejects the input
     */
    @Test
    public void tokens() throws IOException, TokenizerException {
        String script;
        // Close the resource stream deterministically and decode with a fixed charset
        // instead of the platform default.
        try(InputStream in = getClass().getResourceAsStream("/test.tesf")) {
            if(in == null) throw new IOException("Test resource /test.tesf not found on classpath");
            script = IOUtils.toString(in, "UTF-8");
        }

        Tokenizer tokenizer = new Tokenizer(script);
        // The iteration cap guards against a tokenizer that stops making progress.
        for(int i = 0; i < 100; i++) {
            Token t = tokenizer.fetch();
            if(t == null) break;
            System.out.println(t);
        }
    }
}

View File

@@ -0,0 +1,18 @@
FUNCTION(1, "hello");
;
)
FUNCTION2(1.2, "he423llo");
// comment
FUNCTION3(3.4, "35234523452345");
/*
block comment
fsfsdf
gsdfgsdfg
*/
FUNCTION(1, "hello");

View File

@@ -1,5 +0,0 @@
package com.dfsek.terra.structure.v2;
public interface Function {
void apply();
}

View File

@@ -1,4 +0,0 @@
package com.dfsek.terra.structure.v2;
public class Parser {
}

View File

@@ -1,4 +0,0 @@
package com.dfsek.terra.structure.v2.tokenizer;
public class Token {
}

View File

@@ -1,12 +0,0 @@
package com.dfsek.terra.structure.v2.tokenizer;
import java.io.StringReader;
public class Tokenizer {
private final Lookahead reader;
public Tokenizer(String data) {
reader = new Lookahead(new StringReader(data));
}
}

View File

@@ -1,5 +0,0 @@
package com.dfsek.terra.structure.v2.tokenizer;
public enum Tokens {
FUNCTION, SEPARATOR, ARGUMENT
}

View File

@@ -1,4 +0,0 @@
package com.dfsek.terra.structure.v2.tokenizer.exceptions;
public abstract class TokenizerException extends Exception {
}