scanning 4

2025-05-28 14:46:34 +00:00 · 2025-05-28 14:46:34 +00:00 · 2d0ce2dca2
commit 2d0ce2dca2
7 changed files with 285 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,16 @@
+.dub
+docs.json
+__dummy.html
+docs/
+/lox
+lox.so
+lox.dylib
+lox.dll
+lox.a
+lox.lib
+lox-test-*
+*.exe
+*.pdb
+*.o
+*.obj
+*.lst
--- a/dub.sdl
+++ b/dub.sdl
@ -0,0 +1,6 @@
+name "lox"
+description "A minimal D application."
+authors "tanya"
+copyright "Copyright © 2025, tanya"
+license "MPL-2.0"
+dependency "taggedalgebraic" version="~>0.11.23"
--- a/dub.selections.json
+++ b/dub.selections.json
@ -0,0 +1,6 @@
+{
+	"fileVersion": 1,
+	"versions": {
+		"taggedalgebraic": "0.11.23"
+	}
+}
--- a/src/main.d
+++ b/src/main.d
@ -0,0 +1,57 @@
+import std.stdio;
+import std.file;
+import std.conv;
+import std.exception;
+
+import token;
+import tokentype;
+import scanner;
+
+/// Error flag: set to true by `report`, checked by `runFile` after a run,
+/// and cleared by `runPrompt` after each interactive line.
+static bool hadError = false;
+
+/// Reports `message` for source line `line`, passing an empty location
+/// string on to `report`.
+void error(int line, string message)
+{
+	report(line, "", message);
+}
+/// Prints "[line N] Error<where>: <message>" to stderr and raises the
+/// module-level `hadError` flag.
+void report(int line, string where, string message)
+{
+	string lineNo = line.to!string;
+	// writeln emits its arguments back to back, followed by one newline.
+	stderr.writeln("[line ", lineNo, "] Error", where, ": ", message);
+	hadError = true;
+}
+
+/// Scans `source` and prints every resulting token on its own line.
+void run(string source)
+{
+	auto lexer = new Scanner(source);
+	Token[] scanned = lexer.scanTokens();
+
+	foreach(tok; scanned){
+		writeln(tok);
+	}
+}
+
+/// Reads the file at `path` as text, runs it, and throws (via `enforce`)
+/// if any error was reported during the run.
+void runFile(string path)
+{
+	string contents = readText(path);
+	run(contents);
+	enforce(!hadError);
+}
+
+/// Interactive loop: prints a "lox> " prompt, reads one line from stdin
+/// and runs it. The loop stops when `readln` yields a null line; a final
+/// newline is then written to stdout.
+void runPrompt()
+{
+	for(;;){
+		write("lox> ");
+		string input = stdin.readln();
+		if(!input)
+			break;
+		run(input);
+		// An error on one line must not carry over to the next one.
+		hadError = false;
+	}
+	writeln();
+}
+
+/**
+ * Program entry point.
+ *
+ * With one argument the file at that path is run; with no arguments the
+ * interactive prompt is started. More than one argument prints a usage
+ * message and returns 1.
+ *
+ * Returns: 0 on normal completion, 1 on wrong usage.
+ */
+int main(string[] argv){
+	if(argv.length > 2){
+		// The program is "lox" (see the package name and the "lox> " prompt).
+		writeln("Usage: lox [script]");
+		return 1;
+	}
+	if(argv.length == 2)
+		runFile(argv[1]);
+	else
+		runPrompt();
+	return 0;
+}
+
--- a/src/scanner.d
+++ b/src/scanner.d
@ -0,0 +1,147 @@
+import std.ascii;
+import std.conv;
+
+import token;
+import tokentype;
+import main : error, report;
+
+/// True when `c` is an ASCII letter or an underscore.
+private bool isAlpha_(dchar c)
+{
+	return c == '_' || c.isAlpha;
+}
+/// True when `c` is an ASCII letter, an ASCII digit or an underscore.
+private bool isAlphaNum_(dchar c)
+{
+	return c == '_' || c.isAlphaNum;
+}
+
+/**
+ * Hand-written lexer that turns source text into a `Token[]`.
+ *
+ * `start` is the index of the first character of the lexeme currently
+ * being scanned, `current` is the index of the next character to consume,
+ * and `line` is the line number attached to tokens and error reports.
+ */
+class Scanner {
+	private string source;
+	private Token[] tokens;
+	private int start = 0;
+	private int current = 0;
+	private int line = 1;
+
+	/// Creates a scanner over `source`; nothing is scanned until `scanTokens`.
+	this(string source){
+		this.source = source;
+	}
+	/// True when the position `n` characters past `current` is beyond the input.
+	private bool isAtEnd(uint n = 0) const => current + n >= source.length;
+	/// Consumes and returns the current character.
+	private char advance() => source[current++];
+	/// Returns the character `n` positions ahead without consuming it, or '\0' past the end.
+	private char peek(uint n = 0) const => isAtEnd(n) ? '\0' : source[current + n];
+	/// Shorthand for `peek(1)`.
+	private char peekNext() const => peek(1);
+	/// Consumes the current character only if it equals `expected`.
+	private bool match(char expected){
+		if(peek() != expected)
+			return false;
+		current++;
+		return true;
+	}
+	/// Appends a token whose lexeme is `source[start .. current]`.
+	private void addToken(TokenType type, TTokenValue literal = TTokenValue.nil(0)){
+		string text = source[start .. current];
+		tokens ~= new Token(type, text, literal, line);
+	}
+	/**
+	 * Scans the whole input and returns the accumulated tokens, terminated
+	 * by an `EOF` token with an empty lexeme.
+	 */
+	Token[] scanTokens(){
+		while(!isAtEnd){
+			start = current;
+			scanToken();
+		}
+		// Collapse the lexeme window first: otherwise EOF would be given the
+		// text of whatever was scanned last (a token, whitespace or comment).
+		start = current;
+		addToken(TokenType.EOF);
+		return tokens;
+	}
+	/// Scans a string literal; the opening quote has already been consumed.
+	private void stringLiteral(){
+		while(peek() != '"' && !isAtEnd){
+			// Strings may span lines, so keep the line counter in sync.
+			if(peek() == '\n')
+				line++;
+			advance();
+		}
+		if(isAtEnd){
+			error(line, "Unterminated string.");
+			return;
+		}
+		advance(); // Eat the closing quote.
+		// The literal value excludes the surrounding quotes.
+		string value = source[start + 1 .. current - 1];
+		addToken(TokenType.STRING, TTokenValue.str(value));
+	}
+	/// Scans an integer or decimal number literal; the first digit has already been consumed.
+	private void number(){
+		while(peek().isDigit)
+			advance();
+		// A '.' belongs to the number only when a digit follows it.
+		if(peek() == '.' && peekNext().isDigit){
+			advance(); // Eat the .
+			while(peek().isDigit)
+				advance();
+		}
+		addToken(TokenType.NUMBER, TTokenValue.dbl(source[start .. current].to!double));
+	}
+	/// Maps `word` to its keyword token type, or `IDENTIFIER` if it is not a keyword.
+	private TokenType keywords(string word){
+		with(TokenType) switch(word){
+			case "and":    return AND;
+			case "class":  return CLASS;
+			case "else":   return ELSE;
+			case "false":  return FALSE;
+			case "for":    return FOR;
+			case "fun":    return FUN;
+			case "if":     return IF;
+			case "nil":    return NIL;
+			case "or":     return OR;
+			case "print":  return PRINT;
+			case "return": return RETURN;
+			case "super":  return SUPER;
+			case "this":   return THIS;
+			case "true":   return TRUE;
+			case "var":    return VAR;
+			case "while":  return WHILE;
+			default:       return IDENTIFIER;
+		}
+	}
+	/// Scans an identifier or keyword; the first character has already been consumed.
+	private void identifier(){
+		while(peek().isAlphaNum_)
+			advance();
+		addToken(keywords(source[start .. current]));
+	}
+	/// Consumes one character and scans the single lexeme (if any) that it starts.
+	private void scanToken() {
+		char c = advance();
+		with(TokenType) switch(c){
+			case '(': addToken(LEFT_PAREN); break;
+			case ')': addToken(RIGHT_PAREN); break;
+			case '{': addToken(LEFT_BRACE); break;
+			case '}': addToken(RIGHT_BRACE); break;
+			case ',': addToken(COMMA); break;
+			case '.': addToken(DOT); break;
+			case '-': addToken(MINUS); break;
+			case '+': addToken(PLUS); break;
+			case ';': addToken(SEMICOLON); break;
+			case '*': addToken(STAR); break;
+			case '!':
+				addToken(match('=') ? BANG_EQUAL : BANG);
+				break;
+			case '=':
+				addToken(match('=') ? EQUAL_EQUAL : EQUAL);
+				break;
+			case '<':
+				addToken(match('=') ? LESS_EQUAL : LESS);
+				break;
+			case '>':
+				addToken(match('=') ? GREATER_EQUAL : GREATER);
+				break;
+			case '/':
+				if(match('/')){
+					// Line comment: skip to the end of the line, emit nothing.
+					while(peek() != '\n' && !isAtEnd)
+						advance();
+				} else {
+					addToken(SLASH);
+				}
+				break;
+			case ' ', '\r', '\t':
+				// Insignificant whitespace.
+				break;
+			case '\n':
+				line++;
+				break;
+			case '"':
+				stringLiteral();
+				break;
+			case '0': .. case '9':
+				number();
+				break;
+			case 'a': .. case 'z':
+			case 'A': .. case 'Z':
+			case '_':
+				identifier();
+				break;
+			default:
+				error(line, "Unexpected character");
+		}
+	}
+}
+
--- a/src/token.d
+++ b/src/token.d
@ -0,0 +1,31 @@
+import std.conv;
+
+import taggedalgebraic;
+
+import tokentype;
+
+/// Field set from which the `TTokenValue` tagged union is generated.
+struct TokenValue{
+	string str; // payload of STRING tokens (the scanner passes the text between the quotes)
+	double dbl; // payload of NUMBER tokens
+	bool nil = false; // kind the scanner uses by default for tokens that carry no literal
+}
+/// Tagged union over the fields of `TokenValue`.
+alias TTokenValue = TaggedUnion!TokenValue;
+
+/// One lexical token: its type, the source text it covers, its literal
+/// value and the line number it was recorded with.
+class Token {
+	TokenType type;
+	string lexeme;
+	TTokenValue literal;
+	int line;
+
+	/// Stores the four arguments in the fields of the same name.
+	this(TokenType type, string lexeme, TTokenValue literal, int line)
+	{
+		this.line = line;
+		this.literal = literal;
+		this.lexeme = lexeme;
+		this.type = type;
+	}
+
+	/// Renders the token as "<type> <lexeme> <literal>".
+	override string toString() const
+	{
+		string kindText = type.to!string;
+		string valueText = literal.to!string;
+		return kindText ~ " " ~ lexeme ~ " " ~ valueText;
+	}
+}
+
--- a/src/tokentype.d
+++ b/src/tokentype.d
@ -0,0 +1,22 @@
+
+/// Every kind of token the scanner can produce.
+enum TokenType {
+	// Punctuation and operators made of exactly one character.
+	LEFT_PAREN,
+	RIGHT_PAREN,
+	LEFT_BRACE,
+	RIGHT_BRACE,
+	COMMA,
+	DOT,
+	MINUS,
+	PLUS,
+	SEMICOLON,
+	SLASH,
+	STAR,
+
+	// Operators that are one character, optionally followed by '='.
+	BANG,
+	BANG_EQUAL,
+	EQUAL,
+	EQUAL_EQUAL,
+	GREATER,
+	GREATER_EQUAL,
+	LESS,
+	LESS_EQUAL,
+
+	// Identifiers and literal values.
+	IDENTIFIER,
+	STRING,
+	NUMBER,
+
+	// Reserved words.
+	AND,
+	CLASS,
+	ELSE,
+	FALSE,
+	FUN,
+	FOR,
+	IF,
+	NIL,
+	OR,
+	PRINT,
+	RETURN,
+	SUPER,
+	THIS,
+	TRUE,
+	VAR,
+	WHILE,
+
+	// End-of-input marker appended by the scanner.
+	EOF
+}
+