From 2d0ce2dca2640f88f5ebae598c674254db37540c Mon Sep 17 00:00:00 2001
From: nazrin
Date: Wed, 28 May 2025 14:46:34 +0000
Subject: [PATCH] scanning 4

---
 .gitignore          |  16 +++++
 dub.sdl             |   6 ++
 dub.selections.json |   6 ++
 src/main.d          |  64 +++++++++++++++++
 src/scanner.d       | 162 ++++++++++++++++++++++++++++++++++++++++++++
 src/token.d         |  35 ++++++++++
 src/tokentype.d     |  22 ++++++
 7 files changed, 311 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 dub.sdl
 create mode 100644 dub.selections.json
 create mode 100644 src/main.d
 create mode 100644 src/scanner.d
 create mode 100644 src/token.d
 create mode 100644 src/tokentype.d

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f9dffc0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,16 @@
+.dub
+docs.json
+__dummy.html
+docs/
+/lox
+lox.so
+lox.dylib
+lox.dll
+lox.a
+lox.lib
+lox-test-*
+*.exe
+*.pdb
+*.o
+*.obj
+*.lst
diff --git a/dub.sdl b/dub.sdl
new file mode 100644
index 0000000..3fa3d95
--- /dev/null
+++ b/dub.sdl
@@ -0,0 +1,6 @@
+name "lox"
+description "A minimal D application."
+authors "tanya"
+copyright "Copyright © 2025, tanya"
+license "MPL-2.0"
+dependency "taggedalgebraic" version="~>0.11.23"
diff --git a/dub.selections.json b/dub.selections.json
new file mode 100644
index 0000000..3fbfc12
--- /dev/null
+++ b/dub.selections.json
@@ -0,0 +1,6 @@
+{
+	"fileVersion": 1,
+	"versions": {
+		"taggedalgebraic": "0.11.23"
+	}
+}
diff --git a/src/main.d b/src/main.d
new file mode 100644
index 0000000..a3e0fdc
--- /dev/null
+++ b/src/main.d
@@ -0,0 +1,64 @@
+import std.stdio;
+import std.file;
+import std.conv;
+import std.exception;
+
+import token;
+import tokentype;
+import scanner;
+
+/// Set by report(); lets the entry points know that the current run failed.
+static bool hadError = false;
+
+/// Reports `message` for `line` with no extra location text.
+void error(int line, string message){
+    report(line, "", message);
+}
+/// Writes "[line N] Error<where>: <message>" to stderr and sets hadError.
+void report(int line, string where, string message){
+    stderr.writeln("[line " ~ line.to!string ~ "] Error" ~ where ~ ": " ~ message);
+    hadError = true;
+}
+
+/// Scans `source` and prints each resulting token on its own line.
+void run(string source){
+    Scanner scanner = new Scanner(source);
+    Token[] tokens = scanner.scanTokens();
+
+    foreach(token; tokens)
+        writeln(token);
+}
+
+/// Runs the script at `path`; enforce throws if an error was reported.
+void runFile(string path){
+    string bytes = readText(path);
+    run(bytes);
+    enforce(!hadError);
+}
+
+/// Interactive prompt: runs one line at a time until readln yields no line.
+void runPrompt(){
+    while(true){
+        write("lox> ");
+        string line = stdin.readln();
+        if(!line)
+            break;
+        run(line);
+        hadError = false; // an error on one line must not carry over to the next
+    }
+    writeln();
+}
+
+/// Entry point: one argument runs that file, none starts the prompt.
+int main(string[] argv){
+    if(argv.length > 2){
+        writeln("Usage: jlox [script]");
+        return 1;
+    }
+    if(argv.length == 2)
+        runFile(argv[1]);
+    else
+        runPrompt();
+    return 0;
+}
+
diff --git a/src/scanner.d b/src/scanner.d
new file mode 100644
index 0000000..cd715ba
--- /dev/null
+++ b/src/scanner.d
@@ -0,0 +1,162 @@
+import std.ascii;
+import std.conv;
+
+import token;
+import tokentype;
+import main : error, report;
+
+/// True for an ASCII letter or '_'.
+private bool isAlpha_(dchar c) => c.isAlpha || c == '_';
+/// True for an ASCII letter, an ASCII digit or '_'.
+private bool isAlphaNum_(dchar c) => c.isAlphaNum || c == '_';
+
+/// Splits a source string into a list of tokens.
+class Scanner {
+    private string source;
+    private Token[] tokens;
+    private int start = 0; // index of the first character of the token being scanned
+    private int current = 0; // index of the next character to consume
+    private int line = 1; // line number attached to tokens and error reports
+
+    this(string source){
+        this.source = source;
+    }
+    /// True when position `current + n` lies at or past the end of the source.
+    private bool isAtEnd(uint n = 0) const => current + n >= source.length;
+    /// Consumes the current character and returns it.
+    private char advance() => source[current++];
+    /// Returns the character `n` positions ahead without consuming it, '\0' past the end.
+    private char peek(uint n = 0) const => isAtEnd(n) ? '\0' : source[current + n];
+    private char peekNext() const => peek(1);
+    /// Consumes the current character only when it equals `expected`.
+    private bool match(char expected){
+        if(peek() != expected)
+            return false;
+        current++;
+        return true;
+    }
+    /// Appends a token whose lexeme is source[start .. current].
+    private void addToken(TokenType type, TTokenValue literal = TTokenValue.nil(0)){
+        string text = source[start .. current];
+        tokens ~= new Token(type, text, literal, line);
+    }
+    /// Scans the whole source and returns the tokens, ending with an EOF token.
+    Token[] scanTokens(){
+        while(!isAtEnd){
+            start = current;
+            scanToken();
+        }
+        start = current; // give EOF an empty lexeme, not the last scanned span
+        addToken(TokenType.EOF);
+        return tokens;
+    }
+    /// Scans a string literal; the opening '"' has already been consumed.
+    private void stringLiteral(){
+        while(peek() != '"' && !isAtEnd){
+            if(peek() == '\n')
+                line++;
+            advance();
+        }
+        if(isAtEnd){
+            error(line, "Unterminated string.");
+            return;
+        }
+        advance(); // the closing '"'
+        string value = source[start + 1 .. current -1]; // drop both quotes
+        addToken(TokenType.STRING, TTokenValue.str(value));
+    }
+    /// Scans a number: digits, optionally followed by '.' and more digits.
+    private void number(){
+        while(peek().isDigit)
+            advance();
+        if(peek() == '.' && peekNext().isDigit){
+            advance(); // Eat the .
+            while(peek().isDigit)
+                advance();
+        }
+        addToken(TokenType.NUMBER, TTokenValue.dbl(source[start .. current].to!double));
+    }
+    /// Maps a reserved word to its token type; any other word is an IDENTIFIER.
+    private TokenType keywords(string word){
+        with(TokenType) switch(word){
+            case "and": return AND;
+            case "class": return CLASS;
+            case "else": return ELSE;
+            case "false": return FALSE;
+            case "for": return FOR;
+            case "fun": return FUN;
+            case "if": return IF;
+            case "nil": return NIL;
+            case "or": return OR;
+            case "print": return PRINT;
+            case "return": return RETURN;
+            case "super": return SUPER;
+            case "this": return THIS;
+            case "true": return TRUE;
+            case "var": return VAR;
+            case "while": return WHILE;
+            default: return IDENTIFIER;
+        }
+    }
+    /// Scans an identifier or keyword whose first character is already consumed.
+    private void identifier(){
+        while(peek().isAlphaNum_)
+            advance();
+        addToken(keywords(source[start .. current]));
+    }
+    /// Scans one token, or skips whitespace or a line comment.
+    private void scanToken() {
+        char c = advance();
+        with(TokenType) switch(c){
+            case '(': addToken(LEFT_PAREN); break;
+            case ')': addToken(RIGHT_PAREN); break;
+            case '{': addToken(LEFT_BRACE); break;
+            case '}': addToken(RIGHT_BRACE); break;
+            case ',': addToken(COMMA); break;
+            case '.': addToken(DOT); break;
+            case '-': addToken(MINUS); break;
+            case '+': addToken(PLUS); break;
+            case ';': addToken(SEMICOLON); break;
+            case '*': addToken(STAR); break;
+            case '!':
+                addToken(match('=') ? BANG_EQUAL : BANG);
+                break;
+            case '=':
+                addToken(match('=') ? EQUAL_EQUAL : EQUAL);
+                break;
+            case '<':
+                addToken(match('=') ? LESS_EQUAL : LESS);
+                break;
+            case '>':
+                addToken(match('=') ? GREATER_EQUAL : GREATER);
+                break;
+            case '/':
+                if(match('/')){ // line comment: skip to end of line
+                    while(peek() != '\n' && !isAtEnd)
+                        advance();
+                } else {
+                    addToken(SLASH);
+                }
+                break;
+            case ' ', '\r', '\t':
+                break;
+            case '\n':
+                line++;
+                break;
+            case '"':
+                stringLiteral();
+                break;
+            case '0': .. case '9':
+                number();
+                break;
+            case 'a': .. case 'z':
+            case 'A': .. case 'Z':
+            case '_':
+                identifier();
+                break;
+            default:
+                error(line, "Unexpected character");
+        }
+    }
+}
+
diff --git a/src/token.d b/src/token.d
new file mode 100644
index 0000000..ba2818b
--- /dev/null
+++ b/src/token.d
@@ -0,0 +1,35 @@
+import std.conv;
+
+import taggedalgebraic;
+
+import tokentype;
+
+/// Fields a token literal can hold; wrapped by TTokenValue below.
+struct TokenValue{
+    string str;
+    double dbl;
+    bool nil = false;
+}
+/// Tagged union built from TokenValue's fields.
+alias TTokenValue = TaggedUnion!TokenValue;
+
+/// One token: its type, lexeme, literal value and line number.
+class Token {
+    TokenType type;
+    string lexeme;
+    TTokenValue literal;
+    int line;
+
+    this(TokenType type, string lexeme, TTokenValue literal, int line) {
+        this.type = type;
+        this.lexeme = lexeme;
+        this.literal = literal;
+        this.line = line;
+    }
+
+    /// Formats the token as "<type> <lexeme> <literal>".
+    override string toString() const{
+        return type.to!string ~ " " ~ lexeme ~ " " ~ literal.to!string;
+    }
+}
+
diff --git a/src/tokentype.d b/src/tokentype.d
new file mode 100644
index 0000000..1fb8c9e
--- /dev/null
+++ b/src/tokentype.d
@@ -0,0 +1,22 @@
+
+enum TokenType {
+    // Single-character tokens.
+    LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE,
+    COMMA, DOT, MINUS, PLUS, SEMICOLON, SLASH, STAR,
+
+    // One or two character tokens.
+    BANG, BANG_EQUAL,
+    EQUAL, EQUAL_EQUAL,
+    GREATER, GREATER_EQUAL,
+    LESS, LESS_EQUAL,
+
+    // Literals.
+    IDENTIFIER, STRING, NUMBER,
+
+    // Keywords.
+    AND, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR,
+    PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE,
+
+    EOF
+}
+