syntax test - mighty - The mighty programming language, compiler and tools (WIP)

commit 791321af39c8203340815bd2747ddc10163b3f2b
parent 308881ef7bfcfa3943a70e24f1a1e5e0a005e1d9
Author: keyle <keyle@capsule.org>
Date:   Sat, 16 May 2026 16:40:02 +1000

syntax test

Diffstat:
A lexer.mty  | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

1 file changed, 124 insertions(+), 0 deletions(-)
diff --git a/lexer.mty b/lexer.mty
@@ -0,0 +1,124 @@
+ns lexer
+
+// This file is a sketch of how a lexer could look in Mighty.
+// The important idea is `generate`: it hides the loop, while `next_token`
+// advances an explicit immutable lexer state.
+
+pub enum TokenKind ::
+    ident
+    int_lit
+    plus
+    lparen
+    rparen
+    eof
+    bad
+
+pub struct Token ::
+    TokenKind kind
+    str text
+    int line
+    int col
+
+struct Lexer ::
+    str src
+    int i
+    int line
+    int col
+
+// A generation step returns one value and the next state.
+struct Step[T, S] ::
+    T value
+    S state
+
+// Public entry point.
+// generate(seed, step) repeatedly calls step(state), collects each value,
+// and stops when step returns nil.
+pub Token[] lex(str src) =
+    generate(Lexer(src: src, i: 0, line: 1, col: 1), next_token)
+
+// Produce one token, plus the lexer state after that token.
+// Returning nil means there is nothing else to produce.
+Step[Token, Lexer]? next_token(Lexer s) =
+    nil if done(s)
+    else next_token(skip_space(s)) if space(peek(s))
+    else read_ident(s) if ident_head(peek(s))
+    else read_int(s) if digit(peek(s))
+    else read_one(s, TokenKind.plus) if peek(s) == '+'
+    else read_one(s, TokenKind.lparen) if peek(s) == '('
+    else read_one(s, TokenKind.rparen) if peek(s) == ')'
+    else read_one(s, TokenKind.bad)
+
+// End of input. The index is the only thing that moves through src.
+bool done(Lexer s) =
+    s.i >= #s.src
+
+// Safe current character. Only call this after checking not done(s).
+char peek(Lexer s) =
+    s.src[s.i]
+
+// Advance by one character, updating line/column.
+Lexer advance(Lexer s) =
+    Lexer(src: s.src, i: s.i + 1, line: s.line + 1, col: 1)
+        if peek(s) == '\n'
+        else Lexer(src: s.src, i: s.i + 1, line: s.line, col: s.col + 1)
+
+// Skip spaces and newlines. This returns only the updated state;
+// no token is emitted for whitespace in this tiny lexer.
+Lexer skip_space(Lexer s) =
+    s if done(s) or not space(peek(s))
+    else skip_space(advance(s))
+
+// Read a single-character token like '+', '(', or ')'.
+Step[Token, Lexer] read_one(Lexer s, TokenKind kind) =
+    Step(
+        value: Token(kind: kind, text: s.src[s.i..s.i + 1], line: s.line, col: s.col),
+        state: advance(s)
+    )
+
+// Read an identifier by consuming the longest identifier tail.
+Step[Token, Lexer] read_ident(Lexer s) ::
+    int start = s.i
+    int line = s.line
+    int col = s.col
+    Lexer end = skip_while(s, ident_tail)
+
+    ret Step(
+        value: Token(kind: TokenKind.ident, text: s.src[start..end.i], line: line, col: col),
+        state: end
+    )
+
+// Read an integer literal by consuming consecutive digits.
+Step[Token, Lexer] read_int(Lexer s) ::
+    int start = s.i
+    int line = s.line
+    int col = s.col
+    Lexer end = skip_while(s, digit)
+
+    ret Step(
+        value: Token(kind: TokenKind.int_lit, text: s.src[start..end.i], line: line, col: col),
+        state: end
+    )
+
+// Move forward while pred(current_char) is true.
+Lexer skip_while(Lexer s, bool pred(char)) =
+    s if done(s) or not pred(peek(s))
+    else skip_while(advance(s), pred)
+
+bool space(char c) =
+    c == ' ' or c == '\t' or c == '\r' or c == '\n'
+
+bool digit(char c) =
+    c >= '0' and c <= '9'
+
+bool alpha(char c) =
+    c >= 'a' and c <= 'z' or c >= 'A' and c <= 'Z'
+
+bool ident_head(char c) =
+    alpha(c) or c == '_'
+
+bool ident_tail(char c) =
+    ident_head(c) or digit(c)
+
+// Builtin shape, shown here only to make the control flow explicit.
+// A compiler/runtime would implement this as a loop.
+T[] generate[T, S](S state, Step[T, S]? step(S)) = builtin

	mighty The mighty programming language, compiler and tools (WIP)
	Log \| Files \| Refs