mighty

The mighty programming language, compiler and tools (WIP)
Log | Files | Refs

commit 7df7dd39e477ce01bd2712e6f554048772385816
parent da6a6faac23dc2e0b9a498cf6a43e10a81efbc6d
Author: citbl <citbl@citbl.org>
Date:   Sat, 23 May 2026 19:11:36 +1000

lex

Diffstat:
M mtcl/main.lua | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 52 insertions(+), 23 deletions(-)

diff --git a/mtcl/main.lua b/mtcl/main.lua @@ -2,24 +2,27 @@ local file = io.open(arg[1], "rb") local contents = file:read("*all") +local a = string.byte("a") +local z = string.byte("z") +local A = string.byte("A") +local Z = string.byte("Z") +local zero = string.byte("0") +local nine = string.byte("9") +function is_alpha(c) + return (c >= a and c <= z) or (c >= A and c <= Z) +end + +function alpha_num(c) + return is_alpha(c) or (c >= zero and c <= nine) +end function read_ident(start, src) local word = "" - local a = string.byte("a") - local z = string.byte("z") - local A = string.byte("A") - local Z = string.byte("Z") - local zero = string.byte("0") - local nine = string.byte("9") local adv = 0 local i = start while i <= #src do local c = src:sub(i, i):byte() - if - not (c >= a and c <= z) - and not (c >= A and c <= Z) - and not (c >= zero and c <= nine) - then + if not alpha_num(c) then break end i = i + 1 @@ -29,27 +32,53 @@ function read_ident(start, src) return word, i end +TK = { + IDK = "UNKNOWN", + COLON = "colon", + COLONCOLON = "colcol", + IDENT = "ident", + DOT = "dot", +} + +Token = { kind = TK.IDK } + +local function print_token(t) + print("type: " .. t.kind .. "\t value: " .. 
t.lexeme) +end + function lex(src) - local function skip() end - local whitespacers = { - ["\t"] = skip, - ["\r"] = skip, - ["\n"] = skip, - [" "] = skip, - } + local tokens = {} local i = 1 + + local function next() + if i + 1 <= #src then + return src:sub(i + 1, i + 1) + end + end + while i <= #src do local c = src:sub(i, i) - local char_fx = whitespacers[c] - if char_fx then - char_fx() - else + if c == "\t" or c == "\n" or c == " " or c == "\r" then + elseif c == ":" then + if next() == ":" then + tokens[#tokens + 1] = { kind = TK.COLONCOLON, lexeme = "::" } + i = i + 1 + else + tokens[#tokens + 1] = { kind = TK.COLON, lexeme = ":" } + end + elseif is_alpha(c:byte()) then word, i = read_ident(i, src) - -- add word to tokens + tokens[#tokens + 1] = { kind = TK.IDENT, lexeme = word } + else + tokens[#tokens + 1] = { kind = TK.IDK, lexeme = "" } end i = i + 1 end + + for i = 1, #tokens do + print_token(tokens[i]) + end end lex(contents)