commit c3b65f7f8688d268cac9e824c14428d867a56286
parent 1fc64d1171d719ce9d54ebeef6283cd7c1993925
Author: citbl <citbl@citbl.org>
Date: Sat, 23 May 2026 21:15:52 +1000
refac
Diffstat:
| A | mtcl/lexer.lua | | | 143 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | mtcl/main.lua | | | 141 | +++---------------------------------------------------------------------------- |
2 files changed, 147 insertions(+), 137 deletions(-)
diff --git a/mtcl/lexer.lua b/mtcl/lexer.lua
@@ -0,0 +1,143 @@
+-- Module table returned by this file. `tokens` is replaced with the token
+-- list each time lexer:lex() runs.
+local lexer = { tokens = {} }
+
+-- Byte values that bound the ASCII lowercase, uppercase and digit ranges.
+-- The character-class helpers compare raw bytes against these.
+local a, z = string.byte("az", 1, 2)
+local A, Z = string.byte("AZ", 1, 2)
+local zero, nine = string.byte("09", 1, 2)
+
+-- Report whether byte value `c` is an ASCII letter (a-z or A-Z).
+-- `c` is a numeric byte code, not a one-character string.
+function is_alpha(c)
+    if c >= a and c <= z then
+        return true
+    end
+    return c >= A and c <= Z
+end
+
+-- Report whether byte value `c` is an ASCII decimal digit (0-9).
+-- `c` is a numeric byte code, not a one-character string.
+function is_digit(c)
+    if c < zero then
+        return false
+    end
+    return c <= nine
+end
+
+-- Report whether byte value `c` is an ASCII letter or decimal digit.
+function alpha_num(c)
+    if is_alpha(c) then
+        return true
+    end
+    return is_digit(c)
+end
+
+-- Scan an identifier (a run of ASCII letters and digits) starting at `start`.
+--
+-- start: index in `src` of the identifier's first character
+-- src:   the source text being lexed
+--
+-- Returns the identifier text and the index of its LAST character. The
+-- index now means the same thing whether the identifier is followed by
+-- another character or runs to the end of `src`; previously the end-of-input
+-- case returned one past the end. When nothing matches, the text is "" and
+-- the index is `start - 1`.
+function read_ident(start, src)
+    local last = start - 1
+    local len = #src
+    -- Peek one byte ahead and only advance while it still belongs to the word.
+    while last < len and alpha_num(src:byte(last + 1)) do
+        last = last + 1
+    end
+    return src:sub(start, last), last
+end
+
+-- Scan a numeric literal starting at `start`.
+--
+-- Consumes decimal digits and at most ONE "." so that input such as "1.2.3"
+-- is no longer swallowed as a single malformed float; scanning stops in
+-- front of the second dot and the caller sees it as a separate character.
+--
+-- start: index in `src` of the literal's first character
+-- src:   the source text being lexed
+--
+-- Returns the literal text, the index of its LAST character (also when the
+-- literal runs to the end of `src`), and true when the literal contains ".".
+function read_number(start, src)
+    local last = start - 1
+    local len = #src
+    local is_float = false
+    while last < len do
+        local pos = last + 1
+        if src:sub(pos, pos) == "." then
+            -- A second dot cannot belong to the same literal.
+            if is_float then
+                break
+            end
+            is_float = true
+        elseif not is_digit(src:byte(pos)) then
+            break
+        end
+        last = pos
+    end
+    return src:sub(start, last), last, is_float
+end
+
+-- Token kinds. Each value is the human-readable label that appears when a
+-- token is printed.
+TK = {
+    -- fallback kind
+    IDK = "UNKNOWN",
+
+    -- names and literals
+    IDENT = "ident",
+    LIT_INT = "int lit",
+    LIT_FLOAT = "float lit",
+
+    -- punctuation and operators
+    DOT = "dot",
+    COLON = "colon",
+    COLONCOLON = "colcol",
+    EQ = "assign",
+    EQEQ = "equality",
+    L_PAREN = "l paren",
+    R_PAREN = "r paren",
+
+    -- quote characters
+    SGL_QUOTE = "sgl quote",
+    DBL_QUOTE = "dbl quote",
+}
+
+-- Global token template whose kind defaults to TK.IDK.
+-- NOTE(review): nothing in this file reads it; confirm whether other modules do.
+Token = { kind = TK.IDK }
+
+-- Write one token to stdout as "type: <kind>  value: <lexeme>".
+-- `t` must carry both a `kind` and a `lexeme` field.
+local function print_token(t)
+    local line = "type: " .. t.kind .. "\t value: " .. t.lexeme
+    print(line)
+end
+
+-- Characters that always form a one-character token, mapped to their kind.
+local SINGLE_CHAR_KINDS = {
+    ["("] = TK.L_PAREN,
+    [")"] = TK.R_PAREN,
+    ["'"] = TK.SGL_QUOTE,
+    ['"'] = TK.DBL_QUOTE,
+    ["."] = TK.DOT,
+}
+
+-- Split `src` into tokens.
+--
+-- Whitespace (tab, newline, carriage return, space) is skipped. Identifiers
+-- and numbers are delegated to read_ident / read_number. ":" / "::" and
+-- "=" / "==" are told apart with one character of lookahead. Any other
+-- character becomes a TK.IDK token that carries the offending character as
+-- its lexeme, so later stages can report it.
+--
+-- src: the source text to tokenise
+--
+-- Side effects: prints every token to stdout and stores the list in
+-- `self.tokens`.
+-- Returns the same token list; each entry is { kind = ..., lexeme = ... }.
+function lexer:lex(src)
+    local tokens = {}
+    local pos = 1
+    local len = #src
+
+    local function push(kind, lexeme)
+        tokens[#tokens + 1] = { kind = kind, lexeme = lexeme }
+    end
+
+    -- Character after the current one, or "" at end of input.
+    local function peek()
+        return src:sub(pos + 1, pos + 1)
+    end
+
+    while pos <= len do
+        local c = src:sub(pos, pos)
+        local byte = src:byte(pos)
+
+        if c == "\t" or c == "\n" or c == " " or c == "\r" then
+            -- whitespace separates tokens and is otherwise ignored
+        elseif is_alpha(byte) then
+            -- The readers return the index of the last character they
+            -- consumed; the shared `pos + 1` below steps past it.
+            local word, last = read_ident(pos, src)
+            push(TK.IDENT, word)
+            pos = last
+        elseif is_digit(byte) then
+            local number, last, is_float = read_number(pos, src)
+            push(is_float and TK.LIT_FLOAT or TK.LIT_INT, number)
+            pos = last
+        elseif c == ":" then
+            if peek() == ":" then
+                push(TK.COLONCOLON, "::")
+                pos = pos + 1
+            else
+                push(TK.COLON, ":")
+            end
+        elseif c == "=" then
+            if peek() == "=" then
+                push(TK.EQEQ, "==")
+                pos = pos + 1
+            else
+                push(TK.EQ, "=")
+            end
+        elseif SINGLE_CHAR_KINDS[c] then
+            push(SINGLE_CHAR_KINDS[c], c)
+        else
+            push(TK.IDK, c)
+        end
+        pos = pos + 1
+    end
+
+    for n = 1, #tokens do
+        print_token(tokens[n])
+    end
+    self.tokens = tokens
+    return tokens
+end
+
+-- Hand the module table to whoever require()s this file.
+return lexer
diff --git a/mtcl/main.lua b/mtcl/main.lua
@@ -1,143 +1,10 @@
-- comp
+local lexer = require("lexer")
+
-local file = io.open(arg[1], "rb")
+-- Stop with a readable message when the path is missing or cannot be
+-- opened, instead of failing later with "attempt to index a nil value".
+local path = assert(arg[1], "usage: lua main.lua <source-file>")
+local file = assert(io.open(path, "rb"))
local contents = file:read("*all")
+-- The whole file is in memory now; release the handle.
+file:close()
-local a = string.byte("a")
-local z = string.byte("z")
-local A = string.byte("A")
-local Z = string.byte("Z")
-local zero = string.byte("0")
-local nine = string.byte("9")
-function is_alpha(c)
- return (c >= a and c <= z) or (c >= A and c <= Z)
-end
-
-function is_digit(c)
- return (c >= zero and c <= nine)
-end
-
-function alpha_num(c)
- return is_alpha(c) or is_digit(c)
-end
-
-function read_ident(start, src)
- local word = ""
- local i = start
- local c = 0
- while i <= #src do
- c = src:sub(i, i):byte()
- if not alpha_num(c) then
- i = i - 1 -- rewind
- break
- end
- i = i + 1
- end
- word = src:sub(start, i)
- return word, i
-end
-
-function read_number(start, src)
- local num = ""
- local i = start
- local c = 0
- local ch = ""
- local is_float = false
- while i <= #src do
- ch = src:sub(i, i)
- c = ch:byte()
- if ch == "." then
- is_float = true
- end
- if not is_digit(c) and ch ~= "." then
- i = i - 1 -- rewind
- break
- end
- i = i + 1
- end
- num = src:sub(start, i)
- return num, i, is_float
-end
-
-TK = {
- IDK = "UNKNOWN",
- COLON = "colon",
- COLONCOLON = "colcol",
- IDENT = "ident",
- DOT = "dot",
- EQ = "assign",
- EQEQ = "equality",
- L_PAREN = "l paren",
- R_PAREN = "r paren",
- DBL_QUOTE = "dbl quote",
- SGL_QUOTE = "sgl quote",
- LIT_FLOAT = "float lit",
- LIT_INT = "int lit",
-}
-
-Token = { kind = TK.IDK }
-
-local function print_token(t)
- print("type: " .. t.kind .. "\t value: " .. t.lexeme)
-end
-
-function lex(src)
- local tokens = {}
- local i = 1
-
- local function next()
- if i + 1 <= #src then
- return src:sub(i + 1, i + 1)
- end
- end
-
- while i <= #src do
- local c = src:sub(i, i)
-
- if c == "\t" or c == "\n" or c == " " or c == "\r" then
- -- nothing
- elseif is_alpha(c:byte()) then
- word, i = read_ident(i, src)
- tokens[#tokens + 1] = { kind = TK.IDENT, lexeme = word }
- elseif is_digit(c:byte()) then
- number, i, is_float = read_number(i, src)
- local kind = TK.IDK
- if is_float then
- kind = TK.LIT_FLOAT
- else
- kind = TK.LIT_INT
- end
- tokens[#tokens + 1] = { kind = kind, lexeme = number }
- elseif c == ":" then
- if next() == ":" then
- tokens[#tokens + 1] = { kind = TK.COLONCOLON, lexeme = "::" }
- i = i + 1
- else
- tokens[#tokens + 1] = { kind = TK.COLON, lexeme = ":" }
- end
- elseif c == "=" then
- if next() == "=" then
- tokens[#tokens + 1] = { kind = TK.EQEQ, lexeme = "==" }
- i = i + 1
- else
- tokens[#tokens + 1] = { kind = TK.EQ, lexeme = "=" }
- end
- elseif c == "(" then
- tokens[#tokens + 1] = { kind = TK.L_PAREN, lexeme = "(" }
- elseif c == ")" then
- tokens[#tokens + 1] = { kind = TK.R_PAREN, lexeme = ")" }
- elseif c == "'" then
- tokens[#tokens + 1] = { kind = TK.SGL_QUOTE, lexeme = "'" }
- elseif c == '"' then
- tokens[#tokens + 1] = { kind = TK.DBL_QUOTE, lexeme = '"' }
- else
- tokens[#tokens + 1] = { kind = TK.IDK, lexeme = "" }
- end
- i = i + 1
- end
- for i = 1, #tokens do
- print_token(tokens[i])
- end
-end
+-- Tokenise the file contents; lex() prints each token and stores the list
+-- on lexer.tokens, which is then used for the summary line.
+lexer:lex(contents)
-lex(contents)
+print(#lexer.tokens .. " tokens")