commit 7df7dd39e477ce01bd2712e6f554048772385816
parent da6a6faac23dc2e0b9a498cf6a43e10a81efbc6d
Author: citbl <citbl@citbl.org>
Date: Sat, 23 May 2026 19:11:36 +1000
lex
Diffstat:
| M | mtcl/main.lua | | | 75 | ++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------- |
1 file changed, 52 insertions(+), 23 deletions(-)
diff --git a/mtcl/main.lua b/mtcl/main.lua
@@ -2,24 +2,27 @@
local file = io.open(arg[1], "rb")
local contents = file:read("*all")
+-- Byte values bounding the ASCII ranges a-z, A-Z and 0-9, read by the
+-- character classifiers below. string.byte(s, 1, 2) returns the codes of
+-- the first two characters of s in a single call.
+local a, z = string.byte("az", 1, 2)
+local A, Z = string.byte("AZ", 1, 2)
+local zero, nine = string.byte("09", 1, 2)
+--- Report whether a byte value falls in the a-z or A-Z range.
+-- @param c numeric byte value, e.g. the result of string.byte
+-- @return true when c is within a..z or A..Z, false otherwise
+function is_alpha(c)
+    local is_lower = a <= c and c <= z
+    local is_upper = A <= c and c <= Z
+    return is_lower or is_upper
+end
+
+--- Report whether a byte value is a letter (per is_alpha) or a digit 0-9.
+-- @param c numeric byte value, e.g. the result of string.byte
+-- @return true for letters and decimal digits, false otherwise
+function alpha_num(c)
+    if is_alpha(c) then
+        return true
+    end
+    return zero <= c and c <= nine
+end
function read_ident(start, src)
local word = ""
- local a = string.byte("a")
- local z = string.byte("z")
- local A = string.byte("A")
- local Z = string.byte("Z")
- local zero = string.byte("0")
- local nine = string.byte("9")
local adv = 0
local i = start
while i <= #src do
local c = src:sub(i, i):byte()
- if
- not (c >= a and c <= z)
- and not (c >= A and c <= Z)
- and not (c >= zero and c <= nine)
- then
+ if not alpha_num(c) then
break
end
i = i + 1
@@ -29,27 +32,53 @@ function read_ident(start, src)
return word, i
end
+-- Token kind tags. Each value is the string print_token shows after
+-- "type: " for a token of that kind.
+TK = {
+    IDENT = "ident", -- word returned by read_ident
+    COLON = "colon", -- ":"
+    COLONCOLON = "colcol", -- "::"
+    DOT = "dot", -- not produced by the lexer code visible here
+    IDK = "UNKNOWN", -- any character the lexer does not recognise
+}
+
+-- Global table carrying only a `kind` field, initialised to the unknown tag.
+-- NOTE(review): nothing visible reads it; presumably a default token shape.
+Token = { kind = TK.IDK }
+
+--- Print one token as a single line via print.
+-- @param t table whose `kind` and `lexeme` fields are concatenated into
+--   the printed line
+local function print_token(t)
+    local value_part = "\t value: " .. t.lexeme
+    local kind_part = "type: " .. t.kind
+    print(kind_part .. value_part)
+end
+
+--- Split `src` into tokens and print each one with print_token.
+-- Whitespace (tab, newline, space, carriage return) produces no token.
+-- ":" becomes TK.COLON and "::" becomes TK.COLONCOLON; a run starting with
+-- a letter is read by read_ident and stored as TK.IDENT; any other
+-- character becomes a TK.IDK token with an empty lexeme.
+-- @param src string holding the text to tokenise
+-- Returns nothing; the collected tokens are only printed.
function lex(src)
- local function skip() end
- local whitespacers = {
- ["\t"] = skip,
- ["\r"] = skip,
- ["\n"] = skip,
- [" "] = skip,
- }
+    local tokens = {}
local i = 1
+
+    -- Character following the current one, or nil at the end of input.
+    -- Named `peek` so the builtin `next` is not shadowed inside lex.
+    local function peek()
+        if i + 1 <= #src then
+            return src:sub(i + 1, i + 1)
+        end
+    end
+
while i <= #src do
local c = src:sub(i, i)
- local char_fx = whitespacers[c]
- if char_fx then
- char_fx()
- else
+        if c == "\t" or c == "\n" or c == " " or c == "\r" then
+            -- whitespace: nothing to emit
+        elseif c == ":" then
+            if peek() == ":" then
+                tokens[#tokens + 1] = { kind = TK.COLONCOLON, lexeme = "::" }
+                i = i + 1 -- also consume the second colon
+            else
+                tokens[#tokens + 1] = { kind = TK.COLON, lexeme = ":" }
+            end
+        elseif is_alpha(c:byte()) then
- word, i = read_ident(i, src)
- -- add word to tokens
+            -- `word` is declared local here; the bare assignment it replaces
+            -- created a global.
+            local word, stop = read_ident(i, src)
+            tokens[#tokens + 1] = { kind = TK.IDENT, lexeme = word }
+            -- read_ident's visible loop stops on the first character that is
+            -- not a letter or digit and returns that index. Step back one so
+            -- the shared `i = i + 1` below lands on that character instead of
+            -- skipping it (otherwise the ":" in "a:b" is never lexed).
+            -- NOTE(review): part of read_ident lies outside this hunk; confirm
+            -- it does not adjust the index before returning.
+            i = stop - 1
+        else
+            tokens[#tokens + 1] = { kind = TK.IDK, lexeme = "" }
end
i = i + 1
end
+
+    -- Separate loop variable so the cursor `i` above is not shadowed.
+    for n = 1, #tokens do
+        print_token(tokens[n])
+    end
end
lex(contents)