mighty

The mighty programming language, compiler and tools (WIP)
Log | Files | Refs

commit c3b65f7f8688d268cac9e824c14428d867a56286
parent 1fc64d1171d719ce9d54ebeef6283cd7c1993925
Author: citbl <citbl@citbl.org>
Date:   Sat, 23 May 2026 21:15:52 +1000

refac

Diffstat:
A mtcl/lexer.lua | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M mtcl/main.lua | 141 +++----------------------------------------------------------------------------
2 files changed, 147 insertions(+), 137 deletions(-)

diff --git a/mtcl/lexer.lua b/mtcl/lexer.lua @@ -0,0 +1,143 @@ +local lexer = { tokens = {} } + +local a = string.byte("a") +local z = string.byte("z") +local A = string.byte("A") +local Z = string.byte("Z") +local zero = string.byte("0") +local nine = string.byte("9") + +function is_alpha(c) + return (c >= a and c <= z) or (c >= A and c <= Z) +end + +function is_digit(c) + return (c >= zero and c <= nine) +end + +function alpha_num(c) + return is_alpha(c) or is_digit(c) +end + +function read_ident(start, src) + local word = "" + local i = start + local c = 0 + while i <= #src do + c = src:sub(i, i):byte() + if not alpha_num(c) then + i = i - 1 -- rewind + break + end + i = i + 1 + end + word = src:sub(start, i) + return word, i +end + +function read_number(start, src) + local num = "" + local i = start + local c = 0 + local ch = "" + local is_float = false + while i <= #src do + ch = src:sub(i, i) + c = ch:byte() + if ch == "." then + is_float = true + end + if not is_digit(c) and ch ~= "." then + i = i - 1 -- rewind + break + end + i = i + 1 + end + num = src:sub(start, i) + return num, i, is_float +end + +TK = { + IDK = "UNKNOWN", + COLON = "colon", + COLONCOLON = "colcol", + IDENT = "ident", + DOT = "dot", + EQ = "assign", + EQEQ = "equality", + L_PAREN = "l paren", + R_PAREN = "r paren", + DBL_QUOTE = "dbl quote", + SGL_QUOTE = "sgl quote", + LIT_FLOAT = "float lit", + LIT_INT = "int lit", +} + +Token = { kind = TK.IDK } + +local function print_token(t) + print("type: " .. t.kind .. "\t value: " .. 
t.lexeme) +end + +function lexer:lex(src) + local i = 1 + local tokens = {} + + local function next() + if i + 1 <= #src then + return src:sub(i + 1, i + 1) + end + end + + while i <= #src do + local c = src:sub(i, i) + + if c == "\t" or c == "\n" or c == " " or c == "\r" then + -- nothing + elseif is_alpha(c:byte()) then + word, i = read_ident(i, src) + tokens[#tokens + 1] = { kind = TK.IDENT, lexeme = word } + elseif is_digit(c:byte()) then + number, i, is_float = read_number(i, src) + local kind = TK.IDK + if is_float then + kind = TK.LIT_FLOAT + else + kind = TK.LIT_INT + end + tokens[#tokens + 1] = { kind = kind, lexeme = number } + elseif c == ":" then + if next() == ":" then + tokens[#tokens + 1] = { kind = TK.COLONCOLON, lexeme = "::" } + i = i + 1 + else + tokens[#tokens + 1] = { kind = TK.COLON, lexeme = ":" } + end + elseif c == "=" then + if next() == "=" then + tokens[#tokens + 1] = { kind = TK.EQEQ, lexeme = "==" } + i = i + 1 + else + tokens[#tokens + 1] = { kind = TK.EQ, lexeme = "=" } + end + elseif c == "(" then + tokens[#tokens + 1] = { kind = TK.L_PAREN, lexeme = "(" } + elseif c == ")" then + tokens[#tokens + 1] = { kind = TK.R_PAREN, lexeme = ")" } + elseif c == "'" then + tokens[#tokens + 1] = { kind = TK.SGL_QUOTE, lexeme = "'" } + elseif c == '"' then + tokens[#tokens + 1] = { kind = TK.DBL_QUOTE, lexeme = '"' } + else + tokens[#tokens + 1] = { kind = TK.IDK, lexeme = "" } + end + i = i + 1 + end + + for i = 1, #tokens do + print_token(tokens[i]) + end + self.tokens = tokens +end + +return lexer diff --git a/mtcl/main.lua b/mtcl/main.lua @@ -1,143 +1,10 @@ -- comp +local lexer = require("lexer") + local file = io.open(arg[1], "rb") local contents = file:read("*all") -local a = string.byte("a") -local z = string.byte("z") -local A = string.byte("A") -local Z = string.byte("Z") -local zero = string.byte("0") -local nine = string.byte("9") -function is_alpha(c) - return (c >= a and c <= z) or (c >= A and c <= Z) -end - -function is_digit(c) - 
return (c >= zero and c <= nine) -end - -function alpha_num(c) - return is_alpha(c) or is_digit(c) -end - -function read_ident(start, src) - local word = "" - local i = start - local c = 0 - while i <= #src do - c = src:sub(i, i):byte() - if not alpha_num(c) then - i = i - 1 -- rewind - break - end - i = i + 1 - end - word = src:sub(start, i) - return word, i -end - -function read_number(start, src) - local num = "" - local i = start - local c = 0 - local ch = "" - local is_float = false - while i <= #src do - ch = src:sub(i, i) - c = ch:byte() - if ch == "." then - is_float = true - end - if not is_digit(c) and ch ~= "." then - i = i - 1 -- rewind - break - end - i = i + 1 - end - num = src:sub(start, i) - return num, i, is_float -end - -TK = { - IDK = "UNKNOWN", - COLON = "colon", - COLONCOLON = "colcol", - IDENT = "ident", - DOT = "dot", - EQ = "assign", - EQEQ = "equality", - L_PAREN = "l paren", - R_PAREN = "r paren", - DBL_QUOTE = "dbl quote", - SGL_QUOTE = "sgl quote", - LIT_FLOAT = "float lit", - LIT_INT = "int lit", -} - -Token = { kind = TK.IDK } - -local function print_token(t) - print("type: " .. t.kind .. "\t value: " .. 
t.lexeme) -end - -function lex(src) - local tokens = {} - local i = 1 - - local function next() - if i + 1 <= #src then - return src:sub(i + 1, i + 1) - end - end - - while i <= #src do - local c = src:sub(i, i) - - if c == "\t" or c == "\n" or c == " " or c == "\r" then - -- nothing - elseif is_alpha(c:byte()) then - word, i = read_ident(i, src) - tokens[#tokens + 1] = { kind = TK.IDENT, lexeme = word } - elseif is_digit(c:byte()) then - number, i, is_float = read_number(i, src) - local kind = TK.IDK - if is_float then - kind = TK.LIT_FLOAT - else - kind = TK.LIT_INT - end - tokens[#tokens + 1] = { kind = kind, lexeme = number } - elseif c == ":" then - if next() == ":" then - tokens[#tokens + 1] = { kind = TK.COLONCOLON, lexeme = "::" } - i = i + 1 - else - tokens[#tokens + 1] = { kind = TK.COLON, lexeme = ":" } - end - elseif c == "=" then - if next() == "=" then - tokens[#tokens + 1] = { kind = TK.EQEQ, lexeme = "==" } - i = i + 1 - else - tokens[#tokens + 1] = { kind = TK.EQ, lexeme = "=" } - end - elseif c == "(" then - tokens[#tokens + 1] = { kind = TK.L_PAREN, lexeme = "(" } - elseif c == ")" then - tokens[#tokens + 1] = { kind = TK.R_PAREN, lexeme = ")" } - elseif c == "'" then - tokens[#tokens + 1] = { kind = TK.SGL_QUOTE, lexeme = "'" } - elseif c == '"' then - tokens[#tokens + 1] = { kind = TK.DBL_QUOTE, lexeme = '"' } - else - tokens[#tokens + 1] = { kind = TK.IDK, lexeme = "" } - end - i = i + 1 - end - for i = 1, #tokens do - print_token(tokens[i]) - end -end +lexer:lex(contents) -lex(contents) +print(#lexer.tokens .. " tokens")