-- lexer.lua (5001B)
-- Lexer singleton returned by this module. It holds the output token list
-- together with the source position that is stamped onto each new token.
local lexer = {
    file = "---", -- placeholder until lex() stores the real file name
    line = 1, -- line recorded on the next token
    col = 1, -- column recorded on the next token
    tokens = {}, -- tokens emitted so far, in the order they were added
}
-- Byte values bounding the ASCII ranges used by the character classifiers.
-- string.byte(s, 1, 2) returns the codes of the first two characters of s.
local a, z = string.byte("az", 1, 2)
local A, Z = string.byte("AZ", 1, 2)
local zero, nine = string.byte("09", 1, 2)
--- Report whether a byte value is an ASCII letter.
-- @param c numeric byte value (as returned by string.byte)
-- @return true for a-z or A-Z, false otherwise
function is_alpha(c)
    local is_lower = c >= a and c <= z
    if is_lower then
        return true
    end
    return c >= A and c <= Z
end
--- Report whether a byte value is an ASCII decimal digit.
-- @param c numeric byte value (as returned by string.byte)
-- @return true for 0-9, false otherwise
function is_digit(c)
    if not (c >= zero) then
        return false
    end
    return c <= nine
end
--- Report whether a byte value is an ASCII letter or decimal digit.
-- @param c numeric byte value (as returned by string.byte)
-- @return the result of is_alpha(c) when truthy, otherwise that of is_digit(c)
function alpha_num(c)
    local letter = is_alpha(c)
    if letter then
        return letter
    end
    return is_digit(c)
end
do
    -- The four characters treated as whitespace.
    local whitespace = {
        ["\t"] = true,
        ["\n"] = true,
        [" "] = true,
        ["\r"] = true,
    }

    --- Report whether `c` is a whitespace character.
    -- Unlike is_alpha/is_digit, this takes the character itself (a
    -- one-character string), not its byte value.
    -- @param c one-character string
    -- @return true for tab, newline, space or carriage return; false otherwise
    function is_space(c)
        return whitespace[c] == true
    end
end
--- Read a run of ASCII letters and digits beginning at `start`.
--
-- The second result is always the index of the last character consumed, so
-- the caller resumes at `index + 1`. This now also holds when the identifier
-- runs to the end of `src`; previously that case returned `#src + 1`.
--
-- @param start 1-based index in `src` where scanning begins
-- @param src   source text
-- @return the matched text ("" when the character at `start` is not a letter
--         or digit)
-- @return index of the last character consumed (`start - 1` if none was)
function read_ident(start, src)
    if start > #src then
        -- Nothing left to scan.
        return "", start - 1
    end
    -- The anchored pattern can match the empty string, so `find` always
    -- succeeds here; an empty match yields last == start - 1.
    local _, last = src:find("^[A-Za-z0-9]*", start)
    return src:sub(start, last), last
end
--- Read a numeric literal beginning at `start`.
--
-- A literal is a run of ASCII digits containing at most one decimal point.
-- A second "." ends the literal, so "1.2.3" yields "1.2" rather than one
-- malformed float. The second result is always the index of the last
-- character consumed, including when the literal runs to the end of `src`.
--
-- @param start 1-based index in `src` where scanning begins
-- @param src   source text
-- @return the literal text
-- @return index of the last character consumed (`start - 1` if none was)
-- @return true when the literal contains a decimal point, false otherwise
function read_number(start, src)
    if start > #src then
        -- Nothing left to scan.
        return "", start - 1, false
    end
    -- Every part of the pattern is optional, so `find` always succeeds here.
    local _, last = src:find("^[0-9]*%.?[0-9]*", start)
    local num = src:sub(start, last)
    local is_float = num:find(".", 1, true) ~= nil
    return num, last, is_float
end
--- Read a double-quoted string literal whose opening quote is at `start`.
--
-- Escape sequences are not interpreted (TODO: escaping \") and the literal
-- may span multiple lines (TODO: block multi-line literals). If no closing
-- quote exists, the rest of `src` is taken as the contents.
--
-- @param start 1-based index of the opening double quote in `src`
-- @param src   source text
-- @return the text between the quotes
-- @return index of the last character consumed: the closing quote, or the
--         end of `src` when the literal is unterminated
function read_string_literal(start, src)
    local first = start + 1 -- skip the opening double quote
    local close = src:find('"', first, true) -- plain search, no patterns
    if close then
        return src:sub(first, close - 1), close
    end
    -- Unterminated: never report an index before the opening quote.
    return src:sub(first), math.max(#src, start)
end
-- Token kinds. Each value is the label that print_token shows for the kind.
TK = {
    -- fallback
    IDK = "UNKNOWN",

    -- statement structure
    EOS = "EOS end stmt",
    DO = "DO KEYWORD",

    -- names
    IDENT = "ident",
    KEYWORD = "keyword",

    -- punctuation and operators
    COLON = "colon",
    COLONCOLON = "colcol",
    DOT = "dot",
    COMMA = "comma",
    EQ = "assign",
    EQEQ = "equality",
    L_PAREN = "l paren",
    R_PAREN = "r paren",
    L_BRACE = "l brace",
    R_BRACE = "r brace",
    DBL_QUOTE = "dbl quo",
    SGL_QUOTE = "sgl quo",

    -- literals
    LIT_FLOAT = "a float",
    LIT_INT = "a int",
    LIT_STRING = "a string",
}
-- Set of reserved words: keywords[word] is true for each word listed below.
local keywords = {}
for _, reserved in ipairs({
    "ns", "use", "from", "ffi", "as",
    "and", "or",
    "struct", "enum",
    "if", "else", "end",
    "pub", "fx", "fn", "return",
}) do
    keywords[reserved] = true
end
-- Global token record with placeholder values. It lists the same five fields
-- that lexer:add fills in for every emitted token.
Token = {
    kind = TK.IDK,
    lexeme = "",
    file = "",
    line = 0,
    col = 0,
}
--- Print a one-line description of a token: where it is, then what it is.
-- @param t token table with file, line, col, kind and lexeme fields
local function print_token(t)
    local location = t.file .. " L" .. t.line .. ":" .. t.col
    local details = " \t type: " .. t.kind .. "\t value: " .. t.lexeme
    print(location .. details)
end
--- Classify a word as a keyword or a plain identifier.
-- Prints "upgraded <word>" whenever the word is found in `keywords`.
-- @param word identifier text
-- @return TK.KEYWORD for reserved words, TK.IDENT otherwise
local function upgrade(word)
    local kind = TK.IDENT
    if keywords[word] then
        print("upgraded " .. word)
        kind = TK.KEYWORD
    end
    return kind
end
--- Append a token stamped with the lexer's current file, line and column.
-- @param kind   token kind (one of the TK values)
-- @param lexeme source text of the token
function lexer:add(kind, lexeme)
    -- `local` keeps the record out of the global table; the previous bare
    -- assignment overwrote a global named `token` on every call.
    local token = {
        kind = kind,
        lexeme = lexeme,
        file = self.file,
        line = self.line,
        col = self.col,
    }
    self.tokens[#self.tokens + 1] = token
end
--- Tokenize `src` and append the resulting tokens to `self.tokens`.
--
-- Each newline produces a TK.EOS token (lexeme "CR") and advances
-- `self.line`; other whitespace only advances `self.col`. A character with
-- no lexing rule produces a TK.IDK token with an empty lexeme. After the
-- input is consumed, every token in `self.tokens` is printed.
--
-- `self.tokens`, `self.line` and `self.col` are not reset here, so they
-- carry over from any earlier call on the same lexer.
--
-- @param file name stored in `self.file` and stamped on each token
-- @param src  source text to scan
function lexer:lex(file, src)
    local i = 1
    self.file = file

    -- Tokens that are exactly one character long and need no lookahead.
    local single_char = {
        ["("] = TK.L_PAREN,
        [")"] = TK.R_PAREN,
        ["{"] = TK.L_BRACE,
        ["}"] = TK.R_BRACE,
        [";"] = TK.EOS,
        [","] = TK.COMMA,
        ["'"] = TK.SGL_QUOTE,
    }

    -- Characters that form a different token when doubled ("::", "==").
    local doubled = {
        [":"] = { one = TK.COLON, two = TK.COLONCOLON },
        ["="] = { one = TK.EQ, two = TK.EQEQ },
    }

    -- Character after the current one, or nil at end of input.
    -- (Not named `next`, which would shadow the builtin.)
    local function peek()
        if i + 1 <= #src then
            return src:sub(i + 1, i + 1)
        end
    end

    while i <= #src do
        local c = src:sub(i, i)
        local byte = c:byte()

        if is_space(c) then
            if c == "\n" then
                self:add(TK.EOS, "CR")
                self.col = 1
                self.line = self.line + 1
            else
                self.col = self.col + 1
            end
            i = i + 1
        elseif is_alpha(byte) then
            -- Results are kept local; they used to leak as globals.
            local word, last = read_ident(i, src)
            self:add(upgrade(word), word)
            self.col = self.col + #word
            i = last + 1
        elseif is_digit(byte) then
            local number, last, is_float = read_number(i, src)
            local kind = TK.LIT_INT
            if is_float then
                kind = TK.LIT_FLOAT
            end
            self:add(kind, number)
            self.col = self.col + #number
            i = last + 1
        elseif c == '"' then
            -- Every double quote starts a string literal, so no TK.DBL_QUOTE
            -- token is produced; the old second '"' branch was unreachable.
            local str, last = read_string_literal(i, src)
            self:add(TK.LIT_STRING, str)
            self.col = self.col + #str + 2 -- contents plus both quotes
            i = last + 1
        elseif doubled[c] then
            local kinds = doubled[c]
            if peek() == c then
                self:add(kinds.two, c .. c)
                self.col = self.col + 2
                i = i + 2
            else
                self:add(kinds.one, c)
                self.col = self.col + 1
                i = i + 1
            end
        else
            local kind = single_char[c]
            if kind then
                self:add(kind, c)
            else
                -- No rule for this character.
                self:add(TK.IDK, "")
            end
            self.col = self.col + 1
            i = i + 1
        end
    end

    -- Dump every token collected so far.
    for _, tok in ipairs(self.tokens) do
        print_token(tok)
    end
end
-- Module value: the lexer table defined at the top of this file.
return lexer