lexer.go (6001B)
package lexer
import (
"fmt"
)
// Kind identifies the lexical category of a Token.
type Kind int
// Token kinds. The numeric values are assigned by iota, so they follow the
// declaration order below; inserting or reordering entries renumbers every
// kind that comes after the change.
const (
// Ident is an identifier that did not match any reserved keyword.
Ident Kind = iota
// Reserved keywords. Their source spellings are listed in kindsKeywords and
// keywordKinds.
KeywordNs
KeywordIn
KeywordFrom
KeywordUse
KeywordFfi
KeywordDrop
KeywordAs
KeywordOf
KeywordAnd
KeywordOr
KeywordRef
KeywordStruct
KeywordEnum
KeywordPre
KeywordPost
KeywordInv
KeywordIf
KeywordElse
KeywordWhere
KeywordPub
// Punctuation: ".", ":", "::" and ",".
Dot
Colon
ColonColon
Comma
// EOF is the kind of the final token that Lex appends to its result.
EOF
// Eq is "=".
Eq
// Literal kinds.
// NOTE(review): the Lex function in this file has no code path that emits
// LiteralBool or LiteralChar - confirm whether they are produced elsewhere.
LiteralBool
LiteralChar
LiteralFloat
LiteralInt
LiteralString
// "-", "-=" and "--".
Minus
MinusEq
MinusMinus
// Bracketing pairs: "(" ")", "{" "}", "[" "]".
LParen
RParen
LBrace
RBrace
LBracket
RBracket
// "+", "+=" and "++".
Plus
PlusEq
PlusPlus
// "/" and "*".
Slash
Star
// BadToken is what Lex emits for a byte that starts no recognised token.
BadToken
)
// kindsKeywords maps every keyword Kind to the spelling it has in source text.
// Kind.String consults it so that keyword kinds print as the keyword itself.
// It is the inverse of keywordKinds; the two tables must be edited together.
var kindsKeywords = map[Kind]string{
	KeywordNs:     "ns",
	KeywordIn:     "in",
	KeywordFrom:   "from",
	KeywordUse:    "use",
	KeywordFfi:    "ffi",
	KeywordDrop:   "drop",
	KeywordAs:     "as",
	KeywordOf:     "of",
	KeywordAnd:    "and",
	KeywordOr:     "or",
	KeywordRef:    "ref",
	KeywordStruct: "struct",
	KeywordEnum:   "enum",
	KeywordPre:    "pre",
	KeywordPost:   "post",
	KeywordInv:    "inv",
	KeywordIf:     "if",
	KeywordElse:   "else",
	KeywordWhere:  "where",
	KeywordPub:    "pub",
}
// keywordKinds maps a reserved word, exactly as written in source, to its
// keyword Kind. Lex looks every scanned identifier up here and falls back to
// Ident when there is no entry. It is the inverse of kindsKeywords; the two
// tables must be edited together.
var keywordKinds = map[string]Kind{
	"ns":     KeywordNs,
	"in":     KeywordIn,
	"from":   KeywordFrom,
	"use":    KeywordUse,
	"ffi":    KeywordFfi,
	"drop":   KeywordDrop,
	"as":     KeywordAs,
	"of":     KeywordOf,
	"and":    KeywordAnd,
	"or":     KeywordOr,
	"ref":    KeywordRef,
	"struct": KeywordStruct,
	"enum":   KeywordEnum,
	"pre":    KeywordPre,
	"post":   KeywordPost,
	"inv":    KeywordInv,
	"if":     KeywordIf,
	"else":   KeywordElse,
	"where":  KeywordWhere,
	"pub":    KeywordPub,
}
// Token is one lexeme produced by Lex.
//
// The field order is relied on by positional composite literals in this file
// (Token{kind, value, line, col}); keep it stable.
type Token struct {
	// Kind is the lexical category of the token.
	Kind Kind
	// Value is the token text taken from the source. For string literals it is
	// the text between the quotes; for EOF it is empty.
	Value string
	// Line is the line the token was found on, counted from 1.
	Line int
	// Col is the column of the token's first byte, counted from 1 in bytes.
	Col int
}
// Lex scans src and returns its tokens in source order. The result always ends
// with exactly one EOF token.
//
// Positions are 1-based and Col counts bytes, not runes. Whitespace and "//"
// line comments are skipped. An identifier that has an entry in keywordKinds is
// emitted with that keyword kind, otherwise as Ident.
//
// Numbers start with a digit and may contain '_' separators, which are kept
// verbatim in Value. A single '.' that is directly followed by a digit turns
// the literal into a LiteralFloat; any other '.' ends the number and is lexed
// separately as Dot.
//
// String literals carry the text between the double quotes with no escape
// processing. A string that is still open at end of input is emitted as a
// LiteralString holding everything after the opening quote.
//
// Any byte that starts no known token is emitted as a one-byte BadToken.
//
// filename is currently unused; it is kept so existing callers keep compiling.
func Lex(filename string, src string) []Token {
	var res []Token
	i := 0
	line := 1
	col := 1
	for i < len(src) {
		c := src[i]

		// Whitespace: only '\n' starts a new line, everything else is one column.
		if is_space(c) {
			if c == '\n' {
				line++
				col = 1
			} else {
				col++
			}
			i++
			continue
		}

		start := i
		startCol := col

		// Identifiers and keywords.
		if is_alpha(c) || is__(c) {
			for i < len(src) && (is_alphanum(src[i]) || is__(src[i])) {
				i++
				col++
			}
			ident := src[start:i]
			kind := Ident
			if kw, ok := keywordKinds[ident]; ok {
				kind = kw
			}
			res = append(res, Token{kind, ident, line, startCol})
			continue
		}

		// Integer and float literals.
		if is_digit(c) {
			numeric := LiteralInt
			for i < len(src) {
				ch := src[i]
				if is_dot(ch) {
					// Accept only the first dot, and only when a digit follows,
					// so "1.foo" and "1.2.3" do not swallow the member-access dot.
					if numeric == LiteralFloat || i+1 >= len(src) || !is_digit(src[i+1]) {
						break
					}
					numeric = LiteralFloat
				} else if !is_digit(ch) && !is__(ch) {
					break
				}
				i++
				col++
			}
			res = append(res, Token{numeric, src[start:i], line, startCol})
			continue
		}

		// String literals.
		if c == '"' {
			// The token is reported at the opening quote even if the literal
			// spans several lines.
			startLine := line
			i++ // consume opening dbquote
			col++
			for i < len(src) && src[i] != '"' {
				if src[i] == '\n' {
					line++
					col = 1
				} else {
					col++
				}
				i++
			}
			res = append(res, Token{LiteralString, src[start+1 : i], startLine, startCol})
			if i < len(src) {
				i++ // consume closing dbquote; absent when the string is unterminated
				col++
			}
			continue
		}

		// One byte of lookahead; stays 0 at end of input.
		var cx byte
		if i+1 < len(src) {
			cx = src[i+1]
		}

		// Line comment: skip up to, but not including, the newline so the
		// whitespace branch above does the line accounting.
		if c == '/' && cx == '/' {
			for i < len(src) && src[i] != '\n' {
				i++
				col++
			}
			continue
		}

		// Punctuation and operators. Two-byte tokens advance one extra byte
		// here; the shared advance after the switch consumes the first byte.
		switch c {
		case '.':
			res = append(res, Token{Dot, src[i : i+1], line, col})
		case ':':
			switch cx {
			case ':':
				res = append(res, Token{ColonColon, src[i : i+2], line, col})
				i++
				col++
			default:
				res = append(res, Token{Colon, src[i : i+1], line, col})
			}
		case '+':
			switch cx {
			case '=':
				res = append(res, Token{PlusEq, src[i : i+2], line, col})
				i++
				col++
			case '+':
				res = append(res, Token{PlusPlus, src[i : i+2], line, col})
				i++
				col++
			default:
				res = append(res, Token{Plus, src[i : i+1], line, col})
			}
		case '-':
			switch cx {
			case '=':
				res = append(res, Token{MinusEq, src[i : i+2], line, col})
				i++
				col++
			case '-':
				res = append(res, Token{MinusMinus, src[i : i+2], line, col})
				i++
				col++
			default:
				res = append(res, Token{Minus, src[i : i+1], line, col})
			}
		case '=':
			res = append(res, Token{Eq, src[i : i+1], line, col})
		case '/':
			res = append(res, Token{Slash, src[i : i+1], line, col})
		case '*':
			res = append(res, Token{Star, src[i : i+1], line, col})
		case ',':
			res = append(res, Token{Comma, src[i : i+1], line, col})
		case '(':
			res = append(res, Token{LParen, src[i : i+1], line, col})
		case ')':
			res = append(res, Token{RParen, src[i : i+1], line, col})
		case '[':
			res = append(res, Token{LBracket, src[i : i+1], line, col})
		case ']':
			res = append(res, Token{RBracket, src[i : i+1], line, col})
		case '{':
			res = append(res, Token{LBrace, src[i : i+1], line, col})
		case '}':
			res = append(res, Token{RBrace, src[i : i+1], line, col})
		default:
			res = append(res, Token{BadToken, src[i : i+1], line, col})
		}
		i++
		col++
	}
	res = append(res, Token{EOF, "", line, col})
	return res
}
// is__ reports whether c is the underscore byte.
func is__(c byte) bool {
	switch c {
	case '_':
		return true
	}
	return false
}
// is_alpha reports whether c is an ASCII letter, 'a'-'z' or 'A'-'Z'.
func is_alpha(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return false
	}
}
func is_alphanum(c byte) bool { return is_alpha(c) || is_digit(c) }
// is_digit reports whether c is an ASCII decimal digit, '0'-'9'.
func is_digit(c byte) bool {
	if c < '0' {
		return false
	}
	if c > '9' {
		return false
	}
	return true
}
// is_dot reports whether c is the '.' byte.
func is_dot(c byte) bool {
	const dot = '.'
	return dot == c
}
// is_space reports whether c is a space, tab, carriage return or newline.
func is_space(c byte) bool {
	switch c {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
// String returns a printable name for k, which lets Kind satisfy fmt.Stringer.
//
// Kinds that have an entry in kindsKeywords print as that entry (the keyword's
// source spelling). Every other known kind prints as a fixed label, and a value
// that matches nothing prints as "Print:Unknown".
func (k Kind) String() string {
	// The keyword table is checked first, before the fixed labels below.
	if spelling, isKeyword := kindsKeywords[k]; isKeyword {
		return spelling
	}

	var name string
	switch k {
	case Dot:
		name = "Dot"
	case Comma:
		name = "Comma"
	case Ident:
		name = "Ident"
	case Eq:
		name = "Eq"
	case Plus:
		name = "Plus"
	case PlusEq:
		name = "PlusEq"
	case PlusPlus:
		name = "PlusPlus"
	case Star:
		name = "Star"
	case Slash:
		name = "Slash"
	case Minus:
		name = "Minus"
	case MinusEq:
		name = "MinusEq"
	case MinusMinus:
		name = "MinusMinus"
	case Colon:
		name = "Colon"
	case ColonColon:
		name = "ColonColon"
	case LParen:
		name = "LParen"
	case RParen:
		name = "RParen"
	case LBrace:
		name = "LBrace"
	case RBrace:
		name = "RBrace"
	case LBracket:
		name = "LBracket"
	case RBracket:
		name = "RBracket"
	case EOF:
		name = "EOF"
	case LiteralInt:
		name = "IntLiteral"
	case LiteralFloat:
		name = "FloatLiteral"
	case LiteralString:
		name = "StringLiteral"
	case LiteralChar:
		name = "CharLiteral"
	case LiteralBool:
		name = "BoolLiteral"
	case BadToken:
		name = "BAD~~~TOKEN"
	default:
		name = "Print:Unknown"
	}
	return name
}
// Print_tokens writes one line per token to standard output, in slice order.
// Each line is "line:col", then the kind name left-justified in 16 columns,
// then the token value as a quoted string left-justified in 16 columns.
func Print_tokens(tokens []Token) {
	const rowFormat = "%-d:%-2d %-16s %-16q\n"
	for idx := 0; idx < len(tokens); idx++ {
		tok := tokens[idx]
		fmt.Printf(rowFormat, tok.Line, tok.Col, tok.Kind, tok.Value)
	}
}