lexer.go (4102B)
1 package lexer 2 3 import ( 4 "fmt" 5 ) 6 7 type Kind int 8 9 const ( 10 Ident Kind = iota 11 Dot 12 Colon 13 ColonColon 14 Comma 15 EOF 16 Eq 17 LiteralBool 18 LiteralChar 19 LiteralFloat 20 LiteralInt 21 LiteralString 22 Minus 23 MinusEq 24 MinusMinus 25 LParen 26 RParen 27 Plus 28 PlusEq 29 PlusPlus 30 Slash 31 Star 32 BadToken 33 ) 34 35 type Token struct { 36 Kind Kind 37 Value string 38 Line, Col int 39 } 40 41 func Lex(src string) []Token { 42 var res []Token 43 i := 0 44 line := 1 45 col := 1 46 47 for i < len(src) { 48 49 c := src[i] 50 51 if is_space(c) { 52 if c == '\n' { 53 line++ 54 col = 1 55 } else { 56 col++ 57 } 58 i++ 59 continue 60 } 61 62 start := i 63 startCol := col 64 65 if is_alpha(c) || is__(c) { 66 for i < len(src) && (is_alphanum(src[i]) || is__(src[i])) { 67 i++ 68 col++ 69 } 70 res = append(res, Token{Ident, src[start:i], line, startCol}) 71 continue 72 } 73 if is_digit(c) { 74 numeric := LiteralInt 75 for i < len(src) && (is_digit(src[i]) || is__(src[i]) && is_dot(src[i])) { 76 if is_dot(src[i]) { 77 numeric = LiteralFloat 78 } 79 i++ 80 col++ 81 } 82 83 res = append(res, Token{numeric, src[start:i], line, startCol}) 84 continue 85 } 86 if c == '"' { 87 i++ // consume opening dbquote 88 col++ 89 for i < len(src) && src[i] != '"' { 90 i++ 91 col++ 92 } 93 94 res = append(res, Token{LiteralString, src[start+1 : i], line, startCol}) 95 i++ // consume closing dbquote 96 col++ 97 continue 98 } 99 var cx byte 100 if i+1 < len(src) { 101 cx = src[i+1] 102 } 103 if c == '/' && cx == '/' { 104 for i < len(src) && src[i] != '\n' { 105 i++ 106 col++ 107 } 108 continue 109 } 110 111 switch c { 112 case '.': 113 res = append(res, Token{Dot, src[i : i+1], line, col}) 114 case ':': 115 switch cx { 116 case ':': 117 res = append(res, Token{ColonColon, src[i : i+2], line, col}) 118 i++ 119 col++ 120 default: 121 res = append(res, Token{Colon, src[i : i+1], line, col}) 122 } 123 case '+': 124 switch cx { 125 case '=': 126 res = append(res, Token{PlusEq, src[i : i+2], line, col}) 127 i++ 128 col++ 129 case '+': 130 res = append(res, Token{PlusPlus, src[i : i+2], line, col}) 131 i++ 132 col++ 133 default: 134 res = append(res, Token{Plus, src[i : i+1], line, col}) 135 } 136 case '=': 137 res = append(res, Token{Eq, src[i : i+1], line, col}) 138 case '/': 139 res = append(res, Token{Slash, src[i : i+1], line, col}) 140 case '*': 141 res = append(res, Token{Star, src[i : i+1], line, col}) 142 case ',': 143 res = append(res, Token{Comma, src[i : i+1], line, col}) 144 case '(': 145 res = append(res, Token{LParen, src[i : i+1], line, col}) 146 case ')': 147 res = append(res, Token{RParen, src[i : i+1], line, col}) 148 default: 149 res = append(res, Token{BadToken, src[i : i+1], line, col}) 150 } 151 152 i++ 153 col++ 154 } 155 res = append(res, Token{EOF, "", line, col}) 156 return res 157 } 158 159 func is__(c byte) bool { return c == '_' } 160 func is_alpha(c byte) bool { return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' } 161 func is_alphanum(c byte) bool { return is_alpha(c) || is_digit(c) } 162 func is_digit(c byte) bool { return c >= '0' && c <= '9' } 163 func is_dot(c byte) bool { return c == '.' } 164 func is_space(c byte) bool { return c == ' ' || c == '\t' || c == '\r' || c == '\n' } 165 166 func (k Kind) String() string { 167 switch k { 168 case Dot: 169 return "Dot" 170 case Comma: 171 return "Comma" 172 case Ident: 173 return "Ident" 174 case Eq: 175 return "Eq" 176 case Plus: 177 return "Plus" 178 case PlusEq: 179 return "PlusEq" 180 case PlusPlus: 181 return "PlusPlus" 182 case Star: 183 return "Star" 184 case Slash: 185 return "Slash" 186 case Minus: 187 return "Minus" 188 case MinusEq: 189 return "MinusEq" 190 case MinusMinus: 191 return "MinusMinus" 192 case Colon: 193 return "Colon" 194 case ColonColon: 195 return "ColonColon" 196 case LParen: 197 return "LParen" 198 case RParen: 199 return "RParen" 200 case EOF: 201 return "EOF" 202 case LiteralInt: 203 return "IntLiteral" 204 case LiteralFloat: 205 return "FloatLiteral" 206 case LiteralString: 207 return "StringLiteral" 208 case LiteralChar: 209 return "CharLiteral" 210 case LiteralBool: 211 return "BoolLiteral" 212 case BadToken: 213 return "BAD~~~TOKEN" 214 default: 215 return "Print:Unknown" 216 } 217 } 218 219 func Print_tokens(tokens []Token) { 220 for _, tok := range tokens { 221 fmt.Printf("%-d:%-2d %-16s %-16q\n", tok.Line, tok.Col, tok.Kind, tok.Value) 222 } 223 }