mighty

The mighty programming language, compiler and tools (WIP)
Log | Files | Refs

lexer.go (4102B)


      1 package lexer
      2 
      3 import (
      4 	"fmt"
      5 )
      6 
// Kind identifies the category of a Token.
type Kind int

// Token kinds. Values are assigned by iota in declaration order, so Ident is
// the zero value of Kind.
const (
	Ident Kind = iota // identifier: starts with a letter or "_", then letters, digits or "_"
	Dot               // "."
	Colon             // ":"
	ColonColon        // "::"
	Comma             // ","
	EOF               // end of input; Lex appends it as the final token
	Eq                // "="
	LiteralBool       // presumably a boolean literal (TODO confirm)
	LiteralChar       // presumably a character literal (TODO confirm)
	LiteralFloat      // numeric literal that contains a "."
	LiteralInt        // numeric literal without a "."
	LiteralString     // double-quoted string; the token Value excludes the quotes
	Minus             // presumably "-" (TODO confirm)
	MinusEq           // presumably "-=" (TODO confirm)
	MinusMinus        // presumably "--" (TODO confirm)
	LParen            // "("
	RParen            // ")"
	Plus              // "+"
	PlusEq            // "+="
	PlusPlus          // "++"
	Slash             // "/"
	Star              // "*"
	BadToken          // a single byte that matches no other lexing rule
)
     34 
     35 type Token struct {
     36 	Kind      Kind
     37 	Value     string
     38 	Line, Col int
     39 }
     40 
     41 func Lex(src string) []Token {
     42 	var res []Token
     43 	i := 0
     44 	line := 1
     45 	col := 1
     46 
     47 	for i < len(src) {
     48 
     49 		c := src[i]
     50 
     51 		if is_space(c) {
     52 			if c == '\n' {
     53 				line++
     54 				col = 1
     55 			} else {
     56 				col++
     57 			}
     58 			i++
     59 			continue
     60 		}
     61 
     62 		start := i
     63 		startCol := col
     64 
     65 		if is_alpha(c) || is__(c) {
     66 			for i < len(src) && (is_alphanum(src[i]) || is__(src[i])) {
     67 				i++
     68 				col++
     69 			}
     70 			res = append(res, Token{Ident, src[start:i], line, startCol})
     71 			continue
     72 		}
     73 		if is_digit(c) {
     74 			numeric := LiteralInt
     75 			for i < len(src) && (is_digit(src[i]) || is__(src[i]) && is_dot(src[i])) {
     76 				if is_dot(src[i]) {
     77 					numeric = LiteralFloat
     78 				}
     79 				i++
     80 				col++
     81 			}
     82 
     83 			res = append(res, Token{numeric, src[start:i], line, startCol})
     84 			continue
     85 		}
     86 		if c == '"' {
     87 			i++ // consume opening dbquote
     88 			col++
     89 			for i < len(src) && src[i] != '"' {
     90 				i++
     91 				col++
     92 			}
     93 
     94 			res = append(res, Token{LiteralString, src[start+1 : i], line, startCol})
     95 			i++ // consume closing dbquote
     96 			col++
     97 			continue
     98 		}
     99 		var cx byte
    100 		if i+1 < len(src) {
    101 			cx = src[i+1]
    102 		}
    103 		if c == '/' && cx == '/' {
    104 			for i < len(src) && src[i] != '\n' {
    105 				i++
    106 				col++
    107 			}
    108 			continue
    109 		}
    110 
    111 		switch c {
    112 		case '.':
    113 			res = append(res, Token{Dot, src[i : i+1], line, col})
    114 		case ':':
    115 			switch cx {
    116 			case ':':
    117 				res = append(res, Token{ColonColon, src[i : i+2], line, col})
    118 				i++
    119 				col++
    120 			default:
    121 				res = append(res, Token{Colon, src[i : i+1], line, col})
    122 			}
    123 		case '+':
    124 			switch cx {
    125 			case '=':
    126 				res = append(res, Token{PlusEq, src[i : i+2], line, col})
    127 				i++
    128 				col++
    129 			case '+':
    130 				res = append(res, Token{PlusPlus, src[i : i+2], line, col})
    131 				i++
    132 				col++
    133 			default:
    134 				res = append(res, Token{Plus, src[i : i+1], line, col})
    135 			}
    136 		case '=':
    137 			res = append(res, Token{Eq, src[i : i+1], line, col})
    138 		case '/':
    139 			res = append(res, Token{Slash, src[i : i+1], line, col})
    140 		case '*':
    141 			res = append(res, Token{Star, src[i : i+1], line, col})
    142 		case ',':
    143 			res = append(res, Token{Comma, src[i : i+1], line, col})
    144 		case '(':
    145 			res = append(res, Token{LParen, src[i : i+1], line, col})
    146 		case ')':
    147 			res = append(res, Token{RParen, src[i : i+1], line, col})
    148 		default:
    149 			res = append(res, Token{BadToken, src[i : i+1], line, col})
    150 		}
    151 
    152 		i++
    153 		col++
    154 	}
    155 	res = append(res, Token{EOF, "", line, col})
    156 	return res
    157 }
    158 
    159 func is__(c byte) bool        { return c == '_' }
    160 func is_alpha(c byte) bool    { return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' }
    161 func is_alphanum(c byte) bool { return is_alpha(c) || is_digit(c) }
    162 func is_digit(c byte) bool    { return c >= '0' && c <= '9' }
    163 func is_dot(c byte) bool      { return c == '.' }
    164 func is_space(c byte) bool    { return c == ' ' || c == '\t' || c == '\r' || c == '\n' }
    165 
    166 func (k Kind) String() string {
    167 	switch k {
    168 	case Dot:
    169 		return "Dot"
    170 	case Comma:
    171 		return "Comma"
    172 	case Ident:
    173 		return "Ident"
    174 	case Eq:
    175 		return "Eq"
    176 	case Plus:
    177 		return "Plus"
    178 	case PlusEq:
    179 		return "PlusEq"
    180 	case PlusPlus:
    181 		return "PlusPlus"
    182 	case Star:
    183 		return "Star"
    184 	case Slash:
    185 		return "Slash"
    186 	case Minus:
    187 		return "Minus"
    188 	case MinusEq:
    189 		return "MinusEq"
    190 	case MinusMinus:
    191 		return "MinusMinus"
    192 	case Colon:
    193 		return "Colon"
    194 	case ColonColon:
    195 		return "ColonColon"
    196 	case LParen:
    197 		return "LParen"
    198 	case RParen:
    199 		return "RParen"
    200 	case EOF:
    201 		return "EOF"
    202 	case LiteralInt:
    203 		return "IntLiteral"
    204 	case LiteralFloat:
    205 		return "FloatLiteral"
    206 	case LiteralString:
    207 		return "StringLiteral"
    208 	case LiteralChar:
    209 		return "CharLiteral"
    210 	case LiteralBool:
    211 		return "BoolLiteral"
    212 	case BadToken:
    213 		return "BAD~~~TOKEN"
    214 	default:
    215 		return "Print:Unknown"
    216 	}
    217 }
    218 
    219 func Print_tokens(tokens []Token) {
    220 	for _, tok := range tokens {
    221 		fmt.Printf("%-d:%-2d   %-16s %-16q\n", tok.Line, tok.Col, tok.Kind, tok.Value)
    222 	}
    223 }