lexer.c (8150B)
#include <stdbool.h>
#include "array.h"
#include "string.h"
#include "lexer.h"
/* Character-class predicates used while scanning the source text. */
static bool is__(const char c);
static bool is_alpha(const char c);
static bool is_digit(const char c);
static bool is_space(const char c);
static bool is_alpha_numeric(const char c);
static bool is_dot(const char c);
/* Keyword classification for an identifier span; the size_t is the span length in bytes. */
static enum token_type compare_span_to_token(struct lexer *lexer, struct span ident, size_t);
/* Token storage and the debug dump of the collected tokens. */
static void add_token(struct lexer *, struct token);
static void print_tokens(struct lexer *);
void
lexer_lex(struct lexer *lexer) {
size_t len = strnlen(lexer->src, lexer->src_len);
const char *src = lexer->src;
char c, cx;
size_t i = 0;
size_t col = 1;
size_t line = 1;
size_t start = 0;
size_t start_col = 0;
struct span span;
struct token tok;
enum token_type type;
while (i < len) {
c = lexer->src[i];
if (is_space(c)) {
if (c == '\n') {
col = 1;
line++;
} else {
col++;
}
i++;
continue;
}
start = i;
start_col = col;
if (is_digit(c)) {
type = TOKEN_LITERAL_INT;
while (i < len && (is_digit(src[i]) || is_dot(src[i]))) {
if (is_dot(src[i])) {
type = TOKEN_LITERAL_FLOAT;
}
i++;
col++;
}
span = (struct span){
.filename = lexer->file, .col = start_col, .line = line, .start = start, .stop = i};
tok = (struct token){.span = span, .token_type = type};
add_token(lexer, tok);
continue;
}
if (is_alpha(c) || is__(c)) {
type = TOKEN_IDENT;
while (i < len && (is_alpha_numeric(src[i]) || is__(src[i]))) {
i++;
col++;
}
span = (struct span){
.filename = lexer->file, .col = start_col, .line = line, .start = start, .stop = i};
type = compare_span_to_token(lexer, span, i - start);
tok = (struct token){.span = span, .token_type = type};
add_token(lexer, tok);
continue;
}
cx = (i < len) ? lexer->src[i] : '\0';
if (c == '/' && cx == '/') {
while (i < len && src[i] != '\n') {
i++;
col++;
}
continue;
}
switch (c) {
case '(':
type = TOKEN_L_PAREN;
break;
case ')':
type = TOKEN_R_PAREN;
break;
case '[':
type = TOKEN_L_BRACKET;
break;
case ']':
type = TOKEN_R_BRACKET;
break;
case '{':
type = TOKEN_L_BRACE;
break;
case '}':
type = TOKEN_R_BRACE;
break;
case ',':
type = TOKEN_COMMA;
break;
case '=':
type = TOKEN_EQ;
break;
case ':':
type = TOKEN_COLON;
break;
case '+':
type = TOKEN_PLUS;
break;
case '-':
type = TOKEN_MINUS;
break;
case '*':
type = TOKEN_STAR;
break;
case '/':
type = TOKEN_SLASH;
break;
default:
type = TOKEN_BAD_TOKEN;
break;
}
i++;
col++;
span = (struct span){
.filename = lexer->file, .col = start_col, .line = line, .start = start, .stop = i};
tok = (struct token){.span = span, .token_type = type};
add_token(lexer, tok);
}
print_tokens(lexer);
printf("----------------------------\n");
}
/*
 * Return true when c is one of the whitespace characters the lexer skips:
 * space, horizontal tab, carriage return or newline.
 */
static bool
is_space(const char c) {
    /* Logical OR (not bitwise |): these are boolean conditions and may short-circuit. */
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
/* Return true when c is the underscore character. */
static bool
is__(const char c) {
    const char underscore = '_';

    return c == underscore;
}
/* Return true when c lies in 'a'..'z' or in 'A'..'Z'. */
static bool
is_alpha(const char c) {
    if (c >= 'a' && c <= 'z') {
        return true;
    }
    if (c >= 'A' && c <= 'Z') {
        return true;
    }
    return false;
}
/* Return true when c lies in '0'..'9'. */
static bool
is_digit(const char c) {
    if (c < '0') {
        return false;
    }
    return c <= '9';
}
/* Return true when c is accepted by is_alpha() or by is_digit(). */
static bool
is_alpha_numeric(const char c) {
    if (is_alpha(c)) {
        return true;
    }
    return is_digit(c);
}
/* Return true when c is the '.' character. */
static bool
is_dot(const char c) {
    if (c != '.') {
        return false;
    }
    return true;
}
/*
 * Human-readable label for each token type, indexed by the enum value.
 * Token types without an entry below are left as NULL by the designated
 * initializers, so any lookup must be prepared for a NULL label.
 * NOTE(review): the bracket tokens and every keyword except TOKEN_KEYWORD_PUB
 * are produced in this file but have no label here.
 */
static const char *NAMES_TOKEN[] = {
    /* identifiers and literals */
    [TOKEN_IDENT] = "ident / type",
    [TOKEN_LITERAL_INT] = "literal INT",
    [TOKEN_LITERAL_FLOAT] = "literal FLOAT",
    [TOKEN_LITERAL_CHAR] = "literal CHAR",
    [TOKEN_LITERAL_STR] = "literal STRING",
    [TOKEN_LITERAL_BOOL] = "literal BOOL",

    /* grouping and separators */
    [TOKEN_L_PAREN] = "open paren",
    [TOKEN_R_PAREN] = "close paren",
    [TOKEN_L_BRACE] = "open brace",
    [TOKEN_R_BRACE] = "close brace",
    [TOKEN_COMMA] = "comma",
    [TOKEN_COLON] = "colon",

    /* operators */
    [TOKEN_EQ] = "equal / assign",
    [TOKEN_PLUS] = "plus",
    [TOKEN_MINUS] = "minus",
    [TOKEN_STAR] = "star / mult",
    [TOKEN_SLASH] = "slash / div",

    /* keywords */
    [TOKEN_KEYWORD_PUB] = "keyword public",

    /* error marker */
    [TOKEN_BAD_TOKEN] = "BAD TOKEN",
};
static void
print_tokens(struct lexer *lexer) {
size_t i;
const char *src = lexer->src;
struct token t;
for (i = 0; i < lexer->tok_len; i++) {
t = lexer->tokens[i];
if (NAMES_TOKEN[t.token_type] == NULL) {
printf("null token: %d:\n", t.token_type);
}
printf("L%zu:%zu \t%-14s '", t.span.line, t.span.col, NAMES_TOKEN[t.token_type]);
fwrite(src + t.span.start, 1, t.span.stop - t.span.start, stdout);
printf("'\n");
}
}
static enum token_type
compare_span_to_token(struct lexer *lexer, struct span ident, size_t len) {
enum token_type t = TOKEN_IDENT;
if (ident.start >= lexer->src_len || ident.stop >= lexer->src_len) return t;
if (strncmp(lexer->src + ident.start, "ns", 2) == 0 && len == 2)
t = TOKEN_KEYWORD_NS;
else if (strncmp(lexer->src + ident.start, "in", 2) == 0 && len == 2)
t = TOKEN_KEYWORD_IN;
else if (strncmp(lexer->src + ident.start, "from", 4) == 0 && len == 4)
t = TOKEN_KEYWORD_FROM;
else if (strncmp(lexer->src + ident.start, "use", 3) == 0 && len == 3)
t = TOKEN_KEYWORD_USE;
else if (strncmp(lexer->src + ident.start, "ffi", 3) == 0 && len == 3)
t = TOKEN_KEYWORD_FFI;
else if (strncmp(lexer->src + ident.start, "drop", 4) == 0 && len == 4)
t = TOKEN_KEYWORD_DROP;
else if (strncmp(lexer->src + ident.start, "as", 2) == 0 && len == 2)
t = TOKEN_KEYWORD_AS;
else if (strncmp(lexer->src + ident.start, "of", 2) == 0 && len == 2)
t = TOKEN_KEYWORD_OF;
else if (strncmp(lexer->src + ident.start, "and", 3) == 0 && len == 3)
t = TOKEN_KEYWORD_AND;
else if (strncmp(lexer->src + ident.start, "or", 2) == 0 && len == 2)
t = TOKEN_KEYWORD_OR;
else if (strncmp(lexer->src + ident.start, "ref", 3) == 0 && len == 3)
t = TOKEN_KEYWORD_REF;
else if (strncmp(lexer->src + ident.start, "struct", 6) == 0 && len == 6)
t = TOKEN_KEYWORD_STRUCT;
else if (strncmp(lexer->src + ident.start, "enum", 4) == 0 && len == 4)
t = TOKEN_KEYWORD_ENUM;
else if (strncmp(lexer->src + ident.start, "pre", 3) == 0 && len == 3)
t = TOKEN_KEYWORD_PRE;
else if (strncmp(lexer->src + ident.start, "post", 4) == 0 && len == 4)
t = TOKEN_KEYWORD_POST;
else if (strncmp(lexer->src + ident.start, "inv", 3) == 0 && len == 3)
t = TOKEN_KEYWORD_INV;
else if (strncmp(lexer->src + ident.start, "if", 2) == 0 && len == 2)
t = TOKEN_KEYWORD_IF;
else if (strncmp(lexer->src + ident.start, "else", 4) == 0 && len == 4)
t = TOKEN_KEYWORD_ELSE;
else if (strncmp(lexer->src + ident.start, "where", 5) == 0 && len == 5)
t = TOKEN_KEYWORD_WHERE;
else if (strncmp(lexer->src + ident.start, "pub", 3) == 0 && len == 3)
t = TOKEN_KEYWORD_PUB;
return t;
}
/*
 * Append tok to the lexer's token array.
 * Delegates to the ARRAY_PUSH macro, handing it the token array together with
 * its length and capacity fields; presumably the macro grows the storage when
 * it is full -- confirm against its definition.
 */
static void
add_token(struct lexer *l, struct token tok) {
ARRAY_PUSH(l->tokens, l->tok_len, l->tok_cap, tok);
}