nightshade

The nightshade programming language, compiler and tools (WIP)
Log | Files | Refs | README

lexer.adb (7868B)


      1 with Ada.Strings.Unbounded;   use Ada.Strings.Unbounded;
      2 with Ada.Text_IO;             use Ada.Text_IO;
      3 with Ada.Characters.Handling; use Ada.Characters.Handling;
      4 
      5 package body Lexer is
      6 
      7    function Init (File_Name : File_Name_Ref; File_Contents : Source_Ref) return Lexer is
      8       L : Lexer :=
      9         (Source    => File_Contents,
     10          File_Name => File_Name,
     11          Tokens    => Token_Vector.Empty_Vector,
     12          Pos       => 1,
     13          Line      => 1,
     14          Col       => 1);
     15    begin
     16       return L;
     17 
     18    end Init;
     19 
     20    function Peek (L : in Lexer) return Character is
     21    begin
     22       if L.Pos > L.Source'Length then
     23          return ASCII.NUL;
     24       end if;
     25       return L.Source (L.Pos);
     26    end;
     27 
     28    function Peek2 (L : in Lexer) return Character is
     29       C : Character;
     30    begin
     31       C := Peek (L);
     32       if L.Pos < L.Source'Length then
     33          C := L.Source (L.Pos + 1);
     34       end if;
     35       return C;
     36    end;
     37 
     38    -- nudge
     39 
     40    procedure Nudge (L : in out Lexer) is
     41       C : Character;
     42    begin
     43       C := Peek (L);
     44       if Is_Line_Terminator (C) then
     45          L.Line := L.Line + 1;
     46          L.Col := 1;
     47       else
     48          L.Col := L.Col + 1;
     49       end if;
     50       L.Pos := L.Pos + 1;
     51    end;
     52 
     53    -- skip spaces
     54 
     55    procedure Handle_Space_Comments (L : in out Lexer) is
     56       C : Character;
     57       T : Token;
     58    begin
     59       while true loop
     60          while true loop
     61             C := Peek (L);
     62             if Is_Line_Terminator (C) then
     63                -- we care about line return in this impl
     64                T :=
     65                  (Kind   => Separator,
     66                   Lexeme => To_Unbounded_String (""),
     67                   Line   => L.Line,
     68                   Col    => L.Col);
     69                Add_Token (L, T);
     70             end if;
     71             if Is_Space (C) or Is_Line_Terminator (C) or C = Character'Val (9) -- \t
     72             then
     73                Nudge (L);
     74             else
     75                exit;
     76             end if;
     77          end loop;
     78          if Peek (L) = '/' and Peek2 (L) = '/' then
     79             while not Is_Line_Terminator (Peek (L)) loop
     80                Nudge (L);
     81             end loop;
     82             goto Continue;
     83          end if;
     84          exit;
     85          <<Continue>>
     86          null;
     87       end loop;
     88    end Handle_Space_Comments;
     89 
     90    -- make ident
     91 
     92    function Make_Ident (L : in out Lexer) return Token is
     93       C     : Character;
     94       T     : Token;
     95       TK    : Token_Kind;
     96       Start : Natural;
     97       Word  : Unbounded_String;
     98       Line  : Positive;
     99       Col   : Positive;
    100    begin
    101       Start := L.Pos;
    102       Line := L.Line;
    103       Col := L.Col;
    104 
    105       while true loop
    106          if Is_Alphanumeric (Peek (L)) or Peek (L) = '_' then
    107             Nudge (L);
    108          else
    109             exit;
    110          end if;
    111       end loop;
    112 
    113       TK := Ident;
    114 
    115       Word := To_Unbounded_String (L.Source (Start .. L.Pos - 1));
    116 
    117       declare
    118          W : constant String := To_String (Word);
    119       begin
    120          if W = "use"
    121            or W = "ns"
    122            or W = "from"
    123            or W = "ffi"
    124            or W = "as"
    125            or W = "and"
    126            or W = "or"
    127            or W = "struct"
    128            or W = "enum"
    129            or W = "if"
    130            or W = "else"
    131            or W = "end"
    132            or W = "pub"
    133            or W = "pub"
    134            or W = "fx"
    135            or W = "fn"
    136            or W = "do"
    137            or W = "return"
    138            or W = "continue"
    139            or W = "break"
    140            or W = "is"
    141            or W = "for"
    142            or W = "while"
    143          then
    144             TK := Keyword;
    145          end if;
    146       end;
    147 
    148       T := (Kind => TK, Lexeme => Word, Line => Line, Col => Col);
    149 
    150       return T;
    151    end Make_Ident;
    152 
    153    -- make number
    154 
    155    function Make_Number (L : in out Lexer) return Token is
    156       T        : Token;
    157       Start    : Natural;
    158       Is_Float : Boolean := false;
    159       Line     : Positive;
    160       Col      : Positive;
    161    begin
    162       Start := L.Pos;
    163       Line := L.Line;
    164       Col := L.Col;
    165 
    166       if Peek (L) = '-' then
    167          Nudge (L);
    168       end if;
    169 
    170       while Is_Digit (Peek (L)) loop
    171          Nudge (L);
    172       end loop;
    173 
    174       if Peek (L) = '.' and Is_Digit (Peek2 (L)) then
    175          Is_Float := true;
    176          Nudge (L);
    177          while Is_Digit (Peek (L)) loop
    178             Nudge (L);
    179          end loop;
    180       end if;
    181       T :=
    182         (Kind   => Str_Literal,
    183          Lexeme => To_Unbounded_String (L.Source (Start .. L.Pos - 1)),
    184          Line   => Line,
    185          Col    => Col);
    186 
    187       return T;
    188    end Make_Number;
    189 
    190    -- make string
    191 
    192    function Make_String (L : in out Lexer) return Token is
    193       C     : Character;
    194       T     : Token;
    195       Start : Natural;
    196       Line  : Positive;
    197       Col   : Positive;
    198    begin
    199       Start := L.Pos;
    200       Line := L.Line;
    201       Col := L.Col;
    202       Nudge (L); -- advance `"`
    203 
    204       while true loop
    205          C := Peek (L);
    206          if C = '"' then
    207             exit;
    208          end if;
    209          Nudge (L);
    210       end loop;
    211 
    212       T :=
    213         (Kind   => Str_Literal,
    214          Lexeme => To_Unbounded_String (L.Source (Start .. L.Pos)),
    215          Line   => Line,
    216          Col    => Col);
    217 
    218       Nudge (L); -- advance `"`
    219       return T;
    220    end Make_String;
    221 
    222    -- next token
    223 
    224    function Next_Token (L : in out Lexer) return Token is
    225       T     : Token;
    226       TK    : Token_Kind;
    227       Start : Natural;
    228       C     : Character;
    229       C2    : Character;
    230       Line  : Positive;
    231       Col   : Positive;
    232    begin
    233       Handle_Space_Comments (L);
    234 
    235       Start := L.Pos;
    236       Line := L.Line;
    237       Col := L.Col;
    238 
    239       if L.Pos < L.Source'Length then
    240          TK := Unknown;
    241       else
    242          TK := EOF;
    243       end if;
    244 
    245       C := Peek (L);
    246       C2 := Peek2 (L);
    247 
    248       if Is_Letter (C) or C = '_' then
    249          T := Make_Ident (L);
    250          return T;
    251       end if;
    252 
    253       if C = '-' and Is_Digit (C2) then
    254          T := Make_Number (L);
    255          return T;
    256       end if;
    257 
    258       if Is_Digit (C) then
    259          T := Make_Number (L);
    260          return T;
    261       end if;
    262 
    263       if C = '"' then
    264          T := Make_String (L);
    265          return T;
    266       end if;
    267 
    268       if C = '(' then
    269          TK := L_Paren;
    270       elsif C = ')' then
    271          TK := R_Paren;
    272       elsif C = '[' then
    273          TK := L_Bracket;
    274       elsif C = ']' then
    275          TK := R_Bracket;
    276       elsif C = '{' then
    277          TK := L_Brace;
    278       elsif C = '}' then
    279          TK := R_Brace;
    280       elsif C = ',' then
    281          TK := Comma;
    282       elsif C = ':' then
    283          if C2 = ':' then
    284             TK := ColonColon;
    285             Nudge (L);
    286          else
    287             TK := Colon;
    288          end if;
    289       --  else
    290       --     Put_Line
    291       --       (Integer'Image (Line)
    292       --        & " :"
    293       --        & Integer'Image (Col)
    294       --        & ": "
    295       --        & "Unhandled: "
    296       --        & Integer'Image (Character'Pos (C)));
    297       end if;
    298 
    299       T := (Kind => TK, Lexeme => To_Unbounded_String (""), Line => Line, Col => Col);
    300       Nudge (L);
    301       return T;
    302    end Next_Token;
    303 
    304    -- add token
    305 
    306    procedure Add_Token (L : in out Lexer; T : Token) is
    307    begin
    308       L.Tokens.Append (T);
    309    end Add_Token;
    310 
    311    procedure Lex (L : in out Lexer) is
    312       Tok : Token;
    313    begin
    314 
    315       while true loop
    316          Tok := Next_Token (L);
    317          Add_Token (L, Tok);
    318          exit when Tok.Kind = EOF;
    319       end loop;
    320 
    321    end Lex;
    322 
    323    procedure Print_Tokens (L : in out Lexer) is
    324       I     : Natural := 0;
    325       Total : Natural := Natural (L.Tokens.Length);
    326    begin
    327       Put_Line ("total: " & (L.Tokens.Length'Image) & " tokens");
    328 
    329       while I < Total loop
    330 
    331          Put_Line
    332            (Integer'Image (L.Tokens (I).Line)
    333             & " :"
    334             & Integer'Image (L.Tokens (I).Col)
    335             & ": "
    336             & Token_Kind'Image (L.Tokens (I).Kind)
    337             & ": "
    338             & To_String (L.Tokens (I).Lexeme)
    339             & " at line");
    340          I := I + 1;
    341       end loop;
    342    end;
    343 
    344 end Lexer;