lexer.adb (7868B)
1 with Ada.Strings.Unbounded; use Ada.Strings.Unbounded; 2 with Ada.Text_IO; use Ada.Text_IO; 3 with Ada.Characters.Handling; use Ada.Characters.Handling; 4 5 package body Lexer is 6 7 function Init (File_Name : File_Name_Ref; File_Contents : Source_Ref) return Lexer is 8 L : Lexer := 9 (Source => File_Contents, 10 File_Name => File_Name, 11 Tokens => Token_Vector.Empty_Vector, 12 Pos => 1, 13 Line => 1, 14 Col => 1); 15 begin 16 return L; 17 18 end Init; 19 20 function Peek (L : in Lexer) return Character is 21 begin 22 if L.Pos > L.Source'Length then 23 return ASCII.NUL; 24 end if; 25 return L.Source (L.Pos); 26 end; 27 28 function Peek2 (L : in Lexer) return Character is 29 C : Character; 30 begin 31 C := Peek (L); 32 if L.Pos < L.Source'Length then 33 C := L.Source (L.Pos + 1); 34 end if; 35 return C; 36 end; 37 38 -- nudge 39 40 procedure Nudge (L : in out Lexer) is 41 C : Character; 42 begin 43 C := Peek (L); 44 if Is_Line_Terminator (C) then 45 L.Line := L.Line + 1; 46 L.Col := 1; 47 else 48 L.Col := L.Col + 1; 49 end if; 50 L.Pos := L.Pos + 1; 51 end; 52 53 -- skip spaces 54 55 procedure Handle_Space_Comments (L : in out Lexer) is 56 C : Character; 57 T : Token; 58 begin 59 while true loop 60 while true loop 61 C := Peek (L); 62 if Is_Line_Terminator (C) then 63 -- we care about line return in this impl 64 T := 65 (Kind => Separator, 66 Lexeme => To_Unbounded_String (""), 67 Line => L.Line, 68 Col => L.Col); 69 Add_Token (L, T); 70 end if; 71 if Is_Space (C) or Is_Line_Terminator (C) or C = Character'Val (9) -- \t 72 then 73 Nudge (L); 74 else 75 exit; 76 end if; 77 end loop; 78 if Peek (L) = '/' and Peek2 (L) = '/' then 79 while not Is_Line_Terminator (Peek (L)) loop 80 Nudge (L); 81 end loop; 82 goto Continue; 83 end if; 84 exit; 85 <<Continue>> 86 null; 87 end loop; 88 end Handle_Space_Comments; 89 90 -- make ident 91 92 function Make_Ident (L : in out Lexer) return Token is 93 C : Character; 94 T : Token; 95 TK : Token_Kind; 96 Start : Natural; 97 Word : Unbounded_String; 98 Line : Positive; 99 Col : Positive; 100 begin 101 Start := L.Pos; 102 Line := L.Line; 103 Col := L.Col; 104 105 while true loop 106 if Is_Alphanumeric (Peek (L)) or Peek (L) = '_' then 107 Nudge (L); 108 else 109 exit; 110 end if; 111 end loop; 112 113 TK := Ident; 114 115 Word := To_Unbounded_String (L.Source (Start .. L.Pos - 1)); 116 117 declare 118 W : constant String := To_String (Word); 119 begin 120 if W = "use" 121 or W = "ns" 122 or W = "from" 123 or W = "ffi" 124 or W = "as" 125 or W = "and" 126 or W = "or" 127 or W = "struct" 128 or W = "enum" 129 or W = "if" 130 or W = "else" 131 or W = "end" 132 or W = "pub" 133 or W = "pub" 134 or W = "fx" 135 or W = "fn" 136 or W = "do" 137 or W = "return" 138 or W = "continue" 139 or W = "break" 140 or W = "is" 141 or W = "for" 142 or W = "while" 143 then 144 TK := Keyword; 145 end if; 146 end; 147 148 T := (Kind => TK, Lexeme => Word, Line => Line, Col => Col); 149 150 return T; 151 end Make_Ident; 152 153 -- make number 154 155 function Make_Number (L : in out Lexer) return Token is 156 T : Token; 157 Start : Natural; 158 Is_Float : Boolean := false; 159 Line : Positive; 160 Col : Positive; 161 begin 162 Start := L.Pos; 163 Line := L.Line; 164 Col := L.Col; 165 166 if Peek (L) = '-' then 167 Nudge (L); 168 end if; 169 170 while Is_Digit (Peek (L)) loop 171 Nudge (L); 172 end loop; 173 174 if Peek (L) = '.' and Is_Digit (Peek2 (L)) then 175 Is_Float := true; 176 Nudge (L); 177 while Is_Digit (Peek (L)) loop 178 Nudge (L); 179 end loop; 180 end if; 181 T := 182 (Kind => Str_Literal, 183 Lexeme => To_Unbounded_String (L.Source (Start .. L.Pos - 1)), 184 Line => Line, 185 Col => Col); 186 187 return T; 188 end Make_Number; 189 190 -- make string 191 192 function Make_String (L : in out Lexer) return Token is 193 C : Character; 194 T : Token; 195 Start : Natural; 196 Line : Positive; 197 Col : Positive; 198 begin 199 Start := L.Pos; 200 Line := L.Line; 201 Col := L.Col; 202 Nudge (L); -- advance `"` 203 204 while true loop 205 C := Peek (L); 206 if C = '"' then 207 exit; 208 end if; 209 Nudge (L); 210 end loop; 211 212 T := 213 (Kind => Str_Literal, 214 Lexeme => To_Unbounded_String (L.Source (Start .. L.Pos)), 215 Line => Line, 216 Col => Col); 217 218 Nudge (L); -- advance `"` 219 return T; 220 end Make_String; 221 222 -- next token 223 224 function Next_Token (L : in out Lexer) return Token is 225 T : Token; 226 TK : Token_Kind; 227 Start : Natural; 228 C : Character; 229 C2 : Character; 230 Line : Positive; 231 Col : Positive; 232 begin 233 Handle_Space_Comments (L); 234 235 Start := L.Pos; 236 Line := L.Line; 237 Col := L.Col; 238 239 if L.Pos < L.Source'Length then 240 TK := Unknown; 241 else 242 TK := EOF; 243 end if; 244 245 C := Peek (L); 246 C2 := Peek2 (L); 247 248 if Is_Letter (C) or C = '_' then 249 T := Make_Ident (L); 250 return T; 251 end if; 252 253 if C = '-' and Is_Digit (C2) then 254 T := Make_Number (L); 255 return T; 256 end if; 257 258 if Is_Digit (C) then 259 T := Make_Number (L); 260 return T; 261 end if; 262 263 if C = '"' then 264 T := Make_String (L); 265 return T; 266 end if; 267 268 if C = '(' then 269 TK := L_Paren; 270 elsif C = ')' then 271 TK := R_Paren; 272 elsif C = '[' then 273 TK := L_Bracket; 274 elsif C = ']' then 275 TK := R_Bracket; 276 elsif C = '{' then 277 TK := L_Brace; 278 elsif C = '}' then 279 TK := R_Brace; 280 elsif C = ',' then 281 TK := Comma; 282 elsif C = ':' then 283 if C2 = ':' then 284 TK := ColonColon; 285 Nudge (L); 286 else 287 TK := Colon; 288 end if; 289 -- else 290 -- Put_Line 291 -- (Integer'Image (Line) 292 -- & " :" 293 -- & Integer'Image (Col) 294 -- & ": " 295 -- & "Unhandled: " 296 -- & Integer'Image (Character'Pos (C))); 297 end if; 298 299 T := (Kind => TK, Lexeme => To_Unbounded_String (""), Line => Line, Col => Col); 300 Nudge (L); 301 return T; 302 end Next_Token; 303 304 -- add token 305 306 procedure Add_Token (L : in out Lexer; T : Token) is 307 begin 308 L.Tokens.Append (T); 309 end Add_Token; 310 311 procedure Lex (L : in out Lexer) is 312 Tok : Token; 313 begin 314 315 while true loop 316 Tok := Next_Token (L); 317 Add_Token (L, Tok); 318 exit when Tok.Kind = EOF; 319 end loop; 320 321 end Lex; 322 323 procedure Print_Tokens (L : in out Lexer) is 324 I : Natural := 0; 325 Total : Natural := Natural (L.Tokens.Length); 326 begin 327 Put_Line ("total: " & (L.Tokens.Length'Image) & " tokens"); 328 329 while I < Total loop 330 331 Put_Line 332 (Integer'Image (L.Tokens (I).Line) 333 & " :" 334 & Integer'Image (L.Tokens (I).Col) 335 & ": " 336 & Token_Kind'Image (L.Tokens (I).Kind) 337 & ": " 338 & To_String (L.Tokens (I).Lexeme) 339 & " at line"); 340 I := I + 1; 341 end loop; 342 end; 343 344 end Lexer;