nightshade

The nightshade programming language, compiler and tools (WIP)
Log | Files | Refs

commit 4e30b242e98097476132b2b00420109c36fd3544
parent e0745f0500fa255357bb08de421fc3fcf6016334
Author: citbl <citbl@citbl.org>
Date:   Sun, 31 May 2026 18:24:10 +1000

wip lexer

Diffstat:
M makefile | 6 +++---
M src/lexer.adb | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
M src/lexer.ads | 22 +++++++++++++++-------
M src/nightshade.adb | 1 +
A test.mty | 8 ++++++++
D test.txt | 6 ------
6 files changed, 151 insertions(+), 37 deletions(-)

diff --git a/makefile b/makefile @@ -1,10 +1,10 @@ .SILENT: -default: fmt + +default: mkdir -p ./bin/ gnatmake -o bin/nightshade src/nightshade.adb rm -rf *.o *.ali - ./bin/nightshade test.txt + ./bin/nightshade test.mty fmt: gnatformat src/*.adb src/*.ads - diff --git a/src/lexer.adb b/src/lexer.adb @@ -6,35 +6,138 @@ package body Lexer is function Init (File_Name : String; File_Contents : String) return Lexer is L : Lexer := - (Source => To_Unbounded_String (File_Contents), - File_Name => To_Unbounded_String (File_Name), - Tokens => Token_Vectors.Empty_Vector); + (Source => File_Contents, + Src_Len => File_Contents'Length, + File_Name => File_Name, + FN_Len => File_Name'Length, + Tokens => Token_Vectors.Empty_Vector, + Pos => 1, + Line => 1, + Col => 1); begin return L; end Init; - procedure Lex (L : in out Lexer) is - SRC : constant String := To_String (L.Source); - I : Positive := SRC'First; - I2 : Positive := 1; - Line : Positive := 1; - Col : Positive := 1; - C : Character; - begin - Put_Line (Natural'Image (Length (L.Source))); - - while I <= SRC'Last loop - if I < SRC'Last then - I2 := I + 1; - else - I2 := I; + function Peek (L : in Lexer) return Character is + begin + return L.Source (L.Pos); + end; + + function Peek2 (L : in Lexer) return Character is + C : Character; + begin + C := L.Source (L.Pos); + if L.Pos < L.Source'Length then + C := L.Source (L.Pos + 1); + end if; + return C; + end; + + -- nudge + + procedure Nudge (L : in out Lexer) is + C : Character; + begin + C := Peek (L); + if Is_Line_Terminator (C) then + L.Line := L.Line + 1; + L.Col := 1; + else + L.Col := L.Col + 1; + end if; + L.Pos := L.Pos + 1; + end; + + -- skip spaces + + procedure Skip_Spaces_Comments (L : in out Lexer) is + C : Character; + begin + while true loop + while true loop + C := Peek (L); + if Is_Space (C) or Is_Line_Terminator (C) then + Nudge (L); + else + exit; + end if; + end loop; + if Peek (L) = '/' and Peek2 (L) = '/' then + while not Is_Line_Terminator (Peek 
(L)) loop + Nudge (L); + end loop; + goto Continue; end if; - C := SRC (I); + exit; + <<Continue>> + null; + end loop; + end Skip_Spaces_Comments; + + -- next token - I := I + 1; + function Next_Token (L : in out Lexer) return Token is + T : Token; + TK : Token_Kind; + begin + Skip_Spaces_Comments (L); + Put (Peek (L)); + if L.Pos < L.Source'Length then + TK := Unknown; + else + TK := EOF; + end if; + T := + (Kind => TK, + Lexeme => To_Unbounded_String (""), + Line => 1, + Col => 1); + Nudge (L); + return T; + end Next_Token; + + -- add token + + procedure Add_Token (L : in out Lexer; T : Token) is + begin + L.Tokens.Append (T); + end Add_Token; + + procedure Lex (L : in out Lexer) is + Tok : Token; + begin + + while true loop + Tok := Next_Token (L); + Add_Token (L, Tok); + exit when Tok.Kind = EOF; end loop; - end; + -- Put_Line (Natural'Image (Length (L.Source))); + + -- while I <= SRC'Last loop + -- if I < SRC'Last then + -- I2 := I + 1; + -- else + -- I2 := I; + -- end if; + + -- C := SRC (I); + + -- if Is_Space (C) or Is_Line_Terminator (C) then + -- if C = ASCII.LF then + -- Put_Line ("found return at line:" & I'Image); + -- end if; + -- end if; + -- if Is_Letter (C) then + + -- null; + -- end if; + + -- I := I + 1; + -- end loop; + + end Lex; end Lexer; diff --git a/src/lexer.ads b/src/lexer.ads @@ -2,12 +2,17 @@ with Ada.Containers.Vectors; with Ada.Strings.Unbounded; use Ada.Strings.Unbounded; package Lexer is - - type Token_Type is - (Ident, Float_Literal, Int_Literal, Str_Literal, Bool_Literal, Unknown); + type Token_Kind is + (Ident, + Float_Literal, + Int_Literal, + Str_Literal, + Bool_Literal, + Unknown, + EOF); type Token is record - Kind : Token_Type; + Kind : Token_Kind; Lexeme : Unbounded_String; Line, Col : Positive; end record; @@ -15,10 +20,13 @@ package Lexer is package Token_Vectors is new Ada.Containers.Vectors (Index_Type => Natural, Element_Type => Token); - type Lexer is record - Source : Unbounded_String; - File_Name : Unbounded_String; 
+ type Lexer(Src_Len: Positive; FN_Len: Positive) is record + Source : String(1 .. Src_Len); + File_Name : String(1 .. FN_Len); Tokens : Token_Vectors.Vector; + Pos : Positive; + Line : Positive; + Col : Positive; end record; function Init (File_Name : String; File_Contents : String) return Lexer; diff --git a/src/nightshade.adb b/src/nightshade.adb @@ -18,6 +18,7 @@ begin L : Lexer.Lexer := Lexer.Init (File_Name, File_Contents); begin Lexer.Lex (L); + Put_Line (""); Put_Line ("done"); end; end Nightshade; diff --git a/test.mty b/test.mty @@ -0,0 +1,8 @@ +use io + +// this is a test program + +fx main() :: + print("hello") +end + diff --git a/test.txt b/test.txt @@ -1,6 +0,0 @@ -This is some text to be injected. - -Hello world. I am sam. - -I am not sam, but for the sake of this example, I am Andre. -