From 22caf48f1a5c2ff3e7018a16967b6ce0c8a3d330 Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Fri, 13 Feb 2026 10:07:01 +0100 Subject: [PATCH 1/9] made tokenType enum and Token struct, together with a bit of setup --- src/lexer/mod.rs | 1 + src/lexer/token.rs | 63 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 src/lexer/mod.rs create mode 100644 src/lexer/token.rs diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs new file mode 100644 index 0000000..2108f68 --- /dev/null +++ b/src/lexer/mod.rs @@ -0,0 +1 @@ +pub mod token; \ No newline at end of file diff --git a/src/lexer/token.rs b/src/lexer/token.rs new file mode 100644 index 0000000..d33a495 --- /dev/null +++ b/src/lexer/token.rs @@ -0,0 +1,63 @@ +#[derive(Debug, Clone, PartialEq)] +pub enum TokenType { + // Keywords + Func, + Let, + If, + Then, + Else, + Not, + While, + Print, + Do, + Is, + + // Types + Integer, + Boolean, + + // Literals + True, + False, + IntegerLiteral(i64), + StringLiteral(String), + + // Identifiers + Identifier(String), + + // Operators + Colon, // : + Arrow, // -> + Assign, // = + GreaterThan, // > + Equals, // == + Plus, // + + + // Punctuation + LeftParen, // ( + RightParen, // ) + LeftBrace, // { + RightBrace, // } + Comma, // , + Semicolon, // ; + + // Special + Eof, +} + +#[derive(Debug, Clone)] +pub struct Token { + pub token_type: TokenType, + pub line: usize, + pub column: usize, +} + +impl Token { + pub fn new(token_type: TokenType, line: usize, column: usize) -> Self { + Token { + token_type, + line, + column, + } + } +} From 4f53a7d41ad2a4dd27f88edf42ec084799f78c75 Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Fri, 13 Feb 2026 11:05:07 +0100 Subject: [PATCH 2/9] added some file structure, symbol, symbol table, types struct --- src/lexer/tokenizer.rs | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 src/lexer/tokenizer.rs diff --git a/src/lexer/tokenizer.rs b/src/lexer/tokenizer.rs new file mode 100644 index 0000000..e723f26 --- /dev/null +++ b/src/lexer/tokenizer.rs @@ -0,0 +1,5 @@ +mod token; + +pub fn tokenizer () -> std::vec { + +} \ No newline at end of file From 71183a734061e8306b9a677bec6d4b7c6dea183d Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Sun, 15 Feb 2026 16:20:12 +0100 Subject: [PATCH 3/9] started making lexer struct implementation, made test for new() function --- src/lexer/lexer.rs | 45 ++++++++++++++++++++++++++++++++++++ src/lexer/mod.rs | 3 ++- src/lexer/tokenizer.rs | 5 ---- src/main.rs | 6 ++++- src/semantic/mod.rs | 2 ++ src/semantic/symbol_table.rs | 11 +++++++++ src/semantic/types.rs | 0 7 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 src/lexer/lexer.rs delete mode 100644 src/lexer/tokenizer.rs create mode 100644 src/semantic/mod.rs create mode 100644 src/semantic/symbol_table.rs create mode 100644 src/semantic/types.rs diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs new file mode 100644 index 0000000..6d9f44c --- /dev/null +++ b/src/lexer/lexer.rs @@ -0,0 +1,45 @@ + + +#[derive(Debug)] +pub struct Lexer { + input: Vec, + position: usize, + line: usize, + column: usize +} + +impl Lexer { + pub fn new(input: String) -> Self{ + Self { + input:input.chars().collect(), + position: 0, + line: 1, + column: 1, + } + } + +} +impl PartialEq for Lexer { + fn eq(&self, other: &Self) -> bool { + self.input == other.input && + self.position == other.position && + self.column == other.column && + self.line == other.line + } +} +mod tests{ + use crate::lexer::lexer::Lexer; + #[test] + fn new_creates_lexer_correctly(){ + let actual = Lexer::new("a = 2".to_string()); + + let expected = Lexer { + input: vec!['a', ' ', '=', ' ', '2'], + position: 0, + line: 1, + column: 1, + }; + + assert_eq!(actual, expected); + } +} \ No newline at end of file diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 2108f68..325f721 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -1 +1,2 @@ -pub mod token; \ No newline at end of file +pub mod token; +pub mod lexer; \ No newline at end of file diff --git a/src/lexer/tokenizer.rs b/src/lexer/tokenizer.rs deleted file mode 100644 index e723f26..0000000 --- a/src/lexer/tokenizer.rs +++ /dev/null @@ -1,5 +0,0 @@ -mod token; - -pub fn tokenizer () -> std::vec { - -} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index c97fc64..7cf8432 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,14 @@ +mod lexer; + fn main() { - println!("Hello, world!"); + } #[cfg(test)] mod tests{ use pretty_assertions::{assert_eq}; + + use crate::lexer::lexer::Lexer; #[test] fn zero_eq_zero(){ assert_eq!(0,0); diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs new file mode 100644 index 0000000..3533058 --- /dev/null +++ b/src/semantic/mod.rs @@ -0,0 +1,2 @@ +pub mod Type; +pub mod SymbolTable; \ No newline at end of file diff --git a/src/semantic/symbol_table.rs b/src/semantic/symbol_table.rs new file mode 100644 index 0000000..815f864 --- /dev/null +++ b/src/semantic/symbol_table.rs @@ -0,0 +1,11 @@ +use std::collections::HashMap; +pub struct Symbol { + symbol_type: Type, + scope_level: usize, + //perchance we need to add some more info, for functions (return types, param names) +} + +pub struct SymbolTable { + scopes: Vec>, + //key = name i figured, so no "name" property in Symbol struct +} \ No newline at end of file diff --git a/src/semantic/types.rs b/src/semantic/types.rs new file mode 100644 index 0000000..e69de29 From 545446fc0fb8a45018f3e6bfb02517d5b0d0f961 Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Sun, 15 Feb 2026 16:49:48 +0100 Subject: [PATCH 4/9] started the dang tokenize function, dis shit is harder than i thought --- src/lexer/lexer.rs | 56 +++++++++++++++++++++++++++++++++++++++++++ src/lexer/token.rs | 2 ++ src/semantic/types.rs | 5 ++++ 3 files changed, 63 insertions(+) diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 6d9f44c..8d99179 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -1,3 +1,5 @@ +use crate::lexer::token::{Token, TokenType}; + #[derive(Debug)] @@ -17,6 +19,60 @@ impl Lexer { column: 1, } } + pub fn tokenize(&mut self) -> Vec { + let mut tokens: Vec = Vec::new(); + while let Some(ch) = self.current_char() { + match ch { + ' ' | '\t' => { + self.advance(); + } + + '\n' => { + self.advance_line(); + } + '=' => { + self.simple_token(TokenType::Assign); + } + '+' => { + self.simple_token(TokenType::Plus); + } + '-' => { + self.simple_token(TokenType::Minus); + } + '*' => { + self.simple_token(TokenType::Multiply); + } + _ => panic!("Suuuper wrongdog in here, unexpected char '{}' at {}:{}", ch, self.line, self.column), + } + } + tokens + } + fn current_char(&self) -> Option { + self.input.get(self.position).copied() + } + + fn advance(&mut self) { + self.position += 1; + self.column += 1; + } + + fn advance_line(&mut self) { + self.position += 1; + self.line += 1; + self.column = 1; + } + + fn simple_token(&self, token_type: TokenType) -> Token { + Token::new(token_type, self.line, self.column) + } + + fn read_number(&mut self) -> Token { + // TODO: + } + + fn read_identifier(&mut self) -> Token { + // TODO: + } } impl PartialEq for Lexer { diff --git a/src/lexer/token.rs b/src/lexer/token.rs index d33a495..5a965b2 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -32,6 +32,8 @@ pub enum TokenType { GreaterThan, // > Equals, // == Plus, // + + Minus, // - + Multiply, // * // Punctuation LeftParen, // ( diff --git a/src/semantic/types.rs b/src/semantic/types.rs index e69de29..da7232b 100644 --- a/src/semantic/types.rs +++ b/src/semantic/types.rs @@ -0,0 +1,5 @@ +#[derive(Debug, Clone, PartialEq)] +pub enum Type { + Integer, + Boolean, +} From ba6e568d5460966faa6a775ef0168003289a9ff8 Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Sun, 15 Feb 2026 17:10:34 +0100 Subject: [PATCH 5/9] added a test that needs to be fixed :) --- src/lexer/lexer.rs | 33 ++++++++++++++++++++++++++++++--- src/lexer/token.rs | 2 +- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 8d99179..aa055dc 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -42,6 +42,18 @@ impl Lexer { '*' => { self.simple_token(TokenType::Multiply); } + '{' => { + self.simple_token(TokenType::LeftBrace); + } + '}' => { + self.simple_token(TokenType::RightBrace); + } + '(' => { + self.simple_token(TokenType::LeftParen); + } + ')' => { + self.simple_token(TokenType::RightBrace); + } _ => panic!("Suuuper wrongdog in here, unexpected char '{}' at {}:{}", ch, self.line, self.column), } } @@ -67,11 +79,13 @@ impl Lexer { } fn read_number(&mut self) -> Token { - // TODO: + // TODO: implement this shi + Token::new(TokenType::Integer, self.line, self.column) } fn read_identifier(&mut self) -> Token { - // TODO: + // TODO: implement this shi + Token::new(TokenType::Integer, self.line, self.column) } } @@ -84,7 +98,7 @@ impl PartialEq for Lexer { } } mod tests{ - use crate::lexer::lexer::Lexer; + use crate::lexer::{lexer::Lexer, token::{Token, TokenType}}; #[test] fn new_creates_lexer_correctly(){ let actual = Lexer::new("a = 2".to_string()); @@ -98,4 +112,17 @@ mod tests{ assert_eq!(actual, expected); } + + fn tokenize_works_as_intended(){ + let lex: &mut Lexer = Lexer::new("a = 2".to_string()); //wrongdog fix later + let actualTokenVec: Vec = Lexer::tokenize(lex); + + let expected: Vec = vec![ + Token::new(TokenType::Identifier("a".to_string()), 1, 1), //idk if true, check later + Token::new(TokenType::Assign, 1, 3), + Token::new(TokenType::Integer, 1, 5), + ]; + + assert_eq!(actualTokenVec, expected);// idk man i tried, does not work + } } \ No newline at end of file diff --git a/src/lexer/token.rs b/src/lexer/token.rs index 5a965b2..11fa721 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -47,7 +47,7 @@ pub enum TokenType { Eof, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct Token { pub token_type: TokenType, pub line: usize, From 75155be1cb285388d71c65ef46d5143e81e09590 Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Tue, 17 Feb 2026 11:17:37 +0100 Subject: [PATCH 6/9] made read_identifyer function --- src/lexer/lexer.rs | 39 +++++++++++++++++++++++++++++---------- src/lexer/token.rs | 2 +- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index aa055dc..920e2dd 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -1,3 +1,5 @@ +use std::{any::Any, string}; + use crate::lexer::token::{Token, TokenType}; @@ -30,6 +32,9 @@ impl Lexer { '\n' => { self.advance_line(); } + 'A'..='Z' | 'a'..='z' => { + self.read_identifier(ch); + } '=' => { self.simple_token(TokenType::Assign); } @@ -78,14 +83,28 @@ impl Lexer { Token::new(token_type, self.line, self.column) } - fn read_number(&mut self) -> Token { + fn read_number(&mut self, first_ch: char) -> Token { // TODO: implement this shi - Token::new(TokenType::Integer, self.line, self.column) + Token::new(TokenType::Integer(num), self.line, self.column) } - fn read_identifier(&mut self) -> Token { - // TODO: implement this shi - Token::new(TokenType::Integer, self.line, self.column) + fn read_identifier(&mut self, first_ch: char) -> Token { + let mut name:String = "".to_string(); + let startColumnNum: usize = self.column; + name.push(first_ch); + self.advance(); + while let Some(ch) = self.current_char() { + match ch { + 'A'..='Z' | 'a'..='z' => { + name.push(ch); + self.advance(); + } + _ => { + break; + } + } + } + Token::new(TokenType::Identifier(name), self.line, startColumnNum) } } @@ -112,17 +131,17 @@ mod tests{ assert_eq!(actual, expected); } - + #[test] fn tokenize_works_as_intended(){ - let lex: &mut Lexer = Lexer::new("a = 2".to_string()); //wrongdog fix later - let actualTokenVec: Vec = Lexer::tokenize(lex); + let mut lex: Lexer = Lexer::new("a = 2".to_string()); //wrongdog fix later + let actual_token_vec: Vec = lex.tokenize(); let expected: Vec = vec![ Token::new(TokenType::Identifier("a".to_string()), 1, 1), //idk if true, check later Token::new(TokenType::Assign, 1, 3), - Token::new(TokenType::Integer, 1, 5), + Token::new(TokenType::Integer(2), 1, 5), ]; - assert_eq!(actualTokenVec, expected);// idk man i tried, does not work + assert_eq!(actual_token_vec, expected);// idk man i tried, does not work } } \ No newline at end of file diff --git a/src/lexer/token.rs b/src/lexer/token.rs index 11fa721..41ce3bc 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -13,7 +13,7 @@ pub enum TokenType { Is, // Types - Integer, + Integer(i64), Boolean, // Literals From 82c85d6aab8e2191e5038cc4213ae2f2f0c6d1c6 Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Tue, 17 Feb 2026 11:28:46 +0100 Subject: [PATCH 7/9] made read_number function --- src/lexer/lexer.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 920e2dd..712f5e0 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -85,12 +85,28 @@ impl Lexer { fn read_number(&mut self, first_ch: char) -> Token { // TODO: implement this shi + let mut num_string: String = "".to_string(); + let start_col_num :usize= self.column; + num_string.push(first_ch); + self.advance(); + while let Some(ch) = self.current_char() { + match ch { + '0'..='9' =>{ + num_string.push(ch); + self.advance(); + } + _ => { + break; + } + } + } + let num = num_string.parse::().unwrap(); Token::new(TokenType::Integer(num), self.line, self.column) } fn read_identifier(&mut self, first_ch: char) -> Token { let mut name:String = "".to_string(); - let startColumnNum: usize = self.column; + let start_col_num: usize = self.column; name.push(first_ch); self.advance(); while let Some(ch) = self.current_char() { @@ -104,7 +120,7 @@ impl Lexer { } } } - Token::new(TokenType::Identifier(name), self.line, startColumnNum) + Token::new(TokenType::Identifier(name), self.line, start_col_num) } } From 2dda38194c3cb57414dc2eec476f8c0416819b07 Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Tue, 17 Feb 2026 12:52:56 +0100 Subject: [PATCH 8/9] made the final helper functions for string litterals and comments, should have bool also tho, so i need to make this as well --- test.sl => program.trv | 0 src/lexer/lexer.rs | 123 ++++++++++++++++++++++++++++++++++------- src/lexer/token.rs | 10 +++- src/main.rs | 24 +++++++- 4 files changed, 132 insertions(+), 25 deletions(-) rename test.sl => program.trv (100%) diff --git a/test.sl b/program.trv similarity index 100% rename from test.sl rename to program.trv diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 712f5e0..b5dd6b0 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -32,32 +32,53 @@ impl Lexer { '\n' => { self.advance_line(); } - 'A'..='Z' | 'a'..='z' => { - self.read_identifier(ch); + 'A'..='Z' | 'a'..='z' | '_' => { + tokens.push(self.read_identifier(ch)); + } + '0' ..= '9' => { + tokens.push(self.read_number(ch)); } '=' => { - self.simple_token(TokenType::Assign); + tokens.push(self.simple_token(TokenType::Assign)); + } + ':' => { + tokens.push(self.simple_token(TokenType::Colon)); } '+' => { - self.simple_token(TokenType::Plus); + tokens.push(self.simple_token(TokenType::Plus)); } '-' => { - self.simple_token(TokenType::Minus); + tokens.push(self.simple_token(TokenType::Minus)); } '*' => { - self.simple_token(TokenType::Multiply); + tokens.push(self.simple_token(TokenType::Multiply)); } '{' => { - self.simple_token(TokenType::LeftBrace); + tokens.push(self.simple_token(TokenType::LeftBrace)); } '}' => { - self.simple_token(TokenType::RightBrace); + tokens.push(self.simple_token(TokenType::RightBrace)); } '(' => { - self.simple_token(TokenType::LeftParen); + tokens.push(self.simple_token(TokenType::LeftParen)); } ')' => { - self.simple_token(TokenType::RightBrace); + tokens.push(self.simple_token(TokenType::RightParen)); + } + '>' =>{ + tokens.push(self.simple_token(TokenType::GreaterThan)); + } + '<' =>{ + tokens.push(self.simple_token(TokenType::LessThan)); + } + ';' =>{ + tokens.push(self.simple_token(TokenType::Semicolon)); + } + '"' => { + tokens.push(self.read_string_literal()); + } + '#' => { + self.read_comment(); } _ => panic!("Suuuper wrongdog in here, unexpected char '{}' at {}:{}", ch, self.line, self.column), } @@ -79,12 +100,25 @@ impl Lexer { self.column = 1; } - fn simple_token(&self, token_type: TokenType) -> Token { - Token::new(token_type, self.line, self.column) + fn simple_token(&mut self, token_type: TokenType) -> Token { + let start_col_num = self.column; + self.advance(); + Token::new(token_type, self.line, start_col_num) + + } + fn read_comment(&mut self) { + while let Some(ch) = self.current_char(){ + match ch{ + '\n' => { + break; + } + _ => { + self.advance(); + } + } + } } - fn read_number(&mut self, first_ch: char) -> Token { - // TODO: implement this shi let mut num_string: String = "".to_string(); let start_col_num :usize= self.column; num_string.push(first_ch); @@ -101,7 +135,28 @@ impl Lexer { } } let num = num_string.parse::().unwrap(); - Token::new(TokenType::Integer(num), self.line, self.column) + Token::new(TokenType::Integer(num), self.line, start_col_num) + } + + fn read_string_literal(&mut self) -> Token{ + let mut the_litteral: String = "".to_string(); + let start_col_num: usize = self.column; + the_litteral.push('"'); + self.advance(); + while let Some(ch) = self.current_char(){ + match ch { + '"' => { + the_litteral.push(ch); + self.advance(); + break; + } + _ => { + the_litteral.push(ch); + self.advance(); + } + } + } + Token::new(TokenType::StringLiteral(the_litteral), self.line, start_col_num) } fn read_identifier(&mut self, first_ch: char) -> Token { @@ -111,7 +166,7 @@ impl Lexer { self.advance(); while let Some(ch) = self.current_char() { match ch { - 'A'..='Z' | 'a'..='z' => { + 'A'..='Z' | 'a'..='z' | '_' => { name.push(ch); self.advance(); } @@ -149,15 +204,41 @@ mod tests{ } #[test] fn tokenize_works_as_intended(){ - let mut lex: Lexer = Lexer::new("a = 2".to_string()); //wrongdog fix later + let mut lex: Lexer = Lexer::new("abc_def = 2".to_string()); + let actual_token_vec: Vec = lex.tokenize(); + + let expected: Vec = vec![ + Token::new(TokenType::Identifier("abc_def".to_string()), 1, 1), + Token::new(TokenType::Assign, 1, 9), + Token::new(TokenType::Integer(2), 1, 11), + ]; + + assert_eq!(actual_token_vec, expected); + } + #[test] + fn reading_comments_tokenize_lexer_line_col_are_correct(){ + let mut lex: Lexer = Lexer::new("#abc_def = 2\n".to_string()); + lex.tokenize(); + assert_eq!((lex.line, lex.column), (2,1)); + } + #[test] + fn reading_comments_tokenize_returns_empty_vector(){ + let mut lex: Lexer = Lexer::new("#abc_def = 2\n".to_string()); + let actual_token_vec: Vec = lex.tokenize(); + + let expected: Vec = vec![]; + + assert_eq!(actual_token_vec, expected); + } + #[test] + fn read_string_literal_makes_correct_token(){ + let mut lex: Lexer = Lexer::new("\"test\"".to_string()); let actual_token_vec: Vec = lex.tokenize(); let expected: Vec = vec![ - Token::new(TokenType::Identifier("a".to_string()), 1, 1), //idk if true, check later - Token::new(TokenType::Assign, 1, 3), - Token::new(TokenType::Integer(2), 1, 5), + Token::new(TokenType::StringLiteral("\"test\"".to_string()), 1, 1) ]; - assert_eq!(actual_token_vec, expected);// idk man i tried, does not work + assert_eq!(actual_token_vec, expected); } } \ No newline at end of file diff --git a/src/lexer/token.rs b/src/lexer/token.rs index 41ce3bc..76f5a3b 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -30,6 +30,7 @@ pub enum TokenType { Arrow, // -> Assign, // = GreaterThan, // > + LessThan, // < Equals, // == Plus, // + Minus, // - @@ -47,7 +48,7 @@ pub enum TokenType { Eof, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone)] pub struct Token { pub token_type: TokenType, pub line: usize, @@ -63,3 +64,10 @@ impl Token { } } } +impl PartialEq for Token { + fn eq(&self, other: &Self) -> bool { + self.token_type == other.token_type && + self.line == other.line && + self.column == other.column + } +} diff --git a/src/main.rs b/src/main.rs index 7cf8432..ef90e80 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,14 +1,32 @@ mod lexer; +use std::env; +use std::fs; + +use lexer::lexer::Lexer; // adjust if needed + fn main() { - + let args: Vec = env::args().collect(); + + if args.len() < 3 { + eprintln!("Usage: triviC "); + std::process::exit(1); + } + + let filename = &args[2]; + + let source = fs::read_to_string(filename) + .expect("Failed to read file"); + + let mut lexer = Lexer::new(source); + let _tokens = lexer.tokenize(); + + println!("Lexing completed successfully."); } #[cfg(test)] mod tests{ use pretty_assertions::{assert_eq}; - - use crate::lexer::lexer::Lexer; #[test] fn zero_eq_zero(){ assert_eq!(0,0); From 4c14f497a457d8c2c8c4db3d5dd4cc8501a5c1e0 Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Tue, 17 Feb 2026 15:10:21 +0100 Subject: [PATCH 9/9] all but the initialization of symboltable is done with the lexer --- program.trv | 2 +- src/lexer/lexer.rs | 52 ++++++++++++++++++++++++++++++++++++++++------ src/lexer/token.rs | 2 +- 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/program.trv b/program.trv index 6af6c8d..28e349b 100644 --- a/program.trv +++ b/program.trv @@ -5,7 +5,7 @@ func is_greater_than_44(params : Integer) -> Integer { if x > 44 is True then { 1 } - else if not { + else { 0 } } diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index b5dd6b0..3d24724 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -39,7 +39,7 @@ impl Lexer { tokens.push(self.read_number(ch)); } '=' => { - tokens.push(self.simple_token(TokenType::Assign)); + tokens.push(self.assign_or_equals()); } ':' => { tokens.push(self.simple_token(TokenType::Colon)); @@ -48,7 +48,7 @@ impl Lexer { tokens.push(self.simple_token(TokenType::Plus)); } '-' => { - tokens.push(self.simple_token(TokenType::Minus)); + tokens.push(self.minus_or_arrow()); } '*' => { tokens.push(self.simple_token(TokenType::Multiply)); @@ -77,6 +77,9 @@ impl Lexer { '"' => { tokens.push(self.read_string_literal()); } + ',' => { + tokens.push(self.simple_token(TokenType::Comma)); + } '#' => { self.read_comment(); } @@ -106,6 +109,24 @@ impl Lexer { Token::new(token_type, self.line, start_col_num) } + fn assign_or_equals(&mut self) -> Token{ + let original_col = self.column; + self.advance(); + if self.current_char().unwrap() == '=' { + Token::new(TokenType::Equals, self.line, original_col) + } else { + Token::new(TokenType::Assign, self.line, original_col) + } + } + fn minus_or_arrow(&mut self) -> Token { + let original_col = self.column; + self.advance(); + if self.current_char().unwrap() == '>' { + Token::new(TokenType::Arrow, self.line, original_col) + } else { + Token::new(TokenType::Minus, self.line, original_col) + } + } fn read_comment(&mut self) { while let Some(ch) = self.current_char(){ match ch{ @@ -135,7 +156,7 @@ impl Lexer { } } let num = num_string.parse::().unwrap(); - Token::new(TokenType::Integer(num), self.line, start_col_num) + Token::new(TokenType::IntegerLiteral(num), self.line, start_col_num) } fn read_string_literal(&mut self) -> Token{ @@ -175,9 +196,28 @@ impl Lexer { } } } - Token::new(TokenType::Identifier(name), self.line, start_col_num) + self.give_keyword_or_literal_token(name.as_mut_str(), self.line, start_col_num) + } + fn give_keyword_or_literal_token(&mut self, name: &str, line: usize, col: usize) -> Token{ + match name { + "let" => Token::new(TokenType::Let, line, col), + "func" => Token::new(TokenType::Func, line, col), + "if" => Token::new(TokenType::If, line, col), + "then" => Token::new(TokenType::Then, line, col), + "else" => Token::new(TokenType::Else, line, col), + "not" => Token::new(TokenType::Not, line, col), + "while" => Token::new(TokenType::While, line, col), + "print" => Token::new(TokenType::Print, line, col), + "do" => Token::new(TokenType::Do, line, col), + "is" => Token::new(TokenType::Is, line, col), + "Integer"=> Token::new(TokenType::Integer, line, col), + "Boolean"=> Token::new(TokenType::Boolean, line, col), + "True" => Token::new(TokenType::True, line, col), + "False" => Token::new(TokenType::False, line, col), + "Eof" => Token::new(TokenType::Eof, line, col), + _ => Token::new(TokenType::Identifier(name.to_string()), line, col), + } } - } impl PartialEq for Lexer { fn eq(&self, other: &Self) -> bool { @@ -210,7 +250,7 @@ mod tests{ let expected: Vec = vec![ Token::new(TokenType::Identifier("abc_def".to_string()), 1, 1), Token::new(TokenType::Assign, 1, 9), - Token::new(TokenType::Integer(2), 1, 11), + Token::new(TokenType::IntegerLiteral(2), 1, 11), ]; assert_eq!(actual_token_vec, expected); diff --git a/src/lexer/token.rs b/src/lexer/token.rs index 76f5a3b..d3c7e0a 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -13,7 +13,7 @@ pub enum TokenType { Is, // Types - Integer(i64), + Integer, Boolean, // Literals