diff --git a/test.sl b/program.trv
similarity index 97%
rename from test.sl
rename to program.trv
index 6af6c8d..28e349b 100644
--- a/test.sl
+++ b/program.trv
@@ -5,7 +5,7 @@ func is_greater_than_44(params : Integer) -> Integer {
     if x > 44 is True then {
         1
     }
-    else if not {
+    else {
         0
     }
 }
diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs
new file mode 100644
index 0000000..3d24724
--- /dev/null
+++ b/src/lexer/lexer.rs
@@ -0,0 +1,284 @@
+use std::{any::Any, string};
+
+use crate::lexer::token::{Token, TokenType};
+
+
+
+#[derive(Debug)]
+pub struct Lexer {
+    input: Vec<char>,
+    position: usize,
+    line: usize,
+    column: usize
+}
+
+impl Lexer {
+    pub fn new(input: String) -> Self{
+        Self {
+            input:input.chars().collect(),
+            position: 0,
+            line: 1,
+            column: 1,
+        }
+    }
+    pub fn tokenize(&mut self) -> Vec<Token> {
+        let mut tokens: Vec<Token> = Vec::new();
+        while let Some(ch) = self.current_char() {
+            match ch {
+                ' ' | '\t' => {
+                    self.advance();
+                }
+
+                '\n' => {
+                    self.advance_line();
+                }
+                'A'..='Z' | 'a'..='z' | '_' => {
+                    tokens.push(self.read_identifier(ch));
+                }
+                '0' ..= '9' => {
+                    tokens.push(self.read_number(ch));
+                }
+                '=' => {
+                    tokens.push(self.assign_or_equals());
+                }
+                ':' => {
+                    tokens.push(self.simple_token(TokenType::Colon));
+                }
+                '+' => {
+                    tokens.push(self.simple_token(TokenType::Plus));
+                }
+                '-' => {
+                    tokens.push(self.minus_or_arrow());
+                }
+                '*' => {
+                    tokens.push(self.simple_token(TokenType::Multiply));
+                }
+                '{' => {
+                    tokens.push(self.simple_token(TokenType::LeftBrace));
+                }
+                '}' => {
+                    tokens.push(self.simple_token(TokenType::RightBrace));
+                }
+                '(' => {
+                    tokens.push(self.simple_token(TokenType::LeftParen));
+                }
+                ')' => {
+                    tokens.push(self.simple_token(TokenType::RightParen));
+                }
+                '>' =>{
+                    tokens.push(self.simple_token(TokenType::GreaterThan));
+                }
+                '<' =>{
+                    tokens.push(self.simple_token(TokenType::LessThan));
+                }
+                ';' =>{
+                    tokens.push(self.simple_token(TokenType::Semicolon));
+                }
+                '"' => {
+                    tokens.push(self.read_string_literal());
+                }
+                ',' => {
+                    tokens.push(self.simple_token(TokenType::Comma));
+                }
+                '#' => {
+                    self.read_comment();
+                }
+                _ => panic!("Suuuper wrongdog in here, unexpected char '{}' at {}:{}", ch, self.line, self.column),
+            }
+        }
+        tokens
+    }
+    fn current_char(&self) -> Option<char> {
+        self.input.get(self.position).copied()
+    }
+
+    fn advance(&mut self) {
+        self.position += 1;
+        self.column += 1;
+    }
+
+    fn advance_line(&mut self) {
+        self.position += 1;
+        self.line += 1;
+        self.column = 1;
+    }
+
+    fn simple_token(&mut self, token_type: TokenType) -> Token {
+        let start_col_num = self.column;
+        self.advance();
+        Token::new(token_type, self.line, start_col_num)
+
+    }
+    fn assign_or_equals(&mut self) -> Token{
+        let original_col = self.column;
+        self.advance();
+        if self.current_char().unwrap() == '=' {
+            Token::new(TokenType::Equals, self.line, original_col)
+        } else {
+            Token::new(TokenType::Assign, self.line, original_col)
+        }
+    }
+    fn minus_or_arrow(&mut self) -> Token {
+        let original_col = self.column;
+        self.advance();
+        if self.current_char().unwrap() == '>' {
+            Token::new(TokenType::Arrow, self.line, original_col)
+        } else {
+            Token::new(TokenType::Minus, self.line, original_col)
+        }
+    }
+    fn read_comment(&mut self) {
+        while let Some(ch) = self.current_char(){
+            match ch{
+                '\n' => {
+                    break;
+                }
+                _ => {
+                    self.advance();
+                }
+            }
+        }
+    }
+    fn read_number(&mut self, first_ch: char) -> Token {
+        let mut num_string: String = "".to_string();
+        let start_col_num :usize= self.column;
+        num_string.push(first_ch);
+        self.advance();
+        while let Some(ch) = self.current_char() {
+            match ch {
+                '0'..='9' =>{
+                    num_string.push(ch);
+                    self.advance();
+                }
+                _ => {
+                    break;
+                }
+            }
+        }
+        let num = num_string.parse::<i64>().unwrap();
+        Token::new(TokenType::IntegerLiteral(num), self.line, start_col_num)
+    }
+
+    fn read_string_literal(&mut self) -> Token{
+        let mut the_litteral: String = "".to_string();
+        let start_col_num: usize = self.column;
+        the_litteral.push('"');
+        self.advance();
+        while let Some(ch) = self.current_char(){
+            match ch {
+                '"' => {
+                    the_litteral.push(ch);
+                    self.advance();
+                    break;
+                }
+                _ => {
+                    the_litteral.push(ch);
+                    self.advance();
+                }
+            }
+        }
+        Token::new(TokenType::StringLiteral(the_litteral), self.line, start_col_num)
+    }
+
+    fn read_identifier(&mut self, first_ch: char) -> Token {
+        let mut name:String = "".to_string();
+        let start_col_num: usize = self.column;
+        name.push(first_ch);
+        self.advance();
+        while let Some(ch) = self.current_char() {
+            match ch {
+                'A'..='Z' | 'a'..='z' | '_' => {
+                    name.push(ch);
+                    self.advance();
+                }
+                _ => {
+                    break;
+                }
+            }
+        }
+        self.give_keyword_or_literal_token(name.as_mut_str(), self.line, start_col_num)
+    }
+    fn give_keyword_or_literal_token(&mut self, name: &str, line: usize, col: usize) -> Token{
+        match name {
+            "let" => Token::new(TokenType::Let, line, col),
+            "func" => Token::new(TokenType::Func, line, col),
+            "if" => Token::new(TokenType::If, line, col),
+            "then" => Token::new(TokenType::Then, line, col),
+            "else" => Token::new(TokenType::Else, line, col),
+            "not" => Token::new(TokenType::Not, line, col),
+            "while" => Token::new(TokenType::While, line, col),
+            "print" => Token::new(TokenType::Print, line, col),
+            "do" => Token::new(TokenType::Do, line, col),
+            "is" => Token::new(TokenType::Is, line, col),
+            "Integer"=> Token::new(TokenType::Integer, line, col),
+            "Boolean"=> Token::new(TokenType::Boolean, line, col),
+            "True" => Token::new(TokenType::True, line, col),
+            "False" => Token::new(TokenType::False, line, col),
+            "Eof" => Token::new(TokenType::Eof, line, col),
+            _ => Token::new(TokenType::Identifier(name.to_string()), line, col),
+        }
+    }
+}
+impl PartialEq for Lexer {
+    fn eq(&self, other: &Self) -> bool {
+        self.input == other.input &&
+        self.position == other.position &&
+        self.column == other.column &&
+        self.line == other.line
+    }
+}
+mod tests{
+    use crate::lexer::{lexer::Lexer, token::{Token, TokenType}};
+    #[test]
+    fn new_creates_lexer_correctly(){
+        let actual = Lexer::new("a = 2".to_string());
+
+        let expected = Lexer {
+            input: vec!['a', ' ', '=', ' ', '2'],
+            position: 0,
+            line: 1,
+            column: 1,
+        };
+
+        assert_eq!(actual, expected);
+    }
+    #[test]
+    fn tokenize_works_as_intended(){
+        let mut lex: Lexer = Lexer::new("abc_def = 2".to_string());
+        let actual_token_vec: Vec<Token> = lex.tokenize();
+
+        let expected: Vec<Token> = vec![
+            Token::new(TokenType::Identifier("abc_def".to_string()), 1, 1),
+            Token::new(TokenType::Assign, 1, 9),
+            Token::new(TokenType::IntegerLiteral(2), 1, 11),
+        ];
+
+        assert_eq!(actual_token_vec, expected);
+    }
+    #[test]
+    fn reading_comments_tokenize_lexer_line_col_are_correct(){
+        let mut lex: Lexer = Lexer::new("#abc_def = 2\n".to_string());
+        lex.tokenize();
+        assert_eq!((lex.line, lex.column), (2,1));
+    }
+    #[test]
+    fn reading_comments_tokenize_returns_empty_vector(){
+        let mut lex: Lexer = Lexer::new("#abc_def = 2\n".to_string());
+        let actual_token_vec: Vec<Token> = lex.tokenize();
+
+        let expected: Vec<Token> = vec![];
+
+        assert_eq!(actual_token_vec, expected);
+    }
+    #[test]
+    fn read_string_literal_makes_correct_token(){
+        let mut lex: Lexer = Lexer::new("\"test\"".to_string());
+        let actual_token_vec: Vec<Token> = lex.tokenize();
+
+        let expected: Vec<Token> = vec![
+            Token::new(TokenType::StringLiteral("\"test\"".to_string()), 1, 1)
+        ];
+
+        assert_eq!(actual_token_vec, expected);
+    }
+}
\ No newline at end of file
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
new file mode 100644
index 0000000..325f721
--- /dev/null
+++ b/src/lexer/mod.rs
@@ -0,0 +1,2 @@
+pub mod token;
+pub mod lexer;
\ No newline at end of file
diff --git a/src/lexer/token.rs b/src/lexer/token.rs
new file mode 100644
index 0000000..d3c7e0a
--- /dev/null
+++ b/src/lexer/token.rs
@@ -0,0 +1,73 @@
+#[derive(Debug, Clone, PartialEq)]
+pub enum TokenType {
+    // Keywords
+    Func,
+    Let,
+    If,
+    Then,
+    Else,
+    Not,
+    While,
+    Print,
+    Do,
+    Is,
+
+    // Types
+    Integer,
+    Boolean,
+
+    // Literals
+    True,
+    False,
+    IntegerLiteral(i64),
+    StringLiteral(String),
+
+    // Identifiers
+    Identifier(String),
+
+    // Operators
+    Colon, // :
+    Arrow, // ->
+    Assign, // =
+    GreaterThan, // >
+    LessThan, // <
+    Equals, // ==
+    Plus, // +
+    Minus, // -
+    Multiply, // *
+
+    // Punctuation
+    LeftParen, // (
+    RightParen, // )
+    LeftBrace, // {
+    RightBrace, // }
+    Comma, // ,
+    Semicolon, // ;
+
+    // Special
+    Eof,
+}
+
+#[derive(Debug, Clone)]
+pub struct Token {
+    pub token_type: TokenType,
+    pub line: usize,
+    pub column: usize,
+}
+
+impl Token {
+    pub fn new(token_type: TokenType, line: usize, column: usize) -> Self {
+        Token {
+            token_type,
+            line,
+            column,
+        }
+    }
+}
+impl PartialEq for Token {
+    fn eq(&self, other: &Self) -> bool {
+        self.token_type == other.token_type &&
+        self.line == other.line &&
+        self.column == other.column
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index c97fc64..ef90e80 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,27 @@
+mod lexer;
+
+use std::env;
+use std::fs;
+
+use lexer::lexer::Lexer; // adjust if needed
+
 fn main() {
-    println!("Hello, world!");
+    let args: Vec<String> = env::args().collect();
+
+    if args.len() < 3 {
+        eprintln!("Usage: triviC <mode> <file>");
+        std::process::exit(1);
+    }
+
+    let filename = &args[2];
+
+    let source = fs::read_to_string(filename)
+        .expect("Failed to read file");
+
+    let mut lexer = Lexer::new(source);
+    let _tokens = lexer.tokenize();
+
+    println!("Lexing completed successfully.");
 }
 
 #[cfg(test)]
diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs
new file mode 100644
index 0000000..3533058
--- /dev/null
+++ b/src/semantic/mod.rs
@@ -0,0 +1,2 @@
+pub mod types;
+pub mod symbol_table;
\ No newline at end of file
diff --git a/src/semantic/symbol_table.rs b/src/semantic/symbol_table.rs
new file mode 100644
index 0000000..815f864
--- /dev/null
+++ b/src/semantic/symbol_table.rs
@@ -0,0 +1,11 @@
+use std::collections::HashMap;
+pub struct Symbol {
+    symbol_type: Type,
+    scope_level: usize,
+    //perchance we need to add some more info, for functions (return types, param names)
+}
+
+pub struct SymbolTable {
+    scopes: Vec<HashMap<String, Symbol>>,
+    //key = name i figured, so no "name" property in Symbol struct
+}
\ No newline at end of file
diff --git a/src/semantic/types.rs b/src/semantic/types.rs
new file mode 100644
index 0000000..da7232b
--- /dev/null
+++ b/src/semantic/types.rs
@@ -0,0 +1,5 @@
+#[derive(Debug, Clone, PartialEq)]
+pub enum Type {
+    Integer,
+    Boolean,
+}