diff options
author | RaindropsSys <raindrops@equestria.dev> | 2024-04-06 23:33:53 +0200 |
---|---|---|
committer | RaindropsSys <raindrops@equestria.dev> | 2024-04-06 23:33:53 +0200 |
commit | 3be57519a819291ef77087116e21ae9e167434dc (patch) | |
tree | be304879b661b253d126894eeb06082b40c0a9d3 /axisc | |
parent | 245d3a35dfc47cc723022c1fbfdae62e40547127 (diff) | |
download | axis-3be57519a819291ef77087116e21ae9e167434dc.tar.gz axis-3be57519a819291ef77087116e21ae9e167434dc.tar.bz2 axis-3be57519a819291ef77087116e21ae9e167434dc.zip |
Initial error handling and fully featured lexer (v0.0.3)
Diffstat (limited to 'axisc')
-rw-r--r-- | axisc/Cargo.toml | 2 | ||||
-rw-r--r-- | axisc/src/colors.rs | 42 | ||||
-rw-r--r-- | axisc/src/error.rs | 79 | ||||
-rw-r--r-- | axisc/src/lexer.rs | 134 | ||||
-rw-r--r-- | axisc/src/main.rs | 100 | ||||
-rw-r--r-- | axisc/src/reader.rs | 61 |
6 files changed, 356 insertions, 62 deletions
diff --git a/axisc/Cargo.toml b/axisc/Cargo.toml index 63583de..80525d7 100644 --- a/axisc/Cargo.toml +++ b/axisc/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "axisc" -version = "0.0.2" +version = "0.0.3" edition = "2021" description = "A compiler for the Axis programming language." authors = ["Raindrops", "ryze312"] diff --git a/axisc/src/colors.rs b/axisc/src/colors.rs new file mode 100644 index 0000000..866e367 --- /dev/null +++ b/axisc/src/colors.rs @@ -0,0 +1,42 @@ +pub const _COLOR_FG_BLACK: &str = "\x1b[30m"; +pub const _COLOR_FG_RED: &str = "\x1b[31m"; +pub const _COLOR_FG_GREEN: &str = "\x1b[32m"; +pub const _COLOR_FG_YELLOW: &str = "\x1b[33m"; +pub const _COLOR_FG_BLUE: &str = "\x1b[34m"; +pub const _COLOR_FG_MAGENTA: &str = "\x1b[35m"; +pub const _COLOR_FG_CYAN: &str = "\x1b[36m"; +pub const _COLOR_FG_GRAY_BRIGHT: &str = "\x1b[37m"; +pub const _COLOR_FG_GRAY: &str = "\x1b[90m"; +pub const _COLOR_FG_RED_BRIGHT: &str = "\x1b[91m"; +pub const _COLOR_FG_GREEN_BRIGHT: &str = "\x1b[92m"; +pub const _COLOR_FG_YELLOW_BRIGHT: &str = "\x1b[93m"; +pub const _COLOR_FG_BLUE_BRIGHT: &str = "\x1b[94m"; +pub const _COLOR_FG_MAGENTA_BRIGHT: &str = "\x1b[95m"; +pub const _COLOR_FG_CYAN_BRIGHT: &str = "\x1b[96m"; +pub const _COLOR_FG_WHITE: &str = "\x1b[97m"; + +pub const _COLOR_BG_BLACK: &str = "\x1b[40m"; +pub const _COLOR_BG_RED: &str = "\x1b[41m"; +pub const _COLOR_BG_GREEN: &str = "\x1b[42m"; +pub const _COLOR_BG_YELLOW: &str = "\x1b[43m"; +pub const _COLOR_BG_BLUE: &str = "\x1b[44m"; +pub const _COLOR_BG_MAGENTA: &str = "\x1b[45m"; +pub const _COLOR_BG_CYAN: &str = "\x1b[46m"; +pub const _COLOR_BG_GRAY_BRIGHT: &str = "\x1b[47m"; +pub const _COLOR_BG_GRAY: &str = "\x1b[100m"; +pub const _COLOR_BG_RED_BRIGHT: &str = "\x1b[101m"; +pub const _COLOR_BG_GREEN_BRIGHT: &str = "\x1b[102m"; +pub const _COLOR_BG_YELLOW_BRIGHT: &str = "\x1b[103m"; +pub const _COLOR_BG_BLUE_BRIGHT: &str = "\x1b[104m"; +pub const _COLOR_BG_MAGENTA_BRIGHT: &str = "\x1b[105m"; +pub const 
_COLOR_BG_CYAN_BRIGHT: &str = "\x1b[106m"; +pub const _COLOR_BG_WHITE: &str = "\x1b[107m"; + +pub const _FORMAT_RESET: &str = "\x1b[0m"; +pub const _FORMAT_BOLD: &str = "\x1b[1m"; +pub const _FORMAT_DIM: &str = "\x1b[2m"; +pub const _FORMAT_ITALIC: &str = "\x1b[3m"; +pub const _FORMAT_UNDERLINE: &str = "\x1b[4m"; +pub const _FORMAT_BLINK_SLOW: &str = "\x1b[5m"; +pub const _FORMAT_BLINK_FAST: &str = "\x1b[6m"; +pub const _FORMAT_INVERT: &str = "\x1b[7m"; diff --git a/axisc/src/error.rs b/axisc/src/error.rs new file mode 100644 index 0000000..71c5f34 --- /dev/null +++ b/axisc/src/error.rs @@ -0,0 +1,79 @@ +use std::fmt; +use std::process::exit; +use crate::colors::{_COLOR_FG_BLUE_BRIGHT, _COLOR_FG_RED, _FORMAT_BOLD, _FORMAT_RESET}; +use crate::lexer::TokenPosition; +use crate::reader::TokenScanner; + +pub fn raise_error(token_scanner: &TokenScanner, error: CompileError, base_hint: Option<&str>, hints: Option<ErrorHints>, note: Option<&str>) -> ! { + let hints = hints.unwrap_or_default(); + let mut longest_line = token_scanner.line.to_string().len(); + + for hint in &hints { + let hint_line_length = hint.position.line.to_string().len(); + if hint_line_length > longest_line { + longest_line = hint_line_length; + } + } + + println!("{_FORMAT_BOLD}{_COLOR_FG_RED}error{_FORMAT_RESET}{_FORMAT_BOLD}: [{:?}] {}", error, error); + println!("{}{_FORMAT_BOLD}{_COLOR_FG_BLUE_BRIGHT}--> {}:{}:{}", " ".repeat(longest_line), token_scanner.file_name.display(), token_scanner.line, token_scanner.column); + println!("{}{_FORMAT_BOLD}{_COLOR_FG_BLUE_BRIGHT} |", " ".repeat(longest_line)); + + for hint in &hints { + if hint.position.line < token_scanner.line { + println!("{}{_FORMAT_BOLD}{_COLOR_FG_BLUE_BRIGHT}{} | {_FORMAT_RESET}{}", " ".repeat(longest_line - hint.position.line.to_string().len()), hint.position.line, token_scanner.get_line(hint.position.line)); + println!("{}{_FORMAT_BOLD}{_COLOR_FG_BLUE_BRIGHT} | {}- {}", " ".repeat(longest_line), " ".repeat(hint.position.column - 1), 
hint.message); + } + } + + println!("{}{_FORMAT_BOLD}{_COLOR_FG_BLUE_BRIGHT}{} | {_FORMAT_RESET}{}", " ".repeat(longest_line - token_scanner.line.to_string().len()), token_scanner.line, token_scanner.get_current_line()); + println!("{}{_FORMAT_BOLD}{_COLOR_FG_BLUE_BRIGHT} | {}{_FORMAT_BOLD}{_COLOR_FG_RED}^ {}", " ".repeat(longest_line), " ".repeat(token_scanner.column - 1), base_hint.unwrap_or("")); + + for hint in &hints { + if hint.position.line > token_scanner.line { + println!("{}{_FORMAT_BOLD}{_COLOR_FG_BLUE_BRIGHT}{} | {_FORMAT_RESET}{}", " ".repeat(longest_line - hint.position.line.to_string().len()), hint.position.line, token_scanner.get_line(hint.position.line)); + println!("{}{_FORMAT_BOLD}{_COLOR_FG_BLUE_BRIGHT} | {}- {}", " ".repeat(longest_line), " ".repeat(hint.position.column - 1), hint.message); + } + } + + let note = note.unwrap_or_default(); + if !note.is_empty() { + println!("{}{_FORMAT_BOLD}{_COLOR_FG_BLUE_BRIGHT} |", " ".repeat(longest_line)); + println!("{}{_FORMAT_BOLD}{_COLOR_FG_BLUE_BRIGHT} = {_FORMAT_RESET}{_FORMAT_BOLD}note{_FORMAT_RESET}: {}", " ".repeat(longest_line), note); + } + + println!(); + + exit(-1) +} + +#[derive(Debug)] +pub enum CompileError { + UnexpectedStringEOF, + UnexpectedStringLineBreak +} + +impl fmt::Display for CompileError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CompileError::UnexpectedStringEOF => write!(f, "unexpected end of file while reading string"), + CompileError::UnexpectedStringLineBreak => write!(f, "unexpected line break while reading string"), + } + } +} + +pub type ErrorHints<'a> = Vec<ErrorHint<'a>>; + +pub struct ErrorHint<'a> { + pub position: TokenPosition, + pub message: &'a str +} + +impl <'a> ErrorHint<'a> { + pub fn new(position: TokenPosition, message: &'a str) -> Self { + Self { + position, + message + } + } +} diff --git a/axisc/src/lexer.rs b/axisc/src/lexer.rs index ff13fd3..49913a4 100644 --- a/axisc/src/lexer.rs +++ b/axisc/src/lexer.rs @@ -1,11 +1,24 
@@ +use std::fmt; +use crate::error::{CompileError, ErrorHint, ErrorHints, raise_error}; use crate::reader::TokenScanner; #[derive(Debug)] -pub enum Token { +pub enum TokenKind { Identifier(String), Keyword(Keyword), Literal(Literal), - SimpleToken(SimpleToken) + Simple(SimpleToken) +} + +impl fmt::Display for TokenKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TokenKind::Identifier(s) => write!(f, "`{}`", s), + TokenKind::Keyword(k) => write!(f, "{:?}", k), + TokenKind::Literal(l) => write!(f, "{}", l), + TokenKind::Simple(t) => write!(f, "{}", t) + } + } } #[derive(Debug)] @@ -43,12 +56,39 @@ pub enum SimpleToken { Caret, Dollar, Equals, + DoubleEquals, Semicolon, GreaterThan, LessThan, Add, Subtract, LineFeed, + Comment, + Arrow +} + +impl fmt::Display for SimpleToken { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SimpleToken::Multiply => write!(f, "multiply"), + SimpleToken::Divide => write!(f, "divide"), + SimpleToken::Pound => write!(f, "pound"), + SimpleToken::Colon => write!(f, "colon"), + SimpleToken::Percent => write!(f, "percent"), + SimpleToken::Caret => write!(f, "caret"), + SimpleToken::Dollar => write!(f, "dollar"), + SimpleToken::Equals => write!(f, "equals"), + SimpleToken::DoubleEquals => write!(f, "double equals"), + SimpleToken::Semicolon => write!(f, "semicolon"), + SimpleToken::GreaterThan => write!(f, "greater than"), + SimpleToken::LessThan => write!(f, "less than"), + SimpleToken::Add => write!(f, "plus"), + SimpleToken::Subtract => write!(f, "minus "), + SimpleToken::LineFeed => write!(f, "line break"), + SimpleToken::Comment => write!(f, "comment"), + SimpleToken::Arrow => write!(f, "arrow"), + } + } } #[derive(Debug)] @@ -59,6 +99,17 @@ pub enum Literal { Array(Array) } +impl fmt::Display for Literal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Literal::String(_) => write!(f, "string"), + Literal::Number(_) => write!(f, "number"), + 
Literal::Boolean(_) => write!(f, "boolean"), + Literal::Array(a) => write!(f, "{}", a), + } + } +} + #[derive(Debug)] pub enum Keyword { Into, @@ -68,29 +119,98 @@ pub enum Keyword { String, As, Let, + Free, + Derive, + Checkpoint, + Subroutine, + GoTo, + AsFn, + If, + Then, + Else, + And, + Or, + Xor, + Not, + For, + To, + Next, + While, + Forever, + Repeat, } #[derive(Debug)] pub enum Array { - StringArray(Vec<String>), - NumberArray(Vec<f64>), - BooleanArray(Vec<bool>), - MultiDimensionalArray(Vec<Array>) + String(Vec<String>), + Number(Vec<f64>), + Boolean(Vec<bool>), + MultiDimensional(Vec<Array>) +} + +impl fmt::Display for Array { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Array::String(_) => write!(f, "string array"), + Array::Number(_) => write!(f, "number array"), + Array::Boolean(_) => write!(f, "boolean array"), + Array::MultiDimensional(_) => write!(f, "multi-dimensional array"), + } + } } impl Literal { pub fn string_literal_from_scanner(token_scanner: &mut TokenScanner) -> Literal { token_scanner.advance(1); + + let start_position = TokenPosition::from_scanner(token_scanner); + let start_position_hint = ErrorHint::new(start_position, "string starts here"); + let start_position_hints = ErrorHints::from(vec![start_position_hint]); + let mut string = String::new(); loop { match token_scanner.advance(1) { Some('"') => break, + Some('\n') => raise_error(token_scanner, CompileError::UnexpectedStringLineBreak, Some("line break occurs here"), Some(start_position_hints), Some("strings are wrapped in \"double quotes\"")), Some(c) => string.push(c), - None => todo!("Reached EOF before end of string") + None => raise_error(token_scanner, CompileError::UnexpectedStringEOF, Some("file ends here"), Some(start_position_hints), Some("strings are wrapped in \"double quotes\"")), } } Literal::String(string) } } + +#[derive(Debug)] +pub struct Token { + position: TokenPosition, + token: TokenKind +} + +impl Token { + pub fn new(position: 
(usize, usize), token: TokenKind) -> Self { + Self { + position: TokenPosition { + line: position.0, + column: position.1 + }, + token + } + } +} + +#[derive(Debug)] +pub struct TokenPosition { + pub line: usize, + pub column: usize +} + +impl TokenPosition { + pub fn from_scanner(token_scanner: &TokenScanner) -> Self { + Self { + line: token_scanner.line, + column: token_scanner.column + } + } +} diff --git a/axisc/src/main.rs b/axisc/src/main.rs index 6b49b6f..5243c5c 100644 --- a/axisc/src/main.rs +++ b/axisc/src/main.rs @@ -4,107 +4,129 @@ use cli::Cli; use clap::Parser; use reader::read_file; use reader::TokenScanner; -use lexer::Token; -use crate::lexer::{Keyword, Literal, SimpleToken}; +use lexer::TokenKind; +use crate::lexer::{Token, Keyword, Literal, SimpleToken}; mod cli; mod reader; mod lexer; +mod error; +mod colors; fn main() { let args = Cli::parse(); let string = read_file(&args.source); - let mut scanner = TokenScanner::from_string(&string); + let mut scanner = TokenScanner::from_file_string(&string, &args.source); let mut tokens: Vec<Token> = vec![]; + let mut pos: (usize, usize) = (1, 1); loop { match scanner.peek() { None => break, Some('#') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::Pound)) + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Pound))) }, Some('*') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::Multiply)) + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Multiply))) }, Some('-') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::Subtract)) + + if scanner.peek().unwrap_or(&' ').eq(&'-') { + scanner.advance_line(); + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Comment))) + } else if scanner.peek().unwrap_or(&' ').eq(&'>') { + scanner.advance(1); + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Arrow))) + } else { + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Subtract))) + } }, Some('+') => { 
scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::Add)) + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Add))) }, Some('<') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::LessThan)) + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::LessThan))) }, Some('>') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::GreaterThan)) + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::GreaterThan))) }, Some('=') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::Equals)) + + if scanner.peek().unwrap_or(&' ').eq(&'=') { + scanner.advance(1); + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::DoubleEquals))) + } else { + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Equals))) + } }, Some(';') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::Semicolon)) + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Semicolon))) }, Some('/') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::Divide)) + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Divide))) }, Some('%') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::Percent)) + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Percent))) }, Some('^') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::Caret)) + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Caret))) }, Some(':') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::Colon)) + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Colon))) }, Some('$') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::Dollar)) + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::Dollar))) }, Some('\n') => { scanner.advance(1); - tokens.push(Token::SimpleToken(SimpleToken::LineFeed)) + tokens.push(Token::new(pos, TokenKind::Simple(SimpleToken::LineFeed))) }, Some('"') => { - 
tokens.push(Token::Literal(Literal::string_literal_from_scanner(&mut scanner))) - }, - Some(' ' | '\n') => { - scanner.advance(1); - }, - Some(_) => { - let word = scanner.advance_word(); - let word_number: Result<f64, ParseFloatError> = f64::from_str(&word); + tokens.push(Token::new(pos, TokenKind::Literal(Literal::string_literal_from_scanner(&mut scanner)))) + }, + Some(c) => match c { + ' ' | '\n' => { + scanner.advance(1); + }, + _ => { + let word = scanner.advance_word(); + let word_number: Result<f64, ParseFloatError> = f64::from_str(&word); - match word_number { - Ok(n) => tokens.push(Token::Literal(Literal::Number(n))), - Err(_) => match word.as_ref() { - "" => (), - "True" => tokens.push(Token::Literal(Literal::Boolean(true))), - "False" => tokens.push(Token::Literal(Literal::Boolean(false))), - "Into" => tokens.push(Token::Keyword(Keyword::Into)), - "Let" => tokens.push(Token::Keyword(Keyword::Let)), - "As" => tokens.push(Token::Keyword(Keyword::As)), - "String" => tokens.push(Token::Keyword(Keyword::String)), - "Number" => tokens.push(Token::Keyword(Keyword::Number)), - "Array" => tokens.push(Token::Keyword(Keyword::Array)), - "Boolean" => tokens.push(Token::Keyword(Keyword::Boolean)), - _ => tokens.push(Token::Identifier(word)) + match word_number { + Ok(n) => tokens.push(Token::new(pos, TokenKind::Literal(Literal::Number(n)))), + Err(_) => match word.as_ref() { + "" => (), + "True" => tokens.push(Token::new(pos, TokenKind::Literal(Literal::Boolean(true)))), + "False" => tokens.push(Token::new(pos, TokenKind::Literal(Literal::Boolean(false)))), + "Into" => tokens.push(Token::new(pos, TokenKind::Keyword(Keyword::Into))), + "Let" => tokens.push(Token::new(pos, TokenKind::Keyword(Keyword::Let))), + "As" => tokens.push(Token::new(pos, TokenKind::Keyword(Keyword::As))), + "String" => tokens.push(Token::new(pos, TokenKind::Keyword(Keyword::String))), + "Number" => tokens.push(Token::new(pos, TokenKind::Keyword(Keyword::Number))), + "Array" => 
tokens.push(Token::new(pos, TokenKind::Keyword(Keyword::Array))), + "Boolean" => tokens.push(Token::new(pos, TokenKind::Keyword(Keyword::Boolean))), + _ => tokens.push(Token::new(pos, TokenKind::Identifier(word))) + } } } } } + + pos = (scanner.line, scanner.column) } println!("{:?}", tokens); diff --git a/axisc/src/reader.rs b/axisc/src/reader.rs index fd600a7..b62fd94 100644 --- a/axisc/src/reader.rs +++ b/axisc/src/reader.rs @@ -15,28 +15,35 @@ pub fn read_file(source: &PathBuf) -> String { let mut string = String::from(""); for line in reader.lines() { - let mut line = line.unwrap(); + let line = line.unwrap(); - if let Some(char_index) = line.find("--") { - line.truncate(char_index); - } - - if !line.is_empty() { - string.push_str(line.trim()); - string.push('\n'); - } + string.push_str(line.trim()); + string.push('\n'); } string } pub struct TokenScanner<'a> { - pub chars: Peekable<Chars<'a>> + pub chars: Peekable<Chars<'a>>, + pub string: &'a str, + pub line: usize, + pub column: usize, + pub file_name: &'a PathBuf } impl <'a> TokenScanner<'a> { pub fn advance(&mut self, num: usize) -> Option<char> { - self.chars.nth(num - 1) + let ch = self.chars.nth(num - 1); + + if ch.map_or(false, |c| c == '\n') { + self.line += 1; + self.column = 1; + ch + } else { + self.column += 1; + ch + } } pub fn peek(&mut self) -> Option<&char> { @@ -52,19 +59,43 @@ impl <'a> TokenScanner<'a> { } word.push(*char); + self.column += 1; self.chars.next(); } word } - pub fn peek_word(&mut self) -> String { - todo!(":(") + pub fn advance_line(&mut self) -> String { + let mut word = String::new(); + + while let Some(char) = self.chars.peek() { + if char.eq(&'\n') { + break; + } + + word.push(*char); + self.chars.next(); + } + + word } - pub fn from_string(string: &'a str) -> Self { + pub fn from_file_string(string: &'a str, file_name: &'a PathBuf) -> Self { Self { - chars: string.chars().peekable() + chars: string.chars().peekable(), + string, + column: 1, + line: 1, + file_name } } + 
+ pub fn get_current_line(&self) -> &str { + self.string.lines().nth(self.line - 1).unwrap_or_default() + } + + pub fn get_line(&self, line: usize) -> &str { + self.string.lines().nth(line - 1).unwrap_or_default() + } } |