Move scanner to own module, start implementing parser
Vegard committed on May 8, 2023
1 parent 8173667 · commit a21d836
Showing 8 changed files with 445 additions and 4 deletions.
@@ -0,0 +1 @@
/target
@@ -0,0 +1,71 @@
use std::fmt;
use crate::Token;

/*
Expression grammar:
    expression → equality ;
    equality   → comparison ( ( "!=" | "==" ) comparison )* ;
    comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
    term       → factor ( ( "-" | "+" ) factor )* ;
    factor     → unary ( ( "/" | "*" ) unary )* ;
    unary      → ( "-" | "!" ) unary | primary ;
    primary    → NUMBER | STRING | "true" | "false" | "nil" | "(" expression ")" ;
*/
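// Example of how this grammar orders precedence: "1 + 2 * 3" parses as
// Binary(1, +, Binary(2, *, 3)). term matches "1", sees "+", and parses its
// right operand as a factor; factor then consumes all of "2 * 3" before
// control returns to term, so "*" binds tighter than "+".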
pub enum Expr {
    Literal(LiteralOp),
    Unary(Token, Box<Expr>),
    Binary(Box<Expr>, Token, Box<Expr>),
    Grouping(Box<Expr>),
}

impl fmt::Display for Expr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Expr::Literal(op) => write!(f, "{}", &op.op_type),
            Expr::Unary(t, exp) => {
                if let Ok(s) = std::str::from_utf8(&t.lexeme) {
                    return write!(f, "( {} {})", s, &*exp);
                } else {
                    return Err(fmt::Error);
                }
            }
            Expr::Binary(exp_lhs, t, exp_rhs) => {
                if let Ok(s) = std::str::from_utf8(&t.lexeme) {
                    return write!(f, "( {} {} {})", s, &*exp_lhs, &*exp_rhs);
                } else {
                    return Err(fmt::Error);
                }
            }
            Expr::Grouping(exp) => write!(f, "( group {})", &*exp),
        }
    }
}

pub enum LiteralOpType {
    Number(f64),
    Str(String),
    True,
    False,
    Nil,
}

impl fmt::Display for LiteralOpType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            LiteralOpType::Number(n) => write!(f, "{}", n),
            LiteralOpType::Str(s) => write!(f, "{}", s),
            LiteralOpType::True => write!(f, "True"),
            LiteralOpType::False => write!(f, "False"),
            LiteralOpType::Nil => write!(f, "Nil"),
        }
    }
}

pub struct LiteralOp {
    pub op_type: LiteralOpType,
}
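To make the Display impls above concrete, here is a small usage sketch (not part of the commit; it assumes Token's fields stay constructible exactly as they are used in the scanner below):

// Build the tree for "-123 * (45.67)" and pretty-print it.
let expr = Expr::Binary(
    Box::new(Expr::Unary(
        Token { token_type: TokenType::Minus, lexeme: b"-".to_vec(), literal: None, line: 1 },
        Box::new(Expr::Literal(LiteralOp { op_type: LiteralOpType::Number(123.0) })),
    )),
    Token { token_type: TokenType::Star, lexeme: b"*".to_vec(), literal: None, line: 1 },
    Box::new(Expr::Grouping(Box::new(Expr::Literal(LiteralOp {
        op_type: LiteralOpType::Number(45.67),
    })))),
);
// Prints: ( * ( - 123) ( group 45.67))
println!("{}", expr);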
@@ -0,0 +1,4 @@
mod expr;
mod parser;

use expr::*;
@@ -0,0 +1,15 @@
use crate::{Expr, Token};

struct Parser {
    tokens: Vec<Token>,
    current: usize,
}

impl Parser {
    pub fn new(tokens: Vec<Token>) -> Self {
        Parser {
            tokens,
            current: 0,
        }
    }
}
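The Parser is only a stub so far. As a rough sketch of the recursive-descent shape it will likely take (one method per rule of the grammar in the expression module), here is a self-contained, simplified version that evaluates two of the rules directly instead of building an Expr tree; the Tok type, Sketch struct, and helper names are illustrative only, not part of this commit:

// Hypothetical sketch: term/factor from the grammar, evaluated on the fly.
#[derive(Clone, Copy, PartialEq)]
enum Tok {
    Num(f64),
    Plus,
    Star,
}

struct Sketch {
    tokens: Vec<Tok>,
    current: usize,
}

impl Sketch {
    // term → factor ( "+" factor )* ;
    fn term(&mut self) -> f64 {
        let mut value = self.factor();
        while self.matches(Tok::Plus) {
            value += self.factor();
        }
        value
    }

    // factor → primary ( "*" primary )* ;
    fn factor(&mut self) -> f64 {
        let mut value = self.primary();
        while self.matches(Tok::Star) {
            value *= self.primary();
        }
        value
    }

    // primary → NUMBER ;
    fn primary(&mut self) -> f64 {
        match self.advance() {
            Tok::Num(n) => n,
            _ => panic!("expected a number"),
        }
    }

    // Consume the next token only if it equals `expected`.
    fn matches(&mut self, expected: Tok) -> bool {
        if self.current < self.tokens.len() && self.tokens[self.current] == expected {
            self.current += 1;
            true
        } else {
            false
        }
    }

    fn advance(&mut self) -> Tok {
        self.current += 1;
        self.tokens[self.current - 1]
    }
}

fn main() {
    // "1 + 2 * 3" → 7, because factor (multiplication) binds tighter than term.
    let mut p = Sketch {
        tokens: vec![Tok::Num(1.0), Tok::Plus, Tok::Num(2.0), Tok::Star, Tok::Num(3.0)],
        current: 0,
    };
    assert_eq!(p.term(), 7.0);
}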
@@ -0,0 +1,5 @@
mod scanner;
mod token;

pub use scanner::*;
pub use token::*;
@@ -0,0 +1,260 @@
use crate::Error;
use crate::{Literal, Token, TokenType};
use std::collections::HashMap;

pub struct Scanner {
    source: Vec<u8>,
    tokens: Vec<Token>,
    start: usize,
    current: usize,
    line: usize,
    keywords: HashMap<String, TokenType>,
}

impl Scanner {
    pub fn new(source: String) -> Self {
        Scanner {
            source: source.into_bytes(),
            tokens: Vec::<Token>::new(),
            start: 0,
            current: 0,
            line: 1,
            keywords: vec![
                ("and", TokenType::And),
                ("class", TokenType::Class),
                ("else", TokenType::Else),
                ("false", TokenType::False),
                ("for", TokenType::For),
                ("fun", TokenType::Fun),
                ("if", TokenType::If),
                ("nil", TokenType::Nil),
                ("or", TokenType::Or),
                ("print", TokenType::Print),
                ("return", TokenType::Return),
                ("super", TokenType::Super),
                ("this", TokenType::This),
                ("true", TokenType::True),
                ("var", TokenType::Var),
                ("while", TokenType::While),
                ("lambda", TokenType::Lambda),
            ]
            .into_iter()
            .map(|(k, v)| (String::from(k), v))
            .collect(),
        }
    }

    pub fn scan_tokens(&mut self) -> Result<&Vec<Token>, Error> {
        while !self.is_at_end() {
            self.start = self.current;
            self.scan_token()?;
        }

        self.tokens.push(Token {
            token_type: TokenType::Eof,
            lexeme: Vec::<u8>::new(),
            literal: None,
            line: self.line,
        });

        Ok(&self.tokens)
    }

    fn is_at_end(&self) -> bool {
        self.current >= self.source.len()
    }

    fn scan_token(&mut self) -> Result<(), Error> {
        let c = self.advance();
        match c {
            '(' => self.add_token(TokenType::LeftParen),
            ')' => self.add_token(TokenType::RightParen),
            '{' => self.add_token(TokenType::LeftBrace),
            '}' => self.add_token(TokenType::RightBrace),
            ',' => self.add_token(TokenType::Comma),
            '.' => self.add_token(TokenType::Dot),
            '-' => self.add_token(TokenType::Minus),
            '+' => self.add_token(TokenType::Plus),
            ';' => self.add_token(TokenType::Semicolon),
            '*' => self.add_token(TokenType::Star),
            '!' => {
                if self.matches('=') {
                    self.add_token(TokenType::BangEqual)
                } else {
                    self.add_token(TokenType::Bang)
                }
            }
            '=' => {
                if self.matches('=') {
                    self.add_token(TokenType::EqualEqual)
                } else {
                    self.add_token(TokenType::Equal)
                }
            }
            '<' => {
                if self.matches('=') {
                    self.add_token(TokenType::LessEqual)
                } else {
                    self.add_token(TokenType::Less)
                }
            }
            '>' => {
                if self.matches('=') {
                    self.add_token(TokenType::GreaterEqual)
                } else {
                    self.add_token(TokenType::Greater)
                }
            }
            '/' => {
                if self.matches('/') {
                    // A line comment runs until the end of the line.
                    while self.peek() != '\n' && !self.is_at_end() {
                        self.advance();
                    }
                } else {
                    self.add_token(TokenType::Slash)
                }
            }
            ' ' | '\r' | '\t' => {}
            '\n' => self.line += 1,
            '"' => self.string()?,
            _ => {
                if self.is_digit(c) {
                    self.number();
                } else if self.is_alpha(c) {
                    self.identifier();
                } else {
                    return Err(Error::SyntaxError(
                        format!("{}", self.line),
                        "Unexpected character".to_string(),
                        c.to_string(),
                    ));
                }
            }
        };
        Ok(())
    }

    fn is_alpha(&self, c: char) -> bool {
        (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'
    }

    fn is_alphanumeric(&self, c: char) -> bool {
        self.is_alpha(c) || self.is_digit(c)
    }

    fn is_digit(&self, c: char) -> bool {
        c >= '0' && c <= '9'
    }

    fn identifier(&mut self) {
        while self.is_alphanumeric(self.peek()) {
            self.advance();
        }
        let text = String::from_utf8(self.source[self.start..self.current].to_vec()).unwrap();

        let token_type = match self.keywords.get(&text) {
            Some(kw_val) => *kw_val,
            None => TokenType::Identifier,
        };

        match token_type {
            TokenType::Identifier => {
                self.add_token_literal(token_type, Some(Literal::Identifier(text)))
            }
            _ => self.add_token(token_type),
        }
    }

    fn number(&mut self) {
        while self.is_digit(self.peek()) {
            self.advance();
        }
        // Look for a fractional part.
        if self.peek() == '.' && self.is_digit(self.peek_next()) {
            // Consume the '.'
            self.advance();
            while self.is_digit(self.peek()) {
                self.advance();
            }
        }

        let val: f64 = String::from_utf8(self.source[self.start..self.current].to_vec())
            .unwrap()
            .parse()
            .unwrap();

        self.add_token_literal(TokenType::Number, Some(Literal::Number(val)));
    }

    fn string(&mut self) -> Result<(), Error> {
        while self.peek() != '"' && !self.is_at_end() {
            if self.peek() == '\n' {
                self.line += 1;
            }
            self.advance();
        }

        if self.is_at_end() {
            return Err(Error::SyntaxError(
                format!("{}", self.line),
                String::from("Parsing error"),
                String::from("Unterminated string"),
            ));
        }
        // Consume the closing '"'.
        self.advance();

        // Trim the surrounding quotes from the stored literal value.
        self.add_token_literal(
            TokenType::String,
            Some(Literal::Str(
                String::from_utf8(self.source[self.start + 1..self.current - 1].to_vec()).unwrap(),
            )),
        );
        Ok(())
    }

    fn peek(&self) -> char {
        if self.is_at_end() {
            '\0'
        } else {
            char::from(self.source[self.current])
        }
    }

    fn peek_next(&self) -> char {
        if self.current + 1 >= self.source.len() {
            '\0'
        } else {
            char::from(self.source[self.current + 1])
        }
    }

    fn matches(&mut self, expected: char) -> bool {
        if self.is_at_end() {
            return false;
        }
        if char::from(self.source[self.current]) != expected {
            return false;
        }
        self.current += 1;
        true
    }

    fn add_token(&mut self, token_type: TokenType) {
        self.add_token_literal(token_type, None);
    }

    fn add_token_literal(&mut self, token: TokenType, literal: Option<Literal>) {
        let text = self.source[self.start..self.current].to_vec();
        self.tokens.push(Token {
            token_type: token,
            lexeme: text,
            literal,
            line: self.line,
        });
    }

    fn advance(&mut self) -> char {
        self.current += 1;
        char::from(self.source[self.current - 1])
    }
}
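For reference, a minimal usage sketch of the scanner (not part of the commit; it assumes the crate's Error type and the Token fields remain accessible as used above):

fn demo() -> Result<(), Error> {
    let mut scanner = Scanner::new(String::from("var answer = 42;"));
    let tokens = scanner.scan_tokens()?;
    // Expected stream: Var, Identifier("answer"), Equal, Number(42.0), Semicolon, Eof.
    for token in tokens {
        println!("line {}: {}", token.line, String::from_utf8_lossy(&token.lexeme));
    }
    Ok(())
}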