Skip to content

Commit

Permalink
Move scanner to own module, start implementing parser
Browse files Browse the repository at this point in the history
  • Loading branch information
Vegard committed May 8, 2023
1 parent 8173667 commit a21d836
Show file tree
Hide file tree
Showing 8 changed files with 445 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/target
7 changes: 3 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@ use clap::Parser;
use std::{fmt, path::PathBuf};
mod error;
pub use error::Error;
mod scanner;
pub use scanner::Scanner;
mod token;
pub mod scanner;
use rustyline::error::ReadlineError;
use rustyline::Editor;
pub use token::{Literal, Token, TokenType};
pub use scanner::Scanner;
pub use scanner::{Literal, Token, TokenType};

pub struct Repl {
history_path: String,
Expand Down
71 changes: 71 additions & 0 deletions src/parser/expr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
use std::fmt;
use crate::Token;
/*
Expression grammar:
expression → equality ;
equality → comparison ( ( "!=" | "==" ) comparison )* ;
comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )*;
term → factor ( ( "-" | "+" ) factor )* ;
factor → unary ( ( "/" | "*" ) unary )* ;
unary → ( "-" | "!" ) unary | primary ;
primary → NUMBER | STRING | "true" | "false" | "nil" | "(" expression ")" ;
*/

/// A node in the expression AST (see the grammar comment above).
pub enum Expr {
    /// A literal value (number, string, `true`, `false`, or `nil`).
    Literal(LiteralOp),
    /// A prefix operator token applied to a single operand.
    Unary(Token, Box<Expr>),
    /// An infix operator token with left and right operands.
    Binary(Box<Expr>, Token, Box<Expr>),
    /// A parenthesized sub-expression.
    Grouping(Box<Expr>),
}

impl fmt::Display for Expr {
    /// Renders the tree in a Lisp-like parenthesized form, e.g.
    /// `( * ( - 123) ( group 45.67))` — useful for debugging the parser.
    ///
    /// Token lexemes are stored as raw bytes; a non-UTF-8 lexeme is
    /// reported as `fmt::Error` (the only error `fmt` lets us surface).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Expr::Literal(op) => write!(f, "{}", op.op_type),
            Expr::Unary(t, operand) => {
                let lexeme = std::str::from_utf8(&t.lexeme).map_err(|_| fmt::Error)?;
                write!(f, "( {} {})", lexeme, operand)
            }
            Expr::Binary(lhs, t, rhs) => {
                let lexeme = std::str::from_utf8(&t.lexeme).map_err(|_| fmt::Error)?;
                write!(f, "( {} {} {})", lexeme, lhs, rhs)
            }
            Expr::Grouping(inner) => write!(f, "( group {})", inner),
        }
    }
}


/// The value carried by a literal expression node.
///
/// Derives the standard traits expected on a public AST value type so it
/// can be debugged, duplicated, and compared in tests.
#[derive(Debug, Clone, PartialEq)]
pub enum LiteralOpType {
    /// Numeric literal (all numbers are f64, as in the scanner).
    Number(f64),
    /// String literal (quotes already stripped by the scanner).
    Str(String),
    True,
    False,
    Nil,
}


impl fmt::Display for LiteralOpType {
    /// Formats the literal the way the AST printer expects: numbers and
    /// strings verbatim, keyword literals capitalized (`True`, `False`,
    /// `Nil`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            LiteralOpType::Number(n) => write!(f, "{}", n),
            LiteralOpType::Str(s) => f.write_str(s),
            LiteralOpType::True => f.write_str("True"),
            LiteralOpType::False => f.write_str("False"),
            LiteralOpType::Nil => f.write_str("Nil"),
        }
    }
}

/// Wrapper held by `Expr::Literal`; carries the literal's kind and value.
pub struct LiteralOp {
    pub op_type: LiteralOpType,
}

4 changes: 4 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Parser module: `expr` holds the expression AST, `parser` the parser
// over the scanner's token stream.
mod expr;
mod parser;

// Re-export the AST types at the module boundary. A plain `use expr::*;`
// here is a private, unused import (compiler warning) and does not make
// `Expr` reachable as `crate::...`, which `parser.rs` relies on.
pub use expr::*;
15 changes: 15 additions & 0 deletions src/parser/parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
use crate::{Expr, Token};

/// Parser state: the scanner's token stream plus a cursor into it.
struct Parser {
    tokens: Vec<Token>,  // tokens produced by the scanner
    current: usize,      // index of the next unconsumed token
}

impl Parser {
pub fn new(tokens: Vec<Token>) -> Self {
Parser {
tokens,
current: 0,
}
}
}
5 changes: 5 additions & 0 deletions src/scanner/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Scanner module: `scanner` holds the lexer, `token` the token types.
mod scanner;
mod token;

// Flatten both submodules so callers use `crate::scanner::*` directly.
pub use scanner::*;
pub use token::*;
260 changes: 260 additions & 0 deletions src/scanner/scanner.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
use crate::Error;
use crate::{Literal, Token, TokenType};
use std::collections::HashMap;

/// Hand-written lexer that turns source text into a `Vec<Token>`.
pub struct Scanner {
    source: Vec<u8>,   // raw source bytes (lexical grammar is ASCII-based)
    tokens: Vec<Token>, // tokens produced so far
    start: usize,      // byte offset where the current lexeme begins
    current: usize,    // byte offset of the next unread byte
    line: usize,       // 1-based line number, for error reporting
    keywords: HashMap<String, TokenType>, // reserved word -> token type
}

impl Scanner {
    /// Creates a scanner over `source`. The text is stored as raw bytes so
    /// `start`/`current` are stable byte offsets.
    pub fn new(source: String) -> Self {
        Scanner {
            source: source.into_bytes(),
            tokens: Vec::<Token>::new(),
            start: 0,
            current: 0,
            line: 1,
            // Reserved words, consulted after an identifier is scanned.
            keywords: vec![
                ("and", TokenType::And),
                ("class", TokenType::Class),
                ("else", TokenType::Else),
                ("false", TokenType::False),
                ("for", TokenType::For),
                ("fun", TokenType::Fun),
                ("if", TokenType::If),
                ("nil", TokenType::Nil),
                ("or", TokenType::Or),
                ("print", TokenType::Print),
                ("return", TokenType::Return),
                ("super", TokenType::Super),
                ("this", TokenType::This),
                ("true", TokenType::True),
                ("var", TokenType::Var),
                ("while", TokenType::While),
                ("lambda", TokenType::Lambda),
            ]
            .into_iter()
            .map(|(k, v)| (String::from(k), v))
            .collect(),
        }
    }

    /// Scans the entire source, appending tokens (terminated by an `Eof`
    /// token) and returning them, or the first syntax error encountered.
    pub fn scan_tokens(&mut self) -> Result<&Vec<Token>, Error> {
        while !self.is_at_end() {
            // Each lexeme starts where the previous one ended.
            self.start = self.current;
            self.scan_token()?;
        }

        self.tokens.push(Token {
            token_type: TokenType::Eof,
            lexeme: Vec::<u8>::new(),
            literal: None,
            line: self.line,
        });

        Ok(&self.tokens)
    }

    fn is_at_end(&self) -> bool {
        self.current >= self.source.len()
    }

    /// Consumes one lexeme starting at `self.start` and emits the
    /// corresponding token (or nothing, for whitespace and comments).
    fn scan_token(&mut self) -> Result<(), Error> {
        let c = self.advance();
        match c {
            '(' => self.add_token(TokenType::LeftParen),
            ')' => self.add_token(TokenType::RightParen),
            // BUG FIX: these two arms previously emitted RightBrace for
            // '{' and LeftBrace for '}' (swapped).
            '{' => self.add_token(TokenType::LeftBrace),
            '}' => self.add_token(TokenType::RightBrace),
            ',' => self.add_token(TokenType::Comma),
            '.' => self.add_token(TokenType::Dot),
            '-' => self.add_token(TokenType::Minus),
            '+' => self.add_token(TokenType::Plus),
            ';' => self.add_token(TokenType::Semicolon),
            '*' => self.add_token(TokenType::Star),
            // One- or two-character operators: consume '=' if it follows.
            '!' => {
                if self.matches('=') {
                    self.add_token(TokenType::BangEqual)
                } else {
                    self.add_token(TokenType::Bang)
                }
            }
            '=' => {
                if self.matches('=') {
                    self.add_token(TokenType::EqualEqual)
                } else {
                    self.add_token(TokenType::Equal)
                }
            }
            '<' => {
                if self.matches('=') {
                    self.add_token(TokenType::LessEqual)
                } else {
                    self.add_token(TokenType::Less)
                }
            }
            '>' => {
                if self.matches('=') {
                    self.add_token(TokenType::GreaterEqual)
                } else {
                    self.add_token(TokenType::Greater)
                }
            }
            '/' => {
                if self.matches('/') {
                    // A line comment runs to end of line; no token emitted.
                    while self.peek() != '\n' && !self.is_at_end() {
                        self.advance();
                    }
                } else {
                    self.add_token(TokenType::Slash)
                }
            }
            ' ' | '\r' | '\t' => {}
            '\n' => self.line += 1,
            '"' => self.string()?,
            _ => {
                if self.is_digit(c) {
                    self.number();
                } else if self.is_alpha(c) {
                    self.identifier();
                } else {
                    return Err(Error::SyntaxError(
                        self.line.to_string(),
                        "Unexpected character".to_string(),
                        c.to_string(),
                    ));
                }
            }
        };
        Ok(())
    }

    /// ASCII letter or underscore — what may start an identifier.
    fn is_alpha(&self, c: char) -> bool {
        c.is_ascii_alphabetic() || c == '_'
    }

    fn is_alphanumeric(&self, c: char) -> bool {
        self.is_alpha(c) || self.is_digit(c)
    }

    fn is_digit(&self, c: char) -> bool {
        c.is_ascii_digit()
    }

    /// Scans an identifier or keyword starting at `self.start`.
    fn identifier(&mut self) {
        while self.is_alphanumeric(self.peek()) {
            self.advance();
        }
        // Safe to unwrap: the lexeme is built only from ASCII bytes.
        let text = String::from_utf8(self.source[self.start..self.current].to_vec()).unwrap();

        let token_type = match self.keywords.get(&text) {
            Some(kw_val) => *kw_val,
            None => TokenType::Identifier,
        };

        // Only plain identifiers carry a literal; keywords stand alone.
        match token_type {
            TokenType::Identifier => {
                self.add_token_literal(token_type, Some(Literal::Identifier(text)))
            }
            _ => self.add_token(token_type),
        }
    }

    /// Scans a number literal: digits with an optional fractional part.
    fn number(&mut self) {
        while self.is_digit(self.peek()) {
            self.advance();
        }
        // Look for a fractional part; '.' is consumed only when followed
        // by a digit, so `123.` stays Number + Dot.
        if self.peek() == '.' && self.is_digit(self.peek_next()) {
            // consume the '.'
            self.advance();
            while self.is_digit(self.peek()) {
                self.advance();
            }
        }

        // The lexeme is ASCII digits (plus one '.'), so both unwraps hold.
        let val: f64 = String::from_utf8(self.source[self.start..self.current].to_vec())
            .unwrap()
            .parse()
            .unwrap();

        self.add_token_literal(TokenType::Number, Some(Literal::Number(val)));
    }

    /// Scans a double-quoted string literal; embedded newlines are allowed
    /// and bump the line counter.
    fn string(&mut self) -> Result<(), Error> {
        while self.peek() != '"' && !self.is_at_end() {
            if self.peek() == '\n' {
                self.line += 1;
            }
            self.advance();
        }

        if self.is_at_end() {
            return Err(Error::SyntaxError(
                self.line.to_string(),
                String::from("Parsing error"),
                String::from("Unterminated string"),
            ));
        }
        // Consume the closing '"'.
        self.advance();

        // Store the value without the surrounding quotes.
        self.add_token_literal(
            TokenType::String,
            Some(Literal::Str(
                String::from_utf8(self.source[self.start + 1..self.current - 1].to_vec()).unwrap(),
            )),
        );
        Ok(())
    }

    /// Current byte as a char, without consuming; '\0' at end of input.
    fn peek(&self) -> char {
        if self.is_at_end() {
            '\0'
        } else {
            char::from(self.source[self.current])
        }
    }

    /// One byte of lookahead past `peek()`; '\0' at end of input.
    fn peek_next(&self) -> char {
        if self.current + 1 >= self.source.len() {
            '\0'
        } else {
            char::from(self.source[self.current + 1])
        }
    }

    /// Conditional advance: consumes the next byte only if it equals
    /// `expected`, returning whether it did.
    fn matches(&mut self, expected: char) -> bool {
        if self.is_at_end() {
            return false;
        }
        if char::from(self.source[self.current]) != expected {
            return false;
        }
        self.current += 1;
        true
    }

    fn add_token(&mut self, token_type: TokenType) {
        self.add_token_literal(token_type, None);
    }

    /// Emits a token whose lexeme is the byte range [start, current).
    fn add_token_literal(&mut self, token: TokenType, literal: Option<Literal>) {
        let text = self.source[self.start..self.current].to_vec();
        self.tokens.push(Token {
            token_type: token,
            lexeme: text,
            literal,
            line: self.line,
        });
    }

    /// Consumes and returns the next byte as a char.
    fn advance(&mut self) -> char {
        self.current += 1;
        char::from(self.source[self.current - 1])
    }
}
Loading

0 comments on commit a21d836

Please sign in to comment.