Move scanner to own module, start implementing parser
Vegard committed on May 8, 2023
1 parent 8173667 · commit a21d836
Showing 8 changed files with 445 additions and 4 deletions.
@@ -0,0 +1 @@
/target
@@ -0,0 +1,71 @@
use std::fmt;
use crate::Token;

/*
Expression grammar:
    expression → equality ;
    equality   → comparison ( ( "!=" | "==" ) comparison )* ;
    comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
    term       → factor ( ( "-" | "+" ) factor )* ;
    factor     → unary ( ( "/" | "*" ) unary )* ;
    unary      → ( "-" | "!" ) unary | primary ;
    primary    → NUMBER | STRING | "true" | "false" | "nil" | "(" expression ")" ;
*/
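// Example of how this grammar orders precedence: "1 + 2 * 3" parses as
// Binary(1, +, Binary(2, *, 3)). term matches "1", sees "+", and parses its
// right operand as a factor; factor then consumes all of "2 * 3" before
// control returns to term, so "*" binds tighter than "+".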
pub enum Expr {
    Literal(LiteralOp),
    Unary(Token, Box<Expr>),
    Binary(Box<Expr>, Token, Box<Expr>),
    Grouping(Box<Expr>),
}

impl fmt::Display for Expr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Expr::Literal(op) => write!(f, "{}", &op.op_type),
            Expr::Unary(t, exp) => {
                if let Ok(s) = std::str::from_utf8(&t.lexeme) {
                    return write!(f, "( {} {})", s, &*exp);
                } else {
                    return Err(fmt::Error);
                }
            }
            Expr::Binary(exp_lhs, t, exp_rhs) => {
                if let Ok(s) = std::str::from_utf8(&t.lexeme) {
                    return write!(f, "( {} {} {})", s, &*exp_lhs, &*exp_rhs);
                } else {
                    return Err(fmt::Error);
                }
            }
            Expr::Grouping(exp) => write!(f, "( group {})", &*exp),
        }
    }
}

pub enum LiteralOpType {
    Number(f64),
    Str(String),
    True,
    False,
    Nil,
}

impl fmt::Display for LiteralOpType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            LiteralOpType::Number(n) => write!(f, "{}", n),
            LiteralOpType::Str(s) => write!(f, "{}", s),
            LiteralOpType::True => write!(f, "True"),
            LiteralOpType::False => write!(f, "False"),
            LiteralOpType::Nil => write!(f, "Nil"),
        }
    }
}

pub struct LiteralOp {
    pub op_type: LiteralOpType,
}
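To make the Display impls above concrete, here is a small usage sketch (not part of the commit; it assumes Token's fields stay constructible exactly as they are used in the scanner below):

// Build the tree for "-123 * (45.67)" and pretty-print it.
let expr = Expr::Binary(
    Box::new(Expr::Unary(
        Token { token_type: TokenType::Minus, lexeme: b"-".to_vec(), literal: None, line: 1 },
        Box::new(Expr::Literal(LiteralOp { op_type: LiteralOpType::Number(123.0) })),
    )),
    Token { token_type: TokenType::Star, lexeme: b"*".to_vec(), literal: None, line: 1 },
    Box::new(Expr::Grouping(Box::new(Expr::Literal(LiteralOp {
        op_type: LiteralOpType::Number(45.67),
    })))),
);
// Prints: ( * ( - 123) ( group 45.67))
println!("{}", expr);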
@@ -0,0 +1,4 @@
mod expr;
mod parser;

use expr::*;
@@ -0,0 +1,15 @@
use crate::{Expr, Token};

struct Parser {
    tokens: Vec<Token>,
    current: usize,
}

impl Parser {
    pub fn new(tokens: Vec<Token>) -> Self {
        Parser {
            tokens,
            current: 0,
        }
    }
}
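The Parser is only a stub so far. As a rough sketch of the recursive-descent shape it will likely take (one method per rule of the grammar in the expression module), here is a self-contained, simplified version that evaluates two of the rules directly instead of building an Expr tree; the Tok type, Sketch struct, and helper names are illustrative only, not part of this commit:

// Hypothetical sketch: term/factor from the grammar, evaluated on the fly.
#[derive(Clone, Copy, PartialEq)]
enum Tok {
    Num(f64),
    Plus,
    Star,
}

struct Sketch {
    tokens: Vec<Tok>,
    current: usize,
}

impl Sketch {
    // term → factor ( "+" factor )* ;
    fn term(&mut self) -> f64 {
        let mut value = self.factor();
        while self.matches(Tok::Plus) {
            value += self.factor();
        }
        value
    }

    // factor → primary ( "*" primary )* ;
    fn factor(&mut self) -> f64 {
        let mut value = self.primary();
        while self.matches(Tok::Star) {
            value *= self.primary();
        }
        value
    }

    // primary → NUMBER ;
    fn primary(&mut self) -> f64 {
        match self.advance() {
            Tok::Num(n) => n,
            _ => panic!("expected a number"),
        }
    }

    // Consume the next token only if it equals `expected`.
    fn matches(&mut self, expected: Tok) -> bool {
        if self.current < self.tokens.len() && self.tokens[self.current] == expected {
            self.current += 1;
            true
        } else {
            false
        }
    }

    fn advance(&mut self) -> Tok {
        self.current += 1;
        self.tokens[self.current - 1]
    }
}

fn main() {
    // "1 + 2 * 3" → 7, because factor (multiplication) binds tighter than term.
    let mut p = Sketch {
        tokens: vec![Tok::Num(1.0), Tok::Plus, Tok::Num(2.0), Tok::Star, Tok::Num(3.0)],
        current: 0,
    };
    assert_eq!(p.term(), 7.0);
}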
@@ -0,0 +1,5 @@
mod scanner;
mod token;

pub use scanner::*;
pub use token::*;
@@ -0,0 +1,260 @@
use crate::Error;
use crate::{Literal, Token, TokenType};
use std::collections::HashMap;

pub struct Scanner {
    source: Vec<u8>,
    tokens: Vec<Token>,
    start: usize,
    current: usize,
    line: usize,
    keywords: HashMap<String, TokenType>,
}

impl Scanner {
    pub fn new(source: String) -> Self {
        Scanner {
            source: source.into_bytes(),
            tokens: Vec::<Token>::new(),
            start: 0,
            current: 0,
            line: 1,
            keywords: vec![
                ("and", TokenType::And),
                ("class", TokenType::Class),
                ("else", TokenType::Else),
                ("false", TokenType::False),
                ("for", TokenType::For),
                ("fun", TokenType::Fun),
                ("if", TokenType::If),
                ("nil", TokenType::Nil),
                ("or", TokenType::Or),
                ("print", TokenType::Print),
                ("return", TokenType::Return),
                ("super", TokenType::Super),
                ("this", TokenType::This),
                ("true", TokenType::True),
                ("var", TokenType::Var),
                ("while", TokenType::While),
                ("lambda", TokenType::Lambda),
            ]
            .into_iter()
            .map(|(k, v)| (String::from(k), v))
            .collect(),
        }
    }

    pub fn scan_tokens(&mut self) -> Result<&Vec<Token>, Error> {
        while !self.is_at_end() {
            self.start = self.current;
            self.scan_token()?;
        }

        self.tokens.push(Token {
            token_type: TokenType::Eof,
            lexeme: Vec::<u8>::new(),
            literal: None,
            line: self.line,
        });

        Ok(&self.tokens)
    }

    fn is_at_end(&self) -> bool {
        self.current >= self.source.len()
    }

    fn scan_token(&mut self) -> Result<(), Error> {
        let c = self.advance();
        match c {
            '(' => self.add_token(TokenType::LeftParen),
            ')' => self.add_token(TokenType::RightParen),
            '{' => self.add_token(TokenType::LeftBrace),
            '}' => self.add_token(TokenType::RightBrace),
            ',' => self.add_token(TokenType::Comma),
            '.' => self.add_token(TokenType::Dot),
            '-' => self.add_token(TokenType::Minus),
            '+' => self.add_token(TokenType::Plus),
            ';' => self.add_token(TokenType::Semicolon),
            '*' => self.add_token(TokenType::Star),
            '!' => {
                if self.matches('=') {
                    self.add_token(TokenType::BangEqual)
                } else {
                    self.add_token(TokenType::Bang)
                }
            }
            '=' => {
                if self.matches('=') {
                    self.add_token(TokenType::EqualEqual)
                } else {
                    self.add_token(TokenType::Equal)
                }
            }
            '<' => {
                if self.matches('=') {
                    self.add_token(TokenType::LessEqual)
                } else {
                    self.add_token(TokenType::Less)
                }
            }
            '>' => {
                if self.matches('=') {
                    self.add_token(TokenType::GreaterEqual)
                } else {
                    self.add_token(TokenType::Greater)
                }
            }
            '/' => {
                if self.matches('/') {
                    // A line comment runs until the end of the line.
                    while self.peek() != '\n' && !self.is_at_end() {
                        self.advance();
                    }
                } else {
                    self.add_token(TokenType::Slash)
                }
            }
            ' ' | '\r' | '\t' => {}
            '\n' => self.line += 1,
            '"' => self.string()?,
            _ => {
                if self.is_digit(c) {
                    self.number();
                } else if self.is_alpha(c) {
                    self.identifier();
                } else {
                    return Err(Error::SyntaxError(
                        format!("{}", self.line),
                        "Unexpected character".to_string(),
                        c.to_string(),
                    ));
                }
            }
        };
        Ok(())
    }

    fn is_alpha(&self, c: char) -> bool {
        (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'
    }

    fn is_alphanumeric(&self, c: char) -> bool {
        self.is_alpha(c) || self.is_digit(c)
    }

    fn is_digit(&self, c: char) -> bool {
        c >= '0' && c <= '9'
    }

    fn identifier(&mut self) {
        while self.is_alphanumeric(self.peek()) {
            self.advance();
        }
        let text = String::from_utf8(self.source[self.start..self.current].to_vec()).unwrap();

        let token_type = match self.keywords.get(&text) {
            Some(kw_val) => *kw_val,
            None => TokenType::Identifier,
        };

        match token_type {
            TokenType::Identifier => {
                self.add_token_literal(token_type, Some(Literal::Identifier(text)))
            }
            _ => self.add_token(token_type),
        }
    }

    fn number(&mut self) {
        while self.is_digit(self.peek()) {
            self.advance();
        }
        // Look for a fractional part.
        if self.peek() == '.' && self.is_digit(self.peek_next()) {
            // Consume the '.'
            self.advance();
            while self.is_digit(self.peek()) {
                self.advance();
            }
        }

        let val: f64 = String::from_utf8(self.source[self.start..self.current].to_vec())
            .unwrap()
            .parse()
            .unwrap();

        self.add_token_literal(TokenType::Number, Some(Literal::Number(val)));
    }

    fn string(&mut self) -> Result<(), Error> {
        while self.peek() != '"' && !self.is_at_end() {
            if self.peek() == '\n' {
                self.line += 1;
            }
            self.advance();
        }

        if self.is_at_end() {
            return Err(Error::SyntaxError(
                format!("{}", self.line),
                String::from("Parsing error"),
                String::from("Unterminated string"),
            ));
        }
        // Consume the closing '"'.
        self.advance();

        // Trim the surrounding quotes from the stored literal value.
        self.add_token_literal(
            TokenType::String,
            Some(Literal::Str(
                String::from_utf8(self.source[self.start + 1..self.current - 1].to_vec()).unwrap(),
            )),
        );
        Ok(())
    }

    fn peek(&self) -> char {
        if self.is_at_end() {
            '\0'
        } else {
            char::from(self.source[self.current])
        }
    }

    fn peek_next(&self) -> char {
        if self.current + 1 >= self.source.len() {
            '\0'
        } else {
            char::from(self.source[self.current + 1])
        }
    }

    fn matches(&mut self, expected: char) -> bool {
        if self.is_at_end() {
            return false;
        }
        if char::from(self.source[self.current]) != expected {
            return false;
        }
        self.current += 1;
        true
    }

    fn add_token(&mut self, token_type: TokenType) {
        self.add_token_literal(token_type, None);
    }

    fn add_token_literal(&mut self, token: TokenType, literal: Option<Literal>) {
        let text = self.source[self.start..self.current].to_vec();
        self.tokens.push(Token {
            token_type: token,
            lexeme: text,
            literal,
            line: self.line,
        });
    }

    fn advance(&mut self) -> char {
        self.current += 1;
        char::from(self.source[self.current - 1])
    }
}
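For reference, a minimal usage sketch of the scanner (not part of the commit; it assumes the crate's Error type and the Token fields remain accessible as used above):

fn demo() -> Result<(), Error> {
    let mut scanner = Scanner::new(String::from("var answer = 42;"));
    let tokens = scanner.scan_tokens()?;
    // Expected stream: Var, Identifier("answer"), Equal, Number(42.0), Semicolon, Eof.
    for token in tokens {
        println!("line {}: {}", token.line, String::from_utf8_lossy(&token.lexeme));
    }
    Ok(())
}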