Skip to content

Latest commit

 

History

History
768 lines (723 loc) · 25.4 KB

07-leitor-de-expressoes.org

File metadata and controls

768 lines (723 loc) · 25.4 KB

Leitor de expressões

Arquivo: reader/mod.rs.

Teste de lib de UTF-8

use rustyline::error::ReadlineError;
use rustyline::Editor;
use rustf8::*;
use std::io::prelude::*;
use std::io::Cursor;

/// Small REPL used to try out the UTF-8 decoding library: every line typed
/// at the prompt is decoded character by character and echoed back.
fn main() {
    let mut editor = Editor::<()>::new();
    loop {
        // Ctrl-C and Ctrl-D leave the loop silently; any other failure is
        // reported before leaving.
        let line = match editor.readline(">> ") {
            Ok(line) => line,
            Err(ReadlineError::Interrupted) | Err(ReadlineError::Eof) => break,
            Err(err) => {
                println!("Error: {:?}", err);
                break;
            }
        };

        // Feed the raw bytes of the line through the UTF-8 iterator.
        let bytes: Vec<u8> = line.into_bytes();
        let decoder = Utf8Iterator::new(Cursor::new(bytes).bytes());
        for decoded in decoder {
            let ch = decoded.unwrap();
            println!("{}", ch);
            if ch == '⊃' {
                println!("Ok!");
            }
        }
    }
    println!("Hello, world!");
}

Leitor: Protótipo

Esse aqui é o tokenizador e o parser na fase de protótipos. Eles ainda não permitem read macros.

Os códigos a seguir podem conter altas doses de câncer. Mas… ainda bem que eles não vão para a versão final, não é?

pub mod tokenizer;
pub mod parser;

Tokenizador

Arquivo: reader/tokenizer.rs.

Toma como parâmetro uma string, retorna uma lista de tokens. Cada token é uma substring.

Aqui eu não vou usar nenhuma abstração de Majestic, por motivos de performance e de falta de implementação mesmo. Pra comparar strings, eu teria que usar =, que não está implementado. Além do mais, existe uma proposta pendente de que strings sejam armazenadas como vetores – algo que seria melhor feito tendo vetores na linguagem em primeiro lugar.

Pra não dar um gargalo enorme, vou usar primeiro uma implementação intangível do ponto de vista de read macros. O importante é ter uma lista de tokens individuais que eu possa usar para interpretar.

/// Splits source text into a flat list of tokens.
///
/// Tokens are: the punctuation `(`, `)`, `[`, `]`, `'`, `` ` ``, `,` and
/// `,@`; string constants (kept verbatim, including quotes and escapes);
/// character constants of the form `#\...`; and everything else, delimited
/// by space, tab or newline. Text from `;` up to the end of the line is a
/// comment and produces no token.
///
/// # Errors
///
/// Returns `(line, message)` where `line` is the 1-based line being read
/// when the problem was found. For an unterminated string constant the line
/// reported is the one where the string starts.
pub fn maj_tokenize(
    text: &str
) -> Result<Vec<String>, (i64, &'static str)> {
    // Moves any pending symbol text out of `buffer` into `tokens`.
    fn flush(buffer: &mut String, tokens: &mut Vec<String>) {
        if !buffer.is_empty() {
            tokens.push(std::mem::replace(buffer, String::new()));
        }
    }

    let mut tokens = Vec::new();
    let mut buffer = String::new();
    let mut line = 1;
    // A single forward cursor over the text; lookahead is done with `peek`
    // so the input is walked exactly once.
    let mut chars = text.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            // Comment: ends the pending token and swallows the rest of the
            // line, newline included.
            ';' => {
                flush(&mut buffer, &mut tokens);
                for skipped in chars.by_ref() {
                    if skipped == '\n' {
                        line += 1;
                        break;
                    }
                }
            },

            // String constant: copied verbatim up to the closing quote. A
            // backslash always takes the following character with it, so an
            // escaped quote does not close the string.
            '"' => {
                buffer.push('"');
                // Counted separately so errors report the line on which
                // the string started.
                let mut newlines = 0;
                loop {
                    match chars.next() {
                        Some('"') => {
                            buffer.push('"');
                            tokens.push(std::mem::replace(&mut buffer, String::new()));
                            break;
                        },
                        Some('\\') => {
                            buffer.push('\\');
                            match chars.next() {
                                Some(escaped) => {
                                    if escaped == '\n' {
                                        newlines += 1;
                                    }
                                    buffer.push(escaped);
                                },
                                None => {
                                    return Err((line, "Unexpected EOF while reading escaped character on string constant"));
                                },
                            }
                        },
                        Some(other) => {
                            if other == '\n' {
                                newlines += 1;
                            }
                            buffer.push(other);
                        },
                        None => {
                            return Err((line, "Unexpected EOF while reading string constant"));
                        },
                    }
                }
                line += newlines;
            },

            // Character constant: `#\` followed by at least one character,
            // which is taken unconditionally (so `#\(` and `#\ ` work), then
            // anything up to the next delimiter.
            '#' => {
                buffer.push('#');
                match chars.next() {
                    Some('\\') => buffer.push('\\'),
                    _ => return Err((line, "Unexpected character while reading character constant")),
                }
                match chars.next() {
                    Some(first) => {
                        if first == '\n' {
                            line += 1;
                        }
                        buffer.push(first);
                    },
                    None => {
                        return Err((line, "Unexpected EOF while reading character constant"));
                    },
                }
                loop {
                    match chars.peek().cloned() {
                        // The delimiter itself is left for the main loop.
                        Some(' ')  |
                        Some('\n') |
                        Some('\t') |
                        Some(')')  |
                        Some('(')  |
                        Some(']')  |
                        Some('[')  |
                        Some('"')  |
                        None => break,
                        Some(other) => {
                            buffer.push(other);
                            chars.next();
                        },
                    }
                }
                tokens.push(std::mem::replace(&mut buffer, String::new()));
            },

            // Single-character punctuation tokens.
            '\'' | '`' | '(' | ')' | '[' | ']' => {
                flush(&mut buffer, &mut tokens);
                tokens.push(c.to_string());
            },

            // Unquote, or unquote-splice when immediately followed by `@`.
            ',' => {
                flush(&mut buffer, &mut tokens);
                if chars.peek() == Some(&'@') {
                    chars.next();
                    tokens.push(String::from(",@"));
                } else {
                    tokens.push(String::from(","));
                }
            },

            // Syntax error: @ alone
            '@' => {
                return Err((line, "'@' should be preceeded by ','"));
            },

            // Delimiters
            ' ' | '\n' | '\t' => {
                flush(&mut buffer, &mut tokens);
                if c == '\n' {
                    line += 1;
                }
            },

            // Anything else is part of the current symbol/number token.
            _ => buffer.push(c),
        }
    }

    flush(&mut buffer, &mut tokens);
    Ok(tokens)
}

Tokenizador para arquivos

/// Reads `filename` into `buffer` and tokenizes the buffer's contents.
///
/// The file contents are appended to whatever `buffer` already holds. If
/// the buffer then starts with a shebang (`#!`), those two characters are
/// overwritten with `;;` so the tokenizer treats the first line as a
/// comment.
///
/// # Errors
///
/// Returns `(0, message)` when the file cannot be opened or read; otherwise
/// whatever `maj_tokenize` returns.
pub fn maj_tokenize_file<'a>(
    filename: &str,
    buffer: &'a mut String
) -> Result<Vec<String>, (i64, &'static str)> {
    use std::fs::File;
    use std::io::Read;

    let mut file = match File::open(filename) {
        Ok(file) => file,
        Err(_) => return Err((0, "Cannot open file")),
    };
    if file.read_to_string(buffer).is_err() {
        return Err((0, "Cannot read file"));
    }

    // `starts_with` rather than slicing `[0..2]`: slicing panics when byte 2
    // falls inside a multi-byte character (e.g. a leading UTF-8 BOM).
    if buffer.starts_with("#!") {
        buffer.replace_range(0..2, ";;");
    }
    maj_tokenize(buffer.as_str())
}

Parser

Arquivo: reader/parser.rs.

use gc::Gc;
use crate::core::{ MajState, Maj };
use crate::axioms::predicates::maj_errorp;
use crate::axioms::primitives::maj_err;
/// Reads one expression from the front of `tokens`.
///
/// On success returns the expression together with the slice of tokens that
/// follow it. An empty `tokens` slice reads as nil; the list and vector
/// readers detect a missing closing delimiter by finding no token left
/// after an element.
///
/// # Errors
///
/// Returns a static message for unmatched delimiters, misplaced `.`,
/// quote-like prefixes with nothing after them, and `#\...` tokens that are
/// not a known character.
fn maj_read_one<'a>(
    mut state: &mut MajState,
    tokens: &'a [String]
) -> Result<(Gc<Maj>, &'a [String]), &'static str> {
    let first = match tokens.first() {
        Some(token) => token,
        None => return Ok((Maj::nil(), tokens)),
    };
    let rest = &tokens[1..];

    match first.as_str() {
        "[" => {
            // Empty vector
            if let Some(t) = rest.first() {
                if t == "]" {
                    use crate::maj_list;
                    return Ok(
                        (maj_list!(
                            Maj::vector_sym()),
                         &rest[1..]));
                }
            }
            // Keep collecting to build a non-empty vector
            let mut tokens = rest;
            let mut vector_elts = Vec::new();
            loop {
                let (obj, slice) = maj_read_one(&mut state, tokens)?;
                tokens = slice;
                vector_elts.push(obj);
                match tokens.first().map(String::as_str) {
                    Some("]") => {
                        return Ok(
                            (maj_parser_into_vector(vector_elts),
                             &tokens[1..]));
                    },
                    // TODO: Check dotted?
                    Some(_) => {},
                    None => return Err("Unmatched open bracket"),
                }
            }
        },
        "(" => {
            // Empty list
            if let Some(t) = rest.first() {
                if t == ")" {
                    return Ok((Maj::nil(), &rest[1..]));
                }
            }
            // Keep collecting to build a non-empty list
            let mut tokens = rest;
            let mut list = Vec::new();
            loop {
                let (obj, slice) = maj_read_one(&mut state, tokens)?;
                tokens = slice;
                list.push(obj);
                match tokens.first().map(String::as_str) {
                    Some(")") => {
                        return Ok((maj_parser_into_list(list),
                                   &tokens[1..]));
                    },
                    Some(".") => {
                        // Read one more expression, cons it onto the last
                        // element read so far, and require the list to
                        // close right after it.
                        let (tail, slice) =
                            maj_read_one(&mut state, &tokens[1..])?;
                        let last = match list.pop() {
                            Some(e) => e,
                            None => return Err("Unexpected EOF while reading dotted element"),
                        };
                        tokens = slice;
                        if tokens.is_empty() {
                            return Err("Unexpected EOF when reading dotted pair");
                        } else if tokens[0] != ")" {
                            return Err("Invalid usage of dotted element");
                        }

                        list.push(Maj::cons(last, tail));
                        return Ok(
                            (maj_parser_into_dotted_list(list),
                             &tokens[1..]));
                    },
                    Some(_) => {},
                    None => return Err("Unmatched parenthesis"),
                }
            }
        },
        ")" => Err("Unmatched close parenthesis"),
        "]" => Err("Unmatched close brackets"),
        "." => {
            // A dot is only meaningful after a list element, which the
            // list reader above handles; anywhere else it is an error.
            Err("Invalid cons pair notation")
        },
        // Quote-like prefixes wrap the next expression in a two-element
        // list headed by the corresponding symbol.
        // TODO: Handle subsequent for the quote and quasiquote
        // related!
        "'" => {
            if rest.is_empty() {
                return Err("Unmatched quote");
            }
            let (obj, slice) = maj_read_one(&mut state, rest)?;
            Ok((maj_parser_into_list(vec![Maj::quote(), obj]), slice))
        },
        "`" => {
            if rest.is_empty() {
                return Err("Unmatched quasiquote");
            }
            let (obj, slice) = maj_read_one(&mut state, rest)?;
            Ok((maj_parser_into_list(vec![Maj::quasiquote(), obj]), slice))
        },
        "," => {
            if rest.is_empty() {
                return Err("Unmatched unquote");
            }
            let (obj, slice) = maj_read_one(&mut state, rest)?;
            Ok((maj_parser_into_list(vec![Maj::unquote(), obj]), slice))
        },
        ",@" => {
            if rest.is_empty() {
                return Err("Unmatched unquote-splice");
            }
            let (obj, slice) = maj_read_one(&mut state, rest)?;
            Ok((maj_parser_into_list(vec![Maj::unquote_splice(), obj]),
                slice))
        },
        _ => {
            // Atoms: try character, string and number in that order; what
            // is left is a symbol.
            if let Some(obj) = maj_parse_character(first.as_ref()) {
                // Character
                Ok((obj, rest))
            } else if let Some(obj) = maj_parse_string(first.as_ref()) {
                // String
                Ok((obj, rest))
            } else if let Some(obj) = maj_parse_number(first.as_ref()) {
                // Number
                Ok((obj, rest))
            } else if first.starts_with("#\\") {
                // Looks like a character constant but was not recognised.
                // `starts_with` avoids slicing inside a multi-byte char.
                Err("Unknown character")
            } else {
                // Ordinary symbol
                Ok((Maj::symbol(&mut state, first), rest))
            }
        }
    }
}
/// Tries to read `token` as a numeric literal.
///
/// The forms are tested in this order, and the first one whose marker
/// character occurs exactly once decides the outcome:
///
/// 1. complex, `<real>J<imag>` (the `J` is case-insensitive);
/// 2. float, any token with a single `.` that parses as `f64`;
/// 3. fraction, `<numer>/<denom>` with integer parts;
/// 4. integer, an optional `-` followed by decimal digits.
///
/// Returns `None` when the token is not a number. A fraction with a zero
/// denominator yields an error object rather than `None`, and a complex
/// literal propagates an error object found in either of its parts.
fn maj_parse_number(token: &str) -> Option<Gc<Maj>> {
    // ASCII-only uppercasing keeps the token's length and cannot turn some
    // other character into a `J`.
    if maj_token_once_p(&token.to_ascii_uppercase(), 'J').is_some() {
        // Test for complex. The token is split on the marker itself rather
        // than sliced by index, so multi-byte characters cannot cause an
        // out-of-boundary slice.
        let mut parts = token.splitn(2, |c: char| c == 'j' || c == 'J');
        let real = maj_parse_number(parts.next()?)?;
        let imag = maj_parse_number(parts.next()?)?;
        if maj_errorp(real.clone()).to_bool() {
            return Some(real);
        }
        if maj_errorp(imag.clone()).to_bool() {
            return Some(imag);
        }
        // An integer zero imaginary part collapses to the real part.
        if let Some(num) = imag.to_integer() {
            if num == 0 {
                return Some(real);
            }
        }
        Some(Maj::complex(real, imag))
    } else if maj_token_once_p(token, '.').is_some() {
        // Test for float
        match token.parse::<f64>() {
            Ok(num) => Some(Maj::float(num)),
            Err(_) => None,
        }
    } else if maj_token_once_p(token, '/').is_some() {
        // Test for fraction
        use crate::axioms::utils::simplify_frac_raw;
        let mut parts = token.splitn(2, '/');
        let numer = maj_parse_number(parts.next()?)?.to_integer()?;
        let denom = maj_parse_number(parts.next()?)?.to_integer()?;
        if denom == 0 {
            return Some(maj_err(
                Maj::string("Division by zero"),
                Maj::nil()));
        }
        let (numer, denom) = simplify_frac_raw(numer, denom);
        Some(Maj::fraction(numer, denom))
    } else if maj_numeric_token_p(token) {
        // Test for integer; values that do not fit in i64 are rejected.
        match token.parse::<i64>() {
            Ok(num) => Some(Maj::integer(num)),
            Err(_) => None,
        }
    } else {
        None
    }
}
/// Tells whether `token` is an optional leading `-` followed by one or more
/// decimal digits and nothing else.
fn maj_numeric_token_p(token: &str) -> bool {
    let digits = if token.starts_with('-') {
        &token[1..]
    } else {
        token
    };
    // `all` is vacuously true on an empty string, so a lone "-" (or "")
    // must be rejected explicitly.
    !digits.is_empty() && digits.chars().all(|x| x.is_digit(10))
}
/// Finds `ch` in `token` when it occurs exactly once.
///
/// Returns the byte offset of that single occurrence, suitable for slicing
/// `token`, or `None` when `ch` is absent or appears more than once.
fn maj_token_once_p(token: &str, ch: char) -> Option<usize> {
    let mut occurrences = token.match_indices(ch);
    // Looking at the first two matches is enough to tell "exactly one"
    // apart from "none" and "several".
    match (occurrences.next(), occurrences.next()) {
        (Some((pos, _)), None) => Some(pos),
        _ => None,
    }
}
/// Tries to read `token` as a string constant.
///
/// The token must be at least two bytes long and both start and end with a
/// double quote; otherwise `None` is returned. Inside the quotes, `\n` and
/// `\t` become newline and tab, and a backslash before any other character
/// yields that character itself.
fn maj_parse_string(token: &str) -> Option<Gc<Maj>> {
    if token.len() < 2 || !token.starts_with('"') || !token.ends_with('"') {
        return None;
    }

    // Both quotes are single-byte, so this slice is on char boundaries.
    let inner = &token[1..token.len() - 1];
    let mut buffer = String::with_capacity(inner.len());
    // Walk characters, not byte offsets, so multi-byte text is handled.
    let mut chars = inner.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            buffer.push(c);
            continue;
        }
        match chars.next() {
            Some('n') => buffer.push('\n'),
            Some('t') => buffer.push('\t'),
            Some(escaped) => buffer.push(escaped),
            // A backslash right before the closing quote escapes that
            // quote; the tokenizer in this file never emits such a token.
            None => buffer.push('"'),
        }
    }
    Some(Maj::string(buffer.as_ref()))
}
/// Tries to read `token` as a character constant of the form `#\<name>`.
///
/// `<name>` is either one of the named characters listed below or exactly
/// one character (of any byte length). Returns `None` when the token does
/// not start with `#\`, has nothing after it, or the name is unknown.
fn maj_parse_character(token: &str) -> Option<Gc<Maj>> {
    // `starts_with` instead of slicing `[0..2]`, which panics when byte 2
    // falls inside a multi-byte character.
    if !token.starts_with("#\\") {
        return None;
    }

    let chr = &token[2..];
    let character = match chr {
        "" => return None,
        "␇" | "bel" => '\x07',
        "newline" => '\n',
        "tab" => '\t',
        // Those should not be needed!
        "space"    => ' ',
        "lparen"   => '(',
        "rparen"   => ')',
        "lbracket" => '[',
        "rbracket" => ']',
        other => {
            // Accept exactly one character, counted in chars rather than
            // bytes so that non-ASCII characters are allowed.
            let mut chars = other.chars();
            match (chars.next(), chars.next()) {
                (Some(only), None) => only,
                _ => return None,
            }
        },
    };
    Some(Maj::character(character))
}
/// Parses a whole token list into a list of expressions.
///
/// Expressions are read one after another until no tokens remain; the
/// first reader error aborts the parse and is returned as-is.
pub fn maj_parse(
    mut state: &mut MajState, tokens: Vec<String>
) -> Result<Gc<Maj>, &str> {
    let mut expressions = Vec::new();
    let mut remaining = tokens.as_slice();
    while !remaining.is_empty() {
        let (expr, rest) = maj_read_one(&mut state, remaining)?;
        remaining = rest;
        expressions.push(expr);
    }
    Ok(maj_parser_into_list(expressions))
}
/// Builds a proper (nil-terminated) list out of `list`, keeping the
/// element order. An empty input gives nil.
fn maj_parser_into_list(list: Vec<Gc<Maj>>) -> Gc<Maj> {
    // Cons from the back so the first element ends up at the head.
    list.into_iter()
        .rev()
        .fold(Maj::nil(), |tail, elt| Maj::cons(elt, tail))
}
/// Builds an improper list out of `list`: the last element becomes the
/// final tail and every earlier element is consed in front of it, keeping
/// the order. An empty input gives nil.
fn maj_parser_into_dotted_list(list: Vec<Gc<Maj>>) -> Gc<Maj> {
    let mut elements = list;
    match elements.pop() {
        None => Maj::nil(),
        Some(tail) => elements
            .into_iter()
            .rev()
            .fold(tail, |rest, elt| Maj::cons(elt, rest)),
    }
}
/// Builds the list form of a vector literal: the vector symbol followed by
/// the elements in order. An empty input gives nil.
fn maj_parser_into_vector(elts: Vec<Gc<Maj>>) -> Gc<Maj> {
    if elts.is_empty() {
        return Maj::nil();
    }
    let body = elts
        .into_iter()
        .rev()
        .fold(Maj::nil(), |tail, elt| Maj::cons(elt, tail));
    Maj::cons(Maj::vector_sym(), body)
}

Construindo o objeto do leitor

Macros do leitor de expressões