From ad9593a64ae14d1dc87fe70e28267b31185d57b3 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Wed, 3 Jul 2019 21:26:01 +0100 Subject: [PATCH 01/18] Add Comment about Missing Delim on Call --- src/syntax/tree/expression.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/syntax/tree/expression.rs b/src/syntax/tree/expression.rs index 563676d5..9ac11e1b 100644 --- a/src/syntax/tree/expression.rs +++ b/src/syntax/tree/expression.rs @@ -151,6 +151,10 @@ pub struct CallExpression { /// The opening `(` of this call pub open_paren: Box, /// The list of arguments to the call. This could be empty. + /// + /// FIXME: This should be a delimited list rather than a palin + /// vec. We are missing the `,` between arguments to the call from + /// the tree otherwise. pub arguments: Vec, /// THe closing `)` of this call pub close_paren: Box, From f4195bb7eba792f8552b6781d0bb065b99545c6f Mon Sep 17 00:00:00 2001 From: Will Speak Date: Sun, 19 May 2019 08:37:41 +0100 Subject: [PATCH 02/18] Convert Function Arguments to `DelimItem`s This stops the `,` between arguments being lost. --- src/syntax/parse.rs | 12 +------ src/syntax/parse/checkparse_tests.rs | 54 ++++++++++++++++++---------- src/syntax/tree.rs | 2 +- src/syntax/tree/expression.rs | 12 +++++-- 4 files changed, 48 insertions(+), 32 deletions(-) diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index 4e460773..72b421a3 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -367,17 +367,7 @@ impl<'a> Parser<'a> { // Function call TokenKind::OpenBracket => { let open = token; - let mut params = Vec::new(); - while !self.current_is(&TokenKind::CloseBracket) { - let param = self.top_level_expression(); - params.push(param); - if !self.current_is(&TokenKind::CloseBracket) { - // FIXME: Delimited lists. Should fit in with - // tuple parsing. Currently this delimiter - // token is getting lost. 
- let _delim = self.expect(&TokenKind::Comma); - } - } + let params = self.delimited(|p| p.top_level_expression(), TokenKind::Comma, TokenKind::CloseBracket); let close = self.expect(&TokenKind::CloseBracket); Expression::call(lhs, open, params, close) } diff --git a/src/syntax/parse/checkparse_tests.rs b/src/syntax/parse/checkparse_tests.rs index e0ccbcf4..42349830 100644 --- a/src/syntax/parse/checkparse_tests.rs +++ b/src/syntax/parse/checkparse_tests.rs @@ -226,17 +226,29 @@ fn parse_complex_call() { mk_ident(&s, "hello"), Token::new(TokenKind::OpenBracket), vec![ - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), - Expression::infix( - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), - Token::new(TokenKind::Plus), - InfixOp::Add, - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(23))), 23), + DelimItem::First(Expression::constant_num( + Token::new(TokenKind::Literal(Literal::Number(1))), + 1 + )), + DelimItem::Follow( + Token::new(TokenKind::Comma), + Expression::infix( + Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), + Token::new(TokenKind::Plus), + InfixOp::Add, + Expression::constant_num( + Token::new(TokenKind::Literal(Literal::Number(23))), + 23 + ), + ) ), - Expression::prefix( - Token::new(TokenKind::Minus), - PrefixOp::Negate, - mk_ident(&s, "world"), + DelimItem::Follow( + Token::new(TokenKind::Comma), + Expression::prefix( + Token::new(TokenKind::Minus), + PrefixOp::Negate, + mk_ident(&s, "world"), + ) ), ], Token::new(TokenKind::CloseBracket), @@ -276,12 +288,18 @@ fn parse_indexing() { ), Token::new(TokenKind::OpenBracket), vec![ - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), - Expression::index( - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), - Token::new(TokenKind::OpenSqBracket), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(3))), 
3), - Token::new(TokenKind::CloseSqBracket) + DelimItem::First(Expression::constant_num( + Token::new(TokenKind::Literal(Literal::Number(1))), + 1 + )), + DelimItem::Follow( + Token::new(TokenKind::Comma), + Expression::index( + Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), + Token::new(TokenKind::OpenSqBracket), + Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(3))), 3), + Token::new(TokenKind::CloseSqBracket) + ) ), ], Token::new(TokenKind::CloseBracket), @@ -309,10 +327,10 @@ fn parse_ternary_if() { Expression::call( mk_ident(&s, "hello"), Token::new(TokenKind::OpenBracket), - vec![Expression::constant_num( + vec![DelimItem::First(Expression::constant_num( Token::new(TokenKind::Literal(Literal::Number(1))), 1, - )], + ))], Token::new(TokenKind::CloseBracket), ), Token::new(TokenKind::Word(s.intern("else"))), diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs index efdbe4b6..9ed34218 100644 --- a/src/syntax/tree.rs +++ b/src/syntax/tree.rs @@ -145,7 +145,7 @@ where Expression::Prefix(p) => vec![&p.inner], Expression::Infix(i) => vec![&i.left, &i.right], Expression::Call(c) => std::iter::once(&*c.callee) - .chain(c.arguments.iter()) + .chain(c.arguments.iter().map(|a| a.as_inner())) .collect(), Expression::Index(i) => vec![&i.index, &i.indexee], Expression::IfThenElse(i) => vec![&i.cond, &i.if_true, &i.if_false], diff --git a/src/syntax/tree/expression.rs b/src/syntax/tree/expression.rs index 9ac11e1b..2395ecdc 100644 --- a/src/syntax/tree/expression.rs +++ b/src/syntax/tree/expression.rs @@ -77,6 +77,14 @@ impl DelimItem { DelimItem::Follow(_, ref t) => t, } } + + /// Transform the delim item into the inner type + pub fn into_inner(self) -> T { + match self { + DelimItem::First(t) => t, + DelimItem::Follow(_, t) => t, + } + } } /// Literal / Constant Value @@ -155,7 +163,7 @@ pub struct CallExpression { /// FIXME: This should be a delimited list rather than a palin /// vec. 
We are missing the `,` between arguments to the call from /// the tree otherwise. - pub arguments: Vec, + pub arguments: Vec>, /// THe closing `)` of this call pub close_paren: Box, } @@ -412,7 +420,7 @@ impl Expression { pub fn call( callee: Expression, open_paren: Token, - args: Vec, + args: Vec>, close_paren: Token, ) -> Self { Expression::Call(CallExpression { From ebac96157be3001a5313dad62302ef3941dab207 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Wed, 3 Jul 2019 21:52:32 +0100 Subject: [PATCH 03/18] Make Call Arguments `DelimItem`s Tracks `,` in calls by storing call arguments as delim items. This means we now get better position information in call parameter count errors. --- spec/fail/badcalls.ulg | 4 ++-- src/sem/binder.rs | 5 ++--- src/syntax/tree/expression.rs | 25 ++++++++++++++++--------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/spec/fail/badcalls.ulg b/spec/fail/badcalls.ulg index 256ae473..b7c9b765 100644 --- a/spec/fail/badcalls.ulg +++ b/spec/fail/badcalls.ulg @@ -9,10 +9,10 @@ foo(100, '', 100) # !> Too many arguments to call foo(false, '', 100) # !> Too many arguments to call foo(100, '', '', false) # !> Too many arguments to call -foo(100, 100) # !> 12:9:error: Invalid argument. Expected 'String' but found 'Number' +foo(100, 100) # !> 12:7:error: Invalid argument. Expected 'String' but found 'Number' # !> 16:4:error: Invalid argument. Expected 'Number' but found 'String' -# !> 16:13:error: Invalid argument. Expected 'String' but found 'Bool' +# !> 16:11:error: Invalid argument. 
Expected 'String' but found 'Bool' foo('hello', false) let bar = 100 diff --git a/src/sem/binder.rs b/src/sem/binder.rs index df44643b..f6710a62 100644 --- a/src/sem/binder.rs +++ b/src/sem/binder.rs @@ -504,7 +504,7 @@ impl Binder { .iter() .zip(param_tys) .map(|(arg, param)| { - let bound_arg = self.bind_expression(arg, source); + let bound_arg = self.bind_expression(arg.as_inner(), source); if bound_arg.typ != Some(param) { self.diagnostics.push(Diagnostic::new( format!( @@ -853,8 +853,7 @@ mod test { use super::*; use crate::syntax::text::Interner; use crate::syntax::{ - IdentifierExpression, Literal, LiteralExpression, - PrefixExpression, Token, TokenKind, + IdentifierExpression, Literal, LiteralExpression, PrefixExpression, Token, TokenKind, }; #[test] diff --git a/src/syntax/tree/expression.rs b/src/syntax/tree/expression.rs index 2395ecdc..720b92bb 100644 --- a/src/syntax/tree/expression.rs +++ b/src/syntax/tree/expression.rs @@ -8,6 +8,7 @@ use super::super::SyntaxNode; use super::operators::{InfixOp, PrefixOp}; use super::token::{Token, TokenKind}; use super::types::TypeAnno; +use std::borrow::Cow; /// An identifier, with an optional type attached #[derive(Debug, PartialEq)] @@ -77,12 +78,22 @@ impl DelimItem { DelimItem::Follow(_, ref t) => t, } } +} - /// Transform the delim item into the inner type - pub fn into_inner(self) -> T { - match self { - DelimItem::First(t) => t, - DelimItem::Follow(_, t) => t, +impl SyntaxNode for DelimItem { + fn description(&self, source: &SourceText) -> Cow { + match *self { + DelimItem::First(ref t) => format!("Delim::First({})", t.description(source)).into(), + DelimItem::Follow(ref sep, ref t) => { + format!("Delim::Follow({}, {})", sep.kind, t.description(source)).into() + } + } + } + + fn span(&self) -> Span { + match *self { + DelimItem::First(ref t) => t.span(), + DelimItem::Follow(ref sep, ref t) => Span::enclosing(sep.span(), t.span()), } } } @@ -159,10 +170,6 @@ pub struct CallExpression { /// The opening `(` of 
this call pub open_paren: Box, /// The list of arguments to the call. This could be empty. - /// - /// FIXME: This should be a delimited list rather than a palin - /// vec. We are missing the `,` between arguments to the call from - /// the tree otherwise. pub arguments: Vec>, /// THe closing `)` of this call pub close_paren: Box, } From 370615aaac9291db0342522456f8cbf93ebeef82 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Thu, 4 Jul 2019 07:17:45 +0100 Subject: [PATCH 04/18] Make `Expression::typ` Non-Optional Now we have types properly flowing through the tree we don't need to have optional types in the bound tree. This also frees us from having to fall back to concocting a type from the `LLVMValue` when printing. --- src/compile/lower.rs | 60 ++++++++-------------------- src/sem/binder.rs | 94 ++++++++++++++++++++------------------------ src/sem/tree.rs | 32 ++++++++++----- 3 files changed, 81 insertions(+), 105 deletions(-) diff --git a/src/compile/lower.rs b/src/compile/lower.rs index 7db17d3f..c4ee5a2d 100644 --- a/src/compile/lower.rs +++ b/src/compile/lower.rs @@ -103,7 +103,7 @@ fn add_decls(ctx: &mut LowerContext<'_>, expr: &Expression) { .params .iter() .map(|p| { - ctx.llvm_type(p.ty.unwrap_or(Typ::Builtin(BuiltinType::Number))) + ctx.llvm_type(p.ty) .expect("no type in context for function param") }) .collect::>(); @@ -145,7 +145,7 @@ pub fn lower_internal( let global = ctx.module.add_global(initialiser, "s_const"); let string_ty = ctx - .llvm_type(expr.typ.unwrap()) + .llvm_type(expr.typ) .expect("no type in context for string literal"); Ok(builder.build_bitcast(global, string_ty, "string_const")) } @@ -162,7 +162,7 @@ pub fn lower_internal( let lhs_val = lower_internal(ctx, fun, builder, vars, *lhs)?; let rhs_val = lower_internal(ctx, fun, builder, vars, *rhs)?; let val = match op { - InfixOp::Add => match expr.typ.unwrap() { + InfixOp::Add => match expr.typ { Typ::Builtin(BuiltinType::Number) => builder.build_add(lhs_val, rhs_val), 
Typ::Builtin(BuiltinType::String) => { build_string_concat(ctx, builder, lhs_val, rhs_val) @@ -217,9 +217,8 @@ pub fn lower_internal( ExpressionKind::IfThenElse(iff, then, els) => { let cond = lower_internal(ctx, fun, builder, vars, *iff)?; - let typ = expr - .typ - .and_then(|t| ctx.llvm_type(t)) + let typ = ctx + .llvm_type(expr.typ) .ok_or_else(|| CompError::from("No type for if expression".to_string()))?; let ret = builder.build_alloca(typ, "if"); @@ -257,7 +256,7 @@ pub fn lower_internal( .enumerate() .map(|(i, p)| { let typ = ctx - .llvm_type(p.ty.unwrap_or(Typ::Builtin(BuiltinType::Number))) + .llvm_type(p.ty) .expect("no type in context for function parameter"); let param = builder.build_alloca(typ, &p.ident); builder.build_store(fun.get_param(i as u32), param); @@ -304,27 +303,22 @@ pub fn lower_internal( // do this so that some values, such as `bool`s can be // converted before printing. // - // There are a few TODOs with this though: // TODO: Once Strings become available we should switch to - // `to_string` here - // TODO: Stop falling back to the LLVM type here. - let (to_format, format) = expr - .typ - .and_then(|t| fmt_from_type(t, ctx, fun, builder, val)) - .unwrap_or_else(|| fmt_from_llvm(ctx, fun, builder, val)); - fmt(ctx, builder, to_format, format); + // `to_string` here rather than `fmt_from_type`. 
+ if let Some((to_format, format)) = fmt_from_type(expr.typ, ctx, fun, builder, val) { + fmt(ctx, builder, to_format, format); + } else { + eprintln!("Can't format value of type: {:?}", expr.typ); + unimplemented!(); + } Ok(val) } ExpressionKind::Declaration(decl, is_mut, initialiser) => { let initialiser = lower_internal(ctx, fun, builder, vars, *initialiser)?; let value = if is_mut { - let typ = decl.ty.map_or_else( - || ctx.llvm_ctx.get_type(initialiser), - |ty| { - ctx.llvm_type(ty) - .expect("no type in context for declaration") - }, - ); + let typ = ctx + .llvm_type(decl.ty) + .expect("no type in context for declaration"); let stackloc = builder.build_alloca(typ, &decl.ident); builder.build_store(initialiser, stackloc); @@ -466,25 +460,3 @@ fn fmt_convert_bool( builder.build_load(temp) } - -/// Format from LLVM Type -/// -/// Gets a format string specifier from the LLVM type. This is only -/// used as a fallback. -/// -/// FIXME: Stop falling back to this function for printing. -fn fmt_from_llvm( - ctx: &mut LowerContext<'_>, - fun: &mut Function, - builder: &mut Builder, - val: LLVMValueRef, -) -> (Vec, &'static str) { - match Type::from(ctx.llvm_ctx.get_type(val)) { - Type::Int(1) => { - let formatted = fmt_convert_bool(ctx, fun, builder, val); - (vec![formatted], "printf_cstr_format") - } - Type::Int(_) => (vec![val], "printf_num_format"), - _ => unimplemented!(), - } -} diff --git a/src/sem/binder.rs b/src/sem/binder.rs index f6710a62..b8c52f8d 100644 --- a/src/sem/binder.rs +++ b/src/sem/binder.rs @@ -310,10 +310,10 @@ impl Binder { if let Some(sym) = self.scopes.lookup(ident.ident) { let id_str = source.interned_value(ident.ident); let typ = match sym { - Symbol::Variable(_, t) => Some(t), - Symbol::Function(..) => Some(Typ::Function(ident.ident)), + Symbol::Variable(_, t) => t, + Symbol::Function(..) => Typ::Function(ident.ident), // FIXME: First-class types? - Symbol::Type(..) => None, + Symbol::Type(..) 
=> Typ::Error, }; Expression::new(ExpressionKind::Identifier(id_str), typ) } else { @@ -336,7 +336,7 @@ impl Binder { Constant::Number(_) => BuiltinType::Number, Constant::String(_) => BuiltinType::String, }); - Expression::new(ExpressionKind::Literal(constant_value), Some(typ)) + Expression::new(ExpressionKind::Literal(constant_value), typ) } /// Prefix operation @@ -382,15 +382,12 @@ impl Binder { let lhs = self.bind_expression(&infix.left, source); let rhs = self.bind_expression(&infix.right, source); - let lhs_typ = lhs.typ.unwrap_or(Typ::Unknown); - let rhs_typ = rhs.typ.unwrap_or(Typ::Unknown); - // Look the operator up in the operator table to check if // it is permissable and what the reutnr type is. - match operators::find_builtin_op(infix.op, lhs_typ, rhs_typ) { + match operators::find_builtin_op(infix.op, lhs.typ, rhs.typ) { Some(operator) => Expression::new( ExpressionKind::Infix(Box::new(lhs), infix.op, Box::new(rhs)), - Some(operator.result_typ), + operator.result_typ, ), None => { self.diagnostics.push(Diagnostic::new( @@ -431,7 +428,7 @@ impl Binder { )); } let rhs = self.bind_expression(&infix.right, source); - let resolved_ty = rhs.typ.unwrap_or(typ); + let resolved_ty = rhs.typ; if resolved_ty != typ { self.diagnostics.push(Diagnostic::new( format!( @@ -443,7 +440,7 @@ impl Binder { } Expression::new( ExpressionKind::Assignment(source.interned_value(id.ident), Box::new(rhs)), - Some(resolved_ty), + resolved_ty, ) } Some(_) => { @@ -479,7 +476,7 @@ impl Binder { pub fn bind_call(&mut self, call: &syntax::CallExpression, source: &SourceText) -> Expression { let callee = self.bind_expression(&call.callee, source); match callee.typ { - Some(Typ::Function(id)) => match self.scopes.lookup(id) { + Typ::Function(id) => match self.scopes.lookup(id) { Some(Symbol::Function(param_tys, ret_ty)) => { let param_count = param_tys.len(); let arg_count = call.arguments.len(); @@ -505,12 +502,12 @@ impl Binder { .zip(param_tys) .map(|(arg, param)| { let bound_arg 
= self.bind_expression(arg.as_inner(), source); - if bound_arg.typ != Some(param) { + if bound_arg.typ != param { self.diagnostics.push(Diagnostic::new( format!( "Invalid argument. Expected '{}' but found '{}'", param.name(), - bound_arg.typ.unwrap_or(Typ::Unknown).name() + bound_arg.typ.name() ), arg.span(), )) @@ -519,7 +516,7 @@ impl Binder { }) .collect(); - Expression::new(ExpressionKind::Call(Box::new(callee), args), Some(ret_ty)) + Expression::new(ExpressionKind::Call(Box::new(callee), args), ret_ty) } _ => { unreachable!(); @@ -566,7 +563,7 @@ impl Binder { // // TODO: Bind a conversion to bool here to allow `if` to // coerce values to `Bool` - let cond_ty = cond.typ.unwrap_or(Typ::Unknown); + let cond_ty = cond.typ; if cond_ty != Typ::Builtin(BuiltinType::Bool) { self.diagnostics.push(Diagnostic::new( format!( @@ -578,19 +575,13 @@ impl Binder { } let typ = if_true.typ; - let true_typ = if_true.typ.unwrap_or(Typ::Unknown); - let false_typ = if_false.typ.unwrap_or(Typ::Unknown); - - // TODO: This doesn't deal with the case of both types being - // missing. Hopefully we can get rid of optional types - // on the bound tree and rely on `Typ::Unknown` so we - // don't have to handle such cases. - if true_typ != false_typ { + + if if_true.typ != if_false.typ { self.diagnostics.push(Diagnostic::new( format!( "If and else have mismatched types. 
'{}' and '{}'", - true_typ.name(), - false_typ.name() + if_true.typ.name(), + if_false.typ.name() ), Span::enclosing(if_else.if_true.span(), if_else.if_false.span()), )); @@ -626,7 +617,7 @@ impl Binder { .iter() .map(|p| { let p = p.as_inner(); - let typ = match p.typ.as_ref() { + let ty = match p.typ.as_ref() { Some(anno) => self.bind_type(&anno.type_ref), None => { self.diagnostics.push(Diagnostic::new( @@ -645,10 +636,10 @@ impl Binder { p.id_tok.span(), )); } - parent_scope.try_declare(p.id, Symbol::Variable(VarStyle::Mutable, typ)); + parent_scope.try_declare(p.id, Symbol::Variable(VarStyle::Mutable, ty)); VarDecl { ident: source.interned_value(p.id), - ty: Some(typ), + ty, } }) .collect(); @@ -667,7 +658,7 @@ impl Binder { params, body: Box::new(bound_body), }), - Some(Typ::Error), + Typ::Error, ) } @@ -688,7 +679,7 @@ impl Binder { let body = self.bind_block(&loop_expr.body, source); Expression::new( ExpressionKind::Loop(Box::new(condition), Box::new(body)), - Some(Typ::Unit), + Typ::Unit, ) } @@ -702,8 +693,8 @@ impl Binder { .iter() .map(|e| self.bind_expression(e, source)) .collect(); - let typ = transformed.last().and_then(|e| e.typ).unwrap_or(Typ::Unit); - Expression::new(ExpressionKind::Sequence(transformed), Some(typ)) + let typ = transformed.last().map(|e| e.typ).unwrap_or(Typ::Unit); + Expression::new(ExpressionKind::Sequence(transformed), typ) } /// Bind a `print` expression @@ -744,20 +735,19 @@ impl Binder { // If we don't have a type annotation in the declaration then // infer the type from the initialiser let ty = if decl_type != Typ::Unknown { - match bound_initialiser.typ { - Some(t) if t != decl_type => { - // The declaration type doesn't match the - // expression being used to initialise it. 
- self.diagnostics.push(Diagnostic::new( - format!( - "Initialiser doesn't match declaration type for '{}'", - source.interned_value(id) - ), - decl.id.id_tok.span(), - )); - Some(Typ::Error) - } - _ => Some(decl_type), + if bound_initialiser.typ != decl_type { + // The declaration type doesn't match the + // expression being used to initialise it. + self.diagnostics.push(Diagnostic::new( + format!( + "Initialiser doesn't match declaration type for '{}'", + source.interned_value(id) + ), + decl.id.id_tok.span(), + )); + Typ::Error + } else { + decl_type } } else { bound_initialiser.typ @@ -765,7 +755,7 @@ impl Binder { self.scopes .current_mut() - .try_declare(id, Symbol::Variable(decl.style, ty.unwrap_or(Typ::Unknown))); + .try_declare(id, Symbol::Variable(decl.style, ty)); let is_mut = decl.style == VarStyle::Mutable; Expression::new( @@ -1024,7 +1014,7 @@ mod test { ); assert_eq!(ExpressionKind::Identifier("melles".into()), bound.kind); - assert_eq!(Some(Typ::Builtin(BuiltinType::Bool)), bound.typ); + assert_eq!(Typ::Builtin(BuiltinType::Bool), bound.typ); } #[test] @@ -1037,7 +1027,7 @@ mod test { }); assert_eq!(ExpressionKind::Literal(Constant::Number(1337)), bound.kind); - assert_eq!(Some(Typ::Builtin(BuiltinType::Number)), bound.typ); + assert_eq!(Typ::Builtin(BuiltinType::Number), bound.typ); } #[test] @@ -1062,12 +1052,12 @@ mod test { PrefixOp::Negate, Box::new(Expression::new( ExpressionKind::Literal(Constant::Number(23)), - Some(Typ::Builtin(BuiltinType::Number)) + Typ::Builtin(BuiltinType::Number) )) ), bound.kind ); - assert_eq!(Some(Typ::Builtin(BuiltinType::Number)), bound.typ); + assert_eq!(Typ::Builtin(BuiltinType::Number), bound.typ); } // TODO: need a better way of creating the expression trees to run diff --git a/src/sem/tree.rs b/src/sem/tree.rs index 7abba220..36bd6c08 100644 --- a/src/sem/tree.rs +++ b/src/sem/tree.rs @@ -40,7 +40,7 @@ pub struct VarDecl { pub ident: String, /// The type of the identifier, if one was specified or inferred. 
- pub ty: Option, + pub ty: Typ, } /// A Semantically Decorated Expression @@ -50,11 +50,22 @@ pub struct VarDecl { /// representation of the code as it was written. #[derive(Debug, PartialEq)] pub struct Expression { - /// The contents of this expression. + /// The contents of this expression + /// + /// This is a union of the different expression kinds. pub kind: ExpressionKind, - /// The type of this node, if known - pub typ: Option, + /// The type of this node + /// + /// All expressions in the tree will have some form of type. Error + /// expressions or expressions where the type can't be calculated + /// due to a syntax or semantic error in the program are typed + /// `Typ::Error`. Parts of the tree where the type has yet to be + /// inferred are typed `Typ::Unknown`. + /// + /// Before lowering a well-formed tree should contain neither of + /// these types. + pub typ: Typ, } /// The Expression Kind Enum @@ -141,16 +152,19 @@ impl Expression { /// /// Constructs a new semantic expression tree node from /// constituent parts. The type information for a given node can - /// be set to none if no type inference has yet been run for this - /// expression. - pub fn new(kind: ExpressionKind, typ: Option) -> Self { + /// be set to `Typ::Unknown` if no type inference has yet been run + /// for this expression. + pub fn new(kind: ExpressionKind, typ: Typ) -> Self { Expression { kind, typ } } /// Create an Error Expresion /// - /// Convenience function for returning error expressions. + /// Convenience function for returning error expressions. Error + /// expressions have a kind of `ExpressionKind::Error` and a type + /// of `Typ::Error`. They are used to mark invalid or + /// uncalculateable portions of the bound tree. 
pub fn error() -> Self { - Expression::new(ExpressionKind::Error, Some(Typ::Error)) + Expression::new(ExpressionKind::Error, Typ::Error) } } From 1ee2241195546bbf3ad0a8f64434f8f67e26a537 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Fri, 5 Jul 2019 07:14:39 +0100 Subject: [PATCH 05/18] Bail from `Parser::delimited` on No Progress Add some tests for malformed calls with invalid `,` counts. This exposes an issue where call expressions with the wrong delimiters at the end would result in an infinite loop of stubbing out expressions without making progress. This fixes the loop by keeping track of the position in the file we are at and bailing from the loop if no progress is made. --- spec/malformed/invalid_calls.ulg | 9 +++++++++ src/syntax/parse.rs | 8 ++++++++ 2 files changed, 17 insertions(+) create mode 100644 spec/malformed/invalid_calls.ulg diff --git a/spec/malformed/invalid_calls.ulg b/spec/malformed/invalid_calls.ulg new file mode 100644 index 00000000..ee1683f9 --- /dev/null +++ b/spec/malformed/invalid_calls.ulg @@ -0,0 +1,9 @@ +fn foo(n: Number, b: Bool): Number + n if b else 2 +end + +# !> 6:4:error: unexpected token: expected expression but found ',' +foo(,) + +# !> 9:15:error: unexpected token: expected expression but found ')' +foo(100, false,) \ No newline at end of file diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index 72b421a3..86ce5b10 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -409,9 +409,17 @@ impl<'a> Parser<'a> { if !self.current_is(&close) { res.push(DelimItem::First(p(self))); } + let mut last_span = self.current.as_ref().map(|t| t.span()); while !self.current_is(&close) { let delim = self.expect(&delimiter); res.push(DelimItem::Follow(delim, p(self))); + + let cur_span = self.current.as_ref().map(|t| t.span()); + if cur_span.is_some() && last_span == cur_span { + break; + } else { + last_span = cur_span; + } } res } From c1fef2f3da4358fe19e787f5111bc5bb65a52fe9 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Fri, 
5 Jul 2019 07:28:25 +0100 Subject: [PATCH 06/18] Only Expose Root Expression from `SyntaxTree` We don't need more than a reference to the root expression for lowering now. Fixup a TODO and simplify the bind. --- src/sem/binder.rs | 6 +++--- src/syntax/tree.rs | 11 +++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/sem/binder.rs b/src/sem/binder.rs index b8c52f8d..2269e295 100644 --- a/src/sem/binder.rs +++ b/src/sem/binder.rs @@ -216,9 +216,9 @@ impl Binder { pub fn bind_tree(&mut self, tree: syntax::SyntaxTree<'_>) -> Expression { let source = tree.source(); add_builtin_types(self.scopes.current_mut(), source); - let (expr, _end) = tree.into_parts(); - self.declare_expression(&expr); - self.bind_expression(&expr, source) + let expr = tree.root_expression(); + self.declare_expression(expr); + self.bind_expression(expr, source) } /// Declare any items in the current expression that should be diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs index 9ed34218..573cc4ef 100644 --- a/src/syntax/tree.rs +++ b/src/syntax/tree.rs @@ -96,12 +96,12 @@ impl<'a> SyntaxTree<'a> { !self.diagnostics.is_empty() } - /// Returns the root of the expression tree and the EOF token + /// Get the Root Expression /// - /// FIXME: should root and token just be public and remove this, - /// `root()`, and `end()`? - pub fn into_parts(self) -> (Expression, Token) { - (self.root, self.end) + /// Accesses the base of the expression tree. The only other part + /// of the tree is the `end` token. + pub fn root_expression(&self) -> &Expression { + &self.root } /// Access the Borrowed Source @@ -125,7 +125,6 @@ impl<'a> SyntaxTree<'a> { } } -/// /// Walks the subnodes of this tree and prints a text representation /// of them as an ASCII tree. 
fn pretty_tree( From e957db5b0e91068d351f203025627615df412ac1 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Fri, 5 Jul 2019 12:18:21 +0100 Subject: [PATCH 07/18] Replace `DelimItem` Vectors with `SepList` Introduces a new type for managing lists of separated items. Instead of having a `Vec<DelimItem<T>>` we can now just have a `SepList`. The plan is that later transformations can more easily index in and find separators and items. --- spec/fail/badcalls.ulg | 4 +- spec/malformed/invalid_calls.ulg | 8 +- src/sem/binder.rs | 4 +- src/syntax/parse.rs | 25 +++-- src/syntax/parse/checkparse_tests.rs | 143 +++++++++++++------------- src/syntax/tree.rs | 14 +-- src/syntax/tree/expression.rs | 51 +--------- src/syntax/tree/seplist.rs | 147 +++++++++++++++++++++++++++ src/syntax/tree/types.rs | 7 +- 9 files changed, 252 insertions(+), 151 deletions(-) create mode 100644 src/syntax/tree/seplist.rs diff --git a/spec/fail/badcalls.ulg b/spec/fail/badcalls.ulg index b7c9b765..256ae473 100644 --- a/spec/fail/badcalls.ulg +++ b/spec/fail/badcalls.ulg @@ -9,10 +9,10 @@ foo(100, '', 100) # !> Too many arguments to call foo(false, '', 100) # !> Too many arguments to call foo(100, '', '', false) # !> Too many arguments to call -foo(100, 100) # !> 12:7:error: Invalid argument. Expected 'String' but found 'Number' +foo(100, 100) # !> 12:9:error: Invalid argument. Expected 'String' but found 'Number' # !> 16:4:error: Invalid argument. Expected 'Number' but found 'String' -# !> 16:11:error: Invalid argument. Expected 'String' but found 'Bool' +# !> 16:13:error: Invalid argument. 
Expected 'String' but found 'Bool' foo('hello', false) let bar = 100 diff --git a/spec/malformed/invalid_calls.ulg b/spec/malformed/invalid_calls.ulg index ee1683f9..ff9912a8 100644 --- a/spec/malformed/invalid_calls.ulg +++ b/spec/malformed/invalid_calls.ulg @@ -5,5 +5,9 @@ end # !> 6:4:error: unexpected token: expected expression but found ',' foo(,) -# !> 9:15:error: unexpected token: expected expression but found ')' -foo(100, false,) \ No newline at end of file +# it's OK to have a trailing `,` +foo(100, false,) + +# don't go nuts though... +# !> 13:15:error: unexpected token: expected expression but found ',' +foo(969, true, ,) \ No newline at end of file diff --git a/src/sem/binder.rs b/src/sem/binder.rs index 2269e295..74042eb1 100644 --- a/src/sem/binder.rs +++ b/src/sem/binder.rs @@ -256,7 +256,6 @@ impl Binder { .iter() .map(|param| { param - .as_inner() .typ .as_ref() .map(|t| self.bind_type(&t.type_ref)) @@ -501,7 +500,7 @@ impl Binder { .iter() .zip(param_tys) .map(|(arg, param)| { - let bound_arg = self.bind_expression(arg.as_inner(), source); + let bound_arg = self.bind_expression(arg, source); if bound_arg.typ != param { self.diagnostics.push(Diagnostic::new( format!( @@ -616,7 +615,6 @@ impl Binder { .params .iter() .map(|p| { - let p = p.as_inner(); let ty = match p.typ.as_ref() { Some(anno) => self.bind_type(&anno.type_ref), None => { diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index 86ce5b10..1f762d90 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -20,10 +20,8 @@ mod tokeniser; mod checkparse_tests; use super::text::{Ident, SourceText, DUMMY_SPAN}; -use super::tree::{Literal, SyntaxTree, Token, TokenKind}; -use super::{ - BlockBody, DelimItem, Expression, InfixOp, PrefixOp, TypeAnno, TypeRef, TypedId, VarStyle, -}; +use super::tree::{Literal, SepList, SyntaxTree, Token, TokenKind}; +use super::{BlockBody, Expression, InfixOp, PrefixOp, TypeAnno, TypeRef, TypedId, VarStyle}; use crate::diag::Diagnostic; use 
std::iter::Iterator; use tokeniser::{TokenStream, Tokeniser}; @@ -401,27 +399,28 @@ impl<'a> Parser<'a> { /// Returns a list of zero or more elemnets delimited by the given /// tokens. Used to parse the parameter list for a function and /// the argument list for a call site. - fn delimited(&mut self, p: P, delimiter: TokenKind, close: TokenKind) -> Vec> + fn delimited(&mut self, p: P, delimiter: TokenKind, close: TokenKind) -> SepList where P: Fn(&mut Parser) -> T, { - let mut res = Vec::new(); - if !self.current_is(&close) { - res.push(DelimItem::First(p(self))); - } + let mut builder = SepList::builder(); let mut last_span = self.current.as_ref().map(|t| t.span()); while !self.current_is(&close) { - let delim = self.expect(&delimiter); - res.push(DelimItem::Follow(delim, p(self))); + let with_item = builder.push_item(p(self)); + if !self.current_is(&close) { + builder = with_item.push_sep(self.expect(&delimiter)); + } else { + return with_item.build(); + } - let cur_span = self.current.as_ref().map(|t| t.span()); + let cur_span = self.current.as_ref().map(|t| t.span()); if cur_span.is_some() && last_span == cur_span { break; } else { last_span = cur_span; } } - res + builder.build() } /// Parse Null Denotation diff --git a/src/syntax/parse/checkparse_tests.rs b/src/syntax/parse/checkparse_tests.rs index 42349830..c3c4d420 100644 --- a/src/syntax/parse/checkparse_tests.rs +++ b/src/syntax/parse/checkparse_tests.rs @@ -215,7 +215,7 @@ fn parse_simple_call() { check_parse!("foo()", |s| Expression::call( mk_ident(&s, "foo"), Token::new(TokenKind::OpenBracket), - Vec::new(), + SepList::empty(), Token::new(TokenKind::CloseBracket) )); } @@ -225,32 +225,25 @@ fn parse_complex_call() { check_parse!("hello(1, 1 + 23, -world)", |s| Expression::call( mk_ident(&s, "hello"), Token::new(TokenKind::OpenBracket), - vec![ - DelimItem::First(Expression::constant_num( + SepList::builder() + .push_item(Expression::constant_num( Token::new(TokenKind::Literal(Literal::Number(1))), 1 - 
)), - DelimItem::Follow( - Token::new(TokenKind::Comma), - Expression::infix( - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), - Token::new(TokenKind::Plus), - InfixOp::Add, - Expression::constant_num( - Token::new(TokenKind::Literal(Literal::Number(23))), - 23 - ), - ) - ), - DelimItem::Follow( - Token::new(TokenKind::Comma), - Expression::prefix( - Token::new(TokenKind::Minus), - PrefixOp::Negate, - mk_ident(&s, "world"), - ) - ), - ], + )) + .push_sep(Token::new(TokenKind::Comma)) + .push_item(Expression::infix( + Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), + Token::new(TokenKind::Plus), + InfixOp::Add, + Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(23))), 23), + )) + .push_sep(Token::new(TokenKind::Comma)) + .push_item(Expression::prefix( + Token::new(TokenKind::Minus), + PrefixOp::Negate, + mk_ident(&s, "world"), + )) + .build(), Token::new(TokenKind::CloseBracket), )); } @@ -287,21 +280,19 @@ fn parse_indexing() { Token::new(TokenKind::CloseSqBracket) ), Token::new(TokenKind::OpenBracket), - vec![ - DelimItem::First(Expression::constant_num( + SepList::builder() + .push_item(Expression::constant_num( Token::new(TokenKind::Literal(Literal::Number(1))), 1 - )), - DelimItem::Follow( - Token::new(TokenKind::Comma), - Expression::index( - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), - Token::new(TokenKind::OpenSqBracket), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(3))), 3), - Token::new(TokenKind::CloseSqBracket) - ) - ), - ], + )) + .push_sep(Token::new(TokenKind::Comma)) + .push_item(Expression::index( + Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), + Token::new(TokenKind::OpenSqBracket), + Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(3))), 3), + Token::new(TokenKind::CloseSqBracket) + )) + .build(), Token::new(TokenKind::CloseBracket), )); } 
@@ -327,10 +318,12 @@ fn parse_ternary_if() { Expression::call( mk_ident(&s, "hello"), Token::new(TokenKind::OpenBracket), - vec![DelimItem::First(Expression::constant_num( - Token::new(TokenKind::Literal(Literal::Number(1))), - 1, - ))], + SepList::builder() + .push_item(Expression::constant_num( + Token::new(TokenKind::Literal(Literal::Number(1))), + 1, + )) + .build(), Token::new(TokenKind::CloseBracket), ), Token::new(TokenKind::Word(s.intern("else"))), @@ -376,7 +369,7 @@ fn parse_function_def() { Token::new(TokenKind::Word(s.intern("fn"))), s.intern("test"), Token::new(TokenKind::OpenBracket), - Vec::new(), + SepList::empty(), Token::new(TokenKind::CloseBracket), mk_simple_ty_anno(&s, "Num"), blockify(vec![Expression::constant_num( @@ -392,7 +385,7 @@ fn parse_function_def() { Token::new(TokenKind::Word(s.intern("fn"))), s.intern("ünécød3"), Token::new(TokenKind::OpenBracket), - Vec::new(), + SepList::empty(), Token::new(TokenKind::CloseBracket), mk_simple_ty_anno(&s, "Num"), blockify(vec![Expression::if_then_else( @@ -429,10 +422,12 @@ fn parse_function_with_args() { Token::new(TokenKind::Word(s.intern("fn"))), s.intern("neg"), Token::new(TokenKind::OpenBracket), - vec![DelimItem::First(TypedId::new( - Token::new(TokenKind::Word(s.intern("i"))), - mk_simple_ty_anno(&s, "Num") - ))], + SepList::builder() + .push_item(TypedId::new( + Token::new(TokenKind::Word(s.intern("i"))), + mk_simple_ty_anno(&s, "Num") + )) + .build(), Token::new(TokenKind::CloseBracket), mk_simple_ty_anno(&s, "Num"), blockify(vec![Expression::prefix( @@ -447,23 +442,21 @@ fn parse_function_with_args() { Token::new(TokenKind::Word(s.intern("fn"))), s.intern("test"), Token::new(TokenKind::OpenBracket), - vec![ - DelimItem::First(TypedId::new( + SepList::builder() + .push_item(TypedId::new( Token::new(TokenKind::Word(s.intern("i"))), mk_simple_ty_anno(&s, "Num"), - )), - DelimItem::Follow( - Token::new(TokenKind::Comma), - 
TypedId::new_without_type(Token::new(TokenKind::Word(s.intern("j")))), - ), - DelimItem::Follow( - Token::new(TokenKind::Comma), - TypedId::new( - Token::new(TokenKind::Word(s.intern("k"))), - mk_simple_ty_anno(&s, "String"), - ), - ), - ], + )) + .push_sep(Token::new(TokenKind::Comma)) + .push_item(TypedId::new_without_type(Token::new(TokenKind::Word( + s.intern("j"), + )))) + .push_sep(Token::new(TokenKind::Comma)) + .push_item(TypedId::new( + Token::new(TokenKind::Word(s.intern("k"))), + mk_simple_ty_anno(&s, "String"), + )) + .build(), Token::new(TokenKind::CloseBracket), mk_simple_ty_anno(&s, "String"), blockify(vec![Expression::infix( @@ -523,7 +516,9 @@ fn parse_simple_tuple() { Token::new(TokenKind::Colon), TypeRef::tuple( Token::new(TokenKind::OpenBracket), - vec![DelimItem::First(mk_simple_ty(&s, "Num"))], + SepList::builder() + .push_item(mk_simple_ty(&s, "Num")) + .build(), Token::new(TokenKind::CloseBracket) ) )), @@ -540,17 +535,15 @@ fn parse_simple_tuple() { Token::new(TokenKind::Colon), TypeRef::tuple( Token::new(TokenKind::OpenBracket), - vec![ - DelimItem::First(mk_simple_ty(&s, "Num")), - DelimItem::Follow( - Token::new(TokenKind::Comma), - TypeRef::array( - Token::new(TokenKind::OpenSqBracket), - mk_simple_ty(&s, "String"), - Token::new(TokenKind::CloseSqBracket) - ) - ), - ], + SepList::builder() + .push_item(mk_simple_ty(&s, "Num")) + .push_sep(Token::new(TokenKind::Comma)) + .push_item(TypeRef::array( + Token::new(TokenKind::OpenSqBracket), + mk_simple_ty(&s, "String"), + Token::new(TokenKind::CloseSqBracket) + )) + .build(), Token::new(TokenKind::CloseBracket) ) )), diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs index 573cc4ef..e482a491 100644 --- a/src/syntax/tree.rs +++ b/src/syntax/tree.rs @@ -6,6 +6,7 @@ pub mod expression; pub mod operators; +mod seplist; mod token; mod trivia; pub mod types; @@ -16,11 +17,12 @@ use crate::diag::Diagnostic; use crate::parse::Parser; use crate::text::SourceText; +pub use self::seplist::{SepList, 
SepListBuilder}; pub use self::token::{Literal, Token, TokenKind}; pub use self::trivia::{TriviaToken, TriviaTokenKind}; +use self::expression::Expression; use super::SyntaxNode; -use expression::Expression; /// Syntax tree /// @@ -46,12 +48,12 @@ impl<'a> SyntaxTree<'a> { /// # Parameters /// /// * `root`: The body of the file. This could be an empty - /// sequence if the file is empty + /// sequence if the file is empty /// * `diagnostics`: Diagnostics raised in the parsing of the - /// source. + /// source. /// * `end`: The closing EOF token. This may have some leading - /// trivia attached and is therefore required for a full-fidelity - /// tree. + /// trivia attached and is therefore required for a + /// full-fidelity tree. pub fn new( source: &'a SourceText, root: Expression, @@ -144,7 +146,7 @@ where Expression::Prefix(p) => vec![&p.inner], Expression::Infix(i) => vec![&i.left, &i.right], Expression::Call(c) => std::iter::once(&*c.callee) - .chain(c.arguments.iter().map(|a| a.as_inner())) + .chain(c.arguments.iter()) .collect(), Expression::Index(i) => vec![&i.index, &i.indexee], Expression::IfThenElse(i) => vec![&i.cond, &i.if_true, &i.if_false], diff --git a/src/syntax/tree/expression.rs b/src/syntax/tree/expression.rs index 720b92bb..91bfdc31 100644 --- a/src/syntax/tree/expression.rs +++ b/src/syntax/tree/expression.rs @@ -8,7 +8,7 @@ use super::super::SyntaxNode; use super::operators::{InfixOp, PrefixOp}; use super::token::{Token, TokenKind}; use super::types::TypeAnno; -use std::borrow::Cow; +use super::SepList; /// An identifier, with an optional type attached #[derive(Debug, PartialEq)] @@ -57,47 +57,6 @@ impl TypedId { } } -/// Delimited Item -/// -/// A single element in a list of token-delimited values. -#[derive(Debug, PartialEq)] -pub enum DelimItem { - /// The first item in a list. Doesn't have a corresponding - /// dlimiter token. - First(T), - /// The follwing items in the list. 
Carries the token which - /// separated it from the previous element. - Follow(Token, T), -} - -impl DelimItem { - /// Borrow in inner item - pub fn as_inner(&self) -> &T { - match *self { - DelimItem::First(ref t) => t, - DelimItem::Follow(_, ref t) => t, - } - } -} - -impl SyntaxNode for DelimItem { - fn description(&self, source: &SourceText) -> Cow { - match *self { - DelimItem::First(ref t) => format!("Delim::First({})", t.description(source)).into(), - DelimItem::Follow(ref sep, ref t) => { - format!("Delim::Follow({}, {})", sep.kind, t.description(source)).into() - } - } - } - - fn span(&self) -> Span { - match *self { - DelimItem::First(ref t) => t.span(), - DelimItem::Follow(ref sep, ref t) => Span::enclosing(sep.span(), t.span()), - } - } -} - /// Literal / Constant Value #[derive(Debug, PartialEq, Clone)] pub enum Constant { @@ -170,7 +129,7 @@ pub struct CallExpression { /// The opening `(` of this call pub open_paren: Box, /// The list of arguments to the call. This could be empty. - pub arguments: Vec>, + pub arguments: SepList, /// THe closing `)` of this call pub close_paren: Box, } @@ -222,7 +181,7 @@ pub struct FunctionExpression { /// The open `(` before the parameter list pub params_open: Box, /// Function parameters - pub params: Vec>, + pub params: SepList, /// The closing `)` after the parameter list pub params_close: Box, /// Function return type @@ -427,7 +386,7 @@ impl Expression { pub fn call( callee: Expression, open_paren: Token, - args: Vec>, + args: SepList, close_paren: Token, ) -> Self { Expression::Call(CallExpression { @@ -479,7 +438,7 @@ impl Expression { fn_kw: Token, identifier: Ident, params_open: Token, - params: Vec>, + params: SepList, params_close: Token, return_type: TypeAnno, body: BlockBody, diff --git a/src/syntax/tree/seplist.rs b/src/syntax/tree/seplist.rs new file mode 100644 index 00000000..a06fd1b3 --- /dev/null +++ b/src/syntax/tree/seplist.rs @@ -0,0 +1,147 @@ +//! Separeted Syntax List +//! +//! 
This module holds the definition of the `SepList` +//! type. Separed lists are used in the syntax tree to hold delimited +//! items such as parameter or argument lists. +//! +//! A `SepList` is made up of two lists of items, the main tokens and +//! the separators. + +use super::Token; +use std::marker::PhantomData; + +/// The separated list type holds a list of syntax items and the +/// separators between then. +#[derive(Debug, PartialEq)] +pub struct SepList { + /// The items in the list + items: Vec, + /// The separators between the items + separators: Vec, +} + +impl SepList { + /// Create a new seplist from the given items and separators + /// + /// The separator length should be equal or 1 shorter than the + /// items length. + pub fn new(items: Vec, separators: Vec) -> Self { + SepList { items, separators } + } + + /// Create an empty separated list + /// + /// The new list will contain no items and no separators. This is + /// mainly useful for tests or when fabricating trees by hand. The + /// parser will usually genrate an empty list by calling + /// `SepList::builder().build()` + pub fn empty() -> Self { + SepList::new(Vec::new(), Vec::new()) + } + + /// Create a list builder. This provides a structured way of + /// incrementally building a separated list. + pub fn builder() -> SepListBuilder { + SepListBuilder { + items: Vec::new(), + separators: Vec::new(), + state: Default::default(), + } + } + + /// Borrow the separators as a slice + /// + /// Standard iteration of this collection just accesses the main + /// items. This allows access to the separators too. 
+ pub fn separators(&self) -> &[S] { + &self.separators + } +} + +impl std::ops::Deref for SepList { + type Target = [T]; + + fn deref(&self) -> &[T] { + &self.items + } +} + +/// Fluent typestate API for builing a separated list +pub struct SepListBuilder { + /// The buffered items for this list + items: Vec, + /// The buffered separators for this list + separators: Vec, + /// Phantom state data + state: PhantomData, +} + +/// Initial state for the separated list builder +pub struct Item {} + +/// Separated list builder state when item has been seen +pub struct Separator {} + +impl SepListBuilder { + /// Finish building the list + pub fn build(self) -> SepList { + SepList::new(self.items, self.separators) + } +} + +impl SepListBuilder { + /// Push an item into the separated list and wait for a separator + pub fn push_item(mut self, item: T) -> SepListBuilder { + self.items.push(item); + SepListBuilder { + items: self.items, + separators: self.separators, + state: Default::default(), + } + } +} + +impl SepListBuilder { + /// Push a separator onto the list and wait for another item + pub fn push_sep(mut self, sep: S) -> SepListBuilder { + self.separators.push(sep); + SepListBuilder { + items: self.items, + separators: self.separators, + state: Default::default(), + } + } +} + +#[cfg(test)] +mod test { + + use super::*; + + #[test] + fn create_new_seplist() { + let empty = SepList::<(), u32>::new(Vec::new(), Vec::new()); + let with_items = SepList::new(vec![1, 2, 4, 8], vec![',', '!', '*']); + + assert_eq!(0, empty.len()); + assert_eq!(4, with_items.len()); + } + + #[test] + fn seplist_builder() { + let list = SepList::builder() + .push_item(123) + .push_sep(',') + .push_item(456) + .push_sep('.') + .build(); + + assert_eq!(2, list.len()); + assert_eq!(Some(&123), list.get(0)); + assert_eq!(Some(&456), list.get(1)); + assert_eq!(None, list.get(2)); + assert_eq!(Some(&','), list.separators().get(0)); + assert_eq!(Some(&'.'), list.separators().get(1)); + 
assert_eq!(None, list.separators().get(2)); + } +} diff --git a/src/syntax/tree/types.rs b/src/syntax/tree/types.rs index 455a4a16..a1f7f848 100644 --- a/src/syntax/tree/types.rs +++ b/src/syntax/tree/types.rs @@ -4,8 +4,7 @@ //! reference types. use super::super::text::{SourceText, Span, DUMMY_SPAN}; -use super::super::SyntaxNode; -use super::expression::DelimItem; +use super::super::{SepList, SyntaxNode}; use super::Token; use std::borrow::Cow; @@ -20,7 +19,7 @@ pub enum TypeRef { /// The Unit Type Unit(Box, Box), /// A non-empty Tuple - Tuple(Box, Vec>, Box), + Tuple(Box, SepList, Box), /// An Array Type Array(Box, Box, Box), /// Missing type. Used to represent type information being missing @@ -63,7 +62,7 @@ impl TypeRef { /// /// A tuple type is an ordered collection of values. Each value /// can be of a different type. - pub fn tuple(open: Token, inner: Vec>, close: Token) -> Self { + pub fn tuple(open: Token, inner: SepList, close: Token) -> Self { if inner.is_empty() { Self::unit(open, close) } else { From 9b55bcb417fd904b16fbeec80eb75bb363a91531 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Sat, 6 Jul 2019 10:47:14 +0100 Subject: [PATCH 08/18] Initial `syntaxfact` Work The idea is this module will contain some functions to simplify building expression trees by hand. 
--- src/syntax.rs | 1 + src/syntax/parse/checkparse_tests.rs | 20 +++++++++++++ src/syntax/syntaxfact.rs | 42 ++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 src/syntax/syntaxfact.rs diff --git a/src/syntax.rs b/src/syntax.rs index cd7304fc..af35982f 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -8,6 +8,7 @@ mod node; pub mod parse; pub mod text; pub mod tree; +pub mod syntaxfact; pub use self::node::*; pub use self::tree::expression::*; diff --git a/src/syntax/parse/checkparse_tests.rs b/src/syntax/parse/checkparse_tests.rs index c3c4d420..b3d9385b 100644 --- a/src/syntax/parse/checkparse_tests.rs +++ b/src/syntax/parse/checkparse_tests.rs @@ -602,6 +602,20 @@ fn parse_bool_literal() { )); } +#[test] +fn parse_bool_literal_syntaxfact() { + check_parse!("true", |s| Expression::constant_bool( + syntaxfact::word(s.intern("true")), + true + )); + check_parse!("false", |s| Expression::constant_bool( + syntaxfact::word(s.intern("false")), + false + )); + check_parse!("true", syntaxfact::const_bool(true)); + check_parse!("false", syntaxfact::const_bool(false)); +} + #[test] fn parse_string_literal() { check_parse!( @@ -621,3 +635,9 @@ fn parse_string_literal() { ) ); } + +#[test] +fn parse_string_literal_syntaxfact() { + check_parse!("'foobar'", syntaxfact::raw_string("foobar")); + check_parse!("'münchen'", syntaxfact::raw_string("münchen")); +} diff --git a/src/syntax/syntaxfact.rs b/src/syntax/syntaxfact.rs new file mode 100644 index 00000000..e5568e1b --- /dev/null +++ b/src/syntax/syntaxfact.rs @@ -0,0 +1,42 @@ +//! Syntax Tree Factory +//! +//! Ergonomic methods to build new expression trees +//! +//! Used for building expression trees by hand in code rather than the +//! parser. + +use super::*; +use super::text::Ident; + +/// Build a raw string literal +/// +/// Takes a string and builds a string literal expression from it. The +/// token is stubbed and will contian a dummy span. 
+pub fn raw_string>(value: S) -> Expression { + let value = value.into(); + Expression::constant_string( + Token::new(TokenKind::Literal(Literal::RawString(value.clone()))), + value, + ) +} + +/// Constant Boolean Expression +/// +/// Taks a given bool value and builds a stubbed-out token for it. The +/// token will contain a dummy span. +pub fn const_bool(value: bool) -> Expression { + Expression::constant_bool( + word(if value { + Ident::True + } else { + Ident::False + }), + value, + ) +} + +/// Word token from identifier. Wraps the identifier in a token with +/// missing position information +pub fn word(id: Ident) -> Token { + Token::new(TokenKind::Word(id)) +} From c9b2fc1208c6a81ec27c2aa178593c73308c58a0 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Sat, 6 Jul 2019 18:32:27 +0100 Subject: [PATCH 09/18] Add Builders for Word Tokens and Identifier Expressions Switch the checkparse tests to a `syntaxfact` builder for `TokenKind::Word` tokens that saves a bit of space. Gets rid of the FIXME `mk_ident` and replaces it with `ident_expr`. --- src/syntax/parse/checkparse_tests.rs | 177 ++++++++++++--------------- src/syntax/syntaxfact.rs | 6 + 2 files changed, 82 insertions(+), 101 deletions(-) diff --git a/src/syntax/parse/checkparse_tests.rs b/src/syntax/parse/checkparse_tests.rs index b3d9385b..d73f70dd 100644 --- a/src/syntax/parse/checkparse_tests.rs +++ b/src/syntax/parse/checkparse_tests.rs @@ -22,17 +22,6 @@ macro_rules! check_parse { }; } -/// Creates an Identifier Expression -/// -/// This funciton handles interning the indentifier and creating a -/// mock token for the idnetifier expression to use. 
-/// -/// FIXME: Replace with a proper builder API for trees -fn mk_ident(source: &SourceText, id: &str) -> Expression { - let id = source.intern(id); - Expression::identifier(Token::new(TokenKind::Word(id)), id) -} - /// Create a Simple TypeRef /// /// Takes the given string, interns it and creates a type reference to @@ -40,7 +29,7 @@ fn mk_ident(source: &SourceText, id: &str) -> Expression { /// /// FIXME: Replace with a proper builder API for trees fn mk_simple_ty(source: &SourceText, simple_name: &str) -> TypeRef { - TypeRef::simple(Token::new(TokenKind::Word(source.intern(simple_name)))) + TypeRef::simple(syntaxfact::word(source.intern(simple_name))) } /// Stub a Type Annotation @@ -59,14 +48,14 @@ fn mk_simple_ty_anno(source: &SourceText, simple_name: &str) -> TypeAnno { fn blockify(contents: Vec) -> BlockBody { BlockBody { contents: Box::new(Expression::Sequence(contents)), - close: Box::new(Token::new(TokenKind::Word(Ident::End))), + close: Box::new(syntaxfact::word(Ident::End)), } } #[test] fn parse_simple_string() { check_parse!("hello + 123", |s| Expression::infix( - mk_ident(&s, "hello"), + syntaxfact::ident_expr(s.intern("hello")), Token::new(TokenKind::Plus), InfixOp::Add, Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(123))), 123), @@ -76,70 +65,70 @@ fn parse_simple_string() { #[test] fn parse_operators() { check_parse!("a = b", |s| Expression::infix( - mk_ident(&s, "a"), + syntaxfact::ident_expr(s.intern("a")), Token::new(TokenKind::Equals), InfixOp::Assign, - mk_ident(&s, "b"), + syntaxfact::ident_expr(s.intern("b")), )); check_parse!("a + b", |s| Expression::infix( - mk_ident(&s, "a"), + syntaxfact::ident_expr(s.intern("a")), Token::new(TokenKind::Plus), InfixOp::Add, - mk_ident(&s, "b"), + syntaxfact::ident_expr(s.intern("b")), )); check_parse!("a - b", |s| Expression::infix( - mk_ident(&s, "a"), + syntaxfact::ident_expr(s.intern("a")), Token::new(TokenKind::Minus), InfixOp::Sub, - mk_ident(&s, "b"), + 
syntaxfact::ident_expr(s.intern("b")), )); check_parse!("a * b", |s| Expression::infix( - mk_ident(&s, "a"), + syntaxfact::ident_expr(s.intern("a")), Token::new(TokenKind::Star), InfixOp::Mul, - mk_ident(&s, "b"), + syntaxfact::ident_expr(s.intern("b")), )); check_parse!("a / b", |s| Expression::infix( - mk_ident(&s, "a"), + syntaxfact::ident_expr(s.intern("a")), Token::new(TokenKind::Slash), InfixOp::Div, - mk_ident(&s, "b"), + syntaxfact::ident_expr(s.intern("b")), )); check_parse!("a == b", |s| Expression::infix( - mk_ident(&s, "a"), + syntaxfact::ident_expr(s.intern("a")), Token::new(TokenKind::DoubleEquals), InfixOp::Eq, - mk_ident(&s, "b"), + syntaxfact::ident_expr(s.intern("b")), )); check_parse!("a != b", |s| Expression::infix( - mk_ident(&s, "a"), + syntaxfact::ident_expr(s.intern("a")), Token::new(TokenKind::BangEquals), InfixOp::NotEq, - mk_ident(&s, "b"), + syntaxfact::ident_expr(s.intern("b")), )); check_parse!("a < b", |s| Expression::infix( - mk_ident(&s, "a"), + syntaxfact::ident_expr(s.intern("a")), Token::new(TokenKind::LessThan), InfixOp::Lt, - mk_ident(&s, "b"), + syntaxfact::ident_expr(s.intern("b")), )); check_parse!("a <= b", |s| Expression::infix( - mk_ident(&s, "a"), + syntaxfact::ident_expr(s.intern("a")), Token::new(TokenKind::LessThanEqual), InfixOp::LtEq, - mk_ident(&s, "b"), + syntaxfact::ident_expr(s.intern("b")), )); check_parse!("a > b", |s| Expression::infix( - mk_ident(&s, "a"), + syntaxfact::ident_expr(s.intern("a")), Token::new(TokenKind::MoreThan), InfixOp::Gt, - mk_ident(&s, "b"), + syntaxfact::ident_expr(s.intern("b")), )); check_parse!("a >= b", |s| Expression::infix( - mk_ident(&s, "a"), + syntaxfact::ident_expr(s.intern("a")), Token::new(TokenKind::MoreThanEqual), InfixOp::GtEq, - mk_ident(&s, "b"), + syntaxfact::ident_expr(s.intern("b")), )); } @@ -192,20 +181,20 @@ fn parse_prefix_expressions() { check_parse!("!a", |s| Expression::prefix( Token::new(TokenKind::Bang), PrefixOp::Not, - mk_ident(&s, "a") + 
syntaxfact::ident_expr(s.intern("a")) )); check_parse!("!a != !b", |s| Expression::infix( Expression::prefix( Token::new(TokenKind::Bang), PrefixOp::Not, - mk_ident(&s, "a") + syntaxfact::ident_expr(s.intern("a")) ), Token::new(TokenKind::BangEquals), InfixOp::NotEq, Expression::prefix( Token::new(TokenKind::Bang), PrefixOp::Not, - mk_ident(&s, "b") + syntaxfact::ident_expr(s.intern("b")) ), )); } @@ -213,7 +202,7 @@ fn parse_prefix_expressions() { #[test] fn parse_simple_call() { check_parse!("foo()", |s| Expression::call( - mk_ident(&s, "foo"), + syntaxfact::ident_expr(s.intern("foo")), Token::new(TokenKind::OpenBracket), SepList::empty(), Token::new(TokenKind::CloseBracket) @@ -223,7 +212,7 @@ fn parse_simple_call() { #[test] fn parse_complex_call() { check_parse!("hello(1, 1 + 23, -world)", |s| Expression::call( - mk_ident(&s, "hello"), + syntaxfact::ident_expr(s.intern("hello")), Token::new(TokenKind::OpenBracket), SepList::builder() .push_item(Expression::constant_num( @@ -241,7 +230,7 @@ fn parse_complex_call() { .push_item(Expression::prefix( Token::new(TokenKind::Minus), PrefixOp::Negate, - mk_ident(&s, "world"), + syntaxfact::ident_expr(s.intern("world")), )) .build(), Token::new(TokenKind::CloseBracket), @@ -274,9 +263,9 @@ fn parse_groups_with_parens() { fn parse_indexing() { check_parse!("hello[world](1, 2[3])", |s| Expression::call( Expression::index( - mk_ident(&s, "hello"), + syntaxfact::ident_expr(s.intern("hello")), Token::new(TokenKind::OpenSqBracket), - mk_ident(&s, "world"), + syntaxfact::ident_expr(s.intern("world")), Token::new(TokenKind::CloseSqBracket) ), Token::new(TokenKind::OpenBracket), @@ -300,23 +289,23 @@ fn parse_indexing() { #[test] fn parse_ternary_if() { check_parse!("1 if 2 else 3", |s| Expression::if_then_else( - Token::new(TokenKind::Word(s.intern("if"))), + syntaxfact::word(s.intern("if")), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), 
Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), - Token::new(TokenKind::Word(s.intern("else"))), + syntaxfact::word(s.intern("else")), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(3))), 3), )); check_parse!("hello(1) if foo[23] else world[1 if foo else 2]", |s| { Expression::if_then_else( - Token::new(TokenKind::Word(s.intern("if"))), + syntaxfact::word(s.intern("if")), Expression::index( - mk_ident(&s, "foo"), + syntaxfact::ident_expr(s.intern("foo")), Token::new(TokenKind::OpenSqBracket), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(23))), 23), Token::new(TokenKind::CloseSqBracket), ), Expression::call( - mk_ident(&s, "hello"), + syntaxfact::ident_expr(s.intern("hello")), Token::new(TokenKind::OpenBracket), SepList::builder() .push_item(Expression::constant_num( @@ -326,15 +315,15 @@ fn parse_ternary_if() { .build(), Token::new(TokenKind::CloseBracket), ), - Token::new(TokenKind::Word(s.intern("else"))), + syntaxfact::word(s.intern("else")), Expression::index( - mk_ident(&s, "world"), + syntaxfact::ident_expr(s.intern("world")), Token::new(TokenKind::OpenSqBracket), Expression::if_then_else( - Token::new(TokenKind::Word(s.intern("if"))), - mk_ident(&s, "foo"), + syntaxfact::word(s.intern("if")), + syntaxfact::ident_expr(s.intern("foo")), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), - Token::new(TokenKind::Word(s.intern("else"))), + syntaxfact::word(s.intern("else")), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), ), Token::new(TokenKind::CloseSqBracket), @@ -344,10 +333,10 @@ fn parse_ternary_if() { check_parse!( "0 unless 1 else 2", Expression::if_then_else( - Token::new(TokenKind::Word(s.intern("unless"))), + syntaxfact::word(s.intern("unless")), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), - 
Token::new(TokenKind::Word(s.intern("else"))), + syntaxfact::word(s.intern("else")), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(0))), 0), ) ); @@ -356,17 +345,17 @@ fn parse_ternary_if() { #[test] fn parse_unicode_identifiers() { check_parse!(" übåℝ * ßeåk ", |s| Expression::infix( - mk_ident(&s, "übåℝ"), + syntaxfact::ident_expr(s.intern("übåℝ")), Token::new(TokenKind::Star), InfixOp::Mul, - mk_ident(&s, "ßeåk"), + syntaxfact::ident_expr(s.intern("ßeåk")), )); } #[test] fn parse_function_def() { check_parse!("fn test() :Num 100 end", |s| Expression::function( - Token::new(TokenKind::Word(s.intern("fn"))), + syntaxfact::word(s.intern("fn")), s.intern("test"), Token::new(TokenKind::OpenBracket), SepList::empty(), @@ -382,17 +371,17 @@ fn parse_function_def() { 0 if 74 else 888 end", |s| Expression::function( - Token::new(TokenKind::Word(s.intern("fn"))), + syntaxfact::word(s.intern("fn")), s.intern("ünécød3"), Token::new(TokenKind::OpenBracket), SepList::empty(), Token::new(TokenKind::CloseBracket), mk_simple_ty_anno(&s, "Num"), blockify(vec![Expression::if_then_else( - Token::new(TokenKind::Word(s.intern("if"))), + syntaxfact::word(s.intern("if")), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(74))), 74), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(0))), 0), - Token::new(TokenKind::Word(s.intern("else"))), + syntaxfact::word(s.intern("else")), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(888))), 888), )]) ) @@ -402,12 +391,12 @@ fn parse_function_def() { #[test] fn parse_while_loop() { check_parse!("while 1 end", |s| Expression::loop_while( - Token::new(TokenKind::Word(s.intern("while"))), + syntaxfact::word(s.intern("while")), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), blockify(Vec::new()) )); check_parse!("while 0 44 234 end", |s| Expression::loop_while( - Token::new(TokenKind::Word(s.intern("while"))), + 
syntaxfact::word(s.intern("while")), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(0))), 0), blockify(vec![ Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(44))), 44), @@ -419,12 +408,12 @@ fn parse_while_loop() { #[test] fn parse_function_with_args() { check_parse!("fn neg(i: Num): Num - i end", |s| Expression::function( - Token::new(TokenKind::Word(s.intern("fn"))), + syntaxfact::word(s.intern("fn")), s.intern("neg"), Token::new(TokenKind::OpenBracket), SepList::builder() .push_item(TypedId::new( - Token::new(TokenKind::Word(s.intern("i"))), + syntaxfact::word(s.intern("i")), mk_simple_ty_anno(&s, "Num") )) .build(), @@ -433,27 +422,25 @@ fn parse_function_with_args() { blockify(vec![Expression::prefix( Token::new(TokenKind::Minus), PrefixOp::Negate, - mk_ident(&s, "i"), + syntaxfact::ident_expr(s.intern("i")), )]) )); check_parse!("fn test(i: Num, j, k: String): String i + j + k end", |s| { Expression::function( - Token::new(TokenKind::Word(s.intern("fn"))), + syntaxfact::word(s.intern("fn")), s.intern("test"), Token::new(TokenKind::OpenBracket), SepList::builder() .push_item(TypedId::new( - Token::new(TokenKind::Word(s.intern("i"))), + syntaxfact::word(s.intern("i")), mk_simple_ty_anno(&s, "Num"), )) .push_sep(Token::new(TokenKind::Comma)) - .push_item(TypedId::new_without_type(Token::new(TokenKind::Word( - s.intern("j"), - )))) + .push_item(TypedId::new_without_type(syntaxfact::word(s.intern("j")))) .push_sep(Token::new(TokenKind::Comma)) .push_item(TypedId::new( - Token::new(TokenKind::Word(s.intern("k"))), + syntaxfact::word(s.intern("k")), mk_simple_ty_anno(&s, "String"), )) .build(), @@ -461,14 +448,14 @@ fn parse_function_with_args() { mk_simple_ty_anno(&s, "String"), blockify(vec![Expression::infix( Expression::infix( - mk_ident(&s, "i"), + syntaxfact::ident_expr(s.intern("i")), Token::new(TokenKind::Plus), InfixOp::Add, - mk_ident(&s, "j"), + syntaxfact::ident_expr(s.intern("j")), ), 
Token::new(TokenKind::Plus), InfixOp::Add, - mk_ident(&s, "k"), + syntaxfact::ident_expr(s.intern("k")), )]), ) }); @@ -477,9 +464,9 @@ fn parse_function_with_args() { #[test] fn parse_simple_array_type() { check_parse!("let f: [Num] = 100", |s| Expression::declaration( - Token::new(TokenKind::Word(s.intern("let"))), + syntaxfact::word(s.intern("let")), TypedId::from_parts( - Token::new(TokenKind::Word(s.intern("f"))), + syntaxfact::word(s.intern("f")), Some(TypeAnno::new( Token::new(TokenKind::Colon), TypeRef::array( @@ -498,8 +485,8 @@ fn parse_simple_array_type() { #[test] fn parse_simple_let() { check_parse!("let foo = 100", |s| Expression::declaration( - Token::new(TokenKind::Word(s.intern("let"))), - TypedId::from_parts(Token::new(TokenKind::Word(s.intern("foo"))), None), + syntaxfact::word(s.intern("let")), + TypedId::from_parts(syntaxfact::word(s.intern("foo")), None), VarStyle::Immutable, Token::new(TokenKind::Equals), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(100))), 100), @@ -509,9 +496,9 @@ fn parse_simple_let() { #[test] fn parse_simple_tuple() { check_parse!("let f: (Num) = 100", |s| Expression::declaration( - Token::new(TokenKind::Word(s.intern("let"))), + syntaxfact::word(s.intern("let")), TypedId::from_parts( - Token::new(TokenKind::Word(s.intern("f"))), + syntaxfact::word(s.intern("f")), Some(TypeAnno::new( Token::new(TokenKind::Colon), TypeRef::tuple( @@ -528,9 +515,9 @@ fn parse_simple_tuple() { Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(100))), 100), )); check_parse!("let f: (Num, [String]) = 100", |s| Expression::declaration( - Token::new(TokenKind::Word(s.intern("let"))), + syntaxfact::word(s.intern("let")), TypedId::from_parts( - Token::new(TokenKind::Word(s.intern("f"))), + syntaxfact::word(s.intern("f")), Some(TypeAnno::new( Token::new(TokenKind::Colon), TypeRef::tuple( @@ -557,16 +544,16 @@ fn parse_simple_tuple() { #[test] fn parse_variable_decl() { check_parse!("var foo = 93", |s| 
Expression::declaration( - Token::new(TokenKind::Word(s.intern("var"))), - TypedId::from_parts(Token::new(TokenKind::Word(s.intern("foo"))), None), + syntaxfact::word(s.intern("var")), + TypedId::from_parts(syntaxfact::word(s.intern("foo")), None), VarStyle::Mutable, Token::new(TokenKind::Equals), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(93))), 93), )); check_parse!("var foo_bar: Number = -99999", |s| Expression::declaration( - Token::new(TokenKind::Word(s.intern("var"))), + syntaxfact::word(s.intern("var")), TypedId::from_parts( - Token::new(TokenKind::Word(s.intern("foo_bar"))), + syntaxfact::word(s.intern("foo_bar")), Some(mk_simple_ty_anno(&s, "Number")) ), VarStyle::Mutable, @@ -585,25 +572,13 @@ fn parse_variable_decl() { #[test] fn parse_print_operator() { check_parse!("print 1334", |s| Expression::print( - Token::new(TokenKind::Word(s.intern("print"))), + syntaxfact::word(s.intern("print")), Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1334))), 1334) )); } #[test] fn parse_bool_literal() { - check_parse!("true", |s| Expression::constant_bool( - Token::new(TokenKind::Word(s.intern("true"))), - true - )); - check_parse!("false", |s| Expression::constant_bool( - Token::new(TokenKind::Word(s.intern("false"))), - false - )); -} - -#[test] -fn parse_bool_literal_syntaxfact() { check_parse!("true", |s| Expression::constant_bool( syntaxfact::word(s.intern("true")), true diff --git a/src/syntax/syntaxfact.rs b/src/syntax/syntaxfact.rs index e5568e1b..ebf8cc20 100644 --- a/src/syntax/syntaxfact.rs +++ b/src/syntax/syntaxfact.rs @@ -35,6 +35,12 @@ pub fn const_bool(value: bool) -> Expression { ) } +/// Identifier Expression. Reads a value from a variable or provides a +/// reference to a function or other named item. +pub fn ident_expr(value: Ident) -> Expression { + Expression::identifier(word(value), value) +} + /// Word token from identifier. 
Wraps the identifier in a token with /// missing position information pub fn word(id: Ident) -> Token { From a426665bcac3142e6a8c22d82ad8cebfa89ed1fa Mon Sep 17 00:00:00 2001 From: Will Speak Date: Sat, 6 Jul 2019 18:54:17 +0100 Subject: [PATCH 10/18] Add Syntaxfact Support for Number Constants Introduce some parsing tests for numbers of varying sizes. Move to using a new `const_num` overload in the `checkparse` tests. This slims down the size of quite a lot of the test case's expression tree builders quite nicely. --- src/syntax.rs | 2 +- src/syntax/parse/checkparse_tests.rs | 111 +++++++++++++-------------- src/syntax/syntaxfact.rs | 22 +++--- 3 files changed, 67 insertions(+), 68 deletions(-) diff --git a/src/syntax.rs b/src/syntax.rs index af35982f..03d68819 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -6,9 +6,9 @@ mod node; pub mod parse; +pub mod syntaxfact; pub mod text; pub mod tree; -pub mod syntaxfact; pub use self::node::*; pub use self::tree::expression::*; diff --git a/src/syntax/parse/checkparse_tests.rs b/src/syntax/parse/checkparse_tests.rs index d73f70dd..f010f58b 100644 --- a/src/syntax/parse/checkparse_tests.rs +++ b/src/syntax/parse/checkparse_tests.rs @@ -52,13 +52,26 @@ fn blockify(contents: Vec) -> BlockBody { } } +#[test] +fn parse_number_literals() { + check_parse!("0", syntaxfact::const_num(0)); + check_parse!("1", syntaxfact::const_num(1)); + check_parse!("12356", syntaxfact::const_num(12356)); + check_parse!("65536", syntaxfact::const_num(65536)); + check_parse!("4294967296", syntaxfact::const_num(4294967296)); + check_parse!( + "9223372036854775807", + syntaxfact::const_num(9223372036854775807) + ); +} + #[test] fn parse_simple_string() { check_parse!("hello + 123", |s| Expression::infix( syntaxfact::ident_expr(s.intern("hello")), Token::new(TokenKind::Plus), InfixOp::Add, - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(123))), 123), + syntaxfact::const_num(123), )); } @@ -137,14 +150,14 @@ fn 
parse_with_precedence() { check_parse!( "1 + 2 * 3", Expression::infix( - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), + syntaxfact::const_num(1), Token::new(TokenKind::Plus), InfixOp::Add, Expression::infix( - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), + syntaxfact::const_num(2), Token::new(TokenKind::Star), InfixOp::Mul, - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(3))), 3), + syntaxfact::const_num(3), ), ) ); @@ -159,14 +172,14 @@ fn parse_prefix_expressions() { Expression::prefix( Token::new(TokenKind::Plus), PrefixOp::Identity, - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1) + syntaxfact::const_num(1) ), Token::new(TokenKind::Star), InfixOp::Mul, Expression::prefix( Token::new(TokenKind::Minus), PrefixOp::Negate, - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2) + syntaxfact::const_num(2) ), ), Token::new(TokenKind::Plus), @@ -174,7 +187,7 @@ fn parse_prefix_expressions() { Expression::prefix( Token::new(TokenKind::Plus), PrefixOp::Identity, - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(3))), 3) + syntaxfact::const_num(3) ) ) ); @@ -215,16 +228,13 @@ fn parse_complex_call() { syntaxfact::ident_expr(s.intern("hello")), Token::new(TokenKind::OpenBracket), SepList::builder() - .push_item(Expression::constant_num( - Token::new(TokenKind::Literal(Literal::Number(1))), - 1 - )) + .push_item(syntaxfact::const_num(1)) .push_sep(Token::new(TokenKind::Comma)) .push_item(Expression::infix( - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), + syntaxfact::const_num(1), Token::new(TokenKind::Plus), InfixOp::Add, - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(23))), 23), + syntaxfact::const_num(23), )) .push_sep(Token::new(TokenKind::Comma)) .push_item(Expression::prefix( @@ -245,16 +255,16 @@ fn parse_groups_with_parens() { 
Expression::grouping( Token::new(TokenKind::OpenBracket), Expression::infix( - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), + syntaxfact::const_num(1), Token::new(TokenKind::Plus), InfixOp::Add, - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), + syntaxfact::const_num(2), ), Token::new(TokenKind::CloseBracket), ), Token::new(TokenKind::Star), InfixOp::Mul, - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(3))), 3) + syntaxfact::const_num(3) ) ); } @@ -270,15 +280,12 @@ fn parse_indexing() { ), Token::new(TokenKind::OpenBracket), SepList::builder() - .push_item(Expression::constant_num( - Token::new(TokenKind::Literal(Literal::Number(1))), - 1 - )) + .push_item(syntaxfact::const_num(1)) .push_sep(Token::new(TokenKind::Comma)) .push_item(Expression::index( - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), + syntaxfact::const_num(2), Token::new(TokenKind::OpenSqBracket), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(3))), 3), + syntaxfact::const_num(3), Token::new(TokenKind::CloseSqBracket) )) .build(), @@ -290,10 +297,10 @@ fn parse_indexing() { fn parse_ternary_if() { check_parse!("1 if 2 else 3", |s| Expression::if_then_else( syntaxfact::word(s.intern("if")), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), + syntaxfact::const_num(2), + syntaxfact::const_num(1), syntaxfact::word(s.intern("else")), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(3))), 3), + syntaxfact::const_num(3), )); check_parse!("hello(1) if foo[23] else world[1 if foo else 2]", |s| { Expression::if_then_else( @@ -301,17 +308,14 @@ fn parse_ternary_if() { Expression::index( syntaxfact::ident_expr(s.intern("foo")), Token::new(TokenKind::OpenSqBracket), - 
Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(23))), 23), + syntaxfact::const_num(23), Token::new(TokenKind::CloseSqBracket), ), Expression::call( syntaxfact::ident_expr(s.intern("hello")), Token::new(TokenKind::OpenBracket), SepList::builder() - .push_item(Expression::constant_num( - Token::new(TokenKind::Literal(Literal::Number(1))), - 1, - )) + .push_item(syntaxfact::const_num(1)) .build(), Token::new(TokenKind::CloseBracket), ), @@ -322,9 +326,9 @@ fn parse_ternary_if() { Expression::if_then_else( syntaxfact::word(s.intern("if")), syntaxfact::ident_expr(s.intern("foo")), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), + syntaxfact::const_num(1), syntaxfact::word(s.intern("else")), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), + syntaxfact::const_num(2), ), Token::new(TokenKind::CloseSqBracket), ), @@ -334,10 +338,10 @@ fn parse_ternary_if() { "0 unless 1 else 2", Expression::if_then_else( syntaxfact::word(s.intern("unless")), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(2))), 2), + syntaxfact::const_num(1), + syntaxfact::const_num(2), syntaxfact::word(s.intern("else")), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(0))), 0), + syntaxfact::const_num(0), ) ); } @@ -361,10 +365,7 @@ fn parse_function_def() { SepList::empty(), Token::new(TokenKind::CloseBracket), mk_simple_ty_anno(&s, "Num"), - blockify(vec![Expression::constant_num( - Token::new(TokenKind::Literal(Literal::Number(100))), - 100 - )]) + blockify(vec![syntaxfact::const_num(100)]) )); check_parse!( "fn ünécød3() :Num @@ -379,10 +380,10 @@ fn parse_function_def() { mk_simple_ty_anno(&s, "Num"), blockify(vec![Expression::if_then_else( syntaxfact::word(s.intern("if")), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(74))), 74), - 
Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(0))), 0), + syntaxfact::const_num(74), + syntaxfact::const_num(0), syntaxfact::word(s.intern("else")), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(888))), 888), + syntaxfact::const_num(888), )]) ) ); @@ -392,16 +393,13 @@ fn parse_function_def() { fn parse_while_loop() { check_parse!("while 1 end", |s| Expression::loop_while( syntaxfact::word(s.intern("while")), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1))), 1), + syntaxfact::const_num(1), blockify(Vec::new()) )); check_parse!("while 0 44 234 end", |s| Expression::loop_while( syntaxfact::word(s.intern("while")), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(0))), 0), - blockify(vec![ - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(44))), 44), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(234))), 234) - ]), + syntaxfact::const_num(0), + blockify(vec![syntaxfact::const_num(44), syntaxfact::const_num(234)]), )); } @@ -478,7 +476,7 @@ fn parse_simple_array_type() { ), VarStyle::Immutable, Token::new(TokenKind::Equals), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(100))), 100), + syntaxfact::const_num(100), )); } @@ -489,7 +487,7 @@ fn parse_simple_let() { TypedId::from_parts(syntaxfact::word(s.intern("foo")), None), VarStyle::Immutable, Token::new(TokenKind::Equals), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(100))), 100), + syntaxfact::const_num(100), )); } @@ -512,7 +510,7 @@ fn parse_simple_tuple() { ), VarStyle::Immutable, Token::new(TokenKind::Equals), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(100))), 100), + syntaxfact::const_num(100), )); check_parse!("let f: (Num, [String]) = 100", |s| Expression::declaration( syntaxfact::word(s.intern("let")), @@ -537,7 +535,7 @@ fn parse_simple_tuple() { ), VarStyle::Immutable, 
Token::new(TokenKind::Equals), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(100))), 100), + syntaxfact::const_num(100), )); } @@ -548,7 +546,7 @@ fn parse_variable_decl() { TypedId::from_parts(syntaxfact::word(s.intern("foo")), None), VarStyle::Mutable, Token::new(TokenKind::Equals), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(93))), 93), + syntaxfact::const_num(93), )); check_parse!("var foo_bar: Number = -99999", |s| Expression::declaration( syntaxfact::word(s.intern("var")), @@ -561,10 +559,7 @@ fn parse_variable_decl() { Expression::prefix( Token::new(TokenKind::Minus), PrefixOp::Negate, - Expression::constant_num( - Token::new(TokenKind::Literal(Literal::Number(99999))), - 99999 - ) + syntaxfact::const_num(99999) ), )); } @@ -573,7 +568,7 @@ fn parse_variable_decl() { fn parse_print_operator() { check_parse!("print 1334", |s| Expression::print( syntaxfact::word(s.intern("print")), - Expression::constant_num(Token::new(TokenKind::Literal(Literal::Number(1334))), 1334) + syntaxfact::const_num(1334) )); } diff --git a/src/syntax/syntaxfact.rs b/src/syntax/syntaxfact.rs index ebf8cc20..c658df15 100644 --- a/src/syntax/syntaxfact.rs +++ b/src/syntax/syntaxfact.rs @@ -5,8 +5,8 @@ //! Used for building expression trees by hand in code rather than the //! parser. -use super::*; use super::text::Ident; +use super::*; /// Build a raw string literal /// @@ -22,15 +22,19 @@ pub fn raw_string>(value: S) -> Expression { /// Constant Boolean Expression /// -/// Taks a given bool value and builds a stubbed-out token for it. The -/// token will contain a dummy span. +/// Takes a given bool value and builds a stubbed-out token for +/// it. The token will contain a dummy span. 
pub fn const_bool(value: bool) -> Expression { - Expression::constant_bool( - word(if value { - Ident::True - } else { - Ident::False - }), + Expression::constant_bool(word(if value { Ident::True } else { Ident::False }), value) +} + +/// Constant numeric value +/// +/// Takes a given numeric value and builds a stubbed-out token for +/// it. The token will contain a dummy span. +pub fn const_num(value: i64) -> Expression { + Expression::constant_num( + Token::new(TokenKind::Literal(Literal::Number(value))), value, ) } From 5dbdc7f7b3485f79fefec2d84b9a52cdf88e984b Mon Sep 17 00:00:00 2001 From: Will Speak Date: Sat, 6 Jul 2019 19:21:35 +0100 Subject: [PATCH 11/18] Rename `syntaxfact` -> `syntax_builder` This seems like a nicer and more logical name for the module. --- src/syntax.rs | 2 +- src/syntax/parse/checkparse_tests.rs | 263 +++++++++--------- .../{syntaxfact.rs => syntax_builder.rs} | 0 3 files changed, 135 insertions(+), 130 deletions(-) rename src/syntax/{syntaxfact.rs => syntax_builder.rs} (100%) diff --git a/src/syntax.rs b/src/syntax.rs index 03d68819..688fc54c 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -6,7 +6,7 @@ mod node; pub mod parse; -pub mod syntaxfact; +pub mod syntax_builder; pub mod text; pub mod tree; diff --git a/src/syntax/parse/checkparse_tests.rs b/src/syntax/parse/checkparse_tests.rs index f010f58b..37e21050 100644 --- a/src/syntax/parse/checkparse_tests.rs +++ b/src/syntax/parse/checkparse_tests.rs @@ -29,7 +29,7 @@ macro_rules! 
check_parse { /// /// FIXME: Replace with a proper builder API for trees fn mk_simple_ty(source: &SourceText, simple_name: &str) -> TypeRef { - TypeRef::simple(syntaxfact::word(source.intern(simple_name))) + TypeRef::simple(syntax_builder::word(source.intern(simple_name))) } /// Stub a Type Annotation @@ -48,100 +48,100 @@ fn mk_simple_ty_anno(source: &SourceText, simple_name: &str) -> TypeAnno { fn blockify(contents: Vec) -> BlockBody { BlockBody { contents: Box::new(Expression::Sequence(contents)), - close: Box::new(syntaxfact::word(Ident::End)), + close: Box::new(syntax_builder::word(Ident::End)), } } #[test] fn parse_number_literals() { - check_parse!("0", syntaxfact::const_num(0)); - check_parse!("1", syntaxfact::const_num(1)); - check_parse!("12356", syntaxfact::const_num(12356)); - check_parse!("65536", syntaxfact::const_num(65536)); - check_parse!("4294967296", syntaxfact::const_num(4294967296)); + check_parse!("0", syntax_builder::const_num(0)); + check_parse!("1", syntax_builder::const_num(1)); + check_parse!("12356", syntax_builder::const_num(12356)); + check_parse!("65536", syntax_builder::const_num(65536)); + check_parse!("4294967296", syntax_builder::const_num(4294967296)); check_parse!( "9223372036854775807", - syntaxfact::const_num(9223372036854775807) + syntax_builder::const_num(9223372036854775807) ); } #[test] fn parse_simple_string() { check_parse!("hello + 123", |s| Expression::infix( - syntaxfact::ident_expr(s.intern("hello")), + syntax_builder::ident_expr(s.intern("hello")), Token::new(TokenKind::Plus), InfixOp::Add, - syntaxfact::const_num(123), + syntax_builder::const_num(123), )); } #[test] fn parse_operators() { check_parse!("a = b", |s| Expression::infix( - syntaxfact::ident_expr(s.intern("a")), + syntax_builder::ident_expr(s.intern("a")), Token::new(TokenKind::Equals), InfixOp::Assign, - syntaxfact::ident_expr(s.intern("b")), + syntax_builder::ident_expr(s.intern("b")), )); check_parse!("a + b", |s| Expression::infix( - 
syntaxfact::ident_expr(s.intern("a")), + syntax_builder::ident_expr(s.intern("a")), Token::new(TokenKind::Plus), InfixOp::Add, - syntaxfact::ident_expr(s.intern("b")), + syntax_builder::ident_expr(s.intern("b")), )); check_parse!("a - b", |s| Expression::infix( - syntaxfact::ident_expr(s.intern("a")), + syntax_builder::ident_expr(s.intern("a")), Token::new(TokenKind::Minus), InfixOp::Sub, - syntaxfact::ident_expr(s.intern("b")), + syntax_builder::ident_expr(s.intern("b")), )); check_parse!("a * b", |s| Expression::infix( - syntaxfact::ident_expr(s.intern("a")), + syntax_builder::ident_expr(s.intern("a")), Token::new(TokenKind::Star), InfixOp::Mul, - syntaxfact::ident_expr(s.intern("b")), + syntax_builder::ident_expr(s.intern("b")), )); check_parse!("a / b", |s| Expression::infix( - syntaxfact::ident_expr(s.intern("a")), + syntax_builder::ident_expr(s.intern("a")), Token::new(TokenKind::Slash), InfixOp::Div, - syntaxfact::ident_expr(s.intern("b")), + syntax_builder::ident_expr(s.intern("b")), )); check_parse!("a == b", |s| Expression::infix( - syntaxfact::ident_expr(s.intern("a")), + syntax_builder::ident_expr(s.intern("a")), Token::new(TokenKind::DoubleEquals), InfixOp::Eq, - syntaxfact::ident_expr(s.intern("b")), + syntax_builder::ident_expr(s.intern("b")), )); check_parse!("a != b", |s| Expression::infix( - syntaxfact::ident_expr(s.intern("a")), + syntax_builder::ident_expr(s.intern("a")), Token::new(TokenKind::BangEquals), InfixOp::NotEq, - syntaxfact::ident_expr(s.intern("b")), + syntax_builder::ident_expr(s.intern("b")), )); check_parse!("a < b", |s| Expression::infix( - syntaxfact::ident_expr(s.intern("a")), + syntax_builder::ident_expr(s.intern("a")), Token::new(TokenKind::LessThan), InfixOp::Lt, - syntaxfact::ident_expr(s.intern("b")), + syntax_builder::ident_expr(s.intern("b")), )); check_parse!("a <= b", |s| Expression::infix( - syntaxfact::ident_expr(s.intern("a")), + syntax_builder::ident_expr(s.intern("a")), Token::new(TokenKind::LessThanEqual), 
InfixOp::LtEq, - syntaxfact::ident_expr(s.intern("b")), + syntax_builder::ident_expr(s.intern("b")), )); check_parse!("a > b", |s| Expression::infix( - syntaxfact::ident_expr(s.intern("a")), + syntax_builder::ident_expr(s.intern("a")), Token::new(TokenKind::MoreThan), InfixOp::Gt, - syntaxfact::ident_expr(s.intern("b")), + syntax_builder::ident_expr(s.intern("b")), )); check_parse!("a >= b", |s| Expression::infix( - syntaxfact::ident_expr(s.intern("a")), + syntax_builder::ident_expr(s.intern("a")), Token::new(TokenKind::MoreThanEqual), InfixOp::GtEq, - syntaxfact::ident_expr(s.intern("b")), + syntax_builder::ident_expr(s.intern("b")), )); } @@ -150,14 +150,14 @@ fn parse_with_precedence() { check_parse!( "1 + 2 * 3", Expression::infix( - syntaxfact::const_num(1), + syntax_builder::const_num(1), Token::new(TokenKind::Plus), InfixOp::Add, Expression::infix( - syntaxfact::const_num(2), + syntax_builder::const_num(2), Token::new(TokenKind::Star), InfixOp::Mul, - syntaxfact::const_num(3), + syntax_builder::const_num(3), ), ) ); @@ -172,14 +172,14 @@ fn parse_prefix_expressions() { Expression::prefix( Token::new(TokenKind::Plus), PrefixOp::Identity, - syntaxfact::const_num(1) + syntax_builder::const_num(1) ), Token::new(TokenKind::Star), InfixOp::Mul, Expression::prefix( Token::new(TokenKind::Minus), PrefixOp::Negate, - syntaxfact::const_num(2) + syntax_builder::const_num(2) ), ), Token::new(TokenKind::Plus), @@ -187,27 +187,27 @@ fn parse_prefix_expressions() { Expression::prefix( Token::new(TokenKind::Plus), PrefixOp::Identity, - syntaxfact::const_num(3) + syntax_builder::const_num(3) ) ) ); check_parse!("!a", |s| Expression::prefix( Token::new(TokenKind::Bang), PrefixOp::Not, - syntaxfact::ident_expr(s.intern("a")) + syntax_builder::ident_expr(s.intern("a")) )); check_parse!("!a != !b", |s| Expression::infix( Expression::prefix( Token::new(TokenKind::Bang), PrefixOp::Not, - syntaxfact::ident_expr(s.intern("a")) + syntax_builder::ident_expr(s.intern("a")) ), 
Token::new(TokenKind::BangEquals), InfixOp::NotEq, Expression::prefix( Token::new(TokenKind::Bang), PrefixOp::Not, - syntaxfact::ident_expr(s.intern("b")) + syntax_builder::ident_expr(s.intern("b")) ), )); } @@ -215,7 +215,7 @@ fn parse_prefix_expressions() { #[test] fn parse_simple_call() { check_parse!("foo()", |s| Expression::call( - syntaxfact::ident_expr(s.intern("foo")), + syntax_builder::ident_expr(s.intern("foo")), Token::new(TokenKind::OpenBracket), SepList::empty(), Token::new(TokenKind::CloseBracket) @@ -225,22 +225,22 @@ fn parse_simple_call() { #[test] fn parse_complex_call() { check_parse!("hello(1, 1 + 23, -world)", |s| Expression::call( - syntaxfact::ident_expr(s.intern("hello")), + syntax_builder::ident_expr(s.intern("hello")), Token::new(TokenKind::OpenBracket), SepList::builder() - .push_item(syntaxfact::const_num(1)) + .push_item(syntax_builder::const_num(1)) .push_sep(Token::new(TokenKind::Comma)) .push_item(Expression::infix( - syntaxfact::const_num(1), + syntax_builder::const_num(1), Token::new(TokenKind::Plus), InfixOp::Add, - syntaxfact::const_num(23), + syntax_builder::const_num(23), )) .push_sep(Token::new(TokenKind::Comma)) .push_item(Expression::prefix( Token::new(TokenKind::Minus), PrefixOp::Negate, - syntaxfact::ident_expr(s.intern("world")), + syntax_builder::ident_expr(s.intern("world")), )) .build(), Token::new(TokenKind::CloseBracket), @@ -255,16 +255,16 @@ fn parse_groups_with_parens() { Expression::grouping( Token::new(TokenKind::OpenBracket), Expression::infix( - syntaxfact::const_num(1), + syntax_builder::const_num(1), Token::new(TokenKind::Plus), InfixOp::Add, - syntaxfact::const_num(2), + syntax_builder::const_num(2), ), Token::new(TokenKind::CloseBracket), ), Token::new(TokenKind::Star), InfixOp::Mul, - syntaxfact::const_num(3) + syntax_builder::const_num(3) ) ); } @@ -273,19 +273,19 @@ fn parse_groups_with_parens() { fn parse_indexing() { check_parse!("hello[world](1, 2[3])", |s| Expression::call( Expression::index( - 
syntaxfact::ident_expr(s.intern("hello")), + syntax_builder::ident_expr(s.intern("hello")), Token::new(TokenKind::OpenSqBracket), - syntaxfact::ident_expr(s.intern("world")), + syntax_builder::ident_expr(s.intern("world")), Token::new(TokenKind::CloseSqBracket) ), Token::new(TokenKind::OpenBracket), SepList::builder() - .push_item(syntaxfact::const_num(1)) + .push_item(syntax_builder::const_num(1)) .push_sep(Token::new(TokenKind::Comma)) .push_item(Expression::index( - syntaxfact::const_num(2), + syntax_builder::const_num(2), Token::new(TokenKind::OpenSqBracket), - syntaxfact::const_num(3), + syntax_builder::const_num(3), Token::new(TokenKind::CloseSqBracket) )) .build(), @@ -296,39 +296,39 @@ fn parse_indexing() { #[test] fn parse_ternary_if() { check_parse!("1 if 2 else 3", |s| Expression::if_then_else( - syntaxfact::word(s.intern("if")), - syntaxfact::const_num(2), - syntaxfact::const_num(1), - syntaxfact::word(s.intern("else")), - syntaxfact::const_num(3), + syntax_builder::word(s.intern("if")), + syntax_builder::const_num(2), + syntax_builder::const_num(1), + syntax_builder::word(s.intern("else")), + syntax_builder::const_num(3), )); check_parse!("hello(1) if foo[23] else world[1 if foo else 2]", |s| { Expression::if_then_else( - syntaxfact::word(s.intern("if")), + syntax_builder::word(s.intern("if")), Expression::index( - syntaxfact::ident_expr(s.intern("foo")), + syntax_builder::ident_expr(s.intern("foo")), Token::new(TokenKind::OpenSqBracket), - syntaxfact::const_num(23), + syntax_builder::const_num(23), Token::new(TokenKind::CloseSqBracket), ), Expression::call( - syntaxfact::ident_expr(s.intern("hello")), + syntax_builder::ident_expr(s.intern("hello")), Token::new(TokenKind::OpenBracket), SepList::builder() - .push_item(syntaxfact::const_num(1)) + .push_item(syntax_builder::const_num(1)) .build(), Token::new(TokenKind::CloseBracket), ), - syntaxfact::word(s.intern("else")), + syntax_builder::word(s.intern("else")), Expression::index( - 
syntaxfact::ident_expr(s.intern("world")), + syntax_builder::ident_expr(s.intern("world")), Token::new(TokenKind::OpenSqBracket), Expression::if_then_else( - syntaxfact::word(s.intern("if")), - syntaxfact::ident_expr(s.intern("foo")), - syntaxfact::const_num(1), - syntaxfact::word(s.intern("else")), - syntaxfact::const_num(2), + syntax_builder::word(s.intern("if")), + syntax_builder::ident_expr(s.intern("foo")), + syntax_builder::const_num(1), + syntax_builder::word(s.intern("else")), + syntax_builder::const_num(2), ), Token::new(TokenKind::CloseSqBracket), ), @@ -337,11 +337,11 @@ fn parse_ternary_if() { check_parse!( "0 unless 1 else 2", Expression::if_then_else( - syntaxfact::word(s.intern("unless")), - syntaxfact::const_num(1), - syntaxfact::const_num(2), - syntaxfact::word(s.intern("else")), - syntaxfact::const_num(0), + syntax_builder::word(s.intern("unless")), + syntax_builder::const_num(1), + syntax_builder::const_num(2), + syntax_builder::word(s.intern("else")), + syntax_builder::const_num(0), ) ); } @@ -349,41 +349,41 @@ fn parse_ternary_if() { #[test] fn parse_unicode_identifiers() { check_parse!(" übåℝ * ßeåk ", |s| Expression::infix( - syntaxfact::ident_expr(s.intern("übåℝ")), + syntax_builder::ident_expr(s.intern("übåℝ")), Token::new(TokenKind::Star), InfixOp::Mul, - syntaxfact::ident_expr(s.intern("ßeåk")), + syntax_builder::ident_expr(s.intern("ßeåk")), )); } #[test] fn parse_function_def() { check_parse!("fn test() :Num 100 end", |s| Expression::function( - syntaxfact::word(s.intern("fn")), + syntax_builder::word(s.intern("fn")), s.intern("test"), Token::new(TokenKind::OpenBracket), SepList::empty(), Token::new(TokenKind::CloseBracket), mk_simple_ty_anno(&s, "Num"), - blockify(vec![syntaxfact::const_num(100)]) + blockify(vec![syntax_builder::const_num(100)]) )); check_parse!( "fn ünécød3() :Num 0 if 74 else 888 end", |s| Expression::function( - syntaxfact::word(s.intern("fn")), + syntax_builder::word(s.intern("fn")), s.intern("ünécød3"), 
Token::new(TokenKind::OpenBracket), SepList::empty(), Token::new(TokenKind::CloseBracket), mk_simple_ty_anno(&s, "Num"), blockify(vec![Expression::if_then_else( - syntaxfact::word(s.intern("if")), - syntaxfact::const_num(74), - syntaxfact::const_num(0), - syntaxfact::word(s.intern("else")), - syntaxfact::const_num(888), + syntax_builder::word(s.intern("if")), + syntax_builder::const_num(74), + syntax_builder::const_num(0), + syntax_builder::word(s.intern("else")), + syntax_builder::const_num(888), )]) ) ); @@ -392,26 +392,29 @@ fn parse_function_def() { #[test] fn parse_while_loop() { check_parse!("while 1 end", |s| Expression::loop_while( - syntaxfact::word(s.intern("while")), - syntaxfact::const_num(1), + syntax_builder::word(s.intern("while")), + syntax_builder::const_num(1), blockify(Vec::new()) )); check_parse!("while 0 44 234 end", |s| Expression::loop_while( - syntaxfact::word(s.intern("while")), - syntaxfact::const_num(0), - blockify(vec![syntaxfact::const_num(44), syntaxfact::const_num(234)]), + syntax_builder::word(s.intern("while")), + syntax_builder::const_num(0), + blockify(vec![ + syntax_builder::const_num(44), + syntax_builder::const_num(234) + ]), )); } #[test] fn parse_function_with_args() { check_parse!("fn neg(i: Num): Num - i end", |s| Expression::function( - syntaxfact::word(s.intern("fn")), + syntax_builder::word(s.intern("fn")), s.intern("neg"), Token::new(TokenKind::OpenBracket), SepList::builder() .push_item(TypedId::new( - syntaxfact::word(s.intern("i")), + syntax_builder::word(s.intern("i")), mk_simple_ty_anno(&s, "Num") )) .build(), @@ -420,25 +423,27 @@ fn parse_function_with_args() { blockify(vec![Expression::prefix( Token::new(TokenKind::Minus), PrefixOp::Negate, - syntaxfact::ident_expr(s.intern("i")), + syntax_builder::ident_expr(s.intern("i")), )]) )); check_parse!("fn test(i: Num, j, k: String): String i + j + k end", |s| { Expression::function( - syntaxfact::word(s.intern("fn")), + syntax_builder::word(s.intern("fn")), 
s.intern("test"), Token::new(TokenKind::OpenBracket), SepList::builder() .push_item(TypedId::new( - syntaxfact::word(s.intern("i")), + syntax_builder::word(s.intern("i")), mk_simple_ty_anno(&s, "Num"), )) .push_sep(Token::new(TokenKind::Comma)) - .push_item(TypedId::new_without_type(syntaxfact::word(s.intern("j")))) + .push_item(TypedId::new_without_type(syntax_builder::word( + s.intern("j"), + ))) .push_sep(Token::new(TokenKind::Comma)) .push_item(TypedId::new( - syntaxfact::word(s.intern("k")), + syntax_builder::word(s.intern("k")), mk_simple_ty_anno(&s, "String"), )) .build(), @@ -446,14 +451,14 @@ fn parse_function_with_args() { mk_simple_ty_anno(&s, "String"), blockify(vec![Expression::infix( Expression::infix( - syntaxfact::ident_expr(s.intern("i")), + syntax_builder::ident_expr(s.intern("i")), Token::new(TokenKind::Plus), InfixOp::Add, - syntaxfact::ident_expr(s.intern("j")), + syntax_builder::ident_expr(s.intern("j")), ), Token::new(TokenKind::Plus), InfixOp::Add, - syntaxfact::ident_expr(s.intern("k")), + syntax_builder::ident_expr(s.intern("k")), )]), ) }); @@ -462,9 +467,9 @@ fn parse_function_with_args() { #[test] fn parse_simple_array_type() { check_parse!("let f: [Num] = 100", |s| Expression::declaration( - syntaxfact::word(s.intern("let")), + syntax_builder::word(s.intern("let")), TypedId::from_parts( - syntaxfact::word(s.intern("f")), + syntax_builder::word(s.intern("f")), Some(TypeAnno::new( Token::new(TokenKind::Colon), TypeRef::array( @@ -476,27 +481,27 @@ fn parse_simple_array_type() { ), VarStyle::Immutable, Token::new(TokenKind::Equals), - syntaxfact::const_num(100), + syntax_builder::const_num(100), )); } #[test] fn parse_simple_let() { check_parse!("let foo = 100", |s| Expression::declaration( - syntaxfact::word(s.intern("let")), - TypedId::from_parts(syntaxfact::word(s.intern("foo")), None), + syntax_builder::word(s.intern("let")), + TypedId::from_parts(syntax_builder::word(s.intern("foo")), None), VarStyle::Immutable, 
Token::new(TokenKind::Equals), - syntaxfact::const_num(100), + syntax_builder::const_num(100), )); } #[test] fn parse_simple_tuple() { check_parse!("let f: (Num) = 100", |s| Expression::declaration( - syntaxfact::word(s.intern("let")), + syntax_builder::word(s.intern("let")), TypedId::from_parts( - syntaxfact::word(s.intern("f")), + syntax_builder::word(s.intern("f")), Some(TypeAnno::new( Token::new(TokenKind::Colon), TypeRef::tuple( @@ -510,12 +515,12 @@ fn parse_simple_tuple() { ), VarStyle::Immutable, Token::new(TokenKind::Equals), - syntaxfact::const_num(100), + syntax_builder::const_num(100), )); check_parse!("let f: (Num, [String]) = 100", |s| Expression::declaration( - syntaxfact::word(s.intern("let")), + syntax_builder::word(s.intern("let")), TypedId::from_parts( - syntaxfact::word(s.intern("f")), + syntax_builder::word(s.intern("f")), Some(TypeAnno::new( Token::new(TokenKind::Colon), TypeRef::tuple( @@ -535,23 +540,23 @@ fn parse_simple_tuple() { ), VarStyle::Immutable, Token::new(TokenKind::Equals), - syntaxfact::const_num(100), + syntax_builder::const_num(100), )); } #[test] fn parse_variable_decl() { check_parse!("var foo = 93", |s| Expression::declaration( - syntaxfact::word(s.intern("var")), - TypedId::from_parts(syntaxfact::word(s.intern("foo")), None), + syntax_builder::word(s.intern("var")), + TypedId::from_parts(syntax_builder::word(s.intern("foo")), None), VarStyle::Mutable, Token::new(TokenKind::Equals), - syntaxfact::const_num(93), + syntax_builder::const_num(93), )); check_parse!("var foo_bar: Number = -99999", |s| Expression::declaration( - syntaxfact::word(s.intern("var")), + syntax_builder::word(s.intern("var")), TypedId::from_parts( - syntaxfact::word(s.intern("foo_bar")), + syntax_builder::word(s.intern("foo_bar")), Some(mk_simple_ty_anno(&s, "Number")) ), VarStyle::Mutable, @@ -559,7 +564,7 @@ fn parse_variable_decl() { Expression::prefix( Token::new(TokenKind::Minus), PrefixOp::Negate, - syntaxfact::const_num(99999) + 
syntax_builder::const_num(99999) ), )); } @@ -567,23 +572,23 @@ fn parse_variable_decl() { #[test] fn parse_print_operator() { check_parse!("print 1334", |s| Expression::print( - syntaxfact::word(s.intern("print")), - syntaxfact::const_num(1334) + syntax_builder::word(s.intern("print")), + syntax_builder::const_num(1334) )); } #[test] fn parse_bool_literal() { check_parse!("true", |s| Expression::constant_bool( - syntaxfact::word(s.intern("true")), + syntax_builder::word(s.intern("true")), true )); check_parse!("false", |s| Expression::constant_bool( - syntaxfact::word(s.intern("false")), + syntax_builder::word(s.intern("false")), false )); - check_parse!("true", syntaxfact::const_bool(true)); - check_parse!("false", syntaxfact::const_bool(false)); + check_parse!("true", syntax_builder::const_bool(true)); + check_parse!("false", syntax_builder::const_bool(false)); } #[test] @@ -607,7 +612,7 @@ fn parse_string_literal() { } #[test] -fn parse_string_literal_syntaxfact() { - check_parse!("'foobar'", syntaxfact::raw_string("foobar")); - check_parse!("'münchen'", syntaxfact::raw_string("münchen")); +fn parse_string_literal_syntax_builder() { + check_parse!("'foobar'", syntax_builder::raw_string("foobar")); + check_parse!("'münchen'", syntax_builder::raw_string("münchen")); } diff --git a/src/syntax/syntaxfact.rs b/src/syntax/syntax_builder.rs similarity index 100% rename from src/syntax/syntaxfact.rs rename to src/syntax/syntax_builder.rs From 911ba4978d2a2c3567a4af2b7b79727498efc9c9 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Sat, 6 Jul 2019 20:13:22 +0100 Subject: [PATCH 12/18] Replace `mem::forget` Usage This function has become quite frowned upon. Do some trickery with `ManuallyDrop` to achieve the same results. 
--- src/low_loader/module.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/low_loader/module.rs b/src/low_loader/module.rs index cc3afc60..9652dfbc 100644 --- a/src/low_loader/module.rs +++ b/src/low_loader/module.rs @@ -177,14 +177,15 @@ impl Drop for Module { } impl From for LLVMModuleRef { - /// Convert from Module - /// /// Consume the wrapped module and return it's interal module /// reference. This transfers the ownership of the module back to /// the caller preventing the it from being automaticaly freed. fn from(m: Module) -> LLVMModuleRef { - let mod_ref = m.raw; - ::std::mem::forget(m); - mod_ref + unsafe { + // an apparently nicer alternative to calling `forget` we + // instead create a `ManuallyDrop` item and then don't + // drop it here. + std::mem::ManuallyDrop::new(m).as_raw() + } } } From 4b1a972ca865ca73b4651f0ac5ccae9cef564482 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Sun, 16 Aug 2020 09:52:21 +0100 Subject: [PATCH 13/18] Reformat Run `cargo fmt` --- src/compile.rs | 10 +++++----- src/low_loader/module.rs | 18 +++++++++--------- src/syntax/parse/checkparse_tests.rs | 4 +--- src/syntax/parse/tokeniser.rs | 5 +---- 4 files changed, 16 insertions(+), 21 deletions(-) diff --git a/src/compile.rs b/src/compile.rs index 97be5c32..57fd5c19 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -106,11 +106,11 @@ impl Compilation { module.verify_or_panic(); // Create a tempdir to write the LLVM IR or bitcode to - let suffix = if cfg!(feature="bitcode_link") { - ".bc" - } else { - ".ll" - }; + let suffix = if cfg!(feature = "bitcode_link") { + ".bc" + } else { + ".ll" + }; let temp_file = Builder::new().prefix("ullage").suffix(suffix).tempfile()?; // check if we have optimiation enabled and run the diff --git a/src/low_loader/module.rs b/src/low_loader/module.rs index 9652dfbc..aac8ce64 100644 --- a/src/low_loader/module.rs +++ b/src/low_loader/module.rs @@ -4,7 +4,7 @@ use super::function::Function; use
super::llvm_sys::prelude::*; -use super::llvm_sys::{analysis, core, bit_writer}; +use super::llvm_sys::{analysis, bit_writer, core}; use super::pass_manager::{OptLevel, OptSize, PassManagerBuilder}; use super::targets::Target; @@ -89,20 +89,20 @@ impl Module { } /// Write the Module to the Given File as LLVM IR or Bitcode - /// - /// If the path's extension is `.ll` then the file is written as - /// LLVM IR, otherwise the file is written as bitcode. + /// + /// If the path's extension is `.ll` then the file is written as + /// LLVM IR, otherwise the file is written as bitcode. pub fn write_to_file(&self, path: &Path) -> Result<(), String> { - let is_il = path.extension().map(|e| e == "ll").unwrap_or(false); + let is_il = path.extension().map(|e| e == "ll").unwrap_or(false); let path = path.to_str().and_then(|s| CString::new(s).ok()).unwrap(); unsafe { let mut message = ptr::null_mut(); let r = if is_il { - core::LLVMPrintModuleToFile(self.raw, path.as_ptr(), &mut message) - } else { - bit_writer::LLVMWriteBitcodeToFile(self.raw, path.as_ptr()) - }; + core::LLVMPrintModuleToFile(self.raw, path.as_ptr(), &mut message) + } else { + bit_writer::LLVMWriteBitcodeToFile(self.raw, path.as_ptr()) + }; if r == 0 { Ok(()) } else { diff --git a/src/syntax/parse/checkparse_tests.rs b/src/syntax/parse/checkparse_tests.rs index 37e21050..6e2b461a 100644 --- a/src/syntax/parse/checkparse_tests.rs +++ b/src/syntax/parse/checkparse_tests.rs @@ -603,9 +603,7 @@ fn parse_string_literal() { check_parse!( "'über ∂elta'", Expression::constant_string( - Token::new(TokenKind::Literal(Literal::RawString( - "über ∂elta".into() - ))), + Token::new(TokenKind::Literal(Literal::RawString("über ∂elta".into()))), "über ∂elta" ) ); diff --git a/src/syntax/parse/tokeniser.rs b/src/syntax/parse/tokeniser.rs index 1b125fac..c2f27bad 100644 --- a/src/syntax/parse/tokeniser.rs +++ b/src/syntax/parse/tokeniser.rs @@ -435,10 +435,7 @@ mod test { "# longer comment", 
RawTokenKind::Trivia(TriviaTokenKind::Comment) ); - check_lex!( - "# ∆¬∞€#", - RawTokenKind::Trivia(TriviaTokenKind::Comment) - ); + check_lex!("# ∆¬∞€#", RawTokenKind::Trivia(TriviaTokenKind::Comment)); // We recognise _all_ denominations of newline check_lex!("\n", RawTokenKind::Trivia(TriviaTokenKind::Newline)); From 7e5930175a9421cfe8211a1c9647e6774ba05ed7 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Sun, 16 Aug 2020 10:08:54 +0100 Subject: [PATCH 14/18] Add Editor Config File --- .editorconfig | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..f3a88619 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,9 @@ + +root = true + +[*.rs] +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +indent_style = space +indent_size = 4 From ac3e2ac6601a9bf8f5bea2fd8c935b684a6c5ff7 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Sun, 16 Aug 2020 11:24:01 +0100 Subject: [PATCH 15/18] Expose Linker as Command Line Option Rather than baking in the link mode at compile time allow switching at runtime using `--link-mode`. --- Cargo.toml | 10 +----- src/compile.rs | 11 +++---- src/compile/linker.rs | 73 ++++++++++++++++++++++++++++++++++++++++++ src/compile/options.rs | 8 +++++ src/main.rs | 35 +++++++++++++++++++- 5 files changed, 121 insertions(+), 16 deletions(-) create mode 100644 src/compile/linker.rs diff --git a/Cargo.toml b/Cargo.toml index 8d95119e..042ff0c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,19 +11,11 @@ keywords = [ "llvm", "parser", "compiler" ] categories = [ "parsing" ] edition = "2018" -[features] - -# bitcode link feature outputs intermediate files as bitcode rather than -# LLMV IR. -bitcode_link = [] - -default = [] - [dependencies] llvm-sys = "90" # FIXME: Want to upgrade to LLVM 10, but Clang doesn't seem capable of linking # the IL or Bitcode output from it. Do we need to wait for LLVM 11?
Is -# It time to stop relying on `cc` for linking? +# It time to stop relying on `cc` for linking? # llvm-sys = { version = "100", git = "https://gitlab.com/iwillspeak/llvm-sys.rs.git", branch = "macos-tbd-link" } docopt = "1.1" serde = { version = "1.0", features = ["derive"] } diff --git a/src/compile.rs b/src/compile.rs index 57fd5c19..7ddb7236 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -8,12 +8,14 @@ use crate::syntax; use std::path::Path; use std::process::Command; use tempfile::Builder; +use linker::Linker; pub use self::error::{CompError, CompResult}; pub use self::options::{CompilationOptions, OptimisationLevel}; pub mod error; pub mod options; +pub mod linker; mod lower; mod lower_context; @@ -105,13 +107,10 @@ impl Compilation { fun.verify_or_panic(); module.verify_or_panic(); + let linker = self.options.linker.unwrap_or_else(Linker::default); + // Create a tempdir to write the LLVM IR or bitcode to - let suffix = if cfg!(feature = "bitcode_link") { - ".bc" - } else { - ".ll" - }; - let temp_file = Builder::new().prefix("ullage").suffix(suffix).tempfile()?; + let temp_file = Builder::new().prefix("ullage").suffix(linker.asset_ty.extension()).tempfile()?; // check if we have optimiation enabled and run the // corresponding optimisations if we do. diff --git a/src/compile/linker.rs b/src/compile/linker.rs new file mode 100644 index 00000000..bf422efd --- /dev/null +++ b/src/compile/linker.rs @@ -0,0 +1,73 @@ +//! Linker Abstractions +//! +//! This module contains types to deal with linking object files. The +//! main struct `Linker` specifies the information needed to perform +//! a link. + +use std::default::Default; + +/// The information for performing a link +pub struct Linker { + /// The linker command. Currently only `clang` is supported. 
+ pub cmd: LinkerCommand, + /// The intermediate asset type the linker expects + pub asset_ty: LinkerAssetType, +} + +/// The executable type to use for linking +#[derive(Debug,Copy,Clone)] +pub enum LinkerCommand { + /// The Clang c compiler + Clang, +} + +/// The intermediate asset type to pass to the linker +#[derive(Debug,Copy,Clone)] +pub enum LinkerAssetType { + /// LLVM IR text files + LlvmIr, + /// LLVM IR bitcode files + LlvmBc, +} + +impl Linker { + /// Create a new linker from the command and asset type + pub fn new(cmd: LinkerCommand, asset_ty: LinkerAssetType) -> Self { + Linker { cmd, asset_ty } + } + + /// Create a linker from the given command + pub fn from_command(cmd: LinkerCommand) -> Self { + Linker { cmd, asset_ty: cmd.default_asset_ty() } + } +} + +impl Default for Linker { + fn default() -> Self { + Linker::from_command(LinkerCommand::default()) + } +} + +impl LinkerCommand { + /// Get the Default Asset Type for this Linker + pub fn default_asset_ty(&self) -> LinkerAssetType { + LinkerAssetType::LlvmBc + } +} + +impl Default for LinkerCommand { + fn default() -> Self { + LinkerCommand::Clang + } +} + +impl LinkerAssetType { + + /// Get the file extension for the asset type + pub fn extension(&self) -> &str { + match *self { + LinkerAssetType::LlvmIr => ".ll", + LinkerAssetType::LlvmBc => ".bc", + } + } +} diff --git a/src/compile/options.rs b/src/compile/options.rs index 5c6f76cb..ddd365d8 100644 --- a/src/compile/options.rs +++ b/src/compile/options.rs @@ -4,6 +4,7 @@ //! compilation output.
use crate::low_loader::pass_manager as pm; +use super::linker::Linker; /// Compilation Options /// @@ -14,6 +15,8 @@ pub struct CompilationOptions { pub dump_ir: bool, /// Optimisation level to use when emitting code pub opt_level: OptimisationLevel, + /// Linker option + pub linker: Option<Linker>, } /// Optimisation levels @@ -49,6 +52,11 @@ impl CompilationOptions { pub fn with_opt_level(self, opt_level: OptimisationLevel) -> Self { CompilationOptions { opt_level, ..self } } + + /// Set the linker command to use + pub fn with_linker(self, linker: Linker) -> Self { + CompilationOptions { linker: Some(linker), ..self } + } } impl Default for OptimisationLevel { diff --git a/src/main.rs b/src/main.rs index dd16de8c..cc839444 100644 --- a/src/main.rs +++ b/src/main.rs @@ -41,6 +41,8 @@ Options: 0 = off, 1 = low, 2 = medium, 3 = high, s = size. -o, --output= Write the output to . --target= Set the compilation target triple. + --link-mode= Set the type of intermediate assets to produce + for linking. Use `il`, or `bc`. --dumpir Dump the LLVM IR for the module. --dumpast Dump the syntax tree to stdout and exit. --prettytree Dump a prettified summary of the syntax tree. @@ -57,6 +59,7 @@ struct Args { flag_output: Option, flag_optimise: Option, flag_target: Option, + flag_link_mode: Option<LinkMode>, arg_file: Option, // TODO: maybe move these dump options into a single flag? @@ -78,6 +81,20 @@ enum OptFlag { Size, } +/// Link Mode +/// +/// Choose the type of intermediate assets to produce when +/// performing the link.
+#[derive(Debug, Deserialize)] +enum LinkMode { + /// Intermediate language files + #[serde(rename = "il")] + LlvmIr, + /// Bitcode files + #[serde(rename = "bc")] + LlvmBc, +} + /// Custom Deserialiser for Optimisation Flags /// /// This deserialiser will handle both numeric values and 's' or @@ -135,6 +152,18 @@ impl From for OptimisationLevel { } } +impl From<LinkMode> for linker::Linker { + fn from(mode: LinkMode) -> Self { + linker::Linker::new( + linker::LinkerCommand::default(), + match mode { + LinkMode::LlvmIr => linker::LinkerAssetType::LlvmIr, + LinkMode::LlvmBc => linker::LinkerAssetType::LlvmBc, + } + ) + } +} + /// Main /// /// The main function for `ullage`. Parses the options and runs the @@ -200,12 +229,16 @@ fn main() { exit(0); } - let options = CompilationOptions::default() + let mut options = CompilationOptions::default() .with_dump_ir(args.flag_dumpir) .with_opt_level( args.flag_optimise .map_or(OptimisationLevel::Off, |o| o.into()), ); + if let Some(link_mode) = args.flag_link_mode { + let linker = linker::Linker::from(link_mode); + options = options.with_linker(linker); + } let comp = match Compilation::new(tree, options) { Ok(c) => c, Err(e) => handle_comp_err(&e), From 535be4c4cbcb2f6e149bf7bc126e961fcb0a0291 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Sun, 16 Aug 2020 11:45:48 +0100 Subject: [PATCH 16/18] Use Linker Executable in Link Prepping for being able to choose a different linker executable. In the future when custom linkers are supported and platform default linking is supported the linker itself will probably want to build the whole command rather than just providing the executable string to be used. This works for now though.
--- src/compile.rs | 2 +- src/compile/linker.rs | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/compile.rs b/src/compile.rs index 7ddb7236..a9f085d4 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -125,7 +125,7 @@ impl Compilation { module.write_to_file(temp_file.path())?; // Shell out to Clang to link the final assembly - let output = Command::new("clang") + let output = Command::new(linker.cmd.executable()) .arg(temp_file.path()) .arg(format!("--target={}", target.triple())) .arg("-o") diff --git a/src/compile/linker.rs b/src/compile/linker.rs index bf422efd..9cc94305 100644 --- a/src/compile/linker.rs +++ b/src/compile/linker.rs @@ -53,6 +53,17 @@ impl LinkerCommand { pub fn default_asset_ty(&self) -> LinkerAssetType { LinkerAssetType::LlvmBc } + + /// Get the executable this command should call. + /// + pub fn executable(&self) -> &str { + // FIXME: instead of exposing a &str, we should + // make the linker build the command + // rather than the compiler. + match *self { + LinkerCommand::Clang => "clang" + } + } } impl Default for LinkerCommand { From f1d8b00132f66e64d14e915dbf91f206861faf77 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Sun, 28 Aug 2022 10:37:03 +0100 Subject: [PATCH 17/18] Fixup Object Linking Re-add support for specifying object files as the linker asset type. This was lost in the catchup merge.
--- src/main.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main.rs b/src/main.rs index a50c3c9c..d296a262 100644 --- a/src/main.rs +++ b/src/main.rs @@ -92,6 +92,9 @@ enum LinkMode { /// Bitcode files #[serde(rename = "bc")] LlvmBc, + /// Native Objects + #[serde(rename = "obj")] + Objects, } /// Custom Deserialiser for Optimisation Flags @@ -158,6 +161,7 @@ impl From for linker::Linker { match mode { LinkMode::LlvmIr => linker::LinkerAssetType::LlvmIr, LinkMode::LlvmBc => linker::LinkerAssetType::LlvmBc, + LinkMode::Objects => linker::LinkerAssetType::Object, }, ) } From 829ef8d9c54e0491c4a064772c3987e774eb3843 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Sun, 28 Aug 2022 10:52:03 +0100 Subject: [PATCH 18/18] Remove `failure` Reference Replace use of the deprecated failure crate with custom display impls for our errors. fixes: #43 --- Cargo.lock | 101 -------------------------------------- Cargo.toml | 1 - src/compile/error.rs | 49 +++++++++++++----- src/low_loader/targets.rs | 13 +++-- 4 files changed, 48 insertions(+), 116 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 614076c6..71e7f6d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,21 +2,6 @@ # It is not intended for manual editing. 
version = 3 -[[package]] -name = "addr2line" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9ecd88a8c8378ca913a680cd98f0f13ac67383d35993f86c90a70e3f137816b" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - [[package]] name = "aho-corasick" version = "0.7.18" @@ -32,21 +17,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" -[[package]] -name = "backtrace" -version = "0.3.66" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cab84319d616cfb654d03394f38ab7e6f0919e181b1b57e1fd15e7fb4077d9a7" -dependencies = [ - "addr2line", - "cc", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - [[package]] name = "bitflags" version = "1.3.2" @@ -77,28 +47,6 @@ dependencies = [ "strsim", ] -[[package]] -name = "failure" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d32e9bd16cc02eae7db7ef620b392808b89f6a5e16bb3497d159c6b92a0f4f86" -dependencies = [ - "backtrace", - "failure_derive", -] - -[[package]] -name = "failure_derive" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - [[package]] name = "fastrand" version = "1.7.0" @@ -108,12 +56,6 @@ dependencies = [ "instant", ] -[[package]] -name = "gimli" -version = "0.26.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4" - [[package]] name = "hashbrown" version = "0.12.2" @@ -196,24 +138,6 @@ version = "2.5.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" -[[package]] -name = "miniz_oxide" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc" -dependencies = [ - "adler", -] - -[[package]] -name = "object" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21158b2c33aa6d4561f1c0a6ea283ca92bc54802a93b263e910746d679a7eb53" -dependencies = [ - "memchr", -] - [[package]] name = "pest" version = "2.3.0" @@ -277,12 +201,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "rustc-demangle" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" - [[package]] name = "semver" version = "0.9.0" @@ -353,18 +271,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "synstructure" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "unicode-xid", -] - [[package]] name = "tempfile" version = "3.3.0" @@ -410,7 +316,6 @@ name = "ullage" version = "0.1.0" dependencies = [ "docopt", - "failure", "indexmap", "libc", "llvm-sys 100.2.3", @@ -426,12 +331,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" -[[package]] -name = "unicode-xid" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04" - [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 0141301a..b199a026 100644 --- a/Cargo.toml +++ b/Cargo.toml 
@@ -21,6 +21,5 @@ llvm-13 = { package = "llvm-sys", version = "130", optional = true } docopt = "1.1" serde = { version = "1.0", features = ["derive"] } tempfile = "3.1" -failure = "0.1" libc = "0.2" indexmap = "1.5" diff --git a/src/compile/error.rs b/src/compile/error.rs index 1bd30cba..0497f6d2 100644 --- a/src/compile/error.rs +++ b/src/compile/error.rs @@ -1,24 +1,40 @@ //! Compilation error module. Contains the Result and Error types for //! the compile module. -use failure::Fail; -use std::io; +use std::{fmt::Display, io}; /// Represents the different types of errors which can be encountered /// when compiling. -#[derive(Fail, Debug)] +#[derive(Debug)] pub enum CompError { /// Generic Error String - #[fail(display = "compilation error: {}", _0)] Generic(String), /// Linker Failure - #[fail(display = "linker failed: {}", _0)] - Linker(#[cause] LinkerError), + Linker(LinkerError), /// Wrapped IO Error - #[fail(display = "IO error: {}", _0)] - IO(#[cause] ::std::io::Error), + IO(io::Error), +} + +impl std::error::Error for CompError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + CompError::Linker(e) => Some(e), + CompError::IO(e) => Some(e), + _ => None, + } + } +} + +impl Display for CompError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + CompError::Generic(msg) => write!(f, "compilation error: {}", msg), + CompError::Linker(cause) => write!(f, "linker failed: {}", cause), + CompError::IO(cause) => write!(f, "IO error: {}", cause), + } + } } /// Compilation result. Returned from each compilation stage. @@ -27,17 +43,28 @@ pub type CompResult = Result; /// Link Failure Type /// /// Used to group together the different failure modes for the linker.
-#[derive(Fail, Debug)] +#[derive(Debug)] pub enum LinkerError { /// The linker failed with a known exit status - #[fail(display = "linker returned exit status {}: {}", _0, _1)] WithExitStatus(i32, String), /// The linker failed with an unknown exit status - #[fail(display = "unknown linker error: {}", _0)] UnknownFailure(String), } +impl std::error::Error for LinkerError {} + +impl Display for LinkerError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + LinkerError::WithExitStatus(status, msg) => { + write!(f, "linker returned exit status {}: {}", status, msg) + } + LinkerError::UnknownFailure(msg) => write!(f, "unknown linker error: {}", msg), + } + } +} + impl From for CompError { /// Convert untyped errors to generic compilation errors. fn from(s: String) -> Self { diff --git a/src/low_loader/targets.rs b/src/low_loader/targets.rs index fab3e8cf..4a41f336 100644 --- a/src/low_loader/targets.rs +++ b/src/low_loader/targets.rs @@ -2,8 +2,8 @@ use super::llvm_sys::core::LLVMDisposeMessage; use super::llvm_sys::target_machine::*; -use failure::Fail; use std::ffi::{CStr, CString}; +use std::fmt::Display; use std::{fmt, ptr}; /// Compilation Target @@ -18,10 +18,17 @@ pub struct Target { /// Target Lookup Error /// /// Returned if a target couldn't be resolved from the given triple. -#[derive(Fail, Debug)] -#[fail(display = "Could not find target: '{}'", _0)] +#[derive(Debug)] pub struct TargetLookupError(String); +impl std::error::Error for TargetLookupError {} + +impl Display for TargetLookupError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Could not find target: '{}'", self.0) + } +} + impl fmt::Display for Target { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(f, "Target information for {}:", self.triple)?;