diff --git a/crates/oxc_parser/src/cursor.rs b/crates/oxc_parser/src/cursor.rs index edd265295c695..eb1fc6325d6d2 100644 --- a/crates/oxc_parser/src/cursor.rs +++ b/crates/oxc_parser/src/cursor.rs @@ -47,7 +47,7 @@ impl<'a> ParserImpl<'a> { /// Get current source text pub(crate) fn cur_src(&self) -> &'a str { - let range = self.cur_token().span(); + let range = self.lexer.token_span(self.cur_token()); // SAFETY: // range comes from the lexer, which are ensured to meeting the criteria of `get_unchecked`. @@ -109,13 +109,23 @@ impl<'a> ParserImpl<'a> { self.cur_kind() == kind } + #[inline] + pub(crate) fn cur_token_span(&self) -> Span { + self.lexer.token_span(self.cur_token()) + } + + #[inline] + pub(crate) fn cur_token_end(&self) -> u32 { + self.lexer.token_end(self.cur_token()) + } + /// `StringValue` of `IdentifierName` normalizes any Unicode escape sequences /// in `IdentifierName` hence such escapes cannot be used to write an Identifier /// whose code point sequence is the same as a `ReservedWord`. #[inline] fn test_escaped_keyword(&mut self, kind: Kind) { if self.cur_token().escaped() && kind.is_all_keyword() { - let span = self.cur_token().span(); + let span = self.cur_token_span(); self.error(diagnostics::escaped_keyword(span)); } } @@ -124,7 +134,7 @@ impl<'a> ParserImpl<'a> { /// Checks if the current token is escaped if it is a keyword fn advance(&mut self, kind: Kind) { self.test_escaped_keyword(kind); - self.prev_token_end = self.token.end; + self.prev_token_end = self.lexer.token_end(self.token); self.token = self.lexer.next_token(); } @@ -132,7 +142,7 @@ impl<'a> ParserImpl<'a> { /// Checks if the current token is escaped if it is a keyword fn advance_for_jsx_child(&mut self, kind: Kind) { self.test_escaped_keyword(kind); - self.prev_token_end = self.token.end; + self.prev_token_end = self.lexer.token_end(self.token); self.token = self.lexer.next_jsx_child(); } @@ -184,13 +194,13 @@ impl<'a> ParserImpl<'a> { if kind == Kind::Semicolon { return true; } - kind == Kind::RCurly || kind.is_eof() || self.cur_token().is_on_new_line + kind == Kind::RCurly || kind.is_eof() || self.cur_token().is_on_new_line() } /// # Errors pub(crate) fn expect_without_advance(&mut self, kind: Kind) -> Result<()> { if !self.at(kind) { - let range = self.cur_token().span(); + let range = self.cur_token_span(); return Err(diagnostics::expect_token(kind.to_str(), self.cur_kind().to_str(), range)); } Ok(()) @@ -238,7 +248,7 @@ impl<'a> ParserImpl<'a> { /// Tell lexer to continue reading jsx identifier if the lexer character position is at `-` for `` pub(crate) fn continue_lex_jsx_identifier(&mut self) { - if let Some(token) = self.lexer.continue_lex_jsx_identifier() { + if let Some(token) = self.lexer.continue_lex_jsx_identifier(self.token.start) { self.token = token; } } diff --git a/crates/oxc_parser/src/js/arrow.rs b/crates/oxc_parser/src/js/arrow.rs index a93048733c376..1a5c6841c671c 100644 --- a/crates/oxc_parser/src/js/arrow.rs +++ b/crates/oxc_parser/src/js/arrow.rs @@ -57,7 +57,7 @@ impl<'a> ParserImpl<'a> { if self.at(Kind::Async) { let second_token = self.peek_token(); let second = second_token.kind; - if second_token.is_on_new_line { + if second_token.is_on_new_line() { return Tristate::False; } if second != Kind::LParen && second != Kind::LAngle { @@ -185,7 +185,7 @@ impl<'a> ParserImpl<'a> { let first = first_token.kind; // If the "async" is followed by "=>" token then it is not a beginning of an async arrow-function // but instead a simple arrow-function which will be parsed inside 
"parseAssignmentExpressionOrHigher" - if first_token.is_on_new_line || first == Kind::Arrow { + if first_token.is_on_new_line() || first == Kind::Arrow { return Tristate::False; } // Check for un-parenthesized AsyncArrowFunction @@ -230,8 +230,8 @@ impl<'a> ParserImpl<'a> { self.ctx = self.ctx.and_await(has_await); - if self.cur_token().is_on_new_line { - self.error(diagnostics::lineterminator_before_arrow(self.cur_token().span())); + if self.cur_token().is_on_new_line() { + self.error(diagnostics::lineterminator_before_arrow(self.cur_token_span())); } self.expect(Kind::Arrow)?; @@ -262,8 +262,8 @@ impl<'a> ParserImpl<'a> { self.ctx = self.ctx.and_await(has_await); - if self.cur_token().is_on_new_line { - self.error(diagnostics::lineterminator_before_arrow(self.cur_token().span())); + if self.cur_token().is_on_new_line() { + self.error(diagnostics::lineterminator_before_arrow(self.cur_token_span())); } self.expect(Kind::Arrow)?; diff --git a/crates/oxc_parser/src/js/binding.rs b/crates/oxc_parser/src/js/binding.rs index 33a4a6bb48231..2777172f76c0a 100644 --- a/crates/oxc_parser/src/js/binding.rs +++ b/crates/oxc_parser/src/js/binding.rs @@ -91,7 +91,7 @@ impl<'a> ParserImpl<'a> { let elem = self.parse_rest_element()?; if self.at(Kind::Comma) { if matches!(self.peek_kind(), Kind::RCurly | Kind::RBrack) { - let span = self.cur_token().span(); + let span = self.cur_token_span(); self.bump_any(); self.error(diagnostics::binding_rest_element_trailing_comma(span)); } @@ -111,7 +111,7 @@ impl<'a> ParserImpl<'a> { let kind = self.parse_binding_pattern_kind()?; // Rest element does not allow `?`, checked in checker/typescript.rs if self.at(Kind::Question) && self.is_ts { - let span = self.cur_token().span(); + let span = self.cur_token_span(); self.bump_any(); self.error(diagnostics::a_rest_parameter_cannot_be_optional(span)); } diff --git a/crates/oxc_parser/src/js/class.rs b/crates/oxc_parser/src/js/class.rs index 6f5132b16e410..685f5eecb07b6 100644 --- a/crates/oxc_parser/src/js/class.rs +++ b/crates/oxc_parser/src/js/class.rs @@ -218,7 +218,7 @@ impl<'a> ParserImpl<'a> { // async ... 
if key_name.is_none() && self.at(Kind::Async) && !self.peek_at(Kind::Question) { - if !self.peek_token().is_on_new_line + if !self.peek_token().is_on_new_line() && (self.peek_kind().is_class_element_name_start() || self.peek_at(Kind::Star)) { self.bump(Kind::Async); diff --git a/crates/oxc_parser/src/js/declaration.rs b/crates/oxc_parser/src/js/declaration.rs index 30ca2eed5b248..1b6728b3b6443 100644 --- a/crates/oxc_parser/src/js/declaration.rs +++ b/crates/oxc_parser/src/js/declaration.rs @@ -98,7 +98,7 @@ impl<'a> ParserImpl<'a> { let mut definite = false; if binding_kind.is_binding_identifier() && self.at(Kind::Bang) - && !self.cur_token().is_on_new_line + && !self.cur_token().is_on_new_line() { self.eat(Kind::Bang); definite = true; @@ -146,15 +146,15 @@ impl<'a> ParserImpl<'a> { self.expect(Kind::Using)?; // `[no LineTerminator here]` - if self.cur_token().is_on_new_line { + if self.cur_token().is_on_new_line() { self.error(diagnostics::line_terminator_before_using_declaration( - self.cur_token().span(), + self.cur_token_span(), )); } // [lookahead ≠ await] if self.cur_kind() == Kind::Await { - self.error(diagnostics::await_in_using_declaration(self.cur_token().span())); + self.error(diagnostics::await_in_using_declaration(self.cur_token_span())); self.eat(Kind::Await); } diff --git a/crates/oxc_parser/src/js/expression.rs b/crates/oxc_parser/src/js/expression.rs index bb9be04956b05..2a5f64a326895 100644 --- a/crates/oxc_parser/src/js/expression.rs +++ b/crates/oxc_parser/src/js/expression.rs @@ -75,7 +75,7 @@ impl<'a> ParserImpl<'a> { let cur = self.cur_kind(); if !cur.is_binding_identifier() { let err = if cur.is_reserved_keyword() { - diagnostics::identifier_reserved_word(self.cur_token().span(), cur.to_str()) + diagnostics::identifier_reserved_word(self.cur_token_span(), cur.to_str()) } else { self.unexpected() }; @@ -296,7 +296,7 @@ impl<'a> ParserImpl<'a> { } _ => unreachable!(), } - .map_err(|err| diagnostics::invalid_number(err, token.span()))?; + .map_err(|err| diagnostics::invalid_number(err, self.cur_token_span()))?; let base = match token.kind { Kind::Decimal => NumberBase::Decimal, Kind::Float => NumberBase::Float, @@ -329,7 +329,7 @@ impl<'a> ParserImpl<'a> { let raw = self.cur_src(); let src = raw.strip_suffix('n').unwrap(); let _value = parse_big_int(src, token.kind, token.has_separator()) - .map_err(|err| diagnostics::invalid_number(err, token.span()))?; + .map_err(|err| diagnostics::invalid_number(err, self.cur_token_span()))?; self.bump_any(); Ok(self.ast.big_int_literal(self.end_span(span), raw, base)) } @@ -341,7 +341,7 @@ impl<'a> ParserImpl<'a> { let pattern_start = self.cur_token().start + 1; // +1 to exclude left `/` let pattern_text = &self.source_text[pattern_start as usize..pattern_end as usize]; let flags_start = pattern_end + 1; // +1 to include right `/` - let flags_text = &self.source_text[flags_start as usize..self.cur_token().end as usize]; + let flags_text = &self.source_text[flags_start as usize..self.cur_token_end() as usize]; let raw = self.cur_src(); self.bump_any(); // Parse pattern if options is enabled and also flags are valid @@ -440,7 +440,7 @@ impl<'a> ParserImpl<'a> { /// , /// Elision , pub(crate) fn parse_elision(&mut self) -> ArrayExpressionElement<'a> { - self.ast.array_expression_element_elision(self.cur_token().span()) + self.ast.array_expression_element_elision(self.cur_token_span()) } /// Section [Template Literal](https://tc39.es/ecma262/#prod-TemplateLiteral) @@ -689,7 +689,7 @@ impl<'a> ParserImpl<'a> { Kind::LBrack if 
!self.ctx.has_decorator() => { self.parse_computed_member_expression(lhs_span, lhs, false)? } - Kind::Bang if !self.cur_token().is_on_new_line && self.is_ts => { + Kind::Bang if !self.cur_token().is_on_new_line() && self.is_ts => { self.bump_any(); self.ast.expression_ts_non_null(self.end_span(lhs_span), lhs) } @@ -926,7 +926,7 @@ impl<'a> ParserImpl<'a> { let span = self.start_span(); let lhs = self.parse_lhs_expression_or_higher()?; // ++ -- postfix update expressions - if self.cur_kind().is_update_operator() && !self.cur_token().is_on_new_line { + if self.cur_kind().is_update_operator() && !self.cur_token().is_on_new_line() { let operator = map_update_operator(self.cur_kind()); self.bump_any(); let lhs = SimpleAssignmentTarget::cover(lhs, self)?; @@ -1028,7 +1028,7 @@ impl<'a> ParserImpl<'a> { } if self.is_ts && matches!(kind, Kind::As | Kind::Satisfies) { - if self.cur_token().is_on_new_line { + if self.cur_token().is_on_new_line() { break; } self.bump_any(); @@ -1157,7 +1157,7 @@ impl<'a> ParserImpl<'a> { fn parse_await_expression(&mut self, lhs_span: Span) -> Result> { let span = self.start_span(); if !self.ctx.has_await() { - self.error(diagnostics::await_expression(self.cur_token().span())); + self.error(diagnostics::await_expression(self.cur_token_span())); } self.bump_any(); let argument = self.context(Context::Await, Context::empty(), |p| { @@ -1214,7 +1214,7 @@ impl<'a> ParserImpl<'a> { return false; } - return !peek_token.is_on_new_line && peek_token.kind.is_after_await_or_yield(); + return !peek_token.is_on_new_line() && peek_token.kind.is_after_await_or_yield(); } false } @@ -1229,7 +1229,7 @@ impl<'a> ParserImpl<'a> { if self.ctx.has_yield() { return true; } - return !peek_token.is_on_new_line && peek_token.kind.is_after_await_or_yield(); + return !peek_token.is_on_new_line() && peek_token.kind.is_after_await_or_yield(); } false } diff --git a/crates/oxc_parser/src/js/function.rs b/crates/oxc_parser/src/js/function.rs index 90b86d4750324..dadbbd8f56511 100644 --- a/crates/oxc_parser/src/js/function.rs +++ b/crates/oxc_parser/src/js/function.rs @@ -26,7 +26,7 @@ impl<'a> ParserImpl<'a> { self.at(Kind::Function) || self.at(Kind::Async) && self.peek_at(Kind::Function) - && !self.peek_token().is_on_new_line + && !self.peek_token().is_on_new_line() } pub(crate) fn parse_function_body(&mut self) -> Result>> { @@ -99,7 +99,7 @@ impl<'a> ParserImpl<'a> { let element = self.parse_rest_element()?; if self.at(Kind::Comma) { if matches!(self.peek_kind(), Kind::RCurly | Kind::RBrack) { - let span = self.cur_token().span(); + let span = self.cur_token_span(); self.bump_any(); self.error(diagnostics::binding_rest_element_trailing_comma(span)); } @@ -293,7 +293,7 @@ impl<'a> ParserImpl<'a> { let mut delegate = false; let mut argument = None; - if !self.cur_token().is_on_new_line { + if !self.cur_token().is_on_new_line() { delegate = self.eat(Kind::Star); let not_assignment_expr = matches!( self.cur_kind(), @@ -336,7 +336,7 @@ impl<'a> ParserImpl<'a> { if kind.is_id_required() && id.is_none() { match self.cur_kind() { Kind::LParen => { - self.error(diagnostics::expect_function_name(self.cur_token().span())); + self.error(diagnostics::expect_function_name(self.cur_token_span())); } kind if kind.is_reserved_keyword() => self.expect_without_advance(Kind::Ident)?, _ => {} diff --git a/crates/oxc_parser/src/js/module.rs b/crates/oxc_parser/src/js/module.rs index f22852803824f..93bbac6dd8c67 100644 --- a/crates/oxc_parser/src/js/module.rs +++ b/crates/oxc_parser/src/js/module.rs @@ -169,7 +169,7 
@@ impl<'a> ParserImpl<'a> { /// [Import Attributes](https://tc39.es/proposal-import-attributes) fn parse_import_attributes(&mut self) -> Result>> { let attributes_keyword = match self.cur_kind() { - Kind::Assert if !self.cur_token().is_on_new_line => self.parse_identifier_name()?, + Kind::Assert if !self.cur_token().is_on_new_line() => self.parse_identifier_name()?, Kind::With => self.parse_identifier_name()?, _ => { return Ok(None); @@ -396,7 +396,7 @@ impl<'a> ParserImpl<'a> { self.parse_class_declaration(decl_span, &modifiers) .map(ExportDefaultDeclarationKind::ClassDeclaration)? } - _ if self.at(Kind::Interface) && !self.peek_token().is_on_new_line && self.is_ts => { + _ if self.at(Kind::Interface) && !self.peek_token().is_on_new_line() && self.is_ts => { self.parse_ts_interface_declaration(decl_span, &Modifiers::empty()).map(|decl| { match decl { Declaration::TSInterfaceDeclaration(decl) => { diff --git a/crates/oxc_parser/src/js/object.rs b/crates/oxc_parser/src/js/object.rs index 7dbabf09a71d4..88a7d8fe127c9 100644 --- a/crates/oxc_parser/src/js/object.rs +++ b/crates/oxc_parser/src/js/object.rs @@ -56,7 +56,7 @@ impl<'a> ParserImpl<'a> { // AsyncGeneratorMethod Kind::Async if (class_element_name || peek_kind == Kind::Star) - && !self.peek_token().is_on_new_line => + && !self.peek_token().is_on_new_line() => { self.parse_property_definition_method() } @@ -69,7 +69,7 @@ impl<'a> ParserImpl<'a> { && modifier_kind.is_modifier_kind() && peek_kind.is_identifier_or_keyword() => { - if let Ok(modifier) = Modifier::try_from(self.cur_token()) { + if let Ok(modifier) = Modifier::try_from_token(self.cur_token(), &self.lexer) { self.error(diagnostics::modifier_cannot_be_used_here(&modifier)); } else { #[cfg(debug_assertions)] diff --git a/crates/oxc_parser/src/js/statement.rs b/crates/oxc_parser/src/js/statement.rs index 7e4f64d9a193f..d0a7777b618d6 100644 --- a/crates/oxc_parser/src/js/statement.rs +++ b/crates/oxc_parser/src/js/statement.rs @@ -123,7 +123,7 @@ impl<'a> ParserImpl<'a> { self.parse_using() } Kind::Using if self.peek_kind().is_binding_identifier() => self.parse_using(), - Kind::Async if self.peek_at(Kind::Function) && !self.peek_token().is_on_new_line => { + Kind::Async if self.peek_at(Kind::Function) && !self.peek_token().is_on_new_line() => { self.parse_function_declaration(stmt_ctx) } _ if self.is_ts && self.at_start_of_ts_declaration() => { @@ -243,7 +243,7 @@ impl<'a> ParserImpl<'a> { // [+Await] let r#await = if self.at(Kind::Await) { if !self.ctx.has_await() { - self.error(diagnostics::await_expression(self.cur_token().span())); + self.error(diagnostics::await_expression(self.cur_token_span())); } self.bump_any(); true @@ -497,11 +497,11 @@ impl<'a> ParserImpl<'a> { fn parse_throw_statement(&mut self) -> Result> { let span = self.start_span(); self.bump_any(); // advance `throw` - if self.cur_token().is_on_new_line { + if self.cur_token().is_on_new_line() { self.error(diagnostics::illegal_newline( "throw", self.end_span(span), - self.cur_token().span(), + self.cur_token_span(), )); } let argument = self.parse_expr()?; diff --git a/crates/oxc_parser/src/lexer/comment.rs b/crates/oxc_parser/src/lexer/comment.rs index ddd2645a756ef..b16747ec4e3c2 100644 --- a/crates/oxc_parser/src/lexer/comment.rs +++ b/crates/oxc_parser/src/lexer/comment.rs @@ -76,14 +76,14 @@ impl<'a> Lexer<'a> { }, }; - self.token.is_on_new_line = true; + self.token.set_is_on_new_line(); Kind::Skip } /// Section 12.4 Multi Line Comment pub(super) fn skip_multi_line_comment(&mut self) -> Kind { // If 
`is_on_new_line` is already set, go directly to faster search which only looks for `*/` - if self.token.is_on_new_line { + if self.token.is_on_new_line() { return self.skip_multi_line_comment_after_line_break(self.source.position()); } @@ -120,7 +120,7 @@ impl<'a> Lexer<'a> { let next2 = unsafe { pos.add(1).read2() }; if matches!(next2, LS_BYTES_2_AND_3 | PS_BYTES_2_AND_3) { // Irregular line break - self.token.is_on_new_line = true; + self.token.set_is_on_new_line(); // Ideally we'd go on to `skip_multi_line_comment_after_line_break` here // but can't do that easily because can't use `return` in a closure. // But irregular line breaks are rare anyway. @@ -135,7 +135,7 @@ impl<'a> Lexer<'a> { } else { // Regular line break. // No need to look for more line breaks, so switch to faster search just for `*/`. - self.token.is_on_new_line = true; + self.token.set_is_on_new_line(); // SAFETY: Regular line breaks are ASCII, so skipping 1 byte is a UTF-8 char boundary. let after_line_break = unsafe { pos.add(1) }; return self.skip_multi_line_comment_after_line_break(after_line_break); @@ -184,7 +184,7 @@ impl<'a> Lexer<'a> { } self.consume_char(); } - self.token.is_on_new_line = true; + self.token.set_is_on_new_line(); Kind::HashbangComment } } diff --git a/crates/oxc_parser/src/lexer/jsx.rs b/crates/oxc_parser/src/lexer/jsx.rs index 10574d4b80c65..18de9c987cc58 100644 --- a/crates/oxc_parser/src/lexer/jsx.rs +++ b/crates/oxc_parser/src/lexer/jsx.rs @@ -100,7 +100,8 @@ impl Lexer<'_> { /// `IdentifierStart` /// `JSXIdentifier` `IdentifierPart` /// `JSXIdentifier` [no `WhiteSpace` or Comment here] - - pub(crate) fn continue_lex_jsx_identifier(&mut self) -> Option { + pub(crate) fn continue_lex_jsx_identifier(&mut self, start: u32) -> Option { + self.token.start = start; if self.peek_byte() != Some(b'-') { return None; } diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index 02397fdd2bda7..ed95cbf42e058 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -90,8 +90,16 @@ pub struct Lexer<'a> { /// `None` is saved when the string contains an invalid escape sequence. pub escaped_templates: FxHashMap>, + /// Ends of long tokens, indexed by [Token::start] when [Token::len] is [u16::MAX]. + long_token_ends: FxHashMap, + /// `memchr` Finder for end of multi-line comments. Created lazily when first used. multi_line_comment_end_finder: Option>, + + /// Flag indicating whether `self.token` is allowed to start at 0. + /// It's used in debug mode to check if `self.token.start` has been set when producing the next token. 
+ #[cfg(debug_assertions)] + is_at_first_token: bool, } impl<'a> Lexer<'a> { @@ -120,7 +128,10 @@ impl<'a> Lexer<'a> { trivia_builder: TriviaBuilder::default(), escaped_strings: FxHashMap::default(), escaped_templates: FxHashMap::default(), + long_token_ends: FxHashMap::default(), multi_line_comment_end_finder: None, + #[cfg(debug_assertions)] + is_at_first_token: true, } } @@ -162,6 +173,10 @@ impl<'a> Lexer<'a> { pub fn rewind(&mut self, checkpoint: LexerCheckpoint<'a>) { self.errors.truncate(checkpoint.errors_pos); self.source.set_position(checkpoint.position); + #[cfg(debug_assertions)] + { + self.is_at_first_token = checkpoint.token.start == 0; + } self.token = checkpoint.token; self.lookahead.clear(); } @@ -216,14 +231,37 @@ impl<'a> Lexer<'a> { fn finish_next(&mut self, kind: Kind) -> Token { self.token.kind = kind; - self.token.end = self.offset(); - debug_assert!(self.token.start <= self.token.end); + let end = self.offset(); + self.token.set_end(end, &mut self.long_token_ends); + debug_assert!(self.token.start <= end); + #[cfg(debug_assertions)] + { + if !self.is_at_first_token { + assert_ne!( + self.token.start, 0, + "expect self.token.start to be set before producing {:?}", + self.token + ); + } + // The token starting at 0 can still be re-lexed starting at 0 (`/` to `/regex/`) + self.is_at_first_token = self.token.start == 0; + } let token = self.token; self.trivia_builder.handle_token(token); self.token = Token::default(); token } + #[inline] + pub fn token_end(&self, token: Token) -> u32 { + token.end(&self.long_token_ends) + } + + #[inline] + pub fn token_span(&self, token: Token) -> Span { + token.span(&self.long_token_ends) + } + // ---------- Private Methods ---------- // fn error(&mut self, error: OxcDiagnostic) { self.errors.push(error); diff --git a/crates/oxc_parser/src/lexer/punctuation.rs b/crates/oxc_parser/src/lexer/punctuation.rs index 6a834279c7f60..7af5d8a4368a2 100644 --- a/crates/oxc_parser/src/lexer/punctuation.rs +++ b/crates/oxc_parser/src/lexer/punctuation.rs @@ -41,7 +41,7 @@ impl Lexer<'_> { match self.peek_byte() { Some(b'-') => { self.consume_char(); - if self.token.is_on_new_line + if self.token.is_on_new_line() && self.source_type.is_script() && self.next_ascii_byte_eq(b'>') { @@ -59,6 +59,7 @@ impl Lexer<'_> { } pub(crate) fn next_right_angle(&mut self) -> Token { + self.token.start = self.offset() - 1; // include first `>` let kind = self.read_right_angle(); self.lookahead.clear(); self.finish_next(kind) diff --git a/crates/oxc_parser/src/lexer/string.rs b/crates/oxc_parser/src/lexer/string.rs index d8281fa238223..080b3eccb09c2 100644 --- a/crates/oxc_parser/src/lexer/string.rs +++ b/crates/oxc_parser/src/lexer/string.rs @@ -181,15 +181,15 @@ impl<'a> Lexer<'a> { return; } self.escaped_strings.insert(self.token.start, s); - self.token.escaped = true; + self.token.set_escaped(); } pub(crate) fn get_string(&self, token: Token) -> &'a str { - if token.escaped { + if token.escaped() { return self.escaped_strings[&token.start]; } - let raw = &self.source.whole()[token.start as usize..token.end as usize]; + let raw = &self.source.whole()[self.token_span(token)]; match token.kind { Kind::Str => { &raw[1..raw.len() - 1] // omit surrounding quotes diff --git a/crates/oxc_parser/src/lexer/template.rs b/crates/oxc_parser/src/lexer/template.rs index b850233e176d9..c311932bed7ea 100644 --- a/crates/oxc_parser/src/lexer/template.rs +++ b/crates/oxc_parser/src/lexer/template.rs @@ -320,14 +320,14 @@ impl<'a> Lexer<'a> { /// Save escaped template string fn 
save_template_string(&mut self, is_valid_escape_sequence: bool, s: &'a str) { self.escaped_templates.insert(self.token.start, is_valid_escape_sequence.then_some(s)); - self.token.escaped = true; + self.token.set_escaped(); } pub(crate) fn get_template_string(&self, token: Token) -> Option<&'a str> { - if token.escaped { + if token.escaped() { return self.escaped_templates[&token.start]; } - let raw = &self.source.whole()[token.start as usize..token.end as usize]; + let raw = &self.source.whole()[self.token_span(token)]; Some(match token.kind { Kind::NoSubstitutionTemplate | Kind::TemplateTail => { &raw[1..raw.len() - 1] // omit surrounding quotes or leading "}" and trailing "`" diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index 63e3a774b6e6c..e52e99e462e20 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -1,8 +1,33 @@ //! Token +use bitflags::bitflags; use oxc_span::Span; +use rustc_hash::FxHashMap; -use super::kind::Kind; +use super::{cold_branch, kind::Kind}; + +bitflags! { + #[derive(Debug, Clone, Copy, Default)] + struct TokenFlags: u8 { + /// Indicates the token is on a newline + const IsOnNewLine = 1 << 0; + + /// True if the identifier / string / template kinds has escaped strings. + /// The escaped strings are saved in [Lexer::escaped_strings] and [Lexer::escaped_templates] by + /// [Token::start]. + /// + /// [Lexer::escaped_strings]: [super::Lexer::escaped_strings] + /// [Lexer::escaped_templates]: [super::Lexer::escaped_templates] + const Escaped = 1 << 1; + + /// True if for numeric literal tokens that contain separator characters (`_`). + /// + /// Numeric literals are defined in Section 12.9.3 of the ECMAScript + /// standard and include [`Kind::Decimal`], [`Kind::Binary`], + /// [`Kind::Octal`], [`Kind::Hex`], etc. + const HasSeparator = 1 << 2; + } +} #[derive(Debug, Clone, Copy, Default)] pub struct Token { @@ -12,60 +37,90 @@ pub struct Token { /// Start offset in source pub start: u32, - /// End offset in source - pub end: u32, - - /// Indicates the token is on a newline - pub is_on_new_line: bool, - - /// True if the identifier / string / template kinds has escaped strings. - /// The escaped strings are saved in [Lexer::escaped_strings] and [Lexer::escaped_templates] by - /// [Token::start]. + /// Length of the token /// - /// [Lexer::escaped_strings]: [super::Lexer::escaped_strings] - /// [Lexer::escaped_templates]: [super::Lexer::escaped_templates] - pub escaped: bool, + /// [u16::MAX] is stored here if the token's length is greater than `u16::MAX` + /// (which is rare but can happen with large strings). + /// Actual ends of long tokens are stored in `lexer.long_token_ends`. + pub len: u16, - /// True if for numeric literal tokens that contain separator characters (`_`). - /// - /// Numeric literals are defined in Section 12.9.3 of the ECMAScript - /// standard and include [`Kind::Decimal`], [`Kind::Binary`], - /// [`Kind::Octal`], [`Kind::Hex`], etc. - has_separator: bool, - - // Padding to fill to 16 bytes. - // This makes copying a `Token` 1 x xmmword load & store, rather than 1 x dword + 1 x qword - // and `Token::default()` is 1 x xmmword store, rather than 1 x dword + 1 x qword. 
- _padding2: u32, + flags: TokenFlags, } impl Token { pub(super) fn new_on_new_line() -> Self { - Self { is_on_new_line: true, ..Self::default() } + Self { flags: TokenFlags::IsOnNewLine, ..Self::default() } + } + + #[inline] + pub fn span(&self, long_ends: &FxHashMap) -> Span { + Span::new(self.start, self.end(long_ends)) + } + + #[inline] + pub fn set_end(&mut self, end: u32, long_ends: &mut FxHashMap) { + if let Ok(len) = u16::try_from(end - self.start) { + self.len = len; + } else { + cold_branch(|| { + self.len = u16::MAX; + long_ends.insert(self.start, end); + }); + } } - pub fn span(&self) -> Span { - Span::new(self.start, self.end) + #[inline] + pub fn end(&self, long_ends: &FxHashMap) -> u32 { + #[allow(clippy::if_not_else)] + if self.len != u16::MAX { + self.start + u32::from(self.len) + } else { + cold_branch(|| { + long_ends.get(&self.start).copied().unwrap_or_else(|| + // The token's length happens to be exact `u16::MAX` + self.start + u32::from(u16::MAX)) + }) + } } + #[inline] pub fn escaped(&self) -> bool { - self.escaped + self.flags.contains(TokenFlags::Escaped) + } + #[inline] + pub fn set_escaped(&mut self) { + self.flags.insert(TokenFlags::Escaped); + } + + #[inline] + pub fn is_on_new_line(&self) -> bool { + self.flags.contains(TokenFlags::IsOnNewLine) + } + + #[inline] + pub fn set_is_on_new_line(&mut self) { + self.flags.insert(TokenFlags::IsOnNewLine); } #[inline] pub fn has_separator(&self) -> bool { - debug_assert!(!self.has_separator || self.kind.is_number()); - self.has_separator + let has_separator = self.flags.contains(TokenFlags::HasSeparator); + debug_assert!(!has_separator || self.kind.is_number()); + has_separator } pub(crate) fn set_has_separator(&mut self) { - debug_assert!(!self.has_separator || self.kind.is_number() || self.kind == Kind::default()); - self.has_separator = true; + debug_assert!( + !self.flags.contains(TokenFlags::HasSeparator) + || self.kind.is_number() + || self.kind == Kind::default() + ); + self.flags.insert(TokenFlags::HasSeparator); } } #[cfg(test)] mod size_asserts { use super::Token; - const _: () = assert!(std::mem::size_of::() == 16); + const _: () = assert!(std::mem::size_of::() == 8); } diff --git a/crates/oxc_parser/src/lexer/unicode.rs b/crates/oxc_parser/src/lexer/unicode.rs index bfa894080b443..4df37bb71d649 100644 --- a/crates/oxc_parser/src/lexer/unicode.rs +++ b/crates/oxc_parser/src/lexer/unicode.rs @@ -34,7 +34,7 @@ impl<'a> Lexer<'a> { } c if is_irregular_line_terminator(c) => { self.consume_char(); - self.token.is_on_new_line = true; + self.token.set_is_on_new_line(); self.trivia_builder.add_irregular_whitespace(self.token.start, self.offset()); Kind::Skip } diff --git a/crates/oxc_parser/src/lexer/whitespace.rs b/crates/oxc_parser/src/lexer/whitespace.rs index ddd2dfa4c6554..43fbca0cb60e8 100644 --- a/crates/oxc_parser/src/lexer/whitespace.rs +++ b/crates/oxc_parser/src/lexer/whitespace.rs @@ -8,7 +8,7 @@ static NOT_REGULAR_WHITESPACE_OR_LINE_BREAK_TABLE: SafeByteMatchTable = impl Lexer<'_> { pub(super) fn line_break_handler(&mut self) -> Kind { - self.token.is_on_new_line = true; + self.token.set_is_on_new_line(); self.trivia_builder.handle_newline(); // Indentation is common after a line break. 
diff --git a/crates/oxc_parser/src/lib.rs b/crates/oxc_parser/src/lib.rs index e5349ac6bbc8f..f9e77213c7965 100644 --- a/crates/oxc_parser/src/lib.rs +++ b/crates/oxc_parser/src/lib.rs @@ -560,7 +560,7 @@ impl<'a> ParserImpl<'a> { return error; } } - diagnostics::unexpected_token(self.cur_token().span()) + diagnostics::unexpected_token(self.cur_token_span()) } /// Push a Syntax Error diff --git a/crates/oxc_parser/src/modifiers.rs b/crates/oxc_parser/src/modifiers.rs index 972fa512e9cb8..6502fc530d0b6 100644 --- a/crates/oxc_parser/src/modifiers.rs +++ b/crates/oxc_parser/src/modifiers.rs @@ -6,7 +6,7 @@ use oxc_span::{GetSpan, Span, SPAN}; use crate::{ diagnostics, - lexer::{Kind, Token}, + lexer::{Kind, Lexer, Token}, ParserImpl, }; @@ -108,12 +108,13 @@ impl Modifier { pub fn is_static(&self) -> bool { matches!(self.kind, ModifierKind::Static) } -} -impl TryFrom for Modifier { - type Error = >::Error; - fn try_from(tok: Token) -> std::result::Result { - ModifierKind::try_from(tok.kind).map(|kind| Self { span: tok.span(), kind }) + #[inline] + pub fn try_from_token( + tok: Token, + lexer: &Lexer, + ) -> std::result::Result>::Error> { + ModifierKind::try_from(tok.kind).map(|kind| Self { span: lexer.token_span(tok), kind }) } } @@ -338,7 +339,7 @@ impl<'a> ParserImpl<'a> { // Rest modifiers cannot cross line _ => { self.bump_any(); - self.can_follow_modifier() && !self.cur_token().is_on_new_line + self.can_follow_modifier() && !self.cur_token().is_on_new_line() } } } @@ -482,7 +483,7 @@ impl<'a> ParserImpl<'a> { fn next_token_is_on_same_line_and_can_follow_modifier(&mut self) -> bool { self.bump_any(); - if self.cur_token().is_on_new_line { + if self.cur_token().is_on_new_line() { return false; } self.can_follow_modifier() @@ -525,12 +526,12 @@ impl<'a> ParserImpl<'a> { fn next_token_is_class_keyword_on_same_line(&mut self) -> bool { self.bump_any(); - self.cur_kind() == Kind::Class && !self.cur_token().is_on_new_line + self.cur_kind() == Kind::Class && !self.cur_token().is_on_new_line() } fn next_token_is_function_keyword_on_same_line(&mut self) -> bool { self.bump_any(); - self.cur_kind() == Kind::Function && !self.cur_token().is_on_new_line + self.cur_kind() == Kind::Function && !self.cur_token().is_on_new_line() } fn check_for_duplicate_modifiers(&mut self, seen_flags: ModifierFlags, modifier: &Modifier) { diff --git a/crates/oxc_parser/src/ts/statement.rs b/crates/oxc_parser/src/ts/statement.rs index 1e0d5b8e9b9e0..02daa18ffae04 100644 --- a/crates/oxc_parser/src/ts/statement.rs +++ b/crates/oxc_parser/src/ts/statement.rs @@ -84,10 +84,10 @@ impl<'a> ParserImpl<'a> { expr => Err(diagnostics::computed_property_names_not_allowed_in_enums(expr.span())), }, Kind::NoSubstitutionTemplate | Kind::TemplateHead => Err( - diagnostics::computed_property_names_not_allowed_in_enums(self.cur_token().span()), + diagnostics::computed_property_names_not_allowed_in_enums(self.cur_token_span()), ), kind if kind.is_number() => { - Err(diagnostics::enum_member_cannot_have_numeric_name(self.cur_token().span())) + Err(diagnostics::enum_member_cannot_have_numeric_name(self.cur_token_span())) } _ => { let ident_name = self.parse_identifier_name()?; @@ -182,7 +182,7 @@ impl<'a> ParserImpl<'a> { } pub(crate) fn is_at_interface_declaration(&mut self) -> bool { - if !self.at(Kind::Interface) || self.peek_token().is_on_new_line { + if !self.at(Kind::Interface) || self.peek_token().is_on_new_line() { false } else { self.peek_token().kind.is_binding_identifier() || self.peek_at(Kind::LCurly) @@ -251,7 +251,7 @@ 
impl<'a> ParserImpl<'a> { let next = self.nth(n + 1); - if next.is_on_new_line { + if next.is_on_new_line() { false } else { let followed_by_any_member = @@ -502,11 +502,11 @@ impl<'a> ParserImpl<'a> { Kind::Interface | Kind::Type => { self.bump_any(); return self.cur_kind().is_binding_identifier() - && !self.cur_token().is_on_new_line; + && !self.cur_token().is_on_new_line(); } Kind::Module | Kind::Namespace => { self.bump_any(); - return !self.cur_token().is_on_new_line + return !self.cur_token().is_on_new_line() && (self.cur_kind().is_binding_identifier() || self.cur_kind() == Kind::Str); } @@ -519,7 +519,7 @@ impl<'a> ParserImpl<'a> { | Kind::Public | Kind::Readonly => { self.bump_any(); - if self.cur_token().is_on_new_line { + if self.cur_token().is_on_new_line() { return false; } } diff --git a/crates/oxc_parser/src/ts/types.rs b/crates/oxc_parser/src/ts/types.rs index 85dcf1f6fffa4..c209538838826 100644 --- a/crates/oxc_parser/src/ts/types.rs +++ b/crates/oxc_parser/src/ts/types.rs @@ -19,7 +19,7 @@ impl<'a> ParserImpl<'a> { let span = self.start_span(); let ty = self.parse_union_type_or_higher()?; if !self.ctx.has_disallow_conditional_types() - && !self.cur_token().is_on_new_line + && !self.cur_token().is_on_new_line() && self.eat(Kind::Extends) { let extends_type = self.context( @@ -284,7 +284,7 @@ impl<'a> ParserImpl<'a> { let span = self.start_span(); let mut ty = self.parse_non_array_type()?; - while !self.cur_token().is_on_new_line { + while !self.cur_token().is_on_new_line() { match self.cur_kind() { Kind::Bang => { self.bump_any(); @@ -383,7 +383,7 @@ impl<'a> ParserImpl<'a> { let span = self.start_span(); self.bump_any(); // bump `this` let this_type = self.ast.ts_this_type(self.end_span(span)); - if self.peek_at(Kind::Is) && !self.peek_token().is_on_new_line { + if self.peek_at(Kind::Is) && !self.peek_token().is_on_new_line() { return self.parse_this_type_predicate(this_type); } Ok(TSType::TSThisType(self.alloc(this_type))) @@ -407,7 +407,7 @@ impl<'a> ParserImpl<'a> { Kind::Import => self.parse_ts_import_type(), Kind::Asserts => { let peek_token = self.peek_token(); - if peek_token.kind.is_identifier_name() && !peek_token.is_on_new_line { + if peek_token.kind.is_identifier_name() && !peek_token.is_on_new_line() { self.parse_asserts_type_predicate() } else { self.parse_type_reference() @@ -647,7 +647,7 @@ impl<'a> ParserImpl<'a> { let entity_name = self.parse_ts_type_name()?; // TODO: parseEntityName let entity_name = TSTypeQueryExprName::from(entity_name); let type_arguments = - if self.cur_token().is_on_new_line { None } else { self.try_parse_type_arguments()? }; + if self.cur_token().is_on_new_line() { None } else { self.try_parse_type_arguments()? 
}; Ok(self.ast.ts_type_type_query(self.end_span(span), entity_name, type_arguments)) } @@ -797,7 +797,7 @@ impl<'a> ParserImpl<'a> { &mut self, ) -> Result>>> { self.re_lex_l_angle(); - if !self.cur_token().is_on_new_line && self.re_lex_l_angle() == Kind::LAngle { + if !self.cur_token().is_on_new_line() && self.re_lex_l_angle() == Kind::LAngle { let span = self.start_span(); self.expect(Kind::LAngle)?; let params = self.parse_delimited_list( @@ -850,7 +850,7 @@ impl<'a> ParserImpl<'a> { Kind::LParen | Kind::NoSubstitutionTemplate | Kind::TemplateHead => true, Kind::LAngle | Kind::RAngle | Kind::Plus | Kind::Minus => false, _ => { - self.cur_token().is_on_new_line + self.cur_token().is_on_new_line() || self.is_binary_operator() || !self.is_start_of_expression() } @@ -1005,7 +1005,7 @@ impl<'a> ParserImpl<'a> { let span = self.start_span(); self.expect(Kind::LCurly)?; let attributes_keyword = match self.cur_kind() { - Kind::Assert if !self.cur_token().is_on_new_line => self.parse_identifier_name()?, + Kind::Assert if !self.cur_token().is_on_new_line() => self.parse_identifier_name()?, Kind::With => self.parse_identifier_name()?, _ => { return Err(self.unexpected()); @@ -1127,7 +1127,7 @@ impl<'a> ParserImpl<'a> { fn parse_type_predicate_prefix(&mut self) -> Result> { let id = self.parse_identifier_name()?; let token = self.cur_token(); - if token.kind == Kind::Is && !token.is_on_new_line { + if token.kind == Kind::Is && !token.is_on_new_line() { self.bump_any(); return Ok(id); } @@ -1333,7 +1333,7 @@ impl<'a> ParserImpl<'a> { #[allow(clippy::unnecessary_fallible_conversions)] if let Ok(kind) = ModifierKind::try_from(self.cur_kind()) { - let modifier = Modifier { kind, span: self.cur_token().span() }; + let modifier = Modifier { kind, span: self.cur_token_span() }; flags.set(kind.into(), true); modifiers.push(modifier); } else {
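The heart of this patch is the `Token` layout change near the end of the diff: the `end: u32` field is replaced by `len: u16`, shrinking `Token` from 16 bytes to 8, and the true end offset of any token longer than `u16::MAX` bytes is spilled into the lexer's new `long_token_ends` map, keyed by `Token::start`. A minimal standalone sketch of that scheme, assuming only the standard library (`CompactToken`, `LongEnds`, and the plain `u8` kind/flags fields are illustrative stand-ins, not the crate's types):

```rust
use std::collections::HashMap;

/// Illustrative 8-byte token: 4 (start) + 2 (len) + 1 (kind) + 1 (flags).
#[derive(Clone, Copy, Debug, Default)]
struct CompactToken {
    start: u32,
    len: u16, // `u16::MAX` doubles as a sentinel: "the real end may live in the side table"
    kind: u8,
    flags: u8,
}

/// Side table holding end offsets of the rare tokens longer than `u16::MAX` bytes,
/// keyed by token start (the real lexer uses an `FxHashMap` for this).
type LongEnds = HashMap<u32, u32>;

impl CompactToken {
    fn set_end(&mut self, end: u32, long_ends: &mut LongEnds) {
        debug_assert!(end >= self.start);
        if let Ok(len) = u16::try_from(end - self.start) {
            self.len = len;
        } else {
            // Cold path: the length does not fit in u16, record the real end out of line.
            self.len = u16::MAX;
            long_ends.insert(self.start, end);
        }
    }

    fn end(&self, long_ends: &LongEnds) -> u32 {
        if self.len != u16::MAX {
            self.start + u32::from(self.len)
        } else {
            // Either a genuinely long token (present in the map) or a token whose
            // length is exactly `u16::MAX` (stored inline, absent from the map).
            long_ends
                .get(&self.start)
                .copied()
                .unwrap_or(self.start + u32::from(u16::MAX))
        }
    }
}

fn main() {
    let mut long_ends = LongEnds::new();

    let mut short = CompactToken { start: 10, ..CompactToken::default() };
    short.set_end(14, &mut long_ends);
    assert_eq!(short.end(&long_ends), 14);

    let mut huge = CompactToken { start: 100, ..CompactToken::default() };
    huge.set_end(100 + 70_000, &mut long_ends); // length exceeds u16::MAX
    assert_eq!(huge.end(&long_ends), 100 + 70_000);

    assert_eq!(std::mem::size_of::<CompactToken>(), 8);
}
```

This is also why span access moves behind the lexer (`lexer.token_span(token)`, `ParserImpl::cur_token_span()`) and why `Modifier::try_from_token(tok, &lexer)` replaces the old `impl TryFrom<Token>`: computing an end offset may require the side table, so a `Token` alone no longer carries enough information.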
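The other half of the 8-byte layout is folding the three former `bool` fields (`is_on_new_line`, `escaped`, `has_separator`) into a single `TokenFlags: u8` bitflags field, turning every former field read into a method call (`token.is_on_new_line` becomes `token.is_on_new_line()` throughout the parser). A small sketch of that accessor pattern, assuming the `bitflags` crate and an illustrative `DemoToken` type:

```rust
use bitflags::bitflags;

bitflags! {
    #[derive(Debug, Clone, Copy, Default)]
    struct TokenFlags: u8 {
        const IsOnNewLine  = 1 << 0;
        const Escaped      = 1 << 1;
        const HasSeparator = 1 << 2;
    }
}

/// Illustrative token carrying only the flags byte.
#[derive(Debug, Clone, Copy, Default)]
struct DemoToken {
    flags: TokenFlags,
}

impl DemoToken {
    #[inline]
    fn is_on_new_line(&self) -> bool {
        self.flags.contains(TokenFlags::IsOnNewLine)
    }

    #[inline]
    fn set_is_on_new_line(&mut self) {
        self.flags.insert(TokenFlags::IsOnNewLine);
    }

    #[inline]
    fn escaped(&self) -> bool {
        self.flags.contains(TokenFlags::Escaped)
    }

    #[inline]
    fn set_escaped(&mut self) {
        self.flags.insert(TokenFlags::Escaped);
    }
}

fn main() {
    let mut token = DemoToken::default();
    assert!(!token.is_on_new_line() && !token.escaped());
    token.set_is_on_new_line();
    token.set_escaped();
    assert!(token.is_on_new_line() && token.escaped());
    assert_eq!(std::mem::size_of::<TokenFlags>(), 1);
}
```

The setters only ever insert bits, mirroring the patch: `finish_next` resets to `Token::default()` with an empty flags byte for each new token, so nothing needs clearing, and the flags byte sits in space that was previously explicit padding.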