date: parse fixed-width digit fields in `DateParser` from ASCII bytes

`parse_date` now borrows Latin1 input directly (after an ASCII check) and only
allocates for UTF-16 input. `DateParser` iterates over bytes instead of `char`s,
and fixed-width digit fields go through `parse_n_ascii_digits`, which uses the
`fast_atoi::process_4` / `process_8` helpers for blocks of 4 to 8 digits.

diff --git a/core/engine/src/builtins/date/tests.rs b/core/engine/src/builtins/date/tests.rs
index 7cd287352d9..392394a1a50 100644
--- a/core/engine/src/builtins/date/tests.rs
+++ b/core/engine/src/builtins/date/tests.rs
@@ -1,4 +1,7 @@
-use crate::{js_string, run_test_actions, JsNativeErrorKind, TestAction};
+use crate::{
+    builtins::date::utils::fast_atoi::{process_4, process_8},
+    js_string, run_test_actions, JsNativeErrorKind, TestAction,
+};
 use boa_macros::js_str;
 use indoc::indoc;
 use time::{macros::format_description, OffsetDateTime};
@@ -72,6 +75,28 @@ fn timestamp_from_utc(
     t.unix_timestamp() * 1000 + i64::from(t.millisecond())
 }
 
+#[test]
+fn parse_ascii_digits() {
+    let parse_8_ascii_digits = |val: &[u8; 8], len: usize| -> u64 {
+        let val = u64::from_le_bytes(*val);
+        process_8(val, len)
+    };
+    assert_eq!(12_345_678, parse_8_ascii_digits(b"12345678", 8));
+    assert_eq!(123_456, parse_8_ascii_digits(b"123456xx", 6));
+    assert_eq!(123, parse_8_ascii_digits(b"123xxxxx", 3));
+    assert_eq!(123, parse_8_ascii_digits(b"000123xx", 6));
+    assert_eq!(0, parse_8_ascii_digits(b"00000000", 8));
+    let parse_4_ascii_digits = |val: &[u8; 4], len: usize| -> u64 {
+        let val = u32::from_le_bytes(*val);
+        u64::from(process_4(val, len))
+    };
+    assert_eq!(1234, parse_4_ascii_digits(b"1234", 4));
+    assert_eq!(12, parse_4_ascii_digits(b"12xx", 2));
+    assert_eq!(3, parse_4_ascii_digits(b"003x", 3));
+    assert_eq!(23, parse_4_ascii_digits(b"023x", 3));
+    assert_eq!(0, parse_4_ascii_digits(b"0000", 4));
+}
+
 #[test]
 fn date_this_time_value() {
     run_test_actions([TestAction::assert_native_error(
diff --git a/core/engine/src/builtins/date/utils.rs b/core/engine/src/builtins/date/utils.rs
index b8c8f5ebaaa..9d25feae0e5 100644
--- a/core/engine/src/builtins/date/utils.rs
+++ b/core/engine/src/builtins/date/utils.rs
@@ -1,6 +1,9 @@
 use crate::{context::HostHooks, js_string, value::IntegerOrInfinity, JsStr, JsString};
 use boa_macros::js_str;
-use std::{iter::Peekable, str::Chars};
+use boa_string::JsStrVariant;
+use std::iter::Peekable;
+use std::slice::Iter;
+use std::str;
 use time::{macros::format_description, OffsetDateTime, PrimitiveDateTime};
 
 // Time-related Constants
@@ -750,22 +753,37 @@ pub(super) fn pad_six(t: u32, output: &mut [u8; 6]) -> JsStr<'_> {
 /// [spec-format]: https://tc39.es/ecma262/#sec-date-time-string-format
 pub(super) fn parse_date(date: &JsString, hooks: &dyn HostHooks) -> Option<i64> {
     // All characters must be ASCII so we can return early if we find a non-ASCII character.
-    let Ok(date) = date.to_std_string() else {
-        return None;
+    let owned_js_str = date.as_str();
+    let owned_string: String;
+    let date = match owned_js_str.variant() {
+        JsStrVariant::Latin1(s) => {
+            if !s.is_ascii() {
+                return None;
+            }
+            // SAFETY: Since all characters are ASCII we can safely convert this into str.
+            unsafe { str::from_utf8_unchecked(s) }
+        }
+        JsStrVariant::Utf16(s) => {
+            owned_string = String::from_utf16(s).ok()?;
+            if !owned_string.is_ascii() {
+                return None;
+            }
+            owned_string.as_str()
+        }
     };
 
     // Date Time String Format: 'YYYY-MM-DDTHH:mm:ss.sssZ'
-    if let Some(dt) = DateParser::new(&date, hooks).parse() {
+    if let Some(dt) = DateParser::new(date, hooks).parse() {
         return Some(dt);
     }
 
     // `toString` format: `Thu Jan 01 1970 00:00:00 GMT+0000`
-    if let Ok(t) = OffsetDateTime::parse(&date, &format_description!("[weekday repr:short] [month repr:short] [day] [year] [hour]:[minute]:[second] GMT[offset_hour sign:mandatory][offset_minute][end]")) {
+    if let Ok(t) = OffsetDateTime::parse(date, &format_description!("[weekday repr:short] [month repr:short] [day] [year] [hour]:[minute]:[second] GMT[offset_hour sign:mandatory][offset_minute][end]")) {
         return Some(t.unix_timestamp() * 1000 + i64::from(t.millisecond()));
     }
 
     // `toUTCString` format: `Thu, 01 Jan 1970 00:00:00 GMT`
-    if let Ok(t) = PrimitiveDateTime::parse(&date, &format_description!("[weekday repr:short], [day] [month repr:short] [year] [hour]:[minute]:[second] GMT[end]")) {
+    if let Ok(t) = PrimitiveDateTime::parse(date, &format_description!("[weekday repr:short], [day] [month repr:short] [year] [hour]:[minute]:[second] GMT[end]")) {
         let t = t.assume_utc();
         return Some(t.unix_timestamp() * 1000 + i64::from(t.millisecond()));
     }
@@ -778,7 +796,7 @@ pub(super) fn parse_date(date: &JsString, hooks: &dyn HostHooks) -> Option<i64>
 /// [spec]: https://tc39.es/ecma262/#sec-date-time-string-format
 struct DateParser<'a> {
     hooks: &'a dyn HostHooks,
-    input: Peekable<Chars<'a>>,
+    input: Peekable<Iter<'a, u8>>,
     year: i32,
     month: u32,
     day: u32,
@@ -789,11 +807,32 @@ struct DateParser<'a> {
     offset: i64,
 }
 
+// Copied from https://github.com/RoDmitry/atoi_simd/blob/master/src/fallback.rs,
+// which is based on https://rust-malaysia.github.io/code/2020/07/11/faster-integer-parsing.html.
+#[doc(hidden)]
+#[allow(clippy::inline_always)]
+pub(in crate::builtins::date) mod fast_atoi {
+    #[inline(always)]
+    pub(in crate::builtins::date) const fn process_8(mut val: u64, len: usize) -> u64 {
+        val <<= 64_usize.saturating_sub(len << 3); // << 3 - same as mult by 8
+        val = (val & 0x0F0F_0F0F_0F0F_0F0F).wrapping_mul(0xA01) >> 8;
+        val = (val & 0x00FF_00FF_00FF_00FF).wrapping_mul(0x64_0001) >> 16;
+        (val & 0x0000_FFFF_0000_FFFF).wrapping_mul(0x2710_0000_0001) >> 32
+    }
+
+    #[inline(always)]
+    pub(in crate::builtins::date) const fn process_4(mut val: u32, len: usize) -> u32 {
+        val <<= 32_usize.saturating_sub(len << 3); // << 3 - same as mult by 8
+        val = (val & 0x0F0F_0F0F).wrapping_mul(0xA01) >> 8;
+        (val & 0x00FF_00FF).wrapping_mul(0x64_0001) >> 16
+    }
+}
+
 impl<'a> DateParser<'a> {
     fn new(s: &'a str, hooks: &'a dyn HostHooks) -> Self {
         Self {
             hooks,
-            input: s.chars().peekable(),
+            input: s.as_bytes().iter().peekable(),
             year: 0,
             month: 1,
             day: 1,
@@ -805,20 +844,56 @@ impl<'a> DateParser<'a> {
         }
     }
 
-    fn next_expect(&mut self, expect: char) -> Option<()> {
+    fn next_expect(&mut self, expect: u8) -> Option<()> {
         self.input
             .next()
-            .and_then(|c| if c == expect { Some(()) } else { None })
+            .and_then(|c| if *c == expect { Some(()) } else { None })
     }
 
-    fn next_digit(&mut self) -> Option<u8> {
-        self.input.next().and_then(|c| {
-            if c.is_ascii_digit() {
-                Some((u32::from(c) - u32::from('0')) as u8)
-            } else {
-                None
+    fn next_ascii_digit(&mut self) -> Option<u8> {
+        self.input
+            .next()
+            .and_then(|c| if c.is_ascii_digit() { Some(*c) } else { None })
+    }
+
+    fn next_n_ascii_digits<const N: usize>(&mut self) -> Option<[u8; N]> {
+        let mut digits = [0; N];
+        for digit in &mut digits {
+            *digit = self.next_ascii_digit()?;
+        }
+        Some(digits)
+    }
+
+    fn parse_n_ascii_digits<const N: usize>(&mut self) -> Option<u64> {
+        assert!(N <= 8, "parse_n_ascii_digits parses no more than 8 digits");
+        if N == 0 {
+            return None;
+        }
+        let ascii_digits = self.next_n_ascii_digits::<N>()?;
+        match N {
+            1..4 => {
+                // When N is small, process digits naively.
+                let mut res = 0;
+                for digit in ascii_digits {
+                    res = res * 10 + u64::from(digit & 0xF);
+                }
+                Some(res)
             }
-        })
+            4 => {
+                // Process digits as an u32 block.
+                let mut src = [0; 4];
+                src[..N].copy_from_slice(&ascii_digits);
+                let val = u32::from_le_bytes(src);
+                Some(u64::from(fast_atoi::process_4(val, N)))
+            }
+            _ => {
+                // Process digits as an u64 block.
+                let mut src = [0; 8];
+                src[..N].copy_from_slice(&ascii_digits);
+                let val = u64::from_le_bytes(src);
+                Some(fast_atoi::process_8(val, N))
+            }
+        }
     }
 
     fn finish(&mut self) -> Option<i64> {
@@ -869,105 +944,85 @@ impl<'a> DateParser<'a> {
         }
     }
 
+    #[allow(clippy::as_conversions)]
     fn parse(&mut self) -> Option<i64> {
         self.parse_year()?;
         match self.input.peek() {
-            Some('T') => return self.parse_time(),
+            Some(b'T') => return self.parse_time(),
             None => return self.finish(),
             _ => {}
         }
-        self.next_expect('-')?;
-        self.month = u32::from(self.next_digit()?) * 10 + u32::from(self.next_digit()?);
+        self.next_expect(b'-')?;
+        self.month = self.parse_n_ascii_digits::<2>()? as u32;
         if self.month < 1 || self.month > 12 {
             return None;
         }
         match self.input.peek() {
-            Some('T') => return self.parse_time(),
+            Some(b'T') => return self.parse_time(),
             None => return self.finish(),
             _ => {}
         }
-        self.next_expect('-')?;
-        self.day = u32::from(self.next_digit()?) * 10 + u32::from(self.next_digit()?);
+        self.next_expect(b'-')?;
+        self.day = self.parse_n_ascii_digits::<2>()? as u32;
         if self.day < 1 || self.day > 31 {
             return None;
         }
         match self.input.peek() {
-            Some('T') => self.parse_time(),
+            Some(b'T') => self.parse_time(),
             _ => self.finish(),
         }
     }
 
+    #[allow(clippy::as_conversions)]
     fn parse_year(&mut self) -> Option<()> {
-        match self.input.next()? {
-            '+' => {
-                self.year = i32::from(self.next_digit()?) * 100_000
-                    + i32::from(self.next_digit()?) * 10000
-                    + i32::from(self.next_digit()?) * 1000
-                    + i32::from(self.next_digit()?) * 100
-                    + i32::from(self.next_digit()?) * 10
-                    + i32::from(self.next_digit()?);
-                Some(())
-            }
-            '-' => {
-                let year = i32::from(self.next_digit()?) * 100_000
-                    + i32::from(self.next_digit()?) * 10000
-                    + i32::from(self.next_digit()?) * 1000
-                    + i32::from(self.next_digit()?) * 100
-                    + i32::from(self.next_digit()?) * 10
-                    + i32::from(self.next_digit()?);
-                if year == 0 {
-                    return None;
-                }
-                self.year = -year;
-                Some(())
+        if let &&sign @ (b'+' | b'-') = self.input.peek()? {
+            // Consume the sign.
+            self.input.next();
+            let year = self.parse_n_ascii_digits::<6>()? as i32;
+            let neg = sign == b'-';
+            if neg && year == 0 {
+                return None;
             }
-            c if c.is_ascii_digit() => {
-                self.year = i32::from((u32::from(c) - u32::from('0')) as u8) * 1000
-                    + i32::from(self.next_digit()?) * 100
-                    + i32::from(self.next_digit()?) * 10
-                    + i32::from(self.next_digit()?);
-                Some(())
-            }
-            _ => None,
+            self.year = if neg { -year } else { year };
+        } else {
+            self.year = self.parse_n_ascii_digits::<4>()? as i32;
         }
+        Some(())
     }
 
+    #[allow(clippy::as_conversions)]
     fn parse_time(&mut self) -> Option<i64> {
-        self.next_expect('T')?;
-        self.hour = u32::from(self.next_digit()?) * 10 + u32::from(self.next_digit()?);
+        self.next_expect(b'T')?;
+        self.hour = self.parse_n_ascii_digits::<2>()? as u32;
         if self.hour > 24 {
             return None;
         }
-        self.next_expect(':')?;
-        self.minute = u32::from(self.next_digit()?) * 10 + u32::from(self.next_digit()?);
+        self.next_expect(b':')?;
+        self.minute = self.parse_n_ascii_digits::<2>()? as u32;
         if self.minute > 59 {
             return None;
         }
         match self.input.peek() {
-            Some(':') => {}
+            Some(b':') => self.input.next(),
             None => return self.finish_local(),
             _ => {
                 self.parse_timezone()?;
                 return self.finish();
             }
-        }
-        self.next_expect(':')?;
-        self.second = u32::from(self.next_digit()?) * 10 + u32::from(self.next_digit()?);
+        };
+        self.second = self.parse_n_ascii_digits::<2>()? as u32;
         if self.second > 59 {
             return None;
         }
         match self.input.peek() {
-            Some('.') => {}
+            Some(b'.') => self.input.next(),
             None => return self.finish_local(),
             _ => {
                 self.parse_timezone()?;
                 return self.finish();
             }
-        }
-        self.next_expect('.')?;
-        self.millisecond = u32::from(self.next_digit()?) * 100
-            + u32::from(self.next_digit()?) * 10
-            + u32::from(self.next_digit()?);
+        };
+        self.millisecond = self.parse_n_ascii_digits::<3>()? as u32;
         if self.input.peek().is_some() {
             self.parse_timezone()?;
             self.finish()
@@ -976,44 +1031,26 @@ impl<'a> DateParser<'a> {
         }
     }
 
+    #[allow(clippy::as_conversions)]
     fn parse_timezone(&mut self) -> Option<()> {
         match self.input.next() {
-            Some('Z') => return Some(()),
-            Some('+') => {
-                let offset_hour =
-                    i64::from(self.next_digit()?) * 10 + i64::from(self.next_digit()?);
-                if offset_hour > 23 {
-                    return None;
-                }
-                self.offset = -offset_hour * 60;
-                if self.input.peek().is_none() {
-                    return Some(());
-                }
-                self.next_expect(':')?;
-                let offset_minute =
-                    i64::from(self.next_digit()?) * 10 + i64::from(self.next_digit()?);
-                if offset_minute > 59 {
-                    return None;
-                }
-                self.offset += -offset_minute;
-            }
-            Some('-') => {
-                let offset_hour =
-                    i64::from(self.next_digit()?) * 10 + i64::from(self.next_digit()?);
+            Some(b'Z') => return Some(()),
+            Some(sign @ (b'+' | b'-')) => {
+                let neg = *sign == b'-';
+                let offset_hour = self.parse_n_ascii_digits::<2>()? as i64;
                 if offset_hour > 23 {
                     return None;
                 }
-                self.offset = offset_hour * 60;
+                self.offset = if neg { offset_hour } else { -offset_hour } * 60;
                 if self.input.peek().is_none() {
                     return Some(());
                 }
-                self.next_expect(':')?;
-                let offset_minute =
-                    i64::from(self.next_digit()?) * 10 + i64::from(self.next_digit()?);
+                self.next_expect(b':')?;
+                let offset_minute = self.parse_n_ascii_digits::<2>()? as i64;
                 if offset_minute > 59 {
                     return None;
                 }
-                self.offset += offset_minute;
+                self.offset += if neg { offset_minute } else { -offset_minute };
             }
             _ => return None,
         }