From 5add5d79648df80b610f9c92e5b89d7eeb609ac0 Mon Sep 17 00:00:00 2001 From: CrazyboyQCD Date: Fri, 24 Jan 2025 11:34:55 +0800 Subject: [PATCH 1/7] chore: avoid allocation if string is ascii --- core/engine/src/builtins/date/utils.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/core/engine/src/builtins/date/utils.rs b/core/engine/src/builtins/date/utils.rs index b8c8f5ebaaa..2e0e32477fa 100644 --- a/core/engine/src/builtins/date/utils.rs +++ b/core/engine/src/builtins/date/utils.rs @@ -1,6 +1,9 @@ use crate::{context::HostHooks, js_string, value::IntegerOrInfinity, JsStr, JsString}; use boa_macros::js_str; -use std::{iter::Peekable, str::Chars}; +use boa_string::JsStrVariant; +use std::iter::Peekable; +use std::slice::Iter; +use std::str; use time::{macros::format_description, OffsetDateTime, PrimitiveDateTime}; // Time-related Constants @@ -750,8 +753,16 @@ pub(super) fn pad_six(t: u32, output: &mut [u8; 6]) -> JsStr<'_> { /// [spec-format]: https://tc39.es/ecma262/#sec-date-time-string-format pub(super) fn parse_date(date: &JsString, hooks: &dyn HostHooks) -> Option { // All characters must be ASCII so we can return early if we find a non-ASCII character. - let Ok(date) = date.to_std_string() else { - return None; + let owned_js_str = date.as_str(); + let owned_string: String; + let date = match owned_js_str.variant() { + JsStrVariant::Latin1(s) => + // SAFETY: Since all characters are ASCII we can safely convert this into str. + unsafe { str::from_utf8_unchecked(s) }, + JsStrVariant::Utf16(s) => { + owned_string = String::from_utf16(s).ok()?; + owned_string.as_str() + } }; // Date Time String Format: 'YYYY-MM-DDTHH:mm:ss.sssZ' From 44975275bddeade299ca9185df45eca1f1a1ad16 Mon Sep 17 00:00:00 2001 From: CrazyboyQCD Date: Fri, 24 Jan 2025 11:54:46 +0800 Subject: [PATCH 2/7] perf: replace `Chars<'_>` with `slice::Iter<'_, u8>` and add `next_n_digits` helper method --- core/engine/src/builtins/date/utils.rs | 148 +++++++++++++------------ 1 file changed, 77 insertions(+), 71 deletions(-) diff --git a/core/engine/src/builtins/date/utils.rs b/core/engine/src/builtins/date/utils.rs index 2e0e32477fa..0282063c589 100644 --- a/core/engine/src/builtins/date/utils.rs +++ b/core/engine/src/builtins/date/utils.rs @@ -761,6 +761,9 @@ pub(super) fn parse_date(date: &JsString, hooks: &dyn HostHooks) -> Option unsafe { str::from_utf8_unchecked(s) }, JsStrVariant::Utf16(s) => { owned_string = String::from_utf16(s).ok()?; + if !owned_string.is_ascii() { + return None; + } owned_string.as_str() } }; @@ -789,7 +792,7 @@ pub(super) fn parse_date(date: &JsString, hooks: &dyn HostHooks) -> Option /// [spec]: https://tc39.es/ecma262/#sec-date-time-string-format struct DateParser<'a> { hooks: &'a dyn HostHooks, - input: Peekable>, + input: Peekable>, year: i32, month: u32, day: u32, @@ -804,7 +807,7 @@ impl<'a> DateParser<'a> { fn new(s: &'a str, hooks: &'a dyn HostHooks) -> Self { Self { hooks, - input: s.chars().peekable(), + input: s.as_bytes().iter().peekable(), year: 0, month: 1, day: 1, @@ -816,22 +819,39 @@ impl<'a> DateParser<'a> { } } - fn next_expect(&mut self, expect: char) -> Option<()> { + fn next_expect(&mut self, expect: u8) -> Option<()> { self.input .next() - .and_then(|c| if c == expect { Some(()) } else { None }) + .and_then(|c| if *c == expect { Some(()) } else { None }) } + #[allow(unused)] fn next_digit(&mut self) -> Option { self.input.next().and_then(|c| { if c.is_ascii_digit() { - Some((u32::from(c) - u32::from('0')) as u8) + Some(c - b'0') } else { None } }) } + fn next_n_digits(&mut self) -> Option<[u8; N]> { + if self.input.len() < N { + return None; + } + let mut res = [0; N]; + for i in 0..N { + // SAFETY: Bound check has been done above. + let c = unsafe { *self.input.next().unwrap_unchecked() }; + if !c.is_ascii_digit() { + return None; + } + res[i] = c - b'0'; + } + Some(res) + } + fn finish(&mut self) -> Option { if self.input.peek().is_some() { return None; @@ -883,60 +903,54 @@ impl<'a> DateParser<'a> { fn parse(&mut self) -> Option { self.parse_year()?; match self.input.peek() { - Some('T') => return self.parse_time(), + Some(b'T') => return self.parse_time(), None => return self.finish(), _ => {} } - self.next_expect('-')?; - self.month = u32::from(self.next_digit()?) * 10 + u32::from(self.next_digit()?); + self.next_expect(b'-')?; + let month_digits = self.next_n_digits::<2>()?; + self.month = u32::from(month_digits[0] * 10 + month_digits[1]); if self.month < 1 || self.month > 12 { return None; } match self.input.peek() { - Some('T') => return self.parse_time(), + Some(b'T') => return self.parse_time(), None => return self.finish(), _ => {} } - self.next_expect('-')?; - self.day = u32::from(self.next_digit()?) * 10 + u32::from(self.next_digit()?); + self.next_expect(b'-')?; + let day_digits = self.next_n_digits::<2>()?; + self.day = u32::from(day_digits[0] * 10 + day_digits[1]); if self.day < 1 || self.day > 31 { return None; } match self.input.peek() { - Some('T') => self.parse_time(), + Some(b'T') => self.parse_time(), _ => self.finish(), } } fn parse_year(&mut self) -> Option<()> { match self.input.next()? { - '+' => { - self.year = i32::from(self.next_digit()?) * 100_000 - + i32::from(self.next_digit()?) * 10000 - + i32::from(self.next_digit()?) * 1000 - + i32::from(self.next_digit()?) * 100 - + i32::from(self.next_digit()?) * 10 - + i32::from(self.next_digit()?); - Some(()) - } - '-' => { - let year = i32::from(self.next_digit()?) * 100_000 - + i32::from(self.next_digit()?) * 10000 - + i32::from(self.next_digit()?) * 1000 - + i32::from(self.next_digit()?) * 100 - + i32::from(self.next_digit()?) * 10 - + i32::from(self.next_digit()?); - if year == 0 { + &b @ (b'+' | b'-') => { + let digits = self.next_n_digits::<6>()?.map(i32::from); + let year = digits[0] * 100_000 + + digits[1] * 10000 + + digits[2] * 1000 + + digits[3] * 100 + + digits[4] * 10 + + digits[5]; + let neg = b == b'-'; + if neg && year == 0 { return None; } - self.year = -year; + self.year = if neg { -year } else { year }; Some(()) } c if c.is_ascii_digit() => { - self.year = i32::from((u32::from(c) - u32::from('0')) as u8) * 1000 - + i32::from(self.next_digit()?) * 100 - + i32::from(self.next_digit()?) * 10 - + i32::from(self.next_digit()?); + let digits = self.next_n_digits::<3>()?.map(i32::from); + self.year = + i32::from(c - b'0') * 1000 + digits[0] * 100 + digits[1] * 10 + digits[2]; Some(()) } _ => None, @@ -944,41 +958,46 @@ impl<'a> DateParser<'a> { } fn parse_time(&mut self) -> Option { - self.next_expect('T')?; - self.hour = u32::from(self.next_digit()?) * 10 + u32::from(self.next_digit()?); + self.next_expect(b'T')?; + let hour_digits = self.next_n_digits::<2>()?; + self.hour = u32::from(hour_digits[0] * 10 + hour_digits[1]); if self.hour > 24 { return None; } - self.next_expect(':')?; - self.minute = u32::from(self.next_digit()?) * 10 + u32::from(self.next_digit()?); + self.next_expect(b':')?; + let minute_digits = self.next_n_digits::<2>()?; + self.minute = u32::from(minute_digits[0] * 10 + minute_digits[1]); if self.minute > 59 { return None; } match self.input.peek() { - Some(':') => {} + Some(b':') => { + self.input.next(); + } None => return self.finish_local(), _ => { self.parse_timezone()?; return self.finish(); } } - self.next_expect(':')?; - self.second = u32::from(self.next_digit()?) * 10 + u32::from(self.next_digit()?); + let second_digits = self.next_n_digits::<2>()?; + self.second = u32::from(second_digits[0] * 10 + second_digits[1]); if self.second > 59 { return None; } match self.input.peek() { - Some('.') => {} + Some(b'.') => { + self.input.next(); + } None => return self.finish_local(), _ => { self.parse_timezone()?; return self.finish(); } } - self.next_expect('.')?; - self.millisecond = u32::from(self.next_digit()?) * 100 - + u32::from(self.next_digit()?) * 10 - + u32::from(self.next_digit()?); + let millisecond_digits = self.next_n_digits::<3>()?.map(u32::from); + self.millisecond = + millisecond_digits[0] * 100 + millisecond_digits[1] * 10 + millisecond_digits[2]; if self.input.peek().is_some() { self.parse_timezone()?; self.finish() @@ -989,10 +1008,10 @@ impl<'a> DateParser<'a> { fn parse_timezone(&mut self) -> Option<()> { match self.input.next() { - Some('Z') => return Some(()), - Some('+') => { - let offset_hour = - i64::from(self.next_digit()?) * 10 + i64::from(self.next_digit()?); + Some(b'Z') => return Some(()), + Some(&b @ (b'+' | b'-')) => { + let offset_hour_digits = self.next_n_digits::<2>()?; + let offset_hour = i64::from(offset_hour_digits[0] * 10 + offset_hour_digits[1]); if offset_hour > 23 { return None; } @@ -1000,31 +1019,18 @@ impl<'a> DateParser<'a> { if self.input.peek().is_none() { return Some(()); } - self.next_expect(':')?; - let offset_minute = - i64::from(self.next_digit()?) * 10 + i64::from(self.next_digit()?); - if offset_minute > 59 { - return None; - } - self.offset += -offset_minute; - } - Some('-') => { - let offset_hour = - i64::from(self.next_digit()?) * 10 + i64::from(self.next_digit()?); - if offset_hour > 23 { - return None; - } - self.offset = offset_hour * 60; - if self.input.peek().is_none() { - return Some(()); - } - self.next_expect(':')?; + self.next_expect(b':')?; + let offset_minute_digits = self.next_n_digits::<2>()?; let offset_minute = - i64::from(self.next_digit()?) * 10 + i64::from(self.next_digit()?); + i64::from(offset_minute_digits[0] * 10 + offset_minute_digits[1]); if offset_minute > 59 { return None; } - self.offset += offset_minute; + self.offset += if b == b'-' { + -offset_minute + } else { + offset_minute + }; } _ => return None, } From 161aae5352eef9d6e901535bb5f8482d436bef63 Mon Sep 17 00:00:00 2001 From: CrazyboyQCD Date: Fri, 24 Jan 2025 14:31:13 +0800 Subject: [PATCH 3/7] chore: fix lint --- core/engine/src/builtins/date/utils.rs | 41 +++++++++++--------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/core/engine/src/builtins/date/utils.rs b/core/engine/src/builtins/date/utils.rs index 0282063c589..cc58a9927b2 100644 --- a/core/engine/src/builtins/date/utils.rs +++ b/core/engine/src/builtins/date/utils.rs @@ -769,17 +769,17 @@ pub(super) fn parse_date(date: &JsString, hooks: &dyn HostHooks) -> Option }; // Date Time String Format: 'YYYY-MM-DDTHH:mm:ss.sssZ' - if let Some(dt) = DateParser::new(&date, hooks).parse() { + if let Some(dt) = DateParser::new(date, hooks).parse() { return Some(dt); } // `toString` format: `Thu Jan 01 1970 00:00:00 GMT+0000` - if let Ok(t) = OffsetDateTime::parse(&date, &format_description!("[weekday repr:short] [month repr:short] [day] [year] [hour]:[minute]:[second] GMT[offset_hour sign:mandatory][offset_minute][end]")) { + if let Ok(t) = OffsetDateTime::parse(date, &format_description!("[weekday repr:short] [month repr:short] [day] [year] [hour]:[minute]:[second] GMT[offset_hour sign:mandatory][offset_minute][end]")) { return Some(t.unix_timestamp() * 1000 + i64::from(t.millisecond())); } // `toUTCString` format: `Thu, 01 Jan 1970 00:00:00 GMT` - if let Ok(t) = PrimitiveDateTime::parse(&date, &format_description!("[weekday repr:short], [day] [month repr:short] [year] [hour]:[minute]:[second] GMT[end]")) { + if let Ok(t) = PrimitiveDateTime::parse(date, &format_description!("[weekday repr:short], [day] [month repr:short] [year] [hour]:[minute]:[second] GMT[end]")) { let t = t.assume_utc(); return Some(t.unix_timestamp() * 1000 + i64::from(t.millisecond())); } @@ -840,16 +840,16 @@ impl<'a> DateParser<'a> { if self.input.len() < N { return None; } - let mut res = [0; N]; - for i in 0..N { + let mut digits = [0; N]; + for digit in &mut digits { // SAFETY: Bound check has been done above. let c = unsafe { *self.input.next().unwrap_unchecked() }; if !c.is_ascii_digit() { return None; } - res[i] = c - b'0'; + *digit = c - b'0'; } - Some(res) + Some(digits) } fn finish(&mut self) -> Option { @@ -932,7 +932,7 @@ impl<'a> DateParser<'a> { fn parse_year(&mut self) -> Option<()> { match self.input.next()? { - &b @ (b'+' | b'-') => { + sign @ (b'+' | b'-') => { let digits = self.next_n_digits::<6>()?.map(i32::from); let year = digits[0] * 100_000 + digits[1] * 10000 @@ -940,7 +940,7 @@ impl<'a> DateParser<'a> { + digits[3] * 100 + digits[4] * 10 + digits[5]; - let neg = b == b'-'; + let neg = *sign == b'-'; if neg && year == 0 { return None; } @@ -971,30 +971,26 @@ impl<'a> DateParser<'a> { return None; } match self.input.peek() { - Some(b':') => { - self.input.next(); - } + Some(b':') => self.input.next(), None => return self.finish_local(), _ => { self.parse_timezone()?; return self.finish(); } - } + }; let second_digits = self.next_n_digits::<2>()?; self.second = u32::from(second_digits[0] * 10 + second_digits[1]); if self.second > 59 { return None; } match self.input.peek() { - Some(b'.') => { - self.input.next(); - } + Some(b'.') => self.input.next(), None => return self.finish_local(), _ => { self.parse_timezone()?; return self.finish(); } - } + }; let millisecond_digits = self.next_n_digits::<3>()?.map(u32::from); self.millisecond = millisecond_digits[0] * 100 + millisecond_digits[1] * 10 + millisecond_digits[2]; @@ -1009,13 +1005,14 @@ impl<'a> DateParser<'a> { fn parse_timezone(&mut self) -> Option<()> { match self.input.next() { Some(b'Z') => return Some(()), - Some(&b @ (b'+' | b'-')) => { + Some(sign @ (b'+' | b'-')) => { + let neg = *sign == b'-'; let offset_hour_digits = self.next_n_digits::<2>()?; let offset_hour = i64::from(offset_hour_digits[0] * 10 + offset_hour_digits[1]); if offset_hour > 23 { return None; } - self.offset = -offset_hour * 60; + self.offset = if neg { offset_hour } else { -offset_hour } * 60; if self.input.peek().is_none() { return Some(()); } @@ -1026,11 +1023,7 @@ impl<'a> DateParser<'a> { if offset_minute > 59 { return None; } - self.offset += if b == b'-' { - -offset_minute - } else { - offset_minute - }; + self.offset += if neg { offset_minute } else { -offset_minute }; } _ => return None, } From e01bf469e29ec0be9f0dafbd627f201435f87ae1 Mon Sep 17 00:00:00 2001 From: CrazyboyQCD Date: Sat, 25 Jan 2025 16:55:08 +0800 Subject: [PATCH 4/7] chore: use `next_digit` in `next_n_digits` --- core/engine/src/builtins/date/utils.rs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/core/engine/src/builtins/date/utils.rs b/core/engine/src/builtins/date/utils.rs index cc58a9927b2..83b0892d91c 100644 --- a/core/engine/src/builtins/date/utils.rs +++ b/core/engine/src/builtins/date/utils.rs @@ -825,7 +825,6 @@ impl<'a> DateParser<'a> { .and_then(|c| if *c == expect { Some(()) } else { None }) } - #[allow(unused)] fn next_digit(&mut self) -> Option { self.input.next().and_then(|c| { if c.is_ascii_digit() { @@ -837,17 +836,9 @@ impl<'a> DateParser<'a> { } fn next_n_digits(&mut self) -> Option<[u8; N]> { - if self.input.len() < N { - return None; - } let mut digits = [0; N]; for digit in &mut digits { - // SAFETY: Bound check has been done above. - let c = unsafe { *self.input.next().unwrap_unchecked() }; - if !c.is_ascii_digit() { - return None; - } - *digit = c - b'0'; + *digit = self.next_digit()?; } Some(digits) } From 3e2b90eeecbbc4a0bcd21782456e513a313ef50f Mon Sep 17 00:00:00 2001 From: CrazyboyQCD Date: Sat, 1 Feb 2025 08:33:49 +0800 Subject: [PATCH 5/7] chore: add `parse_n_ascii_digits` to remove duplicate calculation code --- core/engine/src/builtins/date/utils.rs | 107 +++++++++++++------------ 1 file changed, 57 insertions(+), 50 deletions(-) diff --git a/core/engine/src/builtins/date/utils.rs b/core/engine/src/builtins/date/utils.rs index 83b0892d91c..d6c84579794 100644 --- a/core/engine/src/builtins/date/utils.rs +++ b/core/engine/src/builtins/date/utils.rs @@ -825,24 +825,48 @@ impl<'a> DateParser<'a> { .and_then(|c| if *c == expect { Some(()) } else { None }) } - fn next_digit(&mut self) -> Option { - self.input.next().and_then(|c| { - if c.is_ascii_digit() { - Some(c - b'0') - } else { - None - } - }) + fn next_ascii_digit(&mut self) -> Option { + self.input + .next() + .and_then(|c| if c.is_ascii_digit() { Some(*c) } else { None }) } - fn next_n_digits(&mut self) -> Option<[u8; N]> { + fn next_n_ascii_digits(&mut self) -> Option<[u8; N]> { let mut digits = [0; N]; for digit in &mut digits { - *digit = self.next_digit()?; + *digit = self.next_ascii_digit()?; } Some(digits) } + #[allow(clippy::items_after_statements)] + #[allow(clippy::inline_always)] + fn parse_n_ascii_digits(&mut self) -> Option { + assert!(N <= 8, "parse_n_ascii_digits parses no more than 8 digits"); + + let digits = self.next_n_ascii_digits::()?; + if N <= 4 { + let mut res = 0; + for digit in digits { + res = res * 10 + u64::from(digit & 0xF); + } + Some(res) + } else { + // Copied from https://github.com/RoDmitry/atoi_simd/blob/master/src/fallback.rs + #[inline(always)] + fn process_8(mut val: u64, len: usize) -> u64 { + val <<= 64_usize.saturating_sub(len << 3); // << 3 - same as mult by 8 + val = (val & 0x0F0F_0F0F_0F0F_0F0F).wrapping_mul(0xA01) >> 8; + val = (val & 0x00FF_00FF_00FF_00FF).wrapping_mul(0x64_0001) >> 16; + (val & 0x0000_FFFF_0000_FFFF).wrapping_mul(0x2710_0000_0001) >> 32 + } + let mut src = [0; 8]; + src[..N].copy_from_slice(&digits); + let val = u64::from_le_bytes(src); + Some(process_8(val, N)) + } + } + fn finish(&mut self) -> Option { if self.input.peek().is_some() { return None; @@ -891,6 +915,7 @@ impl<'a> DateParser<'a> { } } + #[allow(clippy::as_conversions)] fn parse(&mut self) -> Option { self.parse_year()?; match self.input.peek() { @@ -899,8 +924,7 @@ impl<'a> DateParser<'a> { _ => {} } self.next_expect(b'-')?; - let month_digits = self.next_n_digits::<2>()?; - self.month = u32::from(month_digits[0] * 10 + month_digits[1]); + self.month = self.parse_n_ascii_digits::<2>()? as u32; if self.month < 1 || self.month > 12 { return None; } @@ -910,8 +934,7 @@ impl<'a> DateParser<'a> { _ => {} } self.next_expect(b'-')?; - let day_digits = self.next_n_digits::<2>()?; - self.day = u32::from(day_digits[0] * 10 + day_digits[1]); + self.day = self.parse_n_ascii_digits::<2>()? as u32; if self.day < 1 || self.day > 31 { return None; } @@ -921,43 +944,32 @@ impl<'a> DateParser<'a> { } } + #[allow(clippy::as_conversions)] fn parse_year(&mut self) -> Option<()> { - match self.input.next()? { - sign @ (b'+' | b'-') => { - let digits = self.next_n_digits::<6>()?.map(i32::from); - let year = digits[0] * 100_000 - + digits[1] * 10000 - + digits[2] * 1000 - + digits[3] * 100 - + digits[4] * 10 - + digits[5]; - let neg = *sign == b'-'; - if neg && year == 0 { - return None; - } - self.year = if neg { -year } else { year }; - Some(()) - } - c if c.is_ascii_digit() => { - let digits = self.next_n_digits::<3>()?.map(i32::from); - self.year = - i32::from(c - b'0') * 1000 + digits[0] * 100 + digits[1] * 10 + digits[2]; - Some(()) + if let &&sign @ (b'+' | b'-') = self.input.peek()? { + // Consume the sign. + self.input.next(); + let year = self.parse_n_ascii_digits::<6>()? as i32; + let neg = sign == b'-'; + if neg && year == 0 { + return None; } - _ => None, + self.year = if neg { -year } else { year }; + } else { + self.year = self.parse_n_ascii_digits::<4>()? as i32; } + Some(()) } + #[allow(clippy::as_conversions)] fn parse_time(&mut self) -> Option { self.next_expect(b'T')?; - let hour_digits = self.next_n_digits::<2>()?; - self.hour = u32::from(hour_digits[0] * 10 + hour_digits[1]); + self.hour = self.parse_n_ascii_digits::<2>()? as u32; if self.hour > 24 { return None; } self.next_expect(b':')?; - let minute_digits = self.next_n_digits::<2>()?; - self.minute = u32::from(minute_digits[0] * 10 + minute_digits[1]); + self.minute = self.parse_n_ascii_digits::<2>()? as u32; if self.minute > 59 { return None; } @@ -969,8 +981,7 @@ impl<'a> DateParser<'a> { return self.finish(); } }; - let second_digits = self.next_n_digits::<2>()?; - self.second = u32::from(second_digits[0] * 10 + second_digits[1]); + self.second = self.parse_n_ascii_digits::<2>()? as u32; if self.second > 59 { return None; } @@ -982,9 +993,7 @@ impl<'a> DateParser<'a> { return self.finish(); } }; - let millisecond_digits = self.next_n_digits::<3>()?.map(u32::from); - self.millisecond = - millisecond_digits[0] * 100 + millisecond_digits[1] * 10 + millisecond_digits[2]; + self.millisecond = self.parse_n_ascii_digits::<3>()? as u32; if self.input.peek().is_some() { self.parse_timezone()?; self.finish() @@ -993,13 +1002,13 @@ impl<'a> DateParser<'a> { } } + #[allow(clippy::as_conversions)] fn parse_timezone(&mut self) -> Option<()> { match self.input.next() { Some(b'Z') => return Some(()), Some(sign @ (b'+' | b'-')) => { let neg = *sign == b'-'; - let offset_hour_digits = self.next_n_digits::<2>()?; - let offset_hour = i64::from(offset_hour_digits[0] * 10 + offset_hour_digits[1]); + let offset_hour = self.parse_n_ascii_digits::<2>()? as i64; if offset_hour > 23 { return None; } @@ -1008,9 +1017,7 @@ impl<'a> DateParser<'a> { return Some(()); } self.next_expect(b':')?; - let offset_minute_digits = self.next_n_digits::<2>()?; - let offset_minute = - i64::from(offset_minute_digits[0] * 10 + offset_minute_digits[1]); + let offset_minute = self.parse_n_ascii_digits::<2>()? as i64; if offset_minute > 59 { return None; } From c9cf38f67d83743c9f2df0fa4ba9ed790d9f505b Mon Sep 17 00:00:00 2001 From: CrazyboyQCD Date: Sat, 8 Feb 2025 09:04:48 +0800 Subject: [PATCH 6/7] chore: add ASCII check before convert `Latin1` into `str` --- core/engine/src/builtins/date/utils.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/core/engine/src/builtins/date/utils.rs b/core/engine/src/builtins/date/utils.rs index d6c84579794..900f84a9e0e 100644 --- a/core/engine/src/builtins/date/utils.rs +++ b/core/engine/src/builtins/date/utils.rs @@ -756,9 +756,13 @@ pub(super) fn parse_date(date: &JsString, hooks: &dyn HostHooks) -> Option let owned_js_str = date.as_str(); let owned_string: String; let date = match owned_js_str.variant() { - JsStrVariant::Latin1(s) => - // SAFETY: Since all characters are ASCII we can safely convert this into str. - unsafe { str::from_utf8_unchecked(s) }, + JsStrVariant::Latin1(s) => { + if !s.is_ascii() { + return None; + } + // SAFETY: Since all characters are ASCII we can safely convert this into str. + unsafe { str::from_utf8_unchecked(s) } + } JsStrVariant::Utf16(s) => { owned_string = String::from_utf16(s).ok()?; if !owned_string.is_ascii() { From 51974b729389b40c1d1a123697876ca942bc804e Mon Sep 17 00:00:00 2001 From: CrazyboyQCD Date: Sat, 8 Feb 2025 12:02:48 +0800 Subject: [PATCH 7/7] chore: add tests for digits parsing --- core/engine/src/builtins/date/tests.rs | 27 ++++++++++- core/engine/src/builtins/date/utils.rs | 67 ++++++++++++++++++-------- 2 files changed, 72 insertions(+), 22 deletions(-) diff --git a/core/engine/src/builtins/date/tests.rs b/core/engine/src/builtins/date/tests.rs index 7cd287352d9..392394a1a50 100644 --- a/core/engine/src/builtins/date/tests.rs +++ b/core/engine/src/builtins/date/tests.rs @@ -1,4 +1,7 @@ -use crate::{js_string, run_test_actions, JsNativeErrorKind, TestAction}; +use crate::{ + builtins::date::utils::fast_atoi::{process_4, process_8}, + js_string, run_test_actions, JsNativeErrorKind, TestAction, +}; use boa_macros::js_str; use indoc::indoc; use time::{macros::format_description, OffsetDateTime}; @@ -72,6 +75,28 @@ fn timestamp_from_utc( t.unix_timestamp() * 1000 + i64::from(t.millisecond()) } +#[test] +fn parse_ascii_digits() { + let parse_8_ascii_digits = |val: &[u8; 8], len: usize| -> u64 { + let val = u64::from_le_bytes(*val); + process_8(val, len) + }; + assert_eq!(12_345_678, parse_8_ascii_digits(b"12345678", 8)); + assert_eq!(123_456, parse_8_ascii_digits(b"123456xx", 6)); + assert_eq!(123, parse_8_ascii_digits(b"123xxxxx", 3)); + assert_eq!(123, parse_8_ascii_digits(b"000123xx", 6)); + assert_eq!(0, parse_8_ascii_digits(b"00000000", 8)); + let parse_4_ascii_digits = |val: &[u8; 4], len: usize| -> u64 { + let val = u32::from_le_bytes(*val); + u64::from(process_4(val, len)) + }; + assert_eq!(1234, parse_4_ascii_digits(b"1234", 4)); + assert_eq!(12, parse_4_ascii_digits(b"12xx", 2)); + assert_eq!(3, parse_4_ascii_digits(b"003x", 3)); + assert_eq!(23, parse_4_ascii_digits(b"023x", 3)); + assert_eq!(0, parse_4_ascii_digits(b"0000", 8)); +} + #[test] fn date_this_time_value() { run_test_actions([TestAction::assert_native_error( diff --git a/core/engine/src/builtins/date/utils.rs b/core/engine/src/builtins/date/utils.rs index 900f84a9e0e..9d25feae0e5 100644 --- a/core/engine/src/builtins/date/utils.rs +++ b/core/engine/src/builtins/date/utils.rs @@ -807,6 +807,27 @@ struct DateParser<'a> { offset: i64, } +// Copied from https://github.com/RoDmitry/atoi_simd/blob/master/src/fallback.rs, +// which is based on https://rust-malaysia.github.io/code/2020/07/11/faster-integer-parsing.html. +#[doc(hidden)] +#[allow(clippy::inline_always)] +pub(in crate::builtins::date) mod fast_atoi { + #[inline(always)] + pub(in crate::builtins::date) const fn process_8(mut val: u64, len: usize) -> u64 { + val <<= 64_usize.saturating_sub(len << 3); // << 3 - same as mult by 8 + val = (val & 0x0F0F_0F0F_0F0F_0F0F).wrapping_mul(0xA01) >> 8; + val = (val & 0x00FF_00FF_00FF_00FF).wrapping_mul(0x64_0001) >> 16; + (val & 0x0000_FFFF_0000_FFFF).wrapping_mul(0x2710_0000_0001) >> 32 + } + + #[inline(always)] + pub(in crate::builtins::date) const fn process_4(mut val: u32, len: usize) -> u32 { + val <<= 32_usize.saturating_sub(len << 3); // << 3 - same as mult by 8 + val = (val & 0x0F0F_0F0F).wrapping_mul(0xA01) >> 8; + (val & 0x00FF_00FF).wrapping_mul(0x64_0001) >> 16 + } +} + impl<'a> DateParser<'a> { fn new(s: &'a str, hooks: &'a dyn HostHooks) -> Self { Self { @@ -843,31 +864,35 @@ impl<'a> DateParser<'a> { Some(digits) } - #[allow(clippy::items_after_statements)] - #[allow(clippy::inline_always)] fn parse_n_ascii_digits(&mut self) -> Option { assert!(N <= 8, "parse_n_ascii_digits parses no more than 8 digits"); - - let digits = self.next_n_ascii_digits::()?; - if N <= 4 { - let mut res = 0; - for digit in digits { - res = res * 10 + u64::from(digit & 0xF); + if N == 0 { + return None; + } + let ascii_digits = self.next_n_ascii_digits::()?; + match N { + 1..4 => { + // When N is small, process digits naively. + let mut res = 0; + for digit in ascii_digits { + res = res * 10 + u64::from(digit & 0xF); + } + Some(res) } - Some(res) - } else { - // Copied from https://github.com/RoDmitry/atoi_simd/blob/master/src/fallback.rs - #[inline(always)] - fn process_8(mut val: u64, len: usize) -> u64 { - val <<= 64_usize.saturating_sub(len << 3); // << 3 - same as mult by 8 - val = (val & 0x0F0F_0F0F_0F0F_0F0F).wrapping_mul(0xA01) >> 8; - val = (val & 0x00FF_00FF_00FF_00FF).wrapping_mul(0x64_0001) >> 16; - (val & 0x0000_FFFF_0000_FFFF).wrapping_mul(0x2710_0000_0001) >> 32 + 4 => { + // Process digits as an u32 block. + let mut src = [0; 4]; + src[..N].copy_from_slice(&ascii_digits); + let val = u32::from_le_bytes(src); + Some(u64::from(fast_atoi::process_4(val, N))) + } + _ => { + // Process digits as an u64 block. + let mut src = [0; 8]; + src[..N].copy_from_slice(&ascii_digits); + let val = u64::from_le_bytes(src); + Some(fast_atoi::process_8(val, N)) } - let mut src = [0; 8]; - src[..N].copy_from_slice(&digits); - let val = u64::from_le_bytes(src); - Some(process_8(val, N)) } }