From 61ce8a843cda4ebdf887dd34f992089e3865d0d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elias=20Sj=C3=B6green?= Date: Sun, 19 Jan 2025 16:10:49 +0100 Subject: [PATCH] feat(ecmascript): Implement Annex-B string prototype methods I also added some feature flags for including parts of or the whole of annex-b. Made them on-by-default, but might make more sense for them to be off? --- nova_vm/Cargo.toml | 15 +- nova_vm/src/builtin_strings | 17 +- .../string_objects/string_prototype.rs | 462 +++++++++++++++++- 3 files changed, 488 insertions(+), 6 deletions(-) diff --git a/nova_vm/Cargo.toml b/nova_vm/Cargo.toml index 6200fc72..64453297 100644 --- a/nova_vm/Cargo.toml +++ b/nova_vm/Cargo.toml @@ -40,7 +40,8 @@ default = [ "shared-array-buffer", "weak-refs", "atomics", - "regexp" + "regexp", + "annex-b" ] array-buffer = [] atomics = ["array-buffer", "shared-array-buffer"] @@ -52,6 +53,18 @@ regexp = [] shared-array-buffer = [] weak-refs = [] typescript = [] +# Enables features defined by [Annex B](https://tc39.es/ecma262/#sec-additional-ecmascript-features-for-web-browsers) +annex-b = ["annex-b-string", "annex-b-global", "annex-b-date", "annex-b-regexp"] +# Adds the additional properties to the global object as defined by Annex B section [B.2.1](https://tc39.es/ecma262/#sec-additional-properties-of-the-global-object) +annex-b-global = [] +# Adds the additional properties to the string prototype as defined by Annex B section [B.2.2](https://tc39.es/ecma262/#sec-additional-properties-of-the-string.prototype-object) +annex-b-string = [] +# Adds the additional properties to the date prototype as defined by Annex B section [B.2.3](https://tc39.es/ecma262/#sec-additional-properties-of-the-date.prototype-object) +annex-b-date = ["date"] +# Adds the additional properties and syntax to regular expressions as defined by Annex B section: +# - [B.1.2](https://tc39.es/ecma262/#sec-regular-expressions-patterns) +# - 
[B.2.4](https://tc39.es/ecma262/#sec-additional-properties-of-the-regexp.prototype-object) +annex-b-regexp = ["regexp"] [build-dependencies] small_string = { path = "../small_string" } diff --git a/nova_vm/src/builtin_strings b/nova_vm/src/builtin_strings index ba070b23..05221278 100644 --- a/nova_vm/src/builtin_strings +++ b/nova_vm/src/builtin_strings @@ -31,6 +31,7 @@ AggregateError all allSettled and +anchor anonymous any apply @@ -52,11 +53,14 @@ atan atan2 atanh Atomics +big bigint BigInt BigInt64Array BigUint64Array bind +blink +bold boolean Boolean bound @@ -125,12 +129,15 @@ find findIndex findLast findLastIndex +fixed flags flat flatMap Float32Array Float64Array floor +fontcolor +fontsize for forEach freeze @@ -232,11 +239,13 @@ isSafeInteger isSealed isView isWellFormed +italics iterator join JSON keyFor keys +link lastIndex lastIndexOf length @@ -356,6 +365,7 @@ sin sinh size slice +small some sort source @@ -368,13 +378,16 @@ SQRT2 startsWith sticky store +strike string String -stringify String Iterator +stringify sub subarray +substr substring +sup symbol Symbol Symbol.asyncIterator @@ -423,6 +436,8 @@ transfer transferToFixedLength trim trimEnd +trimLeft +trimRight trimStart true trunc diff --git a/nova_vm/src/ecmascript/builtins/text_processing/string_objects/string_prototype.rs b/nova_vm/src/ecmascript/builtins/text_processing/string_objects/string_prototype.rs index 20a304a9..67b491b1 100644 --- a/nova_vm/src/ecmascript/builtins/text_processing/string_objects/string_prototype.rs +++ b/nova_vm/src/ecmascript/builtins/text_processing/string_objects/string_prototype.rs @@ -259,6 +259,154 @@ impl Builtin for StringPrototypeIterator { const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::iterator); } +#[cfg(feature = "annex-b-string")] +struct StringPrototypeSubstr; +#[cfg(feature = "annex-b-string")] +impl Builtin for StringPrototypeSubstr { + const NAME: String<'static> = BUILTIN_STRING_MEMORY.substr; + const LENGTH: u8 = 2; + const 
BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::substr);
+}
+
+#[cfg(feature = "annex-b-string")]
+struct StringPrototypeAnchor;
+#[cfg(feature = "annex-b-string")]
+impl Builtin for StringPrototypeAnchor {
+    const NAME: String<'static> = BUILTIN_STRING_MEMORY.anchor;
+    const LENGTH: u8 = 1; // `anchor ( name )` declares one parameter
+    const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::anchor);
+}
+
+#[cfg(feature = "annex-b-string")]
+struct StringPrototypeBig;
+#[cfg(feature = "annex-b-string")]
+impl Builtin for StringPrototypeBig {
+    const NAME: String<'static> = BUILTIN_STRING_MEMORY.big;
+    const LENGTH: u8 = 0;
+    const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::big);
+}
+
+#[cfg(feature = "annex-b-string")]
+struct StringPrototypeBlink;
+#[cfg(feature = "annex-b-string")]
+impl Builtin for StringPrototypeBlink {
+    const NAME: String<'static> = BUILTIN_STRING_MEMORY.blink;
+    const LENGTH: u8 = 0;
+    const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::blink);
+}
+
+#[cfg(feature = "annex-b-string")]
+struct StringPrototypeBold;
+#[cfg(feature = "annex-b-string")]
+impl Builtin for StringPrototypeBold {
+    const NAME: String<'static> = BUILTIN_STRING_MEMORY.bold;
+    const LENGTH: u8 = 0;
+    const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::bold);
+}
+
+#[cfg(feature = "annex-b-string")]
+struct StringPrototypeFixed;
+#[cfg(feature = "annex-b-string")]
+impl Builtin for StringPrototypeFixed {
+    const NAME: String<'static> = BUILTIN_STRING_MEMORY.fixed;
+    const LENGTH: u8 = 0;
+    const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::fixed);
+}
+
+#[cfg(feature = "annex-b-string")]
+struct StringPrototypeFontcolor;
+#[cfg(feature = "annex-b-string")]
+impl Builtin for StringPrototypeFontcolor {
+    const NAME: String<'static> = BUILTIN_STRING_MEMORY.fontcolor;
+    const LENGTH: u8 = 1;
+    const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::fontcolor);
+}
+
+#[cfg(feature = "annex-b-string")]
+struct StringPrototypeFontsize;
+#[cfg(feature = "annex-b-string")] +impl Builtin for StringPrototypeFontsize { + const NAME: String<'static> = BUILTIN_STRING_MEMORY.fontsize; + const LENGTH: u8 = 1; + const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::fontsize); +} + +#[cfg(feature = "annex-b-string")] +struct StringPrototypeItalics; +#[cfg(feature = "annex-b-string")] +impl Builtin for StringPrototypeItalics { + const NAME: String<'static> = BUILTIN_STRING_MEMORY.italics; + const LENGTH: u8 = 0; + const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::italics); +} + +#[cfg(feature = "annex-b-string")] +struct StringPrototypeLink; +#[cfg(feature = "annex-b-string")] +impl Builtin for StringPrototypeLink { + const NAME: String<'static> = BUILTIN_STRING_MEMORY.link; + const LENGTH: u8 = 1; + const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::link); +} + +#[cfg(feature = "annex-b-string")] +struct StringPrototypeSmall; +#[cfg(feature = "annex-b-string")] +impl Builtin for StringPrototypeSmall { + const NAME: String<'static> = BUILTIN_STRING_MEMORY.small; + const LENGTH: u8 = 0; + const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::small); +} + +#[cfg(feature = "annex-b-string")] +struct StringPrototypeStrike; +#[cfg(feature = "annex-b-string")] +impl Builtin for StringPrototypeStrike { + const NAME: String<'static> = BUILTIN_STRING_MEMORY.strike; + const LENGTH: u8 = 0; + const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::strike); +} + +#[cfg(feature = "annex-b-string")] +struct StringPrototypeSub; +#[cfg(feature = "annex-b-string")] +impl Builtin for StringPrototypeSub { + const NAME: String<'static> = BUILTIN_STRING_MEMORY.sub; + const LENGTH: u8 = 0; + const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::sub); +} + +#[cfg(feature = "annex-b-string")] +struct StringPrototypeSup; +#[cfg(feature = "annex-b-string")] +impl Builtin for StringPrototypeSup { + const NAME: String<'static> = BUILTIN_STRING_MEMORY.sup; + const 
LENGTH: u8 = 0;
+    const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::sup);
+}
+
+// NOTE(review): Annex B defines "trimLeft" as the same function object as
+// %String.prototype.trimStart% (and "trimRight" as %String.prototype.trimEnd%).
+// These descriptors look like they register separate builtins; confirm that
+// `String.prototype.trimLeft === String.prototype.trimStart` still holds.
+#[cfg(feature = "annex-b-string")]
+struct StringPrototypeTrimLeft;
+#[cfg(feature = "annex-b-string")]
+impl Builtin for StringPrototypeTrimLeft {
+    const NAME: String<'static> = BUILTIN_STRING_MEMORY.trimStart;
+    const KEY: Option<PropertyKey<'static>> =
+        Some(BUILTIN_STRING_MEMORY.trimLeft.to_property_key());
+    const LENGTH: u8 = 0;
+    const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::trim_start);
+}
+
+#[cfg(feature = "annex-b-string")]
+struct StringPrototypeTrimRight;
+#[cfg(feature = "annex-b-string")]
+impl Builtin for StringPrototypeTrimRight {
+    const NAME: String<'static> = BUILTIN_STRING_MEMORY.trimEnd;
+    const KEY: Option<PropertyKey<'static>> =
+        Some(BUILTIN_STRING_MEMORY.trimRight.to_property_key());
+    const LENGTH: u8 = 0;
+    const BEHAVIOUR: Behaviour = Behaviour::Regular(StringPrototype::trim_end);
+}
+
 impl StringPrototype {
     fn at(
         agent: &mut Agent,
@@ -1787,6 +1935,225 @@ impl StringPrototype {
         todo!()
     }
 
+    /// ### [B.2.2.1 String.prototype.substr ( start, length )](https://tc39.es/ecma262/#sec-string.prototype.substr)
+    ///
+    /// This method returns a substring of the result of converting the this
+    /// value to a String, starting from index start and running for length
+    /// code units (or through the end of the String if length is undefined).
+    /// If start is negative, it is treated as sourceLength + start where
+    /// sourceLength is the length of the String. The result is a String value,
+    /// not a String object.
+    #[cfg(feature = "annex-b-string")]
+    fn substr(
+        agent: &mut Agent,
+        this_value: Value,
+        args: ArgumentsList,
+        mut gc: GcScope,
+    ) -> JsResult<Value> {
+        let start = args.get(0).bind(gc.nogc());
+        let length = args.get(1).bind(gc.nogc());
+
+        let s = if let Ok(s) = String::try_from(this_value) {
+            s.bind(gc.nogc())
+        } else {
+            // 1. Let O be ? RequireObjectCoercible(this value).
+            let o = require_object_coercible(agent, this_value, gc.nogc())?;
+            // 2. Let S be ? ToString(O).
+            to_string(agent, o, gc.reborrow())?.unbind().bind(gc.nogc())
+        };
+        let scoped_s = s.scope(agent, gc.nogc());
+
+        // 3. Let size be the length of S.
+        let size = s.utf16_len(agent) as i64;
+
+        // 4. Let intStart be ? ToIntegerOrInfinity(start).
+        let int_start = to_integer_or_infinity(agent, start, gc.reborrow())?;
+
+        // 5. If intStart = -∞, set intStart to 0.
+        let int_start = if int_start.is_neg_infinity() {
+            0
+        } else if int_start.is_negative() {
+            // 6. Else if intStart < 0, set intStart to max(size + intStart, 0).
+            (int_start.into_i64() + size).max(0)
+        } else {
+            // 7. Else, set intStart to min(intStart, size).
+            int_start.into_i64().min(size)
+        };
+
+        // 8. If length is undefined, let intLength be size; otherwise let intLength be ? ToIntegerOrInfinity(length).
+        let int_length = if length.is_undefined() {
+            size
+        } else {
+            to_integer_or_infinity(agent, length, gc.reborrow())?.into_i64()
+        };
+
+        // 9. Set intLength to the result of clamping intLength between 0 and size.
+        let int_length = int_length.clamp(0, size);
+
+        // 10. Let intEnd be min(intStart + intLength, size).
+        let int_end = (int_start + int_length).min(size);
+
+        // 11. Return the substring of S from intStart to intEnd.
+        let s = scoped_s.get(agent).bind(gc.nogc());
+        let s_str = s.as_str(agent);
+        // intStart and intEnd count UTF-16 code units, but `s_str` is indexed
+        // in UTF-8 bytes. Map each offset to the byte index of the character
+        // that starts at (or first after) it, so the slice below always falls
+        // on a character boundary instead of panicking on non-ASCII input.
+        let utf8_index = |utf16_index: i64| -> usize {
+            let mut code_units = 0i64;
+            for (byte_index, ch) in s_str.char_indices() {
+                if code_units >= utf16_index {
+                    return byte_index;
+                }
+                code_units += ch.len_utf16() as i64;
+            }
+            s_str.len()
+        };
+        let byte_start = utf8_index(int_start);
+        let byte_end = utf8_index(int_end);
+        Ok(String::from_string(
+            agent,
+            s_str[byte_start..byte_end].to_string(),
+            gc.nogc(),
+        )
+        .into_value())
+    }
+
+    /// ### [B.2.2.2 String.prototype.anchor ( name )](https://tc39.es/ecma262/#sec-string.prototype.anchor)
+    #[cfg(feature = "annex-b-string")]
+    fn anchor(
+        agent: &mut Agent,
+        this_value: Value,
+        args: ArgumentsList,
+        gc: GcScope,
+    ) -> JsResult<Value> {
+        let name = args.get(0).bind(gc.nogc());
+
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "a", "name", name).
+        create_html(agent, this_value, "a", Some(("name", name)), gc).map(Value::from)
+    }
+
+    /// ### [B.2.2.3 String.prototype.big ( )](https://tc39.es/ecma262/#sec-string.prototype.big)
+    // Gated like `create_html`, which is only compiled with this feature.
+    #[cfg(feature = "annex-b-string")]
+    fn big(agent: &mut Agent, this_value: Value, _: ArgumentsList, gc: GcScope) -> JsResult<Value> {
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "big", "", "").
+        create_html(agent, this_value, "big", None, gc).map(Value::from)
+    }
+
+    /// ### [B.2.2.4 String.prototype.blink ( )](https://tc39.es/ecma262/#sec-string.prototype.blink)
+    #[cfg(feature = "annex-b-string")]
+    fn blink(
+        agent: &mut Agent,
+        this_value: Value,
+        _: ArgumentsList,
+        gc: GcScope,
+    ) -> JsResult<Value> {
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "blink", "", "").
+        create_html(agent, this_value, "blink", None, gc).map(Value::from)
+    }
+
+    /// ### [B.2.2.5 String.prototype.bold ( )](https://tc39.es/ecma262/#sec-string.prototype.bold)
+    #[cfg(feature = "annex-b-string")]
+    fn bold(
+        agent: &mut Agent,
+        this_value: Value,
+        _: ArgumentsList,
+        gc: GcScope,
+    ) -> JsResult<Value> {
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "b", "", "").
+        create_html(agent, this_value, "b", None, gc).map(Value::from)
+    }
+
+    /// ### [B.2.2.6 String.prototype.fixed ( )](https://tc39.es/ecma262/#sec-string.prototype.fixed)
+    #[cfg(feature = "annex-b-string")]
+    fn fixed(
+        agent: &mut Agent,
+        this_value: Value,
+        _: ArgumentsList,
+        gc: GcScope,
+    ) -> JsResult<Value> {
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "tt", "", "").
+        create_html(agent, this_value, "tt", None, gc).map(Value::from)
+    }
+
+    /// ### [B.2.2.7 String.prototype.fontcolor ( colour )](https://tc39.es/ecma262/#sec-string.prototype.fontcolor)
+    #[cfg(feature = "annex-b-string")]
+    fn fontcolor(
+        agent: &mut Agent,
+        this_value: Value,
+        args: ArgumentsList,
+        gc: GcScope,
+    ) -> JsResult<Value> {
+        let colour = args.get(0).bind(gc.nogc());
+
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "font", "color", colour).
+        // The emitted HTML attribute is spelled "color"; only the parameter is named `colour`.
+        create_html(agent, this_value, "font", Some(("color", colour)), gc).map(Value::from)
+    }
+
+    /// ### [B.2.2.8 String.prototype.fontsize ( size )](https://tc39.es/ecma262/#sec-string.prototype.fontsize)
+    #[cfg(feature = "annex-b-string")]
+    fn fontsize(
+        agent: &mut Agent,
+        this_value: Value,
+        args: ArgumentsList,
+        gc: GcScope,
+    ) -> JsResult<Value> {
+        let size = args.get(0).bind(gc.nogc());
+
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "font", "size", size).
+        create_html(agent, this_value, "font", Some(("size", size)), gc).map(Value::from)
+    }
+
+    /// ### [B.2.2.9 String.prototype.italics ( )](https://tc39.es/ecma262/#sec-string.prototype.italics)
+    #[cfg(feature = "annex-b-string")]
+    fn italics(
+        agent: &mut Agent,
+        this_value: Value,
+        _: ArgumentsList,
+        gc: GcScope,
+    ) -> JsResult<Value> {
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "i", "", "").
+        create_html(agent, this_value, "i", None, gc).map(Value::from)
+    }
+
+    /// ### [B.2.2.10 String.prototype.link ( url )](https://tc39.es/ecma262/#sec-string.prototype.link)
+    #[cfg(feature = "annex-b-string")]
+    fn link(
+        agent: &mut Agent,
+        this_value: Value,
+        args: ArgumentsList,
+        gc: GcScope,
+    ) -> JsResult<Value> {
+        let url = args.get(0).bind(gc.nogc());
+
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "a", "href", url).
+        create_html(agent, this_value, "a", Some(("href", url)), gc).map(Value::from)
+    }
+
+    /// ### [B.2.2.11 String.prototype.small ( )](https://tc39.es/ecma262/#sec-string.prototype.small)
+    #[cfg(feature = "annex-b-string")]
+    fn small(
+        agent: &mut Agent,
+        this_value: Value,
+        _: ArgumentsList,
+        gc: GcScope,
+    ) -> JsResult<Value> {
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "small", "", "").
+        create_html(agent, this_value, "small", None, gc).map(Value::from)
+    }
+
+    /// ### [B.2.2.12 String.prototype.strike ( )](https://tc39.es/ecma262/#sec-string.prototype.strike)
+    #[cfg(feature = "annex-b-string")]
+    fn strike(
+        agent: &mut Agent,
+        this_value: Value,
+        _: ArgumentsList,
+        gc: GcScope,
+    ) -> JsResult<Value> {
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "strike", "", "").
+        create_html(agent, this_value, "strike", None, gc).map(Value::from)
+    }
+
+    /// ### [B.2.2.13 String.prototype.sub ( )](https://tc39.es/ecma262/#sec-string.prototype.sub)
+    #[cfg(feature = "annex-b-string")]
+    fn sub(agent: &mut Agent, this_value: Value, _: ArgumentsList, gc: GcScope) -> JsResult<Value> {
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "sub", "", "").
+        create_html(agent, this_value, "sub", None, gc).map(Value::from)
+    }
+
+    /// ### [B.2.2.14 String.prototype.sup ( )](https://tc39.es/ecma262/#sec-string.prototype.sup)
+    #[cfg(feature = "annex-b-string")]
+    fn sup(agent: &mut Agent, this_value: Value, _: ArgumentsList, gc: GcScope) -> JsResult<Value> {
+        // 1. Let S be the this value.
+        // 2. Return ? CreateHTML(S, "sup", "", "").
+        create_html(agent, this_value, "sup", None, gc).map(Value::from)
+    }
+
     pub(crate) fn create_intrinsic(agent: &mut Agent, realm: RealmIdentifier) {
         let intrinsics = agent.get_realm(realm).intrinsics();
         let object_prototype = intrinsics.object_prototype();
@@ -1794,8 +2161,12 @@ impl StringPrototype {
         let this_base_object = intrinsics.string_prototype_base_object().into();
         let string_constructor = intrinsics.string();
 
-        OrdinaryObjectBuilder::new_intrinsic_object(agent, realm, this_base_object)
-            .with_property_capacity(36)
+        let builder = OrdinaryObjectBuilder::new_intrinsic_object(agent, realm, this_base_object)
+            .with_property_capacity(if cfg!(feature = "annex-b-string") {
+                52
+            } else {
+                36
+            })
             .with_prototype(object_prototype)
             .with_builtin_function_property::()
             .with_builtin_function_property::()
@@ -1832,8 +2203,28 @@ impl StringPrototype {
             .with_builtin_intrinsic_function_property::()
             .with_builtin_intrinsic_function_property::()
             .with_builtin_function_property::()
-            .with_builtin_function_property::()
-            .build();
+            .with_builtin_function_property::();
+
+        #[cfg(feature = "annex-b-string")]
+        let builder = builder
+            .with_builtin_function_property::()
+            .with_builtin_function_property::()
+            .with_builtin_function_property::()
+            .with_builtin_function_property::()
+            
.with_builtin_function_property::()
+            .with_builtin_function_property::()
+            .with_builtin_function_property::()
+            .with_builtin_function_property::()
+            .with_builtin_function_property::()
+            .with_builtin_function_property::()
+            .with_builtin_function_property::()
+            .with_builtin_function_property::()
+            .with_builtin_function_property::()
+            .with_builtin_function_property::()
+            .with_builtin_function_property::()
+            .with_builtin_function_property::();
+
+        builder.build();
 
         let slot = agent
             .heap
@@ -2028,6 +2419,69 @@ fn this_string_value<'gc>(
     }
 }
 
+/// ### [B.2.2.2.1 CreateHTML ( string, tag, attribute, value )](https://tc39.es/ecma262/#sec-createhtml)
+///
+/// The abstract operation CreateHTML takes arguments string (an ECMAScript
+/// language value), tag (a String), attribute (a String), and value (an
+/// ECMAScript language value) and returns either a normal completion
+/// containing a String or a throw completion.
+///
+/// `attribute_and_value` is `None` where the specification passes the empty
+/// String as attribute; in that case no attribute is emitted and `value` is
+/// never converted.
+#[cfg(feature = "annex-b-string")]
+fn create_html<'gc>(
+    agent: &mut Agent,
+    string: Value,
+    tag: &str,
+    attribute_and_value: Option<(&str, Value)>,
+    mut gc: GcScope<'gc, '_>,
+) -> JsResult<String<'gc>> {
+    // 1. Let str be ? RequireObjectCoercible(string).
+    let str = require_object_coercible(agent, string, gc.nogc())?;
+
+    // 2. Let S be ? ToString(str)
+    // NOTE(review): `value` is held across this call without being scoped the way `s` is below; confirm that is safe.
+    let s = to_string(agent, str, gc.reborrow())?
+        .unbind()
+        .bind(gc.nogc());
+    let scoped_s = s.scope(agent, gc.nogc());
+
+    // 3. Let p1 be the string-concatenation of "<" and tag.
+    // 4. If attribute is not the empty String, then
+    // c. Set p1 to the string-concatenation of:
+    // - p1
+    // - the code unit 0x0020 (SPACE)
+    // - attribute
+    // - the code unit 0x003D (EQUALS SIGN)
+    // - the code unit 0x0022 (QUOTATION MARK)
+    // - escapedV
+    // - the code unit 0x0022 (QUOTATION MARK)
+    // 5. Let p2 be the string-concatenation of p1 and ">".
+    // 6. Let p3 be the string-concatenation of p2 and S.
+    // 7. Let p4 be the string-concatenation of p3, "</", tag, and ">".
+    // 8. Return p4.
+    if let Some((attribute, value)) = attribute_and_value {
+        // a. Let V be ? ToString(value).
+        let v = to_string(agent, value, gc.reborrow())?
+            .unbind()
+            .bind(gc.nogc());
+        // b. Let escapedV be the String value that is the same as V except that each occurrence of the code unit 0x0022 (QUOTATION MARK) in V has been replaced with the six code unit sequence "&quot;".
+        let escaped_v = v.as_str(agent).replace('"', "&quot;");
+
+        let s = scoped_s.get(agent).bind(gc.nogc());
+        let s_str = s.as_str(agent);
+        Ok(String::from_string(
+            agent,
+            format!("<{tag} {attribute}=\"{escaped_v}\">{s_str}</{tag}>"),
+            gc.into_nogc(),
+        ))
+    } else {
+        let s = scoped_s.get(agent).bind(gc.nogc());
+        let s_str = s.as_str(agent);
+        Ok(String::from_string(
+            agent,
+            format!("<{tag}>{s_str}</{tag}>"),
+            gc.into_nogc(),
+        ))
+    }
+}
+
 enum TrimWhere {
     Start,
     End,