diff --git a/Cargo.toml b/Cargo.toml index 89a3dbea..d3bccfe6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,16 +32,17 @@ rust-version = "1.67.0" [workspace.dependencies] criterion = "0.5" -fluent-langneg = "0.13" +fluent-langneg = "0.14" futures = "0.3" iai = "0.1" -intl_pluralrules = "7.0.1" rustc-hash = "1" serde = "1.0" serde_json = "1.0" thiserror = "1.0" tokio = "1.0" -unic-langid = "0.9" +icu_locid = "1.4" +icu_plurals = { version = "1.4", features = ["experimental"] } +icu_provider = "1.4" fluent-bundle = { version = "0.15.3", path = "fluent-bundle" } fluent-fallback = { version = "0.7.1", path = "fluent-fallback" } diff --git a/fluent-bundle/Cargo.toml b/fluent-bundle/Cargo.toml index 0374d2b4..2b0890d9 100644 --- a/fluent-bundle/Cargo.toml +++ b/fluent-bundle/Cargo.toml @@ -26,9 +26,11 @@ include = [ [dependencies] fluent-langneg.workspace = true fluent-syntax.workspace = true -intl_pluralrules.workspace = true rustc-hash.workspace = true -unic-langid.workspace = true +icu_locid.workspace = true +icu_plurals.workspace = true +icu_provider.workspace = true +fixed_decimal = { version = "0.5.5", features = ["ryu"] } intl-memoizer = { version = "0.5.2", path = "../intl-memoizer" } self_cell = "1.0" smallvec = "1.13" @@ -37,12 +39,12 @@ smallvec = "1.13" criterion.workspace = true iai.workspace = true serde = { workspace = true, features = ["derive"] } -unic-langid = { workspace = true, features = ["macros"] } rand = "0.8" serde_yaml = "0.9" [features] default = [] +sync = ["intl-memoizer/sync", "icu_provider/sync"] all-benchmarks = [] [[bench]] diff --git a/fluent-bundle/README.md b/fluent-bundle/README.md index 488be2ea..3ff0b846 100644 --- a/fluent-bundle/README.md +++ b/fluent-bundle/README.md @@ -23,7 +23,7 @@ Usage ```rust use fluent_bundle::{FluentBundle, FluentResource}; -use unic_langid::langid; +use icu_locid::langid; fn main() { let ftl_string = "hello-world = Hello, world!".to_owned(); diff --git a/fluent-bundle/benches/resolver.rs 
b/fluent-bundle/benches/resolver.rs index a024da05..d98c923a 100644 --- a/fluent-bundle/benches/resolver.rs +++ b/fluent-bundle/benches/resolver.rs @@ -10,7 +10,7 @@ use std::rc::Rc; use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue}; use fluent_syntax::ast; -use unic_langid::langid; +use icu_locid::langid; fn read_file(path: &str) -> Result { let mut f = File::open(path)?; diff --git a/fluent-bundle/benches/resolver_iai.rs b/fluent-bundle/benches/resolver_iai.rs index 05df9bee..7dd2819f 100644 --- a/fluent-bundle/benches/resolver_iai.rs +++ b/fluent-bundle/benches/resolver_iai.rs @@ -1,6 +1,6 @@ use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue}; use fluent_syntax::ast; -use unic_langid::{langid, LanguageIdentifier}; +use icu_locid::{langid, LanguageIdentifier}; const LANG_EN: LanguageIdentifier = langid!("en"); diff --git a/fluent-bundle/examples/custom_formatter.rs b/fluent-bundle/examples/custom_formatter.rs index 8fc59f1f..f94ffc09 100644 --- a/fluent-bundle/examples/custom_formatter.rs +++ b/fluent-bundle/examples/custom_formatter.rs @@ -2,7 +2,7 @@ // to format selected types of values. // // This allows users to plug their own number formatter to Fluent. -use unic_langid::LanguageIdentifier; +use icu_locid::LanguageIdentifier; use fluent_bundle::memoizer::MemoizerKind; use fluent_bundle::types::{FluentNumber, FluentNumberOptions}; diff --git a/fluent-bundle/examples/custom_type.rs b/fluent-bundle/examples/custom_type.rs index a6093732..88f23e41 100644 --- a/fluent-bundle/examples/custom_type.rs +++ b/fluent-bundle/examples/custom_type.rs @@ -9,8 +9,8 @@ // Lastly, we'll also create a new formatter which will be memoizable. // // The type and its options are modelled after ECMA402 Intl.DateTimeFormat. 
+use icu_locid::LanguageIdentifier; use intl_memoizer::Memoizable; -use unic_langid::LanguageIdentifier; use fluent_bundle::types::FluentType; use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue}; @@ -107,6 +107,8 @@ impl FluentType for DateTime { }) .expect("Failed to format a date.") } + + #[cfg(feature = "sync")] fn as_string_threadsafe( &self, _: &intl_memoizer::concurrent::IntlLangMemoizer, diff --git a/fluent-bundle/examples/external_arguments.rs b/fluent-bundle/examples/external_arguments.rs index fa9250ab..86bfc9ac 100644 --- a/fluent-bundle/examples/external_arguments.rs +++ b/fluent-bundle/examples/external_arguments.rs @@ -1,5 +1,5 @@ use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue}; -use unic_langid::langid; +use icu_locid::langid; fn main() { let ftl_string = String::from( diff --git a/fluent-bundle/examples/functions.rs b/fluent-bundle/examples/functions.rs index cfa4f46b..fa1a3d4f 100644 --- a/fluent-bundle/examples/functions.rs +++ b/fluent-bundle/examples/functions.rs @@ -1,5 +1,5 @@ use fluent_bundle::{FluentBundle, FluentResource, FluentValue}; -use unic_langid::langid; +use icu_locid::langid; fn main() { // We define the resources here so that they outlive diff --git a/fluent-bundle/examples/simple-app.rs b/fluent-bundle/examples/simple-app.rs index 8844832c..201195c5 100644 --- a/fluent-bundle/examples/simple-app.rs +++ b/fluent-bundle/examples/simple-app.rs @@ -19,6 +19,7 @@ //! default one. use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue}; use fluent_langneg::{negotiate_languages, NegotiationStrategy}; +use icu_locid::{langid, LanguageIdentifier}; use std::env; use std::fs; use std::fs::File; @@ -26,7 +27,6 @@ use std::io; use std::io::prelude::*; use std::path::Path; use std::str::FromStr; -use unic_langid::{langid, LanguageIdentifier}; /// We need a generic file read helper function to /// read the localization resource file. 
diff --git a/fluent-bundle/src/bundle.rs b/fluent-bundle/src/bundle.rs index 41a00e24..65f15603 100644 --- a/fluent-bundle/src/bundle.rs +++ b/fluent-bundle/src/bundle.rs @@ -12,8 +12,8 @@ use std::default::Default; use std::fmt; use fluent_syntax::ast; +use icu_locid::LanguageIdentifier; use intl_memoizer::IntlLangMemoizer; -use unic_langid::LanguageIdentifier; use crate::args::FluentArgs; use crate::entry::Entry; @@ -32,7 +32,7 @@ use crate::types::FluentValue; /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource, FluentValue, FluentArgs}; -/// use unic_langid::langid; +/// use icu_locid::langid; /// /// // 1. Create a FluentResource /// @@ -163,7 +163,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from(" /// hello = Hi! @@ -253,7 +253,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from(" /// hello = Hi! 
@@ -359,7 +359,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from("hello = Hi!"); /// let resource = FluentResource::try_new(ftl_string) @@ -384,7 +384,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from("hello-world = Hello World!"); /// let resource = FluentResource::try_new(ftl_string) @@ -412,7 +412,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from("hello-world = Hello World!"); /// let resource = FluentResource::try_new(ftl_string) @@ -459,7 +459,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from("hello-world = Hello World!"); /// let resource = FluentResource::try_new(ftl_string) @@ -508,7 +508,7 @@ impl FluentBundle { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource, FluentValue}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from("length = { STRLEN(\"12345\") }"); /// let resource = FluentResource::try_new(ftl_string) @@ -622,7 +622,7 @@ impl FluentBundle { /// ``` /// use fluent_bundle::FluentBundle; /// use fluent_bundle::FluentResource; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let langid_en = langid!("en-US"); /// let mut bundle: FluentBundle = FluentBundle::new(vec![langid_en]); @@ -653,6 +653,7 @@ impl crate::memoizer::MemoizerKind for IntlLangMemoizer { Self::new(lang) } + #[cfg(feature = "sync")] fn with_try_get_threadsafe(&self, args: I::Args, cb: U) -> Result where Self: Sized, @@ -663,6 +664,17 @@ impl 
crate::memoizer::MemoizerKind for IntlLangMemoizer { self.with_try_get(args, cb) } + #[cfg(not(feature = "sync"))] + fn with_try_get(&self, args: I::Args, cb: U) -> Result + where + Self: Sized, + I: intl_memoizer::Memoizable + 'static, + I::Args: 'static, + U: FnOnce(&I) -> R, + { + self.with_try_get(args, cb) + } + fn stringify_value( &self, value: &dyn crate::types::FluentType, diff --git a/fluent-bundle/src/concurrent.rs b/fluent-bundle/src/concurrent.rs index de55f0a3..64df98c3 100644 --- a/fluent-bundle/src/concurrent.rs +++ b/fluent-bundle/src/concurrent.rs @@ -1,6 +1,10 @@ -use intl_memoizer::{concurrent::IntlLangMemoizer, Memoizable}; +use icu_locid::LanguageIdentifier; +#[cfg(feature = "sync")] +use intl_memoizer::concurrent::IntlLangMemoizer; +#[cfg(not(feature = "sync"))] +use intl_memoizer::IntlLangMemoizer; +use intl_memoizer::Memoizable; use rustc_hash::FxHashMap; -use unic_langid::LanguageIdentifier; use crate::memoizer::MemoizerKind; use crate::types::FluentType; @@ -23,7 +27,7 @@ impl FluentBundle { /// ``` /// use fluent_bundle::concurrent::FluentBundle; /// use fluent_bundle::FluentResource; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let langid_en = langid!("en-US"); /// let mut bundle: FluentBundle = @@ -51,6 +55,7 @@ impl MemoizerKind for IntlLangMemoizer { Self::new(lang) } + #[cfg(feature = "sync")] fn with_try_get_threadsafe(&self, args: I::Args, cb: U) -> Result where Self: Sized, @@ -58,7 +63,18 @@ impl MemoizerKind for IntlLangMemoizer { I::Args: Send + Sync + 'static, U: FnOnce(&I) -> R, { - self.with_try_get(args, cb) + Self::with_try_get(self, args, cb) + } + + #[cfg(not(feature = "sync"))] + fn with_try_get(&self, args: I::Args, cb: U) -> Result + where + Self: Sized, + I: Memoizable + 'static, + I::Args: 'static, + U: FnOnce(&I) -> R, + { + Self::with_try_get(self, args, cb) } fn stringify_value(&self, value: &dyn FluentType) -> std::borrow::Cow<'static, str> { diff --git a/fluent-bundle/src/errors.rs 
b/fluent-bundle/src/errors.rs index 58b1754b..4d2e4601 100644 --- a/fluent-bundle/src/errors.rs +++ b/fluent-bundle/src/errors.rs @@ -33,7 +33,7 @@ pub enum FluentError { /// /// ``` /// use fluent_bundle::{FluentBundle, FluentResource}; - /// use unic_langid::langid; + /// use icu_locid::langid; /// /// let ftl_string = String::from("intro = Welcome, { $name }."); /// let res1 = FluentResource::try_new(ftl_string) diff --git a/fluent-bundle/src/lib.rs b/fluent-bundle/src/lib.rs index 93d7ea53..a0db15d5 100644 --- a/fluent-bundle/src/lib.rs +++ b/fluent-bundle/src/lib.rs @@ -18,7 +18,7 @@ //! ``` //! use fluent_bundle::{FluentBundle, FluentValue, FluentResource, FluentArgs}; //! // Used to provide a locale for the bundle. -//! use unic_langid::langid; +//! use icu_locid::langid; //! //! // 1. Crate a FluentResource //! @@ -101,6 +101,7 @@ mod args; pub mod builtins; pub mod bundle; +#[cfg(feature = "sync")] pub mod concurrent; mod entry; mod errors; diff --git a/fluent-bundle/src/memoizer.rs b/fluent-bundle/src/memoizer.rs index 1f03e308..560ba02f 100644 --- a/fluent-bundle/src/memoizer.rs +++ b/fluent-bundle/src/memoizer.rs @@ -1,6 +1,6 @@ use crate::types::FluentType; +use icu_locid::LanguageIdentifier; use intl_memoizer::Memoizable; -use unic_langid::LanguageIdentifier; /// This trait contains thread-safe methods which extend [`intl_memoizer::IntlLangMemoizer`]. /// It is used as the generic bound in this crate when a memoizer is needed. @@ -18,6 +18,7 @@ pub trait MemoizerKind: 'static { /// /// `U` - The callback that accepts the instance of the intl formatter, and generates /// some kind of results `R`. 
+ #[cfg(feature = "sync")] fn with_try_get_threadsafe(&self, args: I::Args, callback: U) -> Result where Self: Sized, @@ -25,6 +26,14 @@ pub trait MemoizerKind: 'static { I::Args: Send + Sync + 'static, U: FnOnce(&I) -> R; + #[cfg(not(feature = "sync"))] + fn with_try_get(&self, args: I::Args, callback: U) -> Result + where + Self: Sized, + I: Memoizable + 'static, + I::Args: 'static, + U: FnOnce(&I) -> R; + /// Wires up the `as_string` or `as_string_threadsafe` variants for [`FluentType`]. fn stringify_value(&self, value: &dyn FluentType) -> std::borrow::Cow<'static, str>; } diff --git a/fluent-bundle/src/types/mod.rs b/fluent-bundle/src/types/mod.rs index 585e90b6..2aab29b2 100644 --- a/fluent-bundle/src/types/mod.rs +++ b/fluent-bundle/src/types/mod.rs @@ -22,7 +22,7 @@ use std::borrow::{Borrow, Cow}; use std::fmt; use std::str::FromStr; -use intl_pluralrules::{PluralCategory, PluralRuleType}; +use icu_plurals::{PluralCategory, PluralRuleType}; use crate::memoizer::MemoizerKind; use crate::resolver::Scope; @@ -41,6 +41,7 @@ pub trait FluentType: fmt::Debug + AnyEq + 'static { /// Convert the custom type into a string value, for instance a custom `DateTime` /// type could return "Oct. 27, 2022". This operation is provided the threadsafe /// [`IntlLangMemoizer`](intl_memoizer::concurrent::IntlLangMemoizer). 
+    #[cfg(feature = "sync")]
     fn as_string_threadsafe(
         &self,
         intls: &intl_memoizer::concurrent::IntlLangMemoizer,
@@ -157,10 +158,10 @@ impl<'source> FluentValue<'source> {
     /// ```
     /// use fluent_bundle::resolver::Scope;
     /// use fluent_bundle::{types::FluentValue, FluentBundle, FluentResource};
-    /// use unic_langid::langid;
+    /// use icu_locid::langid;
     ///
-    /// let langid_ars = langid!("en");
-    /// let bundle: FluentBundle<FluentResource> = FluentBundle::new(vec![langid_ars]);
+    /// let langid_en = langid!("en");
+    /// let bundle: FluentBundle<FluentResource> = FluentBundle::new(vec![langid_en]);
     /// let scope = Scope::new(&bundle, None, None);
     ///
     /// // Matching examples:
@@ -189,12 +190,12 @@ impl<'source> FluentValue<'source> {
             (FluentValue::Number(a), FluentValue::Number(b)) => a == b,
             (FluentValue::String(a), FluentValue::Number(b)) => {
                 let cat = match a.as_ref() {
-                    "zero" => PluralCategory::ZERO,
-                    "one" => PluralCategory::ONE,
-                    "two" => PluralCategory::TWO,
-                    "few" => PluralCategory::FEW,
-                    "many" => PluralCategory::MANY,
-                    "other" => PluralCategory::OTHER,
+                    "zero" => PluralCategory::Zero,
+                    "one" => PluralCategory::One,
+                    "two" => PluralCategory::Two,
+                    "few" => PluralCategory::Few,
+                    "many" => PluralCategory::Many,
+                    "other" => PluralCategory::Other,
                     _ => return false,
                 };
                 // This string matches a plural rule keyword. Check if the number
@@ -203,13 +204,25 @@ impl<'source> FluentValue<'source> {
-                    FluentNumberType::Cardinal => PluralRuleType::CARDINAL,
-                    FluentNumberType::Ordinal => PluralRuleType::ORDINAL,
+                    FluentNumberType::Cardinal => PluralRuleType::Cardinal,
+                    FluentNumberType::Ordinal => PluralRuleType::Ordinal,
                 };
-                scope
+                // Honor the number's own rule type so ordinal selectors use ordinal rules.
+                #[cfg(feature = "sync")]
+                let result = scope
                     .bundle
                     .intls
                     .with_try_get_threadsafe::<PluralRules, _, _>((r#type,), |pr| {
-                        pr.0.select(b) == Ok(cat)
+                        pr.0.category_for(b) == cat
                     })
-                    .unwrap()
+                    .unwrap();
+
+                #[cfg(not(feature = "sync"))]
+                let result = scope
+                    .bundle
+                    .intls
+                    .with_try_get::<PluralRules, _, _>((r#type,), |pr| {
+                        pr.0.category_for(b) == cat
+                    })
+                    .unwrap();
+                result
             }
             _ => false,
         }
diff --git a/fluent-bundle/src/types/number.rs b/fluent-bundle/src/types/number.rs
index b9c3b2de..3d32db15 100644
--- a/fluent-bundle/src/types/number.rs
+++ b/fluent-bundle/src/types/number.rs
@@ -1,9 +1,8 @@
 use std::borrow::Cow;
-use std::convert::TryInto;
 use std::default::Default;
 use std::str::FromStr;
 
-use intl_pluralrules::operands::PluralOperands;
+use icu_plurals::PluralOperands;
 
 use crate::args::FluentArgs;
 use crate::types::FluentValue;
@@ -231,18 +230,14 @@ macro_rules! from_num {
 
 impl From<&FluentNumber> for PluralOperands {
     fn from(input: &FluentNumber) -> Self {
-        let mut operands: Self = input
-            .value
-            .try_into()
-            .expect("Failed to generate operands out of FluentNumber");
+        use fixed_decimal::{FixedDecimal, FloatPrecision};
+        // Keep the descriptive panic message the previous implementation had.
+        let mut fd = FixedDecimal::try_from_f64(input.value, FloatPrecision::Floating)
+            .expect("Failed to generate operands out of FluentNumber");
         if let Some(mfd) = input.options.minimum_fraction_digits {
-            if mfd > operands.v {
-                operands.f *= 10_u64.pow(mfd as u32 - operands.v as u32);
-                operands.v = mfd;
-            }
+            fd.pad_end(-(mfd as i16));
         }
-        // XXX: Add support for other options.
-        operands
+        (&fd).into()
     }
 }
 
diff --git a/fluent-bundle/src/types/plural.rs b/fluent-bundle/src/types/plural.rs
index 1151fd6d..80fe7cb4 100644
--- a/fluent-bundle/src/types/plural.rs
+++ b/fluent-bundle/src/types/plural.rs
@@ -1,7 +1,6 @@
-use fluent_langneg::{negotiate_languages, NegotiationStrategy};
+use icu_locid::LanguageIdentifier;
+use icu_plurals::{PluralRuleType, PluralRules as IntlPluralRules};
 use intl_memoizer::Memoizable;
-use intl_pluralrules::{PluralRuleType, PluralRules as IntlPluralRules};
-use unic_langid::LanguageIdentifier;
 
 pub struct PluralRules(pub IntlPluralRules);
 
@@ -9,14 +8,10 @@ impl Memoizable for PluralRules {
     type Args = (PluralRuleType,);
     type Error = &'static str;
     fn construct(lang: LanguageIdentifier, args: Self::Args) -> Result<Self, Self::Error> {
-        let default_lang: LanguageIdentifier = "en".parse().unwrap();
-        let pr_lang = negotiate_languages(
-            &[lang],
-            &IntlPluralRules::get_locales(args.0),
-            Some(&default_lang),
-            NegotiationStrategy::Lookup,
-        )[0]
-        .clone();
-        Ok(Self(IntlPluralRules::create(pr_lang, args.0)?))
+        // `try_new` takes the rule type directly, so no catch-all `todo!()` arm is needed,
+        // and a construction failure is returned to the caller instead of panicking here.
+        IntlPluralRules::try_new(&lang.into(), args.0)
+            .map(Self)
+            .map_err(|_| "Failed to create plural rules for the requested locale")
     }
 }
diff --git a/fluent-bundle/tests/bundle.rs b/fluent-bundle/tests/bundle.rs
index 7d3e6206..7311f5cd 100644
--- a/fluent-bundle/tests/bundle.rs
+++ b/fluent-bundle/tests/bundle.rs
@@ -1,6 +1,6 @@
 use fluent_bundle::{FluentArgs, FluentBundle, FluentResource};
+use icu_locid::langid;
 use std::borrow::Cow;
-use unic_langid::langid;
 
 #[test]
 fn add_resource_override() {
diff --git a/fluent-bundle/tests/custom_types.rs b/fluent-bundle/tests/custom_types.rs
index 082f864a..1a153b35 100644
--- a/fluent-bundle/tests/custom_types.rs
+++ b/fluent-bundle/tests/custom_types.rs
@@ -4,7 +4,7 @@ use fluent_bundle::FluentArgs;
 use fluent_bundle::FluentBundle;
 use fluent_bundle::FluentResource;
 use
fluent_bundle::FluentValue; -use unic_langid::langid; +use icu_locid::langid; #[test] fn fluent_custom_type() { @@ -26,6 +26,8 @@ fn fluent_custom_type() { fn as_string(&self, _: &intl_memoizer::IntlLangMemoizer) -> std::borrow::Cow<'static, str> { format!("{}", self.epoch).into() } + + #[cfg(feature = "sync")] fn as_string_threadsafe( &self, _: &intl_memoizer::concurrent::IntlLangMemoizer, @@ -118,6 +120,8 @@ fn fluent_date_time_builtin() { fn as_string(&self, _: &intl_memoizer::IntlLangMemoizer) -> std::borrow::Cow<'static, str> { format!("2020-01-20 {}:00", self.epoch).into() } + + #[cfg(feature = "sync")] fn as_string_threadsafe( &self, _intls: &intl_memoizer::concurrent::IntlLangMemoizer, diff --git a/fluent-bundle/tests/function.rs b/fluent-bundle/tests/function.rs index 1d403e2f..493ce269 100644 --- a/fluent-bundle/tests/function.rs +++ b/fluent-bundle/tests/function.rs @@ -1,5 +1,6 @@ use fluent_bundle::types::FluentNumber; use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue}; +use icu_locid::langid; #[test] fn test_function_resolve() { @@ -21,7 +22,7 @@ liked-count2 = { NUMBER($num) -> ); let res = FluentResource::try_new(ftl_string).expect("Could not parse an FTL string."); - let mut bundle = FluentBundle::default(); + let mut bundle = FluentBundle::new(vec![langid!("en")]); bundle .add_function("NUMBER", |positional, named| match positional.first() { diff --git a/fluent-bundle/tests/resolver_fixtures.rs b/fluent-bundle/tests/resolver_fixtures.rs index e242a390..20ed8532 100644 --- a/fluent-bundle/tests/resolver_fixtures.rs +++ b/fluent-bundle/tests/resolver_fixtures.rs @@ -10,9 +10,9 @@ use fluent_bundle::resolver::ResolverError; use fluent_bundle::FluentArgs; use fluent_bundle::FluentError; use fluent_bundle::{FluentBundle, FluentResource, FluentValue}; +use icu_locid::LanguageIdentifier; use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; -use unic_langid::LanguageIdentifier; use helpers::*; diff --git 
a/fluent-bundle/tests/types_test.rs b/fluent-bundle/tests/types_test.rs index 08d4d9be..b1e9011a 100644 --- a/fluent-bundle/tests/types_test.rs +++ b/fluent-bundle/tests/types_test.rs @@ -6,8 +6,8 @@ use fluent_bundle::FluentArgs; use fluent_bundle::FluentBundle; use fluent_bundle::FluentResource; use fluent_bundle::FluentValue; -use intl_pluralrules::operands::PluralOperands; -use unic_langid::langid; +use icu_locid::langid; +use icu_plurals::PluralOperands; #[test] fn fluent_value_try_number() { @@ -17,10 +17,10 @@ fn fluent_value_try_number() { #[test] fn fluent_value_matches() { - // We'll use `ars` locale since it happens to have all + // We'll use `ar` locale since it happens to have all // plural rules categories. - let langid_ars = langid!("ars"); - let bundle: FluentBundle = FluentBundle::new(vec![langid_ars]); + let langid_ar = langid!("ar"); + let bundle: FluentBundle = FluentBundle::new(vec![langid_ar]); let scope = Scope::new(&bundle, None, None); let string_val = FluentValue::from("string1"); @@ -139,18 +139,60 @@ fn fluent_number_style() { #[test] fn fluent_number_to_operands() { + use icu_plurals::rules::RawPluralOperands; + let num = FluentNumber::new(2.81, FluentNumberOptions::default()); let operands: PluralOperands = (&num).into(); assert_eq!( operands, - PluralOperands { - n: 2.81, + RawPluralOperands { i: 2, v: 2, w: 2, f: 81, t: 81, + c: 0, + } + .into() + ); +} + +#[test] +fn fluent_number_to_float_vs_int() { + // This test verifies that we coalesce f64 `1.0` to usize `1`. + // See `From for PluralOperands` for more details. 
+ use icu_plurals::rules::RawPluralOperands; + + let num: FluentNumber = 1.0.into(); + let operands: PluralOperands = (&num).into(); + + assert_eq!( + operands, + RawPluralOperands { + i: 1, + v: 0, + w: 0, + f: 0, + t: 0, + c: 0, + } + .into() + ); + + let num: FluentNumber = 1.into(); + let operands: PluralOperands = (&num).into(); + + assert_eq!( + operands, + RawPluralOperands { + i: 1, + v: 0, + w: 0, + f: 0, + t: 0, + c: 0, } + .into() ); } diff --git a/fluent-fallback/Cargo.toml b/fluent-fallback/Cargo.toml index d017d3b0..0d974fd9 100644 --- a/fluent-fallback/Cargo.toml +++ b/fluent-fallback/Cargo.toml @@ -19,7 +19,7 @@ readme = "README.md" fluent-bundle.workspace = true futures.workspace = true rustc-hash.workspace = true -unic-langid.workspace = true +icu_locid.workspace = true async-trait = "0.1" chunky-vec = "0.1" once_cell = "1.19" @@ -27,6 +27,5 @@ pin-cell = "0.2" [dev-dependencies] fluent-langneg.workspace = true -unic-langid = { workspace = true, features = ["macros"] } tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } fluent-resmgr = { path = "../fluent-resmgr" } diff --git a/fluent-fallback/examples/simple-fallback.rs b/fluent-fallback/examples/simple-fallback.rs index 33fc4e82..a11d4877 100644 --- a/fluent-fallback/examples/simple-fallback.rs +++ b/fluent-fallback/examples/simple-fallback.rs @@ -28,8 +28,8 @@ use fluent_fallback::{ }; use fluent_langneg::{negotiate_languages, NegotiationStrategy}; +use icu_locid::{langid, LanguageIdentifier}; use rustc_hash::FxHashSet; -use unic_langid::{langid, LanguageIdentifier}; /// This helper struct holds the scheme for converting /// resource paths into full paths. It is used to customise diff --git a/fluent-fallback/src/env.rs b/fluent-fallback/src/env.rs index cf340fcf..4d69ffa1 100644 --- a/fluent-fallback/src/env.rs +++ b/fluent-fallback/src/env.rs @@ -13,7 +13,7 @@ //! are available. The list should also be sorted according to the user //! 
preference, as the order is significant for how [`Localization`](crate::Localization) performs //! fallbacking. -use unic_langid::LanguageIdentifier; +use icu_locid::LanguageIdentifier; /// A trait used to provide a selection of locales to be used by the /// [`Localization`](crate::Localization) instance for runtime @@ -23,7 +23,7 @@ use unic_langid::LanguageIdentifier; /// ``` /// use fluent_fallback::{Localization, env::LocalesProvider}; /// use fluent_resmgr::ResourceManager; -/// use unic_langid::LanguageIdentifier; +/// use icu_locid::LanguageIdentifier; /// use std::{ /// rc::Rc, /// cell::RefCell diff --git a/fluent-fallback/src/errors.rs b/fluent-fallback/src/errors.rs index 704bc84f..35553f3c 100644 --- a/fluent-fallback/src/errors.rs +++ b/fluent-fallback/src/errors.rs @@ -1,6 +1,6 @@ use fluent_bundle::FluentError; +use icu_locid::LanguageIdentifier; use std::error::Error; -use unic_langid::LanguageIdentifier; #[derive(Debug, PartialEq, Eq)] pub enum LocalizationError { diff --git a/fluent-fallback/src/generator.rs b/fluent-fallback/src/generator.rs index f13af63c..4a02fd01 100644 --- a/fluent-fallback/src/generator.rs +++ b/fluent-fallback/src/generator.rs @@ -1,8 +1,8 @@ use fluent_bundle::{FluentBundle, FluentError, FluentResource}; use futures::Stream; +use icu_locid::LanguageIdentifier; use rustc_hash::FxHashSet; use std::borrow::Borrow; -use unic_langid::LanguageIdentifier; use crate::types::ResourceId; diff --git a/fluent-fallback/src/lib.rs b/fluent-fallback/src/lib.rs index dee5906f..d14b0b1d 100644 --- a/fluent-fallback/src/lib.rs +++ b/fluent-fallback/src/lib.rs @@ -26,7 +26,7 @@ //! ``` //! use fluent_fallback::{Localization, types::{ResourceType, ToResourceId}}; //! use fluent_resmgr::ResourceManager; -//! use unic_langid::langid; +//! use icu_locid::langid; //! //! let res_mgr = ResourceManager::new("./tests/resources/{locale}/".to_string()); //! @@ -96,7 +96,7 @@ //! 
As a long lived structure, the [`Localization`] is intended to handle runtime locale //! management. //! -//! In the example above, [`Vec`](unic_langid::LanguageIdentifier) +//! In the example above, [`Vec`](icu_locid::LanguageIdentifier) //! provides a static list of locales that the [`Localization`] handles, but that's just the //! simplest implementation of the [`env::LocalesProvider`], and one can implement //! a much more sophisticated one that reacts to user or environment driven changes, and diff --git a/fluent-fallback/tests/localization_test.rs b/fluent-fallback/tests/localization_test.rs index ebe57314..5df89de1 100644 --- a/fluent-fallback/tests/localization_test.rs +++ b/fluent-fallback/tests/localization_test.rs @@ -11,10 +11,10 @@ use fluent_fallback::{ types::{L10nKey, ResourceId}, Localization, LocalizationError, }; +use icu_locid::{langid, LanguageIdentifier}; use rustc_hash::FxHashSet; use std::cell::RefCell; use std::rc::Rc; -use unic_langid::{langid, LanguageIdentifier}; struct InnerLocales { locales: RefCell>, diff --git a/fluent-pseudo/README.md b/fluent-pseudo/README.md index 2b97ce12..f15eb5cc 100644 --- a/fluent-pseudo/README.md +++ b/fluent-pseudo/README.md @@ -13,7 +13,7 @@ Usage ```rust use fluent_bundle::{FluentBundle, FluentResource}; -use unic_langid::langid; +use icu_locid::langid; use fluent_pseudo::transform; fn transform_wrapper(s: &str) -> Cow { diff --git a/fluent-resmgr/Cargo.toml b/fluent-resmgr/Cargo.toml index 7029083c..760ccd36 100644 --- a/fluent-resmgr/Cargo.toml +++ b/fluent-resmgr/Cargo.toml @@ -20,9 +20,8 @@ fluent-fallback.workspace = true futures.workspace = true rustc-hash.workspace = true thiserror.workspace = true -unic-langid.workspace = true +icu_locid.workspace = true elsa = "1.10" [dev-dependencies] fluent-langneg.workspace = true -unic-langid = { workspace = true, features = ["macros"] } diff --git a/fluent-resmgr/examples/simple-resmgr.rs b/fluent-resmgr/examples/simple-resmgr.rs index 81b1fdd1..d184d97f 
100644 --- a/fluent-resmgr/examples/simple-resmgr.rs +++ b/fluent-resmgr/examples/simple-resmgr.rs @@ -20,12 +20,12 @@ use fluent_bundle::{FluentArgs, FluentValue}; use fluent_langneg::{negotiate_languages, NegotiationStrategy}; use fluent_resmgr::resource_manager::ResourceManager; +use icu_locid::LanguageIdentifier; use std::env; use std::fs; use std::io; use std::path::PathBuf; use std::str::FromStr; -use unic_langid::LanguageIdentifier; /// This helper function allows us to read the list /// of available locales by reading the list of diff --git a/fluent-resmgr/src/resource_manager.rs b/fluent-resmgr/src/resource_manager.rs index 9f2cfc57..39a8d493 100644 --- a/fluent-resmgr/src/resource_manager.rs +++ b/fluent-resmgr/src/resource_manager.rs @@ -5,11 +5,11 @@ use fluent_fallback::{ types::ResourceId, }; use futures::stream::Stream; +use icu_locid::LanguageIdentifier; use rustc_hash::FxHashSet; use std::io; use std::{fs, iter}; use thiserror::Error; -use unic_langid::LanguageIdentifier; fn read_file(path: &str) -> Result { fs::read_to_string(path) @@ -222,7 +222,7 @@ impl BundleGenerator for ResourceManager { #[cfg(test)] mod test { use super::*; - use unic_langid::langid; + use icu_locid::langid; #[test] fn caching() { diff --git a/fluent-resmgr/tests/localization_test.rs b/fluent-resmgr/tests/localization_test.rs index d1534f29..d413071d 100644 --- a/fluent-resmgr/tests/localization_test.rs +++ b/fluent-resmgr/tests/localization_test.rs @@ -1,7 +1,7 @@ use fluent_fallback::Localization; use fluent_resmgr::resource_manager::ResourceManager; +use icu_locid::langid; use std::borrow::Cow; -use unic_langid::langid; #[test] fn localization_format_value() { diff --git a/fluent-syntax/benches/parser.rs b/fluent-syntax/benches/parser.rs index 2397044d..71fe96e5 100644 --- a/fluent-syntax/benches/parser.rs +++ b/fluent-syntax/benches/parser.rs @@ -18,7 +18,7 @@ fn get_resources(tests: &[&'static str]) -> HashMap<&'static str, String> { let path = format!("./benches/{}", 
test); ftl_strings.insert(*test, read_file(&path).expect("Couldn't load file")); } - return ftl_strings; + ftl_strings } fn get_ctxs(tests: &[&'static str]) -> HashMap<&'static str, Vec> { diff --git a/fluent-syntax/tests/parser_fixtures.rs b/fluent-syntax/tests/parser_fixtures.rs index eb8b9d1f..a067d38e 100644 --- a/fluent-syntax/tests/parser_fixtures.rs +++ b/fluent-syntax/tests/parser_fixtures.rs @@ -27,7 +27,7 @@ fn parse_fixtures_compare() { let reference_path = path.replace(".ftl", ".json"); let reference_file = read_file(&reference_path, true).unwrap(); - let ftl_file = read_file(&path, false).unwrap(); + let ftl_file = read_file(path, false).unwrap(); println!("Parsing: {:#?}", path); let target_ast = match parse(ftl_file) { @@ -72,7 +72,7 @@ fn parse_bench_fixtures() { file_name.replace(".ftl", ".json") ); let reference_file = read_file(&reference_path, true).unwrap(); - let ftl_file = read_file(&path, false).unwrap(); + let ftl_file = read_file(path, false).unwrap(); println!("Parsing: {:#?}", path); let target_ast = match parse(ftl_file) { @@ -106,7 +106,7 @@ fn parse_bench_fixtures() { file_name.replace(".ftl", ".json") ); let reference_file = read_file(&reference_path, true).unwrap(); - let ftl_file = read_file(&path, false).unwrap(); + let ftl_file = read_file(path, false).unwrap(); println!("Parsing: {:#?}", path); let target_ast = match parse(ftl_file.clone()) { diff --git a/fluent/Cargo.toml b/fluent/Cargo.toml index c57985c0..47049327 100644 --- a/fluent/Cargo.toml +++ b/fluent/Cargo.toml @@ -26,4 +26,4 @@ include = [ [dependencies] fluent-bundle.workspace = true fluent-pseudo = { workspace = true, optional = true } -unic-langid.workspace = true +icu_locid.workspace = true diff --git a/fluent/README.md b/fluent/README.md index 875dd50d..1b15651b 100644 --- a/fluent/README.md +++ b/fluent/README.md @@ -23,7 +23,7 @@ Usage ```rust use fluent::{FluentBundle, FluentResource}; -use unic_langid::langid; +use icu_locid::langid; fn main() { let 
ftl_string = "hello-world = Hello, world!".to_owned(); diff --git a/fluent/src/lib.rs b/fluent/src/lib.rs index d91e9c52..cd7be073 100644 --- a/fluent/src/lib.rs +++ b/fluent/src/lib.rs @@ -20,7 +20,7 @@ //! use fluent::{FluentBundle, FluentValue, FluentResource, FluentArgs}; //! //! // Used to provide a locale for the bundle. -//! use unic_langid::LanguageIdentifier; +//! use icu_locid::LanguageIdentifier; //! //! let ftl_string = String::from(" //! hello-world = Hello, world! diff --git a/intl-memoizer/Cargo.toml b/intl-memoizer/Cargo.toml index b741604b..7aa05450 100644 --- a/intl-memoizer/Cargo.toml +++ b/intl-memoizer/Cargo.toml @@ -24,9 +24,28 @@ include = [ ] [dependencies] -unic-langid.workspace = true +icu_locid.workspace = true +icu_plurals.workspace = true +icu_provider.workspace = true type-map = "0.5" +hashbrown = "0.14" [dev-dependencies] -intl_pluralrules.workspace = true fluent-langneg.workspace = true +criterion.workspace = true +icu_datetime = {version = "1.4", features = ["serde"]} +icu_calendar = "1.4" +icu_decimal = "1.4" +icu_provider_blob = "1.4" +icu_collator = "1.4" +fixed_decimal = "0.5" +icu_list = { version = "1.4", features = ["serde"]} + +[features] +default = [] +sync = ["icu_provider/sync"] + +[[bench]] +name = "single" +harness = false + diff --git a/intl-memoizer/benches/single.rs b/intl-memoizer/benches/single.rs new file mode 100644 index 00000000..b85d7916 --- /dev/null +++ b/intl-memoizer/benches/single.rs @@ -0,0 +1,208 @@ +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Criterion; +use criterion::{Bencher, BenchmarkId}; +use icu_calendar::DateTime; +use icu_datetime::{ + options::length::{Date, Time}, + // DateTimeFormatterOptions, + DateFormatter, + // DateTimeFormatter, + TimeFormatter, +}; +// use icu_collator::{Collator, CollatorOptions}; +// use icu_decimal::{FixedDecimalFormatter, options::FixedDecimalFormatterOptions}; +// use fixed_decimal::FixedDecimal; +use icu_list::{ListFormatter, 
ListLength}; +use icu_locid::LanguageIdentifier; +use icu_plurals::{PluralRuleType, PluralRules}; +use intl_memoizer::{IntlLangMemoizer, Memoizable}; +use std::hint::black_box; + +use icu_provider_blob::BlobDataProvider; +const ICU4X_DATA: &[u8] = include_bytes!(concat!( + "/Users/zibi/projects/icu-perf/data/icu4x-1.4.postcard" +)); + +trait Testable { + type Input; + + fn execute(&self, input: Self::Input); +} + +macro_rules! define_testable_type { + ($name:ident, $type:ident, $args:tt, $constructor:ident, $method:ident, $input:ty) => { + define_testable_type!($name, $type, $args, $constructor); + + impl Testable for $name { + type Input = $input; + + fn execute(&self, input: Self::Input) { + let _ = self.0.$method(input); + } + } + }; + + ($name:ident, $type:ident, $args:tt, $constructor:ident, $method:ident, ref $input:ty) => { + define_testable_type!($name, $type, $args, $constructor); + + impl Testable for $name { + type Input = $input; + + fn execute(&self, input: Self::Input) { + let _ = self.0.$method(&input); + } + } + }; + + ($name:ident, $type:ident, $args:tt, $constructor:ident) => { + struct $name($type); + + impl Memoizable for $name { + type Args = $args; + type Provider = icu_provider_blob::BlobDataProvider; + type Error = (); + + fn construct( + lang: LanguageIdentifier, + args: Self::Args, + provider: Option<&Self::Provider>, + ) -> Result { + Ok(Self( + $type::$constructor(provider.unwrap(), &lang.into(), args.0).unwrap(), + )) + } + } + }; +} + +define_testable_type!(TF, TimeFormatter, (Time, ), try_new_with_length_with_buffer_provider, format_to_string, ref DateTime); +define_testable_type!(DF, DateFormatter, (Date, ), try_new_with_length_with_buffer_provider, format_to_string, ref DateTime); +// define_testable_type!(DTF, DateTimeFormatter, (DateTimeFormatterOptions, ), try_new_with_length_with_buffer_provider, format_to_string, ref DateTime); +define_testable_type!( + PR, + PluralRules, + (PluralRuleType,), + try_new_with_buffer_provider, + 
category_for, + usize +); +// define_testable_type!( +// C, +// Collator, +// (CollatorOptions,), +// try_new_with_buffer_provider, +// compare, +// &str, +// &str, +// ); +// define_testable_type!( +// D, +// FixedDecimalFormatter, +// (FixedDecimalFormatterOptions,), +// try_new_with_buffer_provider, +// format_to_string, +// ref FixedDecimal +// ); +define_testable_type!( + LF, + ListFormatter, + (ListLength,), + try_new_and_with_length_with_buffer_provider, + format_to_string, + std::vec::IntoIter +); + +macro_rules! without_memoizer_hoisted { + ($type:ident, $b:ident, $lang:ident, $provider:ident, $args:expr, $count:expr, $input:expr ) => { + $b.iter(|| { + let intl = $type::construct($lang.clone(), black_box($args), Some($provider)).unwrap(); + for _ in 0..$count { + let _ = intl.execute($input); + } + }) + }; +} + +macro_rules! without_memoizer { + ($type:ident, $b:ident, $lang:ident, $provider:ident, $args:expr, $count:expr, $input:expr ) => { + $b.iter(|| { + for _ in 0..$count { + let intl = + $type::construct($lang.clone(), black_box($args), Some($provider)).unwrap(); + let _ = intl.execute($input); + } + }) + }; +} + +macro_rules! 
with_memoizer { + ($type:ident, $b:ident, $lang:ident, $provider:ident, $args:expr, $count:expr, $input:expr ) => { + $b.iter(|| { + let memoizer = + IntlLangMemoizer::new(black_box($lang.clone()), Some(black_box($provider))); + for _ in 0..$count { + let _ = + memoizer.with_try_get(black_box(&$args), |intl: &$type| intl.execute($input)); + } + }) + }; +} + +fn bench_variants(c: &mut Criterion) { + let lang: LanguageIdentifier = "und".parse().unwrap(); + + let provider = + BlobDataProvider::try_new_from_static_blob(ICU4X_DATA).expect("Failed to load data"); + + let tf_input = DateTime::try_new_gregorian_datetime(2020, 9, 1, 12, 34, 28).unwrap(); + let tf_args = (Time::Short,); + + let pr_input = 5; + let pr_args = (PluralRuleType::Cardinal,); + + for component in ["time", "plurals"] { + let mut group = c.benchmark_group(component); + let counts: &[usize] = &[0, 1, 10, 100, 1000, 10000]; + + for count in counts { + group.bench_with_input( + BenchmarkId::new("without_memoizer_hoisted", count), + &(count, &provider), + |b: &mut Bencher, &(count, provider)| match component { + "time" => { + without_memoizer_hoisted!(TF, b, lang, provider, tf_args, *count, tf_input) + } + "plurals" => { + without_memoizer_hoisted!(PR, b, lang, provider, pr_args, *count, pr_input) + } + _ => unreachable!(), + }, + ); + group.bench_with_input( + BenchmarkId::new("without_memoizer", count), + &(count, &provider), + |b: &mut Bencher, &(count, provider)| match component { + "time" => without_memoizer!(TF, b, lang, provider, tf_args, *count, tf_input), + "plurals" => { + without_memoizer!(PR, b, lang, provider, pr_args, *count, pr_input) + } + _ => unreachable!(), + }, + ); + group.bench_with_input( + BenchmarkId::new("with_memoizer", count), + &(count, &provider), + |b: &mut Bencher, &(count, provider)| match component { + "time" => with_memoizer!(TF, b, lang, provider, tf_args, *count, tf_input), + "plurals" => with_memoizer!(PR, b, lang, provider, pr_args, *count, pr_input), + _ => 
unreachable!(), + }, + ); + } + group.finish(); + } +} + +criterion_group!(benches, bench_variants,); +criterion_main!(benches); diff --git a/intl-memoizer/examples/numberformat.rs b/intl-memoizer/examples/numberformat.rs index 793c890c..01aa1519 100644 --- a/intl-memoizer/examples/numberformat.rs +++ b/intl-memoizer/examples/numberformat.rs @@ -1,5 +1,5 @@ +use icu_locid::LanguageIdentifier; use intl_memoizer::{IntlMemoizer, Memoizable}; -use unic_langid::LanguageIdentifier; #[derive(Clone, Hash, PartialEq, Eq)] struct NumberFormatOptions { diff --git a/intl-memoizer/examples/pluralrules.rs b/intl-memoizer/examples/pluralrules.rs index a37f8d1a..b07d3b0d 100644 --- a/intl-memoizer/examples/pluralrules.rs +++ b/intl-memoizer/examples/pluralrules.rs @@ -1,12 +1,18 @@ +use icu_locid::LanguageIdentifier; +use icu_plurals::{PluralCategory, PluralRuleType, PluralRules as IntlPluralRules}; use intl_memoizer::{IntlMemoizer, Memoizable}; -use intl_pluralrules::{PluralCategory, PluralRuleType, PluralRules as IntlPluralRules}; -use unic_langid::LanguageIdentifier; struct PluralRules(pub IntlPluralRules); impl PluralRules { pub fn new(lang: LanguageIdentifier, pr_type: PluralRuleType) -> Result { - Ok(Self(IntlPluralRules::create(lang, pr_type)?)) + let locale = lang.into(); + let inner = match pr_type { + PluralRuleType::Cardinal => IntlPluralRules::try_new_cardinal(&locale), + PluralRuleType::Ordinal => IntlPluralRules::try_new_ordinal(&locale), + _ => todo!(), + }; + Ok(Self(inner.unwrap())) } } @@ -24,8 +30,8 @@ fn main() { let lang: LanguageIdentifier = "en".parse().unwrap(); let lang_memoizer = memoizer.get_for_lang(lang); let result = lang_memoizer - .with_try_get::((PluralRuleType::CARDINAL,), |pr| pr.0.select(5)) + .with_try_get::((PluralRuleType::Cardinal,), |pr| pr.0.category_for(5)) .unwrap(); - assert_eq!(result, Ok(PluralCategory::OTHER)); + assert_eq!(result, PluralCategory::Other); } diff --git a/intl-memoizer/src/lang_memoizer.rs 
b/intl-memoizer/src/lang_memoizer.rs new file mode 100644 index 00000000..80cc06e5 --- /dev/null +++ b/intl-memoizer/src/lang_memoizer.rs @@ -0,0 +1,48 @@ +// use std::collections::HashMap; +use crate::memoizable::Memoizable; +use hashbrown::HashMap; +use icu_locid::LanguageIdentifier; +use std::cell::RefCell; + +pub struct IntlLangMemoizer<'dp, DP = ()> { + lang: LanguageIdentifier, + provider: Option<&'dp DP>, + map: RefCell, +} + +impl<'dp, DP> IntlLangMemoizer<'dp, DP> { + pub fn new(lang: LanguageIdentifier, provider: Option<&'dp DP>) -> Self { + Self { + lang, + provider, + map: Default::default(), + } + } + + pub fn with_try_get( + &self, + construct_args: &I::Args, + callback: U, + ) -> Result + where + Self: Sized, + I: Memoizable + 'static, + U: FnOnce(&I) -> R, + { + let mut map = self.map.borrow_mut(); + + let cache = map.entry().or_insert_with(HashMap::::new); + + let (_, e) = cache + .raw_entry_mut() + .from_key(construct_args) + .or_insert_with(|| { + ( + construct_args.clone(), + I::construct(self.lang.clone(), construct_args.clone(), self.provider) + .expect("FOO"), + ) + }); + Ok(callback(e)) + } +} diff --git a/intl-memoizer/src/lib.rs b/intl-memoizer/src/lib.rs index d9986571..2468ac18 100644 --- a/intl-memoizer/src/lib.rs +++ b/intl-memoizer/src/lib.rs @@ -1,435 +1,7 @@ -//! This crate contains a memoizer for internationalization formatters. Often it is -//! expensive (in terms of performance and memory) to construct a formatter, but then -//! relatively cheap to run the format operation. -//! -//! The [`IntlMemoizer`] is the main struct that creates a per-locale [`IntlLangMemoizer`]. +mod lang_memoizer; +mod memoizable; +mod memoizer; -use std::cell::RefCell; -use std::collections::hash_map::Entry; -use std::collections::HashMap; -use std::hash::Hash; -use std::rc::{Rc, Weak}; -use unic_langid::LanguageIdentifier; - -pub mod concurrent; - -/// The trait that needs to be implemented for each intl formatter that needs to be -/// memoized. 
-pub trait Memoizable { - /// Type of the arguments that are used to construct the formatter. - type Args: 'static + Eq + Hash + Clone; - - /// Type of any errors that can occur during the construction process. - type Error; - - /// Construct a formatter. This maps the [`Self::Args`] type to the actual constructor - /// for an intl formatter. - fn construct(lang: LanguageIdentifier, args: Self::Args) -> Result - where - Self: std::marker::Sized; -} - -/// The [`IntlLangMemoizer`] can memoize multiple constructed internationalization -/// formatters, and their configuration for a single locale. For instance, given "en-US", -/// a memorizer could retain 3 `DateTimeFormat` instances, and a `PluralRules`. -/// -/// For memoizing with multiple locales, see [`IntlMemoizer`]. -/// -/// # Example -/// -/// The code example does the following steps: -/// -/// 1. Create a static counter -/// 2. Create an `ExampleFormatter` -/// 3. Implement [`Memoizable`] for `ExampleFormatter`. -/// 4. Use `IntlLangMemoizer::with_try_get` to run `ExampleFormatter::format` -/// 5. Demonstrate the memoization using the static counter -/// -/// ``` -/// use intl_memoizer::{IntlLangMemoizer, Memoizable}; -/// use unic_langid::LanguageIdentifier; -/// -/// // Create a static counter so that we can demonstrate the side effects of when -/// // the memoizer re-constructs an API. -/// -/// static mut INTL_EXAMPLE_CONSTRUCTS: u32 = 0; -/// fn increment_constructs() { -/// unsafe { -/// INTL_EXAMPLE_CONSTRUCTS += 1; -/// } -/// } -/// -/// fn get_constructs_count() -> u32 { -/// unsafe { INTL_EXAMPLE_CONSTRUCTS } -/// } -/// -/// /// Create an example formatter, that doesn't really do anything useful. In a real -/// /// implementation, this could be a PluralRules or DateTimeFormat struct. -/// struct ExampleFormatter { -/// lang: LanguageIdentifier, -/// /// This is here to show how to initiate the API with an argument. 
-/// prefix: String, -/// } -/// -/// impl ExampleFormatter { -/// /// Perform an example format by printing information about the formatter -/// /// configuration, and the arguments passed into the individual format operation. -/// fn format(&self, example_string: &str) -> String { -/// format!( -/// "{} lang({}) string({})", -/// self.prefix, self.lang, example_string -/// ) -/// } -/// } -/// -/// /// Multiple classes of structs may be add1ed to the memoizer, with the restriction -/// /// that they must implement the `Memoizable` trait. -/// impl Memoizable for ExampleFormatter { -/// /// The arguments will be passed into the constructor. Here a single `String` -/// /// will be used as a prefix to the formatting operation. -/// type Args = (String,); -/// -/// /// If the constructor is fallible, than errors can be described here. -/// type Error = (); -/// -/// /// This function wires together the `Args` and `Error` type to construct -/// /// the intl API. In our example, there is -/// fn construct(lang: LanguageIdentifier, args: Self::Args) -> Result { -/// // Keep track for example purposes that this was constructed. -/// increment_constructs(); -/// -/// Ok(Self { -/// lang, -/// prefix: args.0, -/// }) -/// } -/// } -/// -/// // The following demonstrates how these structs are actually used with the memoizer. -/// -/// // Construct a new memoizer. -/// let lang = "en-US".parse().expect("Failed to parse."); -/// let memoizer = IntlLangMemoizer::new(lang); -/// -/// // These arguments are passed into the constructor for `ExampleFormatter`. -/// let construct_args = (String::from("prefix:"),); -/// let message1 = "The format operation will run"; -/// let message2 = "ExampleFormatter will be re-used, when a second format is run"; -/// -/// // Run `IntlLangMemoizer::with_try_get`. The name of the method means "with" an -/// // intl formatter, "try and get" the result. See the method documentation for -/// // more details. 
-/// -/// let result1 = memoizer -/// .with_try_get::(construct_args.clone(), |intl_example| { -/// intl_example.format(message1) -/// }); -/// -/// // The memoized instance of `ExampleFormatter` will be re-used. -/// let result2 = memoizer -/// .with_try_get::(construct_args.clone(), |intl_example| { -/// intl_example.format(message2) -/// }); -/// -/// assert_eq!( -/// result1.unwrap(), -/// "prefix: lang(en-US) string(The format operation will run)" -/// ); -/// assert_eq!( -/// result2.unwrap(), -/// "prefix: lang(en-US) string(ExampleFormatter will be re-used, when a second format is run)" -/// ); -/// assert_eq!( -/// get_constructs_count(), -/// 1, -/// "The constructor was only run once." -/// ); -/// -/// let construct_args = (String::from("re-init:"),); -/// -/// // Since the constructor args changed, `ExampleFormatter` will be re-constructed. -/// let result1 = memoizer -/// .with_try_get::(construct_args.clone(), |intl_example| { -/// intl_example.format(message1) -/// }); -/// -/// // The memoized instance of `ExampleFormatter` will be re-used. -/// let result2 = memoizer -/// .with_try_get::(construct_args.clone(), |intl_example| { -/// intl_example.format(message2) -/// }); -/// -/// assert_eq!( -/// result1.unwrap(), -/// "re-init: lang(en-US) string(The format operation will run)" -/// ); -/// assert_eq!( -/// result2.unwrap(), -/// "re-init: lang(en-US) string(ExampleFormatter will be re-used, when a second format is run)" -/// ); -/// assert_eq!( -/// get_constructs_count(), -/// 2, -/// "The constructor was invalidated and ran again." 
-/// ); -/// ``` -#[derive(Debug)] -pub struct IntlLangMemoizer { - lang: LanguageIdentifier, - map: RefCell, -} - -impl IntlLangMemoizer { - /// Create a new [`IntlLangMemoizer`] that is unique to a specific - /// [`LanguageIdentifier`] - pub fn new(lang: LanguageIdentifier) -> Self { - Self { - lang, - map: RefCell::new(type_map::TypeMap::new()), - } - } - - /// `with_try_get` means `with` an internationalization formatter, `try` and `get` a result. - /// The (potentially expensive) constructor for the formatter (such as `PluralRules` or - /// `DateTimeFormat`) will be memoized and only constructed once for a given - /// `construct_args`. After that the format operation can be run multiple times - /// inexpensively. - /// - /// The first generic argument `I` must be provided, but the `R` and `U` will be - /// deduced by the typing of the `callback` argument that is provided. - /// - /// I - The memoizable intl object, for instance a `PluralRules` instance. This - /// must implement the Memoizable trait. - /// - /// R - The return result from the callback `U`. - /// - /// U - The callback function. Takes an instance of `I` as the first parameter and - /// returns the R value. - pub fn with_try_get(&self, construct_args: I::Args, callback: U) -> Result - where - Self: Sized, - I: Memoizable + 'static, - U: FnOnce(&I) -> R, - { - let mut map = self - .map - .try_borrow_mut() - .expect("Cannot use memoizer reentrantly"); - let cache = map - .entry::>() - .or_insert_with(HashMap::new); - - let e = match cache.entry(construct_args.clone()) { - Entry::Occupied(entry) => entry.into_mut(), - Entry::Vacant(entry) => { - let val = I::construct(self.lang.clone(), construct_args)?; - entry.insert(val) - } - }; - Ok(callback(e)) - } -} - -/// [`IntlMemoizer`] is designed to handle lazily-initialized references to -/// internationalization formatters. 
-/// -/// Constructing a new formatter is often expensive in terms of memory and performance, -/// and the instance is often read-only during its lifetime. The format operations in -/// comparison are relatively cheap. -/// -/// Because of this relationship, it can be helpful to memoize the constructors, and -/// re-use them across multiple format operations. This strategy is used where all -/// instances of intl APIs such as `PluralRules`, `DateTimeFormat` etc. are memoized -/// between all `FluentBundle` instances. -/// -/// # Example -/// -/// For a more complete example of the memoization, see the [`IntlLangMemoizer`] documentation. -/// This example provides a higher-level overview. -/// -/// ``` -/// # use intl_memoizer::{IntlMemoizer, IntlLangMemoizer, Memoizable}; -/// # use unic_langid::LanguageIdentifier; -/// # use std::rc::Rc; -/// # -/// # struct ExampleFormatter { -/// # lang: LanguageIdentifier, -/// # prefix: String, -/// # } -/// # -/// # impl ExampleFormatter { -/// # fn format(&self, example_string: &str) -> String { -/// # format!( -/// # "{} lang({}) string({})", -/// # self.prefix, self.lang, example_string -/// # ) -/// # } -/// # } -/// # -/// # impl Memoizable for ExampleFormatter { -/// # type Args = (String,); -/// # type Error = (); -/// # fn construct(lang: LanguageIdentifier, args: Self::Args) -> Result { -/// # Ok(Self { -/// # lang, -/// # prefix: args.0, -/// # }) -/// # } -/// # } -/// # -/// let mut memoizer = IntlMemoizer::default(); -/// -/// // The memoziation happens per-locale. -/// let en_us = "en-US".parse().expect("Failed to parse."); -/// let en_us_memoizer: Rc = memoizer.get_for_lang(en_us); -/// -/// // These arguments are passed into the constructor for `ExampleFormatter`. The -/// // construct_args will be used for determining the memoization, but the message -/// // can be different and re-use the constructed instance. 
-/// let construct_args = (String::from("prefix:"),); -/// let message = "The format operation will run"; -/// -/// // Use the `ExampleFormatter` from the `IntlLangMemoizer` example. It returns a -/// // string that demonstrates the configuration of the formatter. This step will -/// // construct a new formatter if needed, and run the format operation. -/// // -/// // See `IntlLangMemoizer` for more details on this step. -/// let en_us_result = en_us_memoizer -/// .with_try_get::(construct_args.clone(), |intl_example| { -/// intl_example.format(message) -/// }); -/// -/// // The example formatter constructs a string with diagnostic information about -/// // the configuration. -/// assert_eq!( -/// en_us_result.unwrap(), -/// "prefix: lang(en-US) string(The format operation will run)" -/// ); -/// -/// // The process can be repeated for a new locale. -/// -/// let de_de = "de-DE".parse().expect("Failed to parse."); -/// let de_de_memoizer: Rc = memoizer.get_for_lang(de_de); -/// -/// let de_de_result = de_de_memoizer -/// .with_try_get::(construct_args.clone(), |intl_example| { -/// intl_example.format(message) -/// }); -/// -/// assert_eq!( -/// de_de_result.unwrap(), -/// "prefix: lang(de-DE) string(The format operation will run)" -/// ); -/// ``` -#[derive(Default)] -pub struct IntlMemoizer { - map: HashMap>, -} - -impl IntlMemoizer { - /// Get a [`IntlLangMemoizer`] for a given language. If one does not exist for - /// a locale, it will be constructed and weakly retained. See [`IntlLangMemoizer`] - /// for more detailed documentation how to use it. 
- pub fn get_for_lang(&mut self, lang: LanguageIdentifier) -> Rc { - match self.map.entry(lang.clone()) { - Entry::Vacant(empty) => { - let entry = Rc::new(IntlLangMemoizer::new(lang)); - empty.insert(Rc::downgrade(&entry)); - entry - } - Entry::Occupied(mut entry) => { - if let Some(entry) = entry.get().upgrade() { - entry - } else { - let e = Rc::new(IntlLangMemoizer::new(lang)); - entry.insert(Rc::downgrade(&e)); - e - } - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use fluent_langneg::{negotiate_languages, NegotiationStrategy}; - use intl_pluralrules::{PluralCategory, PluralRuleType, PluralRules as IntlPluralRules}; - use std::{sync::Arc, thread}; - - struct PluralRules(pub IntlPluralRules); - - impl PluralRules { - pub fn new( - lang: LanguageIdentifier, - pr_type: PluralRuleType, - ) -> Result { - let default_lang: LanguageIdentifier = "en".parse().unwrap(); - let pr_lang = negotiate_languages( - &[lang], - &IntlPluralRules::get_locales(pr_type), - Some(&default_lang), - NegotiationStrategy::Lookup, - )[0] - .clone(); - - Ok(Self(IntlPluralRules::create(pr_lang, pr_type)?)) - } - } - - impl Memoizable for PluralRules { - type Args = (PluralRuleType,); - type Error = &'static str; - fn construct(lang: LanguageIdentifier, args: Self::Args) -> Result { - Self::new(lang, args.0) - } - } - - #[test] - fn test_single_thread() { - let lang: LanguageIdentifier = "en".parse().unwrap(); - - let mut memoizer = IntlMemoizer::default(); - { - let en_memoizer = memoizer.get_for_lang(lang.clone()); - - let result = en_memoizer - .with_try_get::((PluralRuleType::CARDINAL,), |cb| cb.0.select(5)) - .unwrap(); - assert_eq!(result, Ok(PluralCategory::OTHER)); - } - - { - let en_memoizer = memoizer.get_for_lang(lang); - - let result = en_memoizer - .with_try_get::((PluralRuleType::CARDINAL,), |cb| cb.0.select(5)) - .unwrap(); - assert_eq!(result, Ok(PluralCategory::OTHER)); - } - } - - #[test] - fn test_concurrent() { - let lang: LanguageIdentifier = 
"en".parse().unwrap(); - let memoizer = Arc::new(concurrent::IntlLangMemoizer::new(lang)); - let mut threads = vec![]; - - // Spawn four threads that all use the PluralRules. - for _ in 0..4 { - let memoizer = Arc::clone(&memoizer); - threads.push(thread::spawn(move || { - memoizer - .with_try_get::((PluralRuleType::CARDINAL,), |cb| { - cb.0.select(5) - }) - .expect("Failed to get a PluralRules result.") - })); - } - - for thread in threads.drain(..) { - let result = thread.join().expect("Failed to join thread."); - assert_eq!(result, Ok(PluralCategory::OTHER)); - } - } -} +pub use lang_memoizer::IntlLangMemoizer; +pub use memoizable::Memoizable; +pub use memoizer::IntlMemoizer; diff --git a/intl-memoizer/src/memoizable.rs b/intl-memoizer/src/memoizable.rs new file mode 100644 index 00000000..793098f4 --- /dev/null +++ b/intl-memoizer/src/memoizable.rs @@ -0,0 +1,17 @@ +use icu_locid::LanguageIdentifier; +use std::hash::Hash; + +pub trait Memoizable { + type Args: 'static + Eq + Hash + Clone; + type Provider; + + type Error: std::fmt::Debug; + + fn construct( + lang: LanguageIdentifier, + args: Self::Args, + provider: Option<&Self::Provider>, + ) -> Result + where + Self: std::marker::Sized; +} diff --git a/intl-memoizer/src/memoizer.rs b/intl-memoizer/src/memoizer.rs new file mode 100644 index 00000000..54772c61 --- /dev/null +++ b/intl-memoizer/src/memoizer.rs @@ -0,0 +1,28 @@ +use crate::IntlLangMemoizer; +use icu_locid::LanguageIdentifier; +use std::collections::HashMap; +use std::rc::Rc; + +pub struct IntlMemoizer<'dp, DP> { + provider: Option<&'dp DP>, + map: HashMap>>, +} + +impl<'dp, DP> IntlMemoizer<'dp, DP> { + pub fn new(provider: Option<&'dp DP>) -> Self { + Self { + provider, + map: HashMap::default(), + } + } + + pub fn get_for_lang(&mut self, lang: LanguageIdentifier) -> Rc> { + if let Some(memoizer) = self.map.get(&lang) { + memoizer.clone() + } else { + let memoizer = Rc::new(IntlLangMemoizer::new(lang.clone(), self.provider)); + 
self.map.insert(lang, memoizer.clone()); + memoizer + } + } +} diff --git a/intl-memoizer/tests/single.rs b/intl-memoizer/tests/single.rs new file mode 100644 index 00000000..7ee3ae9e --- /dev/null +++ b/intl-memoizer/tests/single.rs @@ -0,0 +1,14 @@ + +static mut INTL_EXAMPLE_CONSTRUCTS: u32 = 0; +fn increment_constructs() { + unsafe { + INTL_EXAMPLE_CONSTRUCTS += 1; + } +} + +fn get_constructs_count() -> u32 { + unsafe { INTL_EXAMPLE_CONSTRUCTS } +} + +#[test] +fn test_memoizable() {}