From 0e2f8592d251bcc46f26acf84aa407be770dcc3e Mon Sep 17 00:00:00 2001
From: Jonathan Woollett-Light
Date: Sat, 14 Dec 2024 00:09:17 +0000
Subject: [PATCH] `zip_squash` & `zip_stretch`

Introduces new zip alternatives.
---
 src/free.rs        |   2 +
 src/lib.rs         |  57 ++++++++++++++++++++++-
 src/zip_squash.rs  |  92 ++++++++++++++++++++++++++++++++++++
 src/zip_stretch.rs | 126 +++++++++++++++++++++++++++++++++++++++++++++
 tests/zip.rs       |  47 ++++++++++++++++++
 5 files changed, 323 insertions(+), 1 deletion(-)
 create mode 100644 src/zip_squash.rs
 create mode 100644 src/zip_stretch.rs

diff --git a/src/free.rs b/src/free.rs
index 4c6820543..29bc197d8 100644
--- a/src/free.rs
+++ b/src/free.rs
@@ -28,6 +28,8 @@ pub use crate::put_back_n_impl::put_back_n;
 #[cfg(feature = "use_alloc")]
 pub use crate::rciter_impl::rciter;
 pub use crate::zip_eq_impl::zip_eq;
+pub use crate::zip_squash::zip_squash;
+pub use crate::zip_stretch::zip_stretch;
 
 /// Iterate `iterable` with a particular value inserted between each element.
 ///
diff --git a/src/lib.rs b/src/lib.rs
index 834a48dea..47896ba3d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -144,6 +144,8 @@ pub mod structs {
     pub use crate::with_position::WithPosition;
     pub use crate::zip_eq_impl::ZipEq;
     pub use crate::zip_longest::ZipLongest;
+    pub use crate::zip_squash::ZipSquash;
+    pub use crate::zip_stretch::ZipStretch;
     pub use crate::ziptuple::Zip;
 }
 
@@ -235,6 +237,8 @@ mod unziptuple;
 mod with_position;
 mod zip_eq_impl;
 mod zip_longest;
+mod zip_squash;
+mod zip_stretch;
 mod ziptuple;
 
 #[macro_export]
@@ -4537,10 +4541,61 @@ pub trait Itertools: Iterator {
             _ => Err(sh),
         }
     }
+
+    /// Create an iterator which iterates over both this and the specified
+    /// iterator simultaneously, yielding pairs of elements.
+    ///
+    /// Similar to [`Iterator::zip`] except elements are evenly sampled from
+    /// the longest iterator.
+    ///
+    /// ```
+    /// use itertools::Itertools;
+    /// let a = vec![1, 2];
+    /// let b = vec![1, 2, 3];
+    ///
+    /// let it = a.into_iter().zip_squash(b.into_iter());
+    /// itertools::assert_equal(it, vec![(1, 1), (2, 2)]);
+    /// ```
+    #[inline]
+    fn zip_squash<J>(self, other: J) -> ZipSquash<Self, J::IntoIter>
+    where
+        J: IntoIterator,
+        <J as IntoIterator>::IntoIter: ExactSizeIterator,
+        Self: ExactSizeIterator + Sized,
+    {
+        zip_squash::zip_squash(self, other)
+    }
+    /// Create an iterator which iterates over both this and the specified
+    /// iterator simultaneously, yielding pairs of elements.
+    ///
+    /// Always yielding the first and last elements of both iterators by cloning
+    /// elements in the shortest iterator.
+    ///
+    /// Similar to [`Itertools::zip_longest`] except elements in the shortest
+    /// iterator are evenly spread.
+    ///
+    /// ```
+    /// use itertools::Itertools;
+    /// let a = vec![1, 2];
+    /// let b = vec![1, 2, 3];
+    ///
+    /// let it = a.into_iter().zip_stretch(b.into_iter());
+    /// itertools::assert_equal(it, vec![(1, 1), (1, 2), (2, 3)]);
+    /// ```
+    #[inline]
+    fn zip_stretch<J>(self, other: J) -> ZipStretch<Self, J::IntoIter>
+    where
+        J: IntoIterator,
+        <J as IntoIterator>::IntoIter: ExactSizeIterator,
+        <<J as IntoIterator>::IntoIter as IntoIterator>::Item: Clone,
+        Self: ExactSizeIterator + Sized,
+        <Self as Iterator>::Item: Clone,
+    {
+        zip_stretch::zip_stretch(self, other)
+    }
 }
 
 impl<T> Itertools for T where T: Iterator + ?Sized {}
-
 /// Return `true` if both iterables produce equal sequences
 /// (elements pairwise equal and sequences of the same length),
 /// `false` otherwise.
diff --git a/src/zip_squash.rs b/src/zip_squash.rs
new file mode 100644
index 000000000..3ddafd210
--- /dev/null
+++ b/src/zip_squash.rs
@@ -0,0 +1,92 @@
+use super::size_hint;
+use std::cmp::Ordering;
+
+/// An iterator which iterates two other iterators simultaneously
+/// always returning elements evenly sampled from the longest iterator.
+///
+/// See [`.zip_squash()`](crate::Itertools::zip_squash) for more information.
+#[derive(Clone, Debug)]
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+pub struct ZipSquash<I: ExactSizeIterator, J: ExactSizeIterator> {
+    a: I,
+    b: J,
+    a_delta: f32,
+    b_delta: f32,
+    a_index: f32,
+    b_index: f32,
+}
+
+/// Zips two iterators skipping elements of the longest iterator to ensure it fully consumes both
+/// iterators.
+///
+/// [`IntoIterator`] enabled version of [`Itertools::zip_squash`](crate::Itertools::zip_squash).
+pub fn zip_squash<I, J>(i: I, j: J) -> ZipSquash<I::IntoIter, J::IntoIter>
+where
+    I: IntoIterator,
+    J: IntoIterator,
+    <I as IntoIterator>::IntoIter: ExactSizeIterator,
+    <J as IntoIterator>::IntoIter: ExactSizeIterator,
+{
+    use std::iter::ExactSizeIterator;
+    let (a, b) = (i.into_iter(), j.into_iter());
+    let (a_delta, b_delta) = match a.len().cmp(&b.len()) {
+        Ordering::Equal => (1f32, 1f32),
+        Ordering::Less => (1f32, b.len() as f32 / a.len() as f32),
+        Ordering::Greater => (a.len() as f32 / b.len() as f32, 1f32),
+    };
+    debug_assert!(a_delta >= 1f32);
+    debug_assert!(b_delta >= 1f32);
+    ZipSquash {
+        a,
+        b,
+        a_delta,
+        b_delta,
+        a_index: 0f32,
+        b_index: 0f32,
+    }
+}
+
+impl<I, J> Iterator for ZipSquash<I, J>
+where
+    I: ExactSizeIterator,
+    J: ExactSizeIterator,
+{
+    type Item = (I::Item, J::Item);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let (a, b) = (self.a.next(), self.b.next());
+        let a_new = self.a_index + self.a_delta;
+        if let Some(skip) = ((a_new.floor() - self.a_index.floor()) as usize).checked_sub(2) {
+            self.a.nth(skip);
+        }
+        self.a_index = a_new;
+
+        let b_new = self.b_index + self.b_delta;
+        if let Some(skip) = ((b_new.floor() - self.b_index.floor()) as usize).checked_sub(2) {
+            self.b.nth(skip);
+        }
+        self.b_index = b_new;
+
+        match (a, b) {
+            (None, None) => None,
+            (Some(a), Some(b)) => Some((a, b)),
+            (None, Some(_)) | (Some(_), None) => {
+                // Assert this is the first iteration.
+                debug_assert!(self.a_index <= self.a_delta);
+                debug_assert!(self.b_index <= self.b_delta);
+                None
+            }
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        size_hint::min(self.a.size_hint(), self.b.size_hint())
+    }
+}
+
+impl<I, J> ExactSizeIterator for ZipSquash<I, J>
+where
+    I: ExactSizeIterator,
+    J: ExactSizeIterator,
+{
+}
diff --git a/src/zip_stretch.rs b/src/zip_stretch.rs
new file mode 100644
index 000000000..b7eca9db2
--- /dev/null
+++ b/src/zip_stretch.rs
@@ -0,0 +1,126 @@
+use super::size_hint;
+use std::cmp::Ordering;
+use std::fmt;
+
+/// An iterator which iterates two other iterators simultaneously
+/// always returning the first and last elements of both iterators by using
+/// cloning to extend the length of the shortest iterator.
+///
+/// See [`.zip_stretch()`](crate::Itertools::zip_stretch) for more information.
+#[derive(Clone)]
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+pub struct ZipStretch<I: ExactSizeIterator, J: ExactSizeIterator>
+where
+    <I as Iterator>::Item: Clone,
+    <J as Iterator>::Item: Clone,
+{
+    a: I,
+    b: J,
+    a_delta: f32,
+    b_delta: f32,
+    a_index: f32,
+    b_index: f32,
+    a_dupe: Option<<I as Iterator>::Item>,
+    b_dupe: Option<<J as Iterator>::Item>,
+}
+
+impl<I: ExactSizeIterator + fmt::Debug, J: ExactSizeIterator + fmt::Debug> fmt::Debug
+    for ZipStretch<I, J>
+where
+    <I as Iterator>::Item: Clone,
+    <J as Iterator>::Item: Clone,
+{
+    debug_fmt_fields!(ZipStretch, a, b, a_delta, b_delta, a_index, b_index);
+}
+
+/// Zips two iterators cloning elements to extend the length of the shortest iterator to
+/// ensure it fully consumes both iterators.
+///
+/// [`IntoIterator`] enabled version of [`Itertools::zip_stretch`](crate::Itertools::zip_stretch).
+pub fn zip_stretch<I, J>(i: I, j: J) -> ZipStretch<I::IntoIter, J::IntoIter>
+where
+    I: IntoIterator,
+    J: IntoIterator,
+    <I as IntoIterator>::IntoIter: ExactSizeIterator,
+    <J as IntoIterator>::IntoIter: ExactSizeIterator,
+    <<I as IntoIterator>::IntoIter as IntoIterator>::Item: Clone,
+    <<J as IntoIterator>::IntoIter as IntoIterator>::Item: Clone,
+{
+    use std::iter::ExactSizeIterator;
+    let (a, b) = (i.into_iter(), j.into_iter());
+    let (a_delta, b_delta) = match a.len().cmp(&b.len()) {
+        Ordering::Equal => (1f32, 1f32),
+        Ordering::Less => (a.len() as f32 / b.len() as f32, 1f32),
+        Ordering::Greater => (1f32, b.len() as f32 / a.len() as f32),
+    };
+    debug_assert!(a_delta <= 1f32);
+    debug_assert!(b_delta <= 1f32);
+    ZipStretch {
+        a,
+        b,
+        a_delta,
+        b_delta,
+        a_index: 0f32,
+        b_index: 0f32,
+        a_dupe: None,
+        b_dupe: None,
+    }
+}
+
+impl<I, J> Iterator for ZipStretch<I, J>
+where
+    I: ExactSizeIterator,
+    J: ExactSizeIterator,
+    <I as Iterator>::Item: Clone,
+    <J as Iterator>::Item: Clone,
+{
+    type Item = (I::Item, J::Item);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.a_index.fract() < self.a_delta {
+            self.a_dupe = self.a.next();
+        }
+        self.a_index += self.a_delta;
+
+        if self.b_index.fract() < self.b_delta {
+            self.b_dupe = self.b.next();
+        }
+        self.b_index += self.b_delta;
+
+        match (&self.a_dupe, &self.b_dupe) {
+            (Some(a), Some(b)) => Some((a.clone(), b.clone())),
+            (None, Some(_)) | (Some(_), None) => {
+                // Assert this is the first iteration.
+                debug_assert!(self.a_index <= self.a_delta);
+                debug_assert!(self.b_index <= self.b_delta);
+                None
+            }
+            (None, None) => None,
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        // The longer input advances on every call, so it alone decides how many
+        // pairs remain; `min` would under-report and break `ExactSizeIterator`.
+        let started = self.a_index > 0f32 || self.b_index > 0f32;
+        let exhausted = if started {
+            self.a_dupe.is_none() || self.b_dupe.is_none()
+        } else {
+            self.a.len() == 0 || self.b.len() == 0
+        };
+        if exhausted {
+            (0, Some(0))
+        } else {
+            size_hint::max(self.a.size_hint(), self.b.size_hint())
+        }
+    }
+}
+
+impl<I, J> ExactSizeIterator for ZipStretch<I, J>
+where
+    I: ExactSizeIterator,
+    J: ExactSizeIterator,
+    <I as Iterator>::Item: Clone,
+    <J as Iterator>::Item: Clone,
+{
+}
diff --git a/tests/zip.rs b/tests/zip.rs
index daed31e32..79b170cbf 100644
--- a/tests/zip.rs
+++ b/tests/zip.rs
@@ -2,6 +2,53 @@ use itertools::multizip;
 use itertools::EitherOrBoth::{Both, Left, Right};
 use itertools::Itertools;
 
+#[test]
+fn zip_squash() {
+    let a: [i32; 0] = [];
+    let b: [i32; 0] = [];
+    let it = a.iter().copied().zip_squash(b.iter().copied());
+    itertools::assert_equal(it, vec![]);
+
+    let a = [1, 2, 3, 4, 5, 6];
+    let b: [i32; 0] = [];
+    let it = a.iter().copied().zip_squash(b.iter().copied());
+    itertools::assert_equal(it, vec![]);
+
+    let a: [i32; 0] = [];
+    let b = [1, 2, 3, 7];
+    let it = a.iter().copied().zip_squash(b.iter().copied());
+    itertools::assert_equal(it, vec![]);
+
+    let a = [1, 2, 3, 4, 5, 6];
+    let b = [1, 2, 3, 7];
+    let it = a.iter().copied().zip_squash(b.iter().copied());
+    itertools::assert_equal(it, vec![(1, 1), (2, 2), (4, 3), (5, 7)]);
+}
+
+#[test]
+fn zip_stretch() {
+    let a: [i32; 0] = [];
+    let b: [i32; 0] = [];
+    let it = a.iter().copied().zip_stretch(b.iter().copied());
+    itertools::assert_equal(it, vec![]);
+
+    let a = [1, 2, 3, 4, 5, 6];
+    let b: [i32; 0] = [];
+    let it = a.iter().copied().zip_stretch(b.iter().copied());
+    itertools::assert_equal(it, vec![]);
+
+    let a: [i32; 0] = [];
+    let b = [1, 2, 3, 7];
+    let it = a.iter().copied().zip_stretch(b.iter().copied());
+    itertools::assert_equal(it, vec![]);
+
+    let a = [1, 2, 3, 4, 5, 6];
+    let b = [1, 2, 3, 7];
+    let it = a.iter().copied().zip_stretch(b.iter().copied());
+    assert_eq!(it.len(), 6);
+    itertools::assert_equal(it, vec![(1, 1), (2, 1), (3, 2), (4, 3), (5, 3), (6, 7)]);
+}
+
 #[test]
 fn zip_longest_fused() {
     let a = [Some(1), None, Some(3), Some(4)];