From 0d52b664f1a1d0ae9621a9521e0cae29930a77dd Mon Sep 17 00:00:00 2001 From: Miguel Young Date: Mon, 15 Jul 2024 17:11:47 -0400 Subject: [PATCH] Move almost all view functions on `best::vec` and `best::strbuf` behind `operator->` (#25) This patch introduces `best::arrow`, a helper for returning anything out of an `operator->`. The doc comment on the type explains why it exists and what C++ semantics it abuses. `best::vec` and `best::strbuf` now have `operator->`s that effectively return `best::span` and `best::str`, respectively. This allows the removal of a ton of duplicated functions, and means that the owned types can't "lag" behind the unowned types! This also means that e.g. `best::box` does not need to duplicate all of the functions from `best::span`, like `best::vec` used to. --- best/base/fwd.h | 4 + best/cli/parser.cc | 12 +- best/container/BUILD | 1 + best/container/span_test.cc | 10 +- best/container/vec.h | 231 +++++---------------------- best/func/BUILD | 8 + best/func/arrow.h | 86 +++++++++++ best/test/test.cc | 2 +- best/text/strbuf.h | 301 ++++++++++++------------------------ best/text/strbuf_test.cc | 146 ++++++++--------- 10 files changed, 319 insertions(+), 482 deletions(-) create mode 100644 best/func/arrow.h diff --git a/best/base/fwd.h b/best/base/fwd.h index 1ed56c8..7218cf5 100644 --- a/best/base/fwd.h +++ b/best/base/fwd.h @@ -72,6 +72,10 @@ class pun; // template > // class span; +// best/func/arrow.h +template +class arrow; + // best/func/fnref.h template class fnref; diff --git a/best/cli/parser.cc b/best/cli/parser.cc index 23b2b4d..93abaf0 100644 --- a/best/cli/parser.cc +++ b/best/cli/parser.cc @@ -135,8 +135,8 @@ void normalize(best::strbuf& name, const auto& about) { } // TODO: also check the ends. 
- if (name.starts_with('-') || name.starts_with('_') || - name.contains(&reserved_rune)) { + if (name->starts_with('-') || name->starts_with('_') || + name->contains(&reserved_rune)) { best::wtf("field {}::{}'s name ({:?}) contains reserved runes", about.strukt->path(), about.field, name); } @@ -342,8 +342,8 @@ void cli::init() { }); // Now, sort the flags so we can bisect through them later. - impl_->sorted_flags.sort(&impl::entry::key); - impl_->sorted_subs.sort(&impl::entry::key); + impl_->sorted_flags->sort(&impl::entry::key); + impl_->sorted_subs->sort(&impl::entry::key); // Check for duplicates. best::option prev; @@ -647,7 +647,7 @@ best::strbuf cli::usage(best::pretext exe, bool hidden) const { best::format(out, "Usage: {}", exe); if (!parents.is_empty()) { - parents.reverse(); + parents->reverse(); for (auto sub : parents) { best::format(out, " {}", sub); } @@ -809,7 +809,7 @@ best::strbuf cli::usage(best::pretext exe, bool hidden) const { } best::str prefix = e.key[{ - .end = e.key.size() - e.key.split('.').last()->size(), + .end = e.key.size() - e.key->split('.').last()->size(), }]; // Chop off everything past the last `.` to make the prefix. 
diff --git a/best/container/BUILD b/best/container/BUILD index bf16792..5d28582 100644 --- a/best/container/BUILD +++ b/best/container/BUILD @@ -169,6 +169,7 @@ cc_library( ":object", ":option", ":span", + "//best/func:arrow", "//best/math:bit", "//best/math:overflow", "//best/memory:allocator", diff --git a/best/container/span_test.cc b/best/container/span_test.cc index 39af335..21ee3d1 100644 --- a/best/container/span_test.cc +++ b/best/container/span_test.cc @@ -237,10 +237,10 @@ best::test FrontAndBack = [](auto& t) { best::test Swap = [](auto& t) { best::vec ints = {1, 2, 3, 4, 5}; - ints.as_span().swap(1, 2); + ints->swap(1, 2); t.expect_eq(ints, best::span{1, 3, 2, 4, 5}); - ints.as_span().reverse(); + ints->reverse(); t.expect_eq(ints, best::span{5, 4, 2, 3, 1}); }; @@ -332,13 +332,13 @@ best::test Sort = [](auto& t) { best::mark_sort_header_used(); best::vec ints = {5, 4, 3, 2, 1}; - ints.as_span().sort(); + ints->sort(); t.expect_eq(ints, best::span{1, 2, 3, 4, 5}); - ints.as_span().stable_sort([](int x) { return best::count_ones(x); }); + ints->stable_sort([](int x) { return best::count_ones(x); }); t.expect_eq(ints, best::span{1, 2, 4, 3, 5}); - ints.as_span().sort([](int x, int y) { return y <=> x; }); + ints->sort([](int x, int y) { return y <=> x; }); t.expect_eq(ints, best::span{5, 4, 3, 2, 1}); }; diff --git a/best/container/vec.h b/best/container/vec.h index 38edc4e..93f9aac 100644 --- a/best/container/vec.h +++ b/best/container/vec.h @@ -27,6 +27,7 @@ #include "best/container/object.h" #include "best/container/option.h" #include "best/container/span.h" +#include "best/func/arrow.h" #include "best/log/internal/crash.h" #include "best/log/location.h" #include "best/math/bit.h" @@ -77,8 +78,9 @@ constexpr size_t vec_inline_default() { /// corresponding span type. Most common span functions are re-implemented on /// `best::vec`, for convenience. 
/// -/// In addition to all of the `best::span` functions, `best::vec` offers the -/// usual complement of push, insert, remove, etc. functionality. +/// Note that `best::vec` only provides a subset of the `best::span` functions. +/// To access the full suite of span operations, you must access them through +/// `->`, e.g., `vec->sort()`. template (), best::allocator A = best::malloc> class vec final { @@ -216,11 +218,16 @@ class vec final { bool is_inlined() const { return on_heap().is_empty(); } bool is_on_heap() { return on_heap().has_value(); } - /// # `vec::as_span()` + /// # `vec::as_span()`, `vec::operator->()` /// /// Returns a span over the array this vector manages. + /// + /// All of the span methods, including those not explicitly delegated, are + /// accessible through `->`. For example, `my_vec->size()` works. best::span as_span() const { return *this; } best::span as_span() { return *this; } + best::arrow> operator->() const { return as_span(); } + best::arrow> operator->() { return as_span(); } /// # `vec::spare_capacity()` /// @@ -232,48 +239,23 @@ class vec final { return best::span(data() + size(), capacity() - size()); } - /// # `vec::first()` + /// # `vec::first()`, `vec::last()` /// - /// Returns the first, or first `m`, elements of this vector, or `best::none` - /// if there are not enough elements. + /// Returns the first or last element of the vector, or `best::none` if the + /// vector is empty. best::option first() const { return as_span().first(); } best::option first() { return as_span().first(); } - template - best::option> first(best::index_t i = {}) const { - return as_span().first(i); - } - template - best::option> first(best::index_t i = {}) { - return as_span().first(i); - } - - /// # `vec::last()` - /// - /// Returns the last, or last `m`, elements of this vector, or `best::none` if - /// there are not enough elements. 
best::option last() const { return as_span().last(); } best::option last() { return as_span().last(); } - template - best::option> last(best::index_t i = {}) const { - return as_span().last(i); - } - template - best::option> last(best::index_t i = {}) { - return as_span().last(i); - } - /// # `vec[idx]` + /// # `vec[idx]`, `vec[{.start = ...}]` /// - /// Extracts a single element. Crashes if the requested index is + /// Extracts a single element or a subspan. Crashes if the requested index is /// out-of-bounds. const T& operator[](best::track_location idx) const { return as_span()[idx]; } T& operator[](best::track_location idx) { return as_span()[idx]; } - - /// # `vec[{.start = ...}]` - /// - /// Extracts a subspan. Crashes if the requested range is out-of-bounds. best::span operator[](best::bounds::with_location range) const { return as_span()[range]; } @@ -281,147 +263,13 @@ class vec final { return as_span()[range]; } - /// # `vec::at(idx)` + /// # `vec::at(idx)`, `vec::at({.start = ...})` /// - /// Extracts a single element. If the requested index is out-of-bounds, - /// returns best::none. + /// Extracts a single element or a subspan. If the requested index is + /// out-of-bounds, returns `best::none`. best::option at(size_t idx) const { return as_span().at(idx); } best::option at(size_t idx) { return as_span().at(idx); } - /// # `vec::at(unsafe, idx)` - /// - /// Extracts a single element. If the requested index is out-of-bounds, - /// Undefined Behavior. - const T& at(unsafe u, size_t idx) const { return as_span().at(u, idx); } - T& at(unsafe u, size_t idx) { return as_span().at(u, idx); } - - /// # `vec::at({.start = ...})` - /// - /// Extracts a subspan. If the requested range is out-of-bounds, returns - /// best::none. - best::option> at(best::bounds range) const { - return as_span().at(range); - } - best::option> at(best::bounds range) { - return as_span().at(range); - } - - /// # `vec::at(unsafe, {.start = ...})` - /// - /// Extracts a subspan. 
If the requested range is out-of-bounds, returns - /// best::none. - best::span at(unsafe u, best::bounds range) const { - return as_span().at(u, range); - } - best::span at(unsafe u, best::bounds range) { - return as_span().at(u, range); - } - - /// # `vec::reverse()` - /// - /// Reverses the order of the elements in this vector, in-place. - void reverse() { as_span().reverse(); } - - /// # `vec::contains()` - /// - /// Performs a linear search for a matching element. - bool contains(const best::equatable auto& needle) const { - return as_span().contains(needle); - } - - /// # `vec::starts_with()` - /// - /// Checks if this vector starts with a particular pattern. - template U = T> - constexpr bool starts_with(best::span needle) const { - return as_span().starts_with(needle); - } - - /// # `vec::ends_with()` - /// - /// Checks if this vector ends with a particular pattern. - template U = T> - constexpr bool ends_with(best::span needle) const { - return as_span().ends_with(needle); - } - - /// # `vec::strip_prefix()` - /// - /// If this vector starts with `prefix`, removes it and returns the rest; - /// otherwise returns `best::none`. - template U = T> - best::option> strip_prefix( - best::span prefix) const { - return as_span().strip_prefix(prefix); - } - template U = T> - best::option> strip_prefix(best::span prefix) { - return as_span().strip_prefix(prefix); - } - - /// # `vec::strip_suffix()` - /// - /// If this vector ends with `suffix`, removes it and returns the rest; - /// otherwise returns `best::none`. - template U = T> - best::option> strip_suffix( - best::span suffix) const { - return as_span().strip_prefix(suffix); - } - template U = T> - best::option> strip_suffix(best::span suffix) { - return as_span().strip_prefix(suffix); - } - - /// # `span::sort()` - /// - /// Sorts the vector in place. See span::sort() for more information on - /// the three overloads. 
- /// - /// Because this is implemented using the header, which would - /// pull in a completely unacceptable amount of this, the implementations of - /// these functions live in `//best/container/span_sort.h`, which must be - /// included separately. - void sort() - requires best::comparable - { - as_span().sort(); - } - void sort(best::callable auto&& get_key) { - as_span().sort(BEST_FWD(get_key)); - } - void sort(best::callable auto&& cmp) { - as_span().sort(BEST_FWD(cmp)); - } - - /// # `vec::stable_sort()` - /// - /// Identical to `sort()`, but uses a stable sort which guarantees that equal - /// items are not reordered past each other. This usually means the algorithm - /// is slower. - void stable_sort() - requires best::comparable - { - as_span().stable_sort(); - } - void stable_sort(best::callable auto&& get_key) { - as_span().stable_sort(BEST_FWD(get_key)); - } - void stable_sort( - best::callable auto&& cmp) { - as_span().stable_sort(BEST_FWD(cmp)); - } - - /// # `vec::copy_from()` - /// - /// Copies values from src. This has the same semantics as Go's `copy()` - /// builtin: if the lengths are not equal, only the overlapping part is - /// copied. - template - void copy_from(best::span src) { - as_span().copy_from(src); - } - /// # `vec::citer`, `vec::iter`, `vec::begin()`, `vec::end()`. /// /// Spans are iterable exactly how you'd expect. @@ -550,18 +398,6 @@ class vec final { /// this resizes the underlying buffer. Otherwise, does nothing. void resize_uninit(size_t new_size); - /// # `vec::shift_within()` - /// - /// Performs an internal `memmove()`. This relocates `count` elements starting - /// at `src` to `dst`. - /// - /// NOTE! This function assumes that the destination range is uninitialized, - /// *and* that the source range is initialized. It will not update the - /// size of the vector; the caller is responsible for doing that themselves. 
- void shift_within(unsafe u, size_t dst, size_t src, size_t count) { - as_span().shift_within(u, dst, src, count); - } - /// # `vec::spill_to_heap()` /// /// Forces this vector to be in heap mode instead of inlined mode. The vector @@ -858,24 +694,29 @@ void vec::splice_within(size_t idx, size_t start, if (idx > end) { // The spliced-from region is before the insertion point, so we have // one loop. - at(u, {.start = idx, .count = count}) - .emplace_from(at(u, {.start = start, .end = end})); + as_span() + .at(u, {.start = idx, .count = count}) + .emplace_from(as_span().at(u, {.start = start, .end = end})); } else if (idx < start) { // The spliced-from region is after the insertion point. This is the // same as above, but we need to offset the slice operation by // `that.size()`. - at(u, {.start = idx, .count = count}) - .emplace_from(at(u, {.start = start + count, .end = end + count})); + as_span() + .at(u, {.start = idx, .count = count}) + .emplace_from( + as_span().at(u, {.start = start + count, .end = end + count})); } else { // The annoying case. We need to do the copy in two parts. 
size_t before = idx - start; size_t after = count - before; - at(u, {.start = idx, .count = before}) - .emplace_from(at(u, {.start = start, .count = before})); - at(u, {.start = idx + before, .count = after}) - .emplace_from(at(u, {.start = idx, .count = after})); + as_span() + .at(u, {.start = idx, .count = before}) + .emplace_from(as_span().at(u, {.start = start, .count = before})); + as_span() + .at(u, {.start = idx + before, .count = after}) + .emplace_from(as_span().at(u, {.start = idx, .count = after})); } } @@ -915,8 +756,9 @@ void vec::erase(best::bounds bounds) { size_t start = bounds.start; size_t end = bounds.start + range.size(); size_t len = size() - end; - shift_within(unsafe("shifting elements over the ones we just destroyed"), - start, end, len); + as_span().shift_within( + unsafe("shifting elements over the ones we just destroyed"), start, end, + len); set_size(unsafe("updating length to exclude the range we just deleted"), size() - range.size()); } @@ -935,7 +777,7 @@ best::object_ptr vec::insert_uninit(unsafe u, size_t start, /// Relocate elements to create an empty space. 
auto end = start + count; if (start < size()) { - shift_within(u, end, start, size() - start); + as_span().shift_within(u, end, start, size() - start); } set_size(u, size() + count); return data() + start; @@ -946,7 +788,8 @@ void vec::resize_uninit(size_t new_size) { auto old_size = this->size(); if (new_size <= capacity()) { if (new_size < old_size) { - at(unsafe{"we just did a bounds check (above)"}, {.start = new_size}) + as_span() + .at(unsafe{"we just did a bounds check (above)"}, {.start = new_size}) .destroy_in_place(); set_size(unsafe("elements beyond new_size destroyed above"), new_size); } diff --git a/best/func/BUILD b/best/func/BUILD index 8e06930..c15bfbc 100644 --- a/best/func/BUILD +++ b/best/func/BUILD @@ -1,5 +1,13 @@ package(default_visibility = ["//visibility:public"]) +cc_library( + name = "arrow", + hdrs = ["arrow.h"], + deps = [ + "//best/meta:taxonomy", + ] +) + cc_library( name = "call", hdrs = [ diff --git a/best/func/arrow.h b/best/func/arrow.h new file mode 100644 index 0000000..50d8821 --- /dev/null +++ b/best/func/arrow.h @@ -0,0 +1,86 @@ +/* //-*- C++ -*-///////////////////////////////////////////////////////////// *\ + + Copyright 2024 + Miguel Young de la Sota and the Best Contributors πŸ§ΆπŸˆβ€β¬› + + Licensed under the Apache License, Version 2.0 (the "License"); you may not + use this file except in compliance with the License. You may obtain a copy + of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + License for the specific language governing permissions and limitations + under the License. + +\* ////////////////////////////////////////////////////////////////////////// */ + +#ifndef BEST_FUNC_ARROW_H_ +#define BEST_FUNC_ARROW_H_ + +#include "best/meta/taxonomy.h" + +//! 
Arrows, helpers for implementing exotic `operator->`s. +//! +//! When C++ sees `a->b`, and `a` is not a raw pointer, it replaces it with +//! `a.operator->()->b`. It does so recursively until it obtains a raw pointer. +//! +//! Suppose we wanted to return some kind of non-pointer view from `operator->`; +//! for example, we want to inject all of the functions of some type `T` into +//! our type `U`, findable by `->`. But `U` doesn't contain a `T` that we can +//! return a pointer to! +//! +//! An obviously incorrect implementation of `operator->` in this case would be +//! +//! ``` +//! T* operator->() { +//! T view = ...; +//! return &view; +//! } +//! ``` +//! +//! When the `operator->` returns, it returns a pointer to an object whose +//! lifetime is over. Oops! However, we can instead return a `best::arrow`: +//! +//! ``` +//! best::arrow operator->() { +//! return T{...}; +//! } +//! ``` +//! +//! Then, when C++ goes to desugar `a->b`, it will expand to +//! `a.operator->()->b`, calling `U`'s `operator->`; then it will expand again +//! to `a.operator->().operator->()->b`, calling `best::arrow::operator->()`. +//! +//! Because we returned an actual `T`, it is not destroyed until the end of the +//! full expression, so the pointer that the final `->b` offsets will be valid. + +namespace best { +/// # `best::arrow` +/// +/// A wrapper over a `T` that implements an `operator->` that returns a pointer +/// to that value. 
+template +class arrow final { + public: + constexpr arrow(auto&& arg) : value_(BEST_FWD(arg)) {} + constexpr const T* operator->() const { return best::addr(value_); } + constexpr T* operator->() { return best::addr(value_); } + + arrow() = delete; + constexpr arrow(const arrow&) = default; + constexpr arrow& operator=(const arrow&) = default; + constexpr arrow(arrow&&) = default; + constexpr arrow& operator=(arrow&&) = default; + + private: + T value_; +}; + +template +arrow(T&&) -> arrow>; +} // namespace best + +#endif // BEST_FUNC_ARROW_H_ diff --git a/best/test/test.cc b/best/test/test.cc index 906ac82..8cc77b5 100644 --- a/best/test/test.cc +++ b/best/test/test.cc @@ -77,7 +77,7 @@ bool test::run_all(const flags& flags) { if (test->name().contains(skip)) goto skip; } if (!flags.filters.is_empty()) { - auto found = flags.filters.as_span().contains( + auto found = flags.filters->contains( [&](const auto& f) { return test->name().contains(f); }); if (!found) goto skip; diff --git a/best/text/strbuf.h b/best/text/strbuf.h index 7c27cb7..d8f87f7 100644 --- a/best/text/strbuf.h +++ b/best/text/strbuf.h @@ -24,6 +24,7 @@ #include "best/container/span.h" #include "best/container/vec.h" +#include "best/func/arrow.h" #include "best/memory/allocator.h" #include "best/text/encoding.h" #include "best/text/rune.h" @@ -73,6 +74,10 @@ using strbuf32 = best::textbuf; /// A `best::textbuf` may not point to invalidly-text data. Constructors from /// unauthenticated strings must go through factories that return /// `best::optional`. +/// +/// Note that `best::textbuf` only provides a subset of the `best::text` +/// functions. To access the full suite of text operations, you must access them +/// through `->`, e.g., `buf->find()`. template class textbuf final { public: @@ -218,15 +223,19 @@ class textbuf final { /// Returns the underlying text encoding. 
const encoding& enc() const { return enc_; } - /// # `textbuf::as_text()` + /// # `textbuf::as_text()`, `textbuf::operator->()` /// /// Returns the span of code units that backs this string. This is also /// an implicit conversion. + /// + /// All of the text methods, including those not explicitly delegated, are + /// accessible through `->`. For example, `my_str->size()` works. text as_text() const { return text(unsafe("buf_ is always validly encoded"), {buf_.as_span(), enc()}); } operator text() const { return as_text(); } + best::arrow operator->() const { return as_text(); } /// # `textbuf::operator buf` /// @@ -235,22 +244,6 @@ class textbuf final { buf into_buf() && { return std::move(buf_); } operator buf() && { return std::move(buf_); } - /// # `textbuf::as_codes()` - /// - /// Returns the span of code units that backs this string. - best::span as_codes() const { return buf_; } - - /// # `textbuf::is_rune_boundary()` - /// - /// Returns whether or not `idx` is a rune boundary or not. Returns `false` - /// for oud-of-bounds indices. - /// - /// For stateless encodings, this is an O(1) check. For non-synchronizing - /// encodings, it is O(n). - bool is_rune_boundary(size_t idx) const { - return as_text().is_rune_boundary(idx); - } - /// # `text[{...}]` /// /// Gets the substring in the given range. Crashes on out-of-bounds access @@ -272,11 +265,6 @@ class textbuf final { return as_text().at(range); } - /// # `textbuf::at(unsafe)` - /// - /// Gets the substring in the given range, performing no bounds checks. - text at(unsafe u, best::bounds range) const { return as_text().at(u, range); } - /// # `text::rune_iter`, `text::runes()`. /// /// An iterator over the runes of a `best::text`. @@ -290,109 +278,6 @@ class textbuf final { using rune_index_iter = text::rune_index_iter; rune_index_iter rune_indices() const { return as_text().rune_indices(); } - /// # `textbuf::starts_with()` - /// - /// Checks whether this string begins with the specifies substring or rune. 
- bool starts_with(rune prefix) const { return as_text().starts_with(prefix); } - bool starts_with(const string_type auto& prefix) const { - return as_text().starts_with(prefix); - } - bool starts_with(best::callable auto&& pred) const { - return as_text().starts_with(BEST_FWD(pred)); - } - - /// # `textbuf::trim_prefix()` - /// - /// If this string starts with the given prefix, returns a copy of this string - /// with that prefix removed. - best::option strip_prefix(rune prefix) const { - return as_text().strip_prefix(prefix); - } - best::option strip_prefix(const string_type auto& prefix) const { - return as_text().strip_prefix(prefix); - } - best::option strip_prefix( - best::callable auto&& pred) const { - return as_text().strip_prefix(BEST_FWD(pred)); - } - - /// # `textbuf::split_at()` - /// - /// Splits this string into two on the given index. If the desired split point - /// is out of bounds, returns `best::none`. - best::option> split_at(size_t n) const { - return as_text().split_at(n); - } - - /// # `textbuf::find()`. - /// - /// Finds the first occurrence of a pattern by linear search, and returns its - /// position. - /// - /// A pattern may be: - /// - /// - A rune. - /// - A string type. - /// - A rune predicate. - /// - /// Where possible, this function will automatically call vectorized - /// implementations of e.g. `memchr` and `memcmp` for finding the desired - /// pattern. Therefore, when possible, prefer to provide a needle by value. - best::option find(best::rune needle) const { - return as_text().find(needle); - } - best::option find(const best::string_type auto& needle) const { - return as_text().find(needle); - } - best::option find(best::callable auto&& pred) const { - return as_text().find(BEST_FWD(pred)); - } - - /// # `text::contains()` - /// - /// Determines whether a substring exists that matches some pattern. - /// - /// A pattern may be as in `textbuf::find()`. 
- bool contains(rune needle) const { return find(needle).has_value(); } - bool contains(const string_type auto& needle) const { - return find(needle).has_value(); - } - bool contains(best::callable auto&& needle) const { - return find(BEST_FWD(needle)).has_value(); - } - - /// # `text::split_once()` - /// - /// Calls `text::find()` to find the first occurrence of some pattern, and - /// if found, returns the substrings before and after the separator. - /// - /// A pattern for a separator may be as in `textbuf::find()`. - best::option> split_once(rune needle) const { - return as_text().split_once(needle); - } - best::option> split_once( - const string_type auto& needle) const { - return as_text().split_once(needle); - } - best::option> split_once( - best::callable auto&& pred) const { - return as_text().split_once(BEST_FWD(pred)); - } - - /// # `pretext::split()`. - /// - /// Returns an iterator over substrings separated by some pattern. Internally, - /// it calls `text::split_once()` until it is out of string. - /// - /// A pattern for a separator may be as in `pretext::find()`. - auto split(best::rune needle) const { return as_text().split(needle); } - auto split(const best::string_type auto& needle) const { - return as_text().split(needle); - } - auto split(best::callable auto&& pred) const { - return as_text().split(BEST_FWD(pred)); - } - /// # `textbuf::reserve()`. /// /// Ensures that pushing an additional `count` code units would not cause this @@ -414,43 +299,8 @@ class textbuf final { /// /// Pushes a rune or string to this vector. Returns `false` if input text /// contains characters that cannot be transcoded to this strings's encoding. 
- bool push(rune r) { - code buf[About.max_codes_per_rune]; - if (auto codes = r.encode(buf, enc())) { - buf_.append(*codes); - return true; - } - return false; - } - bool push(const string_type auto& that) { - if constexpr (best::is_text && - best::same_encoding_code()) { - if (best::same_encoding(*this, that)) { - buf_.append(that); - return true; - } - } - - if constexpr (best::is_text || - best::is_pretext) { - size_t watermark = size(); - for (auto r : that.runes()) { - reserve(About.max_codes_per_rune); - best::span buf{buf_.data() + buf_.size(), - About.max_codes_per_rune}; - if (auto codes = r.encode(buf, this->enc())) { - buf_.set_size(unsafe("we just wrote this much data in encode()"), - size() + codes.ok()->size()); - continue; - } - truncate(watermark); - return false; - } - return true; - } else { - return push(best::pretext(that)); - } - } + bool push(rune r); + bool push(const string_type auto& that); /// # `textbuf::push_lossy()`. /// @@ -458,47 +308,8 @@ class textbuf final { /// characters that cannot be transcoded into this string's encoding, they /// are replaced with `rune::Replacement`, or if that cannot be encoded, with /// `?`. 
- void push_lossy(rune r) { - code buf[About.max_codes_per_rune]; - if (auto codes = r.encode(buf, enc())) { - buf_.append(*codes); - } else if (auto codes = rune::Replacement.encode(buf, enc())) { - buf_.append(*codes); - } else { - codes = rune('?').encode(buf, enc()); - buf_.append(*codes); - } - } - void push_lossy(const string_type auto& that) { - if constexpr (best::is_text && - best::same_encoding_code()) { - if (best::same_encoding(*this, that)) { - buf_.append(that); - return; - } - } - - if constexpr (best::is_text || - best::is_pretext) { - for (auto r : that.runes()) { - reserve(About.max_codes_per_rune); - best::span buf = {buf_.data() + buf_.size(), - About.max_codes_per_rune}; - - unsafe u("we just wrote this much data in encode()"); - if (auto codes = r.encode(buf, this->enc())) { - buf_.set_size(u, size() + codes.ok()->size()); - } else if (auto codes = rune::Replacement.encode(buf, this->enc())) { - buf_.set_size(u, size() + codes.ok()->size()); - } else { - codes = rune('?').encode(buf, this->enc()); - buf_.set_size(u, size() + codes.ok()->size()); - } - } - } else { - push_lossy(best::pretext(that)); - } - } + void push_lossy(rune r); + void push_lossy(const string_type auto& that); /// # `textbuf::clear()`. 
/// @@ -559,6 +370,90 @@ best::option> textbuf::from(buf data, encoding enc) { return textbuf(best::in_place, std::move(data), std::move(enc)); } + +template +bool textbuf::push(rune r) { + code buf[About.max_codes_per_rune]; + if (auto codes = r.encode(buf, enc())) { + buf_.append(*codes); + return true; + } + return false; +} +template +bool textbuf::push(const string_type auto& that) { + if constexpr (best::is_text && + best::same_encoding_code()) { + if (best::same_encoding(*this, that)) { + buf_.append(that); + return true; + } + } + + if constexpr (best::is_text || + best::is_pretext) { + size_t watermark = size(); + for (auto r : that.runes()) { + reserve(About.max_codes_per_rune); + best::span buf{buf_.data() + buf_.size(), About.max_codes_per_rune}; + if (auto codes = r.encode(buf, this->enc())) { + buf_.set_size(unsafe("we just wrote this much data in encode()"), + size() + codes.ok()->size()); + continue; + } + truncate(watermark); + return false; + } + return true; + } else { + return push(best::pretext(that)); + } +} + +template +void textbuf::push_lossy(rune r) { + code buf[About.max_codes_per_rune]; + if (auto codes = r.encode(buf, enc())) { + buf_.append(*codes); + } else if (auto codes = rune::Replacement.encode(buf, enc())) { + buf_.append(*codes); + } else { + codes = rune('?').encode(buf, enc()); + buf_.append(*codes); + } +} + +template +void textbuf::push_lossy(const string_type auto& that) { + if constexpr (best::is_text && + best::same_encoding_code()) { + if (best::same_encoding(*this, that)) { + buf_.append(that); + return; + } + } + + if constexpr (best::is_text || + best::is_pretext) { + for (auto r : that.runes()) { + reserve(About.max_codes_per_rune); + best::span buf = {buf_.data() + buf_.size(), + About.max_codes_per_rune}; + + unsafe u("we just wrote this much data in encode()"); + if (auto codes = r.encode(buf, this->enc())) { + buf_.set_size(u, size() + codes.ok()->size()); + } else if (auto codes = rune::Replacement.encode(buf, 
this->enc())) { + buf_.set_size(u, size() + codes.ok()->size()); + } else { + codes = rune('?').encode(buf, this->enc()); + buf_.set_size(u, size() + codes.ok()->size()); + } + } + } else { + push_lossy(best::pretext(that)); + } +} } // namespace best #endif // BEST_TEXT_STRBUF_H_ diff --git a/best/text/strbuf_test.cc b/best/text/strbuf_test.cc index 2906724..cfb5695 100644 --- a/best/text/strbuf_test.cc +++ b/best/text/strbuf_test.cc @@ -104,126 +104,126 @@ best::test PushLossy = [](auto& t) { best::test Affix = [](auto& t) { best::strbuf haystack = "a complicated string. see solomon: 🐈‍⬛"; - t.expect(haystack.starts_with("a complicated string")); - t.expect(!haystack.starts_with("complicated string")); - t.expect(haystack.starts_with(u"a complicated string")); - t.expect(!haystack.starts_with(u"complicated string")); - t.expect(haystack.starts_with(str("a complicated string"))); - t.expect(!haystack.starts_with(str("complicated string"))); - t.expect(haystack.starts_with(str16(u"a complicated string"))); - t.expect(!haystack.starts_with(str16(u"complicated string"))); - - t.expect(haystack.starts_with('a')); - t.expect(!haystack.starts_with('z')); - t.expect(!haystack.starts_with(U'🧢')); + t.expect(haystack->starts_with("a complicated string")); + t.expect(!haystack->starts_with("complicated string")); + t.expect(haystack->starts_with(u"a complicated string")); + t.expect(!haystack->starts_with(u"complicated string")); + t.expect(haystack->starts_with(str("a complicated string"))); + t.expect(!haystack->starts_with(str("complicated string"))); + t.expect(haystack->starts_with(str16(u"a complicated string"))); + t.expect(!haystack->starts_with(str16(u"complicated string"))); + + t.expect(haystack->starts_with('a')); + t.expect(!haystack->starts_with('z')); + t.expect(!haystack->starts_with(U'🧢')); }; best::test Contains = [](auto& t) { best::strbuf haystack = "a complicated string.
see solomon: 🐈‍⬛"; - t.expect(haystack.contains("solomon")); - t.expect(!haystack.contains("daisy")); - t.expect(haystack.contains(u"solomon")); - t.expect(!haystack.contains(u"daisy")); + t.expect(haystack->contains("solomon")); + t.expect(!haystack->contains("daisy")); + t.expect(haystack->contains(u"solomon")); + t.expect(!haystack->contains(u"daisy")); - t.expect(haystack.contains(U'🐈')); - t.expect(!haystack.contains('z')); - t.expect(!haystack.contains(U'🍣')); - t.expect(haystack.contains(U"🐈‍⬛")); + t.expect(haystack->contains(U'🐈')); + t.expect(!haystack->contains('z')); + t.expect(!haystack->contains(U'🍣')); + t.expect(haystack->contains(U"🐈‍⬛")); }; best::test Find = [](auto& t) { best::strbuf haystack = "a complicated string. see solomon: 🐈‍⬛"; - t.expect_eq(haystack.find("solomon"), 26); - t.expect_eq(haystack.find("daisy"), best::none); - t.expect_eq(haystack.find(u"solomon"), 26); - t.expect_eq(haystack.find(u"daisy"), best::none); + t.expect_eq(haystack->find("solomon"), 26); + t.expect_eq(haystack->find("daisy"), best::none); + t.expect_eq(haystack->find(u"solomon"), 26); + t.expect_eq(haystack->find(u"daisy"), best::none); - t.expect_eq(haystack.find(U'🐈'), 35); - t.expect_eq(haystack.find('z'), best::none); - t.expect_eq(haystack.find(U'🍣'), best::none); - t.expect_eq(haystack.find(U"🐈‍⬛"), 35); + t.expect_eq(haystack->find(U'🐈'), 35); + t.expect_eq(haystack->find('z'), best::none); + t.expect_eq(haystack->find(U'🍣'), best::none); + t.expect_eq(haystack->find(U"🐈‍⬛"), 35); - t.expect_eq(haystack.find(&rune::is_ascii_punct), 20); + t.expect_eq(haystack->find(&rune::is_ascii_punct), 20); }; best::test Find16 = [](auto& t) { best::strbuf16 haystack = u"a complicated string.
see solomon: 🐈‍⬛"; - t.expect_eq(haystack.find("solomon"), 26); - t.expect_eq(haystack.find("daisy"), best::none); - t.expect_eq(haystack.find(u"solomon"), 26); - t.expect_eq(haystack.find(u"daisy"), best::none); + t.expect_eq(haystack->find("solomon"), 26); + t.expect_eq(haystack->find("daisy"), best::none); + t.expect_eq(haystack->find(u"solomon"), 26); + t.expect_eq(haystack->find(u"daisy"), best::none); - t.expect_eq(haystack.find(U'🐈'), 35); - t.expect_eq(haystack.find('z'), best::none); - t.expect_eq(haystack.find(U'🍣'), best::none); - t.expect_eq(haystack.find(U"🐈‍⬛"), 35); + t.expect_eq(haystack->find(U'🐈'), 35); + t.expect_eq(haystack->find('z'), best::none); + t.expect_eq(haystack->find(U'🍣'), best::none); + t.expect_eq(haystack->find(U"🐈‍⬛"), 35); - t.expect_eq(haystack.find(&rune::is_ascii_punct), 20); + t.expect_eq(haystack->find(&rune::is_ascii_punct), 20); }; best::test SplitAt = [](auto& t) { best::strbuf test = "黒猫"; - t.expect_eq(test.split_at(0), best::row{"", "黒猫"}); - t.expect_eq(test.split_at(1), best::none); - t.expect_eq(test.split_at(2), best::none); - t.expect_eq(test.split_at(3), best::row{"黒", "猫"}); - t.expect_eq(test.split_at(4), best::none); - t.expect_eq(test.split_at(5), best::none); - t.expect_eq(test.split_at(6), best::row{"黒猫", ""}); + t.expect_eq(test->split_at(0), best::row{"", "黒猫"}); + t.expect_eq(test->split_at(1), best::none); + t.expect_eq(test->split_at(2), best::none); + t.expect_eq(test->split_at(3), best::row{"黒", "猫"}); + t.expect_eq(test->split_at(4), best::none); + t.expect_eq(test->split_at(5), best::none); + t.expect_eq(test->split_at(6), best::row{"黒猫", ""}); test = "🐈‍⬛"; - t.expect_eq(test.split_at(0), best::row{"", "🐈‍⬛"}); - t.expect_eq(test.split_at(1), best::none); - t.expect_eq(test.split_at(2), best::none); - t.expect_eq(test.split_at(3), best::none); - t.expect_eq(test.split_at(4), best::row{"🐈", "\u200d⬛"}); - t.expect_eq(test.split_at(5),
best::none); - t.expect_eq(test.split_at(6), best::none); - t.expect_eq(test.split_at(7), best::row{"🐈\u200d", "⬛"}); - t.expect_eq(test.split_at(8), best::none); - t.expect_eq(test.split_at(9), best::none); - t.expect_eq(test.split_at(10), best::row{"🐈‍⬛", ""}); + t.expect_eq(test->split_at(0), best::row{"", "🐈‍⬛"}); + t.expect_eq(test->split_at(1), best::none); + t.expect_eq(test->split_at(2), best::none); + t.expect_eq(test->split_at(3), best::none); + t.expect_eq(test->split_at(4), best::row{"🐈", "\u200d⬛"}); + t.expect_eq(test->split_at(5), best::none); + t.expect_eq(test->split_at(6), best::none); + t.expect_eq(test->split_at(7), best::row{"🐈\u200d", "⬛"}); + t.expect_eq(test->split_at(8), best::none); + t.expect_eq(test->split_at(9), best::none); + t.expect_eq(test->split_at(10), best::row{"🐈‍⬛", ""}); }; best::test SplitAt16 = [](auto& t) { best::strbuf16 test = u"黒猫"; - t.expect_eq(test.split_at(0), best::row{u"", u"黒猫"}); - t.expect_eq(test.split_at(1), best::row{u"黒", u"猫"}); - t.expect_eq(test.split_at(2), best::row{u"黒猫", u""}); + t.expect_eq(test->split_at(0), best::row{u"", u"黒猫"}); + t.expect_eq(test->split_at(1), best::row{u"黒", u"猫"}); + t.expect_eq(test->split_at(2), best::row{u"黒猫", u""}); test = u"🐈‍⬛"; - t.expect_eq(test.split_at(0), best::row{u"", u"🐈‍⬛"}); - t.expect_eq(test.split_at(1), best::none); - t.expect_eq(test.split_at(2), best::row{u"🐈", u"\u200d⬛"}); - t.expect_eq(test.split_at(3), best::row{u"🐈\u200d", u"⬛"}); - t.expect_eq(test.split_at(4), best::row{u"🐈‍⬛", u""}); + t.expect_eq(test->split_at(0), best::row{u"", u"🐈‍⬛"}); + t.expect_eq(test->split_at(1), best::none); + t.expect_eq(test->split_at(2), best::row{u"🐈", u"\u200d⬛"}); + t.expect_eq(test->split_at(3), best::row{u"🐈\u200d", u"⬛"}); + t.expect_eq(test->split_at(4), best::row{u"🐈‍⬛", u""}); }; best::test SplitOn = [](auto& t) { best::strbuf haystack = "a complicated string.
see solomon: 🐈‍⬛"; - t.expect_eq(haystack.split_once("solomon"), + t.expect_eq(haystack->split_once("solomon"), best::row{"a complicated string. see ", ": 🐈‍⬛"}); - t.expect_eq(haystack.split_once("daisy"), best::none); - t.expect_eq(haystack.split_once(u"solomon"), + t.expect_eq(haystack->split_once("daisy"), best::none); + t.expect_eq(haystack->split_once(u"solomon"), best::row{"a complicated string. see ", ": 🐈‍⬛"}); - t.expect_eq(haystack.split_once(u"daisy"), best::none); + t.expect_eq(haystack->split_once(u"daisy"), best::none); - t.expect_eq(haystack.split_once(U'🐈'), + t.expect_eq(haystack->split_once(U'🐈'), best::row{"a complicated string. see solomon: ", "\u200d⬛"}); - t.expect_eq(haystack.split_once('z'), best::none); - t.expect_eq(haystack.split_once(U'🍣'), best::none); - t.expect_eq(haystack.split_once(U"🐈‍⬛"), + t.expect_eq(haystack->split_once('z'), best::none); + t.expect_eq(haystack->split_once(U'🍣'), best::none); + t.expect_eq(haystack->split_once(U"🐈‍⬛"), best::row{"a complicated string. see solomon: ", ""}); - t.expect_eq(haystack.split_once(&rune::is_ascii_punct), + t.expect_eq(haystack->split_once(&rune::is_ascii_punct), best::row{"a complicated string", " see solomon: 🐈‍⬛"}); }; } // namespace best::strbuf_test