From 67689face63ca96b47e3ec56a1b7c9ae0ea411d3 Mon Sep 17 00:00:00 2001 From: Andrew Wong Date: Tue, 21 Jan 2025 18:18:03 -0800 Subject: [PATCH 1/3] storage_e2e_test: add log messages indicating test progress The test itself has many steps that make it hard to follow what has happened. Adds some log messages to indicate different steps in the test. --- src/v/storage/tests/storage_e2e_test.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/v/storage/tests/storage_e2e_test.cc b/src/v/storage/tests/storage_e2e_test.cc index a77a32b57d20..bf05d97ee4f8 100644 --- a/src/v/storage/tests/storage_e2e_test.cc +++ b/src/v/storage/tests/storage_e2e_test.cc @@ -4907,6 +4907,7 @@ FIXTURE_TEST(test_offset_range_size2_compacted, storage_test_fixture) { BOOST_REQUIRE(result->on_disk_size >= target_size); } + info("Prefix truncating"); auto new_start_offset = model::next_offset(first_segment_last_offset); log ->truncate_prefix(storage::truncate_prefix_config( @@ -4918,6 +4919,7 @@ FIXTURE_TEST(test_offset_range_size2_compacted, storage_test_fixture) { // Check that out of range access triggers exception. + info("Checking for null on out-of-range"); BOOST_REQUIRE( log ->offset_range_size( @@ -4959,6 +4961,7 @@ FIXTURE_TEST(test_offset_range_size2_compacted, storage_test_fixture) { .get() == std::nullopt); + info("Checking the last batch"); // Check that the last batch can be measured independently auto res = log ->offset_range_size( @@ -4980,6 +4983,7 @@ FIXTURE_TEST(test_offset_range_size2_compacted, storage_test_fixture) { size_t tail_length = 5; for (size_t i = 0; i < tail_length; i++) { + info("Checking i = {}", i); auto ix_batch = c_summaries.size() - 1 - i; res = log ->offset_range_size( @@ -4997,6 +5001,7 @@ FIXTURE_TEST(test_offset_range_size2_compacted, storage_test_fixture) { } // Check that the min_size is respected + info("Checking the back segment"); BOOST_REQUIRE( log ->offset_range_size( From 8514bfd917a4a4ee60cf8e6b1fc0a427a176f723 Mon Sep 17 00:00:00 2001 From: Andrew Wong Date: Tue, 21 Jan 2025 18:19:10 -0800 Subject: [PATCH 2/3] model: add bounded_offset_interval::optional() Adds a static constructor to return an optional interval, which per the class comments, should represent an empty interval. I intend on using this for some upcoming bounds checks. --- src/v/model/offset_interval.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/v/model/offset_interval.h b/src/v/model/offset_interval.h index be1711e4d50b..d49b0af0758f 100644 --- a/src/v/model/offset_interval.h +++ b/src/v/model/offset_interval.h @@ -27,10 +27,19 @@ class bounded_offset_interval { unchecked(model::offset min, model::offset max) noexcept { return {min, max}; } - + // Returns std::nullopt if the range is invalid (e.g. invalid start, + // invalid end, or end > start). + static std::optional + optional(model::offset min, model::offset max) { + if (min < model::offset(0) || max < model::offset(0) || min > max) { + return std::nullopt; + } + return unchecked(min, max); + } static bounded_offset_interval checked(model::offset min, model::offset max) { - if (min < model::offset(0) || max < model::offset(0) || min > max) { + auto ret = optional(min, max); + if (!ret.has_value()) { throw std::invalid_argument(fmt::format( "Invalid arguments for constructing a non-empty bounded offset " "interval: min({}) <= max({})", @@ -38,7 +47,7 @@ class bounded_offset_interval { max)); } - return {min, max}; + return ret.value(); } inline bool overlaps(const bounded_offset_interval& other) const noexcept { From 69e4666759eb90283530ce156e89ff784708faab Mon Sep 17 00:00:00 2001 From: Andrew Wong Date: Tue, 21 Jan 2025 18:20:22 -0800 Subject: [PATCH 3/3] storage: fix bounds check in offset range size method The methods to find offset range size currently use segment bounds to determine whether a given offset is available to be queried. This doesn't account for the case when the segment set contains offsets that don't fall in the log's offset range (e.g. follow a delete records request that trims mid-segment). This commit adds appropriate bounds checks to both methods. With an upcoming change to merge compact after windowed compaction, test_offset_range_size2_compacted would fail because it would prefix truncate mid-segment following a merge compaction, and then trip over this, hitting an unexpected exception when creating a reader: ``` std::runtime_error: Reader cannot read before start of the log 0 < 887 ``` --- src/v/storage/disk_log_impl.cc | 28 ++++++++++++- src/v/storage/tests/storage_e2e_test.cc | 54 +++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/src/v/storage/disk_log_impl.cc b/src/v/storage/disk_log_impl.cc index 91ad021d58d9..6d58b2af730e 100644 --- a/src/v/storage/disk_log_impl.cc +++ b/src/v/storage/disk_log_impl.cc @@ -15,6 +15,7 @@ #include "model/adl_serde.h" #include "model/fundamental.h" #include "model/namespace.h" +#include "model/offset_interval.h" #include "model/record_batch_types.h" #include "model/timeout_clock.h" #include "model/timestamp.h" @@ -1981,12 +1982,23 @@ ss::future disk_log_impl::get_file_offset( ss::future> disk_log_impl::offset_range_size( model::offset first, model::offset last, ss::io_priority_class io_priority) { + auto log_offsets = offsets(); vlog( stlog.debug, "Offset range size, first: {}, last: {}, lstat: {}", first, last, - offsets()); + log_offsets); + auto log_interval = model::bounded_offset_interval::optional( + log_offsets.start_offset, log_offsets.committed_offset); + if (!log_interval.has_value()) { + vlog(stlog.debug, "Log is empty, returning early"); + co_return std::nullopt; + } + if (!log_interval->contains(first) || !log_interval->contains(last)) { + vlog(stlog.debug, "Log does not include entire range"); + co_return std::nullopt; + } // build the collection const auto segments = [&] { @@ -2141,13 +2153,25 @@ disk_log_impl::offset_range_size( model::offset first, offset_range_size_requirements_t target, ss::io_priority_class io_priority) { + auto log_offsets = offsets(); vlog( stlog.debug, "Offset range size, first: {}, target size: {}/{}, lstat: {}", first, target.target_size, target.min_size, - offsets()); + log_offsets); + auto log_interval = model::bounded_offset_interval::optional( + log_offsets.start_offset, log_offsets.committed_offset); + if (!log_interval.has_value()) { + vlog(stlog.debug, "Log is empty, returning early"); + co_return std::nullopt; + } + if (!log_interval->contains(first)) { + vlog(stlog.debug, "Log does not include offset {}", first); + co_return std::nullopt; + } + auto base_it = _segs.lower_bound(first); // Invariant: 'first' offset should be present in the log. If the segment is diff --git a/src/v/storage/tests/storage_e2e_test.cc b/src/v/storage/tests/storage_e2e_test.cc index bf05d97ee4f8..b2ab533e6063 100644 --- a/src/v/storage/tests/storage_e2e_test.cc +++ b/src/v/storage/tests/storage_e2e_test.cc @@ -4237,6 +4237,60 @@ FIXTURE_TEST(reader_reusability_max_bytes, storage_test_fixture) { test_case(400000, 300000, false); } +FIXTURE_TEST( + test_offset_range_size_after_mid_segment_truncation, storage_test_fixture) { + size_t num_segments = 2; + model::offset first_segment_last_offset; + auto cfg = default_log_config(test_dir); + storage::log_manager mgr = make_log_manager(cfg); + info("Configuration: {}", mgr.config()); + auto deferred = ss::defer([&mgr]() mutable { mgr.stop().get(); }); + auto ntp = model::ntp("redpanda", "test-topic", 0); + + storage::ntp_config ntp_cfg(ntp, mgr.config().base_dir); + + auto log = mgr.manage(std::move(ntp_cfg)).get(); + for (size_t i = 0; i < num_segments; i++) { + append_random_batches( + log, + 10, + model::term_id(0), + custom_ts_batch_generator(model::timestamp::now())); + if (first_segment_last_offset == model::offset{}) { + first_segment_last_offset = log->offsets().dirty_offset; + } + log->force_roll(ss::default_priority_class()).get(); + } + + // Prefix truncate such that offset 1 is the new log start. + log + ->truncate_prefix(storage::truncate_prefix_config( + model::offset(1), ss::default_priority_class())) + .get(); + + // Run size queries on ranges that don't exist in the log, but whose range + // is still included in a segment. + + BOOST_CHECK( + log + ->offset_range_size( + model::offset(0), model::offset(1), ss::default_priority_class()) + .get() + == std::nullopt); + + BOOST_CHECK( + log + ->offset_range_size( + model::offset(0), + storage::log::offset_range_size_requirements_t{ + .target_size = 1, + .min_size = 0, + }, + ss::default_priority_class()) + .get() + == std::nullopt); +} + FIXTURE_TEST(test_offset_range_size, storage_test_fixture) { #ifdef NDEBUG size_t num_test_cases = 5000;