diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 415ccc080..bf048eefa 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -10,7 +10,7 @@ FOREACH (F ${ALL_SRC}) ADD_EXECUTABLE(${prjName} ${F}) TARGET_LINK_LIBRARIES(${prjName} common pthread dl benchmark::benchmark) if(NOT ${prjName} STREQUAL "memtracer_performance_test") - TARGET_LINK_LIBRARIES(${prjName} observer_static) + TARGET_LINK_LIBRARIES(${prjName} observer_static oblsm) endif() ENDFOREACH (F) diff --git a/benchmark/oblsm_performance_test.cpp b/benchmark/oblsm_performance_test.cpp new file mode 100644 index 000000000..c500b942b --- /dev/null +++ b/benchmark/oblsm_performance_test.cpp @@ -0,0 +1,146 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include +#include + +#include "common/lang/stdexcept.h" +#include "common/lang/filesystem.h" +#include "common/log/log.h" +#include "common/math/integer_generator.h" +#include "oblsm/include/ob_lsm.h" + +// TODO +// a simple benchmark to test oblsm concurrency put/get. more detail test can use `ob_lsm_bench` tool. 
+using namespace std; +using namespace common; +using namespace benchmark; +using namespace oceanbase; + +class BenchmarkBase : public Fixture +{ +public: + BenchmarkBase() {} + + virtual ~BenchmarkBase() {} + + virtual string Name() const = 0; + + virtual void SetUp(const State &state) + { + if (0 != state.thread_index()) { + return; + } + filesystem::remove_all("oblsm_benchmark"); + filesystem::create_directory("oblsm_benchmark"); + + RC rc = ObLsm::open(ObLsmOptions(), "oblsm_benchmark", &oblsm_); + if (rc != RC::SUCCESS) { + throw runtime_error("failed to open oblsm"); + } + + LOG_INFO("test %s setup done. threads=%d, thread index=%d", + this->Name().c_str(), state.threads(), state.thread_index()); + } + + virtual void TearDown(const State &state) + { + if (0 != state.thread_index()) { + return; + } + delete oblsm_; + LOG_INFO("test %s teardown done. threads=%d, thread index=%d", + this->Name().c_str(), + state.threads(), + state.thread_index()); + } + + void FillUp(uint32_t min, uint32_t max) + { + for (uint32_t value = min; value < max; ++value) { + string key = to_string(value); + + [[maybe_unused]] RC rc = oblsm_->put(key, key); + ASSERT(rc == RC::SUCCESS, "failed to insert entry into btree. 
key=%" PRIu32, value); + } + } + + uint32_t GetRangeMax(const State &state) const + { + uint32_t max = static_cast(state.range(0) * 3); + if (max <= 0) { + max = (1 << 31); + } + return max; + } + + void Insert(uint32_t value) { oblsm_->put(to_string(value), to_string(value)); } + + void Scan(uint32_t begin, uint32_t end) + { + auto iter = oblsm_->new_iterator(ObLsmReadOptions()); + iter->seek(to_string(begin)); + while (iter->valid() && iter->key() != to_string(end)) { + iter->next(); + } + delete iter; + } + +protected: + oceanbase::ObLsm *oblsm_ = nullptr; +}; + +//////////////////////////////////////////////////////////////////////////////// + +struct DISABLED_MixtureBenchmark : public BenchmarkBase +{ + string Name() const override { return "mixture"; } +}; + +BENCHMARK_DEFINE_F(DISABLED_MixtureBenchmark, Mixture)(State &state) +{ + pair insert_range{GetRangeMax(state) + 1, GetRangeMax(state) * 2}; + pair scan_range{1, 100}; + pair data_range{0, GetRangeMax(state) * 2}; + + IntegerGenerator data_generator(data_range.first, data_range.second); + IntegerGenerator insert_generator(insert_range.first, insert_range.second); + IntegerGenerator scan_range_generator(scan_range.first, scan_range.second); + IntegerGenerator operation_generator(0, 10); + + for (auto _ : state) { + int64_t operation_type = operation_generator.next(); + if (operation_type <= 9) { + operation_type = 0; + } else { + operation_type = 1; + } + switch (operation_type) { + case 0: { // insert + uint32_t value = static_cast(insert_generator.next()); + Insert(value); + } break; + case 1: { // scan + uint32_t begin = static_cast(data_generator.next()); + uint32_t end = begin + static_cast(scan_range_generator.next()); + Scan(begin, end); + } break; + default: { + ASSERT(false, "should not happen. 
operation=%ld", operation_type); + } + } + } +} + +BENCHMARK_REGISTER_F(DISABLED_MixtureBenchmark, Mixture)->Threads(10)->Arg(1)->Arg(1000)->Arg(10000); + +//////////////////////////////////////////////////////////////////////////////// + +BENCHMARK_MAIN(); diff --git a/docs/docs/db_course_lab/images/miniob-lsmtree-overview.png b/docs/docs/db_course_lab/images/miniob-lsmtree-overview.png new file mode 100644 index 000000000..0747c61ac Binary files /dev/null and b/docs/docs/db_course_lab/images/miniob-lsmtree-overview.png differ diff --git a/docs/docs/db_course_lab/lab1.md b/docs/docs/db_course_lab/lab1.md new file mode 100644 index 000000000..c841e73e4 --- /dev/null +++ b/docs/docs/db_course_lab/lab1.md @@ -0,0 +1,207 @@ +--- +title: LAB#1 LSM-Tree 存储引擎 +--- + +> 请不要将代码提交到公开仓库(包括提交带有题解的 Pull Request),同时也请不要抄袭其他同学或网络上可能存在的代码。 + +# LAB#1 LSM-Tree 存储引擎 + +这是数据库系统实现原理与实践课程的第一个正式实验题目,实验内容是完成 LSM-Tree 存储引擎中的部分功能。 + +## LSM-Tree 简介 +LSM-Tree 将写操作(包括数据插入、修改、删除)采用追加写的方式写入内存中并进行排序(MemTable),当 MemTable 的大小达到一定阈值后再将数据顺序写入磁盘中(Sorted Strings Table, SSTable),这使得 LSM-Tree 具有优秀的写性能;但是读操作时需要查询 MemTable 和 SSTable 中数据。因此,为了提高读性能,LSM-Tree会定期对磁盘中的SSTable文件进行合并(Compaction),合并时会将相同数据进行合并,减少数据量。 + +![lsm-tree overview](images/miniob-lsmtree-overview.png) + +## OceanBase 中的 LSM-Tree 简介 +OceanBase 数据库的存储引擎也是基于 LSM-Tree 架构,将数据分为静态基线数据(放在 SSTable 中)和动态增量数据(放在 MemTable 中)两部分,其中 SSTable 是只读的,一旦生成就不再被修改,存储于磁盘;MemTable 支持读写,存储于内存。数据库 DML 操作插入、更新、删除等首先写入 MemTable,等到 MemTable 达到一定大小时转储到磁盘成为 SSTable。在进行查询时,需要分别对 SSTable 和 MemTable 进行查询,并将查询结果进行归并,返回给 SQL 层归并后的查询结果。同时在内存实现了 Block Cache 和 Row cache,来避免对基线数据的随机读。关于 OceanBase 数据库的更多细节可以参考:https://www.oceanbase.com/docs/oceanbase-database-cn + +## MiniOB 中的 LSM-Tree 简介 +ObLsm 是 MiniOB 中的一个为教学设计的 LSM-Tree 架构的 KV 存储引擎。可以认为 ObLsm 是一个独立的模块,MiniOB 集成了 ObLsm 作为其一个存储引擎,ObLsm 也可独立运行,独立使用。ObLsm 本身包含了 LSM-Tree 中的关键结构,可以帮助大家学习 LSM-Tree 架构。ObLsm 的代码位于 `src/oblsm/` 目录下,目前 LAB#1 实验仅需修改该目录下的代码即可。关于 ObLsm 的更多细节可参考[文档](../design/miniob-lsm-tree.md)。 + +## 实验 + +LAB#1 
中包含三个相对独立的子任务: + +- 任务1: 实现SkipList 并支持 SkipList 无锁并发写入 +- 任务2: 实现 Block Cache 功能,加速 SSTable 的读取 +- 任务3: 实现 Leveled Compaction 功能,支持 SSTable 的合并 + +对于上述的每个实验,代码中均提供了包含必须实现的 API 的类及其接口。请不要修改这些类中预定义函数的定义/类名/文件名等。否则,测试脚本可能无法正常运行。你可以在这些类中添加成员变量和函数,以正确实现所需的功能。 + +### 任务1: 实现SkipList 并支持 SkipList 无锁并发写入 + +目前,ObLsm 中的 MemTable 基于 SkipList 实现,当前的 SkipList(代码位于`src/oblsm/memtable/ob_skiplist.h`)支持一写多读(并发读不需要额外的同步机制,并发写需要外部的同步机制保证线程安全)。SkipList 中的部分函数还没有实现,请在此基础上实现 SkipList 的写接口(`ObSkipList::insert()`)和无锁并发写接口(`ObSkipList::insert_concurrently()`)(**注意:除了这一接口外,可能还需要实现其他必要函数,以支持 SkipList 正常运行,请自行 debug 或查看相关代码文件。**)。测试程序位于 `unittest/oblsm/ob_skiplist_test.cpp` 中。要求使用 CAS 操作来实现 SkipList 的无锁并发插入。下面对必要的知识做简单介绍。 + +#### CAS(Compare-And-Swap) + +CAS(Compare-And-Swap)是一种广泛用于并发编程中的原子操作,主要用于实现无锁数据结构和算法。它允许线程安全地对共享数据进行更新,而无需使用锁,从而提高了系统的性能和可伸缩性。 + +CAS 操作的基本思想是:通过比较某个内存位置的当前值(预期值)和一个给定的值,如果两者相等,那么将内存位置的值更新为一个新值;否则不更新。这个操作是原子的,也就是说,它要么完全成功,要么完全失败,不会出现中间状态。 + +CAS 通常由硬件提供支持,现代处理器通常都提供相应的指令。 + +在 C++ 中,CAS 操作通常通过 `std::atomic` 类型和 `compare_exchange_weak()` 或 `compare_exchange_strong()` 函数来实现。`compare_exchange_strong()/compare_exchange_weak()` 是 `std::atomic` 类模板的成员函数,其基本语义是比较一个原子变量的当前值与预期值,如果相等,则将其更新为新值。如果不相等,则将原子变量的当前值赋值给预期值(使调用者知道失败原因)。这个操作是原子的,保证了线程安全。 + +**思考**:`compare_exchange_weak()` 和 `compare_exchange_strong()` 的区别是什么?在实现这一任务时,你应该使用哪一个还是任意一个都可以? 
+ +cpplings(`./src/cpplings`) 中也提供了一个练习 CAS 的例子,可参考 `src/cpplings/cas.cpp`,本练习不作为实验的一部分,不计入成绩,仅供练习参考。 + +#### 跳表(SkipList) + +跳表 (SkipList) 是由 William Pugh 发明的一种查找数据结构,支持对数据的快速查找、插入和删除。 + +跳表的期望空间复杂度为 $O(n)$,跳表的查询、插入和删除操作的期望时间复杂度都为 $O(\log n)$。 + +顾名思义,SkipList 是一种类似于链表的数据结构。更加准确地说,SkipList 是对有序链表的改进。 + +一个有序链表的查找操作,就是从头部开始逐个比较,直到当前节点的值大于或者等于目标节点的值。很明显,这个操作的复杂度是 $O(n)$。 + +跳表在有序链表的基础上,引入了分层的概念。首先,跳表的每一层都是一个有序链表,特别地,最底层是初始的有序链表。每个位于第 i 层的节点有 p 的概率出现在第 i+1 层,p 为常数。 + +在跳表中查找,就是从第 L(n) 层开始,水平地逐个比较直至当前节点的下一个节点大于等于目标节点,然后移动至下一层。重复这个过程直至到达第一层且无法继续进行操作。此时,若下一个节点是目标节点,则成功查找;反之,则元素不存在。这样一来,查找的过程中会跳过一些没有必要的比较,所以相比于有序链表的查询,跳表的查询更快。可以证明,跳表查询的平均复杂度为 $O(\log n)$。 + +插入节点的过程就是先执行一遍查询的过程,中途记录新节点要插入到哪些节点的后面,最后再执行插入。每一层最后一个键值小于 key 的节点,就是需要进行修改的节点。 + +你需要补充完善 `src/oblsm/memtable/ob_skiplist.h` 中的以下函数,以实现 SkipList 的基本功能。 +```c++ +/** + * @brief Insert key into the list. + * REQUIRES: nothing that compares equal to key is currently in the list + */ +void insert(const Key &key); +``` + +**提示**:除了这一接口外,可能还需要实现其他必要函数,以支持 SkipList 正常运行,请自行 debug 或查看相关代码文件。 + +#### SkipList 的无锁并发插入 + +SkipList 的无锁并发插入可以参考 [The Art of Multiprocessor Programming](https://www2.cs.sfu.ca/~ashriram/Courses/CS431/assets/distrib/AMP.pdf) 中的实现。相关细节位于书中的 14.4 节。 + +无锁插入的核心部分伪代码如下: +``` c++ +Node *node = new_node(key); +while(true) { + // find the location to insert the new node. + // `prev` is less than `key`, `succ` is greater than `key`. + find(key, prev, succ); + // no synchronization needed here, because `node` is + // not yet visible to other threads. + node->next[i] = succ[i] + + if (!prev[0].next[0].cas_set(succ[0], node)) { + // if failed, try again. + continue; + } + + for (int level = 1; level < N/* N is the top level*/; level++) { + while (true) { + if (prev[level].next[level].cas_set(succ[level], node)) { + // success to insert the node at level i + break; + } + // if failed, try again. + find(key, prev, succ); + } + } + return; +} +``` + +注意: +1. 在实现 `insert_concurrently()` 时,请不要使用任何锁。 +2. 
需要考虑 `common::RandomGenerator` 的线程安全。可参考:https://stackoverflow.com/questions/77377046/is-a-stdmt19937-static-function-variable-thread-safe + + +#### 测试 + +可以通过运行 `unittest/oblsm/ob_skiplist_test.cpp` 来测试 skiplist 的功能。 + +MiniOB 中的单测框架使用 `GTest`,在默认参数编译后,单测二进制程序位于 `$BUILD_DIR/bin/` 目录下,程序名与单测文件名对应。例如,`ob_skiplist_test.cpp` 对应的单测程序为 `$BUILD_DIR/bin/ob_skiplist_test`,通过运行该程序即可测试你的实现是否正确。 + +测试用例中会随机生成一些键值对,并插入到 SkipList 中。然后对 SkipList 进行查找操作,检查查找结果是否正确。 + +**注意**:你需要保证你的 SkipList 实现是线程安全的。 + +**思考**:在插入新节点的过程中,通过 CAS 操作来逐层添加节点,那么如果 CAS 失败后,是否需要回滚掉之前成功插入的节点?为什么? + +**思考**:多个 CAS 操作并不是原子的,也就是在插入过程中,多个读线程可能看到不一致的新节点,会导致什么问题? + +### 任务2:实现 Block Cache(块缓存) 功能,加速 SSTable 的读取 + +Block Cache(块缓存)是 LSM-Tree 在内存中缓存数据以供读取的地方。Block Cache 的作用是优化热点数据访问磁盘时的 I/O 性能。ObLsm 中使用 LRU Cache 来实现块缓存。 + +LRU Cache(Least Recently Used)是一种常见的缓存淘汰算法,用于在有限的缓存空间中管理数据对象。LRU Cache 的核心思想是基于时间局部性原理,即最近被访问的数据在未来可能会被再次访问。 + +Cache 的容量有限,因此当 Cache 的容量用完,而又有新的内容需要添加进来时,就需要挑选并舍弃原有的部分内容,从而腾出空间来放新内容。LRU Cache 的替换原则就是将最近最少使用的内容替换掉。 + +#### Block Cache 实现内容 +你需要实现 `src/oblsm/util/ob_lru_cache.h` 中的 `ObLruCache` 类,实现 LRU 缓存的功能。 +你需要在 SSTable 上实现 `read_block_with_cache()` 函数。 + +Block 通过 `(sst_id, block_id)` 作为 Key 进行缓存。如果命中了缓存,则从缓存中获取 Block;如果未命中缓存,则填充 Block 到缓存中。 + + +**提示**:在实现 Block Cache 时,需要保证其线程安全。 +**提示**:除了本文中提到的需要修改的位置,你还可能需要完成其他必要的修改以支持 Block Cache 正常运行,请自行 debug 或查看相关代码文件。 + +**思考**:在 RocksDB 中,块缓存通过 `strict_capacity_limit` 配置项来控制块缓存大小是否严格限制在块缓存容量内。在你的实现中,块缓存大小是否有可能会超过块缓存容量? + +**思考**:LRU Cache 需要保证并发安全,你是通过什么方式保证这一点的?LevelDB/RocksDB 中都使用了分片的方式来减少锁冲突优化 LRU Cache 的并发性能,是否可以在你的实现中也使用分片来减少锁冲突? + +#### 测试 + +可以通过运行 `unittest/oblsm/ob_lru_cache_test.cpp` 来测试 LRU Cache 的功能。 + +此外,还需要保证可以通过 `unittest/oblsm/ob_lsm_test.cpp` 和 `benchmark/oblsm_performance_test.cpp`,确保在增加 LRU Cache 后,不影响 LSM-Tree 的功能。 + +Q:如何运行 `benchmark/oblsm_performance_test.cpp`? 
+A:通过如下编译命令编译时,对应二进制文件位于`$BUILD_DIR/bin/`目录下,文件名为`oblsm_performance_test`。 + +```bash +bash build.sh release -DCONCURRENCY=ON -DWITH_BENCHMARK=ON +``` + + +### 任务3:实现 Leveled Compaction 功能,支持 SSTable 的合并 + +#### Compaction 简介 +##### Leveled Compaction +在 Leveled Compaction 中,LSM-Tree 划分为 N 个 Level,每个 Level 仅包含一个 Sorted Run(相同层级的 SSTable 之间 Key 范围不存在交集);相邻 Level 的 SSTable 大小有一个倍数关系。 + +Compaction 的触发是由于某个 Level 的数据量超过了阈值。在 Compaction 时会选择 L(n-1) 的数据,与原有 L(n) 的数据 Rowkey 有交集的部分进行合并,得到新的 L(n) 数据。 + +##### Tiered Compaction +在 Tiered Compaction 中,LSM-Tree 也被划分为 N 个 Level,每个 Level 可以包含多个 SSTable。相同 Level 的 SSTable 的 key range 可能存在交集。在查询时需要访问这个 Level 所有的 SSTable,使得读放大比较严重,查询性能不佳。 + +Compaction 的触发条件是某个 Level 的 SSTable 数量超过了阈值,会将 L(n) 的若干 SSTable,合出一个新的 SSTable 放入 L(n+1),并不与原有 L(n+1) 的数据进行合并。相比于 Leveled 而言执行速度会更快,写放大会更优,但由于查询的 SSTable 数量变多,读放大会更差。 + +#### Compaction 实现内容 + +你需要实现 Leveled Compaction 功能,在 `src/oblsm/compaction/compaction_picker.h` 中实现 `LeveledCompactionPicker`。实现 `ObLsmImpl::try_major_compaction` 中的`TODO: apply the compaction results to sstables`,实现 `ObLsmImpl::do_compaction()` 函数。 + +**提示**:除了本文中提到的需要修改的位置,你还可能需要完成其他必要的修改以支持 Leveled Compaction 正常运行,请自行 debug 或查看相关代码文件。 + +ObLsm 中的 Leveled Compaction 需要满足下面规则: + +1. 磁盘上的文件按多个层级(Level)进行组织。我们称它们为1级、2级等,或简称为L1、L2等,层级数由`ObLsmOptions::default_levels` 指定。特殊的 level-0(或简称 L0)包含刚刚从内存写入缓冲区(memtable)刷新的文件。 +2. 每个级别( L0 除外)都仅包含一个 Sorted Run(相同层级的 SSTable 之间 Key 范围不存在交集)。 +3. 每个层级(L1 及以上)之间的数据大小存在倍数关系:`L_{i+1} = L_{i} * k`,其中 k 由`ObLsmOptions::default_level_ratio` 指定,L1 层级的数据大小由`ObLsmOptions::default_l1_level_size` 指定。 +4. L0 层级受限于文件数,当超过指定文件数时触发合并。`ObLsmOptions::default_l0_file_num` 指定 L0 层级的文件数上限。 +5. 每次合并时,L0 层级会全部参与合并,并从 L1 层级中挑选出存在数据交集的所有 SSTable 也参与合并。对于 L1 及以上层级的合并,从 L_i 层挑选出同层中最后参与合并的文件进行合并,并从 L_{i+1} Level 中挑选出存在数据交集的所有 SSTable 也参与合并。 +6. 
当多个 Level 触发压缩条件时,需要选择先压缩哪个级别。每个 Level 都会生成一个分数:对于非零级别,分数是级别的总大小除以目标大小。对于 level-0,分数是文件总数除以 default_l0_file_num。 +测试: + +**思考**:在当前的实现下,ObLsm 还有哪些优化空间?请列出一些优化方向。 + +**提示**: 在做 LAB#1 实验时,可暂时不考虑 `ObLsmImpl::seq_`(用于实现 MVCC)。 + +#### 测试 + +可以通过运行 `unittest/oblsm/ob_compaction_test.cpp` 来测试 Leveled Compaction 功能。此外,还需要保证可以通过 `unittest/oblsm/ob_lsm_test.cpp` 和 `benchmark/oblsm_performance_test.cpp` 保证在增加 Compaction 后,不影响 LSM-Tree 的功能。 + +## 参考资料 +这里提供了一些学习资料供大家参考学习。 +[The Art of Multiprocessor Programming](https://www2.cs.sfu.ca/~ashriram/Courses/CS431/assets/distrib/AMP.pdf) +[name that compaction algorithm](https://smalldatum.blogspot.com/2018/08/name-that-compaction-algorithm.html) +[MiniOB LSM-Tree 设计文档](../design/miniob-lsm-tree.md) diff --git a/docs/docs/db_course_lab/overview.md b/docs/docs/db_course_lab/overview.md index 8ea22cfa9..3f3598ac0 100644 --- a/docs/docs/db_course_lab/overview.md +++ b/docs/docs/db_course_lab/overview.md @@ -16,7 +16,7 @@ title: 数据库系统实现原理与实践课程实验 这是本课程的实验题目,实验题目是修改 MiniOB 的各个组件并完成指定的功能,实验题目会持续更新。 - [LAB#0 C++ 基础入门](./lab0.md) -- LAB#1 LSM-Tree 存储引擎 +- [LAB#1 LSM-Tree 存储引擎](./lab1.md) - LAB#2 查询引擎 - LAB#3 事务引擎 - LAB#4 性能测试 diff --git a/docs/docs/design/miniob-lsm-tree.md b/docs/docs/design/miniob-lsm-tree.md new file mode 100644 index 000000000..b4134a3da --- /dev/null +++ b/docs/docs/design/miniob-lsm-tree.md @@ -0,0 +1,151 @@ +--- +title: MiniOB LSM-Tree 存储引擎 +--- + +# MiniOB LSM-Tree 存储引擎 + +## LSM-Tree 背景介绍 +LSM-Tree 是一种数据结构,可用于存储键值对。LSM-Tree 采用了多层的结构,存储部分可以分为内存和磁盘两个部分。内存中的部分称为 MemTable,磁盘中的部分称为 SSTable(Sorted String Table)。LSM 树通过 Append-Only 的方式提供高效的数据写入,为了优化读取性能,LSM-Tree 通过 Compaction 操作定期重新组织数据。 + +OceanBase 数据库的存储引擎也是基于 LSM-Tree 架构,将数据分为静态基线数据(放在 SSTable 中)和动态增量数据(放在 MemTable 中)两部分,其中 SSTable 是只读的,一旦生成就不再被修改,存储于磁盘;MemTable 支持读写,存储于内存。数据库 DML 操作插入、更新、删除等首先写入 MemTable,等到 MemTable 达到一定大小时转储到磁盘成为 SSTable。在进行查询时,需要分别对 SSTable 和 MemTable 进行查询,并将查询结果进行归并,返回给 SQL 层归并后的查询结果。同时在内存实现了 Block Cache 和 Row cache,来避免对基线数据的随机读。关于 OceanBase 
数据库的更多细节可以参考:https://www.oceanbase.com/docs/oceanbase-database-cn + +ObLsm 是一个为教学设计的 LSM-Tree 结构的 Key-Value 存储引擎。ObLsm 本身是一个独立于 MiniOB 的模块,可以独立编译使用。ObLsm 包含了 LSM-Tree 中的关键结构,可以帮助大家学习 LSM-Tree 架构。 +MiniOB 中也基于 ObLsm 实现了一个基于 LSM-Tree 的表引擎,可以将表数据以 Key-Value 的格式存储到磁盘。 + +## ObLsm 存储引擎 + +下面会对 ObLsm 的各个模块作简单介绍,便于大家对 ObLsm 有一个初步的了解,更多细节可以参考源代码 `src/oblsm`。 + +### MemTable +MemTable 是一种内存数据结构,用作处理即将到来的操作(insert/delete/update)的缓冲区(buffer)。很多数据结构都可以用于 MemTable 的实现,现有的 LSM-Tree 实现(如 LevelDB/RocksDB)中多采用 SkipList,ObLsm 目前也使用 SkipList 作为 MemTable 的底层数据结构。ObLsm 将 insert/update/delete 都视作一条记录来插入到 MemTable 中。 + +* insert:将一条记录插入到 MemTable 中。 +* update:将一条时间戳更大的记录插入到 MemTable 中。 +* delete:将一条 value 为空的记录插入到 MemTable 中。 + +MemTable 将插入的 Key-Value 编码为如下的记录存储。 +``` + ┌───────────┬──────────────┬───────┬──────────────┬──────────────────┐ + │ │ │ │ │ │ + │key_size(8)│ key(key_size)│ seq(8)│ value_size(8)│ value(value_size)│ + │ │ │ │ │ │ + └───────────┴──────────────┴───────┴──────────────┴──────────────────┘ + +``` + +其中,key_size 和 value_size 分别表示 key+seq 和 value 的长度,seq 表示记录的时间戳。括号中表示占用字节数。 + +MemTable 的实现位于:`src/oblsm/memtable/`,在代码中,我们将上图中的`key` 称为 user_key,将 `key + seq` 称为 internal_key,将`key_size + key + seq` 称为 lookup_key。 + +#### SkipList +SkipList(跳表)是用于有序元素序列快速搜索的一个数据结构,SkipList 是一个随机化的数据结构,实质就是一种可以进行二分查找的有序链表。SkipList 在原有的有序链表上面增加了多级索引,通过索引来实现快速查找。跳表不仅能提高搜索性能,同时也可以提高插入和删除操作的性能。它在性能上和红黑树,AVL树不相上下,但是跳表的原理非常简单,实现也比红黑树简单很多。 + +SkipList 的实现位于:`src/oblsm/memtable/ob_skiplist.h` + +#### MemTableIterator +MemTableIterator 提供了一种遍历 MemTable 的机制。它可以按序访问 MemTable 中的所有键值对。 + +MemTableIterator 的实现位于:`src/oblsm/memtable/ob_memtable.h::ObMemTableIterator` + +#### MemTable 转储 +MemTable 转储是将内存中的 MemTable 持久化到磁盘上的过程。当 MemTable 达到一定大小时,会被转储为不可变的 SSTable。转储过程通常包括排序数据、生成 SSTable 文件并将其写入磁盘。 + +转储相关代码位于:`src/oblsm/oblsm_impl.h::ObLsmImpl::try_freeze_memtable` + +### SSTable +MemTable 的大小达到限制条件,MemTable 的数据以按顺序被转储到磁盘中,被转储到磁盘中的结构称为(SSTable:Sorted Strings table)。 + +SSTable 
是一种有序的键值对存储结构,它通常包含一个或多个块(block),每个块中包含一组有序的键值对。 + +SSTable 的存储格式示例如下: +``` + ┌─────────────────┐ + │ block 1 │◄───┐ + ├─────────────────┤ │ + │ block 2 │ │ + ├─────────────────┤ │ + │ .. │ │ + ├─────────────────┤ │ + │ block n │◄─┐ │ + ├─────────────────┤ │ │ + ┌───►│ meta size(n) │ │ │ + │ ├─────────────────┤ │ │ + │ │ block meta 1 ├──┼─┘ + │ ├─────────────────┤ │ + │ │ .. │ │ + │ ├─────────────────┤ │ + │ │ block meta n ├──┘ + │ ├─────────────────┤ + └────┤ │ + └─────────────────┘ +``` + +其中,block 表示由若干键值对组成的数据块。block meta 用于存储 block 的元信息,包括 block 的大小、block 的位置信息,block 中的键值对数量等。 + +SSTable 的实现位于:`src/oblsm/table/` + +#### Block +为了提高整体的读写效率,一个sstable文件按照固定大小划分为 Block。每个Block中,目前只存储了键值对数据。 +Block 的存储格式如下: +``` + ┌─────────────────┐ + │ entry 1 │◄───┐ + ├─────────────────┤ │ + │ entry 2 │ │ + ├─────────────────┤ │ + │ .. │ │ + ├─────────────────┤ │ + │ entry n │◄─┐ │ + ├─────────────────┤ │ │ + ┌───►│ offset size(n) │ │ │ + │ ├─────────────────┤ │ │ + │ │ offset 1 ├──┼─┘ + │ ├─────────────────┤ │ + │ │ .. 
│ │ + │ ├─────────────────┤ │ + │ │ offset n ├──┘ + │ ├─────────────────┤ + └────┤ offset start │ + └─────────────────┘ +``` + +Block 的主要实现位于 `src/oblsm/table/ob_block.h` + +#### SSTableBuilder +用于构造一个 `SSTable`,主要实现位于 `src/oblsm/table/ob_sstable_builder.h` +#### BlockBuilder +用于构造一个 `Block`,主要实现位于 `src/oblsm/table/ob_block_builder.h` + +### Compaction +Compaction 是 LSM-Tree 的关键组件,Compaction 会将多个 SSTable 合并为一个或多个新的 SSTable。Compaction 的实现主要位于 `src/oblsm/compaction/` + +## 基于 ObLsm 的表引擎 +MiniOB 基于 ObLsm 模块实现了一个 LSM-Tree 表引擎,用于以 Key-Value 格式存储表数据。表引擎的实现位于:`src/observer/storage/table/lsm_table_engine.h`。 + +LSM-Tree 表引擎使用方法: +在创建表时指定 engine=lsm,即可使用 LSM-Tree 表引擎。 +当不指定engine 或指定 engine=heap,将使用堆表作为表数据的存储方式。 + +```sql +create table t1 (id int primary key, name varchar(20))engine=lsm; +``` + +在 MiniOB 中,使用关系型模型来描述表结构,而 LSM-Tree 表引擎将表数据以 Key-Value 的形式存储到磁盘,因此需要提供一种机制来将关系型模型转换为 Key-Value 模型。 +目前 MiniOB 中以自增列作为 Key,将行数据以 `Table::make_record` 编码为 Value。通过 [orderedcode](https://github.com/google/orderedcode) 来对 Key 列做编码,使得在编码后 Key 的字典序上比较与 Key 对应的原始序列(目前可以认为只有自增列一列,后续支持主键后,Key 会对应表中的多列)上进行比较具有相同的顺序。 + +此外,为了在同一个 LSM-Tree 引擎中存储多张表的数据,MiniOB 为每一张表分配一个 TableID,并在 Key 中加入 TableID 作为前缀。 + +因此,每行数据按照如下规则编码成 (Key, Value) 键值对: + +``` +Key: t{TableID}r{AutoIncID} +Value: [col1, col2, col3, col4] +``` + +## 参考资料 + +1. [OceanBase](https://www.oceanbase.com/docs/oceanbase-database-cn) +2. [LSM-Tree](https://www.cs.umb.edu/~poneil/lsmtree.pdf) +3. [LevelDB](https://github.com/google/leveldb) +4. [RocksDB](https://github.com/facebook/rocksdb/wiki) +5. 
[Mini-LSM](https://skyzh.github.io/mini-lsm/) \ No newline at end of file diff --git a/docs/docs/dev-env/how_to_submit_for_testing.md b/docs/docs/dev-env/how_to_submit_for_testing.md new file mode 100644 index 000000000..198806bf5 --- /dev/null +++ b/docs/docs/dev-env/how_to_submit_for_testing.md @@ -0,0 +1,66 @@ +--- +title: 训练营平台使用说明 +--- + +# 训练营平台使用说明 + +[训练营平台](https://open.oceanbase.com/train) 是 OceanBase 为学生和数据库开发者设计的数据库学习与练习平台,旨在帮助用户更高效地掌握数据库基础知识。通过该平台,开发者可以从零开始逐步理解数据库的底层原理和实现方式,参与数据库实践与开发练习。训练营平台支持为 MiniOB/OceanBase 代码提交测试,通过运行自动化测试用例,反馈用户提交代码的运行结果和问题。 + +## 如何使用训练营提交代码测试 + +用户可根据下面的流程进行提测。 + +### 1. 进入训练营平台并登录训练营平台 + +打开训练营网站:https://open.oceanbase.com/train + +你会看到训练营主页。 + +![](images/train_mainpage.png) + +点击训练营主页上方的登录/注册按钮登录训练营平台。 + +### 2. 报名具体的训练营课题进行提测 + +登录后再点击 `广场` 中的训练营课题的 `报名参加` 按钮报名具体的训练营课题。目前公开的训练营课题包含 `MiniOB` 对应 2021 年大赛题目;`MiniOB 2022` 对应 2022 年大赛题目;`MiniOB 2023` 对应 2023 年大赛题目;`MiniOB 2024` 对应 2024 年大赛题目。后续会增加更多的训练营课题。 + +![](images/train_miniob_2024.png) + +对于不需要报名的训练营课题,在点击 `报名参加` 后,会自动跳转到训练营课题的题目页面。 +对于需要报名的训练营课题,需要在点击 `报名参加` 后,填写自己的报名信息,再进入训练营课题的题目页面。(**注意**:如果找不到自己已报名的训练营课题,可以点击 `我加入的` 页面进行查看。) + +### 3. 查看训练营课题的具体题目 + +进入到训练营课题你会看到如下页面。用户可以在题目列表查看该训练营课题对应的所有题目。如果要查看此题目排行榜,可点击「排行榜」按钮,目前只显示前 10 名及个人成绩,根据总分降序排列。 +![](images/train_miniob_2024_question.png) + +通过点击题目可以查看题目的详细描述,请参考题目的详细描述进行代码编写和自测。 +![](images/train_miniob_2024_question_detail1.png) +![](images/train_miniob_2024_question_detail2.png) + +### 4. 提交代码测试 +在完成本地代码编写和自测后,请将代码提交到 Github/Gitee 仓库中。 +代码提交方法请参考[Github 代码提交](https://oceanbase.github.io/miniob/game/github-introduction/),[Gitee 代码提交](https://oceanbase.github.io/miniob/game/gitee-instructions/)。 + +点击「立即提测」按钮后,会弹出代码提交窗口。请填写仓库地址,commit id 和 branch 等必要信息并点击提交按钮。 + +![](images/train_miniob_2024_submit_code.png) + +### 5. 
查看提测结果 +提交代码后,训练营平台会运行自动化测试用例,并反馈用户提交代码的运行结果和问题(如果等待时间过长(miniob 相关课题等待超过 1 小时,oceanbase 相关题目超过 6 小时)可联系相关工作人员)。 + +![](images/train_contact_us.png) + +可以点击 「查看提测记录」按钮查看提测结果。对于失败的测试用例,请点击下图中的按钮,即可查看失败的详细原因。 + +![](images/train_miniob_result.png) + +如果在使用训练营平台的过程中有问题可以在 https://ask.oceanbase.com/ 提问,问题分类请选择 "训练营"。 + + + + + + + + diff --git a/docs/docs/dev-env/images/train_contact_us.png b/docs/docs/dev-env/images/train_contact_us.png new file mode 100644 index 000000000..98611553a Binary files /dev/null and b/docs/docs/dev-env/images/train_contact_us.png differ diff --git a/docs/docs/dev-env/images/train_mainpage.png b/docs/docs/dev-env/images/train_mainpage.png new file mode 100644 index 000000000..86834454b Binary files /dev/null and b/docs/docs/dev-env/images/train_mainpage.png differ diff --git a/docs/docs/dev-env/images/train_miniob_2024.png b/docs/docs/dev-env/images/train_miniob_2024.png new file mode 100644 index 000000000..6cc11790b Binary files /dev/null and b/docs/docs/dev-env/images/train_miniob_2024.png differ diff --git a/docs/docs/dev-env/images/train_miniob_2024_question.png b/docs/docs/dev-env/images/train_miniob_2024_question.png new file mode 100644 index 000000000..ad3d145c6 Binary files /dev/null and b/docs/docs/dev-env/images/train_miniob_2024_question.png differ diff --git a/docs/docs/dev-env/images/train_miniob_2024_question_detail1.png b/docs/docs/dev-env/images/train_miniob_2024_question_detail1.png new file mode 100644 index 000000000..a98233919 Binary files /dev/null and b/docs/docs/dev-env/images/train_miniob_2024_question_detail1.png differ diff --git a/docs/docs/dev-env/images/train_miniob_2024_question_detail2.png b/docs/docs/dev-env/images/train_miniob_2024_question_detail2.png new file mode 100644 index 000000000..2a6534b34 Binary files /dev/null and b/docs/docs/dev-env/images/train_miniob_2024_question_detail2.png differ diff --git a/docs/docs/dev-env/images/train_miniob_2024_submit_code.png 
b/docs/docs/dev-env/images/train_miniob_2024_submit_code.png new file mode 100644 index 000000000..447704688 Binary files /dev/null and b/docs/docs/dev-env/images/train_miniob_2024_submit_code.png differ diff --git a/docs/docs/dev-env/images/train_miniob_result.png b/docs/docs/dev-env/images/train_miniob_result.png new file mode 100644 index 000000000..dc594dd1c Binary files /dev/null and b/docs/docs/dev-env/images/train_miniob_result.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_clangd.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_clangd.png new file mode 100644 index 000000000..62400aaab Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_clangd.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab1.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab1.png new file mode 100644 index 000000000..e62a1f8fa Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab1.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab2.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab2.png new file mode 100644 index 000000000..76357ec21 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab2.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab3.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab3.png new file mode 100644 index 000000000..47b0c0cc2 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab3.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab4.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab4.png new file mode 100644 index 000000000..03f2704e2 Binary files /dev/null and 
b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab4.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_config_docker.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_config_docker.png new file mode 100644 index 000000000..baf3c7a48 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_config_docker.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_cppdbg.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_cppdbg.png new file mode 100644 index 000000000..fa0ba604a Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_cppdbg.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_open_miniob2.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_open_miniob2.png new file mode 100644 index 000000000..904bc7d06 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_open_miniob2.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_open_miniob_as_workspace.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_open_miniob_as_workspace.png new file mode 100644 index 000000000..147ffd951 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_open_miniob_as_workspace.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_run_docker.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_run_docker.png new file mode 100644 index 000000000..01763960b Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_run_docker.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-config-clangd.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-config-clangd.png new file mode 100644 index 000000000..640e0dc34 Binary files /dev/null and 
b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-config-clangd.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-debug.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-debug.png new file mode 100644 index 000000000..e47630ed7 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-debug.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-install-clangd.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-install-clangd.png new file mode 100644 index 000000000..e381b275f Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-install-clangd.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-install-cppdbg.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-install-cppdbg.png new file mode 100644 index 000000000..eaeb0cbd3 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-install-cppdbg.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-launch-config.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-launch-config.png new file mode 100644 index 000000000..e0fde6a3a Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-launch-config.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_check_status_ssh.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_check_status_ssh.png new file mode 100644 index 000000000..168faec13 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_check_status_ssh.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_download_ubuntu.png 
b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_download_ubuntu.png new file mode 100644 index 000000000..ec54265b5 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_download_ubuntu.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_sshd.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_sshd.png new file mode 100644 index 000000000..0d70077ab Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_sshd.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu1.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu1.png new file mode 100644 index 000000000..d5e61673c Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu1.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu2.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu2.png new file mode 100644 index 000000000..e4d91961c Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu2.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu3.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu3.png new file mode 100644 index 000000000..6dee02c82 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu3.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu4.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu4.png new file mode 100644 index 000000000..8bf732ae7 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu4.png differ diff --git 
a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu5.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu5.png new file mode 100644 index 000000000..6d235e81e Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu5.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu6.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu6.png new file mode 100644 index 000000000..c41fb584e Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu6.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu7.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu7.png new file mode 100644 index 000000000..cc65a9212 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu7.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu8.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu8.png new file mode 100644 index 000000000..5af470184 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu8.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu9.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu9.png new file mode 100644 index 000000000..181c91799 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu9.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu_net.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu_net.png new file mode 100644 index 000000000..5c777b80b Binary files /dev/null and 
b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu_net.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_ssh_connection.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_ssh_connection.png new file mode 100644 index 000000000..2ea5d5ad5 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_ssh_connection.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_install_ssh.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_install_ssh.png new file mode 100644 index 000000000..878ae7499 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_install_ssh.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_file.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_file.png new file mode 100644 index 000000000..0ec18bab5 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_file.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote.png new file mode 100644 index 000000000..1dcc53275 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote2.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote2.png new file mode 100644 index 000000000..ecb9c4272 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote2.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_workspcae.png 
b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_workspcae.png new file mode 100644 index 000000000..1746af6b4 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_workspcae.png differ diff --git a/docs/docs/dev-env/vscode_dev_with_local_virtual_env.md b/docs/docs/dev-env/vscode_dev_with_local_virtual_env.md new file mode 100644 index 000000000..fbba2b1da --- /dev/null +++ b/docs/docs/dev-env/vscode_dev_with_local_virtual_env.md @@ -0,0 +1,128 @@ +--- +title: 虚拟机+`vscode remote`开发 +--- +# 虚拟机+`vscode remote`开发 +作者:徐平 数据科学与工程学院 华东师范大学 + +#### 1. 安装Ubuntu +Ubuntu下载地址:[下载](https://cn.ubuntu.com/download/desktop) + +点击下载 + +![](images/vscode_dev_with_local_virtual_env_setup_download_ubuntu.png) + +选择典型的类型配置,点击下一步 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu1.png) + +找到刚刚从网站下载的iso文件,点击下一步 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu2.png) + +设置名称和密码 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu4.png) + +设置虚拟机名称和虚拟机数据存放位置 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu5.png) + +设置磁盘大小,推荐40~80G,点击下一步 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu6.png) + +点击完成即可 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu7.png) + +虚拟机开机之后,不断点击`Next`即可, 注意这里选择`Install Ubuntu`,后续操作也是不断点击`Next`。 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu8.png) + +输入账号名和密码 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu9.png) +后续就点击`Next`,最后安装`Ubuntu`,等待安装`Ubuntu`完毕,安装完毕之后点击`Restart Now`即可。 + +#### 2. 配置环境 +登录,进入终端,输入以下命令: +1. 安装网络工具 +``` +sudo apt update && sudo apt -y upgrade +sudo apt install -y net-tools openssh-server +``` +2. 输入命令`ssh-keygen -t rsa`,然后一路回车,生成密钥。 +![](images/vscode_dev_with_local_virtual_env_setup_init_sshd.png) +3. 
然后检查`ssh`服务器的状态,输入命令:`sudo systemctl status ssh`或`sudo systemctl status sshd` +![](images/vscode_dev_with_local_virtual_env_setup_check_status_ssh.png) +**注意这里可能出现的错误**: +* 如果上图中本应为绿色的`active`状态显示为红色,表示`sshd`没有启动,可以使用命令`sudo systemctl restart ssh`或者`sudo systemctl restart sshd`重新启动服务。 +* 如果`systemctl`找不到`sshd`/`ssh`服务,可以尝试输入下面两个命令:`ssh-keygen -A`和`/etc/init.d/ssh start`,然后再去查看服务器状态。 + + +4. 安装完毕之后,输入`ifconfig`查看虚拟机`ip` +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu_net.png) +5. 然后就可以在本地终端使用`ssh`命令连接虚拟机服务器。 + `ssh <用户名>@<上图操作中的ip地址>` +![](images/vscode_dev_with_local_virtual_env_setup_ssh_connection.png) +6. 安装`vscode`:[vscode下载地址](https://code.visualstudio.com/download) +7. 安装`ssh remote`插件 +![](images/vscode_dev_with_local_virtual_env_setup_vscode_install_ssh.png) +8. 配置插件,添加刚刚的虚拟机 +![](images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote.png) +9. 输入连接虚拟机的命令,如下图示例 +![](images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote2.png) +10. 打开一个新的远程文件夹: + ![](images/vscode_dev_with_local_virtual_env_setup_vscode_new_file.png) +11. 选择一个文件夹作为开发文件夹,这里我选择`/home/pingxu/Public/` +![](images/vscode_dev_with_local_virtual_env_setup_vscode_workspcae.png) +进入新的文件夹并输入密码之后,`vscode`会询问是否信任当前目录,选择`yes`即可。至此,虚拟机安装完毕,工作目录是`/home/pingxu/Public/`。 +#### 3. 安装必要软件 +`vscode`中`ctrl`+`~`打开终端,直接把下面命令拷贝过去 +``` +sudo apt-get update && sudo apt-get install -y locales apt-utils && sudo rm -rf /var/lib/apt/lists/* \ + && sudo localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 +sudo apt-get update \ + && sudo apt-get install -y build-essential gdb cmake git wget flex texinfo libreadline-dev diffutils bison \ + && sudo apt-get install -y clang-format vim +sudo apt-get -y install clangd lldb +``` +#### 4. 
安装miniob +``` +# 从github克隆项目时如果遇到网络问题,可以使用下面的命令配置网络代理 +git config --global http.proxy http://<代理ip>:<代理端口> +git config --global https.proxy https://<代理ip>:<代理端口> +``` +在`Public`目录下: +``` +git clone https://github.com/oceanbase/miniob +cd miniob +THIRD_PARTY_INSTALL_PREFIX=/usr/local bash build.sh init +``` +完毕之后,我们用`vscode`打开`miniob`,作为新的工作目录。 + +![](images/vscode_dev_with_local_virtual_env_open_miniob_as_workspace.png) + +![](images/vscode_dev_with_local_virtual_env_open_miniob2.png) + +#### 5. vscode插件配置 +1. 首先安装插件`clangd`和`C/C++ Debug`。 +安装`clangd`: + +![](images/vscode_dev_with_local_virtual_env_setup-install-clangd.png) + +同样的方式安装`C/C++ Debug`。 + +![](images/vscode_dev_with_local_virtual_env_setup-install-cppdbg.png) + +2. 修改好代码之后,`Ctrl+Shift+B`构建项目,构建完毕后有一个`build_debug`的文件夹,存放编译后的可执行文件。 +3. 使用`clangd`作为语言服务器, 构建完毕后,将`build_debug`中的`compile_commands.json`文件复制到`miniob`目录中,随便打开一个cpp文件,就可以看到`clangd`开始工作。 + +![](images/vscode_dev_with_local_virtual_env_setup-config-clangd.png) + +#### 6. debug简单教程 + 用`F5`进行调试,关于`vscode`如何调试,可以参考相关的资料:[cpp-debug](https://code.visualstudio.com/docs/cpp/cpp-debug)。修改`launch.json`文件中`program`和`args`来调试不同的可执行文件。 + +![](images/vscode_dev_with_local_virtual_env_setup-launch-config.png) +![](images/vscode_dev_with_local_virtual_env_setup-debug.png) + diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 69984b6cd..4c1835b4f 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -14,6 +14,7 @@ nav: - 运行: how_to_run.md - 开发环境: - dev-env/introduction.md + - dev-env/vscode_dev_with_local_virtual_env.md - dev-env/dev_by_online.md - dev-env/how-to-dev-using-docker.md - dev-env/how_to_dev_in_docker_container_by_vscode.md @@ -21,6 +22,7 @@ nav: - dev-env/how_to_dev_miniob_by_docker_on_windows.md - dev-env/how_to_dev_miniob_by_vscode.md - dev-env/miniob-how-to-debug.md + - dev-env/how_to_submit_for_testing.md - 设计文档: - design/miniob-architecture.md - design/miniob-buffer-pool.md @@ -36,6 +38,7 @@ nav: - design/miniob-mysql-protocol.md - 
design/miniob-pax-storage.md - design/miniob-aggregation-and-group-by.md + - design/miniob-lsm-tree.md - Doxy 代码文档: design/doxy/html/index.html - OceanBase 数据库大赛: - game/introduction.md @@ -51,6 +54,7 @@ nav: - db_course_lab/overview.md - db_course_lab/cloudlab_setup.md - db_course_lab/lab0.md + - db_course_lab/lab1.md - 数据库实现简明教程: - lectures/copyright.md - lectures/lecture-1.md diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 995add319..da0613da6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,7 @@ ADD_SUBDIRECTORY(common) ADD_SUBDIRECTORY(observer) ADD_SUBDIRECTORY(obclient) +ADD_SUBDIRECTORY(oblsm) if (WITH_MEMTRACER) ADD_SUBDIRECTORY(memtracer) @@ -9,4 +10,3 @@ endif() if (WITH_CPPLINGS) ADD_SUBDIRECTORY(cpplings) endif() -ADD_SUBDIRECTORY(oblsm) \ No newline at end of file diff --git a/src/common/lang/bitset.h b/src/common/lang/bitset.h new file mode 100644 index 000000000..3b1a37a90 --- /dev/null +++ b/src/common/lang/bitset.h @@ -0,0 +1,15 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include + +using std::bitset; \ No newline at end of file diff --git a/src/common/lang/condition_variable.h b/src/common/lang/condition_variable.h new file mode 100644 index 000000000..683ccab37 --- /dev/null +++ b/src/common/lang/condition_variable.h @@ -0,0 +1,15 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. 
+You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include + +using std::condition_variable; diff --git a/src/common/lang/memory.h b/src/common/lang/memory.h index 6fd00fc4a..77ac0c551 100644 --- a/src/common/lang/memory.h +++ b/src/common/lang/memory.h @@ -12,5 +12,8 @@ See the Mulan PSL v2 for more details. */ #include +using std::enable_shared_from_this; +using std::make_shared; using std::make_unique; +using std::shared_ptr; using std::unique_ptr; \ No newline at end of file diff --git a/src/common/lang/stack.h b/src/common/lang/stack.h new file mode 100644 index 000000000..a1f5db36a --- /dev/null +++ b/src/common/lang/stack.h @@ -0,0 +1,15 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include + +using std::stack; \ No newline at end of file diff --git a/src/common/lang/unordered_set.h b/src/common/lang/unordered_set.h index dac6d7146..974d8901e 100644 --- a/src/common/lang/unordered_set.h +++ b/src/common/lang/unordered_set.h @@ -12,4 +12,15 @@ See the Mulan PSL v2 for more details. 
*/ #include -using std::unordered_set; \ No newline at end of file +using std::unordered_set; + +template +static bool is_subset(const unordered_set &super_set, const unordered_set &child_set) +{ + for (const auto &element : child_set) { + if (super_set.find(element) == super_set.end()) { + return false; + } + } + return true; +} \ No newline at end of file diff --git a/src/cpplings/cas.cpp b/src/cpplings/cas.cpp index 8f3c04960..92ac07589 100644 --- a/src/cpplings/cas.cpp +++ b/src/cpplings/cas.cpp @@ -33,7 +33,6 @@ void append_node(int val) { Node *old_head = list_head; Node *new_node = new Node{val, old_head}; - // TODO: 使用 compare_exchange_strong 来使这段代码线程安全。 list_head = new_node; } diff --git a/src/oblsm/CMakeLists.txt b/src/oblsm/CMakeLists.txt index 8ffa9f9ab..235417de4 100644 --- a/src/oblsm/CMakeLists.txt +++ b/src/oblsm/CMakeLists.txt @@ -1,4 +1,6 @@ file(GLOB_RECURSE OBLSM_SOURCES "*.cpp") +list(FILTER OBLSM_SOURCES EXCLUDE REGEX "client/.*") +list(FILTER OBLSM_SOURCES EXCLUDE REGEX "benchmarks/.*") SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) add_library(oblsm STATIC ${OBLSM_SOURCES}) @@ -7,3 +9,9 @@ message("OBLSM_SOURCES: ${OBLSM_SOURCES}") FIND_PACKAGE(jsoncpp CONFIG REQUIRED) TARGET_LINK_LIBRARIES(oblsm common pthread JsonCpp::JsonCpp) + +ADD_EXECUTABLE(oblsm_cli client/ob_lsm_client.cpp) +TARGET_LINK_LIBRARIES(oblsm_cli oblsm) + +ADD_EXECUTABLE(oblsm_bench benchmarks/ob_lsm_bench.cpp) +TARGET_LINK_LIBRARIES(oblsm_bench oblsm) diff --git a/src/oblsm/benchmarks/ob_lsm_bench.cpp b/src/oblsm/benchmarks/ob_lsm_bench.cpp new file mode 100644 index 000000000..6824dde98 --- /dev/null +++ b/src/oblsm/benchmarks/ob_lsm_bench.cpp @@ -0,0 +1,2 @@ +// TODO: add oblsm bench tool, reference leveldb db_bench +int main() { return 0; } \ No newline at end of file diff --git a/src/oblsm/client/ob_lsm_client.cpp b/src/oblsm/client/ob_lsm_client.cpp new file mode 100644 index 000000000..523709acc --- /dev/null +++ b/src/oblsm/client/ob_lsm_client.cpp @@ -0,0 +1,35 
@@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include +#include "common/lang/string.h" + +const char *startup_tips = R"( +Welcome to the OceanBase database implementation course. + +Copyright (c) 2021 OceanBase and/or its affiliates. + +Learn more about OceanBase at https://github.com/oceanbase/oceanbase +Learn more about MiniOB at https://github.com/oceanbase/miniob + +)"; + +int main(int argc, char *argv[]) +{ + printf("%s\n", startup_tips); + // TODO: a simple cli for oblsm, reference src/obclient/client.cpp + // usage example: + // put key1 value1 + // get key1 + // scan key1 key9 + printf("oblsm client is working in progress.\n"); + + return 0; +} diff --git a/src/oblsm/compaction/ob_compaction.cpp b/src/oblsm/compaction/ob_compaction.cpp new file mode 100644 index 000000000..e1dd39253 --- /dev/null +++ b/src/oblsm/compaction/ob_compaction.cpp @@ -0,0 +1,13 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#include "oblsm/compaction/ob_compaction.h" + +namespace oceanbase {} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/compaction/ob_compaction.h b/src/oblsm/compaction/ob_compaction.h new file mode 100644 index 000000000..efa43fe56 --- /dev/null +++ b/src/oblsm/compaction/ob_compaction.h @@ -0,0 +1,81 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "oblsm/table/ob_sstable.h" +#include "oblsm/include/ob_lsm_options.h" + +namespace oceanbase { + +class ObCompactionPicker; + +/** + * @class ObCompaction + * @brief Represents a compaction task in the LSM-Tree. + * + * This class encapsulates the metadata and operations for a single compaction task, + * including input SSTables and the target level of the compaction. + */ +class ObCompaction +{ +public: + /** + * @brief Grants access to private members of this class to compaction picker classes. + * @see ObCompactionPicker, TiredCompactionPicker, LeveledCompactionPicker + */ + friend class ObCompactionPicker; + friend class TiredCompactionPicker; + friend class LeveledCompactionPicker; + + /** + * @brief Constructs a compaction task targeting a specific level. + * @param level The current level of the SSTables involved in the compaction. + */ + explicit ObCompaction(int level) : level_(level) {} + + ~ObCompaction() = default; + + /** + * @brief Gets the target level for this compaction. + * @return The integer value representing the level. 
+ */ + int level() const { return level_; } + + /** + * @brief Retrieves an SSTable from the input SSTable list. + * @param which Index indicating which level's inputs to access (0 for `level_`, 1 for `level_ + 1`). + * @param i Index of the SSTable within the specified level's inputs. + * @return A shared pointer to the specified SSTable. + */ + shared_ptr input(int which, int i) const { return inputs_[which][i]; } + + /** + * @brief Computes the total number of input SSTables for the compaction task. + * @return The total number of input SSTables across both levels. + */ + int size() const { return inputs_[0].size() + inputs_[1].size(); } + + /** + * @brief Retrieves the vector of SSTables from the specified input level. + */ + const vector> &inputs(int which) const { return inputs_[which]; } + +private: + /// Each compaction reads inputs from "level_" and "level_+1" + std::vector> inputs_[2]; + + /** + * @brief The current level of SSTables involved in the compaction. + */ + int level_; +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/compaction/ob_compaction_picker.cpp b/src/oblsm/compaction/ob_compaction_picker.cpp new file mode 100644 index 000000000..0602f10be --- /dev/null +++ b/src/oblsm/compaction/ob_compaction_picker.cpp @@ -0,0 +1,44 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#include "oblsm/compaction/ob_compaction_picker.h" +#include "common/log/log.h" + +namespace oceanbase { + +// TODO: put it in options +unique_ptr TiredCompactionPicker::pick(SSTablesPtr sstables) +{ + if (sstables->size() < options_->default_run_num) { + return nullptr; + } + unique_ptr compaction(new ObCompaction(0)); + // TODO(opt): a tricky compaction picker, just pick all sstables if enough sstables. + for (size_t i = 0; i < sstables->size(); ++i) { + size_t tire_i_size = (*sstables)[i].size(); + for (size_t j = 0; j < tire_i_size; ++j) { + compaction->inputs_[0].emplace_back((*sstables)[i][j]); + } + } + // TODO: LOG_DEBUG for debug + return compaction; +} + +ObCompactionPicker *ObCompactionPicker::create(CompactionType type, ObLsmOptions *options) +{ + + switch (type) { + case CompactionType::TIRED: return new TiredCompactionPicker(options); + default: return nullptr; + } + return nullptr; +} + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/compaction/ob_compaction_picker.h b/src/oblsm/compaction/ob_compaction_picker.h new file mode 100644 index 000000000..4b844de95 --- /dev/null +++ b/src/oblsm/compaction/ob_compaction_picker.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#pragma once + +#include "oblsm/table/ob_sstable.h" +#include "common/sys/rc.h" +#include "oblsm/compaction/ob_compaction.h" +#include "oblsm/util/ob_comparator.h" + +namespace oceanbase { + +/** + * @class ObCompactionPicker + * @brief Abstract base class for compaction picker strategies in an LSM-Tree. + * + * This class defines the interface for picking a compaction task from a set of SSTables. + * Derived classes will implement specific compaction strategies (e.g., tiered or leveled compaction). + */ +class ObCompactionPicker +{ +public: + /** + * @param options Pointer to the LSM-Tree options configuration. + */ + ObCompactionPicker(ObLsmOptions *options) : options_(options) {} + + virtual ~ObCompactionPicker() = default; + + /** + * @brief Pure virtual method to pick a compaction task. + * @param sstables A pointer to the SSTables available for compaction. + * @return A unique pointer to the selected compaction task. + */ + virtual unique_ptr pick(SSTablesPtr sstables) = 0; + + /** + * @brief Static factory method to create a specific compaction picker. + * @param type The type of compaction strategy (e.g., tiered, leveled). + * @param options Pointer to the LSM-Tree options configuration. + * @return A pointer to the created ObCompactionPicker instance. + */ + static ObCompactionPicker *create(CompactionType type, ObLsmOptions *options); + +protected: + ObLsmOptions *options_; ///< Pointer to the LSM-Tree options configuration. +}; + +/** + * @class TiredCompactionPicker + * @brief A class implementing the tiered compaction strategy. + * + */ +class TiredCompactionPicker : public ObCompactionPicker +{ +public: + /** + * @param options Pointer to the LSM-Tree options configuration. + */ + TiredCompactionPicker(ObLsmOptions *options) : ObCompactionPicker(options) {} + + ~TiredCompactionPicker() = default; + + /** + * @brief Implementation of the pick method for tiered compaction. 
+ */ + unique_ptr pick(SSTablesPtr sstables) override; + +private: +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/include/ob_lsm.h b/src/oblsm/include/ob_lsm.h new file mode 100644 index 000000000..175986173 --- /dev/null +++ b/src/oblsm/include/ob_lsm.h @@ -0,0 +1,99 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/lang/string.h" +#include "common/lang/string_view.h" +#include "common/sys/rc.h" +#include "oblsm/include/ob_lsm_options.h" +#include "oblsm/include/ob_lsm_iterator.h" + +namespace oceanbase { + +/** + * @brief ObLsm is a key-value storage engine for educational purpose. + * ObLsm learned a lot about design from leveldb and streamlined it. + * TODO: add more comments about ObLsm. + */ +class ObLsm +{ +public: + /** + * @brief Opens an LSM-Tree database at the specified path. + * + * This is a static method that initializes an LSM-Tree database instance. + * It allocates memory for the database and returns a pointer to it through the + * `dbptr` parameter. The caller is responsible for freeing the memory allocated + * for the database by deleting the returned pointer when it is no longer needed. + * + * @param options A reference to the LSM-Tree options configuration. + * @param path A string specifying the path to the database. + * @param dbptr A double pointer to store the allocated database instance. + * @return An RC value indicating success or failure of the operation. 
+ * @note The caller must delete the returned database pointer (`*dbptr`) when done. + */ + static RC open(const ObLsmOptions &options, const string &path, ObLsm **dbptr); + + ObLsm() = default; + + ObLsm(const ObLsm &) = delete; + + ObLsm &operator=(const ObLsm &) = delete; + + virtual ~ObLsm() = default; + + /** + * @brief Inserts or updates a key-value entry in the LSM-Tree. + * + * This method adds a new entry + * + * @param key The key to insert or update. + * @param value The value associated with the key. + * @return An RC value indicating success or failure of the operation. + */ + virtual RC put(const string_view &key, const string_view &value) = 0; + + /** + * @brief Retrieves the value associated with a specified key. + * + * This method looks up the value corresponding to the given key in the LSM-Tree. + * If the key exists, the value is stored in the output parameter `*value`. + * + * @param key The key to look up. + * @param value Pointer to a string where the retrieved value will be stored. + * @return An RC value indicating success or failure of the operation. + */ + virtual RC get(const string_view &key, string *value) = 0; + + /** + * @brief Creates a new iterator for traversing the LSM-Tree database. + * + * This method returns a heap-allocated iterator over the contents of the + * database. The iterator is initially invalid, and the caller must use one + * of the `seek`/`seek_to_first`/`seek_to_last` methods on the iterator + * before accessing any elements. + * + * @param options Read options to configure the behavior of the iterator. + * @return A pointer to the newly created iterator. + * @note The caller is responsible for deleting the iterator when it is no longer needed. + */ + virtual ObLsmIterator *new_iterator(ObLsmReadOptions options) = 0; + + /** + * @brief Dumps all SSTables for debugging purposes. + * + * This method outputs the structure and contents of all SSTables in the + * LSM-Tree for debugging or inspection purposes. 
+ */ + virtual void dump_sstables() = 0; +}; + +} // namespace oceanbase diff --git a/src/oblsm/include/ob_lsm_iterator.h b/src/oblsm/include/ob_lsm_iterator.h new file mode 100644 index 000000000..90cb04181 --- /dev/null +++ b/src/oblsm/include/ob_lsm_iterator.h @@ -0,0 +1,103 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// An iterator yields a sequence of key/value pairs from a source. +// The following class defines the interface. Multiple implementations +// are provided by this library. In particular, iterators are provided +// to access the contents of a Table or a DB. +// +// Multiple threads can invoke const methods on an ObLsmIterator without +// external synchronization, but if any of the threads may call a +// non-const method, all threads accessing the same ObLsmIterator must use +// external synchronization. + +#pragma once + +#include "common/lang/string_view.h" +#include "common/sys/rc.h" + +namespace oceanbase { + +/** + * @class ObLsmIterator + * @brief Abstract class for iterating over key-value pairs in an LSM-Tree. + * + * This class provides an interface for iterators used to traverse key-value entries + * stored in an LSM-Tree. 
Derived classes must implement this interface to handle + * specific storage structures, such as SSTables or MemTables. + */ +class ObLsmIterator +{ +public: + ObLsmIterator(){}; + + ObLsmIterator(const ObLsmIterator &) = delete; + + ObLsmIterator &operator=(const ObLsmIterator &) = delete; + + virtual ~ObLsmIterator(){}; + + /** + * @brief Checks if the iterator is currently positioned at a valid key-value pair. + * + * @return `true` if the iterator is valid, `false` otherwise. + */ + virtual bool valid() const = 0; + + /** + * @brief Moves the iterator to the next key-value pair in the source. + */ + virtual void next() = 0; + + /** + * @brief Returns the key of the current entry the iterator is positioned at. + * + * This method retrieves the key corresponding to the key-value pair at the + * current position of the iterator. + * + * @return A `string_view` containing the key of the current entry. + */ + virtual string_view key() const = 0; + + /** + * @brief Returns the value of the current entry the iterator is positioned at. + * + * This method retrieves the value corresponding to the key-value pair at the + * current position of the iterator. + * + * @return A `string_view` containing the value of the current entry. + */ + virtual string_view value() const = 0; + + /** + * @brief Positions the iterator at the first entry with a key greater than or equal to the specified key. + * + * @param k The key to search for. + */ + virtual void seek(const string_view &k) = 0; + + /** + * @brief Positions the iterator at the first key-value pair in the source. + * + */ + virtual void seek_to_first() = 0; + + /** + * @brief Positions the iterator at the last key-value pair in the source. 
+ * + */ + virtual void seek_to_last() = 0; +}; + +} // namespace oceanbase diff --git a/src/oblsm/include/ob_lsm_options.h b/src/oblsm/include/ob_lsm_options.h new file mode 100644 index 000000000..7202fde7e --- /dev/null +++ b/src/oblsm/include/ob_lsm_options.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include +#include "oblsm/ob_lsm_define.h" + +namespace oceanbase { + +/** + * @brief Configuration options for the LSM-Tree implementation. + */ +struct ObLsmOptions +{ + ObLsmOptions(){}; + + // TODO: all params are used for test, need to reset to appropriate values. + size_t memtable_size = 8 * 1024; + // sstable size + size_t table_size = 16 * 1024; + + // leveled compaction + size_t default_levels = 7; + size_t default_l1_level_size = 128 * 1024; + size_t default_level_ratio = 10; + size_t default_l0_file_num = 3; + + // tired compaction + size_t default_run_num = 7; + + // default compaction type + CompactionType type = CompactionType::LEVELED; +}; + +// TODO: UNIMPLEMENTED +struct ObLsmReadOptions +{ + ObLsmReadOptions(){}; +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/memtable/ob_memtable.cpp b/src/oblsm/memtable/ob_memtable.cpp new file mode 100644 index 000000000..fbed53039 --- /dev/null +++ b/src/oblsm/memtable/ob_memtable.cpp @@ -0,0 +1,106 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. 
+You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/memtable/ob_memtable.h" +#include "common/lang/string.h" +#include "common/lang/memory.h" +#include "oblsm/util/ob_coding.h" +#include "oblsm/ob_lsm_define.h" + +namespace oceanbase { + +static string_view get_length_prefixed_string(const char *data) +{ + size_t len = get_numeric(data); /* the length prefix occupies the first sizeof(size_t) bytes */ + const char *p = data + sizeof(size_t); + return string_view(p, len); /* non-owning view into the caller's buffer */ +} + +void ObMemTable::put(uint64_t seq, const string_view &key, const string_view &value) +{ + // TODO: add lookup_key, internal_key, user_key relationship and format in memtable/sstable/block + // TODO: unify the encode/decode logic in separate file.
+ // Format of an entry is concatenation of: + // key_size : internal_key.size() (= user_key.size() + SEQ_SIZE), stored as size_t + // key bytes : char[user_key.size()] + // seq : uint64(sequence); key bytes + seq together form the internal key + // value_size : value.size(), stored as size_t + // value bytes : char[value.size()] + size_t user_key_size = key.size(); + size_t val_size = value.size(); + size_t internal_key_size = user_key_size + SEQ_SIZE; /* seq is written below with sizeof(uint64_t), so SEQ_SIZE is assumed to equal it */ + const size_t encoded_len = sizeof(size_t) + internal_key_size + sizeof(size_t) + val_size; + char * buf = reinterpret_cast(arena_.alloc(encoded_len)); + char * p = buf; + memcpy(p, &internal_key_size, sizeof(size_t)); + p += sizeof(size_t); + memcpy(p, key.data(), user_key_size); + p += user_key_size; + memcpy(p, &seq, sizeof(uint64_t)); + p += sizeof(uint64_t); + memcpy(p, &val_size, sizeof(size_t)); + p += sizeof(size_t); + memcpy(p, value.data(), val_size); + table_.insert(buf); /* the skip list stores only the pointer; the bytes stay in arena_ */ +} + +// TODO: use iterator to simplify the code +RC ObMemTable::get(const string_view &lookup_key, string *value) +{ + RC rc = RC::SUCCESS; + ObSkipList::Iterator iter(&table_); + iter.seek(lookup_key.data()); /* positions at the first entry >= lookup_key under KeyComparator */ + if (iter.valid()) { + const char *entry = iter.key(); + char * key_ptr = const_cast(entry); + size_t key_length = get_numeric(key_ptr); + key_ptr += sizeof(size_t); + // TODO: unify comparator and key lookup key in memtable and sstable. + string_view user_key = extract_user_key_from_lookup_key(lookup_key); + if (comparator_.comparator.user_comparator()->compare(string_view(key_ptr, key_length - SEQ_SIZE), user_key) == 0) { + key_ptr += key_length; /* skip user key + seq to reach the value length prefix */ + size_t val_len = get_numeric(key_ptr); + key_ptr += sizeof(size_t); + string_view val(key_ptr, val_len); + value->assign(val.data(), val.size()); + } else { + return RC::NOTFOUND; /* the entry found belongs to a different user key */ + } + } else { + return RC::NOTFOUND; /* no entry at or after lookup_key */ + } + return rc; +} + +int ObMemTable::KeyComparator::operator()(const char *a, const char *b) const +{ + // Internal keys are encoded as length-prefixed strings.
+ string_view a_v = get_length_prefixed_string(a); + string_view b_v = get_length_prefixed_string(b); + return comparator.compare(a_v, b_v); +} + +ObLsmIterator *ObMemTable::new_iterator() { return new ObMemTableIterator(get_shared_ptr(), &table_); } /* get_shared_ptr() calls shared_from_this() */ + +string_view ObMemTableIterator::key() const { return get_length_prefixed_string(iter_.key()); } + +string_view ObMemTableIterator::value() const +{ + string_view key_slice = get_length_prefixed_string(iter_.key()); + return get_length_prefixed_string(key_slice.data() + key_slice.size()); /* the value's length prefix starts right after the internal key */ +} + +void ObMemTableIterator::seek(const string_view &k) +{ + tmp_.clear(); + iter_.seek(k.data()); /* k is handed to KeyComparator, which decodes it as a length-prefixed key */ +} + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/memtable/ob_memtable.h b/src/oblsm/memtable/ob_memtable.h new file mode 100644 index 000000000..45216064e --- /dev/null +++ b/src/oblsm/memtable/ob_memtable.h @@ -0,0 +1,177 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/sys/rc.h" +#include "common/lang/string.h" +#include "common/lang/string_view.h" +#include "common/lang/memory.h" +#include "oblsm/memtable/ob_skiplist.h" +#include "oblsm/util/ob_comparator.h" +#include "oblsm/util/ob_arena.h" +#include "oblsm/include/ob_lsm_iterator.h" + +namespace oceanbase { + +/** + * @class ObMemTable + * @brief MemTable implementation for LSM-Tree. + * + * The `ObMemTable` represents an in-memory structure that stores key-value pairs + * before they are flushed to disk as SSTables.
It supports key-value insertion, + * querying, and iteration. The implementation currently uses a skip list as + * the underlying data structure. + */ +class ObMemTable : public enable_shared_from_this +{ +public: + ObMemTable() : comparator_(), table_(comparator_){}; + + ~ObMemTable() = default; + + /** + * @brief Retrieves a shared pointer to the current ObMemTable instance. + * + * This method utilizes `std::enable_shared_from_this` to provide a shared pointer + * to the current object. Useful when the current object needs to be shared safely + * among multiple components. + * + * @return A shared pointer to the current `ObMemTable` instance. + */ + shared_ptr get_shared_ptr() { return shared_from_this(); } + + /** + * @brief Inserts a key-value pair into the memtable. + * + * Each entry is versioned using the provided `seq` number. If the same key is + * inserted multiple times, the version with the highest sequence number will + * take precedence when queried. + * + * @param seq A sequence number used for versioning the key-value entry. + * @param key The key to be inserted. + * @param value The value associated with the key. + */ + void put(uint64_t seq, const string_view &key, const string_view &value); + + /** + * @brief Retrieves the value associated with a specific key from the memtable. + * + * Seeks to the first entry at or after the lookup key. If that entry has the same + * user key, its value is copied into the output parameter `*value`. + * + * @param key The encoded lookup key (length prefix, user key, sequence), as built by ObLsmImpl::get. + * @param value A pointer to a string where the retrieved value will be stored. + * @return RC::SUCCESS if an entry was found, RC::NOTFOUND otherwise. + */ + RC get(const string_view &key, string *value); + + /** + * @brief Estimates the memory usage of the memtable. + * + * Returns what the arena reports, i.e. the buffers handed out for encoded entries. + * Skip-list nodes are allocated outside the arena and are not part of this number. + * + * @return The approximate memory usage in bytes.
+ */ + size_t appro_memory_usage() const { return arena_.memory_usage(); } + + /** + * @brief Creates a new iterator for traversing the contents of the memtable. + * + * This method returns a heap-allocated iterator for iterating over key-value + * pairs stored in the memtable. The caller is responsible for managing the + * lifetime of the returned iterator. + * The iterator is handed `get_shared_ptr()`, so this memtable must be owned by a shared_ptr. + * @return A pointer to the newly created `ObLsmIterator` for the memtable. + */ + ObLsmIterator *new_iterator(); + +private: + friend class ObMemTableIterator; + /** + * @brief Compares two skip-list entries. + * + * `operator()` decodes the length-prefixed internal key at the start of each entry + * and delegates the comparison to the `ObInternalKeyComparator` member. + * + * @param a Pointer to the first length-prefixed entry. + * @param b Pointer to the second length-prefixed entry. + * @return An integer indicating the result of the comparison: + * - Negative value if `a < b` + * - Zero if `a == b` + * - Positive value if `a > b` + */ + struct KeyComparator + { + const ObInternalKeyComparator comparator; + explicit KeyComparator() {} + int operator()(const char *a, const char *b) const; + }; + + // TODO: currently the memtable uses a skiplist as the underlying data structure, + // it is possible to use other data structure, for example, hash table. + typedef ObSkipList Table; + + /** + * @brief Comparator used for ordering keys in the memtable. + * + * This member defines the rules for comparing keys in the skip list. + * TODO: support user-defined comparator + */ + KeyComparator comparator_; + + /** + * @brief The underlying data structure used for key-value storage. + * + * Currently implemented as a skip list. Future versions may support + * alternative data structures, such as hash tables. + */ + Table table_; + + /** + * @brief Memory arena used for memory management in the memtable. + * + * Provides the buffers for the encoded entries written by put(); its + * memory_usage() is what appro_memory_usage() reports.
+ */ + ObArena arena_; +}; + +/** + * @class ObMemTableIterator + * @brief An iterator for traversing the contents of an `ObMemTable`. + */ +class ObMemTableIterator : public ObLsmIterator +{ +public: + explicit ObMemTableIterator(shared_ptr mem, ObMemTable::Table *table) : mem_(mem), iter_(table) {} + + ObMemTableIterator(const ObMemTableIterator &) = delete; + ObMemTableIterator &operator=(const ObMemTableIterator &) = delete; + + ~ObMemTableIterator() override = default; + + void seek(const string_view &k) override; + void seek_to_first() override { iter_.seek_to_first(); } + void seek_to_last() override { iter_.seek_to_last(); } + + bool valid() const override { return iter_.valid(); } + void next() override { iter_.next(); } + string_view key() const override; + string_view value() const override; + +private: + shared_ptr mem_; /* shared ownership of the memtable whose skip list iter_ walks */ + ObMemTable::Table::Iterator iter_; + string tmp_; // For seek key (seek() currently only clears it) +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/memtable/ob_skiplist.h b/src/oblsm/memtable/ob_skiplist.h new file mode 100644 index 000000000..7046c1311 --- /dev/null +++ b/src/oblsm/memtable/ob_skiplist.h @@ -0,0 +1,398 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors.
+ +#pragma once + +// Thread safety +// ------------- +// +// Writes require external synchronization, most likely a mutex. +// Reads require a guarantee that the ObSkipList will not be destroyed +// while the read is in progress. Apart from that, reads progress +// without any internal locking or synchronization. +// +// Invariants: +// +// (1) Allocated nodes are never deleted until the ObSkipList is +// destroyed. This is trivially guaranteed by the code since we +// never delete any skip list nodes. +// +// (2) The contents of a Node except for the next pointers are +// immutable after the Node has been linked into the ObSkipList. +// Only insert() modifies the list, and it is careful to initialize +// a node and use release-stores to publish the nodes in one or +// more lists. +// +// ... prev vs. next pointer ordering ... + +#include "common/math/random_generator.h" +#include "common/lang/atomic.h" +#include "common/lang/vector.h" +#include "common/log/log.h" + +namespace oceanbase { + +template +class ObSkipList +{ +private: + struct Node; + +public: + /** + * @brief Create a new ObSkipList object that will use "cmp" for comparing keys. + */ + explicit ObSkipList(ObComparator cmp); + + ObSkipList(const ObSkipList &) = delete; + ObSkipList &operator=(const ObSkipList &) = delete; + ~ObSkipList(); + + /** + * @brief Insert key into the list. + * REQUIRES: nothing that compares equal to key is currently in the list + */ + void insert(const Key &key); + + void insert_concurrently(const Key &key); /* body below is still an exercise stub */ + + /** + * @brief Returns true if an entry that compares equal to key is in the list. + * @param [in] key + * @return true if found, false otherwise + */ + bool contains(const Key &key) const; + + /** + * @brief Iteration over the contents of a skip list + */ + class Iterator + { + public: + /** + * @brief Initialize an iterator over the specified list. + * @return The returned iterator is not valid.
+ */ + explicit Iterator(const ObSkipList *list); + + /** + * @brief Returns true iff the iterator is positioned at a valid node. + */ + bool valid() const; + + /** + * @brief Returns the key at the current position. + * REQUIRES: valid() + */ + const Key &key() const; + + /** + * @brief Advance to the next entry in the list. + * REQUIRES: valid() + */ + void next(); + + /** + * @brief Advances to the previous position (found by searching, there are no back links). + * REQUIRES: valid() + */ + void prev(); + + /** + * @brief Advance to the first entry with a key >= target + */ + void seek(const Key &target); + + /** + * @brief Position at the first entry in list. + * @note Final state of iterator is valid() iff list is not empty. + */ + void seek_to_first(); + + /** + * @brief Position at the last entry in list. + * @note Final state of iterator is valid() iff list is not empty. + */ + void seek_to_last(); + + private: + const ObSkipList *list_; + Node *node_; + }; + +private: + enum + { + kMaxHeight = 12 + }; + + inline int get_max_height() const { return max_height_.load(std::memory_order_relaxed); } + + Node *new_node(const Key &key, int height); + int random_height(); + bool equal(const Key &a, const Key &b) const { return (compare_(a, b) == 0); } + + // Return the earliest node that comes at or after key. + // Return nullptr if there is no such node. + // + // If prev is non-null, fills prev[level] with pointer to previous + // node at "level" for every level in [0..max_height_-1]. + Node *find_greater_or_equal(const Key &key, Node **prev) const; + + // Return the latest node with a key < key. + // Return head_ if there is no such node. + Node *find_less_than(const Key &key) const; + + // Return the last node in the list. + // Return head_ if list is empty. + Node *find_last() const; + + // Immutable after construction + ObComparator const compare_; + + Node *const head_; + + // Modified only by insert(). Read racily by readers, but stale + // values are ok.
+ atomic max_height_; // Height of the entire list + + static common::RandomGenerator rnd; /* one generator per template instantiation, used by random_height() */ +}; + +template +common::RandomGenerator ObSkipList::rnd = common::RandomGenerator(); + +// Implementation details follow +template +struct ObSkipList::Node +{ + explicit Node(const Key &k) : key(k) {} + + Key const key; + + // Accessors/mutators for links. Wrapped in methods so we can + // add the appropriate barriers as necessary. + Node *next(int n) + { + ASSERT(n >= 0, "n >= 0"); + // Use an 'acquire load' so that we observe a fully initialized + // version of the returned Node. + return next_[n].load(std::memory_order_acquire); + } + void set_next(int n, Node *x) + { + ASSERT(n >= 0, "n >= 0"); + // Use a 'release store' so that anybody who reads through this + // pointer observes a fully initialized version of the inserted node. + next_[n].store(x, std::memory_order_release); + } + + // No-barrier variants that can be safely used in a few locations. + Node *nobarrier_next(int n) + { + ASSERT(n >= 0, "n >= 0"); + return next_[n].load(std::memory_order_relaxed); + } + void nobarrier_set_next(int n, Node *x) + { + ASSERT(n >= 0, "n >= 0"); + next_[n].store(x, std::memory_order_relaxed); + } + + bool cas_next(int n, Node *expected, Node *x) + { + ASSERT(n >= 0, "n >= 0"); + return next_[n].compare_exchange_strong(expected, x); /* `expected` is a by-value copy, so the caller never sees the observed link */ + } + +private: + // Array of length equal to the node height. next_[0] is lowest level link.
+ atomic next_[1]; +}; + +template +typename ObSkipList::Node *ObSkipList::new_node(const Key &key, int height) +{ + char *const node_memory = reinterpret_cast(malloc(sizeof(Node) + sizeof(atomic) * (height - 1))); /* Node already holds one link, so height - 1 more are appended; NOTE(review): the malloc result is not checked */ + return new (node_memory) Node(key); +} + +template +inline ObSkipList::Iterator::Iterator(const ObSkipList *list) +{ + list_ = list; + node_ = nullptr; +} + +template +inline bool ObSkipList::Iterator::valid() const +{ + return node_ != nullptr; +} + +template +inline const Key &ObSkipList::Iterator::key() const +{ + ASSERT(valid(), "valid"); + return node_->key; +} + +template +inline void ObSkipList::Iterator::next() +{ + ASSERT(valid(), "valid"); + node_ = node_->next(0); +} + +template +inline void ObSkipList::Iterator::prev() +{ + // Instead of using explicit "prev" links, we just search for the + // last node that falls before key. + ASSERT(valid(), "valid"); + node_ = list_->find_less_than(node_->key); + if (node_ == list_->head_) { + node_ = nullptr; + } +} + +template +inline void ObSkipList::Iterator::seek(const Key &target) +{ + node_ = list_->find_greater_or_equal(target, nullptr); +} + +template +inline void ObSkipList::Iterator::seek_to_first() +{ + node_ = list_->head_->next(0); +} + +template +inline void ObSkipList::Iterator::seek_to_last() +{ + node_ = list_->find_last(); + if (node_ == list_->head_) { + node_ = nullptr; + } +} + +template +int ObSkipList::random_height() +{ + // Increase height with probability 1 in kBranching + static const unsigned int kBranching = 4; + int height = 1; + while (height < kMaxHeight && rnd.next(kBranching) == 0) { + height++; + } + ASSERT(height > 0, "height > 0"); + ASSERT(height <= kMaxHeight, "height <= kMaxHeight"); + return height; +} + +template +typename ObSkipList::Node *ObSkipList::find_greater_or_equal( + const Key &key, Node **prev) const +{ + // your code here (exercise stub: until implemented, Iterator::seek() and contains() never find a node) + return nullptr; +} + +template +typename ObSkipList::Node *ObSkipList::find_less_than(const Key &key) const +{ + Node *x =
head_; + int level = get_max_height() - 1; + while (true) { + ASSERT(x == head_ || compare_(x->key, key) < 0, "x == head_ || compare_(x->key, key) < 0"); + Node *next = x->next(level); + if (next == nullptr || compare_(next->key, key) >= 0) { + if (level == 0) { + return x; + } else { + // Switch to next list + level--; + } + } else { + x = next; + } + } +} + +template +typename ObSkipList::Node *ObSkipList::find_last() const +{ + Node *x = head_; + int level = get_max_height() - 1; + while (true) { + Node *next = x->next(level); + if (next == nullptr) { + if (level == 0) { + return x; + } else { + // Switch to next list + level--; + } + } else { + x = next; + } + } +} + +template +ObSkipList::ObSkipList(ObComparator cmp) + : compare_(cmp), head_(new_node(0 /* any key will do */, kMaxHeight)), max_height_(1) +{ + for (int i = 0; i < kMaxHeight; i++) { + head_->set_next(i, nullptr); + } +} + +template +ObSkipList::~ObSkipList() +{ + typename std::vector nodes; + nodes.reserve(max_height_.load(std::memory_order_relaxed)); /* NOTE(review): reserves by list height, not by node count */ + for (Node *x = head_; x != nullptr; x = x->next(0)) { + nodes.push_back(x); + } + for (auto node : nodes) { + node->~Node(); + free(node); /* matches the malloc in new_node() */ + } +} + +template +void ObSkipList::insert(const Key &key) +{} /* exercise stub: nothing is inserted yet */ + +template +void ObSkipList::insert_concurrently(const Key &key) +{ + // your code here +} + +template +bool ObSkipList::contains(const Key &key) const +{ + Node *x = find_greater_or_equal(key, nullptr); + if (x != nullptr && equal(key, x->key)) { + return true; + } else { + return false; + } +} + +} // namespace oceanbase diff --git a/src/oblsm/ob_lsm_define.h b/src/oblsm/ob_lsm_define.h new file mode 100644 index 000000000..9a6cac407 --- /dev/null +++ b/src/oblsm/ob_lsm_define.h @@ -0,0 +1,28 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once +namespace oceanbase { + +static constexpr const char *SSTABLE_SUFFIX = ".sst"; /* appended to the sstable id by ObLsmImpl::get_sstable_path() */ +static constexpr const char *MANIFEST_SUFFIX = ".mf"; + +/** + * @enum CompactionType + * @brief Defines the types of compaction strategies in an LSM-Tree or similar systems. + */ +enum class CompactionType +{ + TIRED = 0, /* presumably "tiered" compaction; spelling kept because other files use this name */ + LEVELED, /* the default in ObLsmOptions */ + UNKNOWN, +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/ob_lsm_impl.cpp b/src/oblsm/ob_lsm_impl.cpp new file mode 100644 index 000000000..15a3c7349 --- /dev/null +++ b/src/oblsm/ob_lsm_impl.cpp @@ -0,0 +1,287 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details.
*/ + +#include "oblsm/ob_lsm_impl.h" + +#include "common/log/log.h" +#include "oblsm/include/ob_lsm.h" +#include "oblsm/ob_lsm_define.h" +#include "oblsm/ob_manifest.h" +#include "oblsm/table/ob_merger.h" +#include "oblsm/table/ob_sstable.h" +#include "oblsm/table/ob_sstable_builder.h" +#include "oblsm/util/ob_coding.h" +#include "oblsm/compaction/ob_compaction_picker.h" +#include "oblsm/ob_user_iterator.h" +#include "oblsm/compaction/ob_compaction.h" +#include "oblsm/ob_lsm_define.h" + +namespace oceanbase { + +ObLsmImpl::ObLsmImpl(const ObLsmOptions &options, const string &path) + : options_(options), path_(path), mu_(), mem_table_(nullptr), imem_tables_() +{ + mem_table_ = make_shared(); + sstables_ = make_shared>>>(); + if (options_.type == CompactionType::LEVELED) { + sstables_->resize(options_.default_levels); + } + + // TODO: Check the cpu consumption at idle + executor_.init("ObLsmBackground", 1, 1, 60 * 1000); + block_cache_ = + std::unique_ptr>>{new ObLRUCache>(1024)}; +} + +RC ObLsm::open(const ObLsmOptions &options, const string &path, ObLsm **dbptr) +{ + RC rc = RC::SUCCESS; + ObLsmImpl *lsm = new ObLsmImpl(options, path); + *dbptr = lsm; /* ownership passes to the caller */ + return rc; +} + +RC ObLsmImpl::put(const string_view &key, const string_view &value) +{ + // TODO: if put rate is too high, slow down writes is needed. + // currently, writes are stopped when the memtable is full. + LOG_TRACE("begin to put key=%.*s, value=%.*s", static_cast<int>(key.size()), key.data(), static_cast<int>(value.size()), value.data()); /* string_view is not NUL-terminated, so print with explicit lengths */ + RC rc = RC::SUCCESS; + // TODO: currently the memtable uses a skiplist as the underlying data structure, + // and concurrent skiplist writes are not thread safe, so we use a mutex here; + // if the skiplist supports an `insert_concurrently()` interface, can we remove the mutex?
+ unique_lock lock(mu_); + + // TODO: write to WAL + uint64_t seq = seq_.fetch_add(1); + mem_table_->put(seq, key, value); + size_t mem_size = mem_table_->appro_memory_usage(); + if (mem_size > options_.memtable_size) { + // Thinking point: here vector is used to store imems, + // but only one imem is stored at most. Is it possible + // to store more than one imem and what are the implications + // of storing more than one imem. + while (imem_tables_.size() >= 1) { + cv_.wait(lock); /* re-check in a loop: a spurious wakeup must not freeze a second memtable while one is pending */ + } + // check again after get lock(maybe freeze memtable by another thread) + if (mem_table_->appro_memory_usage() > options_.memtable_size) { + rc = try_freeze_memtable(); /* report a failed background-task submission to the caller */ + } else { + // if there are multi put threads waiting here, need to notify one thread to + // continue to write to memtable. + cv_.notify_one(); + } + } + return rc; +} + +RC ObLsmImpl::try_freeze_memtable() +{ + RC rc = RC::SUCCESS; + imem_tables_.emplace_back(mem_table_); + mem_table_ = make_shared(); + auto bg_task = [this]() { this->background_compaction(); }; /* the task outlives this frame, so capture only `this` */ + int ret = executor_.execute(bg_task); + if (ret != 0) { + rc = RC::INTERNAL; + LOG_WARN("fail to execute background compaction task"); + } + return rc; +} + +void ObLsmImpl::background_compaction() +{ + unique_lock lock(mu_); + if (imem_tables_.size() >= 1) { + shared_ptr imem = imem_tables_.back(); + imem_tables_.pop_back(); + lock.unlock(); + cv_.notify_one(); /* lets one blocked writer in put() continue */ + build_sstable(imem); + // TODO: trig compaction at more scenarios, for example, + // seek compaction in + // leveldb(https://github.com/google/leveldb/blob/578eeb702ec0fbb6b9780f3d4147b1076630d633/db/version_set.cc#L650).
+ bool expected = false; /* claim the flag atomically instead of check-then-store */ + if (compacting_.compare_exchange_strong(expected, true)) { + try_major_compaction(); + compacting_.store(false); + } + return; + } +} + +void ObLsmImpl::try_major_compaction() +{ + unique_lock lock(mu_); + unique_ptr picker(ObCompactionPicker::create(options_.type, &options_)); + unique_ptr picked = picker->pick(sstables_); + lock.unlock(); + if (picked == nullptr || picked->size() == 0) { + return; + } + vector> results = do_compaction(picked.get()); + + SSTablesPtr new_sstables = make_shared>>>(); + lock.lock(); + size_t levels_size = sstables_->size(); + bool insert_new_sstable = false; + auto find_sstable = [](const vector> &picked, const shared_ptr &sstable) { + for (auto &p : picked) { + if (p->sst_id() == sstable->sst_id()) { + return true; + } + } + return false; + }; + + vector> picked_sstables; + picked_sstables = picked->inputs(0); + const auto &level_i1 = picked->inputs(1); + if (level_i1.size() > 0) { + picked_sstables.insert(picked_sstables.end(), level_i1.begin(), level_i1.end()); + } + // TODO: unify the new sstables logic in all compaction type + if (options_.type == CompactionType::TIRED) { + for (int i = levels_size - 1; i >= 0; --i) { + const vector> &level_i = sstables_->at(i); + for (auto &sstable : level_i) { + if (find_sstable(picked_sstables, sstable)) { + if (!insert_new_sstable) { + new_sstables->insert(new_sstables->begin(), results); + insert_new_sstable = true; + } + } else { + new_sstables->insert(new_sstables->begin(), level_i); + break; + } + } + } + } else if (options_.type == CompactionType::LEVELED) { + // TODO: apply the compaction results to sstable (until then new_sstables stays empty on this path) + } + + sstables_ = new_sstables; + lock.unlock(); + + // remove from disk + for (auto &sstable : picked_sstables) { + sstable->remove(); + } + try_major_compaction(); /* repeat until the picker has nothing left */ +} + +vector> ObLsmImpl::do_compaction(ObCompaction *picked) { return {}; } /* stub: always returns an empty result */ + +void ObLsmImpl::build_sstable(shared_ptr imem) +{ + unique_ptr tb = make_unique(&default_comparator_, block_cache_.get()); + + uint64_t sstable_id =
sstable_id_.fetch_add(1); + tb->build(imem, get_sstable_path(sstable_id), sstable_id); + unique_lock lock(mu_); + + // TODO: unify the build sstable logic in all compaction type + if (options_.type == CompactionType::TIRED) { + // TODO: record the changes for tiered compaction + // here we use `level_i` to store `run_i` + sstables_->insert(sstables_->begin(), {tb->get_built_table()}); + } else if (options_.type == CompactionType::LEVELED) { + sstables_->at(0).emplace_back(tb->get_built_table()); + } +} + +string ObLsmImpl::get_sstable_path(uint64_t sstable_id) +{ + return filesystem::path(path_) / (to_string(sstable_id) + SSTABLE_SUFFIX); +} + +RC ObLsmImpl::get(const string_view &key, string *value) +{ + RC rc = RC::SUCCESS; + unique_lock lock(mu_); + shared_ptr mem = mem_table_; + + shared_ptr imm = nullptr; + if (!imem_tables_.empty()) { + imm = imem_tables_.back(); + } + vector> sstables; + for (auto &level : *sstables_) { + sstables.insert(sstables.end(), level.begin(), level.end()); + } + lock.unlock(); + string lookup_key; + put_numeric(&lookup_key, key.size() + SEQ_SIZE); + lookup_key.append(key.data(), key.size()); + // TODO: currently we only use the latest seq, + // we need to use specific seq if oblsm support transaction + put_numeric(&lookup_key, seq_.load()); + + if (OB_SUCC(rc = mem->get(lookup_key, value))) { /* read the snapshot taken under mu_, not mem_table_, and keep the result in rc */ + LOG_INFO("get key from memtable"); + } else if (imm != nullptr && OB_SUCC(rc = imm->get(lookup_key, value))) { + LOG_INFO("get key from immemtable"); + } else { + for (auto &sst : sstables) { + // TODO: sort sstables and return newest value + if (OB_SUCC(rc = sst->get(lookup_key, value))) { + break; + } + if (rc != RC::NOT_EXIST) { + LOG_WARN("get key from sstables error: %d", rc); + } + } + } + return rc; /* result of the last lookup attempted, so a miss is no longer reported as SUCCESS */ +} + +ObLsmIterator *ObLsmImpl::new_iterator(ObLsmReadOptions options) +{ + unique_lock lock(mu_); + shared_ptr mem = mem_table_; + + shared_ptr imm = nullptr; + if (!imem_tables_.empty()) { + imm = imem_tables_.back(); + } + vector> sstables;
+ for (auto &level : *sstables_) { + sstables.insert(sstables.end(), level.begin(), level.end()); + } + lock.unlock(); + vector> iters; + iters.emplace_back(mem->new_iterator()); + if (imm != nullptr) { + iters.emplace_back(imm->new_iterator()); + } + for (const auto &sst : sstables) { + iters.emplace_back(sst->new_iterator()); + } + + return new_user_iterator(new_merging_iterator(&default_comparator_, std::move(iters)), seq_.load()); +} + +void ObLsmImpl::dump_sstables() +{ + unique_lock lock(mu_); + int level = sstables_->size(); + for (int i = 0; i < level; i++) { + cout << "level " << i << endl; + int level_size = 0; + for (auto &sst : sstables_->at(i)) { + cout << sst->sst_id() << ": " << sst->size() << ";"; + level_size += sst->size(); + } + cout << "level size " << level_size << endl; + } +} + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/ob_lsm_impl.h b/src/oblsm/ob_lsm_impl.h new file mode 100644 index 000000000..a931c1a07 --- /dev/null +++ b/src/oblsm/ob_lsm_impl.h @@ -0,0 +1,142 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details.
*/ + +#pragma once + +#include "oblsm/include/ob_lsm.h" + +#include "common/lang/mutex.h" +#include "common/lang/atomic.h" +#include "common/lang/memory.h" +#include "common/lang/filesystem.h" +#include "common/lang/condition_variable.h" +#include "common/thread/thread_pool_executor.h" +#include "oblsm/memtable/ob_memtable.h" +#include "oblsm/table/ob_sstable.h" +#include "oblsm/util/ob_lru_cache.h" +#include "oblsm/compaction/ob_compaction.h" +#include "oblsm/ob_manifest.h" +#include +#include + +namespace oceanbase { + +class ObLsmImpl : public ObLsm +{ +public: + ObLsmImpl(const ObLsmOptions &options, const string &path); + ~ObLsmImpl() + { + executor_.shutdown(); + executor_.await_termination(); + } + + RC put(const string_view &key, const string_view &value) override; + + RC get(const string_view &key, string *value) override; + + ObLsmIterator *new_iterator(ObLsmReadOptions options) override; + + SSTablesPtr get_sstables() { return sstables_; } /* NOTE(review): copies sstables_ without taking mu_ */ + + // used for debug + void dump_sstables() override; + +private: + /** + * @brief Freezes the current active MemTable. + * + * Appends the active MemTable to `imem_tables_`, installs a fresh active MemTable and + * submits background_compaction() to the executor. It performs no size check itself; + * put() decides when to call it and holds `mu_` while doing so. + * + * @return RC::SUCCESS, or RC::INTERNAL if the background task could not be submitted. + */ + RC try_freeze_memtable(); + + /** + * @brief Performs compaction on the SSTables selected by the compaction strategy. + * + * This function takes a compaction plan (represented by `ObCompaction`) and merges + * the inputs into a new set of SSTables. It creates iterators for the SSTables being + * compacted, merges their data, and writes the merged data into new SSTable files. + * + * @param compaction A pointer to the compaction plan that specifies the input SSTables to merge. + * If `compaction` is `nullptr`, no compaction is performed and an empty result is returned.
+ * + * @return A vector of shared pointers to the newly created SSTables + * resulting from the compaction process. + * + * @details + * - The function retrieves the inputs (SSTables) from the `compaction` plan. + * - For each SSTable, it creates a new iterator to sequentially scan its data. + * - It merges the iterators using a merging iterator (`ObLsmIterator`). + * - It writes the merged key-value pairs into new SSTable files using `ObSSTableBuilder`. + * - If the size of the new SSTable exceeds a predefined size (`options_.table_size`), + * the builder finalizes the current SSTable and starts a new one. + * + * @warning Ensure that the `compaction` object is properly populated with valid inputs. + * @note The definition in ob_lsm_impl.cpp is still a stub that returns an empty vector. + */ + vector> do_compaction(ObCompaction *compaction); + + /** + * @brief Runs compactions for as long as the picker finds work. + * + * Asks the `ObCompactionPicker` created for `options_.type` for a plan. If one is returned, + * runs do_compaction() on it, replaces `sstables_` with the rebuilt list, removes the + * picked SSTables from disk and then calls itself again. + * + * @note This function should be called with care, as major compaction is a resource-intensive + * operation and may affect system performance during execution. + */ + void try_major_compaction(); + + /** + * @brief Executor task: turns the newest immutable MemTable into an SSTable, then tries a major compaction. + */ + void background_compaction(); + + /** + * @brief Builds an SSTable from the given MemTable. + * + * Converts the data in an immutable MemTable (`imem`) into a new SSTable and writes + * it to persistent storage. This step is usually part of the compaction pipeline. + * + * @param imem A shared pointer to the immutable MemTable (`ObMemTable`) to be converted + * into an SSTable. + * @note The caller must ensure that `imem` is immutable and ready for conversion. + */ + void build_sstable(shared_ptr imem); + + /** + * @brief Retrieves the file path for a given SSTable.
+ * + * @param sstable_id The unique identifier of the SSTable whose path needs to be retrieved. + * @return `path_` joined with the decimal `sstable_id` followed by `SSTABLE_SUFFIX`. + */ + string get_sstable_path(uint64_t sstable_id); + + ObLsmOptions options_; + string path_; + mutex mu_; /* taken by put/get/new_iterator and the compaction paths around the three members below */ + shared_ptr mem_table_; + vector> imem_tables_; + SSTablesPtr sstables_; + common::ThreadPoolExecutor executor_; + atomic seq_{0}; + atomic sstable_id_{0}; + condition_variable cv_; /* put() waits on it while an immutable memtable is pending */ + const ObDefaultComparator default_comparator_; + atomic compacting_ = false; + std::unique_ptr>> block_cache_; +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/ob_manifest.cpp b/src/oblsm/ob_manifest.cpp new file mode 100644 index 000000000..0d6a8eb5a --- /dev/null +++ b/src/oblsm/ob_manifest.cpp @@ -0,0 +1,15 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/ob_manifest.h" +#include "common/log/log.h" +#include "oblsm/util/ob_file_writer.h" + +namespace oceanbase {} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/ob_manifest.h b/src/oblsm/ob_manifest.h new file mode 100644 index 000000000..f5a20986f --- /dev/null +++ b/src/oblsm/ob_manifest.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +namespace oceanbase { + +class ObManifestRecord +{}; + +class ObManifestSnapshot +{}; +class ObManifest +{ +public: +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/ob_user_iterator.cpp b/src/oblsm/ob_user_iterator.cpp new file mode 100644 index 000000000..3f08aa652 --- /dev/null +++ b/src/oblsm/ob_user_iterator.cpp @@ -0,0 +1,57 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#include "common/lang/memory.h" +#include "oblsm/ob_user_iterator.h" +#include "oblsm/include/ob_lsm_iterator.h" +#include "oblsm/util/ob_comparator.h" +#include "oblsm/ob_lsm_define.h" +#include "oblsm/util/ob_coding.h" + +namespace oceanbase { + +// a simple warpper for internal iterator +class ObUserIterator : public ObLsmIterator +{ +public: + ObUserIterator(ObLsmIterator *iter, uint64_t seq) : iter_(iter), seq_(seq) {} + + ~ObUserIterator() = default; + + bool valid() const override { return iter_->valid(); } + + void seek_to_first() override { iter_->seek_to_first(); } + + void seek_to_last() override { iter_->seek_to_last(); } + + void seek(const string_view &target) override + { + put_numeric(&lookup_key_, target.size() + SEQ_SIZE); + lookup_key_.append(target.data(), target.size()); + put_numeric(&lookup_key_, seq_); + iter_->seek(string_view(lookup_key_.data(), lookup_key_.size())); + } + + void next() override { iter_->next(); } + + string_view key() const override { return extract_user_key(iter_->key()); } + + string_view value() const override { return iter_->value(); } + +private: + // internal iterator, the key is internal key + unique_ptr iter_; + uint64_t seq_; + string lookup_key_; +}; + +ObLsmIterator *new_user_iterator(ObLsmIterator *iter, uint64_t seq) { return new ObUserIterator(iter, seq); } + +} // namespace oceanbase diff --git a/src/oblsm/ob_user_iterator.h b/src/oblsm/ob_user_iterator.h new file mode 100644 index 000000000..94348ac63 --- /dev/null +++ b/src/oblsm/ob_user_iterator.h @@ -0,0 +1,38 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. 
+You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +namespace oceanbase { + +class ObComparator; +class ObLsmIterator; + +/** + * @brief Creates a new user iterator wrapping the given LSM iterator. + * + * @detail This function takes an existing LSM iterator (`ObLsmIterator`) and wraps it to create + * a user-level iterator. The new iterator is initialized with a specific sequence number (`seq`), + * which determines the context or version visibility of the iterator. + * + * @param iterator The original `ObLsmIterator` to be wrapped. + * @param seq The sequence number to associate with the new user iterator. + * + * @return A pointer to the newly created `ObLsmIterator` instance that acts as a user iterator. + * + * @note The caller is responsible for managing the memory of the returned iterator and + * ensuring that it is properly deleted after use to prevent memory leaks. + * + * @warning Passing a `nullptr` as the `iterator` parameter will result in undefined behavior. + * Ensure that a valid iterator is provided before calling this function. + */ +ObLsmIterator *new_user_iterator(ObLsmIterator *iterator, uint64_t seq); + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/table/ob_block.cpp b/src/oblsm/table/ob_block.cpp new file mode 100644 index 000000000..6d01d27c4 --- /dev/null +++ b/src/oblsm/table/ob_block.cpp @@ -0,0 +1,105 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. 
+You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/table/ob_block.h" +#include "oblsm/util/ob_coding.h" + +namespace oceanbase { + +RC ObBlock::decode(const string &data) +{ + return RC::UNIMPLEMENTED; +} + +RC ObBlock::get(const string_view &key, string *value) +{ + RC rc = RC::NOT_EXIST; + for (size_t i = 0; i < offsets_.size(); i++) { + uint32_t curr_begin = offsets_[i]; + uint32_t curr_end = i == offsets_.size() - 1 ? data_.size() : offsets_[i + 1]; + string_view curr = string_view(data_.data() + curr_begin, curr_end - curr_begin); + // TODO: parse key and value + // const char * data_ptr = curr.data(); + uint32_t key_size = get_numeric(curr.data()); + string_view curr_key = string_view(curr.data() + sizeof(uint32_t), key_size); + uint32_t value_size = get_numeric(curr.data() + sizeof(uint32_t) + key_size); + string_view val = string_view(curr.data() + 2 * sizeof(uint32_t) + key_size, value_size); + // TODO: here key use lookup key + if (comparator_->compare(extract_user_key(curr_key), extract_user_key_from_lookup_key(key)) == 0) { + comparator_->compare(extract_user_key(curr_key), extract_user_key_from_lookup_key(key)); + *value = val; + rc = RC::SUCCESS; + break; + } + } + return rc; +} + +string_view ObBlock::get_entry(uint32_t offset) const +{ + uint32_t curr_begin = offsets_[offset]; + uint32_t curr_end = offset == offsets_.size() - 1 ? 
data_.size() : offsets_[offset + 1]; + string_view curr = string_view(data_.data() + curr_begin, curr_end - curr_begin); + return curr; +} + +ObLsmIterator *ObBlock::new_iterator() const { return new BlockIterator(comparator_, this, size()); } + +void BlockIterator::parse_entry() +{ + curr_entry_ = data_->get_entry(index_); + uint32_t key_size = get_numeric(curr_entry_.data()); + key_ = string_view(curr_entry_.data() + sizeof(uint32_t), key_size); + uint32_t value_size = get_numeric(curr_entry_.data() + sizeof(uint32_t) + key_size); + value_ = string_view(curr_entry_.data() + 2 * sizeof(uint32_t) + key_size, value_size); +} + +string BlockMeta::encode() const +{ + string ret; + put_numeric(&ret, first_key_.size()); + ret.append(first_key_); + put_numeric(&ret, last_key_.size()); + ret.append(last_key_); + put_numeric(&ret, offset_); + put_numeric(&ret, size_); + return ret; +} + +RC BlockMeta::decode(const string &data) +{ + RC rc = RC::SUCCESS; + const char *data_ptr = data.c_str(); + uint32_t first_key_size = get_numeric(data_ptr); + data_ptr += sizeof(uint32_t); + first_key_.assign(data_ptr, first_key_size); + data_ptr += first_key_size; + uint32_t last_key_size = get_numeric(data_ptr); + data_ptr += sizeof(uint32_t); + last_key_.assign(data_ptr, last_key_size); + data_ptr += last_key_size; + offset_ = get_numeric(data_ptr); + data_ptr += sizeof(uint32_t); + size_ = get_numeric(data_ptr); + return rc; +} + +void BlockIterator::seek(const string_view &lookup_key) +{ + index_ = 0; + while(valid()) { + parse_entry(); + if (comparator_->compare(extract_user_key(key_), extract_user_key_from_lookup_key(lookup_key)) >= 0) { + break; + } + index_++; + } +} +} // namespace oceanbase diff --git a/src/oblsm/table/ob_block.h b/src/oblsm/table/ob_block.h new file mode 100644 index 000000000..26ef3fb4c --- /dev/null +++ b/src/oblsm/table/ob_block.h @@ -0,0 +1,149 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. 
+miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/lang/string.h" +#include "common/lang/vector.h" +#include "oblsm/include/ob_lsm_iterator.h" +#include "oblsm/util/ob_comparator.h" + +namespace oceanbase { + +// TODO: block align to 4KB +// ┌─────────────────┐ +// │ entry 1 │◄───┐ +// ├─────────────────┤ │ +// │ entry 2 │ │ +// ├─────────────────┤ │ +// │ .. │ │ +// ├─────────────────┤ │ +// │ entry n │◄─┐ │ +// ├─────────────────┤ │ │ +// ┌───►│ offset size(n) │ │ │ +// │ ├─────────────────┤ │ │ +// │ │ offset 1 ├──┼─┘ +// │ ├─────────────────┤ │ +// │ │ .. │ │ +// │ ├─────────────────┤ │ +// │ │ offset n ├──┘ +// │ ├─────────────────┤ +// └────┤ offset start │ +// └─────────────────┘ +/** + * @class ObBlock + * @brief Represents a data block in the LSM-Tree. + * + * The `ObBlock` class manages a block of serialized key-value pairs, along with + * their offsets, for efficient storage and retrieval. It provides methods to decode + * serialized data, access individual entries, and create iterators for traversing + * the block contents. + */ +class ObBlock +{ + +public: + ObBlock(const ObComparator *comparator) : comparator_(comparator) {} + + void add_offset(uint32_t offset) { offsets_.push_back(offset); } + + uint32_t get_offset(int index) const { return offsets_[index]; } + + string_view get_entry(uint32_t offset) const; + + int size() const { return offsets_.size(); } + + /** + * @brief Decodes serialized block data. 
+ * + * This function parses and decodes the serialized string data to reconstruct + * the block's structure, including all key-value offsets and entries. + * The decoded data format can reference ObBlockBuilder. + * @param data The serialized block data as a string. + * @return RC The result code indicating the success or failure of the decode operation. + */ + RC decode(const string &data); + + RC get(const string_view &key, string *value); + + ObLsmIterator *new_iterator() const; + +private: + string data_; + vector offsets_; + // TODO: remove + const ObComparator *comparator_; +}; + +class BlockIterator : public ObLsmIterator +{ +public: + BlockIterator(const ObComparator *comparator, const ObBlock *data, uint32_t count) + : comparator_(comparator), data_(data), count_(count) + {} + BlockIterator(const BlockIterator &) = delete; + BlockIterator &operator=(const BlockIterator &) = delete; + + ~BlockIterator() override = default; + + void seek(const string_view &lookup_key) override; + void seek_to_first() override + { + index_ = 0; + parse_entry(); + } + void seek_to_last() override + { + index_ = count_ - 1; + parse_entry(); + } + + bool valid() const override { return index_ < count_; } + void next() override + { + index_++; + if (valid()) { + parse_entry(); + } + } + string_view key() const override { return key_; }; + string_view value() const override { return value_; } + +private: + void parse_entry(); + +private: + const ObComparator *comparator_; + const ObBlock *const data_; + string_view curr_entry_; + string_view key_; + string_view value_; + uint32_t count_ = 0; + uint32_t index_ = 0; +}; + +class BlockMeta +{ +public: + BlockMeta() {} + BlockMeta(const string &first_key, const string &last_key, uint32_t offset, uint32_t size) + : first_key_(first_key), last_key_(last_key), offset_(offset), size_(size) + {} + string encode() const; + RC decode(const string &data); + + string first_key_; + string last_key_; + + // Offset of ObBlock in SSTable + 
uint32_t offset_; + uint32_t size_; +}; +} // namespace oceanbase diff --git a/src/oblsm/table/ob_block_builder.cpp b/src/oblsm/table/ob_block_builder.cpp new file mode 100644 index 000000000..72c3712e7 --- /dev/null +++ b/src/oblsm/table/ob_block_builder.cpp @@ -0,0 +1,63 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/table/ob_block_builder.h" +#include "oblsm/util/ob_coding.h" +#include "common/log/log.h" + +namespace oceanbase { + +void ObBlockBuilder::reset() +{ + offsets_.clear(); + data_.clear(); + // first_key_.clear(); +} + +RC ObBlockBuilder::add(const string_view &key, const string_view &value) +{ + RC rc = RC::SUCCESS; + if (appro_size() + key.size() + value.size() + 2 * sizeof(uint32_t) > BLOCK_SIZE) { + // TODO: support large kv pair. 
+ if (offsets_.empty()) { + LOG_ERROR("block is empty, but kv pair is too large, key size: %lu, value size: %lu", key.size(), value.size()); + return RC::UNIMPLEMENTED; + } + LOG_WARN("block is full, can't add more kv pair"); + rc = RC::FULL; + } else { + offsets_.push_back(data_.size()); + put_numeric(&data_, key.size()); + data_.append(key.data(), key.size()); + put_numeric(&data_, value.size()); + data_.append(value.data(), value.size()); + } + return rc; +} + +string ObBlockBuilder::last_key() const +{ + string_view last_kv(data_.data() + offsets_.back(), data_.size() - offsets_.back()); + uint32_t key_length = get_numeric(last_kv.data()); + return string(last_kv.data() + sizeof(uint32_t), key_length); +} + +string_view ObBlockBuilder::finish() +{ + uint32_t data_size = data_.size(); + put_numeric(&data_, offsets_.size()); + for (size_t i = 0; i < offsets_.size(); i++) { + put_numeric(&data_, offsets_[i]); + } + put_numeric(&data_, data_size); + return string_view(data_.data(), data_.size()); +} + +} // namespace oceanbase diff --git a/src/oblsm/table/ob_block_builder.h b/src/oblsm/table/ob_block_builder.h new file mode 100644 index 000000000..9e536bed6 --- /dev/null +++ b/src/oblsm/table/ob_block_builder.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#pragma once + +#include "common/lang/string.h" +#include "common/lang/string_view.h" +#include "common/lang/vector.h" +#include "common/sys/rc.h" + +namespace oceanbase { + +/** + * @brief Build a ObBlock in SSTable + */ +class ObBlockBuilder +{ + +public: + RC add(const string_view &key, const string_view &value); + + string_view finish(); + + void reset(); + + string last_key() const; + + uint32_t appro_size() { return data_.size() + offsets_.size() * sizeof(uint32_t); } + +private: + static const uint32_t BLOCK_SIZE = 4 * 1024; // 4KB + // Offsets of key-value pairs. + vector offsets_; + // key-value pairs + // TODO: use block as data container + // TODO: add checksum + string data_; + + // string first_key_; +}; + +} // namespace oceanbase diff --git a/src/oblsm/table/ob_merger.cpp b/src/oblsm/table/ob_merger.cpp new file mode 100644 index 000000000..4979cc500 --- /dev/null +++ b/src/oblsm/table/ob_merger.cpp @@ -0,0 +1,120 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#include "oblsm/table/ob_merger.h" +#include "oblsm/include/ob_lsm_iterator.h" +#include "oblsm/util/ob_comparator.h" +#include "oblsm/ob_lsm_define.h" +#include "oblsm/util/ob_coding.h" + +namespace oceanbase { + +class ObMergingIterator : public ObLsmIterator +{ +public: + ObMergingIterator(const ObComparator *comparator, vector> &&children) + : comparator_(comparator), children_(std::move(children)), current_(nullptr) + {} + + ~ObMergingIterator() = default; + + bool valid() const override { return current_ != nullptr; } + + void seek_to_first() override + { + for (size_t i = 0; i < children_.size(); i++) { + children_[i]->seek_to_first(); + } + find_smallest(); + } + + void seek_to_last() override + { + for (size_t i = 0; i < children_.size(); i++) { + children_[i]->seek_to_last(); + } + find_largest(); + } + + void seek(const string_view &target) override + { + for (size_t i = 0; i < children_.size(); i++) { + children_[i]->seek(target); + } + find_smallest(); + } + + void next() override + { + current_->next(); + find_smallest(); + } + + string_view key() const override { return current_->key(); } + + string_view value() const override { return current_->value(); } + +private: + void find_smallest(); + void find_largest(); + + // We might want to use a heap in case there are lots of children. + // For now we use a simple array since we expect a very small number + // of children. 
+ const ObComparator * comparator_; + vector> children_; + ObLsmIterator * current_; +}; + +void ObMergingIterator::find_smallest() +{ + ObLsmIterator *smallest = nullptr; + for (size_t i = 0; i < children_.size(); i++) { + ObLsmIterator *child = children_[i].get(); + if (child->valid()) { + if (smallest == nullptr) { + smallest = child; + } else if (comparator_->compare(extract_user_key(child->key()), extract_user_key(smallest->key())) < 0) { + smallest = child; + } + } + } + current_ = smallest; +} + +void ObMergingIterator::find_largest() +{ + ObLsmIterator *largest = nullptr; + for (size_t i = 0; i < children_.size(); i++) { + ObLsmIterator *child = children_[i].get(); + if (child->valid()) { + if (largest == nullptr) { + largest = child; + } else if (comparator_->compare(extract_user_key(child->key()), extract_user_key(largest->key())) > 0) { + largest = child; + } + } + } + current_ = largest; +} + +ObLsmIterator *new_merging_iterator(const ObComparator *comparator, vector> &&children) +{ + if (children.size() == 0) { + return nullptr; + } else if (children.size() == 1) { + return children[0].release(); + } else { + return new ObMergingIterator(comparator, std::move(children)); + } + return nullptr; +} + +} // namespace oceanbase diff --git a/src/oblsm/table/ob_merger.h b/src/oblsm/table/ob_merger.h new file mode 100644 index 000000000..7c577a74d --- /dev/null +++ b/src/oblsm/table/ob_merger.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#pragma once + +#include "common/lang/vector.h" +#include "common/lang/memory.h" + +namespace oceanbase { + +class ObComparator; +class ObLsmIterator; + +/** + * @brief Return an iterator that provided the union of the data in + * children. For example, an iterator that provided + * the union of memtable and sstable. + * + */ +ObLsmIterator *new_merging_iterator(const ObComparator *comparator, vector> &&children); + +} // namespace oceanbase diff --git a/src/oblsm/table/ob_sstable.cpp b/src/oblsm/table/ob_sstable.cpp new file mode 100644 index 000000000..270365992 --- /dev/null +++ b/src/oblsm/table/ob_sstable.cpp @@ -0,0 +1,110 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#include "oblsm/table/ob_sstable.h" +#include "oblsm/util/ob_coding.h" +#include "common/log/log.h" +#include "common/lang/filesystem.h" +namespace oceanbase { + +void ObSSTable::init() +{ + // your code here +} + +RC ObSSTable::get(const string_view &lookup_key, string *value) +{ + RC rc = RC::SUCCESS; + int i = 0; + // TODO: binary search and compare in BlockMeta + for (const auto &meta : block_metas_) { + if (comparator_->compare(extract_user_key(meta.first_key_), extract_user_key_from_lookup_key(lookup_key)) <= 0 && + comparator_->compare(extract_user_key(meta.last_key_), extract_user_key_from_lookup_key(lookup_key)) >= 0) { + auto block = read_block_with_cache(i); + if (block != nullptr) { + rc = block->get(lookup_key, value); + if (rc == RC::SUCCESS) { + return rc; + } + } else { + rc = RC::INTERNAL; + } + } + i++; + } + return RC::NOT_EXIST; +} + +shared_ptr ObSSTable::read_block_with_cache(uint32_t block_idx) const +{ + // your code here + return nullptr; +} + +shared_ptr ObSSTable::read_block(uint32_t block_idx) const +{ + // your code here + return nullptr; +} + +void ObSSTable::remove() { filesystem::remove(file_name_); } + +ObLsmIterator *ObSSTable::new_iterator() { return new TableIterator(get_shared_ptr()); } + +void TableIterator::read_block_with_cache() +{ + block_ = sst_->read_block_with_cache(curr_block_idx_); + block_iterator_.reset(block_->new_iterator()); +} + +void TableIterator::seek_to_first() +{ + curr_block_idx_ = 0; + read_block_with_cache(); + block_iterator_->seek_to_first(); +} + +void TableIterator::seek_to_last() +{ + curr_block_idx_ = block_cnt_ - 1; + read_block_with_cache(); + block_iterator_->seek_to_last(); +} + +void TableIterator::next() +{ + block_iterator_->next(); + if (block_iterator_->valid()) { + } else if (curr_block_idx_ < block_cnt_ - 1) { + curr_block_idx_++; + read_block_with_cache(); + block_iterator_->seek_to_first(); + } +} + +void TableIterator::seek(const string_view &lookup_key) +{ + curr_block_idx_ = 0; 
+ for (; curr_block_idx_ < block_cnt_; curr_block_idx_++) { + const auto block_meta = sst_->block_meta(curr_block_idx_); + if (sst_->comparator()->compare(extract_user_key(block_meta.first_key_), extract_user_key_from_lookup_key(lookup_key)) <= 0 && + sst_->comparator()->compare(extract_user_key(block_meta.last_key_), extract_user_key_from_lookup_key(lookup_key)) >= 0) { + break; + } + } + if (curr_block_idx_ == block_cnt_) { + block_iterator_ = nullptr; + return; + } + read_block_with_cache(); + block_iterator_->seek(lookup_key); +}; + +} // namespace oceanbase diff --git a/src/oblsm/table/ob_sstable.h b/src/oblsm/table/ob_sstable.h new file mode 100644 index 000000000..0bdcc489b --- /dev/null +++ b/src/oblsm/table/ob_sstable.h @@ -0,0 +1,166 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "oblsm/util/ob_file_reader.h" +#include "common/lang/memory.h" +#include "common/sys/rc.h" +#include "oblsm/table/ob_block.h" +#include "oblsm/util/ob_comparator.h" +#include "oblsm/util/ob_lru_cache.h" + +namespace oceanbase { + +// TODO: add a dumptool to dump sst files(example for usage: ./dumptool sst_file) +// ┌─────────────────┐ +// │ block 1 │◄───┐ +// ├─────────────────┤ │ +// │ block 2 │ │ +// ├─────────────────┤ │ +// │ .. │ │ +// ├─────────────────┤ │ +// │ block n │◄─┐ │ +// ├─────────────────┤ │ │ +// ┌───►│ meta size(n) │ │ │ +// │ ├─────────────────┤ │ │ +// │ │ block meta 1 ├──┼─┘ +// │ ├─────────────────┤ │ +// │ │ .. 
│ │
+//    │    ├─────────────────┤  │
+//    │    │  block meta n   ├──┘
+//    │    ├─────────────────┤
+//    └────┤                 │
+//         └─────────────────┘
+
+// NOTE(review): template argument lists in this listing appear to have been lost in extraction
+// (e.g. `enable_shared_from_this`, `shared_ptr`, `unique_ptr`, `vector`, `ObLRUCache> *`).
+// They are left exactly as found; confirm against the real header before compiling.
+
+/**
+ * @class ObSSTable
+ * @brief Represents an SSTable (Sorted String Table) in the LSM-Tree.
+ *
+ * The `ObSSTable` class is responsible for managing on-disk sorted string tables (SSTables).
+ * It provides methods for initialization, key-value lookups, block reading (with caching support),
+ * and creating iterators for traversal. Each SSTable is uniquely identified by an `sst_id_` and
+ * interacts with the LRU cache for efficient block access.
+ */
+class ObSSTable : public enable_shared_from_this
+{
+public:
+  /**
+   * @brief Constructor for ObSSTable.
+   *
+   * Initializes an SSTable with its unique ID, file name, comparator, and block cache.
+   * The file reader is left null here; see init().
+   *
+   * @param sst_id A unique identifier for the SSTable.
+   * @param file_name The name of the file storing the SSTable data.
+   * @param comparator A pointer to the comparator used for key comparison.
+   * @param block_cache A pointer to the LRU block cache for caching block-level data.
+   */
+  ObSSTable(uint32_t sst_id, const string &file_name, const ObComparator *comparator,
+      ObLRUCache> *block_cache)
+      : sst_id_(sst_id),
+        file_name_(file_name),
+        comparator_(comparator),
+        file_reader_(nullptr),
+        block_cache_(block_cache)
+  {}
+
+  ~ObSSTable() = default;
+
+  /**
+   * @brief Initializes the SSTable instance.
+   *
+   * This function is responsible for performing setup tasks required for the SSTable,
+   * such as preparing file readers or pre-loading block_metas_.
+   *
+   * @warning This function must be called before performing any operations on the SSTable.
+   */
+  void init();
+
+  /**
+   * @brief Looks up `lookup_key` in this table.
+   * @param lookup_key The key to search for.
+   * @param value Output string; presumably receives the value on success (verify against the definition).
+   * @return An RC describing the outcome.
+   */
+  RC get(const string_view &lookup_key, string *value);
+
+  /// @brief Returns the identifier passed to the constructor.
+  uint32_t sst_id() const { return sst_id_; }
+
+  /// @brief Returns `shared_from_this()`; the object must therefore already be owned by a `shared_ptr`.
+  shared_ptr get_shared_ptr() { return shared_from_this(); }
+
+  /// @brief Creates an iterator over this table. NOTE(review): ownership of the raw pointer is not
+  /// visible here; presumably the caller owns it.
+  ObLsmIterator *new_iterator();
+
+  /**
+   * @brief Reads a block from the SSTable using the block cache.
+   *
+   * Attempts to read the specified block using the block cache. If the block is not
+   * in the cache, it will load the block from the SSTable file and update the cache.
+   *
+   * @param block_idx The index of the block to read.
+   *
+   * @return shared_ptr A shared pointer to the requested block.
+   */
+  shared_ptr read_block_with_cache(uint32_t block_idx) const;
+
+  /**
+   * @brief Reads a block directly from the SSTable file.
+   *
+   * This function bypasses the block cache and directly reads the requested block
+   * from the SSTable file.
+   *
+   * @param block_idx The index of the block to read.
+   *
+   * @return shared_ptr A shared pointer to the requested block.
+   */
+  shared_ptr read_block(uint32_t block_idx) const;
+
+  /// @brief Number of entries in `block_metas_`.
+  uint32_t block_count() const { return block_metas_.size(); }
+
+  /// @brief File size as reported by the file reader. Dereferences `file_reader_`, which the
+  /// constructor sets to null, so the reader must have been created before this is called.
+  uint32_t size() const { return file_reader_->file_size(); }
+
+  /// @brief Returns a copy of the i-th block meta. No bounds check is performed.
+  const BlockMeta block_meta(int i) const { return block_metas_[i]; }
+
+  /// @brief Returns the comparator passed to the constructor.
+  const ObComparator *comparator() const { return comparator_; }
+
+  void   remove();
+  /// @brief First key of the first block meta, or an empty string when there are no block metas.
+  string first_key() const { return block_metas_.empty() ? "" : block_metas_[0].first_key_; }
+  /// @brief Last key of the last block meta, or an empty string when there are no block metas.
+  string last_key() const { return block_metas_.empty() ? "" : block_metas_.back().last_key_; }
+
+private:
+  uint32_t            sst_id_;
+  string              file_name_;
+  const ObComparator *comparator_ = nullptr;
+  unique_ptr          file_reader_;
+  vector              block_metas_;
+
+  // Stored from the constructor argument; marked maybe_unused because nothing in this header reads it.
+  [[maybe_unused]] ObLRUCache> *block_cache_;
+};
+
+/**
+ * @brief Iterator over the entries of one SSTable, walking it block by block.
+ *
+ * `valid()` is false while no block iterator has been created. `key()` and `value()`
+ * dereference the block iterator unconditionally, so only call them when `valid()` is true.
+ */
+class TableIterator : public ObLsmIterator
+{
+public:
+  TableIterator(const shared_ptr &sst) : sst_(sst), block_cnt_(sst->block_count()) {}
+  ~TableIterator() = default;
+
+  void        seek(const string_view &key) override;
+  void        seek_to_first() override;
+  void        seek_to_last() override;
+  void        next() override;
+  bool        valid() const override { return block_iterator_ != nullptr && block_iterator_->valid(); }
+  string_view key() const override { return block_iterator_->key(); }
+  string_view value() const override { return block_iterator_->value(); }
+
+private:
+  void read_block_with_cache();
+
+  const shared_ptr sst_;
+  uint32_t         block_cnt_      = 0;  // block count captured from the table at construction
+  uint32_t         curr_block_idx_ = 0;
+  shared_ptr       block_;
+  unique_ptr       block_iterator_;
+};
+
+using SSTablesPtr = shared_ptr>>>;
+
+}  // namespace oceanbase
diff --git a/src/oblsm/table/ob_sstable_builder.cpp b/src/oblsm/table/ob_sstable_builder.cpp
new file mode 100644
index 000000000..468214ca6
--- /dev/null
+++ b/src/oblsm/table/ob_sstable_builder.cpp
@@ -0,0 +1,53 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details. */
+
+#include "oblsm/table/ob_sstable_builder.h"
+#include "oblsm/util/ob_coding.h"
+
+namespace oceanbase {
+
+// TODO: refactor build with mem_table/iterator logic.
+// Not implemented yet: always reports RC::UNIMPLEMENTED and touches no state.
+RC ObSSTableBuilder::build(shared_ptr<ObMemTable> mem_table, const std::string &file_name, uint32_t sst_id)
+{
+  return RC::UNIMPLEMENTED;
+}
+
+// Seals the block currently held by block_builder_: writes its bytes to the file,
+// records a BlockMeta (first key, last key, offset, size) and advances the write offset.
+void ObSSTableBuilder::finish_build_block()
+{
+  // Take the last key before finish() is called on the builder.
+  string      last_key       = block_builder_.last_key();
+  string_view block_contents = block_builder_.finish();
+  // NOTE(review): the RC returned by write() is dropped, so a failed write still records a
+  // block meta. This function returns void, so the failure cannot be propagated from here.
+  file_writer_->write(block_contents);
+  block_metas_.emplace_back(curr_blk_first_key_, last_key, curr_offset_, block_contents.size());
+  // TODO: block aligned to BLOCK_SIZE
+  curr_offset_ += block_contents.size();
+  block_builder_.reset();
+}
+
+// Wraps the file written so far in an ObSSTable and initializes it.
+// Dereferences file_writer_, so it must only be called while a writer exists.
+shared_ptr<ObSSTable> ObSSTableBuilder::get_built_table()
+{
+  // TODO: sstable should have more metadata
+  shared_ptr<ObSSTable> sstable = make_shared<ObSSTable>(sst_id_, file_writer_->file_name(), comparator_, block_cache_);
+  sstable->init();
+  return sstable;
+}
+
+// Returns the builder to its freshly-constructed state so it can build another table.
+void ObSSTableBuilder::reset()
+{
+  block_builder_.reset();
+  curr_blk_first_key_.clear();
+  file_writer_.reset();  // no-op when already null; destroys the writer otherwise
+  block_metas_.clear();
+  curr_offset_ = 0;
+  sst_id_      = 0;
+  file_size_   = 0;
+}
+}  // namespace oceanbase
diff --git a/src/oblsm/table/ob_sstable_builder.h b/src/oblsm/table/ob_sstable_builder.h
new file mode 100644
index 000000000..3e21bfb39
--- /dev/null
+++ b/src/oblsm/table/ob_sstable_builder.h
@@ -0,0 +1,67 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details.
*/
+
+#pragma once
+
+#include "common/lang/memory.h"
+#include "oblsm/table/ob_block_builder.h"
+#include "oblsm/memtable/ob_memtable.h"
+#include "common/lang/string.h"
+#include "oblsm/util/ob_file_writer.h"
+#include "oblsm/table/ob_block.h"
+#include "oblsm/table/ob_sstable.h"
+#include "oblsm/util/ob_lru_cache.h"
+
+namespace oceanbase {
+
+// NOTE(review): template argument lists in this listing appear to have been lost in extraction
+// (`shared_ptr`, `unique_ptr`, `vector`, `ObLRUCache> *`). They are left exactly as found;
+// confirm against the real header before compiling.
+
+/**
+ * @brief Build a SSTable
+ *
+ * Holds the state for writing one table: a block builder for the block in progress,
+ * the file writer, the metadata of finished blocks and the running file offset.
+ * `reset()` clears that state.
+ */
+class ObSSTableBuilder
+{
+public:
+  /**
+   * @param comparator Key comparator, stored and later handed to the built ObSSTable.
+   * @param block_cache Block cache pointer, stored and later handed to the built ObSSTable.
+   */
+  ObSSTableBuilder(const ObComparator *comparator, ObLRUCache> *block_cache)
+      : comparator_(comparator), block_cache_(block_cache)
+  {}
+  ~ObSSTableBuilder() = default;
+
+  /**
+   * @brief Builds an SSTable from the provided in-memory table and stores it in a file.
+   *
+   * This function takes an `ObMemTable` as input, partitions the data into blocks,
+   * serializes the blocks, and writes them into an SSTable file.
+   *
+   * @param mem_table A shared pointer to the `ObMemTable` containing the data to be written into the SSTable.
+   * @param file_name The name of the file where the constructed SSTable will be stored.
+   * @param sst_id A unique identifier assigned to the created SSTable.
+   *
+   * @return RC A result code indicating the success or failure of the SSTable creation process.
+   *
+   */
+  RC build(shared_ptr mem_table, const string &file_name, uint32_t sst_id);
+  /// @brief Returns the recorded file size counter (`file_size_`).
+  size_t file_size() const { return file_size_; }
+  /// @brief Returns an ObSSTable for the file written by this builder.
+  shared_ptr get_built_table();
+  /// @brief Clears all per-table state so the builder can be reused.
+  void reset();
+
+private:
+  /// Writes out the block in progress and records its metadata.
+  void finish_build_block();
+
+  const ObComparator *comparator_ = nullptr;
+  ObBlockBuilder      block_builder_;       // block currently being filled
+  string              curr_blk_first_key_;  // first key of the block in progress
+  unique_ptr          file_writer_;
+  vector              block_metas_;         // one entry per finished block
+  uint32_t            curr_offset_ = 0;     // sum of the sizes of the finished blocks
+  uint32_t            sst_id_      = 0;
+  size_t              file_size_   = 0;
+
+  ObLRUCache> *block_cache_ = nullptr;
+};
+}  // namespace oceanbase
diff --git a/src/oblsm/util/ob_arena.cpp b/src/oblsm/util/ob_arena.cpp
new file mode 100644
index 000000000..3baef0b09
--- /dev/null
+++ b/src/oblsm/util/ob_arena.cpp
@@ -0,0 +1,18 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "oblsm/util/ob_arena.h"
+
+namespace oceanbase {
+
+ObArena::ObArena() : memory_usage_(0) {}
+
+// Releases every block handed out by alloc(). Entries may be null; delete[] on null is a no-op.
+ObArena::~ObArena()
+{
+  for (char *block : blocks_) {
+    delete[] block;
+  }
+}
+
+}  // namespace oceanbase
diff --git a/src/oblsm/util/ob_arena.h b/src/oblsm/util/ob_arena.h
new file mode 100644
index 000000000..41cf24d2b
--- /dev/null
+++ b/src/oblsm/util/ob_arena.h
@@ -0,0 +1,58 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details. */
+
+#pragma once
+
+#include <cstddef>
+#include "common/lang/atomic.h"
+#include "common/lang/vector.h"
+
+namespace oceanbase {
+
+/**
+ * @brief a simple memory allocator.
+ * @todo optimize fractional memory allocation
+ * @note 1. alloc memory from arena, no need to free it.
+ *       2. not thread-safe.
+ */
+class ObArena
+{
+public:
+  ObArena();
+
+  ObArena(const ObArena &)            = delete;
+  ObArena &operator=(const ObArena &) = delete;
+
+  ~ObArena();
+
+  /**
+   * @brief Allocates `bytes` bytes that stay valid until the arena is destroyed.
+   * @param bytes Number of bytes requested.
+   * @return Pointer to the new block, or nullptr when `bytes` is 0.
+   */
+  char *alloc(size_t bytes);
+
+  /// @brief Bytes handed out so far plus one pointer of bookkeeping per allocation.
+  size_t memory_usage() const { return memory_usage_; }
+
+private:
+  // Array of new[] allocated memory blocks
+  vector<char *> blocks_;
+
+  // Total memory usage of the arena.
+  size_t memory_usage_;
+};
+
+inline char *ObArena::alloc(size_t bytes)
+{
+  // size_t is unsigned, so "nothing requested" is exactly zero.
+  if (bytes == 0) {
+    return nullptr;
+  }
+  // Reserve the bookkeeping slot first: if the vector cannot grow, no block has been
+  // allocated yet, and if new[] throws, the slot simply stays null.
+  blocks_.push_back(nullptr);
+  char *result   = new char[bytes];
+  blocks_.back() = result;
+  memory_usage_ += bytes + sizeof(char *);
+  return result;
+}
+
+}  // namespace oceanbase
diff --git a/src/oblsm/util/ob_bloomfilter.cpp b/src/oblsm/util/ob_bloomfilter.cpp
index ebec65e55..604548397 100644
--- a/src/oblsm/util/ob_bloomfilter.cpp
+++ b/src/oblsm/util/ob_bloomfilter.cpp
@@ -8,4 +8,6 @@ EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 See the Mulan PSL v2 for more details. */
 
-#include "oblsm/util/ob_bloomfilter.h"
\ No newline at end of file
+#include "oblsm/util/ob_bloomfilter.h"
+
+namespace oceanbase {}  // namespace oceanbase
diff --git a/src/oblsm/util/ob_coding.h b/src/oblsm/util/ob_coding.h
new file mode 100644
index 000000000..09d380efc
--- /dev/null
+++ b/src/oblsm/util/ob_coding.h
@@ -0,0 +1,112 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details. */
+
+#pragma once
+
+#include "common/lang/string.h"
+
+namespace oceanbase {
+
+// Width in bytes of the sequence number stored at the end of an internal key.
+static constexpr uint8_t SEQ_SIZE = 8;
+// Width in bytes of the prefix stored in front of the user key in a lookup key.
+static constexpr uint8_t LOOKUP_KEY_PREFIX_SIZE = 8;
+
+/**
+ * @brief Appends a numeric value to a string in binary format.
+ *
+ * This template function takes a numeric value of any type and appends its binary
+ * representation to the specified string.
+ *
+ * @tparam T The numeric type (e.g., `int`, `uint64_t`, `float`).
+ * @param dst A pointer to the string to which the numeric value will be appended.
+ * @param v The numeric value to append.
+ * @note The object representation of `v` is copied as-is, so the bytes are in the
+ *       byte order of the machine that runs this code.
+ */
+template <typename T>
+void put_numeric(string *dst, T v)
+{
+  dst->append(reinterpret_cast<const char *>(&v), sizeof(T));
+}
+
+/**
+ * @brief Extracts a numeric value from a binary data source.
+ *
+ * This template function reads a numeric value of any type from the provided
+ * binary data source and returns it. The bytes are copied with memcpy, so `src`
+ * does not need to be aligned for `T`.
+ *
+ * @tparam T The numeric type to extract (e.g., `int`, `uint64_t`, `float`).
+ * @param src A pointer to the source binary data from which the numeric value will be read.
+ *            At least `sizeof(T)` bytes must be readable.
+ * @return The extracted numeric value of type `T`.
+ */
+template <typename T>
+T get_numeric(const char *src)
+{
+  T value;
+  memcpy(&value, src, sizeof(T));
+  return value;
+}
+
+/**
+ * @brief Extracts the user key portion from an internal key.
+ *
+ * An internal key in the LSM-Tree typically contains additional metadata such as
+ * a sequence number at the end. This function removes the sequence number portion
+ * and returns the user key portion.
+ *
+ * @param internal_key The internal key to extract the user key from. It must be at least
+ *                     `SEQ_SIZE` bytes long; a shorter key makes the length underflow.
+ * @return A `string_view` representing the user key portion of the internal key.
+ *         It refers to the memory of `internal_key`; nothing is copied.
+ */
+inline string_view extract_user_key(const string_view &internal_key)
+{
+  return string_view(internal_key.data(), internal_key.size() - SEQ_SIZE);
+}
+
+/**
+ * @brief Extracts the sequence number from an internal key.
+ *
+ * The sequence number is usually stored at the end of the internal key in
+ * binary format. This function retrieves and returns the sequence number.
+ *
+ * @param internal_key The internal key to extract the sequence number from. It must be
+ *                     at least `SEQ_SIZE` bytes long.
+ * @return The extracted sequence number as a `uint64_t`.
+ */
+inline uint64_t extract_sequence(const string_view &internal_key)
+{
+  return get_numeric<uint64_t>(internal_key.data() + internal_key.size() - SEQ_SIZE);
+}
+
+/**
+ * @brief Computes the size of the user key from a lookup key.
+ *
+ * Besides the user key, a lookup key carries a fixed-width prefix in front and a
+ * fixed-width sequence number at the back. The user key length is whatever remains
+ * once both are subtracted from the total length.
+ *
+ * @param lookup_key The lookup key to analyze.
+ * @return The size of the user key portion in bytes.
+ */
+inline size_t user_key_size_from_lookup_key(const string_view &lookup_key)
+{
+  const size_t framing_bytes = SEQ_SIZE + LOOKUP_KEY_PREFIX_SIZE;
+  return lookup_key.size() - framing_bytes;
+}
+
+/**
+ * @brief Returns the user key stored inside a lookup key.
+ *
+ * The lookup key is laid out as prefix, user key, sequence number. The returned view
+ * starts right after the prefix and spans the user key only.
+ *
+ * @param lookup_key The lookup key to extract the user key from.
+ * @return A `string_view` representing the user key portion of the lookup key.
+ */
+inline string_view extract_user_key_from_lookup_key(const string_view &lookup_key)
+{
+  const char  *user_key_begin = lookup_key.data() + LOOKUP_KEY_PREFIX_SIZE;
+  const size_t user_key_len   = user_key_size_from_lookup_key(lookup_key);
+  return string_view(user_key_begin, user_key_len);
+}
+
+}  // namespace oceanbase
\ No newline at end of file
diff --git a/src/oblsm/util/ob_comparator.cpp b/src/oblsm/util/ob_comparator.cpp
new file mode 100644
index 000000000..3f0fb1d08
--- /dev/null
+++ b/src/oblsm/util/ob_comparator.cpp
@@ -0,0 +1,35 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details.
*/
+
+#include "oblsm/util/ob_comparator.h"
+#include "oblsm/ob_lsm_define.h"
+#include "oblsm/util/ob_coding.h"
+
+namespace oceanbase {
+int ObDefaultComparator::compare(const string_view &a, const string_view &b) const { return a.compare(b); }
+
+// Orders by user key first (ascending), then by sequence number (descending), so that for
+// equal user keys the entry with the larger sequence number sorts first.
+int ObInternalKeyComparator::compare(const string_view &a, const string_view &b) const
+{
+  const int user_key_order = default_comparator_.compare(extract_user_key(a), extract_user_key(b));
+  if (user_key_order != 0) {
+    return user_key_order;
+  }
+
+  // Same user key: read the trailing sequence numbers through the shared helper
+  // instead of recomputing the offset by hand.
+  const uint64_t aseq = extract_sequence(a);
+  const uint64_t bseq = extract_sequence(b);
+  if (aseq > bseq) {
+    return -1;
+  }
+  if (aseq < bseq) {
+    return +1;
+  }
+  return 0;
+}
+
+}  // namespace oceanbase
\ No newline at end of file
diff --git a/src/oblsm/util/ob_comparator.h b/src/oblsm/util/ob_comparator.h
new file mode 100644
index 000000000..8f2a37e79
--- /dev/null
+++ b/src/oblsm/util/ob_comparator.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details. */
+
+#pragma once
+
+#include "common/lang/string_view.h"
+
+namespace oceanbase {
+
+/**
+ * @brief base class of all comparators
+ */
+class ObComparator
+{
+public:
+  virtual ~ObComparator() = default;
+
+  /**
+   * @brief Three-way comparison.
+   * @return < 0 iff "a" < "b",
+   * @return == 0 iff "a" == "b",
+   * @return > 0 iff "a" > "b"
+   */
+  virtual int compare(const string_view &a, const string_view &b) const = 0;
+};
+
+/**
+ * @brief comparator with lexicographical order
+ * @details Delegates to `string_view::compare`.
+ */
+class ObDefaultComparator : public ObComparator
+{
+public:
+  explicit ObDefaultComparator() = default;
+  int compare(const string_view &a, const string_view &b) const override;
+};
+
+/**
+ * @brief internal key comparator
+ * @details internal key: | key_size(8B) | key | sequence_number(8B) |
+ *
+ * Keys with different user keys are ordered by the user key; keys with the same user key
+ * are ordered by sequence number, larger sequence number first.
+ *
+ * NOTE(review): compare() strips only the trailing 8-byte sequence number and treats
+ * everything before it as the user key. Confirm whether the keys passed in carry the
+ * key_size prefix shown in the layout above.
+ */
+class ObInternalKeyComparator : public ObComparator
+{
+public:
+  explicit ObInternalKeyComparator() = default;
+
+  int                 compare(const string_view &a, const string_view &b) const override;
+  /// @brief Returns the comparator used for the user-key part. The pointer refers to a member
+  /// of this object and is valid only while this object is alive.
+  const ObComparator *user_comparator() const { return &default_comparator_; }
+
+private:
+  ObDefaultComparator default_comparator_;
+};
+
+}  // namespace oceanbase
diff --git a/src/oblsm/util/ob_file_reader.cpp b/src/oblsm/util/ob_file_reader.cpp
new file mode 100644
index 000000000..fed7baf8e
--- /dev/null
+++ b/src/oblsm/util/ob_file_reader.cpp
@@ -0,0 +1,66 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details.
*/
+
+#include "oblsm/util/ob_file_reader.h"
+#include <fcntl.h>
+#include <unistd.h>
+#include "common/lang/filesystem.h"
+
+#include "common/log/log.h"
+
+namespace oceanbase {
+
+ObFileReader::~ObFileReader() { close_file(); }
+
+// Reads exactly `size` bytes starting at byte offset `pos`.
+// Returns an empty string when the read fails or delivers fewer bytes than requested.
+string ObFileReader::read_pos(uint32_t pos, uint32_t size)
+{
+  string buf;
+  buf.resize(size);
+  const ssize_t read_size = ::pread(fd_, buf.data(), size, static_cast<off_t>(pos));
+  if (read_size < 0 || static_cast<uint32_t>(read_size) != size) {
+    // `size` is a 32-bit unsigned value, so it is printed with %u.
+    LOG_WARN("Failed to read file %s, read_size=%ld, size=%u", filename_.c_str(), static_cast<long>(read_size), size);
+    return "";
+  }
+
+  return buf;
+}
+
+// Looks the size up by path, so it does not need the file to be open.
+// The result is narrowed to 32 bits.
+uint32_t ObFileReader::file_size()
+{
+  return static_cast<uint32_t>(filesystem::file_size(filename_));
+}
+
+// Creates a reader and opens the file. Returns nullptr when the file cannot be opened.
+unique_ptr<ObFileReader> ObFileReader::create_file_reader(const string &filename)
+{
+  unique_ptr<ObFileReader> reader = std::make_unique<ObFileReader>(filename);
+  if (OB_FAIL(reader->open_file())) {
+    LOG_WARN("Failed to open file %s", filename.c_str());
+    return nullptr;
+  }
+  return reader;
+}
+
+// Opens the file read-only. Calling it again while the file is open is a no-op,
+// so the descriptor that is already held is not leaked.
+RC ObFileReader::open_file()
+{
+  RC rc = RC::SUCCESS;
+  if (fd_ >= 0) {
+    return rc;
+  }
+  fd_ = ::open(filename_.c_str(), O_RDONLY);
+  if (fd_ < 0) {
+    LOG_WARN("Failed to open file %s", filename_.c_str());
+    rc = RC::INTERNAL;
+  }
+  return rc;
+}
+
+// Closes the descriptor if one is held and marks the reader as closed, so an explicit
+// close_file() followed by the destructor does not close the same descriptor number twice
+// (by then it may belong to another file).
+void ObFileReader::close_file()
+{
+  if (fd_ >= 0) {
+    ::close(fd_);
+    fd_ = -1;
+  }
+}
+
+}  // namespace oceanbase
diff --git a/src/oblsm/util/ob_file_reader.h b/src/oblsm/util/ob_file_reader.h
new file mode 100644
index 000000000..8a9471789
--- /dev/null
+++ b/src/oblsm/util/ob_file_reader.h
@@ -0,0 +1,114 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details.
*/
+
+#pragma once
+
+#include "common/lang/fstream.h"
+#include "common/lang/memory.h"
+#include "common/lang/sstream.h"
+#include "common/lang/string.h"
+#include "common/sys/rc.h"
+#include "common/lang/mutex.h"
+
+namespace oceanbase {
+
+/**
+ * @class ObFileReader
+ * @brief A utility class for reading files in an efficient manner.
+ *
+ * The `ObFileReader` class provides a simple interface for reading files. It allows
+ * opening, closing, and reading specific portions of a file while also exposing
+ * the file size for external use. This class is intended for use in scenarios where
+ * file-based data storage, such as SSTables, is accessed.
+ *
+ * The object owns a file descriptor that the destructor closes, so it is not copyable:
+ * two copies would close the same descriptor.
+ */
+class ObFileReader
+{
+public:
+  /**
+   * @brief Constructs an `ObFileReader` object with the specified file name.
+   *
+   * The file name is stored internally, but the file is not opened until
+   * `open_file()` is explicitly called.
+   *
+   * @param filename The name of the file to be read.
+   */
+  ObFileReader(const string &filename) : filename_(filename) {}
+
+  ObFileReader(const ObFileReader &)            = delete;
+  ObFileReader &operator=(const ObFileReader &) = delete;
+
+  /// Closes the file if it is still open.
+  ~ObFileReader();
+
+  /**
+   * @brief Opens the file for reading.
+   *
+   * This method attempts to open the file specified during the construction
+   * of the object. If the file is successfully opened, the internal file descriptor
+   * (`fd_`) is updated.
+   *
+   * @return An RC (return code) indicating the success or failure of the operation.
+   */
+  RC open_file();
+
+  /**
+   * @brief Closes the file if it is currently open.
+   *
+   * This method releases the file descriptor (`fd_`) associated with the file.
+   */
+  void close_file();
+
+  /**
+   * @brief Reads a portion of the file from a specified position.
+   *
+   * This method reads `size` bytes starting from position `pos` in the file.
+   *
+   * @param pos The starting position (offset) in the file.
+   * @param size The number of bytes to read.
+   * @return A string containing the requested portion of the file's data, or an empty
+   *         string when fewer than `size` bytes could be read.
+   */
+  string read_pos(uint32_t pos, uint32_t size);
+
+  /**
+   * @brief Returns the size of the file.
+   *
+   * This method retrieves the size of the file in bytes. The size is looked up by
+   * file name, so the file does not have to be open.
+   *
+   * @return The size of the file in bytes, as a 32-bit value.
+   */
+  uint32_t file_size();
+
+  /**
+   * @brief Creates a new `ObFileReader` instance.
+   *
+   * This static factory method constructs a new `ObFileReader` object for the
+   * specified file name and opens the file.
+   *
+   * @param filename The name of the file to be read.
+   * @return A `unique_ptr` to the created `ObFileReader` object, or nullptr when the
+   *         file cannot be opened.
+   */
+  static unique_ptr<ObFileReader> create_file_reader(const string &filename);
+
+private:
+  /**
+   * @brief The name of the file to be read.
+   *
+   * This string stores the file name specified during the construction of
+   * the `ObFileReader` object.
+   */
+  string filename_;
+
+  /**
+   * @brief The file descriptor for the currently opened file.
+   *
+   * This integer represents the file descriptor used for reading the file.
+   * If no file is open, it is set to `-1`.
+   */
+  int fd_ = -1;
+};
+
+}  // namespace oceanbase
diff --git a/src/oblsm/util/ob_file_writer.cpp b/src/oblsm/util/ob_file_writer.cpp
new file mode 100644
index 000000000..14de6db25
--- /dev/null
+++ b/src/oblsm/util/ob_file_writer.cpp
@@ -0,0 +1,69 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details.
*/
+
+#include "oblsm/util/ob_file_writer.h"
+
+namespace oceanbase {
+
+ObFileWriter::~ObFileWriter() { close_file(); }
+
+// Appends the bytes of `data` to the stream. Unformatted output is used because the
+// payload is binary block data and must not depend on the stream's formatting state.
+RC ObFileWriter::write(const string_view &data)
+{
+  file_.write(data.data(), static_cast<std::streamsize>(data.size()));
+  return file_.good() ? RC::SUCCESS : RC::IOERR_WRITE;
+}
+
+// Flushes the stream's buffer to the underlying file.
+RC ObFileWriter::flush()
+{
+  file_.flush();
+  return file_.good() ? RC::SUCCESS : RC::IOERR_SYNC;
+}
+
+// Opens the file in binary mode, appending or truncating depending on `append_`.
+// Does nothing when the stream is already open.
+RC ObFileWriter::open_file()
+{
+  if (file_.is_open()) {
+    return RC::SUCCESS;
+  }
+  const std::ios::openmode mode =
+      append_ ? (std::ios::app | std::ios::binary) : (std::ios::out | std::ios::trunc | std::ios::binary);
+  file_.open(filename_, mode);
+  return file_.good() ? RC::SUCCESS : RC::IOERR_OPEN;
+}
+
+void ObFileWriter::close_file()
+{
+  if (file_.is_open()) {
+    file_.flush();
+    file_.close();
+  }
+}
+
+// Creates a writer and tries to open the file.
+// NOTE(review): the result of open_file() is not propagated and the writer is returned
+// even when opening failed; callers can detect that with is_open(). The reader factory
+// returns nullptr in the same situation - confirm which behaviour is intended here.
+unique_ptr<ObFileWriter> ObFileWriter::create_file_writer(const string &filename, bool append)
+{
+  unique_ptr<ObFileWriter> writer = std::make_unique<ObFileWriter>(filename, append);
+  writer->open_file();
+  return writer;
+}
+
+}  // namespace oceanbase
diff --git a/src/oblsm/util/ob_file_writer.h b/src/oblsm/util/ob_file_writer.h
new file mode 100644
index 000000000..772aa6f10
--- /dev/null
+++ b/src/oblsm/util/ob_file_writer.h
@@ -0,0 +1,132 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details.
*/
+
+#pragma once
+
+#include "common/lang/fstream.h"
+#include "common/lang/string.h"
+#include "common/lang/string_view.h"
+#include "common/lang/memory.h"
+#include "common/sys/rc.h"
+
+namespace oceanbase {
+
+/**
+ * @class ObFileWriter
+ * @brief A utility class for writing data to files.
+ *
+ * The `ObFileWriter` class provides a convenient interface for writing data to a file.
+ * It supports creating and opening files for writing, appending data to existing files,
+ * and flushing buffered data to disk. The class ensures proper resource management by
+ * providing methods for explicitly closing the file; the destructor closes it as well.
+ * TODO: use posix
+ */
+class ObFileWriter
+{
+public:
+  /**
+   * @brief Constructs an `ObFileWriter` object with the specified file name and mode.
+   *
+   * The constructor initializes the file writer with the given file name. The file is not opened
+   * until `open_file()` is called. The `append` parameter determines whether the file should
+   * be opened in append mode or overwrite mode.
+   *
+   * @param filename The name of the file to write to.
+   * @param append Whether to open the file in append mode (default: `false`).
+   */
+  ObFileWriter(const string &filename, bool append = false) : filename_(filename), append_(append) {}
+
+  ~ObFileWriter();
+
+  /**
+   * @brief Opens the file for writing.
+   *
+   * This method attempts to open the file specified during the construction of the object.
+   * If the file is successfully opened, further write operations can be performed.
+   * Calling it while the file is already open succeeds without reopening.
+   *
+   * @return An RC (return code) indicating the success or failure of the operation.
+   * @note If the file cannot be opened (e.g., due to permission issues), an error code is returned.
+   */
+  RC open_file();
+
+  /**
+   * @brief Closes the file if it is currently open.
+   *
+   * This method releases the resources associated with the file. After calling this method,
+   * the file can no longer be written to until it is reopened.
+   */
+  void close_file();
+
+  /**
+   * @brief Writes data to the file.
+   *
+   * Appends the provided data to the file. If the file is not open, the operation will fail.
+   *
+   * @param data The data to write to the file, provided as a `string_view`.
+   * @return An RC (return code) indicating the success or failure of the operation.
+   */
+  RC write(const string_view &data);
+
+  /**
+   * @brief Flushes buffered data.
+   *
+   * Flushes the output stream so that data buffered by the writer is handed to the
+   * underlying file. NOTE(review): this is a stream flush; whether the data has reached
+   * stable storage afterwards is not established by this class.
+   *
+   * @return An RC (return code) indicating the success or failure of the flush operation.
+   */
+  RC flush();
+
+  /**
+   * @brief Checks if the file is currently open.
+   *
+   * @return `true` if the file is open, `false` otherwise.
+   */
+  bool is_open() const { return file_.is_open(); }
+
+  /**
+   * @brief Returns the name of the file being written to.
+   *
+   * @return A string containing the file name.
+   */
+  string file_name() const { return filename_; }
+
+  /**
+   * @brief Creates a new `ObFileWriter` instance.
+   *
+   * This static factory method constructs a new `ObFileWriter` object with the specified
+   * file name and append mode, and tries to open the file. The outcome of opening is not
+   * reported; use `is_open()` on the returned writer to check it.
+   *
+   * @param filename The name of the file to write to.
+   * @param append Whether to open the file in append mode. This parameter has no default
+   *               value and must always be passed.
+   * @return A `unique_ptr` to the created `ObFileWriter` object.
+   */
+  // NOTE(review): the template argument of `unique_ptr` appears to have been lost in extraction.
+  static unique_ptr create_file_writer(const string &filename, bool append);
+
+private:
+  /**
+   * @brief The name of the file to be written to.
+   */
+  string filename_;
+
+  /**
+   * @brief Indicates whether the file should be opened in append mode.
+   *
+   * If `true`, data will be appended to the existing file. If `false`, the existing file (if any)
+   * will be overwritten when the file is opened.
+   */
+  bool append_;
+
+  /**
+   * @brief The file stream used for writing data.
+   */
+  ofstream file_;
+};
+}  // namespace oceanbase
diff --git a/src/oblsm/util/ob_lru_cache.cpp b/src/oblsm/util/ob_lru_cache.cpp
new file mode 100644
index 000000000..de00301cf
--- /dev/null
+++ b/src/oblsm/util/ob_lru_cache.cpp
@@ -0,0 +1,13 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details. */
+
+#include "oblsm/util/ob_lru_cache.h"
+
+namespace oceanbase {}  // namespace oceanbase
diff --git a/src/oblsm/util/ob_lru_cache.h b/src/oblsm/util/ob_lru_cache.h
new file mode 100644
index 000000000..17ba573a5
--- /dev/null
+++ b/src/oblsm/util/ob_lru_cache.h
@@ -0,0 +1,97 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details. */
+
+#pragma once
+
+// NOTE(review): the names of the two headers below appear to have been lost in extraction;
+// restore them from the original file.
+#include
+#include
+
+namespace oceanbase {
+
+/**
+ * @class ObLRUCache
+ * @brief A thread-safe implementation of an LRU (Least Recently Used) cache.
+ *
+ * The `ObLRUCache` class provides a fixed-size cache that evicts the least recently used
+ * entries when the cache exceeds its capacity.
It supports thread-safe operations for
+ * inserting, retrieving, and checking the existence of cache entries.
+ *
+ * @note The member functions below are placeholders: `get` and `contains` always
+ *       report a miss and `put` stores nothing. The comments on them describe the
+ *       behaviour an implementation is expected to provide.
+ *
+ * @tparam KeyType The type of keys used to identify cache entries.
+ * @tparam ValueType The type of values stored in the cache.
+ */
+template <typename KeyType, typename ValueType>
+class ObLRUCache
+{
+public:
+  /**
+   * @brief Constructs an `ObLRUCache` with a specified capacity.
+   *
+   * @param capacity The maximum number of elements the cache can hold.
+   */
+  ObLRUCache(size_t capacity) : capacity_(capacity) {}
+
+  /**
+   * @brief Retrieves a value from the cache using the specified key.
+   *
+   * This method searches for the specified key in the cache. If the key is found, the
+   * corresponding value is returned and the key-value pair is moved to the front of the
+   * LRU list (indicating recent use).
+   *
+   * @param key The key to search for in the cache.
+   * @param value A reference to store the value associated with the key.
+   * @return `true` if the key is found and the value is retrieved; `false` otherwise.
+   *         The placeholder always returns `false` and leaves `value` untouched.
+   */
+  bool get(const KeyType &key, ValueType &value) { return false; }
+
+  /**
+   * @brief Inserts a key-value pair into the cache.
+   *
+   * If the key already exists in the cache, its value is updated, and the key-value pair
+   * is moved to the front of the LRU list. If the cache exceeds its capacity after insertion,
+   * the least recently used entry is evicted.
+   *
+   * The placeholder does nothing.
+   *
+   * @param key The key to insert into the cache.
+   * @param value The value to associate with the specified key.
+   */
+  void put(const KeyType &key, const ValueType &value) {}
+
+  /**
+   * @brief Checks whether the specified key exists in the cache.
+   *
+   * @param key The key to check in the cache.
+   * @return `true` if the key exists; `false` otherwise. The placeholder always returns `false`.
+   */
+  bool contains(const KeyType &key) const { return false; }
+
+private:
+  /**
+   * @brief The maximum number of elements the cache can hold.
+   */
+  size_t capacity_;
+};
+
+/**
+ * @brief Creates a new instance of `ObLRUCache` with the specified capacity.
+ *
+ * This factory function constructs an `ObLRUCache` instance for the specified key and
+ * value types, and initializes it with the given capacity.
+ *
+ * @note NOTE(review): the current body is a placeholder that returns nullptr instead of
+ *       creating a cache, so callers must not dereference the result yet.
+ *
+ * @tparam Key The type of keys used to identify cache entries.
+ * @tparam Value The type of values stored in the cache.
+ * @param capacity The maximum number of elements the cache can hold.
+ * @return A pointer to the newly created `ObLRUCache` instance.
+ */
+template <typename Key, typename Value>
+ObLRUCache<Key, Value> *new_lru_cache(uint32_t capacity)
+{
+  return nullptr;
+}
+
+}  // namespace oceanbase
diff --git a/unittest/oblsm/ob_arena_test.cpp b/unittest/oblsm/ob_arena_test.cpp
new file mode 100644
index 000000000..a30e384ad
--- /dev/null
+++ b/unittest/oblsm/ob_arena_test.cpp
@@ -0,0 +1,46 @@
+/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
+miniob is licensed under Mulan PSL v2.
+You can use this software according to the terms and conditions of the Mulan PSL v2.
+You may obtain a copy of Mulan PSL v2 at:
+         http://license.coscl.org.cn/MulanPSL2
+THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+See the Mulan PSL v2 for more details.
*/ + +#include "gtest/gtest.h" + +#include "oblsm/util/ob_arena.h" +#include "common/math/random_generator.h" + +using namespace oceanbase; + +TEST(arena_test, DISABLED_arena_test_basic) +{ + ObArena arena; + const int count = 1000; + size_t bytes = 0; + common::RandomGenerator rnd; + for (int i = 0; i < count; i++) { + size_t s; + s = rnd.next(4000); + if (s == 0) { + s = 1; + } + char* r; + r = arena.alloc(s); + + for (size_t b = 0; b < s; b++) { + r[b] = i % 256; + } + bytes += s; + bytes += sizeof(char*); + ASSERT_EQ(arena.memory_usage(), bytes); + } +} + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_block_test.cpp b/unittest/oblsm/ob_block_test.cpp new file mode 100644 index 000000000..ee46a748d --- /dev/null +++ b/unittest/oblsm/ob_block_test.cpp @@ -0,0 +1,66 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#include "gtest/gtest.h" + +#include "oblsm/table/ob_block.h" +#include "oblsm/table/ob_block_builder.h" +#include "oblsm/util/ob_comparator.h" + +using namespace oceanbase; + +TEST(block_test, DISABLED_block_builder_test_basic) +{ + ObBlockBuilder builder; + ObDefaultComparator comparator; + builder.add("key1", "value1"); + builder.add("key2", "value2"); + builder.add("key3", "value3"); + ASSERT_EQ(builder.last_key(), "key3"); + builder.add("key4", "value4"); + ASSERT_EQ(builder.last_key(), "key4"); + string_view block_contents = builder.finish(); + + ObBlock block(&comparator); + block.decode(string(block_contents.data(), block_contents.size())); + ASSERT_EQ(block.size(), 4); +} + +TEST(block_test, DISABLED_block_iterator_test_basic) +{ + ObBlockBuilder builder; + ObDefaultComparator comparator; + builder.add("key1", "value1"); + builder.add("key2", "value2"); + builder.add("key3", "value3"); + ASSERT_EQ(builder.last_key(), "key3"); + builder.add("key4", "value4"); + ASSERT_EQ(builder.last_key(), "key4"); + string_view block_contents = builder.finish(); + + ObBlock block(&comparator); + block.decode(string(block_contents.data(), block_contents.size())); + ASSERT_EQ(block.size(), 4); + BlockIterator iter(&comparator, &block, block.size()); + iter.seek_to_first(); + ASSERT_TRUE(iter.valid()); + ASSERT_EQ(iter.key(), "key1"); + ASSERT_EQ(iter.value(), "value1"); + while(iter.valid()) { + cout << iter.key() << " " << iter.value() << endl; + iter.next(); + } +} + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_compaction_test.cpp b/unittest/oblsm/ob_compaction_test.cpp new file mode 100644 index 000000000..35d7d4144 --- /dev/null +++ b/unittest/oblsm/ob_compaction_test.cpp @@ -0,0 +1,155 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. 
+You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "gtest/gtest.h" + +#include "common/lang/filesystem.h" +#include "oblsm/include/ob_lsm.h" +#include "oblsm/ob_lsm_impl.h" +#include "unittest/oblsm/ob_lsm_test_base.h" + +using namespace oceanbase; + +class ObLsmCompactionTest : public ObLsmTestBase { +}; + +bool check_compaction(ObLsm* lsm) +{ + ObLsmImpl *lsm_impl = dynamic_cast(lsm); + if (nullptr == lsm_impl) { + return false; + } + auto sstables = lsm_impl->get_sstables(); + + if (sstables->size() != ObLsmOptions().default_levels) { + return false; + } + auto level_0 = sstables->at(0); + if (level_0.size() > ObLsmOptions().default_l0_file_num) { + return false; + } + + ObLsmOptions options; + + // check level_i size + size_t level_size = options.default_l1_level_size; + for (size_t i = 1; i < options.default_levels; ++i) { + const auto& level_i = sstables->at(i); + int level_i_size = 0; + for (const auto& sstable : level_i) { + level_i_size += sstable->size(); + } + if (level_i_size > level_size * 1.1) { + return false; + } + level_size *= options.default_level_ratio; + } + + // check level_i overlap + for (size_t i = 1; i < options.default_levels; ++i) { + const auto& level_i = sstables->at(i); + vector> key_ranges; + for (const auto& sstable : level_i) { + key_ranges.push_back(make_pair(sstable->first_key(), sstable->last_key())); + } + std::sort(key_ranges.begin(), key_ranges.end(), [](const auto& a, const auto& b) { return a.first < b.first; }); + ObInternalKeyComparator comp; + for (size_t j = 1; j < key_ranges.size(); ++j) { + if (comp.compare(key_ranges[j].first, 
key_ranges[j-1].second) < 0) { + return false; + } + } + } + return true; +} + +TEST_P(ObLsmCompactionTest, DISABLED_oblsm_compaction_test_basic1) +{ + size_t num_entries = GetParam(); + auto data = KeyValueGenerator::generate_data(num_entries); + + for (const auto& [key, value] : data) { + ASSERT_EQ(db->put(key, value), RC::SUCCESS); + } + sleep(1); + + ObLsmIterator* it = db->new_iterator(ObLsmReadOptions()); + it->seek_to_first(); + size_t count = 0; + while (it->valid()) { + it->next(); + ++count; + } + EXPECT_EQ(count, num_entries); + delete it; + ASSERT_TRUE(check_compaction(db)); +} + +void thread_put(ObLsm *db, int start, int end) { + for (int i = start; i < end; ++i) { + const std::string key = "key" + std::to_string(i); + RC rc = db->put(key, key); + ASSERT_EQ(rc, RC::SUCCESS); + } +} + +TEST_P(ObLsmCompactionTest, DISABLED_ConcurrentPutAndGetTest) { + const int num_entries = GetParam(); + const int num_threads = 4; + const int batch_size = num_entries / num_threads; + + std::vector threads; + for (int i = 0; i < num_threads; ++i) { + int start = i * batch_size; + int end = 0; + if (i == num_threads - 1) { + end = num_entries; + } else { + end = start + batch_size; + } + threads.emplace_back(thread_put, db, start, end); + } + + for (auto &thread : threads) { + thread.join(); + } + // wait for compaction + sleep(1); + + // Verify all data using iterator + ObLsmReadOptions options; + ObLsmIterator *iterator = db->new_iterator(options); + + iterator->seek_to_first(); + int count = 0; + while (iterator->valid()) { + iterator->next(); + ++count; + } + + EXPECT_EQ(count, num_entries); + + // Clean up + delete iterator; + + ASSERT_TRUE(check_compaction(db)); +} + +INSTANTIATE_TEST_SUITE_P( + ObLsmCompactionTests, + ObLsmCompactionTest, + ::testing::Values(1, 10, 1000, 10000, 100000) +); + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git 
a/unittest/oblsm/ob_lru_cache_test.cpp b/unittest/oblsm/ob_lru_cache_test.cpp new file mode 100644 index 000000000..98b38cce9 --- /dev/null +++ b/unittest/oblsm/ob_lru_cache_test.cpp @@ -0,0 +1,105 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "gtest/gtest.h" + +#include "common/lang/string.h" +#include "common/lang/vector.h" +#include "common/lang/thread.h" +#include "common/lang/utility.h" +#include "oblsm/util/ob_lru_cache.h" + +using namespace oceanbase; + +class ObLRUCacheTest : public ::testing::TestWithParam { +protected: + ObLRUCache* cache; + size_t capacity; + + void SetUp() override { + capacity = GetParam(); + cache = new_lru_cache(capacity); + } + + void TearDown() override { + delete cache; + } +}; + +TEST_P(ObLRUCacheTest, DISABLED_lru_capacity) { + ASSERT_NE(cache, nullptr); + + for (size_t i = 0; i < capacity + 2; ++i) { + string key = "key" + to_string(i); + string value = "value" + to_string(i); + cache->put(key, value); + } + + for (size_t i = 0; i < capacity + 2; ++i) { + string key = "key" + to_string(i); + string value; + if (i < 2) { + EXPECT_FALSE(cache->get(key, value)); + } else { + EXPECT_TRUE(cache->get(key, value)); + EXPECT_EQ(value, "value" + to_string(i)); + } + } +} + +TEST_P(ObLRUCacheTest, DISABLED_update_exist_key) { + ASSERT_NE(cache, nullptr); + + cache->put("key1", "value1"); + cache->put("key2", "value2"); + cache->put("key1", "value1_updated"); + + string value; + + EXPECT_TRUE(cache->get("key1", value)); + EXPECT_EQ(value, 
"value1_updated"); + + EXPECT_TRUE(cache->get("key2", value)); + EXPECT_EQ(value, "value2"); +} + +TEST_P(ObLRUCacheTest, DISABLED_contains_key) { + ASSERT_NE(cache, nullptr); + + cache->put("key1", "value1"); + cache->put("key2", "value2"); + + EXPECT_TRUE(cache->contains("key1")); + EXPECT_TRUE(cache->contains("key2")); + EXPECT_FALSE(cache->contains("key3")); + + string value; + EXPECT_TRUE(cache->get("key1", value)); + EXPECT_EQ(value, "value1"); +} + +INSTANTIATE_TEST_SUITE_P( + CacheTests, + ObLRUCacheTest, + ::testing::Values(2, 5, 10, 100, 10000) +); + +TEST(lru_test, zero_capacity) +{ + ObLRUCache lru_cache(0); + lru_cache.put(1, "one"); + ASSERT_FALSE(lru_cache.contains(1)); +} + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_lsm_test.cpp b/unittest/oblsm/ob_lsm_test.cpp new file mode 100644 index 000000000..adc5c33c0 --- /dev/null +++ b/unittest/oblsm/ob_lsm_test.cpp @@ -0,0 +1,121 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#include "gtest/gtest.h" + +#include "common/lang/filesystem.h" +#include "common/lang/thread.h" +#include "common/lang/utility.h" +#include "oblsm/include/ob_lsm.h" +#include "oblsm/ob_lsm_define.h" +#include "unittest/oblsm/ob_lsm_test_base.h" + +using namespace oceanbase; + +class ObLsmTest : public ObLsmTestBase { +}; + +// TODO: add update/delete case +TEST_P(ObLsmTest, DISABLED_oblsm_test_basic1) +{ + size_t num_entries = GetParam(); + auto data = KeyValueGenerator::generate_data(num_entries); + + for (const auto& [key, value] : data) { + ASSERT_EQ(db->put(key, value), RC::SUCCESS); + } + + for (const auto& [key, value] : data) { + string fetched_value; + ASSERT_EQ(db->get(key, &fetched_value), RC::SUCCESS); + EXPECT_EQ(fetched_value, value); + } + sleep(2); + + ObLsmIterator* it = db->new_iterator(ObLsmReadOptions()); + it->seek_to_first(); + size_t count = 0; + while (it->valid()) { + it->next(); + ++count; + } + EXPECT_EQ(count, num_entries); + delete it; + + ObLsmIterator* it2 = db->new_iterator(ObLsmReadOptions()); + it2->seek("key" + to_string(num_entries/2)); + ASSERT_TRUE(it2->valid()); + ASSERT_EQ(it2->value(), "value" + to_string(num_entries/2)); + while (it2->valid()) + { + it2->next(); + } + delete it2; +} + +void thread_put(ObLsm *db, int start, int end) { + for (int i = start; i < end; ++i) { + const std::string key = "key" + std::to_string(i); + RC rc = db->put(key, key); + ASSERT_EQ(rc, RC::SUCCESS); + } +} + +TEST_P(ObLsmTest, DISABLED_ConcurrentPutAndGetTest) { + const int num_entries = GetParam(); + const int num_threads = 4; + const int batch_size = num_entries / num_threads; + + std::vector threads; + for (int i = 0; i < num_threads; ++i) { + int start = i * batch_size; + int end = 0; + if (i == num_threads - 1) { + end = num_entries; + } else { + end = start + batch_size; + } + threads.emplace_back(thread_put, db, start, end); + } + + for (auto &thread : threads) { + thread.join(); + } + // TODO: remove sleep + sleep(2); + + // 
Verify all data using iterator + ObLsmReadOptions options; + ObLsmIterator *iterator = db->new_iterator(options); + + iterator->seek_to_first(); + int count = 0; + while (iterator->valid()) { + iterator->next(); + ++count; + } + + EXPECT_EQ(count, num_entries); + + // Clean up + delete iterator; +} + +INSTANTIATE_TEST_SUITE_P( + ObLsmTests, + ObLsmTest, + ::testing::Values(1, 10, 1000, 10000, 50000) +); + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_lsm_test_base.h b/unittest/oblsm/ob_lsm_test_base.h new file mode 100644 index 000000000..4d4c157b3 --- /dev/null +++ b/unittest/oblsm/ob_lsm_test_base.h @@ -0,0 +1,64 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#include "gtest/gtest.h" + +#include "common/lang/filesystem.h" +#include "common/lang/thread.h" +#include "common/lang/utility.h" +#include "oblsm/include/ob_lsm.h" +#include "oblsm/ob_lsm_define.h" + +using namespace oceanbase; + +class KeyValueGenerator +{ +public: + static vector> generate_data(size_t count) + { + vector> data; + for (size_t i = 0; i < count; ++i) { + data.emplace_back("key" + to_string(i), "value" + to_string(i)); + } + return data; + } +}; + +class ObLsmTestBase : public ::testing::TestWithParam +{ +protected: + ObLsm *db; + ObLsmOptions options; + string path; + + void SetUp() override + { + path = "./testdb"; + set_up_options(); + filesystem::remove_all(path); + filesystem::create_directory(path); + ASSERT_EQ(ObLsm::open(options, path, &db), RC::SUCCESS); + ASSERT_NE(db, nullptr); + } + + void set_up_options() + { + options.memtable_size = 8 * 1024; + options.table_size = 16 * 1024; + options.default_levels = 7; + options.default_l1_level_size = 128 * 1024; + options.default_level_ratio = 10; + options.default_l0_file_num = 3; + options.default_run_num = 7; + options.type = CompactionType::LEVELED; + } + + void TearDown() override { delete db; } +}; \ No newline at end of file diff --git a/unittest/oblsm/ob_skiplist_test.cpp b/unittest/oblsm/ob_skiplist_test.cpp new file mode 100644 index 000000000..82bcbbba2 --- /dev/null +++ b/unittest/oblsm/ob_skiplist_test.cpp @@ -0,0 +1,277 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#include "gtest/gtest.h" + +#include "oblsm/memtable/ob_skiplist.h" +#include "common/math/random_generator.h" +#include "common/thread/thread_pool_executor.h" +#include "common/lang/thread.h" + +using namespace oceanbase; + +using Key = uint64_t; + +struct Comparator { + int operator()(const Key& a, const Key& b) const { + if (a < b) { + return -1; + } else if (a > b) { + return +1; + } else { + return 0; + } + } +}; + +TEST(skiplist_test, DISABLED_skiplist_test_basic) +{ + common::RandomGenerator rnd; + const int N = 2000; + const int R = 5000; + std::set keys; + Comparator cmp; + ObSkipList list(cmp); + for (int i = 0; i < N; i++) { + Key key = rnd.next() % R; + if (keys.insert(key).second) { + list.insert(key); + } + } + + for (int i = 0; i < R; i++) { + if (list.contains(i)) { + ASSERT_EQ(keys.count(i), 1); + } else { + ASSERT_EQ(keys.count(i), 0); + } + } + +} + +inline uint32_t decode_fixed32(const char* ptr) { + uint32_t result; + memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load + return result; +} + +uint32_t murmurhash(const char* data, size_t n, uint32_t seed) { + // https://github.com/aappleby/smhasher/wiki/MurmurHash1 + const uint32_t m = 0xc6a4a793; + const uint32_t r = 24; + const char* limit = data + n; + uint32_t h = static_cast(seed ^ (n * m)); + + while (data + 4 <= limit) { + uint32_t w = decode_fixed32(data); + data += 4; + h += w; + h *= m; + h ^= (h >> 16); + } + + switch (limit - data) { + case 3: + h += static_cast(static_cast(data[2])) << 16; + case 2: + h += static_cast(static_cast(data[1])) << 8; + case 1: + h += static_cast(static_cast(data[0])); + h *= m; + h ^= (h >> r); + break; + } + return h; +} + +class ConcurrentTest { + public: + static const uint32_t K = 8; + + private: + static uint64_t key(Key key) { return (key >> 40); } + static uint64_t gen(Key key) { return (key >> 8) & 0xffffffffu; } + static uint64_t hash(Key key) { return key & 0xff; } + + static uint64_t hash_numbers(uint64_t k, 
uint64_t g) { + uint64_t data[2] = {k, g}; + return murmurhash(reinterpret_cast(data), sizeof(data), 0); + } + + static Key make_key(uint64_t k, uint64_t g) { + assert(sizeof(Key) == sizeof(uint64_t)); + assert(k <= K); // We sometimes pass K to seek to the end of the skiplist + assert(g <= 0xffffffffu); + return ((k << 40) | (g << 8) | (hash_numbers(k, g) & 0xff)); + } + + // Per-key generation + struct State { + std::atomic generation[K]; + void Set(int k, int v) { + generation[k].store(v, std::memory_order_release); + } + int Get(int k) { return generation[k].load(std::memory_order_acquire); } + + State() { + for (unsigned int k = 0; k < K; k++) { + Set(k, 0); + } + } + }; + + // Current state of the test + State current_; + + // InlineSkipList is not protected by mu_. We just use a single writer + // thread to modify it. + ObSkipList list_; + + public: + ConcurrentTest() : list_(Comparator()) {} + thread_local static common::RandomGenerator rnd; + int scan() { + auto iter = ObSkipList::Iterator(&list_); + iter.seek_to_first(); + int count = 0; + while (iter.valid()) { + count++; + iter.next(); + } + return count; + } + // REQUIRES: No concurrent calls for the same k + void concurrent_write_step(uint32_t k) { + const int g = current_.Get(k) + 1; + const Key new_key = make_key(k, g); + list_.insert_concurrently(new_key); + ASSERT_EQ(g, current_.Get(k) + 1); + current_.Set(k, g); + } + +}; + +thread_local common::RandomGenerator ConcurrentTest::rnd = common::RandomGenerator(); + +const uint32_t ConcurrentTest::K; +using TestInlineSkipList = ObSkipList; +class InlineSkipTest : public testing::Test { + public: + void Insert(TestInlineSkipList* list, Key key) { + list->insert(key); + keys_.insert(key); + } + + private: + std::set keys_; +}; + +class TestState { + public: + ConcurrentTest t_; + std::atomic quit_flag_; + std::atomic next_writer_; + + enum ReaderState { STARTING, RUNNING, DONE }; + + explicit TestState() + : quit_flag_(false), + state_(STARTING), + 
pending_writers_(0), + state_cv_() {} + + void wait(ReaderState s) { + std::unique_lock lock(mu_); + while (state_ != s) { + state_cv_.wait(lock); + } + } + + void change(ReaderState s) { + std::unique_lock lock(mu_); + state_ = s; + state_cv_.notify_one(); + } + + void adjust_pending_writers(int delta) { + std::unique_lock lock(mu_); + pending_writers_ += delta; + if (pending_writers_ == 0) { + state_cv_.notify_one(); + } + } + + void wait_for_pending_writers() { + std::unique_lock lock(mu_); + while (pending_writers_ != 0) { + state_cv_.wait(lock); + } + } + + private: + std::mutex mu_; + ReaderState state_; + int pending_writers_; + std::condition_variable state_cv_; +}; + + +static void concurrent_reader(void* arg) { + TestState* state = static_cast(arg); + state->change(TestState::RUNNING); + while (!state->quit_flag_.load(std::memory_order_acquire)) { + // TODO: add read_step + } + state->change(TestState::DONE); +} + +static void concurrent_writer(void* arg) { + TestState* state = static_cast(arg); + uint32_t k = state->next_writer_++ % ConcurrentTest::K; + state->t_.concurrent_write_step(k); + state->adjust_pending_writers(-1); +} + + +static void RunConcurrentInsert(int write_parallelism = 4) { + common::ThreadPoolExecutor executor_; + executor_.init("skiplist_test", write_parallelism, write_parallelism, 60 * 1000); + common::RandomGenerator rnd; + const int N = 1000; + const int kSize = 1000; + for (int i = 0; i < N; i++) { + TestState* state = new TestState(); + executor_.execute(std::bind(concurrent_reader, state)); + state->wait(TestState::RUNNING); + int k = 0; + for (k = 0; k < kSize; k += write_parallelism) { + state->next_writer_ = rnd.next(); + state->adjust_pending_writers(write_parallelism); + for (int p = 0; p < write_parallelism; ++p) { + executor_.execute(std::bind(concurrent_writer, state)); + } + state->wait_for_pending_writers(); + } + int count = state->t_.scan(); + ASSERT_EQ(k, count); + state->quit_flag_.store(true, 
std::memory_order_release); + state->wait(TestState::DONE); + delete state; + } +} + +TEST_F(InlineSkipTest, DISABLED_ConcurrentInsert2) { RunConcurrentInsert(2); } +TEST_F(InlineSkipTest, DISABLED_ConcurrentInsert3) { RunConcurrentInsert(4); } + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_table_test.cpp b/unittest/oblsm/ob_table_test.cpp new file mode 100644 index 000000000..3ddf23341 --- /dev/null +++ b/unittest/oblsm/ob_table_test.cpp @@ -0,0 +1,48 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#include "gtest/gtest.h" + +#include "common/lang/filesystem.h" +#include "oblsm/util/ob_comparator.h" +#include "oblsm/table/ob_sstable_builder.h" +#include "oblsm/table/ob_sstable.h" + +using namespace oceanbase; + +TEST(table_test, DISABLED_table_test_basic) +{ + ObDefaultComparator comparator; + shared_ptr table = make_shared(); + uint64_t seq = 0; + size_t count = 5; + for (size_t i = 0; i < count; i++) { + string key(to_string(i)); + table->put(seq++, key, key); + } + ObSSTableBuilder tb(&comparator, nullptr); + ASSERT_EQ(tb.build(table, "test.sst", 0), RC::SUCCESS); + shared_ptr sst = tb.get_built_table(); + ObLsmIterator* sst_iter = sst->new_iterator(); + sst_iter->seek_to_first(); + while(sst_iter->valid()) { + cout << sst_iter->key() << " " << sst_iter->value() << endl; + sst_iter->next(); + } + delete sst_iter; + +} + + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_util_test.cpp b/unittest/oblsm/ob_util_test.cpp new file mode 100644 index 000000000..1c6917468 --- /dev/null +++ b/unittest/oblsm/ob_util_test.cpp @@ -0,0 +1,42 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
*/ + +#include "gtest/gtest.h" +#include +#include + +#include "oblsm/util/ob_comparator.h" +#include "oblsm/util/ob_file_reader.h" +#include "oblsm/util/ob_file_writer.h" +#include "common/lang/filesystem.h" + +using namespace oceanbase; + +TEST(util_test, DISABLED_comparator_test_basic) +{ + ObDefaultComparator comparator; + EXPECT_TRUE(comparator.compare("key99", "key999") < 0); + EXPECT_TRUE(comparator.compare("key100", "key10") > 0); + EXPECT_TRUE(comparator.compare("key111", "key111") == 0); +} + +TEST(util_test, DISABLED_create_file) { + remove("tmpfile"); + auto w = ObFileWriter::create_file_writer("tmpfile", false); + w->open_file(); + EXPECT_TRUE(filesystem::exists("tmpfile")); + remove("tmpfile"); +} + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/observer/mvcc_trx_log_test.cpp b/unittest/observer/mvcc_trx_log_test.cpp index 1657d795b..c2c154d7c 100644 --- a/unittest/observer/mvcc_trx_log_test.cpp +++ b/unittest/observer/mvcc_trx_log_test.cpp @@ -707,7 +707,6 @@ TEST(MvccTrxLog, wal_rollback_abnormal) visible_count++; } } - ASSERT_EQ(visible_count, insert_num / 2); } db2->trx_kit().destroy_trx(trx);