diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 415ccc080..bf048eefa 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -10,7 +10,7 @@ FOREACH (F ${ALL_SRC}) ADD_EXECUTABLE(${prjName} ${F}) TARGET_LINK_LIBRARIES(${prjName} common pthread dl benchmark::benchmark) if(NOT ${prjName} STREQUAL "memtracer_performance_test") - TARGET_LINK_LIBRARIES(${prjName} observer_static) + TARGET_LINK_LIBRARIES(${prjName} observer_static oblsm) endif() ENDFOREACH (F) diff --git a/benchmark/oblsm_performance_test.cpp b/benchmark/oblsm_performance_test.cpp new file mode 100644 index 000000000..c500b942b --- /dev/null +++ b/benchmark/oblsm_performance_test.cpp @@ -0,0 +1,146 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. /* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
miniob is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
         http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details. */

#include <benchmark/benchmark.h>
#include <thread>

#include "common/lang/stdexcept.h"
#include "common/lang/filesystem.h"
#include "common/log/log.h"
#include "common/math/integer_generator.h"
#include "oblsm/include/ob_lsm.h"

// TODO
// a simple benchmark to test oblsm concurrency put/get. more detail test can use `ob_lsm_bench` tool.
using namespace std;
using namespace common;
using namespace benchmark;
using namespace oceanbase;

class BenchmarkBase : public Fixture
{
public:
  BenchmarkBase() {}

  virtual ~BenchmarkBase() {}

  virtual string Name() const = 0; + + virtual void SetUp(const State &state) + { + if (0 != state.thread_index()) { + return; + } + filesystem::remove_all("oblsm_benchmark"); + filesystem::create_directory("oblsm_benchmark"); + + RC rc = ObLsm::open(ObLsmOptions(), "oblsm_benchmark", &oblsm_); + if (rc != RC::SUCCESS) { + throw runtime_error("failed to open oblsm"); + } + + LOG_INFO("test %s setup done. threads=%d, thread index=%d", + this->Name().c_str(), state.threads(), state.thread_index()); + } + + virtual void TearDown(const State &state) + { + if (0 != state.thread_index()) { + return; + } + delete oblsm_; + LOG_INFO("test %s teardown done. threads=%d, thread index=%d", + this->Name().c_str(), + state.threads(), + state.thread_index()); + } + + void FillUp(uint32_t min, uint32_t max) + { + for (uint32_t value = min; value < max; ++value) { + string key = to_string(value); + + [[maybe_unused]] RC rc = oblsm_->put(key, key); + ASSERT(rc == RC::SUCCESS, "failed to insert entry into btree. key=%" PRIu32, value); + } + } + + uint32_t GetRangeMax(const State &state) const + { + uint32_t max = static_cast(state.range(0) * 3); + if (max <= 0) { + max = (1 << 31); + } + return max; + } + + void Insert(uint32_t value) { oblsm_->put(to_string(value), to_string(value)); } + + void Scan(uint32_t begin, uint32_t end) + { + auto iter = oblsm_->new_iterator(ObLsmReadOptions()); + iter->seek(to_string(begin)); + while (iter->valid() && iter->key() != to_string(end)) { + iter->next(); + } + delete iter; + } + +protected: + oceanbase::ObLsm *oblsm_ = nullptr; +}; + +//////////////////////////////////////////////////////////////////////////////// + +struct DISABLED_MixtureBenchmark : public BenchmarkBase +{ + string Name() const override { return "mixture"; } +}; + +BENCHMARK_DEFINE_F(DISABLED_MixtureBenchmark, Mixture)(State &state) +{ + pair insert_range{GetRangeMax(state) + 1, GetRangeMax(state) * 2}; + pair scan_range{1, 100}; + pair data_range{0, GetRangeMax(state) * 2}; + + IntegerGenerator data_generator(data_range.first, data_range.second); + IntegerGenerator insert_generator(insert_range.first, insert_range.second); + IntegerGenerator scan_range_generator(scan_range.first, scan_range.second); + IntegerGenerator operation_generator(0, 10); + + for (auto _ : state) { + int64_t operation_type = operation_generator.next(); + if (operation_type <= 9) { + operation_type = 0; + } else { + operation_type = 1; + } + switch (operation_type) { + case 0: { // insert + uint32_t value = static_cast(insert_generator.next()); + Insert(value); + } break; + case 1: { // scan + uint32_t begin = static_cast(data_generator.next()); + uint32_t end = begin + static_cast(scan_range_generator.next()); + Scan(begin, end); + } break; + default: { + ASSERT(false, "should not happen. operation=%ld", operation_type); + } + } + } +} + +BENCHMARK_REGISTER_F(DISABLED_MixtureBenchmark, Mixture)->Threads(10)->Arg(1)->Arg(1000)->Arg(10000); + +//////////////////////////////////////////////////////////////////////////////// + +BENCHMARK_MAIN(); diff --git a/docs/docs/db_course_lab/images/miniob-lsmtree-overview.png b/docs/docs/db_course_lab/images/miniob-lsmtree-overview.png new file mode 100644 index 000000000..0747c61ac Binary files /dev/null and b/docs/docs/db_course_lab/images/miniob-lsmtree-overview.png differ diff --git a/docs/docs/db_course_lab/lab1.md b/docs/docs/db_course_lab/lab1.md new file mode 100644 index 000000000..c841e73e4 --- /dev/null +++ b/docs/docs/db_course_lab/lab1.md @@ -0,0 +1,207 @@ +--- +title: LAB#1 LSM-Tree 存储引擎 +--- + +> 请不要将代码提交到公开仓库(包括提交带有题解的 Pull Request),同时也请不要抄袭其他同学或网络上可能存在的代码。 + +# LAB#1 LSM-Tree 存储引擎 + +这是数据库系统实现原理与实践课程的第一个正式实验题目,实验内容是完成 LSM-Tree 存储引擎中的部分功能。 + +## LSM-Tree 简介 +LSM-Tree 将写操作(包括数据插入、修改、删除)采用追加写的方式写入内存中并进行排序(MemTable),当 MemTable 的大小达到一定阈值后再将数据顺序写入磁盘中(Sorted Strings Table, SSTable),这使得 LSM-Tree 具有优秀的写性能;但是读操作时需要查询 MemTable 和 SSTable 中数据。因此,为了提高读性能,LSM-Tree会定期对磁盘中的SSTable文件进行合并(Compaction),合并时会将相同数据进行合并,减少数据量。 + +![lsm-tree overview](images/miniob-lsmtree-overview.png) + +## OceanBase 中的 LSM-Tree 简介 +OceanBase 数据库的存储引擎也是基于 LSM-Tree 架构,将数据分为静态基线数据(放在 SSTable 中)和动态增量数据(放在 MemTable 中)两部分,其中 SSTable 是只读的,一旦生成就不再被修改,存储于磁盘;MemTable 支持读写,存储于内存。数据库 DML 操作插入、更新、删除等首先写入 MemTable,等到 MemTable 达到一定大小时转储到磁盘成为 SSTable。在进行查询时,需要分别对 SSTable 和 MemTable 进行查询,并将查询结果进行归并,返回给 SQL 层归并后的查询结果。同时在内存实现了 Block Cache 和 Row cache,来避免对基线数据的随机读。关于 OceanBase 数据库的更多细节可以参考:https://www.oceanbase.com/docs/oceanbase-database-cn + +## MiniOB 中的 LSM-Tree 简介 +ObLsm 是 MiniOB 中的一个为教学设计的 LSM-Tree 架构的 KV 存储引擎。可以认为 ObLsm 是一个独立的模块,MiniOB 集成了 ObLsm 作为其一个存储引擎,ObLsm 也可独立运行,独立使用。ObLsm 本身包含了 LSM-Tree 中的关键结构,可以帮助大家学习 LSM-Tree 架构。ObLsm 的代码位于 `src/oblsm/` 目录下,目前 LAB#1 实验仅需修改该目录下的代码即可。关于 ObLsm 的更多细节可参考[文档](../design/miniob-lsm-tree.md)。 + +## 实验 + +LAB#1 中包含三个相对独立的子任务: + +- 任务1: 实现SkipList 并支持 SkipList 无锁并发写入 +- 任务2: 实现 Block Cache 功能,加速 SSTable 的读取 +- 任务3: 实现 Leveled Compaction 功能,支持 SSTable 的合并 + +对于上述的每个实验,代码中均提供了包含必须实现的 API 的类及其接口。请不要修改这些类中预定义函数的定义/类名/文件名等。否则,测试脚本可能无法正常运行。你可以在这些类中添加成员变量和函数,以正确实现所需的功能。 + +### 任务1: 实现SkipList 并支持 SkipList 无锁并发写入 + +目前,ObLsm 中的 MemTable 基于 SkipList 实现,当前的 SkipList(代码位于`src/oblsm/memtable/ob_skiplist.h`)支持一写多读(并发读不需要额外的同步机制,并发写需要外部的同步机制保证线程安全)。SkipList 中的部分函数还没有实现,请在此基础上实现 SkipList 的写接口(`ObSkipList::insert()`)和无锁并发写接口(`ObSkipList::insert_concurrently()`)(**注意:除了这一接口外,可能还需要实现其他必要函数,以支持 SkipList 正常运行,请自行 debug 或查看相关代码文件。**)。测试程序位于 `unittest/oblsm/ob_skiplist_test.cpp` 中。要求使用 CAS 操作来实现 SkipList 的无锁并发插入。下面对必要的知识做简单介绍。 + +#### CAS(Compare-And-Swap) + +CAS(Compare-And-Swap)是一种广泛用于并发编程中的原子操作,主要用于实现无锁数据结构和算法。它允许线程安全地对共享数据进行更新,而无需使用锁,从而提高了系统的性能和可伸缩性。 + +CAS 操作的基本思想是:通过比较某个内存位置的当前值(预期值)和一个给定的值,如果两者相等,那么将内存位置的值更新为一个新值;否则不更新。这个操作是原子的,也就是说,它要么完全成功,要么完全失败,不会出现中间状态。 + +CAS 通常由硬件提供支持,现代处理器通常都提供相应的指令。 + +在 C++ 中,CAS 操作通常通过 `std::atomic` 类型和 `compare_exchange_weak()` 或 `compare_exchange_strong()` 函数来实现。`compare_exchange_strong()/compare_exchange_weak()` 是 `std::atomic` 类模板的成员函数,其基本语义是比较一个原子变量的当前值与预期值,如果相等,则将其更新为新值。如果不相等,则将原子变量的当前值赋值给预期值(使调用者知道失败原因)。这个操作是原子的,保证了线程安全。 + +**思考**:`compare_exchange_weak()` 和 `compare_exchange_strong()` 的区别是什么?在实现这一任务时,你应该使用哪一个还是任意一个都可以? + +cpplings(`./src/cpplings`) 中也提供了一个练习 CAS 的例子,可参考 `src/cpplings/cas.cpp`,本练习不作为实验的一部分,不计入成绩,仅供练习参考。 + +#### 跳表(SkipList) + +跳表 (SkipList) 是由 William Pugh 发明的一种查找数据结构,支持对数据的快速查找,插入和删除。 + +跳表的期望空间复杂度为 $O(n)$,跳表的查询,插入和删除操作的期望时间复杂度都为 $O(\log n)$。 + +顾名思义,SkipList 是一种类似于链表的数据结构。更加准确地说,SkipList 是对有序链表的改进。 + +一个有序链表的查找操作,就是从头部开始逐个比较,直到当前节点的值大于或者等于目标节点的值。很明显,这个操作的复杂度是 O(n)。 + +跳表在有序链表的基础上,引入了分层的概念。首先,跳表的每一层都是一个有序链表,特别地,最底层是初始的有序链表。每个位于第 i 层的节点有 p 的概率出现在第 i+1 层,p 为常数。 + +在跳表中查找,就是从第 L(n) 层开始,水平地逐个比较直至当前节点的下一个节点大于等于目标节点,然后移动至下一层。重复这个过程直至到达第一层且无法继续进行操作。此时,若下一个节点是目标节点,则成功查找;反之,则元素不存在。这样一来,查找的过程中会跳过一些没有必要的比较,所以相比于有序链表的查询,跳表的查询更快。可以证明,跳表查询的平均复杂度为 $O(\log n)$。 + +插入节点的过程就是先执行一遍查询的过程,中途记录新节点是要插入哪一些节点的后面,最后再执行插入。每一层最后一个键值小于 key 的节点,就是需要进行修改的节点。 + +你需要补充完善 `src/oblsm/memtable/ob_skiplist.h` 以下函数,以实现 SkipList 的基本功能。 +```c++ +/** + * @brief Insert key into the list. + * REQUIRES: nothing that compares equal to key is currently in the list + */ +void insert(const Key &key); +``` + +**提示**:除了这一接口外,可能还需要实现其他必要函数,以支持 SkipList 正常运行,请自行 debug 或查看相关代码文件。 + +#### SkipList 的无锁并发插入 + +SkipList 的无锁并发插入可以参考 [The Art of Multiprocessor Programming](https://www2.cs.sfu.ca/~ashriram/Courses/CS431/assets/distrib/AMP.pdf) 中的实现。相关细节位于书中的 14.4 节。 + +无锁插入的核心部分伪代码如下: +``` c++ +Node *node = new_node(key); +while(true) { + // find the location to insert the new node. + // `prev` is less than `key`, `succ` is greater than `key`. + find(key, prev, succ); + // no synchronization needed here, because `node` is + // not yet visible to other threads. + node->next[i] = succ[i] + + if (!prev[0].next[0].cas_set(succ[0], node)) { + // if failed, try again. + continue; + } + + for (int level = 1; level < N/* N is the top level*/; level++) { + while (true) { + if (prev[level].next[level].cas_set(succ[level], node)) { + // success to insert the node at level i + break; + } + // if failed, try again. + find(key, prev, succ); + } + } + return; +} +``` + +注意: +1. 在实现 `insert_concurrently()` 请不要使用任何锁。 +2. 需要考虑 `common::RandomGenerator` 的线程安全。可参考:https://stackoverflow.com/questions/77377046/is-a-stdmt19937-static-function-variable-thread-safe + + +#### 测试 + +可以通过运行 `unittest/oblsm/ob_skiplist_test.cpp` 来测试 skiplist 的功能。 + +MiniOB 中的单测框架使用 `GTest`,在默认参数编译后,单测二进制程序位于 `$BUILD_DIR/bin/` 目录下,程序名与单测文件名对应。例如,`ob_skiplist_test.cpp` 对应的单测程序为 `$BUILD_DIR/bin/ob_skiplist_test`,通过运行该程序即可测试你的实现是否正确。 + +测试用例中会随机生成一些键值对,并插入到 SkipList 中。然后对 SkipList 进行查找操作,检查查找结果是否正确。 + +**注意**:你需要保证你的 SkipList 实现是线程安全的。 + +**思考**:在插入新节点的过程中,通过 CAS 操作来逐层添加节点,那么如果 CAS 失败后,是否需要回滚掉之前成功插入的节点?为什么? + +**思考**:多个 CAS 操作并不是原子的,也就是在插入过程中,多个读线程可能看到不一致的新节点,会导致什么问题? + +### 任务2:实现 Block Cache(块缓存) 功能,加速 SSTable 的读取 + +Block Cache(块缓存)是 LSM-Tree 在内存中缓存数据以供读取的地方。Block Cache 的作用是优化热点数据访问磁盘时的I/O性能。ObLsm 中使用 LRU Cache 来实现块缓存。 + +LRU Cache(Least Recently Used)是一种常见的缓存淘汰算法。用于在有限的缓存空间中管理数据对象。LRU Cache 的核心思想是基于时间局部性原理,即最近被访问的数据在未来可能会被再次访问。 + +Cache的容量有限,因此当Cache的容量用完后,而又有新的内容需要添加进来时,就需要挑选并舍弃原有的部分内容,从而腾出空间来放新内容。LRU Cache 的替换原则就是将最近最少使用的内容替换掉。 + +#### Block Cache 实现内容 +你需要实现 `src/oblsm/util/ob_lru_cache.h` 中的 `ObLruCache` 类,实现 LRU 缓存的功能。 +你需要在 SSTable 上实现 `read_block_with_cache()` 函数。 + +Block 通过 `(sst_id, block_id)` 作为 Key 进行缓存。如果命中了缓存,则从缓存中获取 Block;如果未命中缓存,则填充 Block 到缓存中。 + + +**提示**:在实现 Block Cache 时,需要保证其线程安全。 +**提示**:除了本文中提到的需要修改的位置,你还可能需要完成其他必要的修改以支持 Block Cache 正常运行,请自行 debug 或查看相关代码文件。。 + +**思考**:在 RocksDB 中,块缓存通过 `strict_capacity_limit` 配置项来控制块缓存大小是否严格限制在块缓存容量内。在你的实现中,块缓存大小是否有可能会超过块缓存容量? + +**思考**:LRU Cache 需要保证并发安全,你是通过什么方式保证这一点的?LevelDB/RocksDB 中都使用了分片的方式来减少锁冲突优化 LRU Cache 的并发性能,是否可以在你的实现中也使用分片来减少锁冲突? + +### 测试 + +可以通过运行 `unittest/oblsm/ob_lru_cache_test.cpp` 来测试 LRU Cache 的功能。 + +此外,还需要保证可以通过 `unittest/oblsm/ob_lsm_test.cpp` 和 `benchmark/oblsm_performance_test.cpp` 保证在增加 LRU Cache 后,不影响 LSM-Tree 的功能。 + +Q:如何运行 `benchmark/oblsm_performance_test.cpp`? +A:通过如下编译命令编译时,对应二进制文件位于`$BUILD_DIR/bin/`目录下,文件名为`oblsm_performance_test`。 + +```bash +bash build.sh release -DCONCURRENCY=ON -DWITH_BENCHMARK=ON +``` + + +### 任务3:实现 Leveled Compaction 功能,支持 SSTable 的合并 + +#### Compaction 简介 +##### Leveled Compaction +在 Leveled Compaction 中,LSM-Tree 划分为 N 个 Level,每个 Level 仅包含一个 Sorted Run(相同层级的 SSTable 之间 Key 范围不存在交集);相邻 Level 的 SSTable 大小有一个倍数关系。 + +Compaction 的触发是由于某个 Level 的数据量超过了阈值。在 Compaction 时会选择 L(n-1) 的数据,与原有 L(n) 的数据 Rowkey 有交集的部分进行合并,得到新的 L(n) 数据。 + +##### Tiered Compaction +在 Tiered Compaction 中,LSM-Tree 也被划分为 N 个 Level,每个 Level 可以包含多个 SSTable。相同 Level 的 SSTable 的 key range 可能存在交集。在查询时需要访问这个 Level 所有的 SSTable,使得读放大比较严重,查询性能不佳。 + +Compaction 的触发条件是某个 Level 的 SSTable 数量超过了阈值,会将 L(n) 的若干 SSTable,合出一个新的 SSTable 放入 L(n+1),并不与原有 L(n+1) 的数据进行合并。相比于 Leveled 而言执行速度会更快,写放大会更优,但由于查询的 SSTable 数量变多,读放大会更差。 + +#### Compaction 实现内容 + +你需要实现 Leveled Compaction 功能,在 `src/oblsm/compaction/compaction_picker.h` 中实现 `LeveledCompactionPicker`。实现 `ObLsmImpl::try_major_compaction` 中的`TODO: apply the compaction results to sstables`,实现 `ObLsmImpl::do_compaction()` 函数。 + +**提示**:除了本文中提到的需要修改的位置,你还可能需要完成其他必要的修改以支持 Leveled Compaction 正常运行,请自行 debug 或查看相关代码文件。 + +ObLsm 中的 Leveled Compaction 需要满足下面规则: + +1. 磁盘上的文件按多个层级(Level)进行组织。我们称它们为1级、2级等,或简称为L1、L2等,层级数由`ObLsmOptions::default_levels` 指定。特殊的 level-0(或简称 L0)包含刚刚从内存写入缓冲区(memtable)刷新的文件。 +2. 每个级别( L0 除外)都仅包含一个 Sorted Run(相同层级的 SSTable 之间 Key 范围不存在交集)。 +3. 每个层级(L1 及以上)之间的数据大小存在倍数关系:`L_{i+1} = L_{i} * k`,其中 k 由`ObLsmOptions::default_level_ratio` 指定,L1 层级的数据大小由`ObLsmOptions::default_l1_level_size` 指定。 +4. L0 层级受限于文件数,当超过指定文件数时触发合并。`ObLsmOptions::default_l0_file_num` 指定 L0 层级的文件数上限。 +5. 每次合并时,L0 层级会全部参与合并,并从 L1 层级中挑选出存在数据交集的所有 SSTable 也参与合并。对于 L1 及以上层级的合并,从 L_i 层挑选出同层中最后参与合并的文件进行合并,并从 L_{i+1} Level 中挑选出存在数据交集的所有 SSTable 也参与合并。 +6. 当多个 Level 触发压缩条件时,需要选择先压缩哪个级别。每个 Level 都会生成一个分数:对于非零级别,分数是级别的总大小除以目标大小。对于 level-0,分数是文件总数除以 default_l0_file_num。 +测试: + +**思考**:在当前的实现下,ObLsm 还有哪些优化空间?请列出一些优化方向。 + +**提示**: 在做 LAB#1 实验时,可暂时不考虑 `ObLsmImpl::seq_`(用于实现 MVCC)。 + +#### 测试 + +可以通过运行 `unittest/oblsm/ob_compaction_test.cpp` 来测试 Leveled Compaction 功能。此外,还需要保证可以通过 `unittest/oblsm/ob_lsm_test.cpp` 和 `benchmark/oblsm_performance_test.cpp` 保证在增加 Compaction 后,不影响 LSM-Tree 的功能。 + +## 参考资料 +这里提供了一些学习资料供大家参考学习。 +[The Art of Multiprocessor Programming](https://www2.cs.sfu.ca/~ashriram/Courses/CS431/assets/distrib/AMP.pdf) +[name that compaction algorithm](https://smalldatum.blogspot.com/2018/08/name-that-compaction-algorithm.html) +[MiniOB LSM-Tree 设计文档](../design/miniob-lsm-tree.md) diff --git a/docs/docs/db_course_lab/overview.md b/docs/docs/db_course_lab/overview.md index 8ea22cfa9..3f3598ac0 100644 --- a/docs/docs/db_course_lab/overview.md +++ b/docs/docs/db_course_lab/overview.md @@ -16,7 +16,7 @@ title: 数据库系统实现原理与实践课程实验 这是本课程的实验题目,实验题目是修改 MiniOB 的各个组件并完成指定的功能,实验题目会持续更新。 - [LAB#0 C++ 基础入门](./lab0.md) -- LAB#1 LSM-Tree 存储引擎 +- [LAB#1 LSM-Tree 存储引擎](./lab1.md) - LAB#2 查询引擎 - LAB#3 事务引擎 - LAB#4 性能测试 diff --git a/docs/docs/design/miniob-lsm-tree.md b/docs/docs/design/miniob-lsm-tree.md new file mode 100644 index 000000000..b4134a3da --- /dev/null +++ b/docs/docs/design/miniob-lsm-tree.md @@ -0,0 +1,151 @@ +--- +title: MiniOB LSM-Tree 存储引擎 +--- + +# MiniOB LSM-Tree 存储引擎 + +## LSM-Tree 背景介绍 +LSM-Tree 是一种数据结构,可用于存储键值对。LSM-Tree 采用了多层的结构,存储部分可以分为内存和磁盘两个部分。内存中的部分称为 MemTable,磁盘中的部分称为 SSTable(Sorted String Table)。LSM 树通过 Append-Only 的方式提供高效的数据写入,为了优化读取性能,LSM-Tree 通过 Compaction 操作定期重新组织数据。 + +OceanBase 数据库的存储引擎也是基于 LSM-Tree 架构,将数据分为静态基线数据(放在 SSTable 中)和动态增量数据(放在 MemTable 中)两部分,其中 SSTable 是只读的,一旦生成就不再被修改,存储于磁盘;MemTable 支持读写,存储于内存。数据库 DML 操作插入、更新、删除等首先写入 MemTable,等到 MemTable 达到一定大小时转储到磁盘成为 SSTable。在进行查询时,需要分别对 SSTable 和 MemTable 进行查询,并将查询结果进行归并,返回给 SQL 层归并后的查询结果。同时在内存实现了 Block Cache 和 Row cache,来避免对基线数据的随机读。关于 OceanBase 数据库的更多细节可以参考:https://www.oceanbase.com/docs/oceanbase-database-cn + +ObLsm 是一个为教学设计的 LSM-Tree 结构的 Key-Value 存储引擎。ObLsm 本身是一个独立于 MiniOB 的模块,可以独立编译使用。ObLsm 包含了 LSM-Tree 中的关键结构,可以帮助大家学习 LSM-Tree 架构。 +MiniOB 中也基于 ObLsm 实现了一个基于 LSM-Tree 的表引擎,可以将表数据以 Key-Value 的格式存储到磁盘。 + +## ObLsm 存储引擎 + +下面会对 ObLsm 的各个模块作简单介绍,便于大家对 ObLsm 有一个初步的了解,更多细节可以参考源代码 `src/oblsm`。 + +### MemTable +MemTable 是一种内存数据结构,用作处理即将到来的操作(insert/delete/update)的缓冲区(buffer)。很多数据结构都可以用于 MemTable 的实现,现有的 LSM-Tree 实现(如 LevelDB/RocksDB)中多采用 SkipList,ObLsm 目前也使用 SkipList 作为 MemTable 的底层数据结构。ObLsm 将 insert/update/delete 都视作一条记录来插入到 MemTable 中。 + +* insert:将一条记录插入到 MemTable 中。 +* update:将一条时间戳更大的记录插入到 MemTable 中。 +* delete:将一条 value 为空的记录插入到 MemTable 中。 + +MemTable 将插入的 Key-Value 编码为如下的记录存储。 +``` + ┌───────────┬──────────────┬───────┬──────────────┬──────────────────┐ + │ │ │ │ │ │ + │key_size(8)│ key(key_size)│ seq(8)│ value_size(8)│ value(value_size)│ + │ │ │ │ │ │ + └───────────┴──────────────┴───────┴──────────────┴──────────────────┘ + +``` + +其中,key_size 和 value_size 分别表示 key+seq 和 value 的长度,seq 表示记录的时间戳。括号中表示占用字节数。 + +MemTable 的实现位于:`src/oblsm/memtable/`,在代码中,我们将上图中的`key` 称为 user_key,将 `key + seq` 称为 internal_key,将`key_size + key + seq` 称为 lookup_key。 + +#### SkipList +SkipList(跳表)是用于有序元素序列快速搜索的一个数据结构,SkipList 是一个随机化的数据结构,实质就是一种可以进行二分查找的有序链表。SkipList 在原有的有序链表上面增加了多级索引,通过索引来实现快速查找。跳表不仅能提高搜索性能,同时也可以提高插入和删除操作的性能。它在性能上和红黑树,AVL树不相上下,但是跳表的原理非常简单,实现也比红黑树简单很多。 + +SkipList 的实现位于:`src/oblsm/memtable/ob_skiplist.h` + +#### MemTableIterator +MemTableIterator 提供了一种遍历 MemTable 的机制。它可以按序访问 MemTable 中的所有键值对。 + +MemTableIterator 的实现位于:`src/oblsm/memtable/ob_memtable.h::ObMemTableIterator` + +#### MemTable 转储 +MemTable 转储是将内存中的 MemTable 持久化到磁盘上的过程。当 MemTable 达到一定大小时,会被转储为不可变的 SSTable。转储过程通常包括排序数据、生成 SSTable 文件并将其写入磁盘。 + +转储相关代码位于:`src/oblsm/oblsm_impl.h::ObLsmImpl::try_freeze_memtable` + +### SSTable +MemTable 的大小达到限制条件,MemTable 的数据以按顺序被转储到磁盘中,被转储到磁盘中的结构称为(SSTable:Sorted Strings table)。 + +SSTable 是一种有序的键值对存储结构,它通常包含一个或多个块(block),每个块中包含一组有序的键值对。 + +SSTable 的存储格式示例如下: +``` + ┌─────────────────┐ + │ block 1 │◄───┐ + ├─────────────────┤ │ + │ block 2 │ │ + ├─────────────────┤ │ + │ .. │ │ + ├─────────────────┤ │ + │ block n │◄─┐ │ + ├─────────────────┤ │ │ + ┌───►│ meta size(n) │ │ │ + │ ├─────────────────┤ │ │ + │ │ block meta 1 ├──┼─┘ + │ ├─────────────────┤ │ + │ │ .. │ │ + │ ├─────────────────┤ │ + │ │ block meta n ├──┘ + │ ├─────────────────┤ + └────┤ │ + └─────────────────┘ +``` + +其中,block 表示由若干键值对组成的数据块。block meta 用于存储 block 的元信息,包括 block 的大小、block 的位置信息,block 中的键值对数量等。 + +SSTable 的实现位于:`src/oblsm/table/` + +#### Block +为了提高整体的读写效率,一个sstable文件按照固定大小划分为 Block。每个Block中,目前只存储了键值对数据。 +Block 的存储格式如下: +``` + ┌─────────────────┐ + │ entry 1 │◄───┐ + ├─────────────────┤ │ + │ entry 2 │ │ + ├─────────────────┤ │ + │ .. │ │ + ├─────────────────┤ │ + │ entry n │◄─┐ │ + ├─────────────────┤ │ │ + ┌───►│ offset size(n) │ │ │ + │ ├─────────────────┤ │ │ + │ │ offset 1 ├──┼─┘ + │ ├─────────────────┤ │ + │ │ .. │ │ + │ ├─────────────────┤ │ + │ │ offset n ├──┘ + │ ├─────────────────┤ + └────┤ offset start │ + └─────────────────┘ +``` + +Block 的主要实现位于 `src/oblsm/table/ob_block.h` + +#### SSTableBuilder +用于构造一个 `SSTable`,主要实现位于 `src/oblsm/table/ob_sstable_builder.h` +#### BlockBuilder +用于构造一个 `Block`,主要实现位于 `src/oblsm/table/ob_block_builder.h` + +### Compaction +Compaction 是 LSM-Tree 的关键组件,Compaction 会将多个 SSTable 合并为一个或多个新的 SSTable。Compaction 的实现主要位于 `src/oblsm/compaction/` + +## 基于 ObLsm 的表引擎 +MiniOB 基于 ObLsm 模块实现了一个 LSM-Tree 表引擎,用于以 Key-Value 格式存储表数据。表引擎的实现位于:`src/observer/storage/table/lsm_table_engine.h`。 + +LSM-Tree 表引擎使用方法: +在创建表时指定 engine=lsm,即可使用 LSM-Tree 表引擎。 +当不指定engine 或指定 engine=heap,将使用堆表作为表数据的存储方式。 + +```sql +create table t1 (id int primary key, name varchar(20))engine=lsm; +``` + +在 MiniOB 中,使用关系型模型来描述表结构,而 LSM-Tree 表引擎将表数据以 Key-Value 的形式存储到磁盘,因此需要提供一种机制来将关系型模型转换为 Key-Value 模型。 +目前 MiniOB 中以自增列作为 Key,将行数据以 `Table::make_record` 编码为 Value。通过 [orderedcode](https://github.com/google/orderedcode) 来对 Key 列做编码,使得在编码后 Key 的字典序上比较与 Key 对应的原始序列(目前可以认为只有自增列一列,后续支持主键后,Key 会对应表中的多列)上进行比较具有相同的顺序。 + +此外,为了在同一个 LSM-Tree 引擎中存储多张表的数据,MiniOB 为每一张表分配一个 TableID,并在 Key 中加入 TableID 作为前缀。 + +因此,每行数据按照如下规则编码成 (Key, Value) 键值对: + +``` +Key: t{TableID}r{AutoIncID} +Value: [col1, col2, col3, col4] +``` + +## 参考资料 + +1. [OceanBase](https://www.oceanbase.com/docs/oceanbase-database-cn) +2. [LSM-Tree](https://www.cs.umb.edu/~poneil/lsmtree.pdf) +3. [LevelDB](https://github.com/google/leveldb) +4. [RocksDB](https://github.com/facebook/rocksdb/wiki) +5. [Mini-LSM](https://skyzh.github.io/mini-lsm/) \ No newline at end of file diff --git a/docs/docs/dev-env/how_to_submit_for_testing.md b/docs/docs/dev-env/how_to_submit_for_testing.md new file mode 100644 index 000000000..198806bf5 --- /dev/null +++ b/docs/docs/dev-env/how_to_submit_for_testing.md @@ -0,0 +1,66 @@ +--- +title: 训练营平台使用说明 +--- + +# 训练营平台使用说明 + +[训练营平台](https://open.oceanbase.com/train) 是 OceanBase 为学生和数据库开发者设计的数据库学习与练习平台,旨在帮助用户更高效地掌握数据库基础知识。通过该平台,开发者可以从零开始逐步理解数据库的底层原理和实现方式,参与数据库实践与开发练习。训练营平台支持为 MiniOB/OceanBase 代码提交测试,通过运行自动化测试用例,反馈用户提交代码的运行结果和问题。 + +## 如何使用训练营提交代码测试 + +用户可根据下面的流程进行提测。 + +### 1. 进入训练营平台并登陆训练营平台 + +打开训练营网站:https://open.oceanbase.com/train + +你会看到训练营主页。 + +![](images/train_mainpage.png) + +点击训练营主页上方的登陆/注册按钮登陆训练营平台。 + +### 2. 报名具体的训练营课题进行提测 + +登陆后再点击 `广场` 中的训练营课题的 `报名参加` 按钮报名具体的训练营课题。目前公开的训练营课题包含 `MiniOB` 对应 2021 年大赛题目;`MiniOB 2022` 对应 2022 年大赛题目;`MiniOB 2023` 对应 2023 年大赛题目;`MiniOB 2024` 对应 2024 年大赛题目。后续会增加更多的训练营课题。 + +![](images/train_miniob_2024.png) + +对于不需要报名的训练营课题,在点击 `报名参加` 后,会自动跳转到训练营课题的题目页面。 +对于需要报名的训练营课题,需要在点击 `报名参加` 后,填写自己的报名信息,再进入训练营课题的题目页面。(**注意**:如果找不到自己已报名的训练营课题,可以点击 `我加入的` 页面进行查看。) + +### 3. 查看训练营课题的具体题目 + +进入到训练营课题你会看到如下页面。用户可以在题目列表查看该训练营课题对应的所有题目。如果要查看此题目排行榜,可点击「排行榜」按钮,目前只显示前 10 名及个人成绩,根据总分降序排列。 +![](images/train_miniob_2024_question.png) + +通过点击题目可以查看题目的详细描述,请参考题目的详细描述进行代码编写,和自测。 +![](images/train_miniob_2024_question_detail1.png) +![](images/train_miniob_2024_question_detail2.png) + +### 4. 提交代码测试 +在完成本地代码编写和自测后,请将代码提交到 Github/Gitee 仓库中。 +代码提交方法请参考[Github 代码提交](https://oceanbase.github.io/miniob/game/github-introduction/),[Gitee 代码提交](https://oceanbase.github.io/miniob/game/gitee-instructions/)。 + +点击「立即提测」按钮后,会弹出代码提交窗口。请填写仓库地址,commit id 和 branch 等必要信息并点击提交按钮。 + +![](images/train_miniob_2024_submit_code.png) + +### 5. 查看提测结果 +提交代码后,训练营平台会运行自动化测试用例,并反馈用户提交代码的运行结果和问题(如果等待时间过长(miniob 相关课题等待超过 1 小时,oceanbase 相关题目超过 6 小时)可联系相关工作人员)。 + +![](images/train_contact_us.png) + +可以点击 「查看提测记录」按钮查看提测结果。对于失败的测试用例,请点击下图中的按钮,即可查看失败的详细原因。 + +![](images/train_miniob_result.png) + +如果在使用训练营平台的过程中有问题可以在 https://ask.oceanbase.com/ 提问,问题分类请选择 "训练营"。 + + + + + + + + diff --git a/docs/docs/dev-env/images/train_contact_us.png b/docs/docs/dev-env/images/train_contact_us.png new file mode 100644 index 000000000..98611553a Binary files /dev/null and b/docs/docs/dev-env/images/train_contact_us.png differ diff --git a/docs/docs/dev-env/images/train_mainpage.png b/docs/docs/dev-env/images/train_mainpage.png new file mode 100644 index 000000000..86834454b Binary files /dev/null and b/docs/docs/dev-env/images/train_mainpage.png differ diff --git a/docs/docs/dev-env/images/train_miniob_2024.png b/docs/docs/dev-env/images/train_miniob_2024.png new file mode 100644 index 000000000..6cc11790b Binary files /dev/null and b/docs/docs/dev-env/images/train_miniob_2024.png differ diff --git a/docs/docs/dev-env/images/train_miniob_2024_question.png b/docs/docs/dev-env/images/train_miniob_2024_question.png new file mode 100644 index 000000000..ad3d145c6 Binary files /dev/null and b/docs/docs/dev-env/images/train_miniob_2024_question.png differ diff --git a/docs/docs/dev-env/images/train_miniob_2024_question_detail1.png b/docs/docs/dev-env/images/train_miniob_2024_question_detail1.png new file mode 100644 index 000000000..a98233919 Binary files /dev/null and b/docs/docs/dev-env/images/train_miniob_2024_question_detail1.png differ diff --git a/docs/docs/dev-env/images/train_miniob_2024_question_detail2.png b/docs/docs/dev-env/images/train_miniob_2024_question_detail2.png new file mode 100644 index 000000000..2a6534b34 Binary files /dev/null and b/docs/docs/dev-env/images/train_miniob_2024_question_detail2.png differ diff --git a/docs/docs/dev-env/images/train_miniob_2024_submit_code.png b/docs/docs/dev-env/images/train_miniob_2024_submit_code.png new file mode 100644 index 000000000..447704688 Binary files /dev/null and b/docs/docs/dev-env/images/train_miniob_2024_submit_code.png differ diff --git a/docs/docs/dev-env/images/train_miniob_result.png b/docs/docs/dev-env/images/train_miniob_result.png new file mode 100644 index 000000000..dc594dd1c Binary files /dev/null and b/docs/docs/dev-env/images/train_miniob_result.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_clangd.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_clangd.png new file mode 100644 index 000000000..62400aaab Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_clangd.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab1.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab1.png new file mode 100644 index 000000000..e62a1f8fa Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab1.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab2.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab2.png new file mode 100644 index 000000000..76357ec21 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab2.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab3.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab3.png new file mode 100644 index 000000000..47b0c0cc2 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab3.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab4.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab4.png new file mode 100644 index 000000000..03f2704e2 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_commit_lab4.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_config_docker.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_config_docker.png new file mode 100644 index 000000000..baf3c7a48 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_config_docker.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_cppdbg.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_cppdbg.png new file mode 100644 index 000000000..fa0ba604a Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_cppdbg.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_open_miniob2.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_open_miniob2.png new file mode 100644 index 000000000..904bc7d06 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_open_miniob2.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_open_miniob_as_workspace.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_open_miniob_as_workspace.png new file mode 100644 index 000000000..147ffd951 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_open_miniob_as_workspace.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_run_docker.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_run_docker.png new file mode 100644 index 000000000..01763960b Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_run_docker.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-config-clangd.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-config-clangd.png new file mode 100644 index 000000000..640e0dc34 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-config-clangd.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-debug.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-debug.png new file mode 100644 index 000000000..e47630ed7 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-debug.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-install-clangd.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-install-clangd.png new file mode 100644 index 000000000..e381b275f Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-install-clangd.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-install-cppdbg.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-install-cppdbg.png new file mode 100644 index 000000000..eaeb0cbd3 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-install-cppdbg.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-launch-config.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-launch-config.png new file mode 100644 index 000000000..e0fde6a3a Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup-launch-config.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_check_status_ssh.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_check_status_ssh.png new file mode 100644 index 000000000..168faec13 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_check_status_ssh.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_download_ubuntu.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_download_ubuntu.png new file mode 100644 index 000000000..ec54265b5 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_download_ubuntu.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_sshd.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_sshd.png new file mode 100644 index 000000000..0d70077ab Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_sshd.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu1.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu1.png new file mode 100644 index 000000000..d5e61673c Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu1.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu2.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu2.png new file mode 100644 index 000000000..e4d91961c Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu2.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu3.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu3.png new file mode 100644 index 000000000..6dee02c82 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu3.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu4.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu4.png new file mode 100644 index 000000000..8bf732ae7 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu4.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu5.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu5.png new file mode 100644 index 000000000..6d235e81e Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu5.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu6.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu6.png new file mode 100644 index 000000000..c41fb584e Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu6.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu7.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu7.png new file mode 100644 index 000000000..cc65a9212 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu7.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu8.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu8.png new file mode 100644 index 000000000..5af470184 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu8.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu9.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu9.png new file mode 100644 index 000000000..181c91799 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu9.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu_net.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu_net.png new file mode 100644 index 000000000..5c777b80b Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_init_ubuntu_net.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_ssh_connection.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_ssh_connection.png new file mode 100644 index 000000000..2ea5d5ad5 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_ssh_connection.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_install_ssh.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_install_ssh.png new file mode 100644 index 000000000..878ae7499 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_install_ssh.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_file.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_file.png new file mode 100644 index 000000000..0ec18bab5 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_file.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote.png new file mode 100644 index 000000000..1dcc53275 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote2.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote2.png new file mode 100644 index 000000000..ecb9c4272 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote2.png differ diff --git a/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_workspcae.png b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_workspcae.png new file mode 100644 index 000000000..1746af6b4 Binary files /dev/null and b/docs/docs/dev-env/images/vscode_dev_with_local_virtual_env_setup_vscode_workspcae.png differ diff --git a/docs/docs/dev-env/vscode_dev_with_local_virtual_env.md b/docs/docs/dev-env/vscode_dev_with_local_virtual_env.md new file mode 100644 index 000000000..fbba2b1da --- /dev/null +++ b/docs/docs/dev-env/vscode_dev_with_local_virtual_env.md @@ -0,0 +1,128 @@ +--- +title: 虚拟机+`vscode remote`开发 +--- +# 虚拟机+`vscode remote`开发 +作者:徐平 数据科学与工程学院 华东师范大学 + +#### 1. 安装Ubuntu +Ubuntu下载地址:[下载](https://cn.ubuntu.com/download/desktop) + +点击下载 + +![](images/vscode_dev_with_local_virtual_env_setup_download_ubuntu.png) + +选择典型的类型配置,点击下一步 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu1.png) + +找到刚刚从网站下载的iso文件,点击下一步 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu2.png) + +设置名称和密码 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu4.png) + +设置虚拟机名称和虚拟机数据存放位置 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu5.png) + +设置磁盘大小,推荐40~80G,点击下一步 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu6.png) + +点击完成即可 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu7.png) + +虚拟机开机之后,不断点击`Next`即可, 注意这里选择`Install Ubuntu`,后续操作也是不断点击`Next`。 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu8.png) + +输入账号名和密码 + +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu9.png) +后续就点击`Next`,最后安装`Ubuntu`,等待安装`Ubuntu`完毕,安装完毕之后点击`Restart Now`即可。 + +#### 2. 配置环境 +登录,进入终端,输入以下命令: +1. 安装网络工具 +``` +sudo apt update && sudo apt -y upgrade +sudo apt install -y net-tools openssh-server +``` +2. 输入命令`ssh-keygen -t rsa`,然后一路回车,生成密钥。 +![](images/vscode_dev_with_local_virtual_env_setup_init_sshd.png) +3. 然后检查`ssh`服务器的状态,输入命令:`sudo systemctl status ssh`或`sudo systemctl status sshd` +![](images/vscode_dev_with_local_virtual_env_setup_check_status_ssh.png) +**注意这里可能出现的错误**: +* 上图中绿色的`active`状态是红色的,表示`sshd`没有启动,使用命令`sudo systemctl restart ssh`或者`sudo systemctl restart sshd`。 +* `systemctl`找不到`sshd`/`ssh`服务,这里可以尝试输入下面两个命令:`ssh-keygen -A`和`/etc/init.d/ssh start`,然后再去查看服务器状态。 + + +4. 安装完毕之后,输入`ifconfig`查看虚拟机`ip` +![](images/vscode_dev_with_local_virtual_env_setup_init_ubuntu_net.png) +5. 然后就可以在本地终端使用`ssh`命令连接虚拟机服务器。 + `ssh <用户名>@<上图操作中的ip地址>` +![](images/vscode_dev_with_local_virtual_env_setup_ssh_connection.png) +6. 安装`vscode`:[vscode下载地址](https://code.visualstudio.com/download) +7. 安装`ssh remote`插件 +![](images/vscode_dev_with_local_virtual_env_setup_vscode_install_ssh.png) +8. 配置插件,添加刚刚的虚拟机 +![](images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote.png) +9. 输入连接虚拟机的命令,如下图示例 +![](images/vscode_dev_with_local_virtual_env_setup_vscode_new_remote2.png) +10. 打开一个新的远程文件夹: + ![](images/vscode_dev_with_local_virtual_env_setup_vscode_new_file.png) +11. 选择一个文件夹作为开发文件夹,这里我选择`/home/pingxu/Public/` +![](images/vscode_dev_with_local_virtual_env_setup_vscode_workspcae.png) +进入新的文件夹之后,输入完密码,会问是否信任当前目录什么的,选择`yes`就行了,自此,现在虚拟机安装完毕,工作目录是`/home/pingxu/Public/`。 +#### 3. 安装必要软件 +`vscode`中`crtl`+`~`打开终端,直接把下面命令拷贝过去 +``` +sudo apt-get update && sudo apt-get install -y locales apt-utils && rm -rf /var/lib/apt/lists/* \ + && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 +sudo apt-get update \ + && sudo apt-get install -y build-essential gdb cmake git wget flex texinfo libreadline-dev diffutils bison \ + && sudo apt-get install -y clang-format vim +sudo apt-get -y install clangd lldb +``` +#### 4. 安装miniob +``` +# 从github克隆项目会遇到网络问题,配置网络代理命令 +git config --global http.proxy http://<代理ip>:<代理端口> +git config --global https.proxy https://<代理ip>:<代理端口> +``` +在`Public`目录下: +``` +git clone https://github.com/oceanbase/miniob +cd miniob +THIRD_PARTY_INSTALL_PREFIX=/usr/local bash build.sh init +``` +完毕之后,我们用`vscode`打开`miniob`,作为新的工作目录。 + +![](images/vscode_dev_with_local_virtual_env_open_miniob_as_workspace.png) + +![](images/vscode_dev_with_local_virtual_env_open_miniob2.png) + +#### 5. vscode插件配置 +1. 首先安装插件`clangd`和`C/C++ Debug`。 +安装`clangd`, + +![](images/vscode_dev_with_local_virtual_env_setup-install-clangd.png) + +同样的方式安装`C/C++ Debug`。 + +![](images/vscode_dev_with_local_virtual_env_setup-install-cppdbg.png) + +1. 修改好代码之后,`Ctrl+Shift+B`构建项目,构建完毕后有一个`build_debug`的文件夹,存放编译后的可执行文件。 +2. 使用`clangd`作为语言服务器, 构建完毕后,将`build_debug`中的`compile_commands.json`文件复制到`miniob`目录中,随便打开一个cpp文件,就可以看到`clangd`开始工作。 + +![](images/vscode_dev_with_local_virtual_env_setup-config-clangd.png) + +#### 6. debug简单教程 + 用`F5`进行调试,关于如何`vscode`如何调试,可以参考相关的资料:[cpp-debug](https://code.visualstudio.com/docs/cpp/cpp-debug)。修改`launch.json`文件中`program`和`args`来调试不同的可执行文件。 + +![](images/vscode_dev_with_local_virtual_env_setup-launch-config.png) +![](images/vscode_dev_with_local_virtual_env_setup-debug.png) + diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 69984b6cd..4c1835b4f 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -14,6 +14,7 @@ nav: - 运行: how_to_run.md - 开发环境: - dev-env/introduction.md + - dev-env/vscode_dev_with_local_virtual_env.md - dev-env/dev_by_online.md - dev-env/how-to-dev-using-docker.md - dev-env/how_to_dev_in_docker_container_by_vscode.md @@ -21,6 +22,7 @@ nav: - dev-env/how_to_dev_miniob_by_docker_on_windows.md - dev-env/how_to_dev_miniob_by_vscode.md - dev-env/miniob-how-to-debug.md + - dev-env/how_to_submit_for_testing.md - 设计文档: - design/miniob-architecture.md - design/miniob-buffer-pool.md @@ -36,6 +38,7 @@ nav: - design/miniob-mysql-protocol.md - design/miniob-pax-storage.md - design/miniob-aggregation-and-group-by.md + - design/miniob-lsm-tree.md - Doxy 代码文档: design/doxy/html/index.html - OceanBase 数据库大赛: - game/introduction.md @@ -51,6 +54,7 @@ nav: - db_course_lab/overview.md - db_course_lab/cloudlab_setup.md - db_course_lab/lab0.md + - db_course_lab/lab1.md - 数据库实现简明教程: - lectures/copyright.md - lectures/lecture-1.md diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 995add319..da0613da6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,7 @@ ADD_SUBDIRECTORY(common) ADD_SUBDIRECTORY(observer) ADD_SUBDIRECTORY(obclient) +ADD_SUBDIRECTORY(oblsm) if (WITH_MEMTRACER) ADD_SUBDIRECTORY(memtracer) @@ -9,4 +10,3 @@ endif() if (WITH_CPPLINGS) ADD_SUBDIRECTORY(cpplings) endif() -ADD_SUBDIRECTORY(oblsm) \ No newline at end of file diff --git a/src/common/lang/bitset.h b/src/common/lang/bitset.h new file mode 100644 index 000000000..3b1a37a90 --- /dev/null +++ b/src/common/lang/bitset.h @@ -0,0 +1,15 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include + +using std::bitset; \ No newline at end of file diff --git a/src/common/lang/condition_variable.h b/src/common/lang/condition_variable.h new file mode 100644 index 000000000..683ccab37 --- /dev/null +++ b/src/common/lang/condition_variable.h @@ -0,0 +1,15 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include + +using std::condition_variable; diff --git a/src/common/lang/memory.h b/src/common/lang/memory.h index 6fd00fc4a..77ac0c551 100644 --- a/src/common/lang/memory.h +++ b/src/common/lang/memory.h @@ -12,5 +12,8 @@ See the Mulan PSL v2 for more details. */ #include +using std::enable_shared_from_this; +using std::make_shared; using std::make_unique; +using std::shared_ptr; using std::unique_ptr; \ No newline at end of file diff --git a/src/common/lang/stack.h b/src/common/lang/stack.h new file mode 100644 index 000000000..a1f5db36a --- /dev/null +++ b/src/common/lang/stack.h @@ -0,0 +1,15 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include + +using std::stack; \ No newline at end of file diff --git a/src/common/lang/unordered_set.h b/src/common/lang/unordered_set.h index dac6d7146..974d8901e 100644 --- a/src/common/lang/unordered_set.h +++ b/src/common/lang/unordered_set.h @@ -12,4 +12,15 @@ See the Mulan PSL v2 for more details. */ #include -using std::unordered_set; \ No newline at end of file +using std::unordered_set; + +template +static bool is_subset(const unordered_set &super_set, const unordered_set &child_set) +{ + for (const auto &element : child_set) { + if (super_set.find(element) == super_set.end()) { + return false; + } + } + return true; +} \ No newline at end of file diff --git a/src/cpplings/cas.cpp b/src/cpplings/cas.cpp index 8f3c04960..92ac07589 100644 --- a/src/cpplings/cas.cpp +++ b/src/cpplings/cas.cpp @@ -33,7 +33,6 @@ void append_node(int val) { Node *old_head = list_head; Node *new_node = new Node{val, old_head}; - // TODO: 使用 compare_exchange_strong 来使这段代码线程安全。 list_head = new_node; } diff --git a/src/oblsm/CMakeLists.txt b/src/oblsm/CMakeLists.txt index 8ffa9f9ab..235417de4 100644 --- a/src/oblsm/CMakeLists.txt +++ b/src/oblsm/CMakeLists.txt @@ -1,4 +1,6 @@ file(GLOB_RECURSE OBLSM_SOURCES "*.cpp") +list(FILTER OBLSM_SOURCES EXCLUDE REGEX "client/.*") +list(FILTER OBLSM_SOURCES EXCLUDE REGEX "benchmarks/.*") SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) add_library(oblsm STATIC ${OBLSM_SOURCES}) @@ -7,3 +9,9 @@ message("OBLSM_SOURCES: ${OBLSM_SOURCES}") FIND_PACKAGE(jsoncpp CONFIG REQUIRED) TARGET_LINK_LIBRARIES(oblsm common pthread JsonCpp::JsonCpp) + +ADD_EXECUTABLE(oblsm_cli client/ob_lsm_client.cpp) +TARGET_LINK_LIBRARIES(oblsm_cli oblsm) + +ADD_EXECUTABLE(oblsm_bench benchmarks/ob_lsm_bench.cpp) +TARGET_LINK_LIBRARIES(oblsm_bench oblsm) diff --git a/src/oblsm/benchmarks/ob_lsm_bench.cpp b/src/oblsm/benchmarks/ob_lsm_bench.cpp new file mode 100644 index 000000000..6824dde98 --- /dev/null +++ b/src/oblsm/benchmarks/ob_lsm_bench.cpp @@ -0,0 +1,2 @@ +// TODO: add oblsm bench tool, reference leveldb db_bench +int main() { return 0; } \ No newline at end of file diff --git a/src/oblsm/client/ob_lsm_client.cpp b/src/oblsm/client/ob_lsm_client.cpp new file mode 100644 index 000000000..523709acc --- /dev/null +++ b/src/oblsm/client/ob_lsm_client.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include +#include "common/lang/string.h" + +const char *startup_tips = R"( +Welcome to the OceanBase database implementation course. + +Copyright (c) 2021 OceanBase and/or its affiliates. + +Learn more about OceanBase at https://github.com/oceanbase/oceanbase +Learn more about MiniOB at https://github.com/oceanbase/miniob + +)"; + +int main(int argc, char *argv[]) +{ + printf("%s\n", startup_tips); + // TODO: a simple cli for oblsm, reference src/obclient/client.cpp + // usage example: + // put key1 value1 + // get key1 + // scan key1 key9 + printf("oblsm client is working in progress.\n"); + + return 0; +} diff --git a/src/oblsm/compaction/ob_compaction.cpp b/src/oblsm/compaction/ob_compaction.cpp new file mode 100644 index 000000000..e1dd39253 --- /dev/null +++ b/src/oblsm/compaction/ob_compaction.cpp @@ -0,0 +1,13 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/compaction/ob_compaction.h" + +namespace oceanbase {} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/compaction/ob_compaction.h b/src/oblsm/compaction/ob_compaction.h new file mode 100644 index 000000000..efa43fe56 --- /dev/null +++ b/src/oblsm/compaction/ob_compaction.h @@ -0,0 +1,81 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "oblsm/table/ob_sstable.h" +#include "oblsm/include/ob_lsm_options.h" + +namespace oceanbase { + +class ObCompactionPicker; + +/** + * @class ObCompaction + * @brief Represents a compaction task in the LSM-Tree. + * + * This class encapsulates the metadata and operations for a single compaction task, + * including input SSTables and the target level of the compaction. + */ +class ObCompaction +{ +public: + /** + * @brief Grants access to private members of this class to compaction picker classes. + * @see ObCompactionPicker, TiredCompactionPicker, LeveledCompactionPicker + */ + friend class ObCompactionPicker; + friend class TiredCompactionPicker; + friend class LeveledCompactionPicker; + + /** + * @brief Constructs a compaction task targeting a specific level. + * @param level The current level of the SSTables involved in the compaction. + */ + explicit ObCompaction(int level) : level_(level) {} + + ~ObCompaction() = default; + + /** + * @brief Gets the target level for this compaction. + * @return The integer value representing the level. + */ + int level() const { return level_; } + + /** + * @brief Retrieves an SSTable from the input SSTable list. + * @param which Index indicating which level's inputs to access (0 for `level_`, 1 for `level_ + 1`). + * @param i Index of the SSTable within the specified level's inputs. + * @return A shared pointer to the specified SSTable. + */ + shared_ptr input(int which, int i) const { return inputs_[which][i]; } + + /** + * @brief Computes the total number of input SSTables for the compaction task. + * @return The total number of input SSTables across both levels. + */ + int size() const { return inputs_[0].size() + inputs_[1].size(); } + + /** + * @brief Retrieves the vector of SSTables from the specified input level. + */ + const vector> &inputs(int which) const { return inputs_[which]; } + +private: + /// Each compaction reads inputs from "level_" and "level_+1" + std::vector> inputs_[2]; + + /** + * @brief The current level of SSTables involved in the compaction. + */ + int level_; +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/compaction/ob_compaction_picker.cpp b/src/oblsm/compaction/ob_compaction_picker.cpp new file mode 100644 index 000000000..0602f10be --- /dev/null +++ b/src/oblsm/compaction/ob_compaction_picker.cpp @@ -0,0 +1,44 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/compaction/ob_compaction_picker.h" +#include "common/log/log.h" + +namespace oceanbase { + +// TODO: put it in options +unique_ptr TiredCompactionPicker::pick(SSTablesPtr sstables) +{ + if (sstables->size() < options_->default_run_num) { + return nullptr; + } + unique_ptr compaction(new ObCompaction(0)); + // TODO(opt): a tricky compaction picker, just pick all sstables if enough sstables. + for (size_t i = 0; i < sstables->size(); ++i) { + size_t tire_i_size = (*sstables)[i].size(); + for (size_t j = 0; j < tire_i_size; ++j) { + compaction->inputs_[0].emplace_back((*sstables)[i][j]); + } + } + // TODO: LOG_DEBUG for debug + return compaction; +} + +ObCompactionPicker *ObCompactionPicker::create(CompactionType type, ObLsmOptions *options) +{ + + switch (type) { + case CompactionType::TIRED: return new TiredCompactionPicker(options); + default: return nullptr; + } + return nullptr; +} + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/compaction/ob_compaction_picker.h b/src/oblsm/compaction/ob_compaction_picker.h new file mode 100644 index 000000000..4b844de95 --- /dev/null +++ b/src/oblsm/compaction/ob_compaction_picker.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "oblsm/table/ob_sstable.h" +#include "common/sys/rc.h" +#include "oblsm/compaction/ob_compaction.h" +#include "oblsm/util/ob_comparator.h" + +namespace oceanbase { + +/** + * @class ObCompactionPicker + * @brief Abstract base class for compaction picker strategies in an LSM-Tree. + * + * This class defines the interface for picking a compaction task from a set of SSTables. + * Derived classes will implement specific compaction strategies (e.g., tiered or leveled compaction). + */ +class ObCompactionPicker +{ +public: + /** + * @param options Pointer to the LSM-Tree options configuration. + */ + ObCompactionPicker(ObLsmOptions *options) : options_(options) {} + + virtual ~ObCompactionPicker() = default; + + /** + * @brief Pure virtual method to pick a compaction task. + * @param sstables A pointer to the SSTables available for compaction. + * @return A unique pointer to the selected compaction task. + */ + virtual unique_ptr pick(SSTablesPtr sstables) = 0; + + /** + * @brief Static factory method to create a specific compaction picker. + * @param type The type of compaction strategy (e.g., tiered, leveled). + * @param options Pointer to the LSM-Tree options configuration. + * @return A pointer to the created ObCompactionPicker instance. + */ + static ObCompactionPicker *create(CompactionType type, ObLsmOptions *options); + +protected: + ObLsmOptions *options_; ///< Pointer to the LSM-Tree options configuration. +}; + +/** + * @class TiredCompactionPicker + * @brief A class implementing the tiered compaction strategy. + * + */ +class TiredCompactionPicker : public ObCompactionPicker +{ +public: + /** + * @param options Pointer to the LSM-Tree options configuration. + */ + TiredCompactionPicker(ObLsmOptions *options) : ObCompactionPicker(options) {} + + ~TiredCompactionPicker() = default; + + /** + * @brief Implementation of the pick method for tiered compaction. + */ + unique_ptr pick(SSTablesPtr sstables) override; + +private: +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/include/ob_lsm.h b/src/oblsm/include/ob_lsm.h new file mode 100644 index 000000000..175986173 --- /dev/null +++ b/src/oblsm/include/ob_lsm.h @@ -0,0 +1,99 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/lang/string.h" +#include "common/lang/string_view.h" +#include "common/sys/rc.h" +#include "oblsm/include/ob_lsm_options.h" +#include "oblsm/include/ob_lsm_iterator.h" + +namespace oceanbase { + +/** + * @brief ObLsm is a key-value storage engine for educational purpose. + * ObLsm learned a lot about design from leveldb and streamlined it. + * TODO: add more comments about ObLsm. + */ +class ObLsm +{ +public: + /** + * @brief Opens an LSM-Tree database at the specified path. + * + * This is a static method that initializes an LSM-Tree database instance. + * It allocates memory for the database and returns a pointer to it through the + * `dbptr` parameter. The caller is responsible for freeing the memory allocated + * for the database by deleting the returned pointer when it is no longer needed. + * + * @param options A reference to the LSM-Tree options configuration. + * @param path A string specifying the path to the database. + * @param dbptr A double pointer to store the allocated database instance. + * @return An RC value indicating success or failure of the operation. + * @note The caller must delete the returned database pointer (`*dbptr`) when done. + */ + static RC open(const ObLsmOptions &options, const string &path, ObLsm **dbptr); + + ObLsm() = default; + + ObLsm(const ObLsm &) = delete; + + ObLsm &operator=(const ObLsm &) = delete; + + virtual ~ObLsm() = default; + + /** + * @brief Inserts or updates a key-value entry in the LSM-Tree. + * + * This method adds a new entry + * + * @param key The key to insert or update. + * @param value The value associated with the key. + * @return An RC value indicating success or failure of the operation. + */ + virtual RC put(const string_view &key, const string_view &value) = 0; + + /** + * @brief Retrieves the value associated with a specified key. + * + * This method looks up the value corresponding to the given key in the LSM-Tree. + * If the key exists, the value is stored in the output parameter `*value`. + * + * @param key The key to look up. + * @param value Pointer to a string where the retrieved value will be stored. + * @return An RC value indicating success or failure of the operation. + */ + virtual RC get(const string_view &key, string *value) = 0; + + /** + * @brief Creates a new iterator for traversing the LSM-Tree database. + * + * This method returns a heap-allocated iterator over the contents of the + * database. The iterator is initially invalid, and the caller must use one + * of the `seek`/`seek_to_first`/`seek_to_last` methods on the iterator + * before accessing any elements. + * + * @param options Read options to configure the behavior of the iterator. + * @return A pointer to the newly created iterator. + * @note The caller is responsible for deleting the iterator when it is no longer needed. + */ + virtual ObLsmIterator *new_iterator(ObLsmReadOptions options) = 0; + + /** + * @brief Dumps all SSTables for debugging purposes. + * + * This method outputs the structure and contents of all SSTables in the + * LSM-Tree for debugging or inspection purposes. + */ + virtual void dump_sstables() = 0; +}; + +} // namespace oceanbase diff --git a/src/oblsm/include/ob_lsm_iterator.h b/src/oblsm/include/ob_lsm_iterator.h new file mode 100644 index 000000000..90cb04181 --- /dev/null +++ b/src/oblsm/include/ob_lsm_iterator.h @@ -0,0 +1,103 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// An iterator yields a sequence of key/value pairs from a source. +// The following class defines the interface. Multiple implementations +// are provided by this library. In particular, iterators are provided +// to access the contents of a Table or a DB. +// +// Multiple threads can invoke const methods on an ObLsmIterator without +// external synchronization, but if any of the threads may call a +// non-const method, all threads accessing the same ObLsmIterator must use +// external synchronization. + +#pragma once + +#include "common/lang/string_view.h" +#include "common/sys/rc.h" + +namespace oceanbase { + +/** + * @class ObLsmIterator + * @brief Abstract class for iterating over key-value pairs in an LSM-Tree. + * + * This class provides an interface for iterators used to traverse key-value entries + * stored in an LSM-Tree. Derived classes must implement this interface to handle + * specific storage structures, such as SSTables or MemTables. + */ +class ObLsmIterator +{ +public: + ObLsmIterator(){}; + + ObLsmIterator(const ObLsmIterator &) = delete; + + ObLsmIterator &operator=(const ObLsmIterator &) = delete; + + virtual ~ObLsmIterator(){}; + + /** + * @brief Checks if the iterator is currently positioned at a valid key-value pair. + * + * @return `true` if the iterator is valid, `false` otherwise. + */ + virtual bool valid() const = 0; + + /** + * @brief Moves the iterator to the next key-value pair in the source. + */ + virtual void next() = 0; + + /** + * @brief Returns the key of the current entry the iterator is positioned at. + * + * This method retrieves the key corresponding to the key-value pair at the + * current position of the iterator. + * + * @return A `string_view` containing the key of the current entry. + */ + virtual string_view key() const = 0; + + /** + * @brief Returns the value of the current entry the iterator is positioned at. + * + * This method retrieves the value corresponding to the key-value pair at the + * current position of the iterator. + * + * @return A `string_view` containing the value of the current entry. + */ + virtual string_view value() const = 0; + + /** + * @brief Positions the iterator at the first entry with a key greater than or equal to the specified key. + * + * @param k The key to search for. + */ + virtual void seek(const string_view &k) = 0; + + /** + * @brief Positions the iterator at the first key-value pair in the source. + * + */ + virtual void seek_to_first() = 0; + + /** + * @brief Positions the iterator at the last key-value pair in the source. + * + */ + virtual void seek_to_last() = 0; +}; + +} // namespace oceanbase diff --git a/src/oblsm/include/ob_lsm_options.h b/src/oblsm/include/ob_lsm_options.h new file mode 100644 index 000000000..7202fde7e --- /dev/null +++ b/src/oblsm/include/ob_lsm_options.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include +#include "oblsm/ob_lsm_define.h" + +namespace oceanbase { + +/** + * @brief Configuration options for the LSM-Tree implementation. + */ +struct ObLsmOptions +{ + ObLsmOptions(){}; + + // TODO: all params are used for test, need to reset to appropriate values. + size_t memtable_size = 8 * 1024; + // sstable size + size_t table_size = 16 * 1024; + + // leveled compaction + size_t default_levels = 7; + size_t default_l1_level_size = 128 * 1024; + size_t default_level_ratio = 10; + size_t default_l0_file_num = 3; + + // tired compaction + size_t default_run_num = 7; + + // default compaction type + CompactionType type = CompactionType::LEVELED; +}; + +// TODO: UNIMPLEMENTED +struct ObLsmReadOptions +{ + ObLsmReadOptions(){}; +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/memtable/ob_memtable.cpp b/src/oblsm/memtable/ob_memtable.cpp new file mode 100644 index 000000000..fbed53039 --- /dev/null +++ b/src/oblsm/memtable/ob_memtable.cpp @@ -0,0 +1,106 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/memtable/ob_memtable.h" +#include "common/lang/string.h" +#include "common/lang/memory.h" +#include "oblsm/util/ob_coding.h" +#include "oblsm/ob_lsm_define.h" + +namespace oceanbase { + +static string_view get_length_prefixed_string(const char *data) +{ + size_t len = get_numeric(data); + const char *p = data + sizeof(size_t); + return string_view(p, len); +} + +void ObMemTable::put(uint64_t seq, const string_view &key, const string_view &value) +{ + // TODO: add lookup_key, internal_key, user_key relationship and format in memtable/sstable/block + // TODO: unify the encode/decode logic in separate file. + // Format of an entry is concatenation of: + // key_size : internal_key.size() + // key bytes : char[internal_key.size()] + // seq : uint64(sequence) + // value_size : value.size() + // value bytes : char[value.size()] + size_t user_key_size = key.size(); + size_t val_size = value.size(); + size_t internal_key_size = user_key_size + SEQ_SIZE; + const size_t encoded_len = sizeof(size_t) + internal_key_size + sizeof(size_t) + val_size; + char * buf = reinterpret_cast(arena_.alloc(encoded_len)); + char * p = buf; + memcpy(p, &internal_key_size, sizeof(size_t)); + p += sizeof(size_t); + memcpy(p, key.data(), user_key_size); + p += user_key_size; + memcpy(p, &seq, sizeof(uint64_t)); + p += sizeof(uint64_t); + memcpy(p, &val_size, sizeof(size_t)); + p += sizeof(size_t); + memcpy(p, value.data(), val_size); + table_.insert(buf); +} + +// TODO: use iterator to simplify the code +RC ObMemTable::get(const string_view &lookup_key, string *value) +{ + RC rc = RC::SUCCESS; + ObSkipList::Iterator iter(&table_); + iter.seek(lookup_key.data()); + if (iter.valid()) { + const char *entry = iter.key(); + char * key_ptr = const_cast(entry); + size_t key_length = get_numeric(key_ptr); + key_ptr += sizeof(size_t); + // TODO: unify comparator and key lookup key in memtable and sstable. + string_view user_key = extract_user_key_from_lookup_key(lookup_key); + if (comparator_.comparator.user_comparator()->compare(string_view(key_ptr, key_length - SEQ_SIZE), user_key) == 0) { + key_ptr += key_length; + size_t val_len = get_numeric(key_ptr); + key_ptr += sizeof(size_t); + string_view val(key_ptr, val_len); + value->assign(val.data(), val.size()); + } else { + return RC::NOTFOUND; + } + } else { + return RC::NOTFOUND; + } + return rc; +} + +int ObMemTable::KeyComparator::operator()(const char *a, const char *b) const +{ + // Internal keys are encoded as length-prefixed strings. + string_view a_v = get_length_prefixed_string(a); + string_view b_v = get_length_prefixed_string(b); + return comparator.compare(a_v, b_v); +} + +ObLsmIterator *ObMemTable::new_iterator() { return new ObMemTableIterator(get_shared_ptr(), &table_); } + +string_view ObMemTableIterator::key() const { return get_length_prefixed_string(iter_.key()); } + +string_view ObMemTableIterator::value() const +{ + string_view key_slice = get_length_prefixed_string(iter_.key()); + return get_length_prefixed_string(key_slice.data() + key_slice.size()); +} + +void ObMemTableIterator::seek(const string_view &k) +{ + tmp_.clear(); + iter_.seek(k.data()); +} + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/memtable/ob_memtable.h b/src/oblsm/memtable/ob_memtable.h new file mode 100644 index 000000000..45216064e --- /dev/null +++ b/src/oblsm/memtable/ob_memtable.h @@ -0,0 +1,177 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/sys/rc.h" +#include "common/lang/string.h" +#include "common/lang/string_view.h" +#include "common/lang/memory.h" +#include "oblsm/memtable/ob_skiplist.h" +#include "oblsm/util/ob_comparator.h" +#include "oblsm/util/ob_arena.h" +#include "oblsm/include/ob_lsm_iterator.h" + +namespace oceanbase { + +/** + * @class ObMemTable + * @brief MemTable implementation for LSM-Tree. + * + * The `ObMemTable` represents an in-memory structure that stores key-value pairs + * before they are flushed to disk as SSTables. It supports key-value insertion, + * querying, and iteration. The implementation currently uses a skip list as + * the underlying data structure. + */ +class ObMemTable : public enable_shared_from_this +{ +public: + ObMemTable() : comparator_(), table_(comparator_){}; + + ~ObMemTable() = default; + + /** + * @brief Retrieves a shared pointer to the current ObMemTable instance. + * + * This method utilizes `std::enable_shared_from_this` to provide a shared pointer + * to the current object. Useful when the current object needs to be shared safely + * among multiple components. + * + * @return A shared pointer to the current `ObMemTable` instance. + */ + shared_ptr get_shared_ptr() { return shared_from_this(); } + + /** + * @brief Inserts a key-value pair into the memtable. + * + * Each entry is versioned using the provided `seq` number. If the same key is + * inserted multiple times, the version with the highest sequence number will + * take precedence when queried. + * + * @param seq A sequence number used for versioning the key-value entry. + * @param key The key to be inserted. + * @param value The value associated with the key. + */ + void put(uint64_t seq, const string_view &key, const string_view &value); + + /** + * @brief Retrieves the value associated with a specific key from the memtable. + * + * Searches for the specified key in the memtable. If the key exists, its + * associated value is stored in the output parameter `*value`. + * + * @param key The key to search for in the memtable. + * @param value A pointer to a string where the retrieved value will be stored. + * @return An RC value indicating the success or failure of the operation. + */ + RC get(const string_view &key, string *value); + + /** + * @brief Estimates the memory usage of the memtable. + * + * Returns the approximate memory usage of the memtable, including the + * skip list and associated memory allocations. + * + * @return The approximate memory usage in bytes. + */ + size_t appro_memory_usage() const { return arena_.memory_usage(); } + + /** + * @brief Creates a new iterator for traversing the contents of the memtable. + * + * This method returns a heap-allocated iterator for iterating over key-value + * pairs stored in the memtable. The caller is responsible for managing the + * lifetime of the returned iterator. + * + * @return A pointer to the newly created `ObLsmIterator` for the memtable. + */ + ObLsmIterator *new_iterator(); + +private: + friend class ObMemTableIterator; + /** + * @brief Compares two keys. + * + * Uses the internal comparator to perform lexicographical comparison between + * two keys. + * + * @param a Pointer to the first key. + * @param b Pointer to the second key. + * @return An integer indicating the result of the comparison: + * - Negative value if `a < b` + * - Zero if `a == b` + * - Positive value if `a > b` + */ + struct KeyComparator + { + const ObInternalKeyComparator comparator; + explicit KeyComparator() {} + int operator()(const char *a, const char *b) const; + }; + + // TODO: currently the memtable use skiplist as the underlying data structure, + // it is possible to use other data structure, for example, hash table. + typedef ObSkipList Table; + + /** + * @brief Comparator used for ordering keys in the memtable. + * + * This member defines the rules for comparing keys in the skip list. + * TODO: support user-defined comparator + */ + KeyComparator comparator_; + + /** + * @brief The underlying data structure used for key-value storage. + * + * Currently implemented as a skip list. Future versions may support + * alternative data structures, such as hash tables. + */ + Table table_; + + /** + * @brief Memory arena used for memory management in the memtable. + * + * Allocates and tracks memory usage for the skip list and other internal + * components of the memtable. + */ + ObArena arena_; +}; + +/** + * @class ObMemTableIterator + * @brief An iterator for traversing the contents of an `ObMemTable`. + */ +class ObMemTableIterator : public ObLsmIterator +{ +public: + explicit ObMemTableIterator(shared_ptr mem, ObMemTable::Table *table) : mem_(mem), iter_(table) {} + + ObMemTableIterator(const ObMemTableIterator &) = delete; + ObMemTableIterator &operator=(const ObMemTableIterator &) = delete; + + ~ObMemTableIterator() override = default; + + void seek(const string_view &k) override; + void seek_to_first() override { iter_.seek_to_first(); } + void seek_to_last() override { iter_.seek_to_last(); } + + bool valid() const override { return iter_.valid(); } + void next() override { iter_.next(); } + string_view key() const override; + string_view value() const override; + +private: + shared_ptr mem_; + ObMemTable::Table::Iterator iter_; + string tmp_; // For seek key +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/memtable/ob_skiplist.h b/src/oblsm/memtable/ob_skiplist.h new file mode 100644 index 000000000..7046c1311 --- /dev/null +++ b/src/oblsm/memtable/ob_skiplist.h @@ -0,0 +1,398 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +// Thread safety +// ------------- +// +// Writes require external synchronization, most likely a mutex. +// Reads require a guarantee that the ObSkipList will not be destroyed +// while the read is in progress. Apart from that, reads progress +// without any internal locking or synchronization. +// +// Invariants: +// +// (1) Allocated nodes are never deleted until the ObSkipList is +// destroyed. This is trivially guaranteed by the code since we +// never delete any skip list nodes. +// +// (2) The contents of a Node except for the next/prev pointers are +// immutable after the Node has been linked into the ObSkipList. +// Only insert() modifies the list, and it is careful to initialize +// a node and use release-stores to publish the nodes in one or +// more lists. +// +// ... prev vs. next pointer ordering ... + +#include "common/math/random_generator.h" +#include "common/lang/atomic.h" +#include "common/lang/vector.h" +#include "common/log/log.h" + +namespace oceanbase { + +template +class ObSkipList +{ +private: + struct Node; + +public: + /** + * @brief Create a new ObSkipList object that will use "cmp" for comparing keys. + */ + explicit ObSkipList(ObComparator cmp); + + ObSkipList(const ObSkipList &) = delete; + ObSkipList &operator=(const ObSkipList &) = delete; + ~ObSkipList(); + + /** + * @brief Insert key into the list. + * REQUIRES: nothing that compares equal to key is currently in the list + */ + void insert(const Key &key); + + void insert_concurrently(const Key &key); + + /** + * @brief Returns true if an entry that compares equal to key is in the list. + * @param [in] key + * @return true if found, false otherwise + */ + bool contains(const Key &key) const; + + /** + * @brief Iteration over the contents of a skip list + */ + class Iterator + { + public: + /** + * @brief Initialize an iterator over the specified list. + * @return The returned iterator is not valid. + */ + explicit Iterator(const ObSkipList *list); + + /** + * @brief Returns true iff the iterator is positioned at a valid node. + */ + bool valid() const; + + /** + * @brief Returns the key at the current position. + * REQUIRES: valid() + */ + const Key &key() const; + + /** + * @brief Advance to the next entry in the list. + * REQUIRES: valid() + */ + void next(); + + /** + * @brief Advances to the previous position. + * REQUIRES: valid() + */ + void prev(); + + /** + * @brief Advance to the first entry with a key >= target + */ + void seek(const Key &target); + + /** + * @brief Position at the first entry in list. + * @note Final state of iterator is valid() iff list is not empty. + */ + void seek_to_first(); + + /** + * @brief Position at the last entry in list. + * @note Final state of iterator is valid() iff list is not empty. + */ + void seek_to_last(); + + private: + const ObSkipList *list_; + Node *node_; + }; + +private: + enum + { + kMaxHeight = 12 + }; + + inline int get_max_height() const { return max_height_.load(std::memory_order_relaxed); } + + Node *new_node(const Key &key, int height); + int random_height(); + bool equal(const Key &a, const Key &b) const { return (compare_(a, b) == 0); } + + // Return the earliest node that comes at or after key. + // Return nullptr if there is no such node. + // + // If prev is non-null, fills prev[level] with pointer to previous + // node at "level" for every level in [0..max_height_-1]. + Node *find_greater_or_equal(const Key &key, Node **prev) const; + + // Return the latest node with a key < key. + // Return head_ if there is no such node. + Node *find_less_than(const Key &key) const; + + // Return the last node in the list. + // Return head_ if list is empty. + Node *find_last() const; + + // Immutable after construction + ObComparator const compare_; + + Node *const head_; + + // Modified only by insert(). Read racily by readers, but stale + // values are ok. + atomic max_height_; // Height of the entire list + + static common::RandomGenerator rnd; +}; + +template +common::RandomGenerator ObSkipList::rnd = common::RandomGenerator(); + +// Implementation details follow +template +struct ObSkipList::Node +{ + explicit Node(const Key &k) : key(k) {} + + Key const key; + + // Accessors/mutators for links. Wrapped in methods so we can + // add the appropriate barriers as necessary. + Node *next(int n) + { + ASSERT(n >= 0, "n >= 0"); + // Use an 'acquire load' so that we observe a fully initialized + // version of the returned Node. + return next_[n].load(std::memory_order_acquire); + } + void set_next(int n, Node *x) + { + ASSERT(n >= 0, "n >= 0"); + // Use a 'release store' so that anybody who reads through this + // pointer observes a fully initialized version of the inserted node. + next_[n].store(x, std::memory_order_release); + } + + // No-barrier variants that can be safely used in a few locations. + Node *nobarrier_next(int n) + { + ASSERT(n >= 0, "n >= 0"); + return next_[n].load(std::memory_order_relaxed); + } + void nobarrier_set_next(int n, Node *x) + { + ASSERT(n >= 0, "n >= 0"); + next_[n].store(x, std::memory_order_relaxed); + } + + bool cas_next(int n, Node *expected, Node *x) + { + ASSERT(n >= 0, "n >= 0"); + return next_[n].compare_exchange_strong(expected, x); + } + +private: + // Array of length equal to the node height. next_[0] is lowest level link. + atomic next_[1]; +}; + +template +typename ObSkipList::Node *ObSkipList::new_node(const Key &key, int height) +{ + char *const node_memory = reinterpret_cast(malloc(sizeof(Node) + sizeof(atomic) * (height - 1))); + return new (node_memory) Node(key); +} + +template +inline ObSkipList::Iterator::Iterator(const ObSkipList *list) +{ + list_ = list; + node_ = nullptr; +} + +template +inline bool ObSkipList::Iterator::valid() const +{ + return node_ != nullptr; +} + +template +inline const Key &ObSkipList::Iterator::key() const +{ + ASSERT(valid(), "valid"); + return node_->key; +} + +template +inline void ObSkipList::Iterator::next() +{ + ASSERT(valid(), "valid"); + node_ = node_->next(0); +} + +template +inline void ObSkipList::Iterator::prev() +{ + // Instead of using explicit "prev" links, we just search for the + // last node that falls before key. + ASSERT(valid(), "valid"); + node_ = list_->find_less_than(node_->key); + if (node_ == list_->head_) { + node_ = nullptr; + } +} + +template +inline void ObSkipList::Iterator::seek(const Key &target) +{ + node_ = list_->find_greater_or_equal(target, nullptr); +} + +template +inline void ObSkipList::Iterator::seek_to_first() +{ + node_ = list_->head_->next(0); +} + +template +inline void ObSkipList::Iterator::seek_to_last() +{ + node_ = list_->find_last(); + if (node_ == list_->head_) { + node_ = nullptr; + } +} + +template +int ObSkipList::random_height() +{ + // Increase height with probability 1 in kBranching + static const unsigned int kBranching = 4; + int height = 1; + while (height < kMaxHeight && rnd.next(kBranching) == 0) { + height++; + } + ASSERT(height > 0, "height > 0"); + ASSERT(height <= kMaxHeight, "height <= kMaxHeight"); + return height; +} + +template +typename ObSkipList::Node *ObSkipList::find_greater_or_equal( + const Key &key, Node **prev) const +{ + // your code here + return nullptr; +} + +template +typename ObSkipList::Node *ObSkipList::find_less_than(const Key &key) const +{ + Node *x = head_; + int level = get_max_height() - 1; + while (true) { + ASSERT(x == head_ || compare_(x->key, key) < 0, "x == head_ || compare_(x->key, key) < 0"); + Node *next = x->next(level); + if (next == nullptr || compare_(next->key, key) >= 0) { + if (level == 0) { + return x; + } else { + // Switch to next list + level--; + } + } else { + x = next; + } + } +} + +template +typename ObSkipList::Node *ObSkipList::find_last() const +{ + Node *x = head_; + int level = get_max_height() - 1; + while (true) { + Node *next = x->next(level); + if (next == nullptr) { + if (level == 0) { + return x; + } else { + // Switch to next list + level--; + } + } else { + x = next; + } + } +} + +template +ObSkipList::ObSkipList(ObComparator cmp) + : compare_(cmp), head_(new_node(0 /* any key will do */, kMaxHeight)), max_height_(1) +{ + for (int i = 0; i < kMaxHeight; i++) { + head_->set_next(i, nullptr); + } +} + +template +ObSkipList::~ObSkipList() +{ + typename std::vector nodes; + nodes.reserve(max_height_.load(std::memory_order_relaxed)); + for (Node *x = head_; x != nullptr; x = x->next(0)) { + nodes.push_back(x); + } + for (auto node : nodes) { + node->~Node(); + free(node); + } +} + +template +void ObSkipList::insert(const Key &key) +{} + +template +void ObSkipList::insert_concurrently(const Key &key) +{ + // your code here +} + +template +bool ObSkipList::contains(const Key &key) const +{ + Node *x = find_greater_or_equal(key, nullptr); + if (x != nullptr && equal(key, x->key)) { + return true; + } else { + return false; + } +} + +} // namespace oceanbase diff --git a/src/oblsm/ob_lsm_define.h b/src/oblsm/ob_lsm_define.h new file mode 100644 index 000000000..9a6cac407 --- /dev/null +++ b/src/oblsm/ob_lsm_define.h @@ -0,0 +1,28 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once +namespace oceanbase { + +static constexpr const char *SSTABLE_SUFFIX = ".sst"; +static constexpr const char *MANIFEST_SUFFIX = ".mf"; + +/** + * @enum CompactionType + * @brief Defines the types of compaction strategies in an LSM-Tree or similar systems. + */ +enum class CompactionType +{ + TIRED = 0, + LEVELED, + UNKNOWN, +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/ob_lsm_impl.cpp b/src/oblsm/ob_lsm_impl.cpp new file mode 100644 index 000000000..15a3c7349 --- /dev/null +++ b/src/oblsm/ob_lsm_impl.cpp @@ -0,0 +1,287 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/ob_lsm_impl.h" + +#include "common/log/log.h" +#include "oblsm/include/ob_lsm.h" +#include "oblsm/ob_lsm_define.h" +#include "oblsm/ob_manifest.h" +#include "oblsm/table/ob_merger.h" +#include "oblsm/table/ob_sstable.h" +#include "oblsm/table/ob_sstable_builder.h" +#include "oblsm/util/ob_coding.h" +#include "oblsm/compaction/ob_compaction_picker.h" +#include "oblsm/ob_user_iterator.h" +#include "oblsm/compaction/ob_compaction.h" +#include "oblsm/ob_lsm_define.h" + +namespace oceanbase { + +ObLsmImpl::ObLsmImpl(const ObLsmOptions &options, const string &path) + : options_(options), path_(path), mu_(), mem_table_(nullptr), imem_tables_() +{ + mem_table_ = make_shared(); + sstables_ = make_shared>>>(); + if (options_.type == CompactionType::LEVELED) { + sstables_->resize(options_.default_levels); + } + + // TODO: Check the cpu consumption at idle + executor_.init("ObLsmBackground", 1, 1, 60 * 1000); + block_cache_ = + std::unique_ptr>>{new ObLRUCache>(1024)}; +} + +RC ObLsm::open(const ObLsmOptions &options, const string &path, ObLsm **dbptr) +{ + RC rc = RC::SUCCESS; + ObLsmImpl *lsm = new ObLsmImpl(options, path); + *dbptr = lsm; + return rc; +} + +RC ObLsmImpl::put(const string_view &key, const string_view &value) +{ + // TODO: if put rate is too high, slow down writes is needed. + // currently, the writes is stopped when the memtable is full. + LOG_TRACE("begin to put key=%s, value=%s", key.data(), value.data()); + RC rc = RC::SUCCESS; + // TODO: currenttly the memtable use skiplist as the underlying data structure, + // and the skiplist concurently write is not thread safe, so we use mutex here, + // if the skiplist support `insert_concurrently()` interface, can we remove the mutex? + unique_lock lock(mu_); + + // TODO: write to WAL + uint64_t seq = seq_.fetch_add(1); + mem_table_->put(seq, key, value); + size_t mem_size = mem_table_->appro_memory_usage(); + if (mem_size > options_.memtable_size) { + // Thinking point: here vector is used to store imems, + // but only one imem is stored at most. Is it possible + // to store more than one imem and what are the implications + // of storing more than one imem. + if (imem_tables_.size() >= 1) { + cv_.wait(lock); + } + // check again after get lock(maybe freeze memtable by another thread) + if (mem_table_->appro_memory_usage() > options_.memtable_size) { + try_freeze_memtable(); + } else { + // if there are multi put threads waiting here, need to notify one thread to + // continue to write to memtable. + cv_.notify_one(); + } + } + return rc; +} + +RC ObLsmImpl::try_freeze_memtable() +{ + RC rc = RC::SUCCESS; + imem_tables_.emplace_back(mem_table_); + mem_table_ = make_shared(); + auto bg_task = [&]() { this->background_compaction(); }; + int ret = executor_.execute(bg_task); + if (ret != 0) { + rc = RC::INTERNAL; + LOG_WARN("fail to execute background compaction task"); + } + return rc; +} + +void ObLsmImpl::background_compaction() +{ + unique_lock lock(mu_); + if (imem_tables_.size() >= 1) { + shared_ptr imem = imem_tables_.back(); + imem_tables_.pop_back(); + lock.unlock(); + cv_.notify_one(); + build_sstable(imem); + // TODO: trig compaction at more scenarios, for example, + // seek compaction in + // leveldb(https://github.com/google/leveldb/blob/578eeb702ec0fbb6b9780f3d4147b1076630d633/db/version_set.cc#L650). + if (!compacting_) { + compacting_.store(true); + try_major_compaction(); + compacting_.store(false); + } + return; + } +} + +void ObLsmImpl::try_major_compaction() +{ + unique_lock lock(mu_); + unique_ptr picker(ObCompactionPicker::create(options_.type, &options_)); + unique_ptr picked = picker->pick(sstables_); + lock.unlock(); + if (picked == nullptr || picked->size() == 0) { + return; + } + vector> results = do_compaction(picked.get()); + + SSTablesPtr new_sstables = make_shared>>>(); + lock.lock(); + size_t levels_size = sstables_->size(); + bool insert_new_sstable = false; + auto find_sstable = [](const vector> &picked, const shared_ptr &sstable) { + for (auto &p : picked) { + if (p->sst_id() == sstable->sst_id()) { + return true; + } + } + return false; + }; + + vector> picked_sstables; + picked_sstables = picked->inputs(0); + const auto &level_i1 = picked->inputs(1); + if (level_i1.size() > 0) { + picked_sstables.insert(picked_sstables.end(), level_i1.begin(), level_i1.end()); + } + // TODO: unify the new sstables logic in all compaction type + if (options_.type == CompactionType::TIRED) { + for (int i = levels_size - 1; i >= 0; --i) { + const vector> &level_i = sstables_->at(i); + for (auto &sstable : level_i) { + if (find_sstable(picked_sstables, sstable)) { + if (!insert_new_sstable) { + new_sstables->insert(new_sstables->begin(), results); + insert_new_sstable = true; + } + } else { + new_sstables->insert(new_sstables->begin(), level_i); + break; + } + } + } + } else if (options_.type == CompactionType::LEVELED) { + // TODO: apply the compaction results to sstable + } + + sstables_ = new_sstables; + lock.unlock(); + + // remove from disk + for (auto &sstable : picked_sstables) { + sstable->remove(); + } + try_major_compaction(); +} + +vector> ObLsmImpl::do_compaction(ObCompaction *picked) { return {}; } + +void ObLsmImpl::build_sstable(shared_ptr imem) +{ + unique_ptr tb = make_unique(&default_comparator_, block_cache_.get()); + + uint64_t sstable_id = sstable_id_.fetch_add(1); + tb->build(imem, get_sstable_path(sstable_id), sstable_id); + unique_lock lock(mu_); + + // TODO: unify the build sstable logic in all compaction type + if (options_.type == CompactionType::TIRED) { + // TODO: record the changes for tired compaction + // here we use `level_i` to store `run_i` + sstables_->insert(sstables_->begin(), {tb->get_built_table()}); + } else if (options_.type == CompactionType::LEVELED) { + sstables_->at(0).emplace_back(tb->get_built_table()); + } +} + +string ObLsmImpl::get_sstable_path(uint64_t sstable_id) +{ + return filesystem::path(path_) / (to_string(sstable_id) + SSTABLE_SUFFIX); +} + +RC ObLsmImpl::get(const string_view &key, string *value) +{ + RC rc = RC::SUCCESS; + unique_lock lock(mu_); + shared_ptr mem = mem_table_; + + shared_ptr imm = nullptr; + if (!imem_tables_.empty()) { + imm = imem_tables_.back(); + } + vector> sstables; + for (auto &level : *sstables_) { + sstables.insert(sstables.end(), level.begin(), level.end()); + } + lock.unlock(); + string lookup_key; + put_numeric(&lookup_key, key.size() + SEQ_SIZE); + lookup_key.append(key.data(), key.size()); + // TODO: currenttly we use only use the latest seq, + // we need to use specific seq if oblsm support transaction + put_numeric(&lookup_key, seq_.load()); + + if (OB_SUCC(mem_table_->get(lookup_key, value))) { + LOG_INFO("get key from memtable"); + } else if (imm != nullptr && OB_SUCC(imm->get(lookup_key, value))) { + LOG_INFO("get key from immemtable"); + } else { + for (auto &sst : sstables) { + // TODO: sort sstables and return newest value + if (OB_SUCC(sst->get(lookup_key, value))) { + break; + } + if (rc != RC::NOT_EXIST) { + LOG_WARN("get key from sstables error: %d", rc); + } + } + } + return rc; +} + +ObLsmIterator *ObLsmImpl::new_iterator(ObLsmReadOptions options) +{ + unique_lock lock(mu_); + shared_ptr mem = mem_table_; + + shared_ptr imm = nullptr; + if (!imem_tables_.empty()) { + imm = imem_tables_.back(); + } + vector> sstables; + for (auto &level : *sstables_) { + sstables.insert(sstables.end(), level.begin(), level.end()); + } + lock.unlock(); + vector> iters; + iters.emplace_back(mem->new_iterator()); + if (imm != nullptr) { + iters.emplace_back(imm->new_iterator()); + } + for (const auto &sst : sstables) { + iters.emplace_back(sst->new_iterator()); + } + + return new_user_iterator(new_merging_iterator(&default_comparator_, std::move(iters)), seq_.load()); +} + +void ObLsmImpl::dump_sstables() +{ + unique_lock lock(mu_); + int level = sstables_->size(); + for (int i = 0; i < level; i++) { + cout << "level " << i << endl; + int level_size = 0; + for (auto &sst : sstables_->at(i)) { + cout << sst->sst_id() << ": " << sst->size() << ";"; + level_size += sst->size(); + } + cout << "level size " << level_size << endl; + } +} + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/ob_lsm_impl.h b/src/oblsm/ob_lsm_impl.h new file mode 100644 index 000000000..a931c1a07 --- /dev/null +++ b/src/oblsm/ob_lsm_impl.h @@ -0,0 +1,142 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "oblsm/include/ob_lsm.h" + +#include "common/lang/mutex.h" +#include "common/lang/atomic.h" +#include "common/lang/memory.h" +#include "common/lang/filesystem.h" +#include "common/lang/condition_variable.h" +#include "common/thread/thread_pool_executor.h" +#include "oblsm/memtable/ob_memtable.h" +#include "oblsm/table/ob_sstable.h" +#include "oblsm/util/ob_lru_cache.h" +#include "oblsm/compaction/ob_compaction.h" +#include "oblsm/ob_manifest.h" +#include +#include + +namespace oceanbase { + +class ObLsmImpl : public ObLsm +{ +public: + ObLsmImpl(const ObLsmOptions &options, const string &path); + ~ObLsmImpl() + { + executor_.shutdown(); + executor_.await_termination(); + } + + RC put(const string_view &key, const string_view &value) override; + + RC get(const string_view &key, string *value) override; + + ObLsmIterator *new_iterator(ObLsmReadOptions options) override; + + SSTablesPtr get_sstables() { return sstables_; } + + // used for debug + void dump_sstables() override; + +private: + /** + * @brief Attempts to freeze the current active MemTable. + * + * This method performs operations to freeze the active MemTable when certain conditions + * are met, such as size thresholds or timing requirements. A frozen MemTable becomes + * immutable and is ready for compaction. + * + * @return RC Status code indicating the success or failure of the freeze operation. + */ + RC try_freeze_memtable(); + + /** + * @brief Performs compaction on the SSTables selected by the compaction strategy. + * + * This function takes a compaction plan (represented by `ObCompaction`) and merges + * the inputs into a new set of SSTables. It creates iterators for the SSTables being + * compacted, merges their data, and writes the merged data into new SSTable files. + * + * @param picked A pointer to the compaction plan that specifies the input SSTables to merge. + * If `picked` is `nullptr`, no compaction is performed and an empty result is returned. + * + * @return vector> A vector of shared pointers to the newly created SSTables + * resulting from the compaction process. + * + * @details + * - The function retrieves the inputs (SSTables) from the `picked` compaction plan. + * - For each SSTable, it creates a new iterator to sequentially scan its data. + * - It merges the iterators using a merging iterator (`ObLsmIterator`). + * - It writes the merged key-value pairs into new SSTable files using `ObSSTableBuilder`. + * - If the size of the new SSTable exceeds a predefined size (`options_.table_size`), + * the builder finalizes the current SSTable and starts a new one. + * + * @warning Ensure that the `picked` object is properly populated with valid inputs. + * + */ + vector> do_compaction(ObCompaction *compaction); + + /** + * @brief Initiates a major compaction process. + * + * Major compaction involves merging all levels of SSTables into a single, consolidated + * SSTable, which reduces storage fragmentation and improves read performance. + * This process typically runs periodically or when triggered by specific conditions. + * + * @note This function should be called with care, as major compaction is a resource-intensive + * operation and may affect system performance during execution. + */ + void try_major_compaction(); + + /** + * @brief Handles background compaction tasks. + */ + void background_compaction(); + + /** + * @brief Builds an SSTable from the given MemTable. + * + * Converts the data in an immutable MemTable (`imem`) into a new SSTable and writes + * it to persistent storage. This step is usually part of the compaction pipeline. + * + * @param imem A shared pointer to the immutable MemTable (`ObMemTable`) to be converted + * into an SSTable. + * @note The caller must ensure that `imem` is immutable and ready for conversion. + */ + void build_sstable(shared_ptr imem); + + /** + * @brief Retrieves the file path for a given SSTable. + * + * @param sstable_id The unique identifier of the SSTable whose path needs to be retrieved. + * @return A string representing the full file path of the SSTable. + */ + string get_sstable_path(uint64_t sstable_id); + + ObLsmOptions options_; + string path_; + mutex mu_; + shared_ptr mem_table_; + vector> imem_tables_; + SSTablesPtr sstables_; + common::ThreadPoolExecutor executor_; + atomic seq_{0}; + atomic sstable_id_{0}; + condition_variable cv_; + const ObDefaultComparator default_comparator_; + atomic compacting_ = false; + std::unique_ptr>> block_cache_; +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/ob_manifest.cpp b/src/oblsm/ob_manifest.cpp new file mode 100644 index 000000000..0d6a8eb5a --- /dev/null +++ b/src/oblsm/ob_manifest.cpp @@ -0,0 +1,15 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/ob_manifest.h" +#include "common/log/log.h" +#include "oblsm/util/ob_file_writer.h" + +namespace oceanbase {} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/ob_manifest.h b/src/oblsm/ob_manifest.h new file mode 100644 index 000000000..f5a20986f --- /dev/null +++ b/src/oblsm/ob_manifest.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +namespace oceanbase { + +class ObManifestRecord +{}; + +class ObManifestSnapshot +{}; +class ObManifest +{ +public: +}; + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/ob_user_iterator.cpp b/src/oblsm/ob_user_iterator.cpp new file mode 100644 index 000000000..3f08aa652 --- /dev/null +++ b/src/oblsm/ob_user_iterator.cpp @@ -0,0 +1,57 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "common/lang/memory.h" +#include "oblsm/ob_user_iterator.h" +#include "oblsm/include/ob_lsm_iterator.h" +#include "oblsm/util/ob_comparator.h" +#include "oblsm/ob_lsm_define.h" +#include "oblsm/util/ob_coding.h" + +namespace oceanbase { + +// a simple warpper for internal iterator +class ObUserIterator : public ObLsmIterator +{ +public: + ObUserIterator(ObLsmIterator *iter, uint64_t seq) : iter_(iter), seq_(seq) {} + + ~ObUserIterator() = default; + + bool valid() const override { return iter_->valid(); } + + void seek_to_first() override { iter_->seek_to_first(); } + + void seek_to_last() override { iter_->seek_to_last(); } + + void seek(const string_view &target) override + { + put_numeric(&lookup_key_, target.size() + SEQ_SIZE); + lookup_key_.append(target.data(), target.size()); + put_numeric(&lookup_key_, seq_); + iter_->seek(string_view(lookup_key_.data(), lookup_key_.size())); + } + + void next() override { iter_->next(); } + + string_view key() const override { return extract_user_key(iter_->key()); } + + string_view value() const override { return iter_->value(); } + +private: + // internal iterator, the key is internal key + unique_ptr iter_; + uint64_t seq_; + string lookup_key_; +}; + +ObLsmIterator *new_user_iterator(ObLsmIterator *iter, uint64_t seq) { return new ObUserIterator(iter, seq); } + +} // namespace oceanbase diff --git a/src/oblsm/ob_user_iterator.h b/src/oblsm/ob_user_iterator.h new file mode 100644 index 000000000..94348ac63 --- /dev/null +++ b/src/oblsm/ob_user_iterator.h @@ -0,0 +1,38 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +namespace oceanbase { + +class ObComparator; +class ObLsmIterator; + +/** + * @brief Creates a new user iterator wrapping the given LSM iterator. + * + * @detail This function takes an existing LSM iterator (`ObLsmIterator`) and wraps it to create + * a user-level iterator. The new iterator is initialized with a specific sequence number (`seq`), + * which determines the context or version visibility of the iterator. + * + * @param iterator The original `ObLsmIterator` to be wrapped. + * @param seq The sequence number to associate with the new user iterator. + * + * @return A pointer to the newly created `ObLsmIterator` instance that acts as a user iterator. + * + * @note The caller is responsible for managing the memory of the returned iterator and + * ensuring that it is properly deleted after use to prevent memory leaks. + * + * @warning Passing a `nullptr` as the `iterator` parameter will result in undefined behavior. + * Ensure that a valid iterator is provided before calling this function. + */ +ObLsmIterator *new_user_iterator(ObLsmIterator *iterator, uint64_t seq); + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/table/ob_block.cpp b/src/oblsm/table/ob_block.cpp new file mode 100644 index 000000000..6d01d27c4 --- /dev/null +++ b/src/oblsm/table/ob_block.cpp @@ -0,0 +1,105 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/table/ob_block.h" +#include "oblsm/util/ob_coding.h" + +namespace oceanbase { + +RC ObBlock::decode(const string &data) +{ + return RC::UNIMPLEMENTED; +} + +RC ObBlock::get(const string_view &key, string *value) +{ + RC rc = RC::NOT_EXIST; + for (size_t i = 0; i < offsets_.size(); i++) { + uint32_t curr_begin = offsets_[i]; + uint32_t curr_end = i == offsets_.size() - 1 ? data_.size() : offsets_[i + 1]; + string_view curr = string_view(data_.data() + curr_begin, curr_end - curr_begin); + // TODO: parse key and value + // const char * data_ptr = curr.data(); + uint32_t key_size = get_numeric(curr.data()); + string_view curr_key = string_view(curr.data() + sizeof(uint32_t), key_size); + uint32_t value_size = get_numeric(curr.data() + sizeof(uint32_t) + key_size); + string_view val = string_view(curr.data() + 2 * sizeof(uint32_t) + key_size, value_size); + // TODO: here key use lookup key + if (comparator_->compare(extract_user_key(curr_key), extract_user_key_from_lookup_key(key)) == 0) { + comparator_->compare(extract_user_key(curr_key), extract_user_key_from_lookup_key(key)); + *value = val; + rc = RC::SUCCESS; + break; + } + } + return rc; +} + +string_view ObBlock::get_entry(uint32_t offset) const +{ + uint32_t curr_begin = offsets_[offset]; + uint32_t curr_end = offset == offsets_.size() - 1 ? data_.size() : offsets_[offset + 1]; + string_view curr = string_view(data_.data() + curr_begin, curr_end - curr_begin); + return curr; +} + +ObLsmIterator *ObBlock::new_iterator() const { return new BlockIterator(comparator_, this, size()); } + +void BlockIterator::parse_entry() +{ + curr_entry_ = data_->get_entry(index_); + uint32_t key_size = get_numeric(curr_entry_.data()); + key_ = string_view(curr_entry_.data() + sizeof(uint32_t), key_size); + uint32_t value_size = get_numeric(curr_entry_.data() + sizeof(uint32_t) + key_size); + value_ = string_view(curr_entry_.data() + 2 * sizeof(uint32_t) + key_size, value_size); +} + +string BlockMeta::encode() const +{ + string ret; + put_numeric(&ret, first_key_.size()); + ret.append(first_key_); + put_numeric(&ret, last_key_.size()); + ret.append(last_key_); + put_numeric(&ret, offset_); + put_numeric(&ret, size_); + return ret; +} + +RC BlockMeta::decode(const string &data) +{ + RC rc = RC::SUCCESS; + const char *data_ptr = data.c_str(); + uint32_t first_key_size = get_numeric(data_ptr); + data_ptr += sizeof(uint32_t); + first_key_.assign(data_ptr, first_key_size); + data_ptr += first_key_size; + uint32_t last_key_size = get_numeric(data_ptr); + data_ptr += sizeof(uint32_t); + last_key_.assign(data_ptr, last_key_size); + data_ptr += last_key_size; + offset_ = get_numeric(data_ptr); + data_ptr += sizeof(uint32_t); + size_ = get_numeric(data_ptr); + return rc; +} + +void BlockIterator::seek(const string_view &lookup_key) +{ + index_ = 0; + while(valid()) { + parse_entry(); + if (comparator_->compare(extract_user_key(key_), extract_user_key_from_lookup_key(lookup_key)) >= 0) { + break; + } + index_++; + } +} +} // namespace oceanbase diff --git a/src/oblsm/table/ob_block.h b/src/oblsm/table/ob_block.h new file mode 100644 index 000000000..26ef3fb4c --- /dev/null +++ b/src/oblsm/table/ob_block.h @@ -0,0 +1,149 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/lang/string.h" +#include "common/lang/vector.h" +#include "oblsm/include/ob_lsm_iterator.h" +#include "oblsm/util/ob_comparator.h" + +namespace oceanbase { + +// TODO: block align to 4KB +// ┌─────────────────┐ +// │ entry 1 │◄───┐ +// ├─────────────────┤ │ +// │ entry 2 │ │ +// ├─────────────────┤ │ +// │ .. │ │ +// ├─────────────────┤ │ +// │ entry n │◄─┐ │ +// ├─────────────────┤ │ │ +// ┌───►│ offset size(n) │ │ │ +// │ ├─────────────────┤ │ │ +// │ │ offset 1 ├──┼─┘ +// │ ├─────────────────┤ │ +// │ │ .. │ │ +// │ ├─────────────────┤ │ +// │ │ offset n ├──┘ +// │ ├─────────────────┤ +// └────┤ offset start │ +// └─────────────────┘ +/** + * @class ObBlock + * @brief Represents a data block in the LSM-Tree. + * + * The `ObBlock` class manages a block of serialized key-value pairs, along with + * their offsets, for efficient storage and retrieval. It provides methods to decode + * serialized data, access individual entries, and create iterators for traversing + * the block contents. + */ +class ObBlock +{ + +public: + ObBlock(const ObComparator *comparator) : comparator_(comparator) {} + + void add_offset(uint32_t offset) { offsets_.push_back(offset); } + + uint32_t get_offset(int index) const { return offsets_[index]; } + + string_view get_entry(uint32_t offset) const; + + int size() const { return offsets_.size(); } + + /** + * @brief Decodes serialized block data. + * + * This function parses and decodes the serialized string data to reconstruct + * the block's structure, including all key-value offsets and entries. + * The decoded data format can reference ObBlockBuilder. + * @param data The serialized block data as a string. + * @return RC The result code indicating the success or failure of the decode operation. + */ + RC decode(const string &data); + + RC get(const string_view &key, string *value); + + ObLsmIterator *new_iterator() const; + +private: + string data_; + vector offsets_; + // TODO: remove + const ObComparator *comparator_; +}; + +class BlockIterator : public ObLsmIterator +{ +public: + BlockIterator(const ObComparator *comparator, const ObBlock *data, uint32_t count) + : comparator_(comparator), data_(data), count_(count) + {} + BlockIterator(const BlockIterator &) = delete; + BlockIterator &operator=(const BlockIterator &) = delete; + + ~BlockIterator() override = default; + + void seek(const string_view &lookup_key) override; + void seek_to_first() override + { + index_ = 0; + parse_entry(); + } + void seek_to_last() override + { + index_ = count_ - 1; + parse_entry(); + } + + bool valid() const override { return index_ < count_; } + void next() override + { + index_++; + if (valid()) { + parse_entry(); + } + } + string_view key() const override { return key_; }; + string_view value() const override { return value_; } + +private: + void parse_entry(); + +private: + const ObComparator *comparator_; + const ObBlock *const data_; + string_view curr_entry_; + string_view key_; + string_view value_; + uint32_t count_ = 0; + uint32_t index_ = 0; +}; + +class BlockMeta +{ +public: + BlockMeta() {} + BlockMeta(const string &first_key, const string &last_key, uint32_t offset, uint32_t size) + : first_key_(first_key), last_key_(last_key), offset_(offset), size_(size) + {} + string encode() const; + RC decode(const string &data); + + string first_key_; + string last_key_; + + // Offset of ObBlock in SSTable + uint32_t offset_; + uint32_t size_; +}; +} // namespace oceanbase diff --git a/src/oblsm/table/ob_block_builder.cpp b/src/oblsm/table/ob_block_builder.cpp new file mode 100644 index 000000000..72c3712e7 --- /dev/null +++ b/src/oblsm/table/ob_block_builder.cpp @@ -0,0 +1,63 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/table/ob_block_builder.h" +#include "oblsm/util/ob_coding.h" +#include "common/log/log.h" + +namespace oceanbase { + +void ObBlockBuilder::reset() +{ + offsets_.clear(); + data_.clear(); + // first_key_.clear(); +} + +RC ObBlockBuilder::add(const string_view &key, const string_view &value) +{ + RC rc = RC::SUCCESS; + if (appro_size() + key.size() + value.size() + 2 * sizeof(uint32_t) > BLOCK_SIZE) { + // TODO: support large kv pair. + if (offsets_.empty()) { + LOG_ERROR("block is empty, but kv pair is too large, key size: %lu, value size: %lu", key.size(), value.size()); + return RC::UNIMPLEMENTED; + } + LOG_WARN("block is full, can't add more kv pair"); + rc = RC::FULL; + } else { + offsets_.push_back(data_.size()); + put_numeric(&data_, key.size()); + data_.append(key.data(), key.size()); + put_numeric(&data_, value.size()); + data_.append(value.data(), value.size()); + } + return rc; +} + +string ObBlockBuilder::last_key() const +{ + string_view last_kv(data_.data() + offsets_.back(), data_.size() - offsets_.back()); + uint32_t key_length = get_numeric(last_kv.data()); + return string(last_kv.data() + sizeof(uint32_t), key_length); +} + +string_view ObBlockBuilder::finish() +{ + uint32_t data_size = data_.size(); + put_numeric(&data_, offsets_.size()); + for (size_t i = 0; i < offsets_.size(); i++) { + put_numeric(&data_, offsets_[i]); + } + put_numeric(&data_, data_size); + return string_view(data_.data(), data_.size()); +} + +} // namespace oceanbase diff --git a/src/oblsm/table/ob_block_builder.h b/src/oblsm/table/ob_block_builder.h new file mode 100644 index 000000000..9e536bed6 --- /dev/null +++ b/src/oblsm/table/ob_block_builder.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/lang/string.h" +#include "common/lang/string_view.h" +#include "common/lang/vector.h" +#include "common/sys/rc.h" + +namespace oceanbase { + +/** + * @brief Build a ObBlock in SSTable + */ +class ObBlockBuilder +{ + +public: + RC add(const string_view &key, const string_view &value); + + string_view finish(); + + void reset(); + + string last_key() const; + + uint32_t appro_size() { return data_.size() + offsets_.size() * sizeof(uint32_t); } + +private: + static const uint32_t BLOCK_SIZE = 4 * 1024; // 4KB + // Offsets of key-value pairs. + vector offsets_; + // key-value pairs + // TODO: use block as data container + // TODO: add checksum + string data_; + + // string first_key_; +}; + +} // namespace oceanbase diff --git a/src/oblsm/table/ob_merger.cpp b/src/oblsm/table/ob_merger.cpp new file mode 100644 index 000000000..4979cc500 --- /dev/null +++ b/src/oblsm/table/ob_merger.cpp @@ -0,0 +1,120 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/table/ob_merger.h" +#include "oblsm/include/ob_lsm_iterator.h" +#include "oblsm/util/ob_comparator.h" +#include "oblsm/ob_lsm_define.h" +#include "oblsm/util/ob_coding.h" + +namespace oceanbase { + +class ObMergingIterator : public ObLsmIterator +{ +public: + ObMergingIterator(const ObComparator *comparator, vector> &&children) + : comparator_(comparator), children_(std::move(children)), current_(nullptr) + {} + + ~ObMergingIterator() = default; + + bool valid() const override { return current_ != nullptr; } + + void seek_to_first() override + { + for (size_t i = 0; i < children_.size(); i++) { + children_[i]->seek_to_first(); + } + find_smallest(); + } + + void seek_to_last() override + { + for (size_t i = 0; i < children_.size(); i++) { + children_[i]->seek_to_last(); + } + find_largest(); + } + + void seek(const string_view &target) override + { + for (size_t i = 0; i < children_.size(); i++) { + children_[i]->seek(target); + } + find_smallest(); + } + + void next() override + { + current_->next(); + find_smallest(); + } + + string_view key() const override { return current_->key(); } + + string_view value() const override { return current_->value(); } + +private: + void find_smallest(); + void find_largest(); + + // We might want to use a heap in case there are lots of children. + // For now we use a simple array since we expect a very small number + // of children. + const ObComparator * comparator_; + vector> children_; + ObLsmIterator * current_; +}; + +void ObMergingIterator::find_smallest() +{ + ObLsmIterator *smallest = nullptr; + for (size_t i = 0; i < children_.size(); i++) { + ObLsmIterator *child = children_[i].get(); + if (child->valid()) { + if (smallest == nullptr) { + smallest = child; + } else if (comparator_->compare(extract_user_key(child->key()), extract_user_key(smallest->key())) < 0) { + smallest = child; + } + } + } + current_ = smallest; +} + +void ObMergingIterator::find_largest() +{ + ObLsmIterator *largest = nullptr; + for (size_t i = 0; i < children_.size(); i++) { + ObLsmIterator *child = children_[i].get(); + if (child->valid()) { + if (largest == nullptr) { + largest = child; + } else if (comparator_->compare(extract_user_key(child->key()), extract_user_key(largest->key())) > 0) { + largest = child; + } + } + } + current_ = largest; +} + +ObLsmIterator *new_merging_iterator(const ObComparator *comparator, vector> &&children) +{ + if (children.size() == 0) { + return nullptr; + } else if (children.size() == 1) { + return children[0].release(); + } else { + return new ObMergingIterator(comparator, std::move(children)); + } + return nullptr; +} + +} // namespace oceanbase diff --git a/src/oblsm/table/ob_merger.h b/src/oblsm/table/ob_merger.h new file mode 100644 index 000000000..7c577a74d --- /dev/null +++ b/src/oblsm/table/ob_merger.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/lang/vector.h" +#include "common/lang/memory.h" + +namespace oceanbase { + +class ObComparator; +class ObLsmIterator; + +/** + * @brief Return an iterator that provided the union of the data in + * children. For example, an iterator that provided + * the union of memtable and sstable. + * + */ +ObLsmIterator *new_merging_iterator(const ObComparator *comparator, vector> &&children); + +} // namespace oceanbase diff --git a/src/oblsm/table/ob_sstable.cpp b/src/oblsm/table/ob_sstable.cpp new file mode 100644 index 000000000..270365992 --- /dev/null +++ b/src/oblsm/table/ob_sstable.cpp @@ -0,0 +1,110 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/table/ob_sstable.h" +#include "oblsm/util/ob_coding.h" +#include "common/log/log.h" +#include "common/lang/filesystem.h" +namespace oceanbase { + +void ObSSTable::init() +{ + // your code here +} + +RC ObSSTable::get(const string_view &lookup_key, string *value) +{ + RC rc = RC::SUCCESS; + int i = 0; + // TODO: binary search and compare in BlockMeta + for (const auto &meta : block_metas_) { + if (comparator_->compare(extract_user_key(meta.first_key_), extract_user_key_from_lookup_key(lookup_key)) <= 0 && + comparator_->compare(extract_user_key(meta.last_key_), extract_user_key_from_lookup_key(lookup_key)) >= 0) { + auto block = read_block_with_cache(i); + if (block != nullptr) { + rc = block->get(lookup_key, value); + if (rc == RC::SUCCESS) { + return rc; + } + } else { + rc = RC::INTERNAL; + } + } + i++; + } + return RC::NOT_EXIST; +} + +shared_ptr ObSSTable::read_block_with_cache(uint32_t block_idx) const +{ + // your code here + return nullptr; +} + +shared_ptr ObSSTable::read_block(uint32_t block_idx) const +{ + // your code here + return nullptr; +} + +void ObSSTable::remove() { filesystem::remove(file_name_); } + +ObLsmIterator *ObSSTable::new_iterator() { return new TableIterator(get_shared_ptr()); } + +void TableIterator::read_block_with_cache() +{ + block_ = sst_->read_block_with_cache(curr_block_idx_); + block_iterator_.reset(block_->new_iterator()); +} + +void TableIterator::seek_to_first() +{ + curr_block_idx_ = 0; + read_block_with_cache(); + block_iterator_->seek_to_first(); +} + +void TableIterator::seek_to_last() +{ + curr_block_idx_ = block_cnt_ - 1; + read_block_with_cache(); + block_iterator_->seek_to_last(); +} + +void TableIterator::next() +{ + block_iterator_->next(); + if (block_iterator_->valid()) { + } else if (curr_block_idx_ < block_cnt_ - 1) { + curr_block_idx_++; + read_block_with_cache(); + block_iterator_->seek_to_first(); + } +} + +void TableIterator::seek(const string_view &lookup_key) +{ + curr_block_idx_ = 0; + for (; curr_block_idx_ < block_cnt_; curr_block_idx_++) { + const auto block_meta = sst_->block_meta(curr_block_idx_); + if (sst_->comparator()->compare(extract_user_key(block_meta.first_key_), extract_user_key_from_lookup_key(lookup_key)) <= 0 && + sst_->comparator()->compare(extract_user_key(block_meta.last_key_), extract_user_key_from_lookup_key(lookup_key)) >= 0) { + break; + } + } + if (curr_block_idx_ == block_cnt_) { + block_iterator_ = nullptr; + return; + } + read_block_with_cache(); + block_iterator_->seek(lookup_key); +}; + +} // namespace oceanbase diff --git a/src/oblsm/table/ob_sstable.h b/src/oblsm/table/ob_sstable.h new file mode 100644 index 000000000..0bdcc489b --- /dev/null +++ b/src/oblsm/table/ob_sstable.h @@ -0,0 +1,166 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "oblsm/util/ob_file_reader.h" +#include "common/lang/memory.h" +#include "common/sys/rc.h" +#include "oblsm/table/ob_block.h" +#include "oblsm/util/ob_comparator.h" +#include "oblsm/util/ob_lru_cache.h" + +namespace oceanbase { + +// TODO: add a dumptool to dump sst files(example for usage: ./dumptool sst_file) +// ┌─────────────────┐ +// │ block 1 │◄───┐ +// ├─────────────────┤ │ +// │ block 2 │ │ +// ├─────────────────┤ │ +// │ .. │ │ +// ├─────────────────┤ │ +// │ block n │◄─┐ │ +// ├─────────────────┤ │ │ +// ┌───►│ meta size(n) │ │ │ +// │ ├─────────────────┤ │ │ +// │ │ block meta 1 ├──┼─┘ +// │ ├─────────────────┤ │ +// │ │ .. │ │ +// │ ├─────────────────┤ │ +// │ │ block meta n ├──┘ +// │ ├─────────────────┤ +// └────┤ │ +// └─────────────────┘ + +/** + * @class ObSSTable + * @brief Represents an SSTable (Sorted String Table) in the LSM-Tree. + * + * The `ObSSTable` class is responsible for managing on-disk sorted string tables (SSTables). + * It provides methods for initialization, key-value lookups, block reading (with caching support), + * and creating iterators for traversal. Each SSTable is uniquely identified by an `sst_id_` and + * interacts with the LRU cache for efficient block access. + */ +class ObSSTable : public enable_shared_from_this +{ +public: + /** + * @brief Constructor for ObSSTable. + * + * Initializes an SSTable with its unique ID, file name, comparator, and block cache. + * + * @param sst_id A unique identifier for the SSTable. + * @param file_name The name of the file storing the SSTable data. + * @param comparator A pointer to the comparator used for key comparison. + * @param block_cache A pointer to the LRU block cache for caching block-level data. + */ + ObSSTable(uint32_t sst_id, const string &file_name, const ObComparator *comparator, + ObLRUCache> *block_cache) + : sst_id_(sst_id), + file_name_(file_name), + comparator_(comparator), + file_reader_(nullptr), + block_cache_(block_cache) + {} + + ~ObSSTable() = default; + + /** + * @brief Initializes the SSTable instance. + * + * This function is responsible for performing setup tasks required for the SSTable, + * such as preparing file readers or pre-loading block_metas_. + * + * @warning This function must be called before performing any operations on the SSTable. + */ + void init(); + + RC get(const string_view &lookup_key, string *value); + + uint32_t sst_id() const { return sst_id_; } + + shared_ptr get_shared_ptr() { return shared_from_this(); } + + ObLsmIterator *new_iterator(); + + /** + * @brief Reads a block from the SSTable using the block cache. + * + * Attempts to read the specified block using the block cache. If the block is not + * in the cache, it will load the block from the SSTable file and update the cache. + * + * @param block_idx The index of the block to read. + * + * @return shared_ptr A shared pointer to the requested block. + */ + shared_ptr read_block_with_cache(uint32_t block_idx) const; + + /** + * @brief Reads a block directly from the SSTable file. + * + * This function bypasses the block cache and directly reads the requested block + * from the SSTable file. + * + * @param block_idx The index of the block to read. + * + * @return shared_ptr A shared pointer to the requested block. + */ + shared_ptr read_block(uint32_t block_idx) const; + + uint32_t block_count() const { return block_metas_.size(); } + + uint32_t size() const { return file_reader_->file_size(); } + + const BlockMeta block_meta(int i) const { return block_metas_[i]; } + + const ObComparator *comparator() const { return comparator_; } + + void remove(); + string first_key() const { return block_metas_.empty() ? "" : block_metas_[0].first_key_; } + string last_key() const { return block_metas_.empty() ? "" : block_metas_.back().last_key_; } + +private: + uint32_t sst_id_; + string file_name_; + const ObComparator *comparator_ = nullptr; + unique_ptr file_reader_; + vector block_metas_; + + [[maybe_unused]] ObLRUCache> *block_cache_; +}; + +class TableIterator : public ObLsmIterator +{ +public: + TableIterator(const shared_ptr &sst) : sst_(sst), block_cnt_(sst->block_count()) {} + ~TableIterator() = default; + + void seek(const string_view &key) override; + void seek_to_first() override; + void seek_to_last() override; + void next() override; + bool valid() const override { return block_iterator_ != nullptr && block_iterator_->valid(); } + string_view key() const override { return block_iterator_->key(); } + string_view value() const override { return block_iterator_->value(); } + +private: + void read_block_with_cache(); + + const shared_ptr sst_; + uint32_t block_cnt_ = 0; + uint32_t curr_block_idx_ = 0; + shared_ptr block_; + unique_ptr block_iterator_; +}; + +using SSTablesPtr = shared_ptr>>>; + +} // namespace oceanbase diff --git a/src/oblsm/table/ob_sstable_builder.cpp b/src/oblsm/table/ob_sstable_builder.cpp new file mode 100644 index 000000000..468214ca6 --- /dev/null +++ b/src/oblsm/table/ob_sstable_builder.cpp @@ -0,0 +1,53 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/table/ob_sstable_builder.h" +#include "oblsm/util/ob_coding.h" + +namespace oceanbase { + +// TODO: refactor build with mem_table/iterator logic. +RC ObSSTableBuilder::build(shared_ptr mem_table, const std::string &file_name, uint32_t sst_id) +{ + return RC::UNIMPLEMENTED; +} + +void ObSSTableBuilder::finish_build_block() +{ + string last_key = block_builder_.last_key(); + string_view block_contents = block_builder_.finish(); + file_writer_->write(block_contents); + block_metas_.push_back(BlockMeta(curr_blk_first_key_, last_key, curr_offset_, block_contents.size())); + // TODO: block aligned to BLOCK_SIZE + curr_offset_ += block_contents.size(); + block_builder_.reset(); +} + +shared_ptr ObSSTableBuilder::get_built_table() +{ + // TODO: sstable should have more metadata + shared_ptr sstable = make_shared(sst_id_, file_writer_->file_name(), comparator_, block_cache_); + sstable->init(); + return sstable; +} + +void ObSSTableBuilder::reset() +{ + block_builder_.reset(); + curr_blk_first_key_.clear(); + if (file_writer_ != nullptr) { + file_writer_.reset(nullptr); + } + block_metas_.clear(); + curr_offset_ = 0; + sst_id_ = 0; + file_size_ = 0; +} +} // namespace oceanbase diff --git a/src/oblsm/table/ob_sstable_builder.h b/src/oblsm/table/ob_sstable_builder.h new file mode 100644 index 000000000..3e21bfb39 --- /dev/null +++ b/src/oblsm/table/ob_sstable_builder.h @@ -0,0 +1,67 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/lang/memory.h" +#include "oblsm/table/ob_block_builder.h" +#include "oblsm/memtable/ob_memtable.h" +#include "common/lang/string.h" +#include "oblsm/util/ob_file_writer.h" +#include "oblsm/table/ob_block.h" +#include "oblsm/table/ob_sstable.h" +#include "oblsm/util/ob_lru_cache.h" + +namespace oceanbase { + +/** + * @brief Build a SSTable + */ +class ObSSTableBuilder +{ +public: + ObSSTableBuilder(const ObComparator *comparator, ObLRUCache> *block_cache) + : comparator_(comparator), block_cache_(block_cache) + {} + ~ObSSTableBuilder() = default; + + /** + * @brief Builds an SSTable from the provided in-memory table and stores it in a file. + * + * This function takes an `ObMemTable` as input, partitions the data into blocks, + * serializes the blocks, and writes them into an SSTable file. + * + * @param mem_table A shared pointer to the `ObMemTable` containing the data to be written into the SSTable. + * @param file_name The name of the file where the constructed SSTable will be stored. + * @param sst_id A unique identifier assigned to the created SSTable. + * + * @return RC A result code indicating the success or failure of the SSTable creation process. + * + */ + RC build(shared_ptr mem_table, const string &file_name, uint32_t sst_id); + size_t file_size() const { return file_size_; } + shared_ptr get_built_table(); + void reset(); + +private: + void finish_build_block(); + + const ObComparator *comparator_ = nullptr; + ObBlockBuilder block_builder_; + string curr_blk_first_key_; + unique_ptr file_writer_; + vector block_metas_; + uint32_t curr_offset_ = 0; + uint32_t sst_id_ = 0; + size_t file_size_ = 0; + + ObLRUCache> *block_cache_ = nullptr; +}; +} // namespace oceanbase diff --git a/src/oblsm/util/ob_arena.cpp b/src/oblsm/util/ob_arena.cpp new file mode 100644 index 000000000..3baef0b09 --- /dev/null +++ b/src/oblsm/util/ob_arena.cpp @@ -0,0 +1,18 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "oblsm/util/ob_arena.h" + +namespace oceanbase { + +ObArena::ObArena() : memory_usage_(0) {} + +ObArena::~ObArena() +{ + for (size_t i = 0; i < blocks_.size(); i++) { + delete[] blocks_[i]; + } +} + +} // namespace oceanbase diff --git a/src/oblsm/util/ob_arena.h b/src/oblsm/util/ob_arena.h new file mode 100644 index 000000000..41cf24d2b --- /dev/null +++ b/src/oblsm/util/ob_arena.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include +#include "common/lang/atomic.h" +#include "common/lang/vector.h" + +namespace oceanbase { + +/** + * @brief a simple memory allocator. + * @todo optimize fractional memory allocation + * @note 1. alloc memory from arena, no need to free it. + * 2. not thread-safe. + */ +class ObArena +{ +public: + ObArena(); + + ObArena(const ObArena &) = delete; + ObArena &operator=(const ObArena &) = delete; + + ~ObArena(); + + char *alloc(size_t bytes); + + size_t memory_usage() const { return memory_usage_; } + +private: + // Array of new[] allocated memory blocks + vector blocks_; + + // Total memory usage of the arena. + size_t memory_usage_; +}; + +inline char *ObArena::alloc(size_t bytes) +{ + if (bytes <= 0) { + return nullptr; + } + char *result = new char[bytes]; + blocks_.push_back(result); + memory_usage_ += bytes + sizeof(char *); + return result; +} + +} // namespace oceanbase diff --git a/src/oblsm/util/ob_bloomfilter.cpp b/src/oblsm/util/ob_bloomfilter.cpp index ebec65e55..604548397 100644 --- a/src/oblsm/util/ob_bloomfilter.cpp +++ b/src/oblsm/util/ob_bloomfilter.cpp @@ -8,4 +8,6 @@ EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more details. */ -#include "oblsm/util/ob_bloomfilter.h" \ No newline at end of file +#include "oblsm/util/ob_bloomfilter.h" + +namespace oceanbase {} // namespace oceanbase diff --git a/src/oblsm/util/ob_coding.h b/src/oblsm/util/ob_coding.h new file mode 100644 index 000000000..09d380efc --- /dev/null +++ b/src/oblsm/util/ob_coding.h @@ -0,0 +1,112 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/lang/string.h" + +namespace oceanbase { + +static const uint8_t SEQ_SIZE = 8; +static const uint8_t LOOKUP_KEY_PREFIX_SIZE = 8; + +/** + * @brief Appends a numeric value to a string in binary format. + * + * This template function takes a numeric value of any type and appends its binary + * representation to the specified string. + * + * @tparam T The numeric type (e.g., `int`, `uint64_t`, `float`). + * @param dst A pointer to the string to which the numeric value will be appended. + * @param v The numeric value to append. + */ +template +void put_numeric(string *dst, T v) +{ + dst->append(reinterpret_cast(&v), sizeof(T)); +} + +/** + * @brief Extracts a numeric value from a binary data source. + * + * This template function reads a numeric value of any type from the provided + * binary data source and returns it. + * + * @tparam T The numeric type to extract (e.g., `int`, `uint64_t`, `float`). + * @param src A pointer to the source binary data from which the numeric value will be read. + * @return The extracted numeric value of type `T`. + */ +template +T get_numeric(const char *src) +{ + T value; + memcpy(&value, src, sizeof(T)); + return value; +} + +/** + * @brief Extracts the user key portion from an internal key. + * + * An internal key in the LSM-Tree typically contains additional metadata such as + * a sequence number at the end. This function removes the sequence number portion + * and returns the user key portion. + * + * @param internal_key The internal key to extract the user key from. + * @return A `string_view` representing the user key portion of the internal key. + */ +inline string_view extract_user_key(const string_view &internal_key) +{ + return string_view(internal_key.data(), internal_key.size() - SEQ_SIZE); +} + +/** + * @brief Extracts the sequence number from an internal key. + * + * The sequence number is usually stored at the end of the internal key in + * binary format. This function retrieves and returns the sequence number. + * + * @param internal_key The internal key to extract the sequence number from. + * @return The extracted sequence number as a `uint64_t`. + */ +inline uint64_t extract_sequence(const string_view &internal_key) +{ + return get_numeric(internal_key.data() + internal_key.size() - SEQ_SIZE); +} + +/** + * @brief Computes the size of the user key from a lookup key. + * + * A lookup key typically contains a prefix and a sequence number in addition + * to the user key. This function calculates and returns the size of the user + * key portion. + * + * @param lookup_key The lookup key to analyze. + * @return The size of the user key portion in bytes. + */ +inline size_t user_key_size_from_lookup_key(const string_view &lookup_key) +{ + return lookup_key.size() - SEQ_SIZE - LOOKUP_KEY_PREFIX_SIZE; +} + +/** + * @brief Extracts the user key from a lookup key. + * + * A lookup key in the LSM-Tree contains a prefix, user key, and sequence + * number. This function extracts and returns the user key portion. + * + * @param lookup_key The lookup key to extract the user key from. + * @return A `string_view` representing the user key portion of the lookup key. + */ +inline string_view extract_user_key_from_lookup_key(const string_view &lookup_key) +{ + return string_view(lookup_key.data() + LOOKUP_KEY_PREFIX_SIZE, user_key_size_from_lookup_key(lookup_key)); +} + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/util/ob_comparator.cpp b/src/oblsm/util/ob_comparator.cpp new file mode 100644 index 000000000..3f0fb1d08 --- /dev/null +++ b/src/oblsm/util/ob_comparator.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/util/ob_comparator.h" +#include "oblsm/ob_lsm_define.h" +#include "oblsm/util/ob_coding.h" + +namespace oceanbase { +int ObDefaultComparator::compare(const string_view &a, const string_view &b) const { return a.compare(b); } + +int ObInternalKeyComparator::compare(const string_view &a, const string_view &b) const +{ + const string_view akey = extract_user_key(a); + const string_view bkey = extract_user_key(b); + int r = default_comparator_.compare(akey, bkey); + if (r == 0) { + uint64_t aseq = get_numeric(akey.data() + a.size() - SEQ_SIZE); + uint64_t bseq = get_numeric(bkey.data() + b.size() - SEQ_SIZE); + if (aseq > bseq) { + r = -1; + } else if (aseq < bseq) { + r = +1; + } + } + return r; +} + +} // namespace oceanbase \ No newline at end of file diff --git a/src/oblsm/util/ob_comparator.h b/src/oblsm/util/ob_comparator.h new file mode 100644 index 000000000..8f2a37e79 --- /dev/null +++ b/src/oblsm/util/ob_comparator.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/lang/string_view.h" + +namespace oceanbase { + +/** + * @brief base class of all comparators + */ +class ObComparator +{ +public: + virtual ~ObComparator() = default; + + /** + * @brief Three-way comparison. + * @return < 0 iff "a" < "b", + * @return == 0 iff "a" == "b", + * @return > 0 iff "a" > "b" + */ + virtual int compare(const string_view &a, const string_view &b) const = 0; +}; + +/** + * @brief comparator with lexicographical order + */ +class ObDefaultComparator : public ObComparator +{ +public: + explicit ObDefaultComparator() = default; + int compare(const string_view &a, const string_view &b) const override; +}; + +/** + * @brief internal key comparator + * @details internal key: | key_size(8B) | key | sequence_number(8B) | + */ +class ObInternalKeyComparator : public ObComparator +{ +public: + explicit ObInternalKeyComparator() = default; + + int compare(const string_view &a, const string_view &b) const override; + const ObComparator *user_comparator() const { return &default_comparator_; } + +private: + ObDefaultComparator default_comparator_; +}; + +} // namespace oceanbase diff --git a/src/oblsm/util/ob_file_reader.cpp b/src/oblsm/util/ob_file_reader.cpp new file mode 100644 index 000000000..fed7baf8e --- /dev/null +++ b/src/oblsm/util/ob_file_reader.cpp @@ -0,0 +1,66 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/util/ob_file_reader.h" +#include +#include +#include "common/lang/filesystem.h" + +#include "common/log/log.h" + +namespace oceanbase { + +ObFileReader::~ObFileReader() { close_file(); } + +string ObFileReader::read_pos(uint32_t pos, uint32_t size) +{ + string buf; + buf.resize(size); + ssize_t read_size = ::pread(fd_, buf.data(), size, static_cast(pos)); + if (read_size != size) { + LOG_WARN("Failed to read file %s, read_size=%ld, size=%ld", filename_.c_str(), read_size, size); + return ""; + } + + return buf; +} + +uint32_t ObFileReader::file_size() +{ + return filesystem::file_size(filename_); +} + +unique_ptr ObFileReader::create_file_reader(const string &filename) +{ + unique_ptr reader(new ObFileReader(filename)); + if (OB_FAIL(reader->open_file())) { + LOG_WARN("Failed to open file %s", filename.c_str()); + return nullptr; + } + return reader; +} + +RC ObFileReader::open_file() +{ + RC rc = RC::SUCCESS; + fd_ = ::open(filename_.c_str(), O_RDONLY); + if (fd_ < 0) { + LOG_WARN("Failed to open file %s", filename_.c_str()); + rc = RC::INTERNAL; + } + return rc; +} + +void ObFileReader::close_file() +{ + ::close(fd_); +} + +} // namespace oceanbase diff --git a/src/oblsm/util/ob_file_reader.h b/src/oblsm/util/ob_file_reader.h new file mode 100644 index 000000000..8a9471789 --- /dev/null +++ b/src/oblsm/util/ob_file_reader.h @@ -0,0 +1,114 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/lang/fstream.h" +#include "common/lang/memory.h" +#include "common/lang/sstream.h" +#include "common/lang/string.h" +#include "common/sys/rc.h" +#include "common/lang/mutex.h" + +namespace oceanbase { + +/** + * @class ObFileReader + * @brief A utility class for reading files in an efficient manner. + * + * The `ObFileReader` class provides a simple interface for reading files. It allows + * opening, closing, and reading specific portions of a file while also exposing + * the file size for external use. This class is intended for use in scenarios where + * file-based data storage, such as SSTables, is accessed. + */ +class ObFileReader +{ +public: + /** + * @brief Constructs an `ObFileReader` object with the specified file name. + * + * The file name is stored internally, but the file is not opened until + * `open_file()` is explicitly called. + * + * @param filename The name of the file to be read. + */ + ObFileReader(const string &filename) : filename_(filename) {} + + ~ObFileReader(); + + /** + * @brief Opens the file for reading. + * + * This method attempts to open the file specified during the construction + * of the object. If the file is successfully opened, the internal file descriptor + * (`fd_`) is updated. + * + * @return An RC (return code) indicating the success or failure of the operation. + */ + RC open_file(); + + /** + * @brief Closes the file if it is currently open. + * + * This method releases the file descriptor (`fd_`) associated with the file. + */ + void close_file(); + + /** + * @brief Reads a portion of the file from a specified position. + * + * This method reads `size` bytes starting from position `pos` in the file. + * + * @param pos The starting position (offset) in the file. + * @param size The number of bytes to read. + * @return A string containing the requested portion of the file's data. + */ + string read_pos(uint32_t pos, uint32_t size); + + /** + * @brief Returns the size of the file. + * + * This method retrieves the size of the file in bytes. It relies on the file + * being successfully opened. + * + * @return The size of the file in bytes. + */ + uint32_t file_size(); + + /** + * @brief Creates a new `ObFileReader` instance. + * + * This static factory method constructs a new `ObFileReader` object and + * initializes it with the specified file name. + * + * @param filename The name of the file to be read. + * @return A `unique_ptr` to the created `ObFileReader` object. + */ + static unique_ptr create_file_reader(const string &filename); + +private: + /** + * @brief The name of the file to be read. + * + * This string stores the file name specified during the construction of + * the `ObFileReader` object. + */ + string filename_; + + /** + * @brief The file descriptor for the currently opened file. + * + * This integer represents the file descriptor used for reading the file. + * If no file is open, it is set to `-1`. + */ + int fd_ = -1; +}; + +} // namespace oceanbase diff --git a/src/oblsm/util/ob_file_writer.cpp b/src/oblsm/util/ob_file_writer.cpp new file mode 100644 index 000000000..14de6db25 --- /dev/null +++ b/src/oblsm/util/ob_file_writer.cpp @@ -0,0 +1,69 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/util/ob_file_writer.h" + +namespace oceanbase { + +ObFileWriter::~ObFileWriter() { close_file(); } + +RC ObFileWriter::write(const string_view &data) +{ + RC rc = RC::SUCCESS; + file_ << data; + if (!file_.good()) { + rc = RC::IOERR_WRITE; + } + return rc; +} + +RC ObFileWriter::flush() +{ + RC rc = RC::SUCCESS; + file_.flush(); + if (!file_.good()) { + rc = RC::IOERR_SYNC; + } + return rc; +} + +RC ObFileWriter::open_file() +{ + RC rc = RC::SUCCESS; + if (file_.is_open()) { + return rc; + } + if (append_) { + file_.open(filename_, std::ios::app | std::ios::binary); + } else { + file_.open(filename_, std::ios::out | std::ios::trunc | std::ios::binary); + } + if (!file_.good()) { + rc = RC::IOERR_OPEN; + } + return rc; +} + +void ObFileWriter::close_file() +{ + if (file_.is_open()) { + file_.flush(); + file_.close(); + } +} + +unique_ptr ObFileWriter::create_file_writer(const string &filename, bool append) +{ + unique_ptr writer(new ObFileWriter(filename, append)); + writer->open_file(); + return writer; +} + +} // namespace oceanbase diff --git a/src/oblsm/util/ob_file_writer.h b/src/oblsm/util/ob_file_writer.h new file mode 100644 index 000000000..772aa6f10 --- /dev/null +++ b/src/oblsm/util/ob_file_writer.h @@ -0,0 +1,132 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include "common/lang/fstream.h" +#include "common/lang/string.h" +#include "common/lang/string_view.h" +#include "common/lang/memory.h" +#include "common/sys/rc.h" + +namespace oceanbase { + +/** + * @class ObFileWriter + * @brief A utility class for writing data to files. + * + * The `ObFileWriter` class provides a convenient interface for writing data to a file. + * It supports creating and opening files for writing, appending data to existing files, + * and flushing buffered data to disk. The class ensures proper resource management by + * providing methods for explicitly closing the file. + * TODO: use posix + */ +class ObFileWriter +{ +public: + /** + * @brief Constructs an `ObFileWriter` object with the specified file name and mode. + * + * The constructor initializes the file writer with the given file name. The file is not opened + * until `open_file()` is called. The `append` parameter determines whether the file should + * be opened in append mode or overwrite mode. + * + * @param filename The name of the file to write to. + * @param append Whether to open the file in append mode (default: `false`). + */ + ObFileWriter(const string &filename, bool append = false) : filename_(filename), append_(append) {} + + ~ObFileWriter(); + + /** + * @brief Opens the file for writing. + * + * This method attempts to open the file specified during the construction of the object. + * If the file is successfully opened, further write operations can be performed. + * + * @return An RC (return code) indicating the success or failure of the operation. + * @note If the file cannot be opened (e.g., due to permission issues), an error code is returned. + */ + RC open_file(); + + /** + * @brief Closes the file if it is currently open. + * + * This method releases the resources associated with the file. After calling this method, + * the file can no longer be written to until it is reopened. + */ + void close_file(); + + /** + * @brief Writes data to the file. + * + * Appends the provided data to the file. If the file is not open, the operation will fail. + * + * @param data The data to write to the file, provided as a `string_view`. + * @return An RC (return code) indicating the success or failure of the operation. + */ + RC write(const string_view &data); + + /** + * @brief Flushes buffered data to disk. + * + * Ensures that all buffered data is written to the file system. This method is useful + * for ensuring data integrity in cases where the program may terminate unexpectedly. + * + * @return An RC (return code) indicating the success or failure of the flush operation. + */ + RC flush(); + + /** + * @brief Checks if the file is currently open. + * + * @return `true` if the file is open, `false` otherwise. + */ + bool is_open() const { return file_.is_open(); } + + /** + * @brief Returns the name of the file being written to. + * + * @return A string containing the file name. + */ + string file_name() const { return filename_; } + + /** + * @brief Creates a new `ObFileWriter` instance. + * + * This static factory method constructs a new `ObFileWriter` object with the specified + * file name and append mode. + * + * @param filename The name of the file to write to. + * @param append Whether to open the file in append mode (default: `false`). + * @return A `unique_ptr` to the created `ObFileWriter` object. + */ + static unique_ptr create_file_writer(const string &filename, bool append); + +private: + /** + * @brief The name of the file to be written to. + */ + string filename_; + + /** + * @brief Indicates whether the file should be opened in append mode. + * + * If `true`, data will be appended to the existing file. If `false`, the existing file (if any) + * will be overwritten when the file is opened. + */ + bool append_; + + /** + * @brief The file stream used for writing data. + */ + ofstream file_; +}; +} // namespace oceanbase diff --git a/src/oblsm/util/ob_lru_cache.cpp b/src/oblsm/util/ob_lru_cache.cpp new file mode 100644 index 000000000..de00301cf --- /dev/null +++ b/src/oblsm/util/ob_lru_cache.cpp @@ -0,0 +1,13 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "oblsm/util/ob_lru_cache.h" + +namespace oceanbase {} // namespace oceanbase diff --git a/src/oblsm/util/ob_lru_cache.h b/src/oblsm/util/ob_lru_cache.h new file mode 100644 index 000000000..17ba573a5 --- /dev/null +++ b/src/oblsm/util/ob_lru_cache.h @@ -0,0 +1,97 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#pragma once + +#include +#include + +namespace oceanbase { + +/** + * @class ObLRUCache + * @brief A thread-safe implementation of an LRU (Least Recently Used) cache. + * + * The `ObLRUCache` class provides a fixed-size cache that evicts the least recently used + * entries when the cache exceeds its capacity. It supports thread-safe operations for + * inserting, retrieving, and checking the existence of cache entries. + * + * @tparam KeyType The type of keys used to identify cache entries. + * @tparam ValueType The type of values stored in the cache. + */ +template +class ObLRUCache +{ +public: + /** + * @brief Constructs an `ObLRUCache` with a specified capacity. + * + * @param capacity The maximum number of elements the cache can hold. + */ + ObLRUCache(size_t capacity) : capacity_(capacity) {} + + /** + * @brief Retrieves a value from the cache using the specified key. + * + * This method searches for the specified key in the cache. If the key is found, the + * corresponding value is returned and the key-value pair is moved to the front of the + * LRU list (indicating recent use). + * + * @param key The key to search for in the cache. + * @param value A reference to store the value associated with the key. + * @return `true` if the key is found and the value is retrieved; `false` otherwise. + */ + bool get(const KeyType &key, ValueType &value) { return false; } + + /** + * @brief Inserts a key-value pair into the cache. + * + * If the key already exists in the cache, its value is updated, and the key-value pair + * is moved to the front of the LRU list. If the cache exceeds its capacity after insertion, + * the least recently used entry is evicted. + * + * @param key The key to insert into the cache. + * @param value The value to associate with the specified key. + */ + void put(const KeyType &key, const ValueType &value) {} + + /** + * @brief Checks whether the specified key exists in the cache. + * + * @param key The key to check in the cache. + * @return `true` if the key exists; `false` otherwise. + */ + bool contains(const KeyType &key) const { return false; } + +private: + /** + * @brief The maximum number of elements the cache can hold. + */ + size_t capacity_; +}; + +/** + * @brief Creates a new instance of `ObLRUCache` with the specified capacity. + * + * This factory function constructs an `ObLRUCache` instance for the specified key and + * value types, and initializes it with the given capacity. + * + * @tparam Key The type of keys used to identify cache entries. + * @tparam Value The type of values stored in the cache. + * @param capacity The maximum number of elements the cache can hold. + * @return A pointer to the newly created `ObLRUCache` instance. + */ +template +ObLRUCache *new_lru_cache(uint32_t capacity) +{ + return nullptr; +} + +} // namespace oceanbase diff --git a/unittest/oblsm/ob_arena_test.cpp b/unittest/oblsm/ob_arena_test.cpp new file mode 100644 index 000000000..a30e384ad --- /dev/null +++ b/unittest/oblsm/ob_arena_test.cpp @@ -0,0 +1,46 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "gtest/gtest.h" + +#include "oblsm/util/ob_arena.h" +#include "common/math/random_generator.h" + +using namespace oceanbase; + +TEST(arena_test, DISABLED_arena_test_basic) +{ + ObArena arena; + const int count = 1000; + size_t bytes = 0; + common::RandomGenerator rnd; + for (int i = 0; i < count; i++) { + size_t s; + s = rnd.next(4000); + if (s == 0) { + s = 1; + } + char* r; + r = arena.alloc(s); + + for (size_t b = 0; b < s; b++) { + r[b] = i % 256; + } + bytes += s; + bytes += sizeof(char*); + ASSERT_EQ(arena.memory_usage(), bytes); + } +} + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_block_test.cpp b/unittest/oblsm/ob_block_test.cpp new file mode 100644 index 000000000..ee46a748d --- /dev/null +++ b/unittest/oblsm/ob_block_test.cpp @@ -0,0 +1,66 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "gtest/gtest.h" + +#include "oblsm/table/ob_block.h" +#include "oblsm/table/ob_block_builder.h" +#include "oblsm/util/ob_comparator.h" + +using namespace oceanbase; + +TEST(block_test, DISABLED_block_builder_test_basic) +{ + ObBlockBuilder builder; + ObDefaultComparator comparator; + builder.add("key1", "value1"); + builder.add("key2", "value2"); + builder.add("key3", "value3"); + ASSERT_EQ(builder.last_key(), "key3"); + builder.add("key4", "value4"); + ASSERT_EQ(builder.last_key(), "key4"); + string_view block_contents = builder.finish(); + + ObBlock block(&comparator); + block.decode(string(block_contents.data(), block_contents.size())); + ASSERT_EQ(block.size(), 4); +} + +TEST(block_test, DISABLED_block_iterator_test_basic) +{ + ObBlockBuilder builder; + ObDefaultComparator comparator; + builder.add("key1", "value1"); + builder.add("key2", "value2"); + builder.add("key3", "value3"); + ASSERT_EQ(builder.last_key(), "key3"); + builder.add("key4", "value4"); + ASSERT_EQ(builder.last_key(), "key4"); + string_view block_contents = builder.finish(); + + ObBlock block(&comparator); + block.decode(string(block_contents.data(), block_contents.size())); + ASSERT_EQ(block.size(), 4); + BlockIterator iter(&comparator, &block, block.size()); + iter.seek_to_first(); + ASSERT_TRUE(iter.valid()); + ASSERT_EQ(iter.key(), "key1"); + ASSERT_EQ(iter.value(), "value1"); + while(iter.valid()) { + cout << iter.key() << " " << iter.value() << endl; + iter.next(); + } +} + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_compaction_test.cpp b/unittest/oblsm/ob_compaction_test.cpp new file mode 100644 index 000000000..35d7d4144 --- /dev/null +++ b/unittest/oblsm/ob_compaction_test.cpp @@ -0,0 +1,155 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "gtest/gtest.h" + +#include "common/lang/filesystem.h" +#include "oblsm/include/ob_lsm.h" +#include "oblsm/ob_lsm_impl.h" +#include "unittest/oblsm/ob_lsm_test_base.h" + +using namespace oceanbase; + +class ObLsmCompactionTest : public ObLsmTestBase { +}; + +bool check_compaction(ObLsm* lsm) +{ + ObLsmImpl *lsm_impl = dynamic_cast(lsm); + if (nullptr == lsm_impl) { + return false; + } + auto sstables = lsm_impl->get_sstables(); + + if (sstables->size() != ObLsmOptions().default_levels) { + return false; + } + auto level_0 = sstables->at(0); + if (level_0.size() > ObLsmOptions().default_l0_file_num) { + return false; + } + + ObLsmOptions options; + + // check level_i size + size_t level_size = options.default_l1_level_size; + for (size_t i = 1; i < options.default_levels; ++i) { + const auto& level_i = sstables->at(i); + int level_i_size = 0; + for (const auto& sstable : level_i) { + level_i_size += sstable->size(); + } + if (level_i_size > level_size * 1.1) { + return false; + } + level_size *= options.default_level_ratio; + } + + // check level_i overlap + for (size_t i = 1; i < options.default_levels; ++i) { + const auto& level_i = sstables->at(i); + vector> key_ranges; + for (const auto& sstable : level_i) { + key_ranges.push_back(make_pair(sstable->first_key(), sstable->last_key())); + } + std::sort(key_ranges.begin(), key_ranges.end(), [](const auto& a, const auto& b) { return a.first < b.first; }); + ObInternalKeyComparator comp; + for (size_t j = 1; j < key_ranges.size(); ++j) { + if (comp.compare(key_ranges[j].first, key_ranges[j-1].second) < 0) { + return false; + } + } + } + return true; +} + +TEST_P(ObLsmCompactionTest, DISABLED_oblsm_compaction_test_basic1) +{ + size_t num_entries = GetParam(); + auto data = KeyValueGenerator::generate_data(num_entries); + + for (const auto& [key, value] : data) { + ASSERT_EQ(db->put(key, value), RC::SUCCESS); + } + sleep(1); + + ObLsmIterator* it = db->new_iterator(ObLsmReadOptions()); + it->seek_to_first(); + size_t count = 0; + while (it->valid()) { + it->next(); + ++count; + } + EXPECT_EQ(count, num_entries); + delete it; + ASSERT_TRUE(check_compaction(db)); +} + +void thread_put(ObLsm *db, int start, int end) { + for (int i = start; i < end; ++i) { + const std::string key = "key" + std::to_string(i); + RC rc = db->put(key, key); + ASSERT_EQ(rc, RC::SUCCESS); + } +} + +TEST_P(ObLsmCompactionTest, DISABLED_ConcurrentPutAndGetTest) { + const int num_entries = GetParam(); + const int num_threads = 4; + const int batch_size = num_entries / num_threads; + + std::vector threads; + for (int i = 0; i < num_threads; ++i) { + int start = i * batch_size; + int end = 0; + if (i == num_threads - 1) { + end = num_entries; + } else { + end = start + batch_size; + } + threads.emplace_back(thread_put, db, start, end); + } + + for (auto &thread : threads) { + thread.join(); + } + // wait for compaction + sleep(1); + + // Verify all data using iterator + ObLsmReadOptions options; + ObLsmIterator *iterator = db->new_iterator(options); + + iterator->seek_to_first(); + int count = 0; + while (iterator->valid()) { + iterator->next(); + ++count; + } + + EXPECT_EQ(count, num_entries); + + // Clean up + delete iterator; + + ASSERT_TRUE(check_compaction(db)); +} + +INSTANTIATE_TEST_SUITE_P( + ObLsmCompactionTests, + ObLsmCompactionTest, + ::testing::Values(1, 10, 1000, 10000, 100000) +); + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_lru_cache_test.cpp b/unittest/oblsm/ob_lru_cache_test.cpp new file mode 100644 index 000000000..98b38cce9 --- /dev/null +++ b/unittest/oblsm/ob_lru_cache_test.cpp @@ -0,0 +1,105 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "gtest/gtest.h" + +#include "common/lang/string.h" +#include "common/lang/vector.h" +#include "common/lang/thread.h" +#include "common/lang/utility.h" +#include "oblsm/util/ob_lru_cache.h" + +using namespace oceanbase; + +class ObLRUCacheTest : public ::testing::TestWithParam { +protected: + ObLRUCache* cache; + size_t capacity; + + void SetUp() override { + capacity = GetParam(); + cache = new_lru_cache(capacity); + } + + void TearDown() override { + delete cache; + } +}; + +TEST_P(ObLRUCacheTest, DISABLED_lru_capacity) { + ASSERT_NE(cache, nullptr); + + for (size_t i = 0; i < capacity + 2; ++i) { + string key = "key" + to_string(i); + string value = "value" + to_string(i); + cache->put(key, value); + } + + for (size_t i = 0; i < capacity + 2; ++i) { + string key = "key" + to_string(i); + string value; + if (i < 2) { + EXPECT_FALSE(cache->get(key, value)); + } else { + EXPECT_TRUE(cache->get(key, value)); + EXPECT_EQ(value, "value" + to_string(i)); + } + } +} + +TEST_P(ObLRUCacheTest, DISABLED_update_exist_key) { + ASSERT_NE(cache, nullptr); + + cache->put("key1", "value1"); + cache->put("key2", "value2"); + cache->put("key1", "value1_updated"); + + string value; + + EXPECT_TRUE(cache->get("key1", value)); + EXPECT_EQ(value, "value1_updated"); + + EXPECT_TRUE(cache->get("key2", value)); + EXPECT_EQ(value, "value2"); +} + +TEST_P(ObLRUCacheTest, DISABLED_contains_key) { + ASSERT_NE(cache, nullptr); + + cache->put("key1", "value1"); + cache->put("key2", "value2"); + + EXPECT_TRUE(cache->contains("key1")); + EXPECT_TRUE(cache->contains("key2")); + EXPECT_FALSE(cache->contains("key3")); + + string value; + EXPECT_TRUE(cache->get("key1", value)); + EXPECT_EQ(value, "value1"); +} + +INSTANTIATE_TEST_SUITE_P( + CacheTests, + ObLRUCacheTest, + ::testing::Values(2, 5, 10, 100, 10000) +); + +TEST(lru_test, zero_capacity) +{ + ObLRUCache lru_cache(0); + lru_cache.put(1, "one"); + ASSERT_FALSE(lru_cache.contains(1)); +} + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_lsm_test.cpp b/unittest/oblsm/ob_lsm_test.cpp new file mode 100644 index 000000000..adc5c33c0 --- /dev/null +++ b/unittest/oblsm/ob_lsm_test.cpp @@ -0,0 +1,121 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "gtest/gtest.h" + +#include "common/lang/filesystem.h" +#include "common/lang/thread.h" +#include "common/lang/utility.h" +#include "oblsm/include/ob_lsm.h" +#include "oblsm/ob_lsm_define.h" +#include "unittest/oblsm/ob_lsm_test_base.h" + +using namespace oceanbase; + +class ObLsmTest : public ObLsmTestBase { +}; + +// TODO: add update/delete case +TEST_P(ObLsmTest, DISABLED_oblsm_test_basic1) +{ + size_t num_entries = GetParam(); + auto data = KeyValueGenerator::generate_data(num_entries); + + for (const auto& [key, value] : data) { + ASSERT_EQ(db->put(key, value), RC::SUCCESS); + } + + for (const auto& [key, value] : data) { + string fetched_value; + ASSERT_EQ(db->get(key, &fetched_value), RC::SUCCESS); + EXPECT_EQ(fetched_value, value); + } + sleep(2); + + ObLsmIterator* it = db->new_iterator(ObLsmReadOptions()); + it->seek_to_first(); + size_t count = 0; + while (it->valid()) { + it->next(); + ++count; + } + EXPECT_EQ(count, num_entries); + delete it; + + ObLsmIterator* it2 = db->new_iterator(ObLsmReadOptions()); + it2->seek("key" + to_string(num_entries/2)); + ASSERT_TRUE(it2->valid()); + ASSERT_EQ(it2->value(), "value" + to_string(num_entries/2)); + while (it2->valid()) + { + it2->next(); + } + delete it2; +} + +void thread_put(ObLsm *db, int start, int end) { + for (int i = start; i < end; ++i) { + const std::string key = "key" + std::to_string(i); + RC rc = db->put(key, key); + ASSERT_EQ(rc, RC::SUCCESS); + } +} + +TEST_P(ObLsmTest, DISABLED_ConcurrentPutAndGetTest) { + const int num_entries = GetParam(); + const int num_threads = 4; + const int batch_size = num_entries / num_threads; + + std::vector threads; + for (int i = 0; i < num_threads; ++i) { + int start = i * batch_size; + int end = 0; + if (i == num_threads - 1) { + end = num_entries; + } else { + end = start + batch_size; + } + threads.emplace_back(thread_put, db, start, end); + } + + for (auto &thread : threads) { + thread.join(); + } + // TODO: remove sleep + sleep(2); + + // Verify all data using iterator + ObLsmReadOptions options; + ObLsmIterator *iterator = db->new_iterator(options); + + iterator->seek_to_first(); + int count = 0; + while (iterator->valid()) { + iterator->next(); + ++count; + } + + EXPECT_EQ(count, num_entries); + + // Clean up + delete iterator; +} + +INSTANTIATE_TEST_SUITE_P( + ObLsmTests, + ObLsmTest, + ::testing::Values(1, 10, 1000, 10000, 50000) +); + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_lsm_test_base.h b/unittest/oblsm/ob_lsm_test_base.h new file mode 100644 index 000000000..4d4c157b3 --- /dev/null +++ b/unittest/oblsm/ob_lsm_test_base.h @@ -0,0 +1,64 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "gtest/gtest.h" + +#include "common/lang/filesystem.h" +#include "common/lang/thread.h" +#include "common/lang/utility.h" +#include "oblsm/include/ob_lsm.h" +#include "oblsm/ob_lsm_define.h" + +using namespace oceanbase; + +class KeyValueGenerator +{ +public: + static vector> generate_data(size_t count) + { + vector> data; + for (size_t i = 0; i < count; ++i) { + data.emplace_back("key" + to_string(i), "value" + to_string(i)); + } + return data; + } +}; + +class ObLsmTestBase : public ::testing::TestWithParam +{ +protected: + ObLsm *db; + ObLsmOptions options; + string path; + + void SetUp() override + { + path = "./testdb"; + set_up_options(); + filesystem::remove_all(path); + filesystem::create_directory(path); + ASSERT_EQ(ObLsm::open(options, path, &db), RC::SUCCESS); + ASSERT_NE(db, nullptr); + } + + void set_up_options() + { + options.memtable_size = 8 * 1024; + options.table_size = 16 * 1024; + options.default_levels = 7; + options.default_l1_level_size = 128 * 1024; + options.default_level_ratio = 10; + options.default_l0_file_num = 3; + options.default_run_num = 7; + options.type = CompactionType::LEVELED; + } + + void TearDown() override { delete db; } +}; \ No newline at end of file diff --git a/unittest/oblsm/ob_skiplist_test.cpp b/unittest/oblsm/ob_skiplist_test.cpp new file mode 100644 index 000000000..82bcbbba2 --- /dev/null +++ b/unittest/oblsm/ob_skiplist_test.cpp @@ -0,0 +1,277 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "gtest/gtest.h" + +#include "oblsm/memtable/ob_skiplist.h" +#include "common/math/random_generator.h" +#include "common/thread/thread_pool_executor.h" +#include "common/lang/thread.h" + +using namespace oceanbase; + +using Key = uint64_t; + +struct Comparator { + int operator()(const Key& a, const Key& b) const { + if (a < b) { + return -1; + } else if (a > b) { + return +1; + } else { + return 0; + } + } +}; + +TEST(skiplist_test, DISABLED_skiplist_test_basic) +{ + common::RandomGenerator rnd; + const int N = 2000; + const int R = 5000; + std::set keys; + Comparator cmp; + ObSkipList list(cmp); + for (int i = 0; i < N; i++) { + Key key = rnd.next() % R; + if (keys.insert(key).second) { + list.insert(key); + } + } + + for (int i = 0; i < R; i++) { + if (list.contains(i)) { + ASSERT_EQ(keys.count(i), 1); + } else { + ASSERT_EQ(keys.count(i), 0); + } + } + +} + +inline uint32_t decode_fixed32(const char* ptr) { + uint32_t result; + memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load + return result; +} + +uint32_t murmurhash(const char* data, size_t n, uint32_t seed) { + // https://github.com/aappleby/smhasher/wiki/MurmurHash1 + const uint32_t m = 0xc6a4a793; + const uint32_t r = 24; + const char* limit = data + n; + uint32_t h = static_cast(seed ^ (n * m)); + + while (data + 4 <= limit) { + uint32_t w = decode_fixed32(data); + data += 4; + h += w; + h *= m; + h ^= (h >> 16); + } + + switch (limit - data) { + case 3: + h += static_cast(static_cast(data[2])) << 16; + case 2: + h += static_cast(static_cast(data[1])) << 8; + case 1: + h += static_cast(static_cast(data[0])); + h *= m; + h ^= (h >> r); + break; + } + return h; +} + +class ConcurrentTest { + public: + static const uint32_t K = 8; + + private: + static uint64_t key(Key key) { return (key >> 40); } + static uint64_t gen(Key key) { return (key >> 8) & 0xffffffffu; } + static uint64_t hash(Key key) { return key & 0xff; } + + static uint64_t hash_numbers(uint64_t k, uint64_t g) { + uint64_t data[2] = {k, g}; + return murmurhash(reinterpret_cast(data), sizeof(data), 0); + } + + static Key make_key(uint64_t k, uint64_t g) { + assert(sizeof(Key) == sizeof(uint64_t)); + assert(k <= K); // We sometimes pass K to seek to the end of the skiplist + assert(g <= 0xffffffffu); + return ((k << 40) | (g << 8) | (hash_numbers(k, g) & 0xff)); + } + + // Per-key generation + struct State { + std::atomic generation[K]; + void Set(int k, int v) { + generation[k].store(v, std::memory_order_release); + } + int Get(int k) { return generation[k].load(std::memory_order_acquire); } + + State() { + for (unsigned int k = 0; k < K; k++) { + Set(k, 0); + } + } + }; + + // Current state of the test + State current_; + + // InlineSkipList is not protected by mu_. We just use a single writer + // thread to modify it. + ObSkipList list_; + + public: + ConcurrentTest() : list_(Comparator()) {} + thread_local static common::RandomGenerator rnd; + int scan() { + auto iter = ObSkipList::Iterator(&list_); + iter.seek_to_first(); + int count = 0; + while (iter.valid()) { + count++; + iter.next(); + } + return count; + } + // REQUIRES: No concurrent calls for the same k + void concurrent_write_step(uint32_t k) { + const int g = current_.Get(k) + 1; + const Key new_key = make_key(k, g); + list_.insert_concurrently(new_key); + ASSERT_EQ(g, current_.Get(k) + 1); + current_.Set(k, g); + } + +}; + +thread_local common::RandomGenerator ConcurrentTest::rnd = common::RandomGenerator(); + +const uint32_t ConcurrentTest::K; +using TestInlineSkipList = ObSkipList; +class InlineSkipTest : public testing::Test { + public: + void Insert(TestInlineSkipList* list, Key key) { + list->insert(key); + keys_.insert(key); + } + + private: + std::set keys_; +}; + +class TestState { + public: + ConcurrentTest t_; + std::atomic quit_flag_; + std::atomic next_writer_; + + enum ReaderState { STARTING, RUNNING, DONE }; + + explicit TestState() + : quit_flag_(false), + state_(STARTING), + pending_writers_(0), + state_cv_() {} + + void wait(ReaderState s) { + std::unique_lock lock(mu_); + while (state_ != s) { + state_cv_.wait(lock); + } + } + + void change(ReaderState s) { + std::unique_lock lock(mu_); + state_ = s; + state_cv_.notify_one(); + } + + void adjust_pending_writers(int delta) { + std::unique_lock lock(mu_); + pending_writers_ += delta; + if (pending_writers_ == 0) { + state_cv_.notify_one(); + } + } + + void wait_for_pending_writers() { + std::unique_lock lock(mu_); + while (pending_writers_ != 0) { + state_cv_.wait(lock); + } + } + + private: + std::mutex mu_; + ReaderState state_; + int pending_writers_; + std::condition_variable state_cv_; +}; + + +static void concurrent_reader(void* arg) { + TestState* state = static_cast(arg); + state->change(TestState::RUNNING); + while (!state->quit_flag_.load(std::memory_order_acquire)) { + // TODO: add read_step + } + state->change(TestState::DONE); +} + +static void concurrent_writer(void* arg) { + TestState* state = static_cast(arg); + uint32_t k = state->next_writer_++ % ConcurrentTest::K; + state->t_.concurrent_write_step(k); + state->adjust_pending_writers(-1); +} + + +static void RunConcurrentInsert(int write_parallelism = 4) { + common::ThreadPoolExecutor executor_; + executor_.init("skiplist_test", write_parallelism, write_parallelism, 60 * 1000); + common::RandomGenerator rnd; + const int N = 1000; + const int kSize = 1000; + for (int i = 0; i < N; i++) { + TestState* state = new TestState(); + executor_.execute(std::bind(concurrent_reader, state)); + state->wait(TestState::RUNNING); + int k = 0; + for (k = 0; k < kSize; k += write_parallelism) { + state->next_writer_ = rnd.next(); + state->adjust_pending_writers(write_parallelism); + for (int p = 0; p < write_parallelism; ++p) { + executor_.execute(std::bind(concurrent_writer, state)); + } + state->wait_for_pending_writers(); + } + int count = state->t_.scan(); + ASSERT_EQ(k, count); + state->quit_flag_.store(true, std::memory_order_release); + state->wait(TestState::DONE); + delete state; + } +} + +TEST_F(InlineSkipTest, DISABLED_ConcurrentInsert2) { RunConcurrentInsert(2); } +TEST_F(InlineSkipTest, DISABLED_ConcurrentInsert3) { RunConcurrentInsert(4); } + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_table_test.cpp b/unittest/oblsm/ob_table_test.cpp new file mode 100644 index 000000000..3ddf23341 --- /dev/null +++ b/unittest/oblsm/ob_table_test.cpp @@ -0,0 +1,48 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "gtest/gtest.h" + +#include "common/lang/filesystem.h" +#include "oblsm/util/ob_comparator.h" +#include "oblsm/table/ob_sstable_builder.h" +#include "oblsm/table/ob_sstable.h" + +using namespace oceanbase; + +TEST(table_test, DISABLED_table_test_basic) +{ + ObDefaultComparator comparator; + shared_ptr table = make_shared(); + uint64_t seq = 0; + size_t count = 5; + for (size_t i = 0; i < count; i++) { + string key(to_string(i)); + table->put(seq++, key, key); + } + ObSSTableBuilder tb(&comparator, nullptr); + ASSERT_EQ(tb.build(table, "test.sst", 0), RC::SUCCESS); + shared_ptr sst = tb.get_built_table(); + ObLsmIterator* sst_iter = sst->new_iterator(); + sst_iter->seek_to_first(); + while(sst_iter->valid()) { + cout << sst_iter->key() << " " << sst_iter->value() << endl; + sst_iter->next(); + } + delete sst_iter; + +} + + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/oblsm/ob_util_test.cpp b/unittest/oblsm/ob_util_test.cpp new file mode 100644 index 000000000..1c6917468 --- /dev/null +++ b/unittest/oblsm/ob_util_test.cpp @@ -0,0 +1,42 @@ +/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved. +miniob is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. */ + +#include "gtest/gtest.h" +#include +#include + +#include "oblsm/util/ob_comparator.h" +#include "oblsm/util/ob_file_reader.h" +#include "oblsm/util/ob_file_writer.h" +#include "common/lang/filesystem.h" + +using namespace oceanbase; + +TEST(util_test, DISABLED_comparator_test_basic) +{ + ObDefaultComparator comparator; + EXPECT_TRUE(comparator.compare("key99", "key999") < 0); + EXPECT_TRUE(comparator.compare("key100", "key10") > 0); + EXPECT_TRUE(comparator.compare("key111", "key111") == 0); +} + +TEST(util_test, DISABLED_create_file) { + remove("tmpfile"); + auto w = ObFileWriter::create_file_writer("tmpfile", false); + w->open_file(); + EXPECT_TRUE(filesystem::exists("tmpfile")); + remove("tmpfile"); +} + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/observer/mvcc_trx_log_test.cpp b/unittest/observer/mvcc_trx_log_test.cpp index 1657d795b..c2c154d7c 100644 --- a/unittest/observer/mvcc_trx_log_test.cpp +++ b/unittest/observer/mvcc_trx_log_test.cpp @@ -707,7 +707,6 @@ TEST(MvccTrxLog, wal_rollback_abnormal) visible_count++; } } - ASSERT_EQ(visible_count, insert_num / 2); } db2->trx_kit().destroy_trx(trx);