From cd171399fa88b4733a7750455c6b0ded47e6b058 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Fri, 28 Jun 2024 02:21:08 +0530 Subject: [PATCH 01/14] package caskdb --- caskdb/__init__.py | 0 disk_store.py => caskdb/disk_store.py | 3 +- format.py => caskdb/format.py | 0 caskdb/memory_store.py | 12 ++++++++ example.py | 4 +-- memory_store.py | 19 ------------ mypy.ini | 2 ++ requirements_dev.txt | 3 +- setup.cfg | 43 +++++++++++++++++++++++++++ setup.py | 3 ++ tests/test_disk_store.py | 2 +- tests/test_format.py | 10 +++++-- tests/test_memory_store.py | 4 +-- 13 files changed, 77 insertions(+), 28 deletions(-) create mode 100644 caskdb/__init__.py rename disk_store.py => caskdb/disk_store.py (98%) rename format.py => caskdb/format.py (100%) create mode 100644 caskdb/memory_store.py delete mode 100644 memory_store.py create mode 100644 mypy.ini create mode 100644 setup.cfg create mode 100644 setup.py diff --git a/caskdb/__init__.py b/caskdb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/disk_store.py b/caskdb/disk_store.py similarity index 98% rename from disk_store.py rename to caskdb/disk_store.py index 3cd0e99..89f18a7 100644 --- a/disk_store.py +++ b/caskdb/disk_store.py @@ -19,11 +19,12 @@ # it also supports dictionary style API too: disk["hamlet"] = "shakespeare" """ + import os.path import time import typing -from format import KeyEntry, encode_kv, decode_kv, HEADER_SIZE, decode_header +from caskdb.format import KeyEntry, encode_kv, decode_kv, HEADER_SIZE, decode_header # We use `file.seek` method to move our cursor to certain byte offset for read # or write operations. The method takes two parameters file.seek(offset, whence). diff --git a/format.py b/caskdb/format.py similarity index 100% rename from format.py rename to caskdb/format.py diff --git a/caskdb/memory_store.py b/caskdb/memory_store.py new file mode 100644 index 0000000..0bae6f5 --- /dev/null +++ b/caskdb/memory_store.py @@ -0,0 +1,12 @@ +class MemoryStorage: + def __init__(self) -> None: + self.data: dict[str, str] = {} + + def set(self, key: str, value: str) -> None: + self.data[key] = value + + def get(self, key: str) -> str: + return self.data.get(key, "") + + def close(self) -> None: + return diff --git a/example.py b/example.py index b8a09df..70bffe6 100644 --- a/example.py +++ b/example.py @@ -1,5 +1,5 @@ -from memory_store import MemoryStorage -from disk_store import DiskStorage +from caskdb.memory_store import MemoryStorage +from caskdb.disk_store import DiskStorage def memory_db() -> None: diff --git a/memory_store.py b/memory_store.py deleted file mode 100644 index a59d6f6..0000000 --- a/memory_store.py +++ /dev/null @@ -1,19 +0,0 @@ -class MemoryStorage: - def __init__(self) -> None: - self.data: dict[str, str] = {} - - def set(self, key: str, value: str) -> None: - self.data[key] = value - - def get(self, key: str) -> str: - return self.data.get(key, "") - - def close(self) -> bool: - # NOTE: ideally, I would want this to have () -> None signature, but for some - # reason mypy complains about this: - # - # tests/test_memory_store.py:19: error: "close" of "MemoryStorage" does not - # return a value - # - # check here for more: https://github.com/python/mypy/issues/6549 - return True diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..7f8e667 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,2 @@ +[mypy] +exclude = venv|setup.py \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt index c636dba..6117e70 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,7 +1,8 @@ black>=22.1.0 +build>=1.2.1 coverage>=6.3.2 flake8>=4.0.1 ipdb>=0.13.9 mypy>=0.950 -pytype>=2022.4.26 pytest>=7.1.2 +pytype>=2022.4.26 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..edd0600 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,43 @@ +[metadata] +name = caskdb +version = 0.1.0 +description = Disk based Log Structured Hash Table Store +long_description = file: README.md +long_description_content_type = text/markdown +url = https://github.com/avinassh/py-caskdb +author = Avinash Sajjanshetty +author_email = opensource@avi.im +license = MIT +license_file = LICENSE +classifiers = + License :: OSI Approved :: MIT License + Operating System :: OS Independent + Programming Language :: Python :: 3 + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 + Programming Language :: Python :: Implementation :: CPython + Typing :: Typed + +[options] +packages = find: +python_requires = >=3.8 + +[options.extras_require] +dev = + black>=22.1.0 + build>=1.2.1 + coverage>=6.3.2 + flake8>=4.0.1 + ipdb>=0.13.9 + mypy>=0.950 + pytest>=7.1.2 + pytype>=2022.4.26 + + +[options.package_data] +caskdb = + example.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..6068493 --- /dev/null +++ b/setup.py @@ -0,0 +1,3 @@ +from setuptools import setup + +setup() diff --git a/tests/test_disk_store.py b/tests/test_disk_store.py index dc84157..654efa5 100644 --- a/tests/test_disk_store.py +++ b/tests/test_disk_store.py @@ -3,7 +3,7 @@ import typing import unittest -from disk_store import DiskStorage +from caskdb.disk_store import DiskStorage class TempStorageFile: diff --git a/tests/test_format.py b/tests/test_format.py index 9dd7961..ffc6795 100644 --- a/tests/test_format.py +++ b/tests/test_format.py @@ -5,8 +5,14 @@ import unittest import uuid -from format import encode_header, decode_header, encode_kv, decode_kv, HEADER_SIZE -from format import KeyEntry +from caskdb.format import ( + encode_header, + decode_header, + encode_kv, + decode_kv, + HEADER_SIZE, +) +from caskdb.format import KeyEntry def get_random_header() -> tuple[int, int, int]: diff --git a/tests/test_memory_store.py b/tests/test_memory_store.py index 5950de8..43cd3a8 100644 --- a/tests/test_memory_store.py +++ b/tests/test_memory_store.py @@ -1,6 +1,6 @@ import unittest -from memory_store import MemoryStorage +from caskdb.memory_store import MemoryStorage class TestInMemoryCaskDB(unittest.TestCase): @@ -15,4 +15,4 @@ def test_invalid_key(self) -> None: def test_close(self) -> None: store = MemoryStorage() - self.assertTrue(store.close()) + store.close() From b8b7ddc5889101e6f4dbed84c2ef54969418cb22 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Fri, 28 Jun 2024 02:23:39 +0530 Subject: [PATCH 02/14] move to src layout --- setup.cfg | 5 +++++ {caskdb => src/caskdb}/__init__.py | 0 {caskdb => src/caskdb}/disk_store.py | 0 {caskdb => src/caskdb}/format.py | 0 {caskdb => src/caskdb}/memory_store.py | 0 5 files changed, 5 insertions(+) rename {caskdb => src/caskdb}/__init__.py (100%) rename {caskdb => src/caskdb}/disk_store.py (100%) rename {caskdb => src/caskdb}/format.py (100%) rename {caskdb => src/caskdb}/memory_store.py (100%) diff --git a/setup.cfg b/setup.cfg index edd0600..231949a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,6 +25,10 @@ classifiers = [options] packages = find: python_requires = >=3.8 +package_dir = =src + +[options.packages.find] +where = ./src [options.extras_require] dev = @@ -36,6 +40,7 @@ dev = mypy>=0.950 pytest>=7.1.2 pytype>=2022.4.26 + twine>=5.1.1 [options.package_data] diff --git a/caskdb/__init__.py b/src/caskdb/__init__.py similarity index 100% rename from caskdb/__init__.py rename to src/caskdb/__init__.py diff --git a/caskdb/disk_store.py b/src/caskdb/disk_store.py similarity index 100% rename from caskdb/disk_store.py rename to src/caskdb/disk_store.py diff --git a/caskdb/format.py b/src/caskdb/format.py similarity index 100% rename from caskdb/format.py rename to src/caskdb/format.py diff --git a/caskdb/memory_store.py b/src/caskdb/memory_store.py similarity index 100% rename from caskdb/memory_store.py rename to src/caskdb/memory_store.py From aae23ef20d3a0e521a84ff4bbd1eb62e2a33c640 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Fri, 28 Jun 2024 02:24:06 +0530 Subject: [PATCH 03/14] newlines --- mypy.ini | 2 +- requirements_dev.txt | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/mypy.ini b/mypy.ini index 7f8e667..f015e27 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,2 +1,2 @@ [mypy] -exclude = venv|setup.py \ No newline at end of file +exclude = venv|setup.py diff --git a/requirements_dev.txt b/requirements_dev.txt index 6117e70..aefbcb6 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,8 +1 @@ -black>=22.1.0 -build>=1.2.1 -coverage>=6.3.2 -flake8>=4.0.1 -ipdb>=0.13.9 -mypy>=0.950 -pytest>=7.1.2 -pytype>=2022.4.26 +-e .[dev] From af6f3295bdd255458af7164271ecb2c74857f82c Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Fri, 28 Jun 2024 02:34:04 +0530 Subject: [PATCH 04/14] move example to src directory --- setup.cfg | 5 ----- example.py => src/caskdb/example.py | 0 2 files changed, 5 deletions(-) rename example.py => src/caskdb/example.py (100%) diff --git a/setup.cfg b/setup.cfg index 231949a..564899f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,8 +41,3 @@ dev = pytest>=7.1.2 pytype>=2022.4.26 twine>=5.1.1 - - -[options.package_data] -caskdb = - example.py diff --git a/example.py b/src/caskdb/example.py similarity index 100% rename from example.py rename to src/caskdb/example.py From 4e683397441f1a9c20e5e0a33a2f9ef1972d7469 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Fri, 28 Jun 2024 02:49:24 +0530 Subject: [PATCH 05/14] Add `__all__` --- src/caskdb/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/caskdb/__init__.py b/src/caskdb/__init__.py index e69de29..6bdb237 100644 --- a/src/caskdb/__init__.py +++ b/src/caskdb/__init__.py @@ -0,0 +1,4 @@ +from caskdb.disk_store import DiskStorage +from caskdb.memory_store import MemoryStorage + +__all__ = ["DiskStorage", "MemoryStorage"] From 0b7aa4a724546815f4b27c946a357a468ac6e175 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Fri, 28 Jun 2024 02:56:01 +0530 Subject: [PATCH 06/14] use top level imports for tests --- tests/test_disk_store.py | 2 +- tests/test_memory_store.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_disk_store.py b/tests/test_disk_store.py index 654efa5..05adc0d 100644 --- a/tests/test_disk_store.py +++ b/tests/test_disk_store.py @@ -3,7 +3,7 @@ import typing import unittest -from caskdb.disk_store import DiskStorage +from caskdb import DiskStorage class TempStorageFile: diff --git a/tests/test_memory_store.py b/tests/test_memory_store.py index 43cd3a8..29b0e8c 100644 --- a/tests/test_memory_store.py +++ b/tests/test_memory_store.py @@ -1,6 +1,6 @@ import unittest -from caskdb.memory_store import MemoryStorage +from caskdb import MemoryStorage class TestInMemoryCaskDB(unittest.TestCase): From 9e507da6c225e33d2cbf451b58f9d054bb35a3b2 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Fri, 28 Jun 2024 03:32:01 +0530 Subject: [PATCH 07/14] Add strict to mypy.ini --- mypy.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/mypy.ini b/mypy.ini index f015e27..b352774 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,2 +1,3 @@ [mypy] exclude = venv|setup.py +strict = True From 38cc374b0fa9f5223512bd10d46a1e4c6d3154d1 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Thu, 4 Jul 2024 03:14:38 +0530 Subject: [PATCH 08/14] Install caskdb before running tests --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 85f9ceb..3cbf4b7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -38,4 +38,5 @@ jobs: python-version: ${{ matrix.python-version }} - name: tests run: | + pip install . make test From f20a270aec3fa3a573a68dec2d7f7e5dc4e71782 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Thu, 4 Jul 2024 03:16:45 +0530 Subject: [PATCH 09/14] Run mypy on the project, instead of individual files --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 31de09e..ec8c1b4 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ test: lint: black --check --diff $(FILES_TO_LINT) flake8 $(FILES_TO_LINT) - mypy --strict $(FILES_TO_LINT) + mypy . pytype $(FILES_TO_LINT) coverage: From 609e5d3e2dc27d25efd89a37bfa3515c4b3665fa Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Thu, 4 Jul 2024 03:21:16 +0530 Subject: [PATCH 10/14] Add py.typed file --- setup.cfg | 4 ++++ src/caskdb/py.typed | 1 + 2 files changed, 5 insertions(+) create mode 100644 src/caskdb/py.typed diff --git a/setup.cfg b/setup.cfg index 564899f..6a63e90 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,3 +41,7 @@ dev = pytest>=7.1.2 pytype>=2022.4.26 twine>=5.1.1 + +[options.package_data] +caskdb = + py.typed \ No newline at end of file diff --git a/src/caskdb/py.typed b/src/caskdb/py.typed new file mode 100644 index 0000000..d3245e7 --- /dev/null +++ b/src/caskdb/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. This package uses inline types. From 71d15a9d173e24ab44efb674de8df5381a8b68fb Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Thu, 4 Jul 2024 03:21:29 +0530 Subject: [PATCH 11/14] newline at the end --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 6a63e90..59ededd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -44,4 +44,4 @@ dev = [options.package_data] caskdb = - py.typed \ No newline at end of file + py.typed From 43ee33efa519e53de31ac9c1c8e8b192b757ccc3 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Thu, 4 Jul 2024 17:20:43 +0530 Subject: [PATCH 12/14] remove pytype --- Makefile | 1 - README.md | 2 +- setup.cfg | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile b/Makefile index ec8c1b4..597d625 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,6 @@ lint: black --check --diff $(FILES_TO_LINT) flake8 $(FILES_TO_LINT) mypy . - pytype $(FILES_TO_LINT) coverage: coverage run -m unittest discover -vvv ./tests -p '*.py' -b diff --git a/README.md b/README.md index b239025..a3cd4b6 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ Throughout the workshop, you will implement the following: 4. Figure out how to store the data on disk and the row pointer in the memory. Implement the get/set operations. Tests for the same are in `test_disk_store.py` 5. Code from the task #2 and #3 should be enough to read an existing CaskDB file and load the keys into memory -Use `make lint` to run mypy, black, and pytype static analyser. Run `make test` to run the tests locally. Push the code to Github, and tests will run on different OS: ubuntu, mac, and windows. +Use `make lint` to run mypy and black. Run `make test` to run the tests locally. Push the code to Github, and tests will run on different OS: ubuntu, mac, and windows. Not sure how to proceed? Then check the [hints](hints.md) file which contains more details on the tasks and hints. diff --git a/setup.cfg b/setup.cfg index 59ededd..384da93 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,7 +39,6 @@ dev = ipdb>=0.13.9 mypy>=0.950 pytest>=7.1.2 - pytype>=2022.4.26 twine>=5.1.1 [options.package_data] From e6a813d3ccacf619f9c58d0b262841ee28253df8 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Thu, 4 Jul 2024 23:29:37 +0530 Subject: [PATCH 13/14] Revert "remove pytype" This reverts commit 43ee33efa519e53de31ac9c1c8e8b192b757ccc3. --- Makefile | 1 + README.md | 2 +- setup.cfg | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 597d625..ec8c1b4 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,7 @@ lint: black --check --diff $(FILES_TO_LINT) flake8 $(FILES_TO_LINT) mypy . + pytype $(FILES_TO_LINT) coverage: coverage run -m unittest discover -vvv ./tests -p '*.py' -b diff --git a/README.md b/README.md index a3cd4b6..b239025 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ Throughout the workshop, you will implement the following: 4. Figure out how to store the data on disk and the row pointer in the memory. Implement the get/set operations. Tests for the same are in `test_disk_store.py` 5. Code from the task #2 and #3 should be enough to read an existing CaskDB file and load the keys into memory -Use `make lint` to run mypy and black. Run `make test` to run the tests locally. Push the code to Github, and tests will run on different OS: ubuntu, mac, and windows. +Use `make lint` to run mypy, black, and pytype static analyser. Run `make test` to run the tests locally. Push the code to Github, and tests will run on different OS: ubuntu, mac, and windows. Not sure how to proceed? Then check the [hints](hints.md) file which contains more details on the tasks and hints. diff --git a/setup.cfg b/setup.cfg index 384da93..59ededd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,6 +39,7 @@ dev = ipdb>=0.13.9 mypy>=0.950 pytest>=7.1.2 + pytype>=2022.4.26 twine>=5.1.1 [options.package_data] From 83f47827d014c546f44f4ec3678357a5ee588433 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Thu, 4 Jul 2024 23:31:32 +0530 Subject: [PATCH 14/14] update mypy and pytype versions --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 59ededd..f2baada 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,9 +37,9 @@ dev = coverage>=6.3.2 flake8>=4.0.1 ipdb>=0.13.9 - mypy>=0.950 + mypy>=1.10.1 pytest>=7.1.2 - pytype>=2022.4.26 + pytype>=2024.4.11 twine>=5.1.1 [options.package_data]