Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ruff action #130

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']

steps:
- uses: actions/checkout@v4
Expand All @@ -22,14 +22,8 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest pytest-cov
python -m pip install pytest pytest-cov
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 probables/ --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 probables/ --count --exit-zero --max-complexity=11 --max-line-length=127 --statistics
- name: Test with pytest
run: |
# Run tests while also generating coverage statistics
Expand Down
15 changes: 15 additions & 0 deletions .github/workflows/ruff.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: Ruff
on: [workflow_dispatch, pull_request]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: "3.13"
- uses: astral-sh/ruff-action@v3
with:
args: "check --fix"
continue-on-error: false
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ keywords:
- Counting Cuckoo Filter
- Quotient Filter
license: MIT
version: 0.6.0
date-released: '2024-01-10'
version: 0.6.1
date-released: '2024-12-20'
6 changes: 3 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ PyProbables
.. image:: https://img.shields.io/github/release/barrust/pyprobables.svg
:target: https://github.com/barrust/pyprobables/releases
:alt: GitHub release
.. image:: https://github.com/barrust/pyprobables/workflows/Python%20package/badge.svg
:target: https://github.com/barrust/pyprobables/actions?query=workflow%3A%22Python+package%22
.. image:: https://github.com/barrust/pyprobables/workflows/Python%20package/badge.svg?branch=master
:target: https://github.com/barrust/pyprobables/actions?query=workflow%3A%22Python+package%22+branch%3Amaster
:alt: Build Status
.. image:: https://codecov.io/gh/barrust/pyprobables/branch/master/graph/badge.svg?token=OdETiNgz9k
:target: https://codecov.io/gh/barrust/pyprobables
Expand Down Expand Up @@ -56,7 +56,7 @@ To install `pyprobables`, simply clone the `repository on GitHub

$ python setup.py install

`pyprobables` supports python 3.6 - 3.11+
`pyprobables` supports python 3.9 - 3.13+

For *python 2.7* support, install `release 0.3.2 <https://github.com/barrust/pyprobables/releases/tag/v0.3.2>`__

Expand Down
6 changes: 3 additions & 3 deletions probables/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
""" pyprobables module """
"""pyprobables module"""

from typing import List
from __future__ import annotations

from probables.blooms import (
BloomFilter,
Expand All @@ -26,7 +26,7 @@
__email__ = "[email protected]"
__license__ = "MIT"
__version__ = "0.6.1"
__credits__: List[str] = []
__credits__: list[str] = []
__url__ = "https://github.com/barrust/pyprobables"
__bugtrack_url__ = "https://github.com/barrust/pyprobables/issues"

Expand Down
88 changes: 44 additions & 44 deletions probables/blooms/bloom.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
""" BloomFilter and BloomFiter on Disk, python implementation
License: MIT
Author: Tyler Barrus ([email protected])
URL: https://github.com/barrust/bloom
"""BloomFilter and BloomFilter on Disk, python implementation
License: MIT
Author: Tyler Barrus ([email protected])
URL: https://github.com/barrust/bloom
"""

from __future__ import annotations

import math
import os
from array import array
from binascii import hexlify, unhexlify
from io import BytesIO, IOBase
from collections.abc import ByteString
from io import BufferedRandom, BytesIO, IOBase
from mmap import mmap
from numbers import Number
from pathlib import Path
from shutil import copyfile
from struct import Struct
from textwrap import wrap
from typing import ByteString, Tuple, Union
from typing import Union

from probables.exceptions import InitializationError, NotSupportedError
from probables.hashes import HashFuncT, HashResultsT, KeyT, default_fnv_1a
Expand Down Expand Up @@ -66,11 +70,11 @@ class BloomFilter:

def __init__(
self,
est_elements: Union[int, None] = None,
false_positive_rate: Union[float, None] = None,
filepath: Union[str, Path, None] = None,
hex_string: Union[str, None] = None,
hash_function: Union[HashFuncT, None] = None,
est_elements: int | None = None,
false_positive_rate: float | None = None,
filepath: str | Path | None = None,
hex_string: str | None = None,
hash_function: HashFuncT | None = None,
):
# set some things up
self._on_disk = False
Expand Down Expand Up @@ -108,7 +112,7 @@ def _load_init(self, filepath, hash_function, hex_string, est_elements, false_po
_FPR_STRUCT = Struct("f")
_IMPT_STRUCT = Struct("B")

def __contains__(self, key: KeyT) -> Union[int, bool]:
def __contains__(self, key: KeyT) -> int | bool:
"""setup the `in` keyword"""
return self.check(key)

Expand Down Expand Up @@ -218,7 +222,7 @@ def clear(self) -> None:
for idx in range(self._bloom_length):
self._bloom[idx] = 0

def hashes(self, key: KeyT, depth: Union[int, None] = None) -> HashResultsT:
def hashes(self, key: KeyT, depth: int | None = None) -> HashResultsT:
"""Return the hashes based on the provided key

Args:
Expand Down Expand Up @@ -282,7 +286,7 @@ def export_hex(self) -> str:
bytes_string = hexlify(bytearray(self._bloom[: self.bloom_length])) + hexlify(footer_bytes)
return str(bytes_string, "utf-8")

def export(self, file: Union[Path, str, IOBase, mmap]) -> None:
def export(self, file: Path | str | IOBase | mmap) -> None:
"""Export the Bloom Filter to disk

Args:
Expand All @@ -301,16 +305,13 @@ def export(self, file: Union[Path, str, IOBase, mmap]) -> None:
)
)

def export_c_header(self, filename: Union[str, Path]) -> None:
def export_c_header(self, filename: str | Path) -> None:
"""Export the Bloom Filter to disk as a C header file.

Args:
filename (str): The filename to which the Bloom Filter will be written."""
data = (" " + line for line in wrap(", ".join(f"0x{e:02x}" for e in bytearray.fromhex(self.export_hex())), 80))
if self._type in ["regular", "regular-on-disk"]:
bloom_type = "standard BloomFilter"
else:
bloom_type = "CountingBloomFilter"
bloom_type = "standard BloomFilter" if self._type in {"regular", "regular-on-disk"} else "CountingBloomFilter"

with open(filename, "w", encoding="utf-8") as file:
print(f"/* BloomFilter Export of a {bloom_type} */", file=file)
Expand All @@ -323,7 +324,7 @@ def export_c_header(self, filename: Union[str, Path]) -> None:
print("const unsigned char bloom[] = {", *data, "};", sep="\n", file=file)

@classmethod
def frombytes(cls, b: ByteString, hash_function: Union[HashFuncT, None] = None) -> "BloomFilter":
def frombytes(cls, b: ByteString, hash_function: HashFuncT | None = None) -> BloomFilter:
"""
Args:
b (ByteString): The bytes to load as a Bloom Filter
Expand Down Expand Up @@ -369,7 +370,7 @@ def current_false_positive_rate(self) -> float:
exp = math.exp(dbl)
return math.pow((1 - exp), self.number_hashes)

def intersection(self, second: SimpleBloomT) -> Union[SimpleBloomT, None]:
def intersection(self, second: SimpleBloomT) -> SimpleBloomT | None:
"""Return a new Bloom Filter that contains the intersection of the
two

Expand Down Expand Up @@ -400,7 +401,7 @@ def intersection(self, second: SimpleBloomT) -> Union[SimpleBloomT, None]:
res.elements_added = res.estimate_elements()
return res

def union(self, second: SimpleBloomT) -> Union["BloomFilter", None]:
def union(self, second: SimpleBloomT) -> BloomFilter | None:
"""Return a new Bloom Filter that contains the union of the two

Args:
Expand Down Expand Up @@ -430,7 +431,7 @@ def union(self, second: SimpleBloomT) -> Union["BloomFilter", None]:
res.elements_added = res.estimate_elements()
return res

def jaccard_index(self, second: SimpleBloomT) -> Union[float, None]:
def jaccard_index(self, second: SimpleBloomT) -> float | None:
"""Calculate the jaccard similarity score between two Bloom Filters

Args:
Expand Down Expand Up @@ -465,7 +466,7 @@ def jaccard_index(self, second: SimpleBloomT) -> Union[float, None]:

# More private functions
@classmethod
def _get_optimized_params(cls, estimated_elements: int, false_positive_rate: float) -> Tuple[float, int, int]:
def _get_optimized_params(cls, estimated_elements: int, false_positive_rate: float) -> tuple[float, int, int]:
valid_prms = isinstance(estimated_elements, Number) and estimated_elements > 0
if not valid_prms:
msg = "Bloom: estimated elements must be greater than 0"
Expand All @@ -492,7 +493,7 @@ def _set_values(
fpr: float,
n_hashes: int,
n_bits: int,
hash_func: Union[HashFuncT, None],
hash_func: HashFuncT | None,
) -> None:
self._est_elements = est_els
self._fpr = fpr
Expand All @@ -505,7 +506,7 @@ def _set_values(
self._number_hashes = n_hashes
self._num_bits = n_bits

def _load_hex(self, hex_string: str, hash_function: Union[HashFuncT, None] = None) -> None:
def _load_hex(self, hex_string: str, hash_function: HashFuncT | None = None) -> None:
"""placeholder for loading from hex string"""
offset = self._FOOTER_STRUCT_BE.size * 2
est_els, els_added, fpr, n_hashes, n_bits = self._parse_footer(
Expand All @@ -517,8 +518,8 @@ def _load_hex(self, hex_string: str, hash_function: Union[HashFuncT, None] = Non

def _load(
self,
file: Union[Path, str, IOBase, mmap, ByteString],
hash_function: Union[HashFuncT, None] = None,
file: Path | str | IOBase | mmap | ByteString,
hash_function: HashFuncT | None = None,
) -> None:
"""load the Bloom Filter from file or bytes"""
if not isinstance(file, (IOBase, mmap, bytes, bytearray, memoryview)):
Expand All @@ -528,15 +529,16 @@ def _load(
else:
offset = self._FOOTER_STRUCT.size
est_els, els_added, fpr, n_hashes, n_bits = self._parse_footer(
self._FOOTER_STRUCT, file[-1 * offset :] # type: ignore
self._FOOTER_STRUCT,
file[-1 * offset :], # type: ignore
)
self._set_values(est_els, fpr, n_hashes, n_bits, hash_function)
# now read in the bit array!
self._parse_bloom_array(file, self._IMPT_STRUCT.size * self.bloom_length) # type: ignore
self._els_added = els_added

@classmethod
def _parse_footer(cls, stct: Struct, d: ByteString) -> Tuple[int, int, float, int, int]:
def _parse_footer(cls, stct: Struct, d: ByteString) -> tuple[int, int, float, int, int]:
"""parse footer returning the data: estimated elements, elements added,
false positive rate, number hashes, number bits"""
e_elms, e_added, fpr = stct.unpack_from(bytearray(d))
Expand Down Expand Up @@ -568,9 +570,7 @@ def _verify_bloom_similarity(self, second: SimpleBloomT) -> bool:
hash_match = self.number_hashes != second.number_hashes
same_bits = self.number_bits != second.number_bits
next_hash = self.hashes("test") != second.hashes("test")
if hash_match or same_bits or next_hash:
return False
return True
return not (hash_match or same_bits or next_hash)


class BloomFilterOnDisk(BloomFilter):
Expand Down Expand Up @@ -599,15 +599,15 @@ class BloomFilterOnDisk(BloomFilter):

def __init__(
self,
filepath: Union[str, Path],
est_elements: Union[int, None] = None,
false_positive_rate: Union[float, None] = None,
hex_string: Union[str, None] = None,
hash_function: Union[HashFuncT, None] = None,
filepath: str | Path,
est_elements: int | None = None,
false_positive_rate: float | None = None,
hex_string: str | None = None,
hash_function: HashFuncT | None = None,
) -> None:
# set some things up
self._filepath = resolve_path(filepath)
self.__file_pointer = None
self.__file_pointer: BufferedRandom | None = None
super().__init__(est_elements, false_positive_rate, filepath, hex_string, hash_function)

def _load_init(self, filepath, hash_function, hex_string, est_elements, false_positive_rate):
Expand Down Expand Up @@ -642,11 +642,11 @@ def close(self) -> None:
"""Clean up the BloomFilterOnDisk object"""
if self.__file_pointer is not None and not self.__file_pointer.closed:
self.__update()
self._bloom.close()
self._bloom.close() # type: ignore
self.__file_pointer.close()
self.__file_pointer = None

def export(self, file: Union[str, Path]) -> None: # type: ignore
def export(self, file: str | Path) -> None: # type: ignore
"""Export to disk if a different location

Args:
Expand All @@ -658,7 +658,7 @@ def export(self, file: Union[str, Path]) -> None: # type: ignore
copyfile(self._filepath.name, str(file))
# otherwise, nothing to do!

def _load(self, file: Union[str, Path], hash_function: Union[HashFuncT, None] = None): # type: ignore
def _load(self, file: str | Path, hash_function: HashFuncT | None = None): # type: ignore
"""load the Bloom Filter on disk"""
# read the file, set the optimal params
# mmap everything
Expand All @@ -671,7 +671,7 @@ def _load(self, file: Union[str, Path], hash_function: Union[HashFuncT, None] =
fpr, n_hashes, n_bits = self._get_optimized_params(est_els, fpr)
self._set_values(est_els, fpr, n_hashes, n_bits, hash_function)
# setup a few additional items
self.__file_pointer = open(file, "r+b") # type: ignore
self.__file_pointer = open(file, "r+b") # noqa: SIM115
self._bloom = mmap(self.__file_pointer.fileno(), 0) # type: ignore
self._on_disk = True

Expand All @@ -680,7 +680,7 @@ def add_alt(self, hashes: HashResultsT) -> None:
self.__update()

@classmethod
def frombytes(cls, b: ByteString, hash_function: Union[HashFuncT, None] = None) -> "BloomFilterOnDisk":
def frombytes(cls, b: ByteString, hash_function: HashFuncT | None = None) -> BloomFilterOnDisk:
"""
Raises: NotSupportedError
"""
Expand Down
Loading
Loading