diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml new file mode 100644 index 0000000..6cb1148 --- /dev/null +++ b/.github/workflows/unittests.yml @@ -0,0 +1,58 @@ +name: Run UnitTests +on: + pull_request: + branches: + - dev + paths-ignore: + - 'z85base91/version.py' + - '.github/**' + - '.gitignore' + - 'LICENSE' + - 'CHANGELOG.md' + - 'MANIFEST.in' + - 'README.md' + push: + branches: + - master + paths-ignore: + - 'z85base91/version.py' + - '.github/**' + - '.gitignore' + - 'LICENSE' + - 'CHANGELOG.md' + - 'MANIFEST.in' + - 'README.md' + workflow_dispatch: + +jobs: + unit_tests: + strategy: + matrix: + python-version: ["3.10", "3.11" ] + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - uses: actions/checkout@v2 + - name: Set up python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install System Dependencies + run: | + sudo apt-get update + sudo apt install python3-dev swig + python -m pip install build wheel + - name: Install repo + run: | + pip install -e . 
+ - name: Install test dependencies + run: | + pip install -r test/requirements.txt + - name: Run unittests + run: | + pytest --cov=z85base91 --cov-report xml test + - name: Upload coverage + if: "${{ matrix.python-version == '3.11' }}" + env: + CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}} + uses: codecov/codecov-action@v2 \ No newline at end of file diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/requirements.txt b/test/requirements.txt new file mode 100644 index 0000000..9e40bde --- /dev/null +++ b/test/requirements.txt @@ -0,0 +1,2 @@ +pytest~=7.1 +pytest-cov~=4.1 \ No newline at end of file diff --git a/test/test_b91.py b/test/test_b91.py new file mode 100644 index 0000000..4defb20 --- /dev/null +++ b/test/test_b91.py @@ -0,0 +1,62 @@ +import unittest +from z85base91 import B91 + + +class TestB91(unittest.TestCase): + def test_encode_empty(self): + """Test encoding an empty byte sequence.""" + self.assertEqual(B91.encode(b''), b'') + self.assertEqual(B91.encode(''), b'') + + def test_decode_empty(self): + """Test decoding an empty string.""" + self.assertEqual(B91.decode(''), b'') + self.assertEqual(B91.decode(b''), b'') + + def test_encode_single_byte(self): + """Test encoding a single byte.""" + self.assertEqual(b'A', B91.decode(B91.encode(b'A'))) + self.assertEqual(b'B', B91.decode(B91.encode('B'))) + self.assertEqual(b'_~', B91.decode(B91.encode(b'_~'))) + self.assertEqual(b'_~', B91.decode(B91.encode('_~'))) + + def test_encode_short_string(self): + """Test encoding a short string.""" + self.assertEqual(b'hello', B91.decode(B91.encode(b'hello'))) + self.assertEqual(B91.decode('>OwJh>Io0Tv!lE'), b'Hello World') + + def test_encode_decode_round_trip(self): + """Test encoding and decoding round-trip.""" + data = b'The quick brown fox jumps over the lazy dog.' 
+ encoded = B91.encode(data) + decoded = B91.decode(encoded) + self.assertEqual(decoded, data) + + def test_encode_unicode_string(self): + """Test encoding a Unicode string.""" + data = 'こんにちは' # Japanese for "hello" + encoded = B91.encode(data) + decoded = B91.decode(encoded) + self.assertEqual(decoded.decode('utf-8'), data) + + def test_decode_invalid_character(self): + """Test decoding with invalid Base91 characters.""" + with self.assertRaises(ValueError): + B91.decode('Invalid🎉Chars') + + def test_3bytes_threshold(self): + """Test edge cases around the 88 threshold.""" + data = b'\x00\x00\x00' # Minimal data + encoded = B91.encode(data) + self.assertEqual(B91.decode(encoded), data) + + def test_encode_large_data(self): + """Test encoding a large byte sequence.""" + data = b'\xff' * 1000 + encoded = B91.encode(data) + decoded = B91.decode(encoded) + self.assertEqual(decoded, data) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_z85b.py b/test/test_z85b.py new file mode 100644 index 0000000..d9fa64c --- /dev/null +++ b/test/test_z85b.py @@ -0,0 +1,62 @@ +import unittest +from z85base91 import Z85B + + +class TestZ85B(unittest.TestCase): + def test_encode_empty(self): + """Test encoding an empty byte sequence.""" + self.assertEqual(Z85B.encode(b''), b'') + self.assertEqual(Z85B.encode(''), b'') + + def test_decode_empty(self): + """Test decoding an empty string.""" + self.assertEqual(Z85B.decode(''), b'') + self.assertEqual(Z85B.decode(b''), b'') + + def test_encode_single_byte(self): + """Test encoding a single byte.""" + self.assertEqual(b'A', Z85B.decode(Z85B.encode(b'A'))) + self.assertEqual(b'B', Z85B.decode(Z85B.encode('B'))) + self.assertEqual(b'_~', Z85B.decode(Z85B.encode(b'_~'))) + self.assertEqual(b'_~', Z85B.decode(Z85B.encode('_~'))) + + def test_encode_short_string(self): + """Test encoding a short string.""" + self.assertEqual(b'hello', Z85B.decode(Z85B.encode(b'hello'))) + self.assertEqual(b'Hello World', 
Z85B.decode(Z85B.encode(b'Hello World'))) + + def test_encode_decode_round_trip(self): + """Test encoding and decoding round-trip.""" + data = b'The quick brown fox jumps over the lazy dog.' + encoded = Z85B.encode(data) + decoded = Z85B.decode(encoded) + self.assertEqual(decoded, data) + + def test_encode_unicode_string(self): + """Test encoding a Unicode string.""" + data = 'こんにちは' # Japanese for "hello" + encoded = Z85B.encode(data) + decoded = Z85B.decode(encoded) + self.assertEqual(decoded.decode('utf-8'), data) + + def test_decode_invalid_character(self): + """Test decoding with invalid Z85 characters.""" + with self.assertRaises(ValueError): + Z85B.decode('Invalid🎉Chars') + + def test_edge_case_88_threshold(self): + """Test round-trip of a minimal three-zero-byte input.""" + data = b'\x00\x00\x00' # Minimal data + encoded = Z85B.encode(data) + self.assertEqual(Z85B.decode(encoded), data) + + def test_encode_large_data(self): + """Test encoding a large byte sequence.""" + data = b'\xff' * 1000 + encoded = Z85B.encode(data) + decoded = Z85B.decode(encoded) + self.assertEqual(decoded, data) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_z85p.py b/test/test_z85p.py new file mode 100644 index 0000000..0a18324 --- /dev/null +++ b/test/test_z85p.py @@ -0,0 +1,104 @@ +import unittest +from z85base91 import Z85P + + +class TestZ85P(unittest.TestCase): + def test_encode_empty(self): + """Test encoding an empty byte sequence.""" + self.assertEqual(Z85P.encode(b''), b'\x00') + self.assertEqual(Z85P.encode(''), b'\x00') + + def test_decode_empty(self): + """Test decoding an empty string.""" + self.assertEqual(Z85P.decode(b'\x00'), b'') + self.assertEqual(Z85P.decode(''), b'') + self.assertEqual(Z85P.decode(b''), b'') + + def test_encode_single_byte(self): + """Test encoding a single byte.""" + self.assertEqual(b'A', Z85P.decode(Z85P.encode(b'A'))) + self.assertEqual(b'B', Z85P.decode(Z85P.encode('B'))) + self.assertEqual(b'_~', 
Z85P.decode(Z85P.encode(b'_~'))) + self.assertEqual(b'_~', Z85P.decode(Z85P.encode('_~'))) + + def test_encode_short_string(self): + """Test encoding a short string.""" + self.assertEqual(b'hello', Z85P.decode(Z85P.encode(b'hello'))) + + def test_encode_decode_round_trip(self): + """Test encoding and decoding round-trip.""" + data = b'The quick brown fox jumps over the lazy dog.' + encoded = Z85P.encode(data) + decoded = Z85P.decode(encoded) + self.assertEqual(decoded, data) + + def test_encode_unicode_string(self): + """Test encoding a Unicode string.""" + data = 'こんにちは' # Japanese for "hello" + encoded = Z85P.encode(data) + decoded = Z85P.decode(encoded) + self.assertEqual(decoded.decode('utf-8'), data) + + def test_decode_invalid_character(self): + """Test decoding with invalid z85 characters.""" + with self.assertRaises(ValueError): + Z85P.decode('Invalid🎉Chars') + + def test_encode_large_data(self): + """Test encoding a large byte sequence.""" + data = b'\xff' * 1000 + encoded = Z85P.encode(data) + decoded = Z85P.decode(encoded) + self.assertEqual(decoded, data) + + def test_padding_single_byte(self): + """Test encoding and decoding with one byte that requires padding.""" + data = b'\x01' # Single byte, should get padded with 3 \x00 bytes + encoded = Z85P.encode(data) + self.assertEqual(encoded[0], 3) # Check padding byte + self.assertEqual(Z85P.decode(encoded), data) + + def test_padding_two_bytes(self): + """Test encoding and decoding with two bytes that require padding.""" + data = b'\x01\x01' # Two bytes, should get padded with 2 \x00 bytes + encoded = Z85P.encode(data) + self.assertEqual(encoded[0], 2) # Check padding byte + self.assertEqual(Z85P.decode(encoded), data) + + def test_padding_three_bytes(self): + """Test encoding and decoding with three bytes that require padding.""" + data = b'\x01\x01\x01' # Three bytes, should get padded with 1 \x00 byte + encoded = Z85P.encode(data) + self.assertEqual(encoded[0], 1) # Check padding byte + 
self.assertEqual(Z85P.decode(encoded), data) + + def test_no_padding_5_bytes(self): # NOTE(review): marked "fails" by the author; despite the name, 5 bytes expect 3 padding bytes + data = b'\x01\x01\x01\x01\x01' # 5 bytes + encoded = Z85P.encode(data) + self.assertEqual(encoded[0], 3) # Check padding byte + self.assertEqual(Z85P.decode(encoded), data) + + def test_no_padding_needed(self): + """Test encoding and decoding with data that doesn't need padding.""" + data = b'\x01\x01\x01\x01' # Exactly 4 bytes, no padding + encoded = Z85P.encode(data) + self.assertEqual(encoded[0], 0) # No padding + self.assertEqual(Z85P.decode(encoded), data) + + def test_round_trip_padding(self): + """Test round-trip encoding and decoding with padding.""" + data = b'\x01\x01\x01' # Less than 4 bytes, needs padding + encoded = Z85P.encode(data) + decoded = Z85P.decode(encoded) + self.assertEqual(decoded, data) # Ensure padding is correctly removed + + def test_padding_removal_after_decoding(self): + """Test ensuring padding is correctly removed after decoding.""" + data = b'\x01\x01\x01' # Less than 4 bytes, needs padding + encoded = Z85P.encode(data) + self.assertEqual(encoded[0], 1) # Padding size is 1 + decoded = Z85P.decode(encoded) + self.assertEqual(decoded, data) # Padding should be removed + +if __name__ == '__main__': + unittest.main() diff --git a/z85base91/__init__.py b/z85base91/__init__.py index ba83a6f..088516f 100644 --- a/z85base91/__init__.py +++ b/z85base91/__init__.py @@ -40,6 +40,8 @@ class Z85P: Class for encoding and decoding Z85P format using a C-based shared library. If the C library is not available, it falls back to a pure Python implementation. 
""" + Z85CHARS = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#" + # Load the correct shared library based on system architecture lib = ctypes.CDLL(get_arch_lib('libz85p')) @@ -55,7 +57,7 @@ class Z85P: lib.decode_z85p.restype = ctypes.POINTER(ctypes.c_ubyte) @classmethod - def encode(cls, data: bytes) -> bytes: + def encode(cls, data: Union[str, bytes]) -> bytes: """ Encodes the input data into Z85P format. @@ -68,20 +70,21 @@ def encode(cls, data: bytes) -> bytes: Raises: ValueError: If encoding fails. """ + if isinstance(data, str): + data = data.encode('utf-8') + out_len = c_size_t(0) raw_data = (ctypes.c_ubyte * len(data))(*data) encoded_data = cls.lib.encode_z85p(raw_data, len(data), ctypes.byref(out_len)) - if not encoded_data: - raise ValueError("Encoding failed") return bytes(ctypes.string_at(encoded_data, out_len.value)) @classmethod - def decode(cls, data: bytes) -> bytes: + def decode(cls, encoded_data: Union[str, bytes]) -> bytes: """ Decodes the input Z85P-encoded data into raw bytes. Args: - data (bytes): The Z85P-encoded data to decode. + encoded_data (Union[str, bytes]): The Z85P-encoded data to decode. Returns: bytes: The decoded raw data. @@ -89,11 +92,14 @@ def decode(cls, data: bytes) -> bytes: Raises: ValueError: If decoding fails. """ + if isinstance(encoded_data, str): + if any(c not in Z85P.Z85CHARS for c in encoded_data): + raise ValueError("Invalid Z85 character") + encoded_data = encoded_data.encode('utf-8') + out_len = c_size_t(0) - raw_data = (ctypes.c_ubyte * len(data))(*data) - decoded_data = cls.lib.decode_z85p(raw_data, len(data), ctypes.byref(out_len)) - if not decoded_data: - raise ValueError("Decoding failed") + raw_data = (ctypes.c_ubyte * len(encoded_data))(*encoded_data) + decoded_data = cls.lib.decode_z85p(raw_data, len(encoded_data), ctypes.byref(out_len)) return bytes(ctypes.string_at(decoded_data, out_len.value)) except Exception as e: logging.warning(f"Z85P C library not available: {e}. 
Falling back to pure Python implementation.") @@ -120,56 +126,55 @@ class B91: lib.encode.restype = c_char_p @classmethod - def decode(cls, encoded_data: Union[str, bytes]) -> bytes: + def encode(cls, data: Union[str, bytes]) -> bytes: """ - Decodes the input Base91-encoded data into raw bytes. + Encodes the input data into Base91 format. Args: - encoded_data (Union[str, bytes]): The Base91-encoded data to decode. + data (Union[str, bytes]): The raw data to encode. Returns: - bytes: The decoded raw data. + bytes: The Base91-encoded data. Raises: - ValueError: If decoding fails. + ValueError: If encoding fails. """ - if isinstance(encoded_data, str): - # Convert the encoded data to bytes - encoded_data = encoded_data.encode('utf-8') + if isinstance(data, str): + # Convert the data to bytes + data = data.encode('utf-8') output_len = c_size_t(0) # Call the C function - decoded_data = cls.lib.decode(encoded_data, ctypes.byref(output_len)) + encoded_data = cls.lib.encode((ctypes.c_ubyte * len(data))(*data), len(data), ctypes.byref(output_len)) - if not decoded_data: - raise ValueError("Invalid Base91 string") - return ctypes.string_at(decoded_data, output_len.value) + return ctypes.string_at(encoded_data, output_len.value) @classmethod - def encode(cls, data: Union[str, bytes]) -> bytes: + def decode(cls, encoded_data: Union[str, bytes]) -> bytes: """ - Encodes the input data into Base91 format. + Decodes the input Base91-encoded data into raw bytes. Args: - data (Union[str, bytes]): The raw data to encode. + encoded_data (Union[str, bytes]): The Base91-encoded data to decode. Returns: - bytes: The Base91-encoded data. + bytes: The decoded raw data. Raises: - ValueError: If encoding fails. + ValueError: If decoding fails. 
""" - if isinstance(data, str): - # Convert the data to bytes - data = data.encode('utf-8') + if isinstance(encoded_data, str): + # Convert the encoded data to bytes + encoded_data = encoded_data.encode('utf-8') output_len = c_size_t(0) # Call the C function - encoded_data = cls.lib.encode((ctypes.c_ubyte * len(data))(*data), len(data), ctypes.byref(output_len)) + decoded_data = cls.lib.decode(encoded_data, ctypes.byref(output_len)) + + if not decoded_data: + raise ValueError("Invalid Base91 string") + return ctypes.string_at(decoded_data, output_len.value) - if not encoded_data: - raise ValueError("Encoding failed") - return ctypes.string_at(encoded_data, output_len.value) except Exception as e: logging.warning(f"Base91 C library not available: {e}. Falling back to pure Python implementation.") from z85base91.b91 import B91 @@ -193,7 +198,7 @@ class Z85B: lib.free.argtypes = [ctypes.c_void_p] # Add free function for memory cleanup @classmethod - def encode(cls, data: bytes) -> bytes: + def encode(cls, data: Union[str, bytes]) -> bytes: """ Encodes the input data into Z85B format. @@ -206,6 +211,8 @@ def encode(cls, data: bytes) -> bytes: Raises: ValueError: If encoding fails. """ + if isinstance(data, str): + data = data.encode('utf-8') output_len = c_size_t(0) encoded_data = cls.lib.encode_z85b((c_ubyte * len(data))(*data), len(data), byref(output_len)) if not encoded_data: @@ -217,7 +224,7 @@ def encode(cls, data: bytes) -> bytes: cls.lib.free(encoded_data) @classmethod - def decode(cls, encoded_data: bytes) -> bytes: + def decode(cls, encoded_data: Union[str, bytes]) -> bytes: """ Decodes the input Z85B-encoded data into raw bytes. @@ -230,12 +237,14 @@ def decode(cls, encoded_data: bytes) -> bytes: Raises: ValueError: If decoding fails. 
""" + if isinstance(encoded_data, str): + # Convert the encoded data to bytes + if any(c not in Z85P.Z85CHARS for c in encoded_data): + raise ValueError("Invalid Z85 character") + encoded_data = encoded_data.encode('utf-8') output_len = c_size_t(0) decoded_data = cls.lib.decode_z85b((c_ubyte * len(encoded_data))(*encoded_data), len(encoded_data), byref(output_len)) - if not decoded_data: - raise ValueError("Decoding failed") - try: return ctypes.string_at(decoded_data, output_len.value) finally: