Skip to content

Commit

Permalink
changed name to pdrle and added get_sn
Browse files Browse the repository at this point in the history
  • Loading branch information
darshanbaral committed Sep 12, 2021
1 parent 01494db commit be69145
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 12 deletions.
20 changes: 15 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,20 @@ pip install git+https://github.com/darshanbaral/pyrle.git
## Usage

```python
import pyrle
import pdrle
import pandas


x = pandas.Series(["a", "a", "b", "b", "a", "a", "a", "c"])

rle = pyrle.encode(x)
rle = pdrle.encode(x)
rle
# vals runs
# 0 a 2
# 1 b 2
# 2 a 3
# 3 c 1

y = pyrle.decode(rle.vals, rle.runs)
y = pdrle.decode(rle.vals, rle.runs)
y
# 0 a
# 1 a
Expand All @@ -35,7 +34,7 @@ y
# 7 c
# dtype: object

pandas.concat({"x": x, "id": pyrle.get_id(x)}, axis=1)
pandas.concat({"x": x, "id": pdrle.get_id(x)}, axis=1)
# x id
# 0 a 0
# 1 a 0
Expand All @@ -45,4 +44,15 @@ pandas.concat({"x": x, "id": pyrle.get_id(x)}, axis=1)
# 5 a 2
# 6 a 2
# 7 c 3

pandas.concat({"x": x, "sn": pdrle.get_sn(x)}, axis=1)
# x sn
# 0 a 0
# 1 a 1
# 2 b 0
# 3 b 1
# 4 a 0
# 5 a 1
# 6 a 2
# 7 c 0
```
11 changes: 11 additions & 0 deletions pyrle/__init__.py → pdrle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,14 @@ def get_id(data: pandas.Series) -> pandas.Series:

rle_id = check.cumsum().astype(numpy.int64)
return rle_id - 1


def get_sn(data: pandas.Series) -> pandas.Series:
    """
    Number the elements inside each run of consecutive values of a pandas Series.

    Numbering restarts at 0 at the beginning of every run.

    :param data: input values, a pandas Series
    :return: pandas Series with the serial number of each element within its run
    """
    # get_id labels each run; cumcount then enumerates the rows sharing a label.
    run_ids = get_id(data)
    return data.groupby(run_ids).cumcount()
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[metadata]
# Use the repository README as the package's description file.
description-file = README.md
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from setuptools import setup


setup(name="pyrle",
version="0.2",
setup(name="pdrle",
version="0.3",
description="python package for run length encoding on pandas Series",
url="https://github.com/darshanbaral/pyrle",
author="Darshan Baral",
license="MIT",
packages=["pyrle"],
packages=["pdrle"],
install_requires=["pandas"]
)
27 changes: 23 additions & 4 deletions tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from parameterized import parameterized
import pandas
import numpy
import pyrle
import pdrle


class TestPyrle(unittest.TestCase):
Expand All @@ -25,7 +25,7 @@ class TestPyrle(unittest.TestCase):
"runs": [2, 3, 1]})]
])
def test_encode(self, input_data, expected_output):
actual_output = pyrle.encode(input_data)
actual_output = pdrle.encode(input_data)
pandas.testing.assert_frame_equal(actual_output, expected_output)

# test decode
Expand All @@ -41,7 +41,7 @@ def test_encode(self, input_data, expected_output):
"runs": [1]})]
])
def test_decode(self, expected_output, input_data):
actual_output = pyrle.decode(input_data.vals, input_data.runs)
actual_output = pdrle.decode(input_data.vals, input_data.runs)
pandas.testing.assert_series_equal(actual_output, expected_output)

# test get_id
Expand All @@ -60,5 +60,24 @@ def test_decode(self, expected_output, input_data):
pandas.Series({"a": 0, "b": 0, "c": 1, "d": 1, "e": 1, "f": 2})]
])
def test_get_id(self, input_data, expected_output):
actual_output = pyrle.get_id(input_data)
actual_output = pdrle.get_id(input_data)
pandas.testing.assert_series_equal(actual_output, expected_output)

# test get_sn
@parameterized.expand([
    # several runs, including a value ("a") that shows up again in a later run
    [pandas.Series(["a", "a", "b", "b", "b", "a", "a", "c"]),
     pandas.Series([0, 1, 0, 1, 2, 0, 1, 0])],
    # one run covering the whole Series
    [pandas.Series([1, 1, 1, 1, 1, 1, 1]),
     pandas.Series([0, 1, 2, 3, 4, 5, 6])],
    # single-element Series
    [pandas.Series([2]),
     pandas.Series([0])],
    # labelled index: the expected output carries the same labels as the input
    [pandas.Series({"a": 1, "b": 2, "c": 3, "d": 3, "e": 3, "f": 1}),
     pandas.Series({"a": 0, "b": 0, "c": 0, "d": 1, "e": 2, "f": 0})],
    # a lone NaN is expected to split the surrounding 1s into separate runs
    [pandas.Series({"a": 1, "b": 1, "c": numpy.nan, "d": 1, "e": 2, "f": 2}),
     pandas.Series({"a": 0, "b": 1, "c": 0, "d": 0, "e": 0, "f": 1})],
    # consecutive NaNs are expected to be numbered together as one run
    [pandas.Series({"a": 1, "b": 1, "c": numpy.nan, "d": numpy.nan, "e": numpy.nan, "f": 2}),
     pandas.Series({"a": 0, "b": 1, "c": 0, "d": 1, "e": 2, "f": 0})]
])
def test_get_sn(self, input_data, expected_output):
    """Check that pdrle.get_sn returns the expected within-run serial numbers."""
    actual_output = pdrle.get_sn(input_data)
    pandas.testing.assert_series_equal(actual_output, expected_output)

0 comments on commit be69145

Please sign in to comment.