diff --git a/LICENSE b/LICENSE index 45f9d3a..899853e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 JavaScriptDude +Copyright (c) 2022 Timothy C. Quinn Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +SOFTWARE. \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..103cbde --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include README.md +include LICENSE +include pyproject.toml \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..b9a632a --- /dev/null +++ b/README.md @@ -0,0 +1,208 @@ +## `multisort` - NoneType Safe Multi Column Sorting + +Simplified multi-column sorting of lists of tuples, dicts, lists or objects that are NoneType safe. + +### Installation + +``` +python3 -m pip install multisort +``` + +### Dependencies +None + +### Performance +Average over 10 iterations with 500 rows. +Test | Secs +---|--- +cmp_func|0.0054 +pandas|0.0061 +reversor|0.0149 +msorted|0.0179 + +As you can see, `cmp_func` is by far the fastest methodology as long as the table is on the order of 500 rows by 5 columns. However, for larger data sets, `pandas` is the performance winner and scales extremely well. In such large dataset cases, where performance is key, `pandas` should be the first choice. + +The surprising thing from testing is that `cmp_func` far outperforms `reversor`, which is the only other methodology for multi-columnar sorting that can handle `NoneType` values. 
+ +### Note on `NoneType` and sorting +If your data may contain `None`, it would be wise to ensure your sort algorithm is tuned to handle it. This is because `sorted` uses `<` comparisons, which are not supported by `NoneType`. For example, the following error will result: `TypeError: '<' not supported between instances of 'NoneType' and 'str'`. + +### Methodologies +Method|Descr|Notes +---|---|--- +cmp_func|Multi column sorting in the model `java.util.Comparator`|Fastest for small to medium size data +reversor|Enable multi column sorting with column specific reverse sorting|Medium speed. [Source](https://stackoverflow.com/a/56842689/286807) +msorted|Simple one-liner designed after `multisort` [example from python docs](https://docs.python.org/3/howto/sorting.html#sort-stability-and-complex-sorts)|Slowest of the bunch but not by much + + + +### Dictionary Examples +For data: +``` +rows_dict = [ + {'idx': 0, 'name': 'joh', 'grade': 'C', 'attend': 100} + ,{'idx': 1, 'name': 'jan', 'grade': 'a', 'attend': 80} + ,{'idx': 2, 'name': 'dav', 'grade': 'B', 'attend': 85} + ,{'idx': 3, 'name': 'bob' , 'grade': 'C', 'attend': 85} + ,{'idx': 4, 'name': 'jim' , 'grade': 'F', 'attend': 55} + ,{'idx': 5, 'name': 'joe' , 'grade': None, 'attend': 55} +] +``` + +### `msorted` +Sort rows_dict by _grade_, descending, then _attend_, ascending and put None first in results: +``` +from multisort import msorted +rows_sorted = msorted(rows_dict, [ + ('grade', {'reverse': False, 'none_first': True}) + ,'attend' +]) + +``` + +Sort rows_dict by _grade_, descending, then _attend_ and call upper() for _grade_: +``` +from multisort import msorted +rows_sorted = msorted(rows_dict, [ + ('grade', {'reverse': False, 'clean': lambda s:None if s is None else s.upper()}) + ,'attend' +]) + +``` + +### `sorted` with `reversor` +Sort rows_dict by _grade_, descending, then _attend_ and call upper() for _grade_: +``` +rows_sorted = sorted(rows_dict, key=lambda o: ( + reversor(None if o['grade'] is None else 
o['grade'].upper()) + ,o['attend']) +) +``` + + +### `sorted` with `cmp_func` +Sort rows_dict by _grade_, descending, then _attend_ and call upper() for _grade_: +``` +def cmp_student(a,b): + k='grade'; va=a[k]; vb=b[k] + if va != vb: + if va is None: return -1 + if vb is None: return 1 + return -1 if va > vb else 1 + k='attend'; va=a[k]; vb=b[k]; + if va != vb: return -1 if va < vb else 1 + return 0 +rows_sorted = sorted(rows_dict, key=cmp_func(cmp_student), reverse=True) +``` + + + +### Object Examples +For data: +``` +class Student(): + def __init__(self, idx, name, grade, attend): + self.idx = idx + self.name = name + self.grade = grade + self.attend = attend + def __str__(self): return f"name: {self.name}, grade: {self.grade}, attend: {self.attend}" + def __repr__(self): return self.__str__() + +rows_obj = [ + Student(0, 'joh', 'C', 100) + ,Student(1, 'jan', 'a', 80) + ,Student(2, 'dav', 'B', 85) + ,Student(3, 'bob', 'C', 85) + ,Student(4, 'jim', 'F', 55) + ,Student(5, 'joe', None, 55) +] +``` + +### `msorted` +(Same syntax as with 'dict' example) + + +### `sorted` with `reversor` +Sort rows_obj by _grade_, descending, then _attend_ and call upper() for _grade_: +``` +rows_sorted = sorted(rows_obj, key=lambda o: ( + reversor(None if o.grade is None else o.grade.upper()) + ,o.attend) +) +``` + + +### `sorted` with `cmp_func` +Sort rows_obj by _grade_, descending, then _attend_ and call upper() for _grade_: +``` +def cmp_student(a,b): + if a.grade != b.grade: + if a.grade is None: return -1 + if b.grade is None: return 1 + return -1 if a.grade > b.grade else 1 + if a.attend != b.attend: + return -1 if a.attend < b.attend else 1 + return 0 +rows_sorted = sorted(rows_obj, key=cmp_func(cmp_student), reverse=True) +``` + + +### List / Tuple Examples +For data: +``` +rows_tuple = [ + (0, 'joh', 'a' , 100) + ,(1, 'joe', 'B' , 80) + ,(2, 'dav', 'A' , 85) + ,(3, 'bob', 'C' , 85) + ,(4, 'jim', None , 55) + ,(5, 'jan', 'B' , 70) +] +(COL_IDX, COL_NAME, COL_GRADE, 
COL_ATTEND) = range(0,4) +``` + +### `msorted` +Sort rows_tuple by _grade_, descending, then _attend_, ascending and put None first in results: +``` +from multisort import msorted +rows_sorted = msorted(rows_tuple, [ + (COL_GRADE, {'reverse': False, 'none_first': True}) + ,COL_ATTEND +]) + +``` + + +### `sorted` with `reversor` +Sort rows_tuple by _grade_, descending, then _attend_ and call upper() for _grade_: +``` +rows_sorted = sorted(rows_tuple, key=lambda o: ( + reversor(None if o[COL_GRADE] is None else o[COL_GRADE].upper()) + ,o[COL_ATTEND]) +) +``` + + +### `sorted` with `cmp_func` +Sort rows_tuple by _grade_, descending, then _attend_ and call upper() for _grade_: +``` +def cmp_student(a,b): + k=COL_GRADE; va=a[k]; vb=b[k] + if va != vb: + if va is None: return -1 + if vb is None: return 1 + return -1 if va > vb else 1 + k=COL_ATTEND; va=a[k]; vb=b[k]; + if va != vb: + return -1 if va < vb else 1 + return 0 +rows_sorted = sorted(rows_tuple, key=cmp_func(cmp_student), reverse=True) +``` + +### Tests / Samples +Name|Descr|Other +---|---|--- +tests/test_msorted.py|msorted unit tests|- +tests/performance_tests.py|Tunable performance tests using asyncio | requires pandas +tests/hand_test.py|Hand testing|- diff --git a/dev.env b/dev.env new file mode 100644 index 0000000..1d3b509 --- /dev/null +++ b/dev.env @@ -0,0 +1 @@ +PYTHONPATH=./src:${PYTHONPATH} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f109b49 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,33 @@ +[tool.poetry] +name = "multisort" +version = "0.1.0" +description = "NoneType Safe Multi Column Sorting For Python" +license = "MIT" +authors = ["Timothy C. 
Quinn"] +readme = "README.md" +homepage = "https://pypi.org/project/multisort" +repository = "https://github.com/JavaScriptDude/multisort" +classifiers = [ + 'Development Status :: 4 - Beta', + 'Environment :: Console', + 'Intended Audience :: Developers', + 'Operating System :: POSIX :: Linux', + 'Operating System :: POSIX :: BSD', + 'Operating System :: POSIX :: SunOS/Solaris', + 'Operating System :: MacOS :: MacOS X', + 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Topic :: Utilities', +] + +[tool.poetry.dependencies] +python = "^3.7.9" + +[tool.poetry.dev-dependencies] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/src/multisort/__init__.py b/src/multisort/__init__.py new file mode 100644 index 0000000..6ab5b64 --- /dev/null +++ b/src/multisort/__init__.py @@ -0,0 +1 @@ +from .multisort import msorted, cmp_func, reversor \ No newline at end of file diff --git a/src/multisort/multisort.py b/src/multisort/multisort.py new file mode 100644 index 0000000..eb9bdc3 --- /dev/null +++ b/src/multisort/multisort.py @@ -0,0 +1,110 @@ +######################################### +# .: multisort.py :. +# Simplified Multi-Column Sorting For Lists of records +# Installation: +# . pip install multisort +# Author: Timothy C. Quinn +# Home: https://pypi.org/project/multisort +# Licence: MIT +######################################### +from functools import cmp_to_key +cmp_func = cmp_to_key + + +# .: msorted :. +# spec is a list one of the following +# +# (,) +# (, ) +# where: +# Property, Key or Index for 'column' in row +# dict. Options: +# reverse: opt - reversed sort (defaults to False) +# clean: opt - callback to clean / alter data in 'field' +# none_first: opt - If True, None will be at top of sort. 
Default is False (bottom) +class Comparator: + @classmethod + def new(cls, *args): + if len(args) == 1 and isinstance(args[0], (int,str)): + _c = Comparator(spec=args[0]) + else: + _c = Comparator(spec=args) + return cmp_to_key(_c._compare_a_b) + + def __init__(self, spec): + if isinstance(spec, (int, str)): + self.spec = ( (spec, False, None, False), ) + else: + a=[] + for s_c in spec: + if isinstance(s_c, (int, str)): + a.append((s_c, None, None, False)) + else: + assert isinstance(s_c, tuple) and len(s_c) in (1,2),\ + f"Invalid spec. Must have 1 or 2 params per record. Got: {s_c}" + if len(s_c) == 1: + a.append((s_c[0], None, None, False)) + elif len(s_c) == 2: + s_opts = s_c[1] + assert not s_opts is None and isinstance(s_opts, dict), f"Invalid Spec. Second value must be a dict. Got {getClassName(s_opts)}" + a.append((s_c[0], s_opts.get('reverse', False), s_opts.get('clean', None), s_opts.get('none_first', False))) + + self.spec = a + + def _compare_a_b(self, a, b): + if a is None: return 1 + if b is None: return -1 + for k, desc, clean, none_first in self.spec: + try: + try: + va = a[k]; vb = b[k] + except Exception as ex: + va = getattr(a, k); vb = getattr(b, k) + + except Exception as ex: + raise KeyError(f"Key {k} is not available in object(s) given a: {a.__class__.__name__}, b: {a.__class__.__name__}") + + if clean: + va = clean(va) + vb = clean(vb) + + if va != vb: + if va is None: return -1 if none_first else 1 + if vb is None: return 1 if none_first else -1 + if desc: + return -1 if va > vb else 1 + else: + return 1 if va > vb else -1 + + return 0 + + +def msorted(rows, spec, reverse:bool=False): + if isinstance(spec, (int, str)): + _c = Comparator.new(spec) + else: + _c = Comparator.new(*spec) + return sorted(rows, key=_c, reverse=reverse) + +# For use in the multi column sorted syntax to sort by 'grade' and then 'attend' descending +# dict example: +# rows_sorted = sorted(rows, key=lambda o: ((None if o['grade'] is None else o['grade'].lower()), 
reversor(o['attend'])), reverse=True) +# object example: +# rows_sorted = sorted(rows, key=lambda o: ((None if o.grade is None else o.grade.lower()), reversor(o.attend)), reverse=True) +# list, tuple example: +# rows_sorted = sorted(rows, key=lambda o: ((None if o[COL_GRADE] is None else o[COL_GRADE].lower()), reversor(o[COL_ATTEND])), reverse=True) +# where: COL_GRADE and COL_ATTEND are column indexes for values +class reversor: + def __init__(self, obj): + self.obj = obj + def __eq__(self, other): + return other.obj == self.obj + def __lt__(self, other): + return False if self.obj is None else \ + True if other.obj is None else \ + other.obj < self.obj + + +def getClassName(o): + return None if o == None else type(o).__name__ + diff --git a/tests/hand_test.py b/tests/hand_test.py new file mode 100644 index 0000000..b375447 --- /dev/null +++ b/tests/hand_test.py @@ -0,0 +1,97 @@ +import sys +from multisort import msorted, cmp_func, reversor +import test_util as util +pc = util.pc + +def main(): + # test_msorted_dict_single() + # test_msorted_obj_single() + # test_msorted_tuple_single() + + test_msorted_dict_multi() + # test_msorted_obj_multi() + # test_msorted_tuple_multi() + + +students_dict = [ + {'idx': 0, 'name': 'joh', 'grade': 'C', 'attend': 100} + ,{'idx': 1, 'name': 'jan', 'grade': 'a', 'attend': 80} + ,{'idx': 2, 'name': 'dav', 'grade': 'B', 'attend': 85} + ,{'idx': 3, 'name': 'bob' , 'grade': 'C', 'attend': 85} + ,{'idx': 4, 'name': 'jim' , 'grade': 'F', 'attend': 55} + ,{'idx': 5, 'name': 'joe' , 'grade': None, 'attend': 55} +] + +class Student(): + def __init__(self, idx, name, grade, attend): + self.idx = idx + self.name = name + self.grade = grade + self.attend = attend + def __str__(self): return f"name: {self.name}, grade: {self.grade}, attend: {self.attend}" + def __repr__(self): return self.__str__() + +students_obj = [ + Student(0, 'joh', 'C', 100) + ,Student(1, 'jan', 'a', 80) + ,Student(2, 'dav', 'B', 85) + ,Student(3, 'bob', 'C', 85) + 
,Student(4, 'jim', 'F', 55) + ,Student(5, 'joe', None, 55) +] + +student_tuple = [ + (0, 'joh', 'C', 100) + ,(1, 'jan', 'a', 80) + ,(2, 'dav', 'B', 85) + ,(3, 'bob', 'C', 85) + ,(4, 'jim', 'F', 55) + ,(5, 'joe', None, 55) +] +(COL_IDX, COL_NAME, COL_GRADE, COL_ATTEND) = range(0,4) + + + + + +def test_msorted_dict_single(): + _sorted = msorted(students_dict, 'grade', reverse=False) + _print_stud(_sorted) + + +def test_msorted_obj_single(): + _sorted = msorted(students_obj, 'attend', reverse=False) + _print_stud(_sorted) + + +def test_msorted_tuple_single(): + _sorted = msorted(student_tuple, COL_ATTEND, reverse=False) + _print_stud(_sorted) + + +def test_msorted_dict_multi(): + _sorted = msorted(students_dict, [('grade', {'reverse': False, 'none_first': False}), 'attend'], reverse=False) + _print_stud(_sorted) + + +def test_msorted_obj_multi(): + _sorted = msorted(students_obj, [('grade', {'reverse': True}), 'attend'], reverse=False) + _print_stud(_sorted) + + +def test_msorted_tuple_multi(): + _sorted = msorted(student_tuple, [(COL_GRADE, {'reverse': True}), COL_ATTEND], reverse=False) + _print_stud(_sorted) + + +def _print_stud(rows): + print(f"\n{util.getFuncName(2)}() Results:") + for row in rows: + print(util.pre(str(row))) + print('\n') + + + + +if __name__ == '__main__': + main() diff --git a/tests/performance_tests.py b/tests/performance_tests.py new file mode 100644 index 0000000..d5fc9a0 --- /dev/null +++ b/tests/performance_tests.py @@ -0,0 +1,113 @@ +import asyncio +import pandas +from random import randint +from multisort import msorted, cmp_func, reversor +import test_util as util +pc = util.pc + +students = [ + {'idx': 0, 'name': 'joh', 'grade': 'C', 'attend': 100} + ,{'idx': 1, 'name': 'jan', 'grade': 'a', 'attend': 80} + ,{'idx': 2, 'name': 'dav', 'grade': 'B', 'attend': 85} + ,{'idx': 3, 'name': 'bob' , 'grade': 'C', 'attend': 85} + ,{'idx': 4, 'name': 'jim' , 'grade': 'F', 'attend': 55} + ,{'idx': 5, 'name': 'joe' , 'grade': None, 'attend': 55} +] 
+ITERATIONS = 10 +EXTRA_ROW = 500 + +def main(): + results = asyncio.get_event_loop().run_until_complete(run_tests()) + rrows = [] + for result in results: + if isinstance(result, Exception): raise result + rrows.append(result) + + rrows = msorted(rrows, 1) + table = util.quickTT(['test', 's/iter']) + for rrow in rrows: table.add_row([rrow[0], f"{(rrow[1] / ITERATIONS):.7f}"]) + print(f"\nSummary for {ITERATIONS} iteration{'s' if ITERATIONS > 1 else ''} with {len(students)} rows:\n{table.draw()}\n") + + +async def run_tests(): + global students + + # Add an additional number of records for testing + if EXTRA_ROW > 0: + for i in range(1,EXTRA_ROW+1): + students.append({'idx': len(students), 'name':'rnd', 'grade': 'ABCDEF'[randint(0,5)], 'attend': randint(0,100)}) + + coroutines = [ + run_cmp_func(students[:]), + run_msorted(students[:]), + run_reversor(students[:]), + run_reversor_func(students[:]), + run_pandas(students[:]), + ] + res = await asyncio.gather(*coroutines, return_exceptions=True) + + return res + + + +async def run_cmp_func(rows): + sw = util.StopWatch() + def cmp_student(a,b): + k='grade'; va=a[k]; vb=b[k] + if va != vb: + if va is None: return -1 + if vb is None: return 1 + return -1 if va > vb else 1 + k='attend'; va=a[k]; vb=b[k]; + if va != vb: return -1 if va < vb else 1 + return 0 + + for i in range(0,ITERATIONS): + rows_sorted = sorted(rows, key=cmp_func(cmp_student), reverse=True) + + return ('cmp_func', sw.elapsed(prec=7)) + + + +async def run_msorted(rows): + sw = util.StopWatch() + for i in range(0,ITERATIONS): + rows_sorted = msorted(rows, spec=( + ('grade', {'reverse': True, 'clean': lambda v: None if v is None else v.lower()}) + ,('attend', {'reverse': True}) + ), reverse=True) + return ('msorted', sw.elapsed(prec=7)) + +async def run_reversor(rows): + sw = util.StopWatch() + for i in range(0,ITERATIONS): + rows_sorted = sorted(rows, key=lambda o: ( + reversor(None if o['grade'] is None else o['grade'].lower()) + ,reversor(o['attend']) 
+ ), reverse=True) + return ('reversor', sw.elapsed(prec=7)) + +async def run_reversor_func(rows): + sw = util.StopWatch() + def _student_sort(o): + return ( reversor(None if o['grade'] is None else o['grade'].lower()) + ,reversor(o['attend']) + ) + for i in range(0,ITERATIONS): + rows_sorted = sorted(rows, key=_student_sort, reverse=True) + + return ('reversor func', sw.elapsed(prec=7)) + +async def run_pandas(rows): + sw = util.StopWatch() + + for i in range(0,ITERATIONS): + df = pandas.DataFrame(rows[:]) + df.sort_values(by = ['grade', 'attend'], ascending = [False, False], na_position = 'last') + # d_rows_sorted = list(df.T.to_dict().values()) + + return ('pandas', sw.elapsed(prec=7)) + + +if __name__ == '__main__': + main() diff --git a/tests/test_msorted.py b/tests/test_msorted.py new file mode 100644 index 0000000..a65a482 --- /dev/null +++ b/tests/test_msorted.py @@ -0,0 +1,237 @@ +import sys +import unittest +from multisort import msorted +import test_util as util +pc = util.pc + +FAILFAST = True + +STUDENTS_BASE = [ + (0, 'joh', 'a' , 100) + ,(1, 'joe', 'B' , 80) + ,(2, 'dav', 'A' , 85) + ,(3, 'bob', 'C' , 85) + ,(4, 'jim', None , 55) + ,(5, 'jan', 'B' , 70) +] +(COL_IDX, COL_NAME, COL_GRADE, COL_ATTEND) = range(0,4) +STUDENT_COLS=['idx', 'name', 'grade', 'attend'] + +def clean_grade(v): + if v is None: return v + return v.upper() + + +MSORTED_TESTS=[ + ( (2,0,5,1,3,4), [(COL_GRADE, {'reverse': False, 'clean': clean_grade}) , (COL_ATTEND, {'reverse': False})]), + ( (0,2,1,5,3,4), [(COL_GRADE, {'reverse': False, 'clean': clean_grade}) , (COL_ATTEND, {'reverse': True})]), + ( (0,2,1,5,3,4), [(COL_GRADE, {'reverse': False, 'clean': clean_grade}) , (COL_ATTEND, {'reverse': True})]), + ( (3,1,5,0,2,4), [(COL_GRADE, {'reverse': True , 'clean': clean_grade}) , (COL_ATTEND, {'reverse': True})]), + ( (3,5,1,2,0,4), [(COL_GRADE, {'reverse': True , 'clean': clean_grade}) , (COL_ATTEND, {'reverse': False})]), + ( (2,1,5,3,0,4), COL_GRADE), + ( (2,1,5,3,0,4), 
[COL_GRADE]), + ( (2,5,1,3,0,4), [COL_GRADE, COL_NAME]), +] + + +class Student(): + def __init__(self, idx, name, grade, attend): + self.idx = idx + self.name = name + self.grade = grade + self.attend = attend + def __str__(self): return f"[{self.idx}] name: {self.name}, grade: {self.grade}, attend: {self.attend}" + def __repr__(self): return f" {self.__str__()}" + +class MultiSortBase(unittest.TestCase): + + def _run_tests(self, rows_as, row_as, rows_in): + test_name = sys._getframe(1).f_code.co_name + for i, (expected, spec) in enumerate(MSORTED_TESTS): + for j in range(0,1): + if j == 0: + reverse = False + else: + reverse = True + expected = reversed(expected) + + spec = self._fix_SORT_TESTS_spec(spec, row_as) + + rows_sorted = msorted(rows_in, spec, reverse=reverse) + + if rows_as == 'list': + self.assertIsInstance(rows_in, list) + elif rows_as == 'tuple': + self.assertIsInstance(rows_in, tuple) + + bOk = self._check_sort(expected, rows_sorted, row_as) + + _dump = dump_sort(i, spec, rows_sorted, rows_as, row_as, expected) + + if not bOk: + self.fail(msg=f"\nTest Name: {test_name}\nTestSet: {i}\n{_dump}\n") + else: + pass + # pc(f'\n.: sort_dump :.\n{_dump}\n') + + def _fix_SORT_TESTS_spec(self, spec, row_as): + if row_as in ('list', 'tuple'): + return spec + elif row_as in ('dict', 'object'): + pass + else: + raise Exception(f"Unexpected row_as: {row_as}") + + if isinstance(spec, (int)): + return STUDENT_COLS[spec] + + a = [] + for spec_c in spec: + if isinstance(spec_c, int): + a.append(STUDENT_COLS[spec_c]) + else: + spec_c = [*spec_c] + spec_c[0] = STUDENT_COLS[spec_c[0]] + a.append(tuple(spec_c)) + spec = a + + return tuple(spec) + + + def _check_sort(self, expected, rows, row_as) -> bool: + assert len(expected) == len(STUDENTS_BASE), f"Invalid expected length ({len(expected)}). 
got: {len(STUDENTS_BASE)} ({expected})" + indexable = row_as in ('list', 'tuple') + for i, row in enumerate(rows): + if row_as == 'list' and not isinstance(row, list): + self.fail(f"Expecting list but got {util.getClassName(row)}") + elif row_as == 'tuple' and not isinstance(row, tuple): + self.fail(f"Expecting tuple but got {util.getClassName(row)}") + elif row_as == 'dict' and not isinstance(row, dict): + self.fail(f"Expecting dict but got {util.getClassName(row)}") + elif row_as == 'object' and not isinstance(row, object): + self.fail(f"Expecting object but got {util.getClassName(row)}") + + idx = row[0] if indexable else row.idx if row_as == 'object' else row['idx'] + if not expected[i] == idx: return False + return True + + + +class TupleTests(MultiSortBase): + # TupleTests.test_list_of_tuples + def test_list_of_tuples(self): + (rows_as, row_as, rows_in) = _get_rows_in(rows_list=True, row_as_tuple=True) + self._run_tests(rows_as, row_as, rows_in) + + # TupleTests.test_tuple_of_tuples + def test_tuple_of_tuples(self): + (rows_as, row_as, rows_in) = _get_rows_in(rows_tuple=True, row_as_tuple=True) + self._run_tests(rows_as, row_as, rows_in) + + +class DictTests(MultiSortBase): + # DictTests.test_list_of_dicts + def test_list_of_dicts(self): + (rows_as, row_as, rows_in) = _get_rows_in(rows_list=True, row_as_dict=True) + self._run_tests(rows_as, row_as, rows_in) + + # DictTests.test_tuple_of_dict + def test_tuple_of_dict(self): + (rows_as, row_as, rows_in) = _get_rows_in(rows_tuple=True, row_as_dict=True) + self._run_tests(rows_as, row_as, rows_in) + + +class ObjectTests(MultiSortBase): + # ObjectTests.test_list_of_objects + def test_list_of_objects(self): + (rows_as, row_as, rows_in) = _get_rows_in(rows_list=True, row_as_obj=True) + self._run_tests(rows_as, row_as, rows_in) + + # ObjectTests.test_tuple_of_objects + def test_tuple_of_objects(self): + (rows_as, row_as, rows_in) = _get_rows_in(rows_tuple=True, row_as_obj=True) + self._run_tests(rows_as, row_as, 
rows_in) + + +def norm_spec_item(spec_c): + if isinstance(spec_c, (int, str)): + return (spec_c, None, None) + else: + assert isinstance(spec_c, tuple) and len(spec_c) in (1,2),\ + f"Invalid spec. Must have 1 or 2 params per record. Got: {spec_c}" + if len(spec_c) == 1: + return (spec_c[0], None, None) + elif len(spec_c) == 2: + s_opts = spec_c[1] + assert not s_opts is None and isinstance(s_opts, dict), f"Invalid Spec. Second value must be a dict. Got {util.getClassName(s_opts)}" + return (spec_c[0], s_opts.get('reverse', False), s_opts.get('clean', None)) + + +def dump_sort(stest_no, spec, rows, rows_as, row_as, expected): + sb = util.StringBuffer('Rows of ') + sb.a(rows_as) + sb.a(' sorted by ') + indexable = row_as in ('list', 'tuple') + if isinstance(spec, (int, str)): + sb.a(spec).a(" (a)") + else: + for i, spec_c in enumerate(spec): + (key, desc, clean) = norm_spec_item(spec_c) + if i > 0: sb.a(", ") + if indexable: + sb.a(STUDENT_COLS[key]) + else: + sb.a(key) + sb.a(' (d)' if desc else ' (a)') + + + sb.a(':\n') + + table = util.quickTT(STUDENT_COLS) + + bOk = True + for i, row in enumerate(rows): + if indexable: + table.add_row(row) + idx = row[0] + else: + if row_as == 'object': + table.add_row([row.idx, row.name, row.grade, row.attend]) + idx = row.idx + else: + table.add_row([row['idx'], row['name'], row['grade'], row['attend']]) + idx = row['idx'] + if not expected[i] == idx: bOk = False + + sb.a(util.pre(table.draw())) + if bOk: + sb.a("\n check: pass") + else: + sb.a('\n check: FAIL! 
expected: ').a(expected) + + return sb.ts() + + + + + +def _get_rows_in(rows_list=False, rows_tuple=False, row_as_dict=False, row_as_obj=False, row_as_list=False, row_as_tuple=False): + if row_as_dict: + rows_in = [{'idx': r[COL_IDX], 'name': r[COL_NAME], 'grade': r[COL_GRADE], 'attend': r[COL_ATTEND]} for r in STUDENTS_BASE] + elif row_as_obj: + rows_in = [Student(*r) for r in STUDENTS_BASE] + elif row_as_tuple: + rows_in = [tuple(r) for r in STUDENTS_BASE] + elif row_as_list: + rows_in = STUDENTS_BASE + + return ( 'list' if rows_list else 'tuple' + ,'tuple' if row_as_tuple else 'list' if row_as_list else 'dict' if row_as_dict else 'object' + ,tuple(rows_in) if rows_tuple else rows_in) + + +if __name__ == "__main__": + unittest.main() + sys.exit(0) + + diff --git a/tests/test_util.py b/tests/test_util.py new file mode 100644 index 0000000..72cdec3 --- /dev/null +++ b/tests/test_util.py @@ -0,0 +1,63 @@ +import sys +import time +import texttable as tt + +def pc(*args): + if len(args) == 0: return + if len(args) == 1: print(args[0]); return + a = [] + for i, v in enumerate(args): a.append( ( v if i == 0 or isinstance(v, (int, float, complex, str)) else str(v) ) ) + print( a[0].format(*a[1:]) ) + +def quickTT(header:list, max_width:int=120) -> tt: + table = tt.Texttable(max_width=max_width) + table.set_cols_align(list('r'*len(header))) + table.set_cols_dtype(list('t'*len(header))) + table.set_deco(table.VLINES) + table.header(header) + return table + +def pre(s, iChars=2): + sPad = ' ' * iChars + iF = s.find('\n') + if iF == -1: + return sPad + s + sb = [] + iFL = 0 + while iF > -1: + sb.append(sPad + s[iFL:iF]) + iFL = iF + 1 + iF = s.find('\n', iF + 1) + sb.append('' if iF == len(s) else sPad + s[iFL:]) + return '\n'.join(sb) + +class StringBuffer: + def __init__(self, s:str=None): + self._a=[] if s is None else [s] + def a(self, v): + self._a.append(str(v)) + return self + def al(self, v): + self._a.append(str(v) + '\n') + return self + def ts(self, delim=''): + 
return delim.join(self._a) + +def getClassName(o): + if o == None: return None + return type(o).__name__ + +def getFuncName(depth=1): + return sys._getframe(depth).f_code.co_name + +class StopWatch: + def __init__(self): + self.start() + def start(self): + self._startTime = time.time() + def getStartTime(self): + return self._startTime + def elapsed(self, prec=3): + prec = 3 if prec is None or not isinstance(prec, int) else prec + diff= time.time() - self._startTime + return round(diff, prec) \ No newline at end of file