From 64d68a06a15a4d067131327513843a02f12e40ff Mon Sep 17 00:00:00 2001 From: dmitryshepelev Date: Sun, 6 Jun 2021 21:49:51 +0300 Subject: [PATCH 1/2] Add default replacement keys parameter for insert and upsert methods --- dataset/table.py | 33 +++++++++++++++++++++++++++------ test/test_dataset.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/dataset/table.py b/dataset/table.py index 0b817ef..830c267 100644 --- a/dataset/table.py +++ b/dataset/table.py @@ -3,7 +3,7 @@ import threading from banal import ensure_list -from sqlalchemy import func, select, false +from sqlalchemy import func, select, false, UniqueConstraint from sqlalchemy.sql import and_, expression from sqlalchemy.sql.expression import bindparam, ClauseElement from sqlalchemy.schema import Column, Index @@ -121,10 +121,12 @@ def insert(self, row, ensure=None, types=None): return res.inserted_primary_key[0] return True - def insert_ignore(self, row, keys, ensure=None, types=None): + def insert_ignore(self, row, keys=None, ensure=None, types=None): """Add a ``row`` dict into the table if the row does not exist. If rows with matching ``keys`` exist no change is made. + If ``keys`` are not passed, keys will be + replaced by columns with unique constraints of table. Setting ``ensure`` results in automatically creating missing columns, i.e., keys of the row are not table columns. @@ -139,6 +141,7 @@ def insert_ignore(self, row, keys, ensure=None, types=None): data = dict(id=10, title='I am a banana!') table.insert_ignore(data, ['id']) """ + keys = keys or self.get_unique_columns() row = self._sync_columns(row, ensure, types=types) if self._check_ensure(ensure): self.create_index(keys) @@ -184,7 +187,7 @@ def insert_many(self, rows, chunk_size=1000, ensure=None, types=None): self.table.insert().execute(chunk) chunk = [] - def update(self, row, keys, ensure=None, types=None, return_count=False): + def update(self, row, keys=None, ensure=None, types=None, return_count=False): """Update a row in the table. The update is managed via the set of column names stated in ``keys``: @@ -200,7 +203,11 @@ def update(self, row, keys, ensure=None, types=None, return_count=False): If keys in ``row`` update columns not present in the table, they will be created based on the settings of ``ensure`` and ``types``, matching the behavior of :py:meth:`insert() `. + + If ``keys`` are not passed, keys will be + replaced by columns with unique constraints of table. """ + keys = keys or self.get_unique_columns() row = self._sync_columns(row, ensure, types=types) args, row = self._keys_to_args(row, keys) clause = self._args_to_clause(args) @@ -213,7 +220,7 @@ def update(self, row, keys, ensure=None, types=None, return_count=False): if return_count: return self.count(clause) - def update_many(self, rows, keys, chunk_size=1000, ensure=None, types=None): + def update_many(self, rows, keys=None, chunk_size=1000, ensure=None, types=None): """Update many rows in the table at a time. This is significantly faster than updating them one by one. Per default @@ -222,7 +229,10 @@ def update_many(self, rows, keys, chunk_size=1000, ensure=None, types=None): See :py:meth:`update() ` for details on the other parameters. + If ``keys`` are not passed, keys will be + replaced by columns with unique constraints of table. """ + keys = keys or self.get_unique_columns() keys = ensure_list(keys) chunk = [] @@ -247,16 +257,19 @@ def update_many(self, rows, keys, chunk_size=1000, ensure=None, types=None): self.db.executable.execute(stmt, chunk) chunk = [] - def upsert(self, row, keys, ensure=None, types=None): + def upsert(self, row, keys=None, ensure=None, types=None): """An UPSERT is a smart combination of insert and update. If rows with matching ``keys`` exist they will be updated, otherwise a new row is inserted in the table. + If ``keys`` are not passed, keys will be + replaced by columns with unique constraints of table. :: data = dict(id=10, title='I am a banana!') table.upsert(data, ['id']) """ + keys = keys or self.get_unique_columns() row = self._sync_columns(row, ensure, types=types) if self._check_ensure(ensure): self.create_index(keys) @@ -265,7 +278,7 @@ def upsert(self, row, keys, ensure=None, types=None): return self.insert(row, ensure=False) return True - def upsert_many(self, rows, keys, chunk_size=1000, ensure=None, types=None): + def upsert_many(self, rows, keys=None, chunk_size=1000, ensure=None, types=None): """ Sorts multiple input rows into upserts and inserts. Inserts are passed to insert and upserts are updated. @@ -296,6 +309,14 @@ def delete(self, *clauses, **filters): rp = self.db.executable.execute(stmt) return rp.rowcount > 0 + def get_unique_columns(self): + return list({ + column.name + for constraint in self._table.constraints + if isinstance(constraint, UniqueConstraint) + for column in constraint.columns + }) + def _reflect_table(self): """Load the tables definition from the database.""" with self.db.lock: diff --git a/test/test_dataset.py b/test/test_dataset.py index 9c2bb8e..241f785 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -156,8 +156,31 @@ def setUp(self): for row in TEST_DATA: self.tbl.insert(row) + self.tbl_uniq_columns = self._init_table_with_unique_columns() + + def _init_table_with_unique_columns(self): + table = self.db.create_table('uniq_table') + table.create_column('key1', self.db.types.string(255), unique=True, nullable=False) + + if "sqlite" not in self.db.engine.dialect.dbapi.__name__: + # we can't execute create_column for sqlite multiple times + table.create_column('key2', self.db.types.float, unique=False) + table.create_column('key3', self.db.types.integer, unique=True) + + return table + + def _get_data_for_table_with_unique_columns(self): + data = dict(key1='I am banana') + unique_columns = ['key1'] + if "sqlite" not in self.db.engine.dialect.dbapi.__name__: + data.update(key2=3.1456, key3=42) + unique_columns.append('key3') + + return data, unique_columns + def tearDown(self): self.tbl.drop() + self.tbl_uniq_columns.drop() def test_insert(self): assert len(self.tbl) == len(TEST_DATA), len(self.tbl) @@ -237,6 +260,14 @@ def test_upsert_id(self): table.upsert(data, ["id"]) assert len(table) == 1, len(table) + def test_upsert_with_default_unique_columns(self): + data, unique_columns = self._get_data_for_table_with_unique_columns() + table = self.tbl_uniq_columns + table.upsert(data) + assert len(table) == 1, len(table) + table.upsert(data, unique_columns) + assert len(table) == 1, len(table) + def test_update_while_iter(self): for row in self.tbl: row["foo"] = "bar" @@ -540,6 +571,10 @@ def test_empty_query(self): empty = list(self.tbl.find(place="not in data")) assert len(empty) == 0, empty + def test_unique_columns(self): + data, unique_columns = self._get_data_for_table_with_unique_columns() + assert len(set(self.tbl_uniq_columns.get_unique_columns()) & set(unique_columns)) == len(unique_columns) + class Constructor(dict): """Very simple low-functionality extension to ``dict`` to From 53082438a8dfc7fd07b89ca38caa1e01c4436c91 Mon Sep 17 00:00:00 2001 From: dshepelev15 Date: Mon, 7 Jun 2021 23:26:06 +0300 Subject: [PATCH 2/2] Add warning for empty unique constrains of table --- dataset/table.py | 37 +++++++++++++++++++++++++++---------- test/test_dataset.py | 2 +- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/dataset/table.py b/dataset/table.py index 830c267..907333e 100644 --- a/dataset/table.py +++ b/dataset/table.py @@ -3,7 +3,7 @@ import threading from banal import ensure_list -from sqlalchemy import func, select, false, UniqueConstraint +from sqlalchemy import func, select, false from sqlalchemy.sql import and_, expression from sqlalchemy.sql.expression import bindparam, ClauseElement from sqlalchemy.schema import Column, Index @@ -141,7 +141,11 @@ def insert_ignore(self, row, keys=None, ensure=None, types=None): data = dict(id=10, title='I am a banana!') table.insert_ignore(data, ['id']) """ - keys = keys or self.get_unique_columns() + if not keys and not self.unique_columns: + log.warning("Insert ignore can not be executed. Table does not have unique columns") + return + + keys = keys or self.unique_columns row = self._sync_columns(row, ensure, types=types) if self._check_ensure(ensure): self.create_index(keys) @@ -207,7 +211,11 @@ def update(self, row, keys=None, ensure=None, types=None, return_count=False): If ``keys`` are not passed, keys will be replaced by columns with unique constraints of table. """ - keys = keys or self.get_unique_columns() + if not keys and not self.unique_columns: + log.warning("Update can not be executed. Table does not have unique columns") + return + + keys = keys or self.unique_columns row = self._sync_columns(row, ensure, types=types) args, row = self._keys_to_args(row, keys) clause = self._args_to_clause(args) @@ -232,7 +240,11 @@ def update_many(self, rows, keys=None, chunk_size=1000, ensure=None, types=None) If ``keys`` are not passed, keys will be replaced by columns with unique constraints of table. """ - keys = keys or self.get_unique_columns() + if not keys and not self.unique_columns: + log.warning("Update can not be executed. Table does not have unique columns") + return + + keys = keys or self.unique_columns keys = ensure_list(keys) chunk = [] @@ -269,7 +281,11 @@ def upsert(self, row, keys=None, ensure=None, types=None): data = dict(id=10, title='I am a banana!') table.upsert(data, ['id']) """ - keys = keys or self.get_unique_columns() + if not keys and not self.unique_columns: + log.warning("Upsert can not be executed. Table does not have unique columns") + return + + keys = keys or self.unique_columns row = self._sync_columns(row, ensure, types=types) if self._check_ensure(ensure): self.create_index(keys) @@ -309,12 +325,13 @@ def delete(self, *clauses, **filters): rp = self.db.executable.execute(stmt) return rp.rowcount > 0 - def get_unique_columns(self): + @property + def unique_columns(self): + """Get table unique columns""" + u_constraints = self.db.inspect.get_unique_constraints(self.name, schema=self.db.schema) return list({ - column.name - for constraint in self._table.constraints - if isinstance(constraint, UniqueConstraint) - for column in constraint.columns + column for constraint in u_constraints + for column in constraint['column_names'] }) def _reflect_table(self): diff --git a/test/test_dataset.py b/test/test_dataset.py index 241f785..7d03492 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -573,7 +573,7 @@ def test_empty_query(self): def test_unique_columns(self): data, unique_columns = self._get_data_for_table_with_unique_columns() - assert len(set(self.tbl_uniq_columns.get_unique_columns()) & set(unique_columns)) == len(unique_columns) + assert len(set(self.tbl_uniq_columns.unique_columns) & set(unique_columns)) == len(unique_columns) class Constructor(dict):