Skip to content

Commit

Permalink
timetravel in duckdb/polars
Browse files Browse the repository at this point in the history
  • Loading branch information
aersam committed Mar 21, 2024
1 parent 9bca7b8 commit 6799d68
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 2 deletions.
1 change: 0 additions & 1 deletion deltalake2db/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,6 @@ def get_sql_for_delta_expr(
check_is_supported(dt)

delta_table_cte_name = delta_table_cte_name or sql_prefix + "_delta_table"
dt.update_incremental()
from deltalake.schema import PrimitiveType

file_selects: list[ex.Select] = []
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "deltalake2db"
version = "0.2.0"
version = "0.2.1"
description = ""
authors = ["Adrian Ehrsam <[email protected]>"]
license = "MIT"
Expand Down
2 changes: 2 additions & 0 deletions tests/data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
_user2
_user3
35 changes: 35 additions & 0 deletions tests/test_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,41 @@ def test_user_empty():
assert len(con.fetchall()) == 0


def test_user_add():
import shutil
import pandas as pd

shutil.rmtree("tests/data/_user2", ignore_errors=True)
shutil.copytree("tests/data/user", "tests/data/_user2")
dt = DeltaTable("tests/data/_user2")
old_version = dt.version()
from deltalake.writer import write_deltalake

write_deltalake(
dt,
pd.DataFrame({"User - iD": [1555], "FirstName": ["Hansueli"]}),
schema_mode="merge",
engine="rust",
mode="append",
)
dt.update_incremental()

dt_o = DeltaTable("tests/data/_user2")
dt_o.load_as_version(old_version)

from deltalake2db import duckdb_create_view_for_delta

with duckdb.connect() as con:
duckdb_create_view_for_delta(con, dt, "delta_table_n")
duckdb_create_view_for_delta(con, dt_o, "delta_table_o")
con.execute(
'select "User - iD" from delta_table_n except select "User - iD" from delta_table_o'
)
res = con.fetchall()
assert len(res) == 1
assert res[0][0] == 1555


def test_empty_struct():
# >>> duckdb.execute("""Select { 'lat': 1 } as tester union all select Null""").fetchall()
import pyarrow as pa
Expand Down
31 changes: 31 additions & 0 deletions tests/test_polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,37 @@ def test_col_mapping():
print(as_py_rows)


def test_user_add():
import shutil
import pandas as pd

shutil.rmtree("tests/data/_user3", ignore_errors=True)
shutil.copytree("tests/data/user", "tests/data/_user3")
dt = DeltaTable("tests/data/_user3")
old_version = dt.version()
from deltalake.writer import write_deltalake

write_deltalake(
dt,
pd.DataFrame({"User - iD": [1555], "FirstName": ["Hansueli"]}),
schema_mode="merge",
engine="rust",
mode="append",
)
dt.update_incremental()

dt_o = DeltaTable("tests/data/_user3")
dt_o.load_as_version(old_version)

from deltalake2db import polars_scan_delta
import polars as pl

nc = polars_scan_delta(dt).select(pl.col("User - iD")).collect().to_dicts()
oc = polars_scan_delta(dt_o).select(pl.col("User - iD")).collect().to_dicts()
diff = [o["User - iD"] for o in nc if o not in oc]
assert diff == [1555]


def test_strange_cols():
dt = DeltaTable("tests/data/user")

Expand Down

0 comments on commit 6799d68

Please sign in to comment.