Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release v0.16.3 #384

Merged
merged 3 commits into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datacompy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
Then extended to carry that functionality over to Spark Dataframes.
"""

__version__ = "0.16.2"
__version__ = "0.16.3"

import platform
from warnings import warn
Expand Down
4 changes: 2 additions & 2 deletions datacompy/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def df1(self, df1: tuple[Union[str, "sp.DataFrame"], str | None]) -> None:
if len(table_name) != 3:
errmsg = f"{df} is not a valid table name. Be sure to include the target db and schema."
raise ValueError(errmsg)
self.df1_name = df_name.upper() if df_name else table_name[2]
self.df1_name = df_name.upper() if df_name else "__".join(table_name)
self._df1 = self.session.table(df)
else:
self._df1 = df
Expand All @@ -184,7 +184,7 @@ def df2(self, df2: tuple[Union[str, "sp.DataFrame"], str | None]) -> None:
if len(table_name) != 3:
errmsg = f"{df} is not a valid table name. Be sure to include the target db and schema."
raise ValueError(errmsg)
self.df2_name = df_name.upper() if df_name else table_name[2]
self.df2_name = df_name.upper() if df_name else "__".join(table_name)
self._df2 = self.session.table(df)
else:
self._df2 = df
Expand Down
7 changes: 7 additions & 0 deletions docs/source/snowflake_usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ For ``SnowflakeCompare``
- Joining is done using ``EQUAL_NULL`` which is the equality test that is safe for null values.
- Compares ``snowflake.snowpark.DataFrame``, which can be provided as either raw Snowflake dataframes
or as the full names of valid Snowflake tables, which we will process into Snowpark dataframes.
- Note that if Snowflake tables are provided, the dataframe names will default to the full names of their
respective Snowflake tables. This can be overridden by setting the ``df1_name`` and ``df2_name`` arguments
when creating the Compare object.


SnowflakeCompare setup
Expand Down Expand Up @@ -57,6 +60,8 @@ Provide Snowpark dataframes
session,
df_1,
df_2,
#df1_name='original', # optional param for naming df1
#df2_name='new' # optional param for naming df2
join_columns=['acct_id'],
rel_tol=1e-03,
abs_tol=1e-04,
Expand All @@ -80,6 +85,8 @@ Given the dataframes from the prior examples...
session,
f"{db}.{schema}.toy_table_1",
f"{db}.{schema}.toy_table_2",
#df1_name='original', # optional param for naming df1
#df2_name='new' # optional param for naming df2
join_columns=['acct_id'],
rel_tol=1e-03,
abs_tol=1e-04,
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ maintainers = [
{ name="Raymond Haffar", email="[email protected]" },
]
license = {text = "Apache Software License"}
dependencies = ["pandas<=2.2.3,>=0.25.0", "numpy<=2.2.0,>=1.22.0", "ordered-set<=4.1.0,>=4.0.2", "polars[pandas]<=1.17.1,>=0.20.4"]
dependencies = ["pandas<=2.2.3,>=0.25.0", "numpy<=2.2.3,>=1.22.0", "ordered-set<=4.1.0,>=4.0.2", "polars[pandas]<=1.22.0,>=0.20.4"]
requires-python = ">=3.10.0"
classifiers = [
"Intended Audience :: Developers",
Expand Down Expand Up @@ -55,7 +55,7 @@ version = {attr = "datacompy.__version__"}
python-tag = "py3"

[project.optional-dependencies]
fugue = ["fugue[duckdb,dask,ray]<=0.9.1,>=0.8.7"]
fugue = ["fugue[dask,duckdb,ray]<=0.9.1,>=0.8.7"]
spark = ["pyspark[connect]>=3.1.1; python_version < \"3.11\"", "pyspark[connect]>=3.4; python_version >= \"3.11\""]
snowflake = ["snowflake-connector-python", "snowflake-snowpark-python"]
docs = ["sphinx", "furo", "myst-parser"]
Expand Down
25 changes: 25 additions & 0 deletions tests/test_snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,31 @@ def test_compare_table_setter_bad(snowpark_session):
)


@mock.patch(
    "datacompy.snowflake.SnowflakeCompare._validate_dataframe", new=mock.MagicMock()
)
@mock.patch("datacompy.snowflake.SnowflakeCompare._compare", new=mock.MagicMock())
def test_compare_table_unique_names(snowpark_session):
    """Tables that share a bare name but live in a different DB or schema
    must still be assigned distinct default dataframe names."""
    table_pairs = [
        # Same schema/table name, different DB
        ("test_db1.test_schema.test_name", "test_db2.test_schema.test_name"),
        # Same DB/table name, different schema
        ("test_db.test_schema1.test_name", "test_db.test_schema2.test_name"),
    ]
    for left_table, right_table in table_pairs:
        comparison = SnowflakeCompare(
            snowpark_session,
            left_table,
            right_table,
            ["A"],
        )
        assert comparison.df1_name != comparison.df2_name


def test_compare_table_setter_good(snowpark_session):
data = """ACCT_ID,DOLLAR_AMT,NAME,FLOAT_FLD,DATE_FLD
10000001234,123.4,George Michael Bluth,14530.155,
Expand Down