diff --git a/datacompy/__init__.py b/datacompy/__init__.py index 347c449..966b160 100644 --- a/datacompy/__init__.py +++ b/datacompy/__init__.py @@ -18,7 +18,7 @@ Then extended to carry that functionality over to Spark Dataframes. """ -__version__ = "0.16.2" +__version__ = "0.16.3" import platform from warnings import warn diff --git a/datacompy/snowflake.py b/datacompy/snowflake.py index d2fdaf7..c7fe1ce 100644 --- a/datacompy/snowflake.py +++ b/datacompy/snowflake.py @@ -163,7 +163,7 @@ def df1(self, df1: tuple[Union[str, "sp.DataFrame"], str | None]) -> None: if len(table_name) != 3: errmsg = f"{df} is not a valid table name. Be sure to include the target db and schema." raise ValueError(errmsg) - self.df1_name = df_name.upper() if df_name else table_name[2] + self.df1_name = df_name.upper() if df_name else "__".join(table_name) self._df1 = self.session.table(df) else: self._df1 = df @@ -184,7 +184,7 @@ def df2(self, df2: tuple[Union[str, "sp.DataFrame"], str | None]) -> None: if len(table_name) != 3: errmsg = f"{df} is not a valid table name. Be sure to include the target db and schema." raise ValueError(errmsg) - self.df2_name = df_name.upper() if df_name else table_name[2] + self.df2_name = df_name.upper() if df_name else "__".join(table_name) self._df2 = self.session.table(df) else: self._df2 = df diff --git a/docs/source/snowflake_usage.rst b/docs/source/snowflake_usage.rst index 9cc2139..66e3f39 100644 --- a/docs/source/snowflake_usage.rst +++ b/docs/source/snowflake_usage.rst @@ -7,6 +7,9 @@ For ``SnowflakeCompare`` - Joining is done using ``EQUAL_NULL`` which is the equality test that is safe for null values. - Compares ``snowflake.snowpark.DataFrame``, which can be provided as either raw Snowflake dataframes or as the names of full names of valid snowflake tables, which we will process into Snowpark dataframes. +- Note that if Snowflake tables are provided, that dataframe names will default to the full name of their +respective Snowflake tables. 
This can be overridden by setting the ``df1_name`` and ``df2_name`` arguments +when creating the Compare object. SnowflakeCompare setup @@ -57,6 +60,8 @@ Provide Snowpark dataframes session, df_1, df_2, + #df1_name='original', # optional param for naming df1 + #df2_name='new' # optional param for naming df2 join_columns=['acct_id'], rel_tol=1e-03, abs_tol=1e-04, @@ -80,6 +85,8 @@ Given the dataframes from the prior examples... session, f"{db}.{schema}.toy_table_1", f"{db}.{schema}.toy_table_2", + #df1_name='original', # optional param for naming df1 + #df2_name='new' # optional param for naming df2 join_columns=['acct_id'], rel_tol=1e-03, abs_tol=1e-04, diff --git a/pyproject.toml b/pyproject.toml index 1333c4c..3fd0acc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ maintainers = [ { name="Raymond Haffar", email="raymond.haffar@capitalone.com" }, ] license = {text = "Apache Software License"} -dependencies = ["pandas<=2.2.3,>=0.25.0", "numpy<=2.2.0,>=1.22.0", "ordered-set<=4.1.0,>=4.0.2", "polars[pandas]<=1.17.1,>=0.20.4"] +dependencies = ["pandas<=2.2.3,>=0.25.0", "numpy<=2.2.3,>=1.22.0", "ordered-set<=4.1.0,>=4.0.2", "polars[pandas]<=1.22.0,>=0.20.4"] requires-python = ">=3.10.0" classifiers = [ "Intended Audience :: Developers", @@ -55,7 +55,7 @@ version = {attr = "datacompy.__version__"} python-tag = "py3" [project.optional-dependencies] -fugue = ["fugue[dask,duckdb,ray]<=0.9.1,>=0.8.7"] +fugue = ["fugue[dask,duckdb,ray]<=0.9.1,>=0.8.7"] spark = ["pyspark[connect]>=3.1.1; python_version < \"3.11\"", "pyspark[connect]>=3.4; python_version >= \"3.11\""] snowflake = ["snowflake-connector-python", "snowflake-snowpark-python"] docs = ["sphinx", "furo", "myst-parser"] diff --git a/tests/test_snowflake.py b/tests/test_snowflake.py index b0af4f6..a5387ec 100644 --- a/tests/test_snowflake.py +++ b/tests/test_snowflake.py @@ -356,6 +356,31 @@ def test_compare_table_setter_bad(snowpark_session): ) +@mock.patch( 
"datacompy.snowflake.SnowflakeCompare._validate_dataframe", new=mock.MagicMock() +) +@mock.patch("datacompy.snowflake.SnowflakeCompare._compare", new=mock.MagicMock()) +def test_compare_table_unique_names(snowpark_session): + # Assert that two tables with the same name but from a different DB/Schema have unique names + # Same schema/name, different DB + compare = SnowflakeCompare( + snowpark_session, + "test_db1.test_schema.test_name", + "test_db2.test_schema.test_name", + ["A"], + ) + assert compare.df1_name != compare.df2_name + + # Same db/name, different schema + compare = SnowflakeCompare( + snowpark_session, + "test_db.test_schema1.test_name", + "test_db.test_schema2.test_name", + ["A"], + ) + assert compare.df1_name != compare.df2_name + + def test_compare_table_setter_good(snowpark_session): data = """ACCT_ID,DOLLAR_AMT,NAME,FLOAT_FLD,DATE_FLD 10000001234,123.4,George Michael Bluth,14530.155,