diff --git a/scripts/table_updates/tests/test_utilities.py b/scripts/table_updates/tests/test_utilities.py index 954f7486..e3638877 100644 --- a/scripts/table_updates/tests/test_utilities.py +++ b/scripts/table_updates/tests/test_utilities.py @@ -1,4 +1,3 @@ -import pdb from unittest.mock import MagicMock, create_autospec, patch import numpy as np @@ -13,89 +12,56 @@ def syn(): return create_autospec(synapseclient.Synapse) -def test_download_synapse_table_with_selected_single_column(syn): - select = "col1" - df = pd.DataFrame({ - 'col1': ['value1', 'value2'] - }) +@pytest.fixture(scope="session") +def table_schema(): schema = synapseclient.table.Schema( name="test_table", parent="syn123", column_names=["col1", "col2"], column_types=["STRING", "INTEGER"], ) - syn.tableQuery = MagicMock(return_value = Table(schema, df)) - result = utilities.download_synapse_table(syn, "syn123456", select) + return schema - syn.tableQuery.assert_called_once_with("SELECT col1 from syn123456") - pd.testing.assert_frame_equal(result, df) -def test_download_synapse_table_without_condition(syn): - df = pd.DataFrame({ - 'col1': ['value1', 'value2'], - 'col2': [1, 2] - }) - schema = synapseclient.table.Schema( - name="test_table", - parent="syn123", - column_names=["col1", "col2"], - column_types=["STRING", "INTEGER"], - ) - syn.tableQuery = MagicMock(return_value = Table(schema, df)) - result = utilities.download_synapse_table(syn, "syn123456", condition = "") +@pytest.mark.parametrize( + "query_return_df,select,query,expected_df", + [ + (pd.DataFrame({'col1': ['value1', 'value2']}), "col1", "SELECT col1 from syn123456",pd.DataFrame({'col1': ['value1', 'value2']})), + (pd.DataFrame({'col1': ['value1', 'value2'],'col2': [1, 2]}), "col1,col2", "SELECT col1,col2 from syn123456",pd.DataFrame({'col1': ['value1', 'value2'],'col2': [1, 2]})), + (pd.DataFrame({'col1': ["NA", "value1", "None"],'col2': [1, 2, 3]}),"*","SELECT * from syn123456",pd.DataFrame({'col1': [np.nan, "value1", "None"],'col2': [1, 2, 3]})), + (pd.DataFrame(columns = ["col1", "col2"]),"*","SELECT * from syn123456",pd.DataFrame(columns = ["col1", "col2"])), + ], + ids = ["selected_single_column","selected_multiple_column","pull_table_with_na_values_all_columns","pull_empty_table_all_columns"], +) +def test_download_synapse_table_default_condition(syn, table_schema, query_return_df, select, query, expected_df): + syn.tableQuery = MagicMock(return_value = Table(table_schema, query_return_df)) + result = utilities.download_synapse_table(syn, "syn123456", select) - syn.tableQuery.assert_called_once_with("SELECT * from syn123456") - pd.testing.assert_frame_equal(result, df) + # validate + syn.tableQuery.assert_called_once_with(query) + pd.testing.assert_frame_equal(result, expected_df) -def test_download_synapse_table_with_condition(syn): - condition = "col1 = 'value1'" - df = pd.DataFrame({ - 'col1': ['value1'], - 'col2': [1] - }) - schema = synapseclient.table.Schema( - name="test_table", - parent="syn123", - column_names=["col1", "col2"], - column_types=["STRING", "INTEGER"], - ) - syn.tableQuery = MagicMock(return_value = Table(schema, df)) +@pytest.mark.parametrize( + "query_return_df,condition,query,expected_df", + [ + (pd.DataFrame({'col1': ['value1'],'col2': [1]}), "col1 = 'value1'", "SELECT * from syn123456 WHERE col1 = 'value1'",pd.DataFrame({'col1': ['value1'],'col2': [1]})), + (pd.DataFrame({'col1': ["NA", "value1", "None"],'col2': [1, 1, 1]}), "col2 = 1","SELECT * from syn123456 WHERE col2 = 1",pd.DataFrame({'col1': [np.nan, "value1", "None"],'col2': [1, 1, 1]})), + ], + ids = ["selected_row_all_columns","pull_table_with_na_values_all_columns"], +) +def test_download_synapse_table_with_condition(syn, table_schema, query_return_df, condition, query,expected_df): + syn.tableQuery = MagicMock(return_value = Table(table_schema, query_return_df)) result = utilities.download_synapse_table(syn, "syn123456", condition = condition) - syn.tableQuery.assert_called_once_with("SELECT * from syn123456 WHERE col1 = 'value1'") - pd.testing.assert_frame_equal(result, df) - -def test_download_synapse_table_with_na_values(syn): - df = pd.DataFrame({ - 'col1': ["NA", "value1", "None"], - 'col2': [1, 2, 3] - }) - schema = synapseclient.table.Schema( - name="test_table", - parent="syn123", - column_names=["col1", "col2"], - column_types=["STRING", "INTEGER"], - ) - syn.tableQuery = MagicMock(return_value = Table(schema, df)) - result = utilities.download_synapse_table(syn, "syn123456", condition = "") - - syn.tableQuery.assert_called_once_with("SELECT * from syn123456") - # Unlike None is not converted to nan - pd.testing.assert_frame_equal(result, pd.DataFrame({ - 'col1': [np.nan, "value1", "None"], - 'col2': [1, 2, 3] - })) - -def test_download_synapse_table_with_empty_table(syn): - df = pd.DataFrame(columns = ["col1", "col2"]) - schema = synapseclient.table.Schema( - name="test_table", - parent="syn123", - column_names=["col1", "col2"], - column_types=["STRING", "INTEGER"], - ) - syn.tableQuery = MagicMock(return_value = Table(schema, df)) - result = utilities.download_synapse_table(syn, "syn123456", condition = "") + # validate + syn.tableQuery.assert_called_once_with(query) + pd.testing.assert_frame_equal(result, expected_df) - syn.tableQuery.assert_called_once_with("SELECT * from syn123456") - pd.testing.assert_frame_equal(result, df) \ No newline at end of file +def test_download_empty_synapse_table_with_condition(syn, table_schema, ): + syn.tableQuery = MagicMock(return_value = Table(table_schema, pd.DataFrame(columns = ["col1", "col2"]))) + result = utilities.download_synapse_table(syn, "syn123456", condition = "col2 = 1") + + # validate + syn.tableQuery.assert_called_once_with("SELECT * from syn123456 WHERE col2 = 1") + pd.testing.assert_frame_equal(result, pd.DataFrame(columns = ["col1", "col2"])) + \ No newline at end of file