Skip to content

Commit

Permalink
re-organize and add test cases for download_synapse_table
Browse files Browse the repository at this point in the history
  • Loading branch information
danlu1 committed Oct 24, 2024
1 parent e53df57 commit b89e608
Showing 1 changed file with 40 additions and 74 deletions.
114 changes: 40 additions & 74 deletions scripts/table_updates/tests/test_utilities.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import pdb
from unittest.mock import MagicMock, create_autospec, patch

import numpy as np
Expand All @@ -13,89 +12,56 @@
def syn():
return create_autospec(synapseclient.Synapse)

def test_download_synapse_table_with_selected_single_column(syn):
select = "col1"
df = pd.DataFrame({
'col1': ['value1', 'value2']
})
@pytest.fixture(scope="session")
def table_schema():
schema = synapseclient.table.Schema(
name="test_table",
parent="syn123",
column_names=["col1", "col2"],
column_types=["STRING", "INTEGER"],
)
syn.tableQuery = MagicMock(return_value = Table(schema, df))
result = utilities.download_synapse_table(syn, "syn123456", select)
return schema

syn.tableQuery.assert_called_once_with("SELECT col1 from syn123456")
pd.testing.assert_frame_equal(result, df)

def test_download_synapse_table_without_condition(syn):
df = pd.DataFrame({
'col1': ['value1', 'value2'],
'col2': [1, 2]
})
schema = synapseclient.table.Schema(
name="test_table",
parent="syn123",
column_names=["col1", "col2"],
column_types=["STRING", "INTEGER"],
)
syn.tableQuery = MagicMock(return_value = Table(schema, df))
result = utilities.download_synapse_table(syn, "syn123456", condition = "")
@pytest.mark.parametrize(
"query_return_df,select,query,expected_df",
[
(pd.DataFrame({'col1': ['value1', 'value2']}), "col1", "SELECT col1 from syn123456",pd.DataFrame({'col1': ['value1', 'value2']})),
(pd.DataFrame({'col1': ['value1', 'value2'],'col2': [1, 2]}), "col1,col2", "SELECT col1,col2 from syn123456",pd.DataFrame({'col1': ['value1', 'value2'],'col2': [1, 2]})),
(pd.DataFrame({'col1': ["NA", "value1", "None"],'col2': [1, 2, 3]}),"*","SELECT * from syn123456",pd.DataFrame({'col1': [np.nan, "value1", "None"],'col2': [1, 2, 3]})),
(pd.DataFrame(columns = ["col1", "col2"]),"*","SELECT * from syn123456",pd.DataFrame(columns = ["col1", "col2"])),
],
ids = ["selected_single_column","selected_multiple_column","pull_table_with_na_values_all_columns","pull_empty_table_all_columns"],
)
def test_download_synapse_table_default_condition(syn, table_schema, query_return_df, select, query, expected_df):
syn.tableQuery = MagicMock(return_value = Table(table_schema, query_return_df))
result = utilities.download_synapse_table(syn, "syn123456", select)

syn.tableQuery.assert_called_once_with("SELECT * from syn123456")
pd.testing.assert_frame_equal(result, df)
# validate
syn.tableQuery.assert_called_once_with(query)
pd.testing.assert_frame_equal(result, expected_df)

def test_download_synapse_table_with_condition(syn):
condition = "col1 = 'value1'"
df = pd.DataFrame({
'col1': ['value1'],
'col2': [1]
})
schema = synapseclient.table.Schema(
name="test_table",
parent="syn123",
column_names=["col1", "col2"],
column_types=["STRING", "INTEGER"],
)
syn.tableQuery = MagicMock(return_value = Table(schema, df))
@pytest.mark.parametrize(
"query_return_df,condition,query,expected_df",
[
(pd.DataFrame({'col1': ['value1'],'col2': [1]}), "col1 = 'value1'", "SELECT * from syn123456 WHERE col1 = 'value1'",pd.DataFrame({'col1': ['value1'],'col2': [1]})),
(pd.DataFrame({'col1': ["NA", "value1", "None"],'col2': [1, 1, 1]}), "col2 = 1","SELECT * from syn123456 WHERE col2 = 1",pd.DataFrame({'col1': [np.nan, "value1", "None"],'col2': [1, 1, 1]})),
],
ids = ["selected_row_all_columns","pull_table_with_na_values_all_columns"],
)
def test_download_synapse_table_with_condition(syn, table_schema, query_return_df, condition, query,expected_df):
syn.tableQuery = MagicMock(return_value = Table(table_schema, query_return_df))
result = utilities.download_synapse_table(syn, "syn123456", condition = condition)

syn.tableQuery.assert_called_once_with("SELECT * from syn123456 WHERE col1 = 'value1'")
pd.testing.assert_frame_equal(result, df)

def test_download_synapse_table_with_na_values(syn):
df = pd.DataFrame({
'col1': ["NA", "value1", "None"],
'col2': [1, 2, 3]
})
schema = synapseclient.table.Schema(
name="test_table",
parent="syn123",
column_names=["col1", "col2"],
column_types=["STRING", "INTEGER"],
)
syn.tableQuery = MagicMock(return_value = Table(schema, df))
result = utilities.download_synapse_table(syn, "syn123456", condition = "")

syn.tableQuery.assert_called_once_with("SELECT * from syn123456")
# Unlike None is not converted to nan
pd.testing.assert_frame_equal(result, pd.DataFrame({
'col1': [np.nan, "value1", "None"],
'col2': [1, 2, 3]
}))

def test_download_synapse_table_with_empty_table(syn):
df = pd.DataFrame(columns = ["col1", "col2"])
schema = synapseclient.table.Schema(
name="test_table",
parent="syn123",
column_names=["col1", "col2"],
column_types=["STRING", "INTEGER"],
)
syn.tableQuery = MagicMock(return_value = Table(schema, df))
result = utilities.download_synapse_table(syn, "syn123456", condition = "")
# validate
syn.tableQuery.assert_called_once_with(query)
pd.testing.assert_frame_equal(result, expected_df)

syn.tableQuery.assert_called_once_with("SELECT * from syn123456")
pd.testing.assert_frame_equal(result, df)
def test_download_empty_synapse_table_with_condition(syn, table_schema, ):
syn.tableQuery = MagicMock(return_value = Table(table_schema, pd.DataFrame(columns = ["col1", "col2"])))
result = utilities.download_synapse_table(syn, "syn123456", condition = "col2 = 1")

# validate
syn.tableQuery.assert_called_once_with("SELECT * from syn123456 WHERE col2 = 1")
pd.testing.assert_frame_equal(result, pd.DataFrame(columns = ["col1", "col2"]))

0 comments on commit b89e608

Please sign in to comment.