Skip to content

Commit

Permalink
Reenable batch for release (#1302)
Browse files: browse the repository at this point in the history
  • Loading branch information
xzdandy authored Oct 18, 2023
1 parent b8dd206 commit f192a10
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 10 deletions.
6 changes: 3 additions & 3 deletions evadb/storage/native_storage_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from evadb.models.storage.batch import Batch
from evadb.storage.abstract_storage_engine import AbstractStorageEngine
from evadb.third_party.databases.interface import get_database_handler
from evadb.utils.generic_utils import PickleSerializer
from evadb.utils.generic_utils import PickleSerializer, rebatch
from evadb.utils.logging_manager import logger


Expand Down Expand Up @@ -190,8 +190,8 @@ def read(
_deserialize_sql_row(row, ordered_columns) for row in result
)

for data_batch in result:
yield Batch(pd.DataFrame([data_batch]))
for df in rebatch(result, batch_mem_size):
yield Batch(pd.DataFrame(df))

except Exception as e:
err_msg = f"Failed to read the table {table.name} in data source {table.database_name} with exception {str(e)}"
Expand Down
14 changes: 7 additions & 7 deletions evadb/storage/sqlite_storage_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from evadb.models.storage.batch import Batch
from evadb.parser.table_ref import TableInfo
from evadb.storage.abstract_storage_engine import AbstractStorageEngine
from evadb.utils.generic_utils import PickleSerializer
from evadb.utils.generic_utils import PickleSerializer, rebatch
from evadb.utils.logging_manager import logger

# Leveraging Dynamic schema in SQLAlchemy
Expand Down Expand Up @@ -189,12 +189,12 @@ def read(
try:
table_to_read = self._try_loading_table_via_reflection(table.name)
result = self._sql_session.execute(table_to_read.select()).fetchall()
for row in result:
yield Batch(
pd.DataFrame(
[self._deserialize_sql_row(row._asdict(), table.columns)]
)
)
result_iter = (
self._deserialize_sql_row(row._asdict(), table.columns)
for row in result
)
for df in rebatch(result_iter, batch_mem_size):
yield Batch(pd.DataFrame(df))
except Exception as e:
err_msg = f"Failed to read the table {table.name} with exception {str(e)}"
logger.exception(err_msg)
Expand Down
3 changes: 3 additions & 0 deletions test/integration_tests/long/test_github_datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def setUp(self):
def tearDown(self):
execute_query_fetch_all(self.evadb, "DROP DATABASE IF EXISTS github_data;")

@pytest.mark.skip(
reason="Need https://github.com/georgia-tech-db/evadb/pull/1280 for a cost-based rebatch optimization"
)
@pytest.mark.xfail(reason="Flaky testcase due to `bad request` error message")
def test_should_run_select_query_in_github(self):
# Create database.
Expand Down

0 comments on commit f192a10

Please sign in to comment.