Skip to content

Commit

Permalink
fix: index creation better error msg (#898)
Browse files Browse the repository at this point in the history
* Validate index scan with descending order works fine
* Add more error msg

---------

Co-authored-by: jarulraj <[email protected]>
  • Loading branch information
jiashenC and jarulraj authored Jun 26, 2023
1 parent e2bfa06 commit a5c951b
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 67 deletions.
17 changes: 14 additions & 3 deletions evadb/binder/statement_binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,20 @@ def _bind_create_index_statement(self, node: CreateIndexStatement):
assert node.table_ref.is_table_atom(), "Index can only be created on Tableinfo"
if not node.udf_func:
# Feature table type needs to be float32 numpy array.
assert (
len(node.col_list) == 1
), f"Index can be only created on one column, but instead {len(node.col_list)} are provided"
col_def = node.col_list[0]

table_ref_obj = node.table_ref.table.table_obj
col = [col for col in table_ref_obj.columns if col.name == col_def.name][0]
col_list = [
col for col in table_ref_obj.columns if col.name == col_def.name
]
assert (
len(col_list) == 1
), f"Index is created on non-existent column {col_def.name}"

col = col_list[0]
assert (
col.array_type == NdArrayType.FLOAT32
), "Index input needs to be float32."
Expand Down Expand Up @@ -262,8 +273,8 @@ def _bind_func_expr(self, node: FunctionExpression):
udf_obj = self._catalog().get_udf_catalog_entry_by_name(node.name)
if udf_obj is None:
err_msg = (
f"UDF with name {node.name} does not exist in the catalog. "
"Please create the UDF using CREATE UDF command."
f"Function '{node.name}' does not exist in the catalog. "
"Please create the function using CREATE UDF command."
)
logger.error(err_msg)
raise BinderError(err_msg)
Expand Down
2 changes: 1 addition & 1 deletion evadb/udfs/udf_bootstrap_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def init_builtin_udfs(db: EvaDBDatabase, mode: str = "debug") -> None:
# ocr_udf_query,
# Mvit_udf_query,
Sift_udf_query,
yolo8n_query,
Yolo_udf_query,
]

# if mode is 'debug', add debug UDFs
Expand Down
65 changes: 53 additions & 12 deletions test/integration_tests/test_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,6 @@ def test_similarity_should_work_in_order(self):

actual_open = actual_batch.frames["testsimilaritytable.data_col"].to_numpy()[0]
self.assertTrue(np.array_equal(actual_open, base_img))
# actual_distance = actual_batch.frames["similarity.distance"].to_numpy()[0]
# self.assertEqual(actual_distance, 0)

# Top 2 - assume table contains base data.
select_query = """SELECT data_col FROM testSimilarityTable
Expand All @@ -160,10 +158,19 @@ def test_similarity_should_work_in_order(self):
self.assertTrue(np.array_equal(actual_open, base_img))
actual_open = actual_batch.frames["testsimilaritytable.data_col"].to_numpy()[1]
self.assertTrue(np.array_equal(actual_open, base_img + 1))
# actual_distance = actual_batch.frames["similarity.distance"].to_numpy()[0]
# self.assertEqual(actual_distance, 0)
# actual_distance = actual_batch.frames["similarity.distance"].to_numpy()[1]
# self.assertEqual(actual_distance, 27)

# Top 2 - descending order
select_query = """SELECT data_col FROM testSimilarityTable
ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data_col)) DESC
LIMIT 2;""".format(
self.img_path
)
actual_batch = execute_query_fetch_all(self.evadb, select_query)

actual_open = actual_batch.frames["testsimilaritytable.data_col"].to_numpy()[0]
self.assertTrue(np.array_equal(actual_open, base_img + 4))
actual_open = actual_batch.frames["testsimilaritytable.data_col"].to_numpy()[1]
self.assertTrue(np.array_equal(actual_open, base_img + 3))

###########################################
# Test case runs on feature vector table. #
Expand All @@ -186,8 +193,6 @@ def test_similarity_should_work_in_order(self):
"testsimilarityfeaturetable.feature_col"
].to_numpy()[0]
self.assertTrue(np.array_equal(actual_open, base_img))
# actual_distance = actual_batch.frames["similarity.distance"].to_numpy()[0]
# self.assertEqual(actual_distance, 0)

# Top 2 - assume table contains feature data.
select_query = """SELECT feature_col FROM testSimilarityFeatureTable
Expand All @@ -205,10 +210,6 @@ def test_similarity_should_work_in_order(self):
"testsimilarityfeaturetable.feature_col"
].to_numpy()[1]
self.assertTrue(np.array_equal(actual_open, base_img + 1))
# actual_distance = actual_batch.frames["similarity.distance"].to_numpy()[0]
# self.assertEqual(actual_distance, 0)
# actual_distance = actual_batch.frames["similarity.distance"].to_numpy()[1]
# self.assertEqual(actual_distance, 27)

def test_should_do_vector_index_scan(self):
###########################################
Expand Down Expand Up @@ -291,6 +292,46 @@ def test_should_do_vector_index_scan(self):
self.evadb.catalog().drop_index_catalog_entry("testFaissIndexScanRewrite1")
self.evadb.catalog().drop_index_catalog_entry("testFaissIndexScanRewrite2")

def test_should_not_do_vector_index_scan_with_desc_order(self):
    """Verify a descending similarity ORDER BY is not rewritten to an index scan.

    Creates a FAISS index on ``DummyFeatureExtractor(data_col)``, then:

    1. EXPLAINs a ``ORDER BY Similarity(...) DESC LIMIT`` query and asserts
       the plan text does not mention ``FaissIndexScan``.
    2. Executes a descending top-2 query and checks the returned rows.

    The index catalog entry is always dropped on exit so a failing
    assertion does not leak the index into other tests that create an
    index with the same name.
    """
    index_name = "testFaissIndexScanRewrite"
    create_index_query = """CREATE INDEX testFaissIndexScanRewrite
                                ON testSimilarityTable (DummyFeatureExtractor(data_col))
                                USING FAISS;"""
    execute_query_fetch_all(self.evadb, create_index_query)

    try:
        # The query under EXPLAIN must itself be descending and free of any
        # WHERE predicate; otherwise the index scan is skipped because of
        # the predicate and the sort direction is never exercised.
        explain_query = """
            EXPLAIN
                SELECT data_col FROM testSimilarityTable
                  ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data_col)) DESC
                  LIMIT 3;
        """.format(
            "dummypath"
        )
        batch = execute_query_fetch_all(self.evadb, explain_query)

        # Index scan should not be used.
        self.assertNotIn("FaissIndexScan", batch.frames[0][0])

        # Check results are in descending order.
        # NOTE(review): the expected rows assume the fixture table holds
        # base_img + i for i in 0..4 -- the setup is outside this view.
        base_img = np.array(np.ones((3, 3, 3)), dtype=np.uint8)
        base_img[0] -= 1
        base_img[2] += 1

        select_query = """SELECT data_col FROM testSimilarityTable
                            ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data_col)) DESC
                            LIMIT 2;""".format(
            self.img_path
        )
        actual_batch = execute_query_fetch_all(self.evadb, select_query)

        result_column = actual_batch.frames[
            "testsimilaritytable.data_col"
        ].to_numpy()
        # Farthest image first, second-farthest next.
        self.assertTrue(np.array_equal(result_column[0], base_img + 4))
        self.assertTrue(np.array_equal(result_column[1], base_img + 3))
    finally:
        # Cleanup runs even when an assertion above fails.
        self.evadb.catalog().drop_index_catalog_entry(index_name)

def test_should_not_do_vector_index_scan_with_predicate(self):
# Execution with index scan.
create_index_query = """CREATE INDEX testFaissIndexScanRewrite
Expand Down
4 changes: 2 additions & 2 deletions test/integration_tests/test_udf_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ def test_should_raise_using_missing_udf(self):
)

err_msg = (
"UDF with name DummyObjectDetector1 does not exist in the catalog. "
"Please create the UDF using CREATE UDF command."
"Function 'DummyObjectDetector1' does not exist in the catalog. "
"Please create the function using CREATE UDF command."
)
self.assertEqual(str(cm.exception), err_msg)

Expand Down
Loading

0 comments on commit a5c951b

Please sign in to comment.