fix: index creation better error msg (#898)

* Validate that index scan with descending order works fine
* Add more error messages

---------

Co-authored-by: jarulraj <[email protected]>
georgia-tech-db · Jun 26, 2023 · a5c951b · a5c951b
1 parent e2bfa06
commit a5c951b
Show file tree

Hide file tree

Showing 5 changed files with 195 additions and 67 deletions.
diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py
@@ -82,9 +82,20 @@ def _bind_create_index_statement(self, node: CreateIndexStatement):
         assert node.table_ref.is_table_atom(), "Index can only be created on Tableinfo"
         if not node.udf_func:
             # Feature table type needs to be float32 numpy array.
+            assert (
+                len(node.col_list) == 1
+            ), f"Index can be only created on one column, but instead {len(node.col_list)} are provided"
             col_def = node.col_list[0]
+
             table_ref_obj = node.table_ref.table.table_obj
-            col = [col for col in table_ref_obj.columns if col.name == col_def.name][0]
+            col_list = [
+                col for col in table_ref_obj.columns if col.name == col_def.name
+            ]
+            assert (
+                len(col_list) == 1
+            ), f"Index is created on non-existent column {col_def.name}"
+
+            col = col_list[0]
             assert (
                 col.array_type == NdArrayType.FLOAT32
             ), "Index input needs to be float32."
@@ -262,8 +273,8 @@ def _bind_func_expr(self, node: FunctionExpression):
         udf_obj = self._catalog().get_udf_catalog_entry_by_name(node.name)
         if udf_obj is None:
             err_msg = (
-                f"UDF with name {node.name} does not exist in the catalog. "
-                "Please create the UDF using CREATE UDF command."
+                f"Function '{node.name}' does not exist in the catalog. "
+                "Please create the function using CREATE UDF command."
             )
             logger.error(err_msg)
             raise BinderError(err_msg)

diff --git a/evadb/udfs/udf_bootstrap_queries.py b/evadb/udfs/udf_bootstrap_queries.py
@@ -243,7 +243,7 @@ def init_builtin_udfs(db: EvaDBDatabase, mode: str = "debug") -> None:
         # ocr_udf_query,
         # Mvit_udf_query,
         Sift_udf_query,
-        yolo8n_query,
+        Yolo_udf_query,
     ]
 
     # if mode is 'debug', add debug UDFs

diff --git a/test/integration_tests/test_similarity.py b/test/integration_tests/test_similarity.py
@@ -145,8 +145,6 @@ def test_similarity_should_work_in_order(self):
 
         actual_open = actual_batch.frames["testsimilaritytable.data_col"].to_numpy()[0]
         self.assertTrue(np.array_equal(actual_open, base_img))
-        # actual_distance = actual_batch.frames["similarity.distance"].to_numpy()[0]
-        # self.assertEqual(actual_distance, 0)
 
         # Top 2 - assume table contains base data.
         select_query = """SELECT data_col FROM testSimilarityTable
@@ -160,10 +158,19 @@ def test_similarity_should_work_in_order(self):
         self.assertTrue(np.array_equal(actual_open, base_img))
         actual_open = actual_batch.frames["testsimilaritytable.data_col"].to_numpy()[1]
         self.assertTrue(np.array_equal(actual_open, base_img + 1))
-        # actual_distance = actual_batch.frames["similarity.distance"].to_numpy()[0]
-        # self.assertEqual(actual_distance, 0)
-        # actual_distance = actual_batch.frames["similarity.distance"].to_numpy()[1]
-        # self.assertEqual(actual_distance, 27)
+
+        # Top 2 - descending order
+        select_query = """SELECT data_col FROM testSimilarityTable
+                            ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data_col)) DESC
+                            LIMIT 2;""".format(
+            self.img_path
+        )
+        actual_batch = execute_query_fetch_all(self.evadb, select_query)
+
+        actual_open = actual_batch.frames["testsimilaritytable.data_col"].to_numpy()[0]
+        self.assertTrue(np.array_equal(actual_open, base_img + 4))
+        actual_open = actual_batch.frames["testsimilaritytable.data_col"].to_numpy()[1]
+        self.assertTrue(np.array_equal(actual_open, base_img + 3))
 
         ###########################################
         # Test case runs on feature vector table. #
@@ -186,8 +193,6 @@ def test_similarity_should_work_in_order(self):
             "testsimilarityfeaturetable.feature_col"
         ].to_numpy()[0]
         self.assertTrue(np.array_equal(actual_open, base_img))
-        # actual_distance = actual_batch.frames["similarity.distance"].to_numpy()[0]
-        # self.assertEqual(actual_distance, 0)
 
         # Top 2 - assume table contains feature data.
         select_query = """SELECT feature_col FROM testSimilarityFeatureTable
@@ -205,10 +210,6 @@ def test_similarity_should_work_in_order(self):
             "testsimilarityfeaturetable.feature_col"
         ].to_numpy()[1]
         self.assertTrue(np.array_equal(actual_open, base_img + 1))
-        # actual_distance = actual_batch.frames["similarity.distance"].to_numpy()[0]
-        # self.assertEqual(actual_distance, 0)
-        # actual_distance = actual_batch.frames["similarity.distance"].to_numpy()[1]
-        # self.assertEqual(actual_distance, 27)
 
     def test_should_do_vector_index_scan(self):
         ###########################################
@@ -291,6 +292,46 @@ def test_should_do_vector_index_scan(self):
         self.evadb.catalog().drop_index_catalog_entry("testFaissIndexScanRewrite1")
         self.evadb.catalog().drop_index_catalog_entry("testFaissIndexScanRewrite2")
 
+    def test_should_not_do_vector_index_scan_with_desc_order(self):
+        # Execution with index scan.
+        create_index_query = """CREATE INDEX testFaissIndexScanRewrite
+                                    ON testSimilarityTable (DummyFeatureExtractor(data_col))
+                                    USING FAISS;"""
+        execute_query_fetch_all(self.evadb, create_index_query)
+
+        explain_query = """
+            EXPLAIN
+                SELECT data_col FROM testSimilarityTable WHERE dummy = 0
+                  ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data_col))
+                  LIMIT 3;
+        """.format(
+            "dummypath"
+        )
+        batch = execute_query_fetch_all(self.evadb, explain_query)
+
+        # Index scan should not be used.
+        self.assertFalse("FaissIndexScan" in batch.frames[0][0])
+
+        # Check results are in descending order
+        base_img = np.array(np.ones((3, 3, 3)), dtype=np.uint8)
+        base_img[0] -= 1
+        base_img[2] += 1
+
+        select_query = """SELECT data_col FROM testSimilarityTable
+                            ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data_col)) DESC
+                            LIMIT 2;""".format(
+            self.img_path
+        )
+        actual_batch = execute_query_fetch_all(self.evadb, select_query)
+
+        actual_open = actual_batch.frames["testsimilaritytable.data_col"].to_numpy()[0]
+        self.assertTrue(np.array_equal(actual_open, base_img + 4))
+        actual_open = actual_batch.frames["testsimilaritytable.data_col"].to_numpy()[1]
+        self.assertTrue(np.array_equal(actual_open, base_img + 3))
+
+        # Cleanup
+        self.evadb.catalog().drop_index_catalog_entry("testFaissIndexScanRewrite")
+
     def test_should_not_do_vector_index_scan_with_predicate(self):
         # Execution with index scan.
         create_index_query = """CREATE INDEX testFaissIndexScanRewrite

diff --git a/test/integration_tests/test_udf_executor.py b/test/integration_tests/test_udf_executor.py
@@ -223,8 +223,8 @@ def test_should_raise_using_missing_udf(self):
             )
 
         err_msg = (
-            "UDF with name DummyObjectDetector1 does not exist in the catalog. "
-            "Please create the UDF using CREATE UDF command."
+            "Function 'DummyObjectDetector1' does not exist in the catalog. "
+            "Please create the function using CREATE UDF command."
         )
         self.assertEqual(str(cm.exception), err_msg)