Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deprecate vector<float> #41

Merged
merged 3 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions tests/integrations/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

def test_extract_info_from_column_definition():
# Test case with dimension and distance metric
column_type = "VECTOR<FLOAT>(128)"
column_type = "VECTOR(128)"
column_comment = "hnsw(distance=cosine)"
expected_result = (128, "cosine")
assert (
Expand All @@ -15,7 +15,7 @@ def test_extract_info_from_column_definition():
)

# Test case with dimension but no distance metric
column_type = "VECTOR<FLOAT>(256)"
column_type = "VECTOR(256)"
column_comment = "some comment"
expected_result = (256, None)
assert (
Expand All @@ -24,7 +24,7 @@ def test_extract_info_from_column_definition():
)

# Test case with no dimension and no distance metric
column_type = "VECTOR<FLOAT>"
column_type = "VECTOR"
column_comment = "another comment"
expected_result = (None, None)
assert (
Expand All @@ -33,7 +33,7 @@ def test_extract_info_from_column_definition():
)

# Test case with no dimension and no comment
column_type = "VECTOR<FLOAT>"
column_type = "VECTOR"
column_comment = ""
expected_result = (None, None)
assert (
Expand All @@ -42,7 +42,7 @@ def test_extract_info_from_column_definition():
)

# Test case with dimension but no comment
column_type = "VECTOR<FLOAT>(256)"
column_type = "VECTOR(256)"
column_comment = ""
expected_result = (256, None)
assert (
Expand All @@ -51,7 +51,7 @@ def test_extract_info_from_column_definition():
)

# Test case without index type
column_type = "VECTOR<FLOAT>"
column_type = "VECTOR"
column_comment = "distance=l2"
expected_result = (None, "l2")
assert (
Expand All @@ -60,7 +60,7 @@ def test_extract_info_from_column_definition():
)

# Test case with additional comment content
column_type = "VECTOR<FLOAT>(128)"
column_type = "VECTOR(128)"
column_comment = "test, hnsw(distance=l2)"
expected_result = (128, "l2")
assert (
Expand Down
4 changes: 1 addition & 3 deletions tidb_vector/integrations/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,7 @@ def extract_info_from_column_definition(column_type, column_comment):
tuple: A tuple containing the dimension (int or None) and the distance metric (str or None).
"""
# Try to extract the dimension, which is optional.
dimension_match = re.search(
r"VECTOR<FLOAT>(?:\((\d+)\))?", column_type, re.IGNORECASE
)
dimension_match = re.search(r"VECTOR(?:\((\d+)\))?", column_type, re.IGNORECASE)
dimension = (
int(dimension_match.group(1))
if dimension_match and dimension_match.group(1)
Expand Down
8 changes: 4 additions & 4 deletions tidb_vector/integrations/vector_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,17 +137,17 @@ def _check_table_compatibility(self) -> None:
self._vector_dimension = actual_dim
elif actual_dim != self._vector_dimension:
raise EmbeddingColumnMismatchError(
existing_col=f"vector<float>({actual_dim})",
expected_col=f"vector<float>({self._vector_dimension})",
existing_col=f"vector({actual_dim})",
expected_col=f"vector({self._vector_dimension})",
)

if actual_distance_strategy is not None:
if self._distance_strategy is None:
self._distance_strategy = DistanceStrategy(actual_distance_strategy)
elif actual_distance_strategy != self._distance_strategy:
raise EmbeddingColumnMismatchError(
existing_col=f"vector<float>({actual_dim}) COMMENT 'hnsw(distance={actual_distance_strategy})'",
expected_col=f"vector<float>({self._vector_dimension}) COMMENT 'hnsw(distance={self._distance_strategy})'",
existing_col=f"vector({actual_dim}) COMMENT 'hnsw(distance={actual_distance_strategy})'",
expected_col=f"vector({self._vector_dimension}) COMMENT 'hnsw(distance={self._distance_strategy})'",
)

def _create_table_if_not_exists(self) -> None:
Expand Down
2 changes: 1 addition & 1 deletion tidb_vector/peewee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class VectorField(Field):
field_type = "VECTOR<FLOAT>"
field_type = "VECTOR"

def __init__(self, dimensions=None, *args, **kwargs):
self.dimensions = dimensions
Expand Down
6 changes: 3 additions & 3 deletions tidb_vector/sqlalchemy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,16 @@ def get_col_spec(self, **kw):
"""
Returns the column specification for the vector column.

If the dimension is not specified, it returns "VECTOR<FLOAT>".
If the dimension is not specified, it returns "VECTOR".
Otherwise, it returns "VECTOR(<dimension>)".

:param kw: Additional keyword arguments.
:return: The column specification string.
"""

if self.dim is None:
return "VECTOR<FLOAT>"
return "VECTOR<FLOAT>(%d)" % self.dim
return "VECTOR"
return "VECTOR(%d)" % self.dim

def bind_processor(self, dialect):
"""Convert the vector float array to a string representation suitable for binding to a database column."""
Expand Down
Loading