Skip to content

Commit

Permalink
Merge pull request #79 from LovaArutinovi/contains
Browse files Browse the repository at this point in the history
Feat: add new filter operator "$contains"
  • Loading branch information
olirice authored Feb 27, 2024
2 parents fe4d584 + de0aa8e commit 8c2d87f
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 2 deletions.
9 changes: 9 additions & 0 deletions docs/concepts_metadata.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Comparison operators compare a provided value with a value stored in metadata fi
| $lt | Matches values that are less than a specified value |
| $lte | Matches values that are less than or equal to a specified value |
| $in | Matches values that are contained in a list of specified scalar values |
| $contains | Matches values where a scalar is contained within an array metadata field |


### Logical Operators
Expand Down Expand Up @@ -97,3 +98,11 @@ Those variants are most consistently able to make use of indexes.
"priority": {"$in": ["enterprise", "pro"]}
}
```

`tags`, an array, contains the string "important"

```json
{
"tags": {"$contains": "important"}
}
```
55 changes: 55 additions & 0 deletions src/tests/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,61 @@ def test_filters_in(client: vecs.Client) -> None:
)


def test_filters_contains(client: vecs.Client) -> None:
    """Exercise the ``$contains`` metadata filter.

    The fixture rows give the ``a`` key a scalar, several arrays and an
    object, so the expectations below show that only array-valued metadata
    holding the requested scalar is returned, that ints and strings are
    matched as distinct values, and that a non-scalar argument is rejected
    with ``FilterError``.
    """
    collection = client.get_or_create_collection(name="bar", dimension=4)

    collection.upsert(
        [
            ("0", [0, 0, 0, 0], {"a": 1, "b": 2}),
            ("1", [1, 0, 0, 0], {"a": [1, 2, 3]}),
            ("2", [1, 1, 0, 0], {"a": {"1": "2", "x": "y"}}),
            ("3", [0, 0, 0, 0], {"a": ["1"]}),
            ("4", [1, 0, 0, 0], {"a": [4, 3, 2, 1]}),
            ("5", [1, 0, 0, 0], {"a": [2]}),
        ]
    )
    collection.create_index()

    def ids_containing(needle):
        # Shared query shape for the positive cases; only the needle varies.
        return collection.query(
            data=[0, 0, 0, 0],
            limit=3,
            filters={"a": {"$contains": needle}},
        )

    # An int needle matches arrays holding that int; the scalar 1 in
    # record "0" and the object in record "2" are not returned.
    assert ids_containing(1) == ["1", "4"]

    # A string needle is distinct from the int of the same digits.
    assert ids_containing("1") == ["3"]

    # A value present in no array yields no rows.
    assert ids_containing(5) == []

    # The argument must be a scalar: lists and dicts are both rejected.
    for non_scalar in ([1, 2, 3], {"a": 1}):
        with pytest.raises(vecs.exc.FilterError):
            collection.query(
                data=[1, 0, 0, 0],
                limit=3,
                filters={"a": {"$contains": non_scalar}},
            )


def test_access_index(client: vecs.Client) -> None:
dim = 4
bar = client.get_or_create_collection(name="bar", dimension=dim)
Expand Down
37 changes: 35 additions & 2 deletions src/vecs/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,16 @@ def build_filters(json_col: Column, filters: Dict):
if len(value) > 1:
raise FilterError("only one operator permitted")
for operator, clause in value.items():
if operator not in ("$eq", "$ne", "$lt", "$lte", "$gt", "$gte", "$in"):
if operator not in (
"$eq",
"$ne",
"$lt",
"$lte",
"$gt",
"$gte",
"$in",
"$contains",
):
raise FilterError("unknown operator")

# equality of singular values can take advantage of the metadata index
Expand All @@ -877,7 +886,7 @@ def build_filters(json_col: Column, filters: Dict):
for elem in clause:
if not isinstance(elem, (int, str, float)):
raise FilterError(
"argument to $in filter must be a list or scalars"
"argument to $in filter must be a list of scalars"
)

# cast the array of scalars to a postgres array of jsonb so we can
Expand All @@ -887,6 +896,30 @@ def build_filters(json_col: Column, filters: Dict):

matches_value = cast(clause, postgresql.JSONB)

# @> in Postgres is heavily overloaded.
# By default, it will return True for
#
# scalar in array
# '[1, 2, 3]'::jsonb @> '1'::jsonb -- true
# equality:
# '1'::jsonb @> '1'::jsonb -- true
# key value pair in object
# '{"a": 1, "b": 2}'::jsonb @> '{"a": 1}'::jsonb -- true
#
# At this time we only want to allow "scalar in array" so
# we assert that the clause is a scalar and the target metadata
# is an array
if operator == "$contains":
if not isinstance(clause, (int, str, float)):
raise FilterError(
"argument to $contains filter must be a scalar"
)

return and_(
json_col.op("->")(key).contains(matches_value),
func.jsonb_typeof(json_col.op("->")(key)) == "array",
)

# handles non-singular values
if operator == "$eq":
return json_col.op("->")(key) == matches_value
Expand Down

0 comments on commit 8c2d87f

Please sign in to comment.