From 9318464a6336ecb219aba24c7d51394599ca3b8e Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Wed, 22 Jan 2025 17:21:09 +0000 Subject: [PATCH 01/48] Add logic to deser/ser multi-vecs from/into vector_bytes in BatchObjects + Search --- .github/workflows/main.yaml | 34 +++--- ...erative.yml => docker-compose-modules.yml} | 4 +- integration/test_collection_openai.py | 2 +- integration/test_collection_rerank.py | 2 + integration/test_named_vectors.py | 93 ++++++++++++++- weaviate/classes/query.py | 2 + .../collections/batch/grpc_batch_objects.py | 45 +++----- weaviate/collections/classes/config.py | 14 +++ .../classes/config_named_vectors.py | 46 +++++++- .../classes/config_vector_index.py | 9 ++ .../collections/classes/config_vectorizers.py | 10 ++ weaviate/collections/classes/grpc.py | 31 ++++- weaviate/collections/classes/internal.py | 2 +- weaviate/collections/grpc/query.py | 57 +++++++--- weaviate/collections/grpc/shared.py | 101 ++++++++++++++++- weaviate/collections/queries/base.py | 20 ++-- weaviate/collections/queries/byteops.py | 16 --- weaviate/proto/v1/base_pb2.py | 12 +- weaviate/proto/v1/base_pb2.pyi | 14 ++- weaviate/proto/v1/search_get_pb2.py | 106 +++++++++--------- weaviate/proto/v1/search_get_pb2.pyi | 17 ++- weaviate/types.py | 4 +- weaviate/util.py | 17 ++- 23 files changed, 502 insertions(+), 156 deletions(-) rename ci/{docker-compose-generative.yml => docker-compose-modules.yml} (80%) delete mode 100644 weaviate/collections/queries/byteops.py diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 3b192b31a..9a200ade2 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -18,11 +18,12 @@ concurrency: cancel-in-progress: true env: - WEAVIATE_124: 1.24.26 + # WEAVIATE_124: 1.24.26 WEAVIATE_125: 1.25.29 WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 - WEAVIATE_128: 1.28.2-2c00437 + WEAVIATE_128: 1.28.3 + WEAVIATE_129: 1.29.0-dev-f985564 jobs: lint-and-format: @@ -54,7 +55,7 @@ jobs: strategy: fail-fast: 
false matrix: - version: ["3.9", "3.10", "3.11", "3.12"] + version: ["3.9", "3.10", "3.11", "3.12", "3.13"] folder: ["weaviate"] steps: - uses: actions/checkout@v4 @@ -75,7 +76,7 @@ jobs: strategy: fail-fast: false matrix: - version: ["3.9", "3.10", "3.11", "3.12"] + version: ["3.9", "3.10", "3.11", "3.12", "3.13"] folder: ["test", "mock_tests"] steps: - uses: actions/checkout@v4 @@ -98,7 +99,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - version: ["3.9", "3.10", "3.11", "3.12"] + version: ["3.9", "3.10", "3.11", "3.12", "3.13"] optional_dependencies: [false] steps: - uses: actions/checkout@v4 @@ -129,11 +130,11 @@ jobs: fail-fast: false matrix: versions: [ - { py: "3.9", weaviate: $WEAVIATE_124}, - { py: "3.10", weaviate: $WEAVIATE_125}, - { py: "3.11", weaviate: $WEAVIATE_126}, - { py: "3.12", weaviate: $WEAVIATE_127}, - { py: "3.13", weaviate: $WEAVIATE_128} + { py: "3.9", weaviate: $WEAVIATE_129}, + { py: "3.10", weaviate: $WEAVIATE_129}, + { py: "3.11", weaviate: $WEAVIATE_129}, + { py: "3.12", weaviate: $WEAVIATE_129}, + { py: "3.13", weaviate: $WEAVIATE_129} ] optional_dependencies: [false] steps: @@ -186,11 +187,11 @@ jobs: fail-fast: false matrix: versions: [ - { py: "3.9", weaviate: $WEAVIATE_128}, - { py: "3.10", weaviate: $WEAVIATE_128}, - { py: "3.11", weaviate: $WEAVIATE_128}, - { py: "3.12", weaviate: $WEAVIATE_128}, - { py: "3.13", weaviate: $WEAVIATE_128} + { py: "3.9", weaviate: $WEAVIATE_129}, + { py: "3.10", weaviate: $WEAVIATE_129}, + { py: "3.11", weaviate: $WEAVIATE_129}, + { py: "3.12", weaviate: $WEAVIATE_129}, + { py: "3.13", weaviate: $WEAVIATE_129} ] optional_dependencies: [false] steps: @@ -281,7 +282,8 @@ jobs: $WEAVIATE_125, $WEAVIATE_126, $WEAVIATE_127, - $WEAVIATE_128 + $WEAVIATE_128, + $WEAVIATE_129 ] steps: - name: Checkout diff --git a/ci/docker-compose-generative.yml b/ci/docker-compose-modules.yml similarity index 80% rename from ci/docker-compose-generative.yml rename to ci/docker-compose-modules.yml index 
67034ae7a..ec09fdafd 100644 --- a/ci/docker-compose-generative.yml +++ b/ci/docker-compose-modules.yml @@ -1,7 +1,7 @@ --- version: '3.4' services: - weaviate_openai: + weaviate_modules: command: - --host - 0.0.0.0 @@ -19,6 +19,6 @@ services: AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' PERSISTENCE_DATA_PATH: '/var/lib/weaviate' DEFAULT_VECTORIZER_MODULE: 'text2vec-openai' - ENABLE_MODULES: 'text2vec-openai,generative-openai,text2vec-cohere,generative-cohere' + ENABLE_MODULES: 'text2colbert-jinaai,text2vec-openai,generative-openai,text2vec-cohere,generative-cohere' CLUSTER_HOSTNAME: 'node1' DISABLE_TELEMETRY: 'true' diff --git a/integration/test_collection_openai.py b/integration/test_collection_openai.py index f53ae5567..bd3319955 100644 --- a/integration/test_collection_openai.py +++ b/integration/test_collection_openai.py @@ -588,7 +588,7 @@ def test_queries_with_rerank_and_generative(collection_factory: CollectionFactor reranker_config=Configure.Reranker.transformers(), vectorizer_config=Configure.Vectorizer.text2vec_openai(), properties=[Property(name="text", data_type=DataType.TEXT)], - ports=(8079, 50050), + ports=(8086, 50057), headers={"X-OpenAI-Api-Key": api_key}, ) if collection._connection._weaviate_version < _ServerVersion(1, 23, 1): diff --git a/integration/test_collection_rerank.py b/integration/test_collection_rerank.py index 4363dc5fb..f201a3344 100644 --- a/integration/test_collection_rerank.py +++ b/integration/test_collection_rerank.py @@ -38,6 +38,7 @@ def test_queries_with_rerank(collection_factory: CollectionFactory) -> None: vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_openai(), properties=[wvc.config.Property(name="text", data_type=wvc.config.DataType.TEXT)], headers={"X-OpenAI-Api-Key": api_key}, + ports=(8086, 50057), ) if collection._connection._weaviate_version < _ServerVersion(1, 23, 1): pytest.skip("Reranking requires Weaviate 1.23.1 or higher") @@ -95,6 +96,7 @@ def 
test_queries_with_rerank_and_group_by(collection_factory: CollectionFactory) ), properties=[wvc.config.Property(name="text", data_type=wvc.config.DataType.TEXT)], headers={"X-OpenAI-Api-Key": api_key}, + ports=(8086, 50057), ) if collection._connection._weaviate_version < _ServerVersion(1, 23, 1): pytest.skip("Reranking requires Weaviate 1.23.1 or higher") diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index 21be17251..4a1bb3e73 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -1,3 +1,4 @@ +import os import uuid from typing import List, Union, Dict, Sequence @@ -14,7 +15,7 @@ ReferenceProperty, ) from weaviate.collections.classes.data import DataObject -from weaviate.collections.classes.grpc import _MultiTargetVectorJoin +from weaviate.collections.classes.grpc import _MultiTargetVectorJoin, _ManyVectorsQuery from weaviate.exceptions import WeaviateInvalidInputError from weaviate.types import INCLUDE_VECTOR @@ -690,6 +691,13 @@ def test_same_target_vector_multiple_input( "near_vector,target_vector", [ ({"first": [0, 1], "second": [[1, 0, 0], [0, 0, 1]]}, ["first", "second"]), + ( + { + "first": [0, 1], + "second": wvc.query.NearVectorQuery.many_vectors([[1, 0, 0], [0, 0, 1]]), + }, + ["first", "second"], + ), ({"first": [[0, 1], [0, 1]], "second": [1, 0, 0]}, ["first", "second"]), ( {"first": [[0, 1], [0, 1]], "second": [[1, 0, 0], [0, 0, 1]]}, @@ -703,7 +711,7 @@ def test_same_target_vector_multiple_input( ) def test_same_target_vector_multiple_input_combinations( collection_factory: CollectionFactory, - near_vector: Dict[str, Union[Sequence[float], Sequence[Sequence[float]]]], + near_vector: Dict[str, Union[Sequence[float], Sequence[Sequence[float]], _ManyVectorsQuery]], target_vector: List[str], ) -> None: dummy = collection_factory("dummy") @@ -801,3 +809,84 @@ def test_include_vector_on_references( ).objects assert objs[0].references["hasRef"].objects[0].vector == expected + + +def 
test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: + dummy = collection_factory() + if dummy._connection._weaviate_version.is_lower_than(1, 29, 0): + pytest.skip("ColBERT vectors are only supported in Weaviate v1.29.0 and higher.") + + collection = collection_factory( + properties=[ + wvc.config.Property( + name="title", + data_type=wvc.config.DataType.TEXT, + ) + ], + vectorizer_config=[ + wvc.config.Configure.NamedVectors.none( + name="colbert", + vector_index_config=wvc.config.Configure.VectorIndex.hnsw( + multi_vector=wvc.config.Configure.VectorIndex.MultiVector.colbert() + ), + ), + ], + ) + + collection.data.insert({}, vector={"colbert": [[1, 2], [4, 5]]}) + assert len(collection) == 1 + + objs = collection.query.near_vector( + {"colbert": wvc.query.NearVectorQuery.multi_vector([[1, 2], [3, 4]])}, + target_vector="colbert", + ).objects + assert len(objs) == 1 + + +def test_colbert_vectors_jinaai(collection_factory: CollectionFactory) -> None: + api_key = os.environ.get("JINAAI_APIKEY") + if api_key is None: + pytest.skip("No JinaAI API key found.") + + dummy = collection_factory(ports=(8086, 50057), headers={"X-Jinaai-Api-Key": api_key}) + if dummy._connection._weaviate_version.is_lower_than(1, 29, 0): + pytest.skip("ColBERT vectors are only supported in Weaviate v1.29.0 and higher.") + + collection = collection_factory( + properties=[ + wvc.config.Property( + name="title", + data_type=wvc.config.DataType.TEXT, + ) + ], + vectorizer_config=[ + wvc.config.Configure.NamedVectors.text2colbert_jinaai( + name="colbert", + ) + ], + ) + + uuid = collection.data.insert({"title": "Hello World"}) + assert len(collection) == 1 + obj = collection.query.fetch_object_by_id(uuid, include_vector=["colbert"]) + vecs = obj.vector["colbert"] + assert isinstance(vecs[0], list) + + objs = collection.query.near_text("Hello", target_vector="colbert").objects + assert len(objs) == 1 + + objs = collection.query.hybrid("Hello", target_vector="colbert").objects + 
assert len(objs) == 1 + + objs = collection.query.near_vector( + { + "colbert": wvc.query.NearVectorQuery.multi_vector( + [[e + 0.01 for e in vec] for vec in vecs] + ) + }, + target_vector="colbert", + ).objects + assert len(objs) == 1 + + objs = collection.query.near_object(uuid, target_vector="colbert").objects + assert len(objs) == 1 diff --git a/weaviate/classes/query.py b/weaviate/classes/query.py index e48ef20cb..a7715ba36 100644 --- a/weaviate/classes/query.py +++ b/weaviate/classes/query.py @@ -10,6 +10,7 @@ NearMediaType, QueryNested, QueryReference, + NearVectorQuery, Rerank, Sort, ) @@ -27,6 +28,7 @@ "NearMediaType", "QueryNested", "QueryReference", + "NearVectorQuery", "Rerank", "Sort", "TargetVectors", diff --git a/weaviate/collections/batch/grpc_batch_objects.py b/weaviate/collections/batch/grpc_batch_objects.py index 907135bdc..8f120f4ac 100644 --- a/weaviate/collections/batch/grpc_batch_objects.py +++ b/weaviate/collections/batch/grpc_batch_objects.py @@ -2,7 +2,7 @@ import struct import time import uuid as uuid_package -from typing import Any, Dict, List, Optional, Union, cast +from typing import Any, Dict, List, Mapping, Optional, Sequence, Union, cast from google.protobuf.struct_pb2 import Struct from grpc.aio import AioRpcError # type: ignore @@ -16,7 +16,7 @@ from weaviate.collections.classes.config import ConsistencyLevel from weaviate.collections.classes.internal import ReferenceToMulti, ReferenceInputs from weaviate.collections.classes.types import GeoCoordinate, PhoneNumber -from weaviate.collections.grpc.shared import _BaseGRPC, PERMISSION_DENIED +from weaviate.collections.grpc.shared import _BaseGRPC, _Pack, PERMISSION_DENIED from weaviate.connect import ConnectionV4 from weaviate.exceptions import ( WeaviateBatchError, @@ -26,17 +26,8 @@ InsufficientPermissionsError, ) from weaviate.proto.v1 import batch_pb2, base_pb2 -from weaviate.util import _datetime_to_string, _get_vector_v4 - - -def _pack_named_vectors(vectors: Dict[str, 
List[float]]) -> List[base_pb2.Vectors]: - return [ - base_pb2.Vectors( - name=name, - vector_bytes=struct.pack("{}f".format(len(vector)), *vector), - ) - for name, vector in vectors.items() - ] +from weaviate.types import VECTORS +from weaviate.util import _datetime_to_string, _is_1d_vector class _BatchGRPC(_BaseGRPC): @@ -49,19 +40,22 @@ class _BatchGRPC(_BaseGRPC): def __init__(self, connection: ConnectionV4, consistency_level: Optional[ConsistencyLevel]): super().__init__(connection, consistency_level) - def __grpc_objects(self, objects: List[_BatchObject]) -> List[batch_pb2.BatchObject]: - def pack_vector(vector: Any) -> bytes: - vector_list = _get_vector_v4(vector) - return struct.pack("{}f".format(len(vector_list)), *vector_list) + def __single_vec(self, vectors: Optional[VECTORS]) -> Optional[bytes]: + if not _is_1d_vector(vectors): + return None + return _Pack.single(vectors) + + def __multi_vec(self, vectors: Optional[VECTORS]) -> Optional[List[base_pb2.Vectors]]: + if vectors is None or _is_1d_vector(vectors): + return None + # pylance fails to type narrow TypeGuard in _is_1d_vector properly + v = cast(Mapping[str, Sequence[float] | Sequence[Sequence[float]]], vectors) + return _Pack.vectors(v) + def __grpc_objects(self, objects: List[_BatchObject]) -> List[batch_pb2.BatchObject]: return [ batch_pb2.BatchObject( collection=obj.collection, - vector_bytes=( - pack_vector(obj.vector) - if obj.vector is not None and isinstance(obj.vector, list) - else None - ), uuid=str(obj.uuid) if obj.uuid is not None else str(uuid_package.uuid4()), properties=( self.__translate_properties_from_python_to_grpc( @@ -72,11 +66,8 @@ def pack_vector(vector: Any) -> bytes: else None ), tenant=obj.tenant, - vectors=( - _pack_named_vectors(obj.vector) - if obj.vector is not None and isinstance(obj.vector, dict) - else None - ), + vector_bytes=self.__single_vec(obj.vector), + vectors=self.__multi_vec(obj.vector), ) for obj in objects ] diff --git 
a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 341812ff8..62fef2a47 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -31,6 +31,7 @@ _NamedVectorsUpdate, ) from weaviate.collections.classes.config_vector_index import ( + _MultiVectorConfigCreate, VectorIndexType as VectorIndexTypeAlias, VectorFilterStrategy, ) @@ -1975,6 +1976,16 @@ def __add_props( ret_dict["properties"] = existing_props +class _VectorIndexMultiVector: + @staticmethod + def colbert( + aggregation: Union[Literal["maxSim"], str, None] = None, + ) -> _MultiVectorConfigCreate: + return _MultiVectorConfigCreate( + aggregation=aggregation, + ) + + class _VectorIndexQuantizer: @staticmethod def pq( @@ -2039,6 +2050,7 @@ def sq( class _VectorIndex: + MultiVector = _VectorIndexMultiVector Quantizer = _VectorIndexQuantizer @staticmethod @@ -2066,6 +2078,7 @@ def hnsw( max_connections: Optional[int] = None, vector_cache_max_objects: Optional[int] = None, quantizer: Optional[_QuantizerConfigCreate] = None, + multi_vector: Optional[_MultiVectorConfigCreate] = None, ) -> _VectorIndexConfigHNSWCreate: """Create a `_VectorIndexConfigHNSWCreate` object to be used when defining the HNSW vector index configuration of Weaviate. 
@@ -2087,6 +2100,7 @@ def hnsw( maxConnections=max_connections, vectorCacheMaxObjects=vector_cache_max_objects, quantizer=quantizer, + multivector=multi_vector, ) @staticmethod diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index e0847cb99..416063bfc 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -24,6 +24,7 @@ _Multi2VecVoyageaiConfig, _Multi2VecGoogleConfig, _Ref2VecCentroidConfig, + _Text2ColbertJinaAIConfig, _Text2VecAWSConfig, _Text2VecAzureOpenAIConfig, _Text2VecCohereConfig, @@ -155,6 +156,46 @@ def custom( vector_index_config=vector_index_config, ) + @staticmethod + def text2colbert_jinaai( + name: str, + *, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + model: Optional[str] = None, + dimensions: Optional[int] = None, + ) -> _NamedVectorConfigCreate: + """Create a named vector using the `text2colbert_jinaai` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/colbert) + for detailed usage. + + Arguments: + `name` + The name of the named vector. + `source_properties` + Which properties should be included when vectorizing. By default all text properties are included. + `vector_index_config` + The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. + `model` + The model to use. Defaults to `None`, which uses the server-defined default. + `dimensions` + Number of dimensions. Applicable to v3 OpenAI models only. Defaults to `None`, which uses the server-defined default. 
+ """ + return _NamedVectorConfigCreate( + name=name, + source_properties=source_properties, + vector_index_config=vector_index_config, + vectorizer=_Text2ColbertJinaAIConfig( + model=model, dimensions=dimensions, vectorizeClassName=vectorize_collection_name + ), + ) + @staticmethod def text2vec_cohere( name: str, @@ -1205,8 +1246,7 @@ def text2vec_jinaai( ) -> _NamedVectorConfigCreate: """Create a named vector using the `text2vec-jinaai` model. - See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings) - for detailed usage. + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings) for detailed usage. Arguments: `name` @@ -1223,8 +1263,6 @@ def text2vec_jinaai( The number of dimensions for the generated embeddings. Defaults to `None`, which uses the server-defined default. `model` The model to use. Defaults to `None`, which uses the server-defined default. - See the - [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings#available-models) for more details. 
""" return _NamedVectorConfigCreate( name=name, diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index a620e9c2c..78290af4d 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -72,6 +72,14 @@ def vector_index_type() -> VectorIndexType: return VectorIndexType.HNSW +class _MultiVectorConfigCreateBase(_ConfigCreateModel): + enabled: bool = Field(default=True) + + +class _MultiVectorConfigCreate(_MultiVectorConfigCreateBase): + aggregation: Optional[str] + + class _VectorIndexConfigHNSWCreate(_VectorIndexConfigCreate): cleanupIntervalSeconds: Optional[int] dynamicEfMin: Optional[int] @@ -83,6 +91,7 @@ class _VectorIndexConfigHNSWCreate(_VectorIndexConfigCreate): flatSearchCutoff: Optional[int] maxConnections: Optional[int] vectorCacheMaxObjects: Optional[int] + multivector: Optional[_MultiVectorConfigCreate] @staticmethod def vector_index_type() -> VectorIndexType: diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 96074d969..77464b287 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -114,6 +114,7 @@ class Vectorizers(str, Enum): """ NONE = "none" + TEXT2COLBERT_JINAAI = "text2colbert-jinaai" TEXT2VEC_AWS = "text2vec-aws" TEXT2VEC_COHERE = "text2vec-cohere" TEXT2VEC_CONTEXTIONARY = "text2vec-contextionary" @@ -168,6 +169,15 @@ class _VectorizerConfigCreate(_ConfigCreateModel): vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(default=..., exclude=True) +class _Text2ColbertJinaAIConfig(_VectorizerConfigCreate): + vectorizer: Union[Vectorizers, _EnumLikeStr] = Field( + default=Vectorizers.TEXT2COLBERT_JINAAI, frozen=True, exclude=True + ) + vectorizeClassName: bool + model: Optional[str] + dimensions: Optional[int] + + class _Text2VecContextionaryConfig(_VectorizerConfigCreate): 
vectorizer: Union[Vectorizers, _EnumLikeStr] = Field( default=Vectorizers.TEXT2VEC_CONTEXTIONARY, frozen=True, exclude=True diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index c053aca42..714c9f72e 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from enum import Enum, auto -from typing import ClassVar, List, Literal, Optional, Sequence, Type, Union, Dict, cast +from typing import ClassVar, List, Literal, Mapping, Optional, Sequence, Type, Union, Dict, cast from pydantic import ConfigDict, Field @@ -228,11 +228,38 @@ class Rerank(_WeaviateInput): query: Optional[str] = Field(default=None) +class _MultiVectorQuery(_WeaviateInput): + tensor: Sequence[Sequence[float]] + + +class _ManyVectorsQuery(_WeaviateInput): + vectors: Sequence[Sequence[float]] + + NearVectorInputType = Union[ - Sequence[NUMBER], Dict[str, Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]]] + Sequence[NUMBER], + Sequence[Sequence[NUMBER]], + Mapping[ + str, + Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]], _MultiVectorQuery, _ManyVectorsQuery], + ], ] +class NearVectorQuery: + """Factory class to use when defining near vector queries with multiple vectors in `near_vector()` and `hybrid()` methods.""" + + @staticmethod + def multi_vector(tensor: Sequence[Sequence[float]]) -> _MultiVectorQuery: + """Define a multi-vector query to be used within a near vector search, i.e. a single vector over a multi-vector space.""" + return _MultiVectorQuery(tensor=tensor) + + @staticmethod + def many_vectors(vectors: Sequence[Sequence[float]]) -> _ManyVectorsQuery: + """Define a many-vectors query to be used within a near vector search, i.e. 
multiple vectors over a single-vector space.""" + return _ManyVectorsQuery(vectors=vectors) + + class _HybridNearBase(_WeaviateInput): model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid") diff --git a/weaviate/collections/classes/internal.py b/weaviate/collections/classes/internal.py index d1e343a7f..4e4e81c40 100644 --- a/weaviate/collections/classes/internal.py +++ b/weaviate/collections/classes/internal.py @@ -94,7 +94,7 @@ class _Object(Generic[P, R, M]): metadata: M properties: P references: R - vector: Dict[str, List[float]] + vector: Dict[str, Union[List[float], List[List[float]]]] collection: str diff --git a/weaviate/collections/grpc/query.py b/weaviate/collections/grpc/query.py index 287576b06..3ed3c323a 100644 --- a/weaviate/collections/grpc/query.py +++ b/weaviate/collections/grpc/query.py @@ -24,6 +24,8 @@ from weaviate.collections.classes.filters import _Filters from weaviate.collections.classes.grpc import ( _MultiTargetVectorJoin, + _MultiVectorQuery, + _ManyVectorsQuery, HybridFusion, _QueryReferenceMultiTarget, _MetadataQuery, @@ -48,7 +50,7 @@ ) from weaviate.collections.filters import _FilterToGRPC from weaviate.collections.grpc.retry import _Retry -from weaviate.collections.grpc.shared import _BaseGRPC, PERMISSION_DENIED +from weaviate.collections.grpc.shared import _BaseGRPC, _Pack, PERMISSION_DENIED from weaviate.connect import ConnectionV4 from weaviate.exceptions import ( InsufficientPermissionsError, @@ -946,10 +948,13 @@ def __target_vector_to_grpc( else: return target_vector.to_grpc_target_vector(self._connection._weaviate_version), None - @staticmethod def __vector_per_target( - vector: NearVectorInputType, targets: Optional[search_get_pb2.Targets], argument_name: str + self, + vector: NearVectorInputType, + targets: Optional[search_get_pb2.Targets], + argument_name: str, ) -> Tuple[Optional[Dict[str, bytes]], Optional[bytes]]: + """@deprecated in 1.27.0, included for BC until 1.27.0 is no longer supported.""" # noqa: 
D401 invalid_nv_exception = WeaviateInvalidInputError( f"""{argument_name} argument can be: - a list of numbers @@ -977,7 +982,11 @@ def __vector_per_target( return vector_per_target, None else: - if len(vector) == 0: + if ( + isinstance(vector, _MultiVectorQuery) + or isinstance(vector, _ManyVectorsQuery) + or len(vector) == 0 + ): raise invalid_nv_exception if _is_1d_vector(vector): @@ -991,9 +1000,11 @@ def __vector_per_target( keys and lists of numbers as values.""" ) - @staticmethod def __vector_for_target( - vector: NearVectorInputType, targets: Optional[search_get_pb2.Targets], argument_name: str + self, + vector: NearVectorInputType, + targets: Optional[search_get_pb2.Targets], + argument_name: str, ) -> Tuple[ Optional[List[search_get_pb2.VectorForTarget]], Optional[bytes], Optional[List[str]] ]: @@ -1006,7 +1017,7 @@ def __vector_for_target( vector_for_target: List[search_get_pb2.VectorForTarget] = [] - def add_vector(val: List[float], target_name: str) -> None: + def add_vector(val: Sequence[float], target_name: str) -> None: vec = _get_vector_v4(val) if ( @@ -1016,11 +1027,16 @@ def add_vector(val: List[float], target_name: str) -> None: ): raise invalid_nv_exception - vector_for_target.append( - search_get_pb2.VectorForTarget( - name=target_name, vector_bytes=struct.pack("{}f".format(len(vec)), *vec) + if self._connection._weaviate_version.is_lower_than(1, 29, 0): + vector_for_target.append( + search_get_pb2.VectorForTarget(name=target_name, vector_bytes=_Pack.single(vec)) + ) + else: + vector_for_target.append( + search_get_pb2.VectorForTarget( + name=target_name, vectors=_Pack.vectors({target_name: vec}) + ) ) - ) if isinstance(vector, dict): if ( @@ -1033,11 +1049,26 @@ def add_vector(val: List[float], target_name: str) -> None: for key, value in vector.items(): # typing tools do not understand the type narrowing here if _is_1d_vector(value): - val: List[float] = cast(List[float], value) + val = value add_vector(val, key) 
target_vectors_tmp.append(key) + elif isinstance(value, _MultiVectorQuery): + vector_for_target.append( + search_get_pb2.VectorForTarget( + name=key, + vectors=_Pack.vectors({key: value.tensor}), + ) + ) + elif isinstance(value, _ManyVectorsQuery): + vector_for_target.append( + search_get_pb2.VectorForTarget( + name=key, + vectors=_Pack.vectors({key: vector for vector in value.vectors}), + ) + ) + target_vectors_tmp.append(key) else: - vals: List[List[float]] = cast(List[List[float]], value) + vals = cast(Sequence[Sequence[NUMBER]], value) for inner_vector in vals: add_vector(inner_vector, key) target_vectors_tmp.append(key) diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index d1d8f1775..e95bcdc51 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -1,11 +1,22 @@ -from typing import Optional +import struct +from collections.abc import Mapping +from dataclasses import dataclass +from typing import List, Optional, Sequence, Union +from typing_extensions import TypeGuard from weaviate.collections.classes.config import ConsistencyLevel from weaviate.connect import ConnectionV4 +from weaviate.exceptions import WeaviateInvalidInputError from weaviate.proto.v1 import base_pb2 +from weaviate.types import NUMBER +from weaviate.util import _get_vector_v4 + PERMISSION_DENIED = "PERMISSION_DENIED" +UINT32_LEN = 4 +UINT64_LEN = 8 + class _BaseGRPC: def __init__( @@ -30,3 +41,91 @@ def _get_consistency_level( else: assert consistency_level.value == ConsistencyLevel.ALL return base_pb2.ConsistencyLevel.CONSISTENCY_LEVEL_ALL + + +class _ByteOps: + @staticmethod + def decode_float32s(byte_vector: bytes) -> List[float]: + return [ + float(val) for val in struct.unpack(f"{len(byte_vector)//UINT32_LEN}f", byte_vector) + ] + + @staticmethod + def decode_float64s(byte_vector: bytes) -> List[float]: + return [ + float(val) for val in struct.unpack(f"{len(byte_vector)//UINT64_LEN}d", byte_vector) + ] + + 
@staticmethod + def decode_int64s(byte_vector: bytes) -> List[int]: + return [int(val) for val in struct.unpack(f"{len(byte_vector)//UINT64_LEN}q", byte_vector)] + + +@dataclass +class _Packing: + bytes_: bytes + type_: base_pb2.VectorType + + +class _Pack: + @staticmethod + def is_multi( + v: Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]] + ) -> TypeGuard[List[List[NUMBER]]]: + return len(v) > 0 and isinstance(v[0], list) + + @staticmethod + def is_single( + v: Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]] + ) -> TypeGuard[List[NUMBER]]: + return len(v) > 0 and (isinstance(v[0], float) or isinstance(v[0], int)) + + @staticmethod + def parse_single_or_multi_vec( + vector: Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]] + ) -> _Packing: + if _Pack.is_multi(vector): + return _Packing(bytes_=_Pack.multi(vector), type_=base_pb2.VECTOR_TYPE_MULTI_FP32) + elif _Pack.is_single(vector): + return _Packing(bytes_=_Pack.single(vector), type_=base_pb2.VECTOR_TYPE_SINGLE_FP32) + else: + raise WeaviateInvalidInputError(f"Invalid vectors: {vector}") + + @staticmethod + def vectors( + vectors: Mapping[str, Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]]] + ) -> List[base_pb2.Vectors]: + return [ + base_pb2.Vectors(name=name, vector_bytes=packing.bytes_, type=packing.type_) + for name, vec_or_vecs in vectors.items() + if (packing := _Pack.parse_single_or_multi_vec(vec_or_vecs)) + ] + + @staticmethod + def single(vector: Sequence[NUMBER]) -> bytes: + vector_list = _get_vector_v4(vector) + return struct.pack("{}f".format(len(vector_list)), *vector_list) + + @staticmethod + def multi(vector: Sequence[Sequence[NUMBER]]) -> bytes: + vector_list = [item for sublist in vector for item in sublist] + return struct.pack(" List[float]: + return _ByteOps.decode_float32s(byte_vector) + + @staticmethod + def multi(byte_vector: bytes) -> List[List[float]]: + dim_bytes = byte_vector[:2] + dim = int(struct.unpack(" Dict[str, List[float]]: + ) -> Dict[str, Union[List[float], 
List[List[float]]]]: if ( len(add_props.vector_bytes) == 0 and len(add_props.vector) == 0 @@ -160,9 +157,14 @@ def __extract_vector_for_object( if len(add_props.vector_bytes) > 0: return {"default": _ByteOps.decode_float32s(add_props.vector_bytes)} - vecs = {} + vecs: Dict[str, Union[List[float], List[List[float]]]] = {} for vec in add_props.vectors: - vecs[vec.name] = _ByteOps.decode_float32s(vec.vector_bytes) + if vec.type == base_pb2.VECTOR_TYPE_SINGLE_FP32: + vecs[vec.name] = _Unpack.single(vec.vector_bytes) + elif vec.type == base_pb2.VECTOR_TYPE_MULTI_FP32: + vecs[vec.name] = _Unpack.multi(vec.vector_bytes) + else: + vecs[vec.name] = _Unpack.single(vec.vector_bytes) return vecs def __extract_generated_for_object( diff --git a/weaviate/collections/queries/byteops.py b/weaviate/collections/queries/byteops.py deleted file mode 100644 index 93c97fac8..000000000 --- a/weaviate/collections/queries/byteops.py +++ /dev/null @@ -1,16 +0,0 @@ -import struct -from typing import List - - -class _ByteOps: - @staticmethod - def decode_float32s(byte_vector: bytes) -> List[float]: - return [float(val) for val in struct.unpack(f"{len(byte_vector)//4}f", byte_vector)] - - @staticmethod - def decode_float64s(byte_vector: bytes) -> List[float]: - return [float(val) for val in struct.unpack(f"{len(byte_vector)//8}d", byte_vector)] - - @staticmethod - def decode_int64s(byte_vector: bytes) -> List[int]: - return [int(val) for val in struct.unpack(f"{len(byte_vector)//8}q", byte_vector)] diff --git a/weaviate/proto/v1/base_pb2.py b/weaviate/proto/v1/base_pb2.py index b9d3886de..503562529 100644 --- a/weaviate/proto/v1/base_pb2.py +++ b/weaviate/proto/v1/base_pb2.py @@ -22,7 +22,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\rv1/base.proto\x12\x0bweaviate.v1\x1a\x1cgoogle/protobuf/struct.proto"T\n\x15NumberArrayProperties\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x18\x01\x12\x11\n\tprop_name\x18\x02 \x01(\t\x12\x14\n\x0cvalues_bytes\x18\x03 
\x01(\x0c"7\n\x12IntArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\x03\x12\x11\n\tprop_name\x18\x02 \x01(\t"8\n\x13TextArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\t\x12\x11\n\tprop_name\x18\x02 \x01(\t";\n\x16\x42ooleanArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\x08\x12\x11\n\tprop_name\x18\x02 \x01(\t"\xf1\x03\n\x15ObjectPropertiesValue\x12\x33\n\x12non_ref_properties\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x43\n\x17number_array_properties\x18\x02 \x03(\x0b\x32".weaviate.v1.NumberArrayProperties\x12=\n\x14int_array_properties\x18\x03 \x03(\x0b\x32\x1f.weaviate.v1.IntArrayProperties\x12?\n\x15text_array_properties\x18\x04 \x03(\x0b\x32 .weaviate.v1.TextArrayProperties\x12\x45\n\x18\x62oolean_array_properties\x18\x05 \x03(\x0b\x32#.weaviate.v1.BooleanArrayProperties\x12\x38\n\x11object_properties\x18\x06 \x03(\x0b\x32\x1d.weaviate.v1.ObjectProperties\x12\x43\n\x17object_array_properties\x18\x07 \x03(\x0b\x32".weaviate.v1.ObjectArrayProperties\x12\x18\n\x10\x65mpty_list_props\x18\n \x03(\t"^\n\x15ObjectArrayProperties\x12\x32\n\x06values\x18\x01 \x03(\x0b\x32".weaviate.v1.ObjectPropertiesValue\x12\x11\n\tprop_name\x18\x02 \x01(\t"X\n\x10ObjectProperties\x12\x31\n\x05value\x18\x01 \x01(\x0b\x32".weaviate.v1.ObjectPropertiesValue\x12\x11\n\tprop_name\x18\x02 \x01(\t"\x1b\n\tTextArray\x12\x0e\n\x06values\x18\x01 \x03(\t"\x1a\n\x08IntArray\x12\x0e\n\x06values\x18\x01 \x03(\x03"\x1d\n\x0bNumberArray\x12\x0e\n\x06values\x18\x01 \x03(\x01"\x1e\n\x0c\x42ooleanArray\x12\x0e\n\x06values\x18\x01 \x03(\x08"\xfc\x06\n\x07\x46ilters\x12/\n\x08operator\x18\x01 \x01(\x0e\x32\x1d.weaviate.v1.Filters.Operator\x12\x0e\n\x02on\x18\x02 \x03(\tB\x02\x18\x01\x12%\n\x07\x66ilters\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Filters\x12\x14\n\nvalue_text\x18\x04 \x01(\tH\x00\x12\x13\n\tvalue_int\x18\x05 \x01(\x03H\x00\x12\x17\n\rvalue_boolean\x18\x06 \x01(\x08H\x00\x12\x16\n\x0cvalue_number\x18\x07 \x01(\x01H\x00\x12\x32\n\x10value_text_array\x18\t 
\x01(\x0b\x32\x16.weaviate.v1.TextArrayH\x00\x12\x30\n\x0fvalue_int_array\x18\n \x01(\x0b\x32\x15.weaviate.v1.IntArrayH\x00\x12\x38\n\x13value_boolean_array\x18\x0b \x01(\x0b\x32\x19.weaviate.v1.BooleanArrayH\x00\x12\x36\n\x12value_number_array\x18\x0c \x01(\x0b\x32\x18.weaviate.v1.NumberArrayH\x00\x12\x36\n\tvalue_geo\x18\r \x01(\x0b\x32!.weaviate.v1.GeoCoordinatesFilterH\x00\x12)\n\x06target\x18\x14 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget"\xe3\x02\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x12\n\x0eOPERATOR_EQUAL\x10\x01\x12\x16\n\x12OPERATOR_NOT_EQUAL\x10\x02\x12\x19\n\x15OPERATOR_GREATER_THAN\x10\x03\x12\x1f\n\x1bOPERATOR_GREATER_THAN_EQUAL\x10\x04\x12\x16\n\x12OPERATOR_LESS_THAN\x10\x05\x12\x1c\n\x18OPERATOR_LESS_THAN_EQUAL\x10\x06\x12\x10\n\x0cOPERATOR_AND\x10\x07\x12\x0f\n\x0bOPERATOR_OR\x10\x08\x12\x1d\n\x19OPERATOR_WITHIN_GEO_RANGE\x10\t\x12\x11\n\rOPERATOR_LIKE\x10\n\x12\x14\n\x10OPERATOR_IS_NULL\x10\x0b\x12\x19\n\x15OPERATOR_CONTAINS_ANY\x10\x0c\x12\x19\n\x15OPERATOR_CONTAINS_ALL\x10\rB\x0c\n\ntest_value"T\n\x1b\x46ilterReferenceSingleTarget\x12\n\n\x02on\x18\x01 \x01(\t\x12)\n\x06target\x18\x02 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget"n\n\x1a\x46ilterReferenceMultiTarget\x12\n\n\x02on\x18\x01 \x01(\t\x12)\n\x06target\x18\x02 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget\x12\x19\n\x11target_collection\x18\x03 \x01(\t""\n\x14\x46ilterReferenceCount\x12\n\n\x02on\x18\x01 \x01(\t"\xe4\x01\n\x0c\x46ilterTarget\x12\x12\n\x08property\x18\x01 \x01(\tH\x00\x12\x41\n\rsingle_target\x18\x02 \x01(\x0b\x32(.weaviate.v1.FilterReferenceSingleTargetH\x00\x12?\n\x0cmulti_target\x18\x03 \x01(\x0b\x32\'.weaviate.v1.FilterReferenceMultiTargetH\x00\x12\x32\n\x05\x63ount\x18\x04 \x01(\x0b\x32!.weaviate.v1.FilterReferenceCountH\x00\x42\x08\n\x06target"M\n\x14GeoCoordinatesFilter\x12\x10\n\x08latitude\x18\x01 \x01(\x02\x12\x11\n\tlongitude\x18\x02 \x01(\x02\x12\x10\n\x08\x64istance\x18\x03 \x01(\x02"<\n\x07Vectors\x12\x0c\n\x04name\x18\x01 
\x01(\t\x12\r\n\x05index\x18\x02 \x01(\x04\x12\x14\n\x0cvector_bytes\x18\x03 \x01(\x0c*\x89\x01\n\x10\x43onsistencyLevel\x12!\n\x1d\x43ONSISTENCY_LEVEL_UNSPECIFIED\x10\x00\x12\x19\n\x15\x43ONSISTENCY_LEVEL_ONE\x10\x01\x12\x1c\n\x18\x43ONSISTENCY_LEVEL_QUORUM\x10\x02\x12\x19\n\x15\x43ONSISTENCY_LEVEL_ALL\x10\x03\x42n\n#io.weaviate.client.grpc.protocol.v1B\x11WeaviateProtoBaseZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' + b'\n\rv1/base.proto\x12\x0bweaviate.v1\x1a\x1cgoogle/protobuf/struct.proto"T\n\x15NumberArrayProperties\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x18\x01\x12\x11\n\tprop_name\x18\x02 \x01(\t\x12\x14\n\x0cvalues_bytes\x18\x03 \x01(\x0c"7\n\x12IntArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\x03\x12\x11\n\tprop_name\x18\x02 \x01(\t"8\n\x13TextArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\t\x12\x11\n\tprop_name\x18\x02 \x01(\t";\n\x16\x42ooleanArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\x08\x12\x11\n\tprop_name\x18\x02 \x01(\t"\xf1\x03\n\x15ObjectPropertiesValue\x12\x33\n\x12non_ref_properties\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x43\n\x17number_array_properties\x18\x02 \x03(\x0b\x32".weaviate.v1.NumberArrayProperties\x12=\n\x14int_array_properties\x18\x03 \x03(\x0b\x32\x1f.weaviate.v1.IntArrayProperties\x12?\n\x15text_array_properties\x18\x04 \x03(\x0b\x32 .weaviate.v1.TextArrayProperties\x12\x45\n\x18\x62oolean_array_properties\x18\x05 \x03(\x0b\x32#.weaviate.v1.BooleanArrayProperties\x12\x38\n\x11object_properties\x18\x06 \x03(\x0b\x32\x1d.weaviate.v1.ObjectProperties\x12\x43\n\x17object_array_properties\x18\x07 \x03(\x0b\x32".weaviate.v1.ObjectArrayProperties\x12\x18\n\x10\x65mpty_list_props\x18\n \x03(\t"^\n\x15ObjectArrayProperties\x12\x32\n\x06values\x18\x01 \x03(\x0b\x32".weaviate.v1.ObjectPropertiesValue\x12\x11\n\tprop_name\x18\x02 \x01(\t"X\n\x10ObjectProperties\x12\x31\n\x05value\x18\x01 \x01(\x0b\x32".weaviate.v1.ObjectPropertiesValue\x12\x11\n\tprop_name\x18\x02 
\x01(\t"\x1b\n\tTextArray\x12\x0e\n\x06values\x18\x01 \x03(\t"\x1a\n\x08IntArray\x12\x0e\n\x06values\x18\x01 \x03(\x03"\x1d\n\x0bNumberArray\x12\x0e\n\x06values\x18\x01 \x03(\x01"\x1e\n\x0c\x42ooleanArray\x12\x0e\n\x06values\x18\x01 \x03(\x08"\xfc\x06\n\x07\x46ilters\x12/\n\x08operator\x18\x01 \x01(\x0e\x32\x1d.weaviate.v1.Filters.Operator\x12\x0e\n\x02on\x18\x02 \x03(\tB\x02\x18\x01\x12%\n\x07\x66ilters\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Filters\x12\x14\n\nvalue_text\x18\x04 \x01(\tH\x00\x12\x13\n\tvalue_int\x18\x05 \x01(\x03H\x00\x12\x17\n\rvalue_boolean\x18\x06 \x01(\x08H\x00\x12\x16\n\x0cvalue_number\x18\x07 \x01(\x01H\x00\x12\x32\n\x10value_text_array\x18\t \x01(\x0b\x32\x16.weaviate.v1.TextArrayH\x00\x12\x30\n\x0fvalue_int_array\x18\n \x01(\x0b\x32\x15.weaviate.v1.IntArrayH\x00\x12\x38\n\x13value_boolean_array\x18\x0b \x01(\x0b\x32\x19.weaviate.v1.BooleanArrayH\x00\x12\x36\n\x12value_number_array\x18\x0c \x01(\x0b\x32\x18.weaviate.v1.NumberArrayH\x00\x12\x36\n\tvalue_geo\x18\r \x01(\x0b\x32!.weaviate.v1.GeoCoordinatesFilterH\x00\x12)\n\x06target\x18\x14 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget"\xe3\x02\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x12\n\x0eOPERATOR_EQUAL\x10\x01\x12\x16\n\x12OPERATOR_NOT_EQUAL\x10\x02\x12\x19\n\x15OPERATOR_GREATER_THAN\x10\x03\x12\x1f\n\x1bOPERATOR_GREATER_THAN_EQUAL\x10\x04\x12\x16\n\x12OPERATOR_LESS_THAN\x10\x05\x12\x1c\n\x18OPERATOR_LESS_THAN_EQUAL\x10\x06\x12\x10\n\x0cOPERATOR_AND\x10\x07\x12\x0f\n\x0bOPERATOR_OR\x10\x08\x12\x1d\n\x19OPERATOR_WITHIN_GEO_RANGE\x10\t\x12\x11\n\rOPERATOR_LIKE\x10\n\x12\x14\n\x10OPERATOR_IS_NULL\x10\x0b\x12\x19\n\x15OPERATOR_CONTAINS_ANY\x10\x0c\x12\x19\n\x15OPERATOR_CONTAINS_ALL\x10\rB\x0c\n\ntest_value"T\n\x1b\x46ilterReferenceSingleTarget\x12\n\n\x02on\x18\x01 \x01(\t\x12)\n\x06target\x18\x02 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget"n\n\x1a\x46ilterReferenceMultiTarget\x12\n\n\x02on\x18\x01 \x01(\t\x12)\n\x06target\x18\x02 
\x01(\x0b\x32\x19.weaviate.v1.FilterTarget\x12\x19\n\x11target_collection\x18\x03 \x01(\t""\n\x14\x46ilterReferenceCount\x12\n\n\x02on\x18\x01 \x01(\t"\xe4\x01\n\x0c\x46ilterTarget\x12\x12\n\x08property\x18\x01 \x01(\tH\x00\x12\x41\n\rsingle_target\x18\x02 \x01(\x0b\x32(.weaviate.v1.FilterReferenceSingleTargetH\x00\x12?\n\x0cmulti_target\x18\x03 \x01(\x0b\x32\'.weaviate.v1.FilterReferenceMultiTargetH\x00\x12\x32\n\x05\x63ount\x18\x04 \x01(\x0b\x32!.weaviate.v1.FilterReferenceCountH\x00\x42\x08\n\x06target"M\n\x14GeoCoordinatesFilter\x12\x10\n\x08latitude\x18\x01 \x01(\x02\x12\x11\n\tlongitude\x18\x02 \x01(\x02\x12\x10\n\x08\x64istance\x18\x03 \x01(\x02"g\n\x07Vectors\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x11\n\x05index\x18\x02 \x01(\x04\x42\x02\x18\x01\x12\x14\n\x0cvector_bytes\x18\x03 \x01(\x0c\x12%\n\x04type\x18\x04 \x01(\x0e\x32\x17.weaviate.v1.VectorType*\x89\x01\n\x10\x43onsistencyLevel\x12!\n\x1d\x43ONSISTENCY_LEVEL_UNSPECIFIED\x10\x00\x12\x19\n\x15\x43ONSISTENCY_LEVEL_ONE\x10\x01\x12\x1c\n\x18\x43ONSISTENCY_LEVEL_QUORUM\x10\x02\x12\x19\n\x15\x43ONSISTENCY_LEVEL_ALL\x10\x03*b\n\nVectorType\x12\x1b\n\x17VECTOR_TYPE_UNSPECIFIED\x10\x00\x12\x1b\n\x17VECTOR_TYPE_SINGLE_FP32\x10\x01\x12\x1a\n\x16VECTOR_TYPE_MULTI_FP32\x10\x02\x42n\n#io.weaviate.client.grpc.protocol.v1B\x11WeaviateProtoBaseZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' ) _globals = globals() @@ -37,8 +37,12 @@ _globals["_NUMBERARRAYPROPERTIES"].fields_by_name["values"]._serialized_options = b"\030\001" _globals["_FILTERS"].fields_by_name["on"]._loaded_options = None _globals["_FILTERS"].fields_by_name["on"]._serialized_options = b"\030\001" - _globals["_CONSISTENCYLEVEL"]._serialized_start = 2630 - _globals["_CONSISTENCYLEVEL"]._serialized_end = 2767 + _globals["_VECTORS"].fields_by_name["index"]._loaded_options = None + _globals["_VECTORS"].fields_by_name["index"]._serialized_options = b"\030\001" + _globals["_CONSISTENCYLEVEL"]._serialized_start = 2673 + 
_globals["_CONSISTENCYLEVEL"]._serialized_end = 2810 + _globals["_VECTORTYPE"]._serialized_start = 2812 + _globals["_VECTORTYPE"]._serialized_end = 2910 _globals["_NUMBERARRAYPROPERTIES"]._serialized_start = 60 _globals["_NUMBERARRAYPROPERTIES"]._serialized_end = 144 _globals["_INTARRAYPROPERTIES"]._serialized_start = 146 @@ -76,5 +80,5 @@ _globals["_GEOCOORDINATESFILTER"]._serialized_start = 2488 _globals["_GEOCOORDINATESFILTER"]._serialized_end = 2565 _globals["_VECTORS"]._serialized_start = 2567 - _globals["_VECTORS"]._serialized_end = 2627 + _globals["_VECTORS"]._serialized_end = 2670 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/base_pb2.pyi b/weaviate/proto/v1/base_pb2.pyi index f2b2e8f31..257244bb2 100644 --- a/weaviate/proto/v1/base_pb2.pyi +++ b/weaviate/proto/v1/base_pb2.pyi @@ -20,10 +20,19 @@ class ConsistencyLevel(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): CONSISTENCY_LEVEL_QUORUM: _ClassVar[ConsistencyLevel] CONSISTENCY_LEVEL_ALL: _ClassVar[ConsistencyLevel] +class VectorType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + VECTOR_TYPE_UNSPECIFIED: _ClassVar[VectorType] + VECTOR_TYPE_SINGLE_FP32: _ClassVar[VectorType] + VECTOR_TYPE_MULTI_FP32: _ClassVar[VectorType] + CONSISTENCY_LEVEL_UNSPECIFIED: ConsistencyLevel CONSISTENCY_LEVEL_ONE: ConsistencyLevel CONSISTENCY_LEVEL_QUORUM: ConsistencyLevel CONSISTENCY_LEVEL_ALL: ConsistencyLevel +VECTOR_TYPE_UNSPECIFIED: VectorType +VECTOR_TYPE_SINGLE_FP32: VectorType +VECTOR_TYPE_MULTI_FP32: VectorType class NumberArrayProperties(_message.Message): __slots__ = ("values", "prop_name", "values_bytes") @@ -319,16 +328,19 @@ class GeoCoordinatesFilter(_message.Message): ) -> None: ... 
class Vectors(_message.Message): - __slots__ = ("name", "index", "vector_bytes") + __slots__ = ("name", "index", "vector_bytes", "type") NAME_FIELD_NUMBER: _ClassVar[int] INDEX_FIELD_NUMBER: _ClassVar[int] VECTOR_BYTES_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] name: str index: int vector_bytes: bytes + type: VectorType def __init__( self, name: _Optional[str] = ..., index: _Optional[int] = ..., vector_bytes: _Optional[bytes] = ..., + type: _Optional[_Union[VectorType, str]] = ..., ) -> None: ... diff --git a/weaviate/proto/v1/search_get_pb2.py b/weaviate/proto/v1/search_get_pb2.py index c2ae33ba5..f1da1b40d 100644 --- a/weaviate/proto/v1/search_get_pb2.py +++ b/weaviate/proto/v1/search_get_pb2.py @@ -25,7 +25,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\x1cgoogle/protobuf/struct.proto\x1a\rv1/base.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! 
\x01(\t\x12$\n\x07sort_by\x18" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. \x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 
\x01(\x05")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t"\xdd\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 \x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02"\xfa\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12\x36\n\x07weights\x18\x03 \x03(\x0b\x32!.weaviate.v1.Targets.WeightsEntryB\x02\x18\x01\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTarget\x1a.\n\x0cWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01"\xc5\x03\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\r\n\x05\x61lpha\x18\x04 \x01(\x02\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x14\n\x0cvector_bytes\x18\x06 \x01(\x0c\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 
\x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthreshold"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 
\x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance")\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t"5\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cvector_bytes\x18\x02 \x01(\x0c"\x82\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x14\n\x0cvector_bytes\x18\x04 \x01(\x0c\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 
\x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query"\xae\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_results"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 
\x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f \x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent"\xba\x05\n\x10PropertiesResult\x12\x37\n\x12non_ref_properties\x18\x01 \x01(\x0b\x32\x17.google.protobuf.StructB\x02\x18\x01\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12G\n\x17number_array_properties\x18\x05 
\x03(\x0b\x32".weaviate.v1.NumberArrayPropertiesB\x02\x18\x01\x12\x41\n\x14int_array_properties\x18\x06 \x03(\x0b\x32\x1f.weaviate.v1.IntArrayPropertiesB\x02\x18\x01\x12\x43\n\x15text_array_properties\x18\x07 \x03(\x0b\x32 .weaviate.v1.TextArrayPropertiesB\x02\x18\x01\x12I\n\x18\x62oolean_array_properties\x18\x08 \x03(\x0b\x32#.weaviate.v1.BooleanArrayPropertiesB\x02\x18\x01\x12<\n\x11object_properties\x18\t \x03(\x0b\x32\x1d.weaviate.v1.ObjectPropertiesB\x02\x18\x01\x12G\n\x17object_array_properties\x18\n \x03(\x0b\x32".weaviate.v1.ObjectArrayPropertiesB\x02\x18\x01\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c \x01(\x08"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\t*\xee\x01\n\x11\x43ombinationMethod\x12"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42s\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' + b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\x1cgoogle/protobuf/struct.proto\x1a\rv1/base.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e 
\x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. \x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 
\x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t"\xdd\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 \x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02"\xfa\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12\x36\n\x07weights\x18\x03 \x03(\x0b\x32!.weaviate.v1.Targets.WeightsEntryB\x02\x18\x01\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTarget\x1a.\n\x0cWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01"\xf0\x03\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\r\n\x05\x61lpha\x18\x04 \x01(\x02\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 
\x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthreshold"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 
\x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance")\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 
\x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query"\xae\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_results"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 
\x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f \x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent"\xba\x05\n\x10PropertiesResult\x12\x37\n\x12non_ref_properties\x18\x01 \x01(\x0b\x32\x17.google.protobuf.StructB\x02\x18\x01\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 
.weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12G\n\x17number_array_properties\x18\x05 \x03(\x0b\x32".weaviate.v1.NumberArrayPropertiesB\x02\x18\x01\x12\x41\n\x14int_array_properties\x18\x06 \x03(\x0b\x32\x1f.weaviate.v1.IntArrayPropertiesB\x02\x18\x01\x12\x43\n\x15text_array_properties\x18\x07 \x03(\x0b\x32 .weaviate.v1.TextArrayPropertiesB\x02\x18\x01\x12I\n\x18\x62oolean_array_properties\x18\x08 \x03(\x0b\x32#.weaviate.v1.BooleanArrayPropertiesB\x02\x18\x01\x12<\n\x11object_properties\x18\t \x03(\x0b\x32\x1d.weaviate.v1.ObjectPropertiesB\x02\x18\x01\x12G\n\x17object_array_properties\x18\n \x03(\x0b\x32".weaviate.v1.ObjectArrayPropertiesB\x02\x18\x01\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c \x01(\x08"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\t*\xee\x01\n\x11\x43ombinationMethod\x12"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42s\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' ) _globals = globals() @@ -46,6 +46,8 @@ _globals["_TARGETS"].fields_by_name["weights"]._serialized_options = b"\030\001" _globals["_HYBRID"].fields_by_name["vector"]._loaded_options = None _globals["_HYBRID"].fields_by_name["vector"]._serialized_options = b"\030\001" + _globals["_HYBRID"].fields_by_name["vector_bytes"]._loaded_options = None + _globals["_HYBRID"].fields_by_name["vector_bytes"]._serialized_options = b"\030\001" 
_globals["_HYBRID"].fields_by_name["target_vectors"]._loaded_options = None _globals["_HYBRID"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" _globals["_NEARTEXTSEARCH"].fields_by_name["target_vectors"]._loaded_options = None @@ -64,10 +66,14 @@ ]._serialized_options = b"\030\001" _globals["_NEARIMUSEARCH"].fields_by_name["target_vectors"]._loaded_options = None _globals["_NEARIMUSEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" + _globals["_VECTORFORTARGET"].fields_by_name["vector_bytes"]._loaded_options = None + _globals["_VECTORFORTARGET"].fields_by_name["vector_bytes"]._serialized_options = b"\030\001" _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._loaded_options = None _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_options = b"8\001" _globals["_NEARVECTOR"].fields_by_name["vector"]._loaded_options = None _globals["_NEARVECTOR"].fields_by_name["vector"]._serialized_options = b"\030\001" + _globals["_NEARVECTOR"].fields_by_name["vector_bytes"]._loaded_options = None + _globals["_NEARVECTOR"].fields_by_name["vector_bytes"]._serialized_options = b"\030\001" _globals["_NEARVECTOR"].fields_by_name["target_vectors"]._loaded_options = None _globals["_NEARVECTOR"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" _globals["_NEARVECTOR"].fields_by_name["vector_per_target"]._loaded_options = None @@ -116,8 +122,8 @@ _globals["_PROPERTIESRESULT"].fields_by_name[ "object_array_properties" ]._serialized_options = b"\030\001" - _globals["_COMBINATIONMETHOD"]._serialized_start = 7656 - _globals["_COMBINATIONMETHOD"]._serialized_end = 7894 + _globals["_COMBINATIONMETHOD"]._serialized_start = 7785 + _globals["_COMBINATIONMETHOD"]._serialized_end = 8023 _globals["_SEARCHREQUEST"]._serialized_start = 124 _globals["_SEARCHREQUEST"]._serialized_end = 1560 _globals["_GROUPBY"]._serialized_start = 1562 @@ -137,51 +143,51 @@ _globals["_TARGETS_WEIGHTSENTRY"]._serialized_start = 2518 
_globals["_TARGETS_WEIGHTSENTRY"]._serialized_end = 2564 _globals["_HYBRID"]._serialized_start = 2567 - _globals["_HYBRID"]._serialized_end = 3020 - _globals["_HYBRID_FUSIONTYPE"]._serialized_start = 2910 - _globals["_HYBRID_FUSIONTYPE"]._serialized_end = 3007 - _globals["_NEARTEXTSEARCH"]._serialized_start = 3023 - _globals["_NEARTEXTSEARCH"]._serialized_end = 3391 - _globals["_NEARTEXTSEARCH_MOVE"]._serialized_start = 3284 - _globals["_NEARTEXTSEARCH_MOVE"]._serialized_end = 3338 - _globals["_NEARIMAGESEARCH"]._serialized_start = 3394 - _globals["_NEARIMAGESEARCH"]._serialized_end = 3567 - _globals["_NEARAUDIOSEARCH"]._serialized_start = 3570 - _globals["_NEARAUDIOSEARCH"]._serialized_end = 3743 - _globals["_NEARVIDEOSEARCH"]._serialized_start = 3746 - _globals["_NEARVIDEOSEARCH"]._serialized_end = 3919 - _globals["_NEARDEPTHSEARCH"]._serialized_start = 3922 - _globals["_NEARDEPTHSEARCH"]._serialized_end = 4095 - _globals["_NEARTHERMALSEARCH"]._serialized_start = 4098 - _globals["_NEARTHERMALSEARCH"]._serialized_end = 4275 - _globals["_NEARIMUSEARCH"]._serialized_start = 4278 - _globals["_NEARIMUSEARCH"]._serialized_end = 4447 - _globals["_BM25"]._serialized_start = 4449 - _globals["_BM25"]._serialized_end = 4490 - _globals["_REFPROPERTIESREQUEST"]._serialized_start = 4493 - _globals["_REFPROPERTIESREQUEST"]._serialized_end = 4670 - _globals["_VECTORFORTARGET"]._serialized_start = 4672 - _globals["_VECTORFORTARGET"]._serialized_end = 4725 - _globals["_NEARVECTOR"]._serialized_start = 4728 - _globals["_NEARVECTOR"]._serialized_end = 5114 - _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_start = 5033 - _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_end = 5087 - _globals["_NEAROBJECT"]._serialized_start = 5117 - _globals["_NEAROBJECT"]._serialized_end = 5282 - _globals["_RERANK"]._serialized_start = 5284 - _globals["_RERANK"]._serialized_end = 5340 - _globals["_SEARCHREPLY"]._serialized_start = 5343 - _globals["_SEARCHREPLY"]._serialized_end = 
5645 - _globals["_RERANKREPLY"]._serialized_start = 5647 - _globals["_RERANKREPLY"]._serialized_end = 5675 - _globals["_GROUPBYRESULT"]._serialized_start = 5678 - _globals["_GROUPBYRESULT"]._serialized_end = 6039 - _globals["_SEARCHRESULT"]._serialized_start = 6042 - _globals["_SEARCHRESULT"]._serialized_end = 6225 - _globals["_METADATARESULT"]._serialized_start = 6228 - _globals["_METADATARESULT"]._serialized_end = 6859 - _globals["_PROPERTIESRESULT"]._serialized_start = 6862 - _globals["_PROPERTIESRESULT"]._serialized_end = 7560 - _globals["_REFPROPERTIESRESULT"]._serialized_start = 7562 - _globals["_REFPROPERTIESRESULT"]._serialized_end = 7653 + _globals["_HYBRID"]._serialized_end = 3063 + _globals["_HYBRID_FUSIONTYPE"]._serialized_start = 2953 + _globals["_HYBRID_FUSIONTYPE"]._serialized_end = 3050 + _globals["_NEARTEXTSEARCH"]._serialized_start = 3066 + _globals["_NEARTEXTSEARCH"]._serialized_end = 3434 + _globals["_NEARTEXTSEARCH_MOVE"]._serialized_start = 3327 + _globals["_NEARTEXTSEARCH_MOVE"]._serialized_end = 3381 + _globals["_NEARIMAGESEARCH"]._serialized_start = 3437 + _globals["_NEARIMAGESEARCH"]._serialized_end = 3610 + _globals["_NEARAUDIOSEARCH"]._serialized_start = 3613 + _globals["_NEARAUDIOSEARCH"]._serialized_end = 3786 + _globals["_NEARVIDEOSEARCH"]._serialized_start = 3789 + _globals["_NEARVIDEOSEARCH"]._serialized_end = 3962 + _globals["_NEARDEPTHSEARCH"]._serialized_start = 3965 + _globals["_NEARDEPTHSEARCH"]._serialized_end = 4138 + _globals["_NEARTHERMALSEARCH"]._serialized_start = 4141 + _globals["_NEARTHERMALSEARCH"]._serialized_end = 4318 + _globals["_NEARIMUSEARCH"]._serialized_start = 4321 + _globals["_NEARIMUSEARCH"]._serialized_end = 4490 + _globals["_BM25"]._serialized_start = 4492 + _globals["_BM25"]._serialized_end = 4533 + _globals["_REFPROPERTIESREQUEST"]._serialized_start = 4536 + _globals["_REFPROPERTIESREQUEST"]._serialized_end = 4713 + _globals["_VECTORFORTARGET"]._serialized_start = 4715 + 
_globals["_VECTORFORTARGET"]._serialized_end = 4811 + _globals["_NEARVECTOR"]._serialized_start = 4814 + _globals["_NEARVECTOR"]._serialized_end = 5243 + _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_start = 5162 + _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_end = 5216 + _globals["_NEAROBJECT"]._serialized_start = 5246 + _globals["_NEAROBJECT"]._serialized_end = 5411 + _globals["_RERANK"]._serialized_start = 5413 + _globals["_RERANK"]._serialized_end = 5469 + _globals["_SEARCHREPLY"]._serialized_start = 5472 + _globals["_SEARCHREPLY"]._serialized_end = 5774 + _globals["_RERANKREPLY"]._serialized_start = 5776 + _globals["_RERANKREPLY"]._serialized_end = 5804 + _globals["_GROUPBYRESULT"]._serialized_start = 5807 + _globals["_GROUPBYRESULT"]._serialized_end = 6168 + _globals["_SEARCHRESULT"]._serialized_start = 6171 + _globals["_SEARCHRESULT"]._serialized_end = 6354 + _globals["_METADATARESULT"]._serialized_start = 6357 + _globals["_METADATARESULT"]._serialized_end = 6988 + _globals["_PROPERTIESRESULT"]._serialized_start = 6991 + _globals["_PROPERTIESRESULT"]._serialized_end = 7689 + _globals["_REFPROPERTIESRESULT"]._serialized_start = 7691 + _globals["_REFPROPERTIESRESULT"]._serialized_end = 7782 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/search_get_pb2.pyi b/weaviate/proto/v1/search_get_pb2.pyi index eaaa6f20b..b7bd7ff1f 100644 --- a/weaviate/proto/v1/search_get_pb2.pyi +++ b/weaviate/proto/v1/search_get_pb2.pyi @@ -307,6 +307,7 @@ class Hybrid(_message.Message): "near_vector", "targets", "vector_distance", + "vectors", ) class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): @@ -329,6 +330,7 @@ class Hybrid(_message.Message): NEAR_VECTOR_FIELD_NUMBER: _ClassVar[int] TARGETS_FIELD_NUMBER: _ClassVar[int] VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] + VECTORS_FIELD_NUMBER: _ClassVar[int] query: str properties: _containers.RepeatedScalarFieldContainer[str] vector: 
_containers.RepeatedScalarFieldContainer[float] @@ -340,6 +342,7 @@ class Hybrid(_message.Message): near_vector: NearVector targets: Targets vector_distance: float + vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] def __init__( self, query: _Optional[str] = ..., @@ -353,6 +356,7 @@ class Hybrid(_message.Message): near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., vector_distance: _Optional[float] = ..., + vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ..., ) -> None: ... class NearTextSearch(_message.Message): @@ -561,13 +565,18 @@ class RefPropertiesRequest(_message.Message): ) -> None: ... class VectorForTarget(_message.Message): - __slots__ = ("name", "vector_bytes") + __slots__ = ("name", "vector_bytes", "vectors") NAME_FIELD_NUMBER: _ClassVar[int] VECTOR_BYTES_FIELD_NUMBER: _ClassVar[int] + VECTORS_FIELD_NUMBER: _ClassVar[int] name: str vector_bytes: bytes + vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] def __init__( - self, name: _Optional[str] = ..., vector_bytes: _Optional[bytes] = ... + self, + name: _Optional[str] = ..., + vector_bytes: _Optional[bytes] = ..., + vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ..., ) -> None: ... 
class NearVector(_message.Message): @@ -580,6 +589,7 @@ class NearVector(_message.Message): "targets", "vector_per_target", "vector_for_targets", + "vectors", ) class VectorPerTargetEntry(_message.Message): @@ -598,6 +608,7 @@ class NearVector(_message.Message): TARGETS_FIELD_NUMBER: _ClassVar[int] VECTOR_PER_TARGET_FIELD_NUMBER: _ClassVar[int] VECTOR_FOR_TARGETS_FIELD_NUMBER: _ClassVar[int] + VECTORS_FIELD_NUMBER: _ClassVar[int] vector: _containers.RepeatedScalarFieldContainer[float] certainty: float distance: float @@ -606,6 +617,7 @@ class NearVector(_message.Message): targets: Targets vector_per_target: _containers.ScalarMap[str, bytes] vector_for_targets: _containers.RepeatedCompositeFieldContainer[VectorForTarget] + vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] def __init__( self, vector: _Optional[_Iterable[float]] = ..., @@ -616,6 +628,7 @@ class NearVector(_message.Message): targets: _Optional[_Union[Targets, _Mapping]] = ..., vector_per_target: _Optional[_Mapping[str, bytes]] = ..., vector_for_targets: _Optional[_Iterable[_Union[VectorForTarget, _Mapping]]] = ..., + vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ..., ) -> None: ... 
class NearObject(_message.Message): diff --git a/weaviate/types.py b/weaviate/types.py index eba8c2664..c389f7f2b 100644 --- a/weaviate/types.py +++ b/weaviate/types.py @@ -1,13 +1,13 @@ import datetime import uuid as uuid_package -from typing import Dict, Union, List, Sequence, Tuple +from typing import Dict, Union, Mapping, List, Sequence, Tuple DATE = datetime.datetime UUID = Union[str, uuid_package.UUID] UUIDS = Union[Sequence[UUID], UUID] NUMBER = Union[int, float] GEO_COORDINATES = Tuple[float, float] -VECTORS = Union[Dict[str, List[float]], List[float]] +VECTORS = Union[Mapping[str, Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]]], Sequence[NUMBER]] INCLUDE_VECTOR = Union[bool, str, List[str]] BEACON = "weaviate://localhost/" diff --git a/weaviate/util.py b/weaviate/util.py index bbf9743c9..5e63985d4 100644 --- a/weaviate/util.py +++ b/weaviate/util.py @@ -9,7 +9,7 @@ import re import uuid as uuid_lib from pathlib import Path -from typing import Union, Sequence, Any, Optional, List, Dict, Generator, Tuple, cast +from typing import Union, Sequence, Any, Optional, List, Dict, Generator, Tuple, TypeGuard, cast import httpx import validators @@ -883,12 +883,23 @@ def __is_list_type(inputs: Any) -> bool: ) -def _is_1d_vector(inputs: Any) -> bool: +def _is_1d_vector(inputs: Any) -> TypeGuard[Sequence[float]]: try: if len(inputs) == 0: return False except TypeError: return False if __is_list_type(inputs): - return not __is_list_type(inputs[0]) # 2D vectors are not 1D vectors + return not __is_list_type(inputs[0]) + return False + + +def _is_2d_vector(inputs: Any) -> TypeGuard[List[List[float]]]: + try: + if len(inputs) == 0: + return False + except TypeError: + return False + if __is_list_type(inputs): + return __is_list_type(inputs[0]) return False From 6f1d4ad90f7620d39a4a643733dbb6cf37230a0c Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 23 Jan 2025 11:08:38 +0000 Subject: [PATCH 02/48] Rearrange packing logic for better readability and generality --- 
test/collection/test_byteops.py | 2 +- .../collections/batch/grpc_batch_objects.py | 8 ++++-- weaviate/collections/grpc/query.py | 28 ++++++++++++++++--- weaviate/collections/grpc/shared.py | 11 -------- weaviate/util.py | 3 +- 5 files changed, 33 insertions(+), 19 deletions(-) diff --git a/test/collection/test_byteops.py b/test/collection/test_byteops.py index 508845c3a..f847e179c 100644 --- a/test/collection/test_byteops.py +++ b/test/collection/test_byteops.py @@ -1,4 +1,4 @@ -from weaviate.collections.queries.byteops import _ByteOps +from weaviate.collections.grpc.shared import _ByteOps def test_decode_float32s(): diff --git a/weaviate/collections/batch/grpc_batch_objects.py b/weaviate/collections/batch/grpc_batch_objects.py index 8f120f4ac..fa4be24c8 100644 --- a/weaviate/collections/batch/grpc_batch_objects.py +++ b/weaviate/collections/batch/grpc_batch_objects.py @@ -49,8 +49,12 @@ def __multi_vec(self, vectors: Optional[VECTORS]) -> Optional[List[base_pb2.Vect if vectors is None or _is_1d_vector(vectors): return None # pylance fails to type narrow TypeGuard in _is_1d_vector properly - v = cast(Mapping[str, Sequence[float] | Sequence[Sequence[float]]], vectors) - return _Pack.vectors(v) + vectors = cast(Mapping[str, Sequence[float] | Sequence[Sequence[float]]], vectors) + return [ + base_pb2.Vectors(name=name, vector_bytes=packing.bytes_, type=packing.type_) + for name, vec_or_vecs in vectors.items() + if (packing := _Pack.parse_single_or_multi_vec(vec_or_vecs)) + ] def __grpc_objects(self, objects: List[_BatchObject]) -> List[batch_pb2.BatchObject]: return [ diff --git a/weaviate/collections/grpc/query.py b/weaviate/collections/grpc/query.py index 3ed3c323a..6a2f18f63 100644 --- a/weaviate/collections/grpc/query.py +++ b/weaviate/collections/grpc/query.py @@ -59,7 +59,7 @@ WeaviateInvalidInputError, WeaviateRetryError, ) -from weaviate.proto.v1 import search_get_pb2 +from weaviate.proto.v1 import base_pb2, search_get_pb2 from weaviate.types import NUMBER, 
UUID from weaviate.util import _get_vector_v4, _is_1d_vector from weaviate.validator import _ValidateArgument, _validate_input, _ExtraTypes @@ -1034,7 +1034,14 @@ def add_vector(val: Sequence[float], target_name: str) -> None: else: vector_for_target.append( search_get_pb2.VectorForTarget( - name=target_name, vectors=_Pack.vectors({target_name: vec}) + name=target_name, + vectors=[ + base_pb2.Vectors( + name=target_name, + vector_bytes=_Pack.single(vec), + type=base_pb2.VECTOR_TYPE_SINGLE_FP32, + ) + ], ) ) @@ -1056,14 +1063,27 @@ def add_vector(val: Sequence[float], target_name: str) -> None: vector_for_target.append( search_get_pb2.VectorForTarget( name=key, - vectors=_Pack.vectors({key: value.tensor}), + vectors=[ + base_pb2.Vectors( + name=key, + vector_bytes=_Pack.multi(value.tensor), + type=base_pb2.VECTOR_TYPE_MULTI_FP32, + ) + ], ) ) elif isinstance(value, _ManyVectorsQuery): vector_for_target.append( search_get_pb2.VectorForTarget( name=key, - vectors=_Pack.vectors({key: vector for vector in value.vectors}), + vectors=[ + base_pb2.Vectors( + name=key, + vector_bytes=_Pack.single(vec), + type=base_pb2.VECTOR_TYPE_SINGLE_FP32, + ) + for vec in value.vectors + ], ) ) target_vectors_tmp.append(key) diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index e95bcdc51..1f94f06da 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -1,5 +1,4 @@ import struct -from collections.abc import Mapping from dataclasses import dataclass from typing import List, Optional, Sequence, Union from typing_extensions import TypeGuard @@ -91,16 +90,6 @@ def parse_single_or_multi_vec( else: raise WeaviateInvalidInputError(f"Invalid vectors: {vector}") - @staticmethod - def vectors( - vectors: Mapping[str, Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]]] - ) -> List[base_pb2.Vectors]: - return [ - base_pb2.Vectors(name=name, vector_bytes=packing.bytes_, type=packing.type_) - for name, vec_or_vecs in 
vectors.items() - if (packing := _Pack.parse_single_or_multi_vec(vec_or_vecs)) - ] - @staticmethod def single(vector: Sequence[NUMBER]) -> bytes: vector_list = _get_vector_v4(vector) diff --git a/weaviate/util.py b/weaviate/util.py index 5e63985d4..59208693a 100644 --- a/weaviate/util.py +++ b/weaviate/util.py @@ -9,7 +9,8 @@ import re import uuid as uuid_lib from pathlib import Path -from typing import Union, Sequence, Any, Optional, List, Dict, Generator, Tuple, TypeGuard, cast +from typing import Union, Sequence, Any, Optional, List, Dict, Generator, Tuple, cast +from typing_extensions import TypeGuard import httpx import validators From 070889da3f6a76cbfbce2c2b9c99cdaea9e639ad Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 23 Jan 2025 11:12:10 +0000 Subject: [PATCH 03/48] Import `cast` from `typing_extensions` for `3.9` --- weaviate/collections/batch/grpc_batch_objects.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/weaviate/collections/batch/grpc_batch_objects.py b/weaviate/collections/batch/grpc_batch_objects.py index fa4be24c8..e4fc98dab 100644 --- a/weaviate/collections/batch/grpc_batch_objects.py +++ b/weaviate/collections/batch/grpc_batch_objects.py @@ -2,7 +2,8 @@ import struct import time import uuid as uuid_package -from typing import Any, Dict, List, Mapping, Optional, Sequence, Union, cast +from typing import Any, Dict, List, Mapping, Optional, Sequence, Union +from typing_extensions import cast from google.protobuf.struct_pb2 import Struct from grpc.aio import AioRpcError # type: ignore From 45a253996927b05e206ece9656167e34d5910267 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 23 Jan 2025 11:26:27 +0000 Subject: [PATCH 04/48] Fix `3.9` type issue --- weaviate/collections/batch/grpc_batch_objects.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/weaviate/collections/batch/grpc_batch_objects.py b/weaviate/collections/batch/grpc_batch_objects.py index e4fc98dab..cff298c64 100644 --- 
a/weaviate/collections/batch/grpc_batch_objects.py +++ b/weaviate/collections/batch/grpc_batch_objects.py @@ -2,8 +2,7 @@ import struct import time import uuid as uuid_package -from typing import Any, Dict, List, Mapping, Optional, Sequence, Union -from typing_extensions import cast +from typing import Any, Dict, List, Mapping, Optional, Sequence, Union, cast from google.protobuf.struct_pb2 import Struct from grpc.aio import AioRpcError # type: ignore @@ -50,7 +49,7 @@ def __multi_vec(self, vectors: Optional[VECTORS]) -> Optional[List[base_pb2.Vect if vectors is None or _is_1d_vector(vectors): return None # pylance fails to type narrow TypeGuard in _is_1d_vector properly - vectors = cast(Mapping[str, Sequence[float] | Sequence[Sequence[float]]], vectors) + vectors = cast(Mapping[str, Union[Sequence[float], Sequence[Sequence[float]]]], vectors) return [ base_pb2.Vectors(name=name, vector_bytes=packing.bytes_, type=packing.type_) for name, vec_or_vecs in vectors.items() From fac3c5e9586cf2b46c1b145eb62aa1537bb65330 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 23 Jan 2025 11:47:18 +0000 Subject: [PATCH 05/48] Fix usage of `_ManyVectorsQuery` for 1.27/1.28 --- weaviate/collections/grpc/query.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/weaviate/collections/grpc/query.py b/weaviate/collections/grpc/query.py index 6a2f18f63..549d1c9ba 100644 --- a/weaviate/collections/grpc/query.py +++ b/weaviate/collections/grpc/query.py @@ -1073,26 +1073,14 @@ def add_vector(val: Sequence[float], target_name: str) -> None: ) ) elif isinstance(value, _ManyVectorsQuery): - vector_for_target.append( - search_get_pb2.VectorForTarget( - name=key, - vectors=[ - base_pb2.Vectors( - name=key, - vector_bytes=_Pack.single(vec), - type=base_pb2.VECTOR_TYPE_SINGLE_FP32, - ) - for vec in value.vectors - ], - ) - ) - target_vectors_tmp.append(key) + for vec in value.vectors: + add_vector(vec, key) + target_vectors_tmp.append(key) else: vals = 
cast(Sequence[Sequence[NUMBER]], value) for inner_vector in vals: add_vector(inner_vector, key) target_vectors_tmp.append(key) - return vector_for_target, None, target_vectors_tmp else: if _is_1d_vector(vector): From 2d219f48e367f80e28c58c3fba70dce5b48d6001 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 23 Jan 2025 12:10:21 +0000 Subject: [PATCH 06/48] Fix flakey periodic token refresh failures --- weaviate/connect/v4.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/weaviate/connect/v4.py b/weaviate/connect/v4.py index b3bec7f8e..4ff482e2a 100644 --- a/weaviate/connect/v4.py +++ b/weaviate/connect/v4.py @@ -359,7 +359,9 @@ def periodic_refresh_token(refresh_time: int, _auth: Optional[_Auth]) -> None: ): # use refresh token when available try: - if "refresh_token" in cast(AsyncOAuth2Client, self._client).token: + if self._client is None: + pass + elif "refresh_token" in cast(AsyncOAuth2Client, self._client).token: assert isinstance(self._client, AsyncOAuth2Client) self._client.token = asyncio.run_coroutine_threadsafe( self._client.refresh_token( From c72106270874c1ea764df8de2c7f2cb0aecee11c Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 23 Jan 2025 12:36:15 +0000 Subject: [PATCH 07/48] Rename `colbert` to `multivector` in collection config, add `multi_vector` to `config.get()` return --- integration/test_named_vectors.py | 11 +++++++++- weaviate/collections/classes/config.py | 22 +++++++++++++++++-- .../collections/classes/config_methods.py | 9 ++++++++ .../classes/config_vector_index.py | 18 +++++++-------- 4 files changed, 48 insertions(+), 12 deletions(-) diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index 4a1bb3e73..0c1266d59 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -11,6 +11,7 @@ PQConfig, _VectorIndexConfigHNSW, _VectorIndexConfigFlat, + _MultiVectorConfig, Vectorizers, ReferenceProperty, ) @@ -827,12 +828,20 @@ def 
test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: wvc.config.Configure.NamedVectors.none( name="colbert", vector_index_config=wvc.config.Configure.VectorIndex.hnsw( - multi_vector=wvc.config.Configure.VectorIndex.MultiVector.colbert() + multi_vector=wvc.config.Configure.VectorIndex.MultiVector.multivector() ), ), ], ) + config = collection.config.get() + assert config.vector_config is not None + assert isinstance(config.vector_config["colbert"].vector_index_config, _VectorIndexConfigHNSW) + assert isinstance( + config.vector_config["colbert"].vector_index_config.multi_vector, _MultiVectorConfig + ) + assert config.vector_config["colbert"].vector_index_config.multi_vector.aggregation == "maxSim" + collection.data.insert({}, vector={"colbert": [[1, 2], [4, 5]]}) assert len(collection) == 1 diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 62fef2a47..c68f3fa10 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -1508,8 +1508,17 @@ class _SQConfig(_ConfigBase): SQConfig = _SQConfig +@dataclass +class _MultiVectorConfig(_ConfigBase): + aggregation: str + + +MultiVector = _MultiVectorConfig + + @dataclass class _VectorIndexConfig(_ConfigBase): + multi_vector: Optional[_MultiVectorConfig] quantizer: Optional[Union[PQConfig, BQConfig, SQConfig]] def to_dict(self) -> Dict[str, Any]: @@ -1520,6 +1529,8 @@ def to_dict(self) -> Dict[str, Any]: out["bq"] = {**out.pop("quantizer"), "enabled": True} elif isinstance(self.quantizer, _SQConfig): out["sq"] = {**out.pop("quantizer"), "enabled": True} + if self.multi_vector is not None: + out["multivector"] = self.multi_vector.to_dict() return out @@ -1978,7 +1989,7 @@ def __add_props( class _VectorIndexMultiVector: @staticmethod - def colbert( + def multivector( aggregation: Union[Literal["maxSim"], str, None] = None, ) -> _MultiVectorConfigCreate: return _MultiVectorConfigCreate( @@ -2062,6 +2073,7 @@ def none() -> 
_VectorIndexConfigSkipCreate: return _VectorIndexConfigSkipCreate( distance=None, quantizer=None, + multivector=None, ) @staticmethod @@ -2120,6 +2132,7 @@ def flat( distance=distance_metric, vectorCacheMaxObjects=vector_cache_max_objects, quantizer=quantizer, + multivector=None, ) @staticmethod @@ -2137,7 +2150,12 @@ def dynamic( See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hnsw) for a more detailed view! """ # noqa: D417 (missing argument descriptions in the docstring) return _VectorIndexConfigDynamicCreate( - distance=distance_metric, threshold=threshold, hnsw=hnsw, flat=flat, quantizer=None + distance=distance_metric, + threshold=threshold, + hnsw=hnsw, + flat=flat, + quantizer=None, + multivector=None, ) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index f0d39ebd4..5d21af419 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -7,6 +7,7 @@ _CollectionConfigSimple, _NamedVectorConfig, _NamedVectorizerConfig, + _MultiVectorConfig, _PQConfig, _VectorIndexConfigFlat, _VectorIndexConfigDynamic, @@ -168,6 +169,13 @@ def __get_hnsw_config(config: Dict[str, Any]) -> _VectorIndexConfigHNSW: quantizer=quantizer, skip=config["skip"], vector_cache_max_objects=config["vectorCacheMaxObjects"], + multi_vector=( + None + if config.get("multivector") is None + else _MultiVectorConfig( + aggregation=config["multivector"]["aggregation"], + ) + ), ) @@ -177,6 +185,7 @@ def __get_flat_config(config: Dict[str, Any]) -> _VectorIndexConfigFlat: distance_metric=VectorDistances(config["distance"]), quantizer=quantizer, vector_cache_max_objects=config["vectorCacheMaxObjects"], + multi_vector=None, ) diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index 78290af4d..3c36bbf32 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ 
b/weaviate/collections/classes/config_vector_index.py @@ -38,8 +38,17 @@ class VectorIndexType(str, Enum): DYNAMIC = "dynamic" +class _MultiVectorConfigCreateBase(_ConfigCreateModel): + enabled: bool = Field(default=True) + + +class _MultiVectorConfigCreate(_MultiVectorConfigCreateBase): + aggregation: Optional[str] + + class _VectorIndexConfigCreate(_ConfigCreateModel): distance: Optional[VectorDistances] + multivector: Optional[_MultiVectorConfigCreate] quantizer: Optional[_QuantizerConfigCreate] = Field(exclude=True) @staticmethod @@ -72,14 +81,6 @@ def vector_index_type() -> VectorIndexType: return VectorIndexType.HNSW -class _MultiVectorConfigCreateBase(_ConfigCreateModel): - enabled: bool = Field(default=True) - - -class _MultiVectorConfigCreate(_MultiVectorConfigCreateBase): - aggregation: Optional[str] - - class _VectorIndexConfigHNSWCreate(_VectorIndexConfigCreate): cleanupIntervalSeconds: Optional[int] dynamicEfMin: Optional[int] @@ -91,7 +92,6 @@ class _VectorIndexConfigHNSWCreate(_VectorIndexConfigCreate): flatSearchCutoff: Optional[int] maxConnections: Optional[int] vectorCacheMaxObjects: Optional[int] - multivector: Optional[_MultiVectorConfigCreate] @staticmethod def vector_index_type() -> VectorIndexType: From 0bfdc45f6e9b0d3e3a016b1513af0b945b2986c2 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 23 Jan 2025 12:39:15 +0000 Subject: [PATCH 08/48] Fix broken mock test --- mock_tests/test_collection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mock_tests/test_collection.py b/mock_tests/test_collection.py index b885bea93..6091b8b44 100644 --- a/mock_tests/test_collection.py +++ b/mock_tests/test_collection.py @@ -170,6 +170,7 @@ def test_missing_multi_tenancy_config( quantizer=None, distance_metric=VectorDistances.COSINE, vector_cache_max_objects=10, + multi_vector=None, ) vic.distance = vic.distance_metric response_json = CollectionConfig( From 0f2a9d316a694c0e03288974d51abeb1b6e8d43c Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 
23 Jan 2025 13:14:50 +0000 Subject: [PATCH 09/48] Rename `NearVectorQuery` to `NearVector` --- integration/test_named_vectors.py | 10 +++------- weaviate/classes/query.py | 4 ++-- weaviate/collections/classes/grpc.py | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index 0c1266d59..fe48897b2 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -695,7 +695,7 @@ def test_same_target_vector_multiple_input( ( { "first": [0, 1], - "second": wvc.query.NearVectorQuery.many_vectors([[1, 0, 0], [0, 0, 1]]), + "second": wvc.query.NearVector.many_vectors([[1, 0, 0], [0, 0, 1]]), }, ["first", "second"], ), @@ -846,7 +846,7 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: assert len(collection) == 1 objs = collection.query.near_vector( - {"colbert": wvc.query.NearVectorQuery.multi_vector([[1, 2], [3, 4]])}, + {"colbert": wvc.query.NearVector.multi_vector([[1, 2], [3, 4]])}, target_vector="colbert", ).objects assert len(objs) == 1 @@ -888,11 +888,7 @@ def test_colbert_vectors_jinaai(collection_factory: CollectionFactory) -> None: assert len(objs) == 1 objs = collection.query.near_vector( - { - "colbert": wvc.query.NearVectorQuery.multi_vector( - [[e + 0.01 for e in vec] for vec in vecs] - ) - }, + {"colbert": wvc.query.NearVector.multi_vector([[e + 0.01 for e in vec] for vec in vecs])}, target_vector="colbert", ).objects assert len(objs) == 1 diff --git a/weaviate/classes/query.py b/weaviate/classes/query.py index a7715ba36..00939ddce 100644 --- a/weaviate/classes/query.py +++ b/weaviate/classes/query.py @@ -10,7 +10,7 @@ NearMediaType, QueryNested, QueryReference, - NearVectorQuery, + NearVector, Rerank, Sort, ) @@ -28,7 +28,7 @@ "NearMediaType", "QueryNested", "QueryReference", - "NearVectorQuery", + "NearVector", "Rerank", "Sort", "TargetVectors", diff --git a/weaviate/collections/classes/grpc.py 
b/weaviate/collections/classes/grpc.py index 714c9f72e..6a8ddb847 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -246,7 +246,7 @@ class _ManyVectorsQuery(_WeaviateInput): ] -class NearVectorQuery: +class NearVector: """Factory class to use when defining near vector queries with multiple vectors in `near_vector()` and `hybrid()` methods.""" @staticmethod From 403b8e3ca476ba4785bfe63254f99d74c8d96308 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 23 Jan 2025 16:42:08 +0000 Subject: [PATCH 10/48] Respond to review --- .github/workflows/main.yaml | 3 ++- integration/test_named_vectors.py | 2 +- weaviate/collections/classes/config.py | 2 +- weaviate/collections/classes/grpc.py | 10 +--------- 4 files changed, 5 insertions(+), 12 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 9a200ade2..d698119b7 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -18,7 +18,7 @@ concurrency: cancel-in-progress: true env: - # WEAVIATE_124: 1.24.26 + WEAVIATE_124: 1.24.26 WEAVIATE_125: 1.25.29 WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 @@ -279,6 +279,7 @@ jobs: fail-fast: false matrix: server: [ + $WEAVIATE_124, $WEAVIATE_125, $WEAVIATE_126, $WEAVIATE_127, diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index fe48897b2..a305f9ea8 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -828,7 +828,7 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: wvc.config.Configure.NamedVectors.none( name="colbert", vector_index_config=wvc.config.Configure.VectorIndex.hnsw( - multi_vector=wvc.config.Configure.VectorIndex.MultiVector.multivector() + multi_vector=wvc.config.Configure.VectorIndex.MultiVector.multi_vector() ), ), ], diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index c68f3fa10..2dd733afc 100644 --- 
a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -1989,7 +1989,7 @@ def __add_props( class _VectorIndexMultiVector: @staticmethod - def multivector( + def multi_vector( aggregation: Union[Literal["maxSim"], str, None] = None, ) -> _MultiVectorConfigCreate: return _MultiVectorConfigCreate( diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index 6a8ddb847..a3d249396 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -8,7 +8,7 @@ from weaviate.proto.v1 import search_get_pb2 from weaviate.str_enum import BaseEnum from weaviate.types import INCLUDE_VECTOR, UUID, NUMBER -from weaviate.util import _ServerVersion, _get_vector_v4, _is_1d_vector +from weaviate.util import _ServerVersion class HybridFusion(str, BaseEnum): @@ -447,14 +447,6 @@ def near_vector( Returns: A `_HybridNearVector` object to be used in the `vector` parameter of the `query.hybrid` and `generate.hybrid` search methods. 
""" - if isinstance(vector, dict): - for key, val in vector.items(): - if _is_1d_vector(val): - vector[key] = _get_vector_v4(val) - else: - vector[key] = [_get_vector_v4(v) for v in val] - else: - vector = _get_vector_v4(vector) return _HybridNearVector(vector=vector, distance=distance, certainty=certainty) From 66af603ccf1a86471a2d977c4e9989de81053c79 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 24 Jan 2025 15:58:28 +0000 Subject: [PATCH 11/48] Add initial support for gRPC aggregate method for use in __len__ --- .../collections/aggregations/aggregate.py | 8 +++ weaviate/collections/collection/async_.py | 13 +++- weaviate/collections/collection/sync.py | 16 ++++- weaviate/collections/grpc/aggregate.py | 59 +++++++++++++++++++ weaviate/proto/v1/aggregate_pb2.py | 41 +++++++++++++ weaviate/proto/v1/aggregate_pb2.pyi | 56 ++++++++++++++++++ weaviate/proto/v1/aggregate_pb2_grpc.py | 25 ++++++++ weaviate/proto/v1/weaviate_pb2.py | 7 ++- weaviate/proto/v1/weaviate_pb2.pyi | 1 + weaviate/proto/v1/weaviate_pb2_grpc.py | 48 +++++++++++++++ 10 files changed, 265 insertions(+), 9 deletions(-) create mode 100644 weaviate/collections/grpc/aggregate.py create mode 100644 weaviate/proto/v1/aggregate_pb2.py create mode 100644 weaviate/proto/v1/aggregate_pb2.pyi create mode 100644 weaviate/proto/v1/aggregate_pb2_grpc.py diff --git a/weaviate/collections/aggregations/aggregate.py b/weaviate/collections/aggregations/aggregate.py index 8076b96d7..bb3305d89 100644 --- a/weaviate/collections/aggregations/aggregate.py +++ b/weaviate/collections/aggregations/aggregate.py @@ -34,6 +34,7 @@ from weaviate.collections.classes.filters import _Filters from weaviate.collections.classes.grpc import Move from weaviate.collections.filters import _FilterToREST +from weaviate.collections.grpc.aggregate import _AggregateGRPC from weaviate.connect import ConnectionV4 from weaviate.exceptions import WeaviateInvalidInputError, WeaviateQueryError from weaviate.gql.aggregate import 
AggregateBuilder @@ -57,6 +58,13 @@ def __init__( self.__name = name self._tenant = tenant self._consistency_level = consistency_level + self._grpc = _AggregateGRPC( + connection=connection, + name=name, + tenant=tenant, + consistency_level=consistency_level, + validate_arguments=False, + ) def _query(self) -> AggregateBuilder: return AggregateBuilder( diff --git a/weaviate/collections/collection/async_.py b/weaviate/collections/collection/async_.py index 09ebabf6b..a920b6e46 100644 --- a/weaviate/collections/collection/async_.py +++ b/weaviate/collections/collection/async_.py @@ -18,6 +18,7 @@ from weaviate.collections.classes.types import Properties, TProperties from weaviate.collections.data import _DataCollectionAsync from weaviate.collections.generate import _GenerateCollectionAsync +from weaviate.collections.grpc.aggregate import _AggregateGRPC from weaviate.collections.iterator import _IteratorInputs, _ObjectAIterator from weaviate.collections.tenants import _TenantsAsync from weaviate.connect import ConnectionV4 @@ -74,6 +75,9 @@ def __init__( references, ) + self.__aggregate_grpc = _AggregateGRPC( + connection, name, tenant, consistency_level, validate_arguments + ) self.__cluster = _ClusterAsync(connection) self.aggregate = _AggregateCollectionAsync(connection, name, consistency_level, tenant) @@ -103,9 +107,12 @@ def __init__( async def length(self) -> int: """Get the total number of objects in the collection.""" - total = (await self.aggregate.over_all(total_count=True)).total_count - assert total is not None - return total + if self._connection._weaviate_version.is_lower_than(1, 28, 4): + total = (await self.aggregate.over_all(total_count=True)).total_count + assert total is not None + return total + else: + return await self.__aggregate_grpc.meta_count() async def to_string(self) -> str: """Return a string representation of the collection object.""" diff --git a/weaviate/collections/collection/sync.py b/weaviate/collections/collection/sync.py index 
77c7028ee..1ac6a295e 100644 --- a/weaviate/collections/collection/sync.py +++ b/weaviate/collections/collection/sync.py @@ -20,10 +20,12 @@ from weaviate.collections.config import _ConfigCollection from weaviate.collections.data import _DataCollection from weaviate.collections.generate import _GenerateCollection +from weaviate.collections.grpc.aggregate import _AggregateGRPC from weaviate.collections.iterator import _IteratorInputs, _ObjectIterator from weaviate.collections.query import _QueryCollection from weaviate.collections.tenants import _Tenants from weaviate.connect import ConnectionV4 +from weaviate.event_loop import _EventLoopSingleton from weaviate.types import UUID from .base import _CollectionBase @@ -77,6 +79,9 @@ def __init__( references, ) + self.__aggregate_grpc = _AggregateGRPC( + connection, name, tenant, consistency_level, validate_arguments + ) self.__cluster = _Cluster(connection) config = _ConfigCollection( @@ -140,9 +145,14 @@ def __init__( """This namespace includes all the CRUD methods available to you when modifying the tenants of a multi-tenancy-enabled collection in Weaviate.""" def __len__(self) -> int: - total = self.aggregate.over_all(total_count=True).total_count - assert total is not None - return total + if self._connection._weaviate_version.is_lower_than(1, 28, 4): + total = self.aggregate.over_all(total_count=True).total_count + assert total is not None + return total + else: + return _EventLoopSingleton.get_instance().run_until_complete( + self.__aggregate_grpc.meta_count + ) def __str__(self) -> str: config = self.config.get() diff --git a/weaviate/collections/grpc/aggregate.py b/weaviate/collections/grpc/aggregate.py new file mode 100644 index 000000000..a60e1a79e --- /dev/null +++ b/weaviate/collections/grpc/aggregate.py @@ -0,0 +1,59 @@ +from typing import Optional, cast + +from grpc.aio import AioRpcError # type: ignore + +from weaviate.collections.classes.config import ConsistencyLevel +from 
weaviate.collections.grpc.retry import _Retry +from weaviate.collections.grpc.shared import _BaseGRPC, PERMISSION_DENIED +from weaviate.connect.v4 import ConnectionV4 +from weaviate.exceptions import ( + InsufficientPermissionsError, + WeaviateQueryError, + WeaviateRetryError, +) +from weaviate.proto.v1 import aggregate_pb2 + + +class _AggregateGRPC(_BaseGRPC): + def __init__( + self, + connection: ConnectionV4, + name: str, + tenant: Optional[str], + consistency_level: Optional[ConsistencyLevel], + validate_arguments: bool, + ): + super().__init__(connection, consistency_level) + self._name: str = name + self._tenant = tenant + self._validate_arguments = validate_arguments + + async def meta_count(self) -> int: + res = await self.__call(self.__create_request(meta_count=True)) + return res.result.groups[0].count + + def __create_request(self, *, meta_count: bool = False) -> aggregate_pb2.AggregateRequest: + return aggregate_pb2.AggregateRequest( + collection=self._name, + meta_count=meta_count, + tenant=self._tenant, + ) + + async def __call(self, request: aggregate_pb2.AggregateRequest) -> aggregate_pb2.AggregateReply: + try: + assert self._connection.grpc_stub is not None + res = await _Retry(4).with_exponential_backoff( + 0, + f"Searching in collection {request.collection}", + self._connection.grpc_stub.Aggregate, + request, + metadata=self._connection.grpc_headers(), + timeout=self._connection.timeout_config.query, + ) + return cast(aggregate_pb2.AggregateReply, res) + except AioRpcError as e: + if e.code().name == PERMISSION_DENIED: + raise InsufficientPermissionsError(e) + raise WeaviateQueryError(str(e), "GRPC search") # pyright: ignore + except WeaviateRetryError as e: + raise WeaviateQueryError(str(e), "GRPC search") # pyright: ignore diff --git a/weaviate/proto/v1/aggregate_pb2.py b/weaviate/proto/v1/aggregate_pb2.py new file mode 100644 index 000000000..2b56b9844 --- /dev/null +++ b/weaviate/proto/v1/aggregate_pb2.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 
-*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# NO CHECKED-IN PROTOBUF GENCODE +# source: v1/aggregate.proto +# Protobuf Python Version: 5.27.2 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, 5, 27, 2, "", "v1/aggregate.proto" +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x12v1/aggregate.proto\x12\x0bweaviate.v1"v\n\x10\x41ggregateRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x12\n\nmeta_count\x18\x14 \x01(\x08\x12\x19\n\x0cobject_limit\x18\x1e \x01(\rH\x00\x88\x01\x01\x12\x0e\n\x06tenant\x18\n \x01(\tB\x0f\n\r_object_limit"L\n\x0e\x41ggregateReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12,\n\x06result\x18\x02 \x01(\x0b\x32\x1c.weaviate.v1.AggregateResult"D\n\x0f\x41ggregateResult\x12\x31\n\x06groups\x18\x01 \x03(\x0b\x32!.weaviate.v1.AggregateGroupResult"%\n\x14\x41ggregateGroupResult\x12\r\n\x05\x63ount\x18\x01 \x01(\x03\x42s\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoAggregateZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "v1.aggregate_pb2", _globals) +if not _descriptor._USE_C_DESCRIPTORS: + _globals["DESCRIPTOR"]._loaded_options = None + _globals["DESCRIPTOR"]._serialized_options = ( + b"\n#io.weaviate.client.grpc.protocol.v1B\026WeaviateProtoAggregateZ4github.com/weaviate/weaviate/grpc/generated;protocol" + ) + _globals["_AGGREGATEREQUEST"]._serialized_start = 35 + 
_globals["_AGGREGATEREQUEST"]._serialized_end = 153 + _globals["_AGGREGATEREPLY"]._serialized_start = 155 + _globals["_AGGREGATEREPLY"]._serialized_end = 231 + _globals["_AGGREGATERESULT"]._serialized_start = 233 + _globals["_AGGREGATERESULT"]._serialized_end = 301 + _globals["_AGGREGATEGROUPRESULT"]._serialized_start = 303 + _globals["_AGGREGATEGROUPRESULT"]._serialized_end = 340 +# @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/aggregate_pb2.pyi b/weaviate/proto/v1/aggregate_pb2.pyi new file mode 100644 index 000000000..ede2d6bc6 --- /dev/null +++ b/weaviate/proto/v1/aggregate_pb2.pyi @@ -0,0 +1,56 @@ +from google.protobuf.internal import containers as _containers +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ( + ClassVar as _ClassVar, + Iterable as _Iterable, + Mapping as _Mapping, + Optional as _Optional, + Union as _Union, +) + +DESCRIPTOR: _descriptor.FileDescriptor + +class AggregateRequest(_message.Message): + __slots__ = ("collection", "meta_count", "object_limit", "tenant") + COLLECTION_FIELD_NUMBER: _ClassVar[int] + META_COUNT_FIELD_NUMBER: _ClassVar[int] + OBJECT_LIMIT_FIELD_NUMBER: _ClassVar[int] + TENANT_FIELD_NUMBER: _ClassVar[int] + collection: str + meta_count: bool + object_limit: int + tenant: str + def __init__( + self, + collection: _Optional[str] = ..., + meta_count: bool = ..., + object_limit: _Optional[int] = ..., + tenant: _Optional[str] = ..., + ) -> None: ... + +class AggregateReply(_message.Message): + __slots__ = ("took", "result") + TOOK_FIELD_NUMBER: _ClassVar[int] + RESULT_FIELD_NUMBER: _ClassVar[int] + took: float + result: AggregateResult + def __init__( + self, + took: _Optional[float] = ..., + result: _Optional[_Union[AggregateResult, _Mapping]] = ..., + ) -> None: ... 
+ +class AggregateResult(_message.Message): + __slots__ = ("groups",) + GROUPS_FIELD_NUMBER: _ClassVar[int] + groups: _containers.RepeatedCompositeFieldContainer[AggregateGroupResult] + def __init__( + self, groups: _Optional[_Iterable[_Union[AggregateGroupResult, _Mapping]]] = ... + ) -> None: ... + +class AggregateGroupResult(_message.Message): + __slots__ = ("count",) + COUNT_FIELD_NUMBER: _ClassVar[int] + count: int + def __init__(self, count: _Optional[int] = ...) -> None: ... diff --git a/weaviate/proto/v1/aggregate_pb2_grpc.py b/weaviate/proto/v1/aggregate_pb2_grpc.py new file mode 100644 index 000000000..bd9419279 --- /dev/null +++ b/weaviate/proto/v1/aggregate_pb2_grpc.py @@ -0,0 +1,25 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc +import warnings + + +GRPC_GENERATED_VERSION = "1.66.2" +GRPC_VERSION = grpc.__version__ +_version_not_supported = False + +try: + from grpc._utilities import first_version_is_lower + + _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) +except ImportError: + _version_not_supported = True + +if _version_not_supported: + raise RuntimeError( + f"The grpc package installed is at version {GRPC_VERSION}," + + f" but the generated code in v1/aggregate_pb2_grpc.py depends on" + + f" grpcio>={GRPC_GENERATED_VERSION}." + + f" Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}" + + f" or downgrade your generated code using grpcio-tools<={GRPC_VERSION}." 
+ ) diff --git a/weaviate/proto/v1/weaviate_pb2.py b/weaviate/proto/v1/weaviate_pb2.py index 5c102ea2f..852494a3e 100644 --- a/weaviate/proto/v1/weaviate_pb2.py +++ b/weaviate/proto/v1/weaviate_pb2.py @@ -18,6 +18,7 @@ _sym_db = _symbol_database.Default() +from weaviate.proto.v1 import aggregate_pb2 as v1_dot_aggregate__pb2 from weaviate.proto.v1 import batch_pb2 as v1_dot_batch__pb2 from weaviate.proto.v1 import batch_delete_pb2 as v1_dot_batch__delete__pb2 from weaviate.proto.v1 import search_get_pb2 as v1_dot_search__get__pb2 @@ -25,7 +26,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x11v1/weaviate.proto\x12\x0bweaviate.v1\x1a\x0ev1/batch.proto\x1a\x15v1/batch_delete.proto\x1a\x13v1/search_get.proto\x1a\x10v1/tenants.proto2\xbf\x02\n\x08Weaviate\x12@\n\x06Search\x12\x1a.weaviate.v1.SearchRequest\x1a\x18.weaviate.v1.SearchReply"\x00\x12R\n\x0c\x42\x61tchObjects\x12 .weaviate.v1.BatchObjectsRequest\x1a\x1e.weaviate.v1.BatchObjectsReply"\x00\x12O\n\x0b\x42\x61tchDelete\x12\x1f.weaviate.v1.BatchDeleteRequest\x1a\x1d.weaviate.v1.BatchDeleteReply"\x00\x12L\n\nTenantsGet\x12\x1e.weaviate.v1.TenantsGetRequest\x1a\x1c.weaviate.v1.TenantsGetReply"\x00\x42j\n#io.weaviate.client.grpc.protocol.v1B\rWeaviateProtoZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' + b'\n\x11v1/weaviate.proto\x12\x0bweaviate.v1\x1a\x12v1/aggregate.proto\x1a\x0ev1/batch.proto\x1a\x15v1/batch_delete.proto\x1a\x13v1/search_get.proto\x1a\x10v1/tenants.proto2\x8a\x03\n\x08Weaviate\x12@\n\x06Search\x12\x1a.weaviate.v1.SearchRequest\x1a\x18.weaviate.v1.SearchReply"\x00\x12R\n\x0c\x42\x61tchObjects\x12 
.weaviate.v1.BatchObjectsRequest\x1a\x1e.weaviate.v1.BatchObjectsReply"\x00\x12O\n\x0b\x42\x61tchDelete\x12\x1f.weaviate.v1.BatchDeleteRequest\x1a\x1d.weaviate.v1.BatchDeleteReply"\x00\x12L\n\nTenantsGet\x12\x1e.weaviate.v1.TenantsGetRequest\x1a\x1c.weaviate.v1.TenantsGetReply"\x00\x12I\n\tAggregate\x12\x1d.weaviate.v1.AggregateRequest\x1a\x1b.weaviate.v1.AggregateReply"\x00\x42j\n#io.weaviate.client.grpc.protocol.v1B\rWeaviateProtoZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' ) _globals = globals() @@ -36,6 +37,6 @@ _globals["DESCRIPTOR"]._serialized_options = ( b"\n#io.weaviate.client.grpc.protocol.v1B\rWeaviateProtoZ4github.com/weaviate/weaviate/grpc/generated;protocol" ) - _globals["_WEAVIATE"]._serialized_start = 113 - _globals["_WEAVIATE"]._serialized_end = 432 + _globals["_WEAVIATE"]._serialized_start = 133 + _globals["_WEAVIATE"]._serialized_end = 527 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/weaviate_pb2.pyi b/weaviate/proto/v1/weaviate_pb2.pyi index f91159542..34b623594 100644 --- a/weaviate/proto/v1/weaviate_pb2.pyi +++ b/weaviate/proto/v1/weaviate_pb2.pyi @@ -1,3 +1,4 @@ +from weaviate.proto.v1 import aggregate_pb2 as _aggregate_pb2 from weaviate.proto.v1 import batch_pb2 as _batch_pb2 from weaviate.proto.v1 import batch_delete_pb2 as _batch_delete_pb2 from weaviate.proto.v1 import search_get_pb2 as _search_get_pb2 diff --git a/weaviate/proto/v1/weaviate_pb2_grpc.py b/weaviate/proto/v1/weaviate_pb2_grpc.py index a25b0fac9..a7f88b64c 100644 --- a/weaviate/proto/v1/weaviate_pb2_grpc.py +++ b/weaviate/proto/v1/weaviate_pb2_grpc.py @@ -3,6 +3,7 @@ import grpc import warnings +from weaviate.proto.v1 import aggregate_pb2 as v1_dot_aggregate__pb2 from weaviate.proto.v1 import batch_delete_pb2 as v1_dot_batch__delete__pb2 from weaviate.proto.v1 import batch_pb2 as v1_dot_batch__pb2 from weaviate.proto.v1 import search_get_pb2 as v1_dot_search__get__pb2 @@ -62,6 +63,12 @@ def __init__(self, channel): 
response_deserializer=v1_dot_tenants__pb2.TenantsGetReply.FromString, _registered_method=True, ) + self.Aggregate = channel.unary_unary( + "/weaviate.v1.Weaviate/Aggregate", + request_serializer=v1_dot_aggregate__pb2.AggregateRequest.SerializeToString, + response_deserializer=v1_dot_aggregate__pb2.AggregateReply.FromString, + _registered_method=True, + ) class WeaviateServicer(object): @@ -91,6 +98,12 @@ def TenantsGet(self, request, context): context.set_details("Method not implemented!") raise NotImplementedError("Method not implemented!") + def Aggregate(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + def add_WeaviateServicer_to_server(servicer, server): rpc_method_handlers = { @@ -114,6 +127,11 @@ def add_WeaviateServicer_to_server(servicer, server): request_deserializer=v1_dot_tenants__pb2.TenantsGetRequest.FromString, response_serializer=v1_dot_tenants__pb2.TenantsGetReply.SerializeToString, ), + "Aggregate": grpc.unary_unary_rpc_method_handler( + servicer.Aggregate, + request_deserializer=v1_dot_aggregate__pb2.AggregateRequest.FromString, + response_serializer=v1_dot_aggregate__pb2.AggregateReply.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( "weaviate.v1.Weaviate", rpc_method_handlers @@ -245,3 +263,33 @@ def TenantsGet( metadata, _registered_method=True, ) + + @staticmethod + def Aggregate( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, + target, + "/weaviate.v1.Weaviate/Aggregate", + v1_dot_aggregate__pb2.AggregateRequest.SerializeToString, + v1_dot_aggregate__pb2.AggregateReply.FromString, + options, + channel_credentials, + insecure, + 
call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True, + ) From 59d860915439fc2747c09c114bf6e24c121987a8 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 27 Jan 2025 11:21:49 +0000 Subject: [PATCH 12/48] Add test of hybrid with colbert named vector --- integration/test_named_vectors.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index a305f9ea8..39c8e0aa7 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -851,6 +851,13 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: ).objects assert len(objs) == 1 + objs = collection.query.hybrid( + None, + vector={"colbert": wvc.query.NearVector.multi_vector([[1, 2], [3, 4]])}, + target_vector="colbert", + ).objects + assert len(objs) == 1 + def test_colbert_vectors_jinaai(collection_factory: CollectionFactory) -> None: api_key = os.environ.get("JINAAI_APIKEY") From c632284f33cbb766f33ea09a1c2e524530cb492a Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 27 Jan 2025 11:22:21 +0000 Subject: [PATCH 13/48] Update to use latest changes on `main` of core --- .github/workflows/main.yaml | 2 +- weaviate/collections/grpc/query.py | 4 +- weaviate/collections/grpc/shared.py | 10 +++-- weaviate/collections/queries/base.py | 4 +- weaviate/proto/v1/aggregate_pb2.py | 41 ++++++++++++++++++ weaviate/proto/v1/aggregate_pb2.pyi | 56 +++++++++++++++++++++++++ weaviate/proto/v1/aggregate_pb2_grpc.py | 25 +++++++++++ weaviate/proto/v1/base_pb2.py | 14 +++---- weaviate/proto/v1/base_pb2.pyi | 23 +++++----- weaviate/proto/v1/weaviate_pb2.py | 7 ++-- weaviate/proto/v1/weaviate_pb2.pyi | 1 + weaviate/proto/v1/weaviate_pb2_grpc.py | 48 +++++++++++++++++++++ 12 files changed, 206 insertions(+), 29 deletions(-) create mode 100644 weaviate/proto/v1/aggregate_pb2.py create mode 100644 weaviate/proto/v1/aggregate_pb2.pyi create mode 100644 
weaviate/proto/v1/aggregate_pb2_grpc.py diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index d698119b7..15f8e4c95 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -23,7 +23,7 @@ env: WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 WEAVIATE_128: 1.28.3 - WEAVIATE_129: 1.29.0-dev-f985564 + WEAVIATE_129: main-f4ae2e9 jobs: lint-and-format: diff --git a/weaviate/collections/grpc/query.py b/weaviate/collections/grpc/query.py index 549d1c9ba..ae8dea591 100644 --- a/weaviate/collections/grpc/query.py +++ b/weaviate/collections/grpc/query.py @@ -1039,7 +1039,7 @@ def add_vector(val: Sequence[float], target_name: str) -> None: base_pb2.Vectors( name=target_name, vector_bytes=_Pack.single(vec), - type=base_pb2.VECTOR_TYPE_SINGLE_FP32, + type=base_pb2.Vectors.VECTOR_TYPE_SINGLE_FP32, ) ], ) @@ -1067,7 +1067,7 @@ def add_vector(val: Sequence[float], target_name: str) -> None: base_pb2.Vectors( name=key, vector_bytes=_Pack.multi(value.tensor), - type=base_pb2.VECTOR_TYPE_MULTI_FP32, + type=base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32, ) ], ) diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index 1f94f06da..712cfd244 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -63,7 +63,7 @@ def decode_int64s(byte_vector: bytes) -> List[int]: @dataclass class _Packing: bytes_: bytes - type_: base_pb2.VectorType + type_: base_pb2.Vectors.VectorType class _Pack: @@ -84,9 +84,13 @@ def parse_single_or_multi_vec( vector: Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]] ) -> _Packing: if _Pack.is_multi(vector): - return _Packing(bytes_=_Pack.multi(vector), type_=base_pb2.VECTOR_TYPE_MULTI_FP32) + return _Packing( + bytes_=_Pack.multi(vector), type_=base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32 + ) elif _Pack.is_single(vector): - return _Packing(bytes_=_Pack.single(vector), type_=base_pb2.VECTOR_TYPE_SINGLE_FP32) + return _Packing( + bytes_=_Pack.single(vector), 
type_=base_pb2.Vectors.VECTOR_TYPE_SINGLE_FP32 + ) else: raise WeaviateInvalidInputError(f"Invalid vectors: {vector}") diff --git a/weaviate/collections/queries/base.py b/weaviate/collections/queries/base.py index 84f855c38..f0f634d89 100644 --- a/weaviate/collections/queries/base.py +++ b/weaviate/collections/queries/base.py @@ -159,9 +159,9 @@ def __extract_vector_for_object( vecs: Dict[str, Union[List[float], List[List[float]]]] = {} for vec in add_props.vectors: - if vec.type == base_pb2.VECTOR_TYPE_SINGLE_FP32: + if vec.type == base_pb2.Vectors.VECTOR_TYPE_SINGLE_FP32: vecs[vec.name] = _Unpack.single(vec.vector_bytes) - elif vec.type == base_pb2.VECTOR_TYPE_MULTI_FP32: + elif vec.type == base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32: vecs[vec.name] = _Unpack.multi(vec.vector_bytes) else: vecs[vec.name] = _Unpack.single(vec.vector_bytes) diff --git a/weaviate/proto/v1/aggregate_pb2.py b/weaviate/proto/v1/aggregate_pb2.py new file mode 100644 index 000000000..2b56b9844 --- /dev/null +++ b/weaviate/proto/v1/aggregate_pb2.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE +# source: v1/aggregate.proto +# Protobuf Python Version: 5.27.2 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, 5, 27, 2, "", "v1/aggregate.proto" +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x12v1/aggregate.proto\x12\x0bweaviate.v1"v\n\x10\x41ggregateRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x12\n\nmeta_count\x18\x14 \x01(\x08\x12\x19\n\x0cobject_limit\x18\x1e \x01(\rH\x00\x88\x01\x01\x12\x0e\n\x06tenant\x18\n \x01(\tB\x0f\n\r_object_limit"L\n\x0e\x41ggregateReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12,\n\x06result\x18\x02 \x01(\x0b\x32\x1c.weaviate.v1.AggregateResult"D\n\x0f\x41ggregateResult\x12\x31\n\x06groups\x18\x01 \x03(\x0b\x32!.weaviate.v1.AggregateGroupResult"%\n\x14\x41ggregateGroupResult\x12\r\n\x05\x63ount\x18\x01 \x01(\x03\x42s\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoAggregateZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "v1.aggregate_pb2", _globals) +if not _descriptor._USE_C_DESCRIPTORS: + _globals["DESCRIPTOR"]._loaded_options = None + _globals["DESCRIPTOR"]._serialized_options = ( + b"\n#io.weaviate.client.grpc.protocol.v1B\026WeaviateProtoAggregateZ4github.com/weaviate/weaviate/grpc/generated;protocol" + ) + _globals["_AGGREGATEREQUEST"]._serialized_start = 35 + _globals["_AGGREGATEREQUEST"]._serialized_end = 153 + 
_globals["_AGGREGATEREPLY"]._serialized_start = 155 + _globals["_AGGREGATEREPLY"]._serialized_end = 231 + _globals["_AGGREGATERESULT"]._serialized_start = 233 + _globals["_AGGREGATERESULT"]._serialized_end = 301 + _globals["_AGGREGATEGROUPRESULT"]._serialized_start = 303 + _globals["_AGGREGATEGROUPRESULT"]._serialized_end = 340 +# @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/aggregate_pb2.pyi b/weaviate/proto/v1/aggregate_pb2.pyi new file mode 100644 index 000000000..ede2d6bc6 --- /dev/null +++ b/weaviate/proto/v1/aggregate_pb2.pyi @@ -0,0 +1,56 @@ +from google.protobuf.internal import containers as _containers +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ( + ClassVar as _ClassVar, + Iterable as _Iterable, + Mapping as _Mapping, + Optional as _Optional, + Union as _Union, +) + +DESCRIPTOR: _descriptor.FileDescriptor + +class AggregateRequest(_message.Message): + __slots__ = ("collection", "meta_count", "object_limit", "tenant") + COLLECTION_FIELD_NUMBER: _ClassVar[int] + META_COUNT_FIELD_NUMBER: _ClassVar[int] + OBJECT_LIMIT_FIELD_NUMBER: _ClassVar[int] + TENANT_FIELD_NUMBER: _ClassVar[int] + collection: str + meta_count: bool + object_limit: int + tenant: str + def __init__( + self, + collection: _Optional[str] = ..., + meta_count: bool = ..., + object_limit: _Optional[int] = ..., + tenant: _Optional[str] = ..., + ) -> None: ... + +class AggregateReply(_message.Message): + __slots__ = ("took", "result") + TOOK_FIELD_NUMBER: _ClassVar[int] + RESULT_FIELD_NUMBER: _ClassVar[int] + took: float + result: AggregateResult + def __init__( + self, + took: _Optional[float] = ..., + result: _Optional[_Union[AggregateResult, _Mapping]] = ..., + ) -> None: ... 
+ +class AggregateResult(_message.Message): + __slots__ = ("groups",) + GROUPS_FIELD_NUMBER: _ClassVar[int] + groups: _containers.RepeatedCompositeFieldContainer[AggregateGroupResult] + def __init__( + self, groups: _Optional[_Iterable[_Union[AggregateGroupResult, _Mapping]]] = ... + ) -> None: ... + +class AggregateGroupResult(_message.Message): + __slots__ = ("count",) + COUNT_FIELD_NUMBER: _ClassVar[int] + count: int + def __init__(self, count: _Optional[int] = ...) -> None: ... diff --git a/weaviate/proto/v1/aggregate_pb2_grpc.py b/weaviate/proto/v1/aggregate_pb2_grpc.py new file mode 100644 index 000000000..bd9419279 --- /dev/null +++ b/weaviate/proto/v1/aggregate_pb2_grpc.py @@ -0,0 +1,25 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc +import warnings + + +GRPC_GENERATED_VERSION = "1.66.2" +GRPC_VERSION = grpc.__version__ +_version_not_supported = False + +try: + from grpc._utilities import first_version_is_lower + + _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) +except ImportError: + _version_not_supported = True + +if _version_not_supported: + raise RuntimeError( + f"The grpc package installed is at version {GRPC_VERSION}," + + f" but the generated code in v1/aggregate_pb2_grpc.py depends on" + + f" grpcio>={GRPC_GENERATED_VERSION}." + + f" Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}" + + f" or downgrade your generated code using grpcio-tools<={GRPC_VERSION}." 
+ ) diff --git a/weaviate/proto/v1/base_pb2.py b/weaviate/proto/v1/base_pb2.py index 503562529..242b6f0a6 100644 --- a/weaviate/proto/v1/base_pb2.py +++ b/weaviate/proto/v1/base_pb2.py @@ -22,7 +22,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\rv1/base.proto\x12\x0bweaviate.v1\x1a\x1cgoogle/protobuf/struct.proto"T\n\x15NumberArrayProperties\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x18\x01\x12\x11\n\tprop_name\x18\x02 \x01(\t\x12\x14\n\x0cvalues_bytes\x18\x03 \x01(\x0c"7\n\x12IntArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\x03\x12\x11\n\tprop_name\x18\x02 \x01(\t"8\n\x13TextArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\t\x12\x11\n\tprop_name\x18\x02 \x01(\t";\n\x16\x42ooleanArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\x08\x12\x11\n\tprop_name\x18\x02 \x01(\t"\xf1\x03\n\x15ObjectPropertiesValue\x12\x33\n\x12non_ref_properties\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x43\n\x17number_array_properties\x18\x02 \x03(\x0b\x32".weaviate.v1.NumberArrayProperties\x12=\n\x14int_array_properties\x18\x03 \x03(\x0b\x32\x1f.weaviate.v1.IntArrayProperties\x12?\n\x15text_array_properties\x18\x04 \x03(\x0b\x32 .weaviate.v1.TextArrayProperties\x12\x45\n\x18\x62oolean_array_properties\x18\x05 \x03(\x0b\x32#.weaviate.v1.BooleanArrayProperties\x12\x38\n\x11object_properties\x18\x06 \x03(\x0b\x32\x1d.weaviate.v1.ObjectProperties\x12\x43\n\x17object_array_properties\x18\x07 \x03(\x0b\x32".weaviate.v1.ObjectArrayProperties\x12\x18\n\x10\x65mpty_list_props\x18\n \x03(\t"^\n\x15ObjectArrayProperties\x12\x32\n\x06values\x18\x01 \x03(\x0b\x32".weaviate.v1.ObjectPropertiesValue\x12\x11\n\tprop_name\x18\x02 \x01(\t"X\n\x10ObjectProperties\x12\x31\n\x05value\x18\x01 \x01(\x0b\x32".weaviate.v1.ObjectPropertiesValue\x12\x11\n\tprop_name\x18\x02 \x01(\t"\x1b\n\tTextArray\x12\x0e\n\x06values\x18\x01 \x03(\t"\x1a\n\x08IntArray\x12\x0e\n\x06values\x18\x01 \x03(\x03"\x1d\n\x0bNumberArray\x12\x0e\n\x06values\x18\x01 
\x03(\x01"\x1e\n\x0c\x42ooleanArray\x12\x0e\n\x06values\x18\x01 \x03(\x08"\xfc\x06\n\x07\x46ilters\x12/\n\x08operator\x18\x01 \x01(\x0e\x32\x1d.weaviate.v1.Filters.Operator\x12\x0e\n\x02on\x18\x02 \x03(\tB\x02\x18\x01\x12%\n\x07\x66ilters\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Filters\x12\x14\n\nvalue_text\x18\x04 \x01(\tH\x00\x12\x13\n\tvalue_int\x18\x05 \x01(\x03H\x00\x12\x17\n\rvalue_boolean\x18\x06 \x01(\x08H\x00\x12\x16\n\x0cvalue_number\x18\x07 \x01(\x01H\x00\x12\x32\n\x10value_text_array\x18\t \x01(\x0b\x32\x16.weaviate.v1.TextArrayH\x00\x12\x30\n\x0fvalue_int_array\x18\n \x01(\x0b\x32\x15.weaviate.v1.IntArrayH\x00\x12\x38\n\x13value_boolean_array\x18\x0b \x01(\x0b\x32\x19.weaviate.v1.BooleanArrayH\x00\x12\x36\n\x12value_number_array\x18\x0c \x01(\x0b\x32\x18.weaviate.v1.NumberArrayH\x00\x12\x36\n\tvalue_geo\x18\r \x01(\x0b\x32!.weaviate.v1.GeoCoordinatesFilterH\x00\x12)\n\x06target\x18\x14 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget"\xe3\x02\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x12\n\x0eOPERATOR_EQUAL\x10\x01\x12\x16\n\x12OPERATOR_NOT_EQUAL\x10\x02\x12\x19\n\x15OPERATOR_GREATER_THAN\x10\x03\x12\x1f\n\x1bOPERATOR_GREATER_THAN_EQUAL\x10\x04\x12\x16\n\x12OPERATOR_LESS_THAN\x10\x05\x12\x1c\n\x18OPERATOR_LESS_THAN_EQUAL\x10\x06\x12\x10\n\x0cOPERATOR_AND\x10\x07\x12\x0f\n\x0bOPERATOR_OR\x10\x08\x12\x1d\n\x19OPERATOR_WITHIN_GEO_RANGE\x10\t\x12\x11\n\rOPERATOR_LIKE\x10\n\x12\x14\n\x10OPERATOR_IS_NULL\x10\x0b\x12\x19\n\x15OPERATOR_CONTAINS_ANY\x10\x0c\x12\x19\n\x15OPERATOR_CONTAINS_ALL\x10\rB\x0c\n\ntest_value"T\n\x1b\x46ilterReferenceSingleTarget\x12\n\n\x02on\x18\x01 \x01(\t\x12)\n\x06target\x18\x02 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget"n\n\x1a\x46ilterReferenceMultiTarget\x12\n\n\x02on\x18\x01 \x01(\t\x12)\n\x06target\x18\x02 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget\x12\x19\n\x11target_collection\x18\x03 \x01(\t""\n\x14\x46ilterReferenceCount\x12\n\n\x02on\x18\x01 \x01(\t"\xe4\x01\n\x0c\x46ilterTarget\x12\x12\n\x08property\x18\x01 
\x01(\tH\x00\x12\x41\n\rsingle_target\x18\x02 \x01(\x0b\x32(.weaviate.v1.FilterReferenceSingleTargetH\x00\x12?\n\x0cmulti_target\x18\x03 \x01(\x0b\x32\'.weaviate.v1.FilterReferenceMultiTargetH\x00\x12\x32\n\x05\x63ount\x18\x04 \x01(\x0b\x32!.weaviate.v1.FilterReferenceCountH\x00\x42\x08\n\x06target"M\n\x14GeoCoordinatesFilter\x12\x10\n\x08latitude\x18\x01 \x01(\x02\x12\x11\n\tlongitude\x18\x02 \x01(\x02\x12\x10\n\x08\x64istance\x18\x03 \x01(\x02"g\n\x07Vectors\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x11\n\x05index\x18\x02 \x01(\x04\x42\x02\x18\x01\x12\x14\n\x0cvector_bytes\x18\x03 \x01(\x0c\x12%\n\x04type\x18\x04 \x01(\x0e\x32\x17.weaviate.v1.VectorType*\x89\x01\n\x10\x43onsistencyLevel\x12!\n\x1d\x43ONSISTENCY_LEVEL_UNSPECIFIED\x10\x00\x12\x19\n\x15\x43ONSISTENCY_LEVEL_ONE\x10\x01\x12\x1c\n\x18\x43ONSISTENCY_LEVEL_QUORUM\x10\x02\x12\x19\n\x15\x43ONSISTENCY_LEVEL_ALL\x10\x03*b\n\nVectorType\x12\x1b\n\x17VECTOR_TYPE_UNSPECIFIED\x10\x00\x12\x1b\n\x17VECTOR_TYPE_SINGLE_FP32\x10\x01\x12\x1a\n\x16VECTOR_TYPE_MULTI_FP32\x10\x02\x42n\n#io.weaviate.client.grpc.protocol.v1B\x11WeaviateProtoBaseZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' + b'\n\rv1/base.proto\x12\x0bweaviate.v1\x1a\x1cgoogle/protobuf/struct.proto"T\n\x15NumberArrayProperties\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x18\x01\x12\x11\n\tprop_name\x18\x02 \x01(\t\x12\x14\n\x0cvalues_bytes\x18\x03 \x01(\x0c"7\n\x12IntArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\x03\x12\x11\n\tprop_name\x18\x02 \x01(\t"8\n\x13TextArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\t\x12\x11\n\tprop_name\x18\x02 \x01(\t";\n\x16\x42ooleanArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\x08\x12\x11\n\tprop_name\x18\x02 \x01(\t"\xf1\x03\n\x15ObjectPropertiesValue\x12\x33\n\x12non_ref_properties\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x43\n\x17number_array_properties\x18\x02 \x03(\x0b\x32".weaviate.v1.NumberArrayProperties\x12=\n\x14int_array_properties\x18\x03 
\x03(\x0b\x32\x1f.weaviate.v1.IntArrayProperties\x12?\n\x15text_array_properties\x18\x04 \x03(\x0b\x32 .weaviate.v1.TextArrayProperties\x12\x45\n\x18\x62oolean_array_properties\x18\x05 \x03(\x0b\x32#.weaviate.v1.BooleanArrayProperties\x12\x38\n\x11object_properties\x18\x06 \x03(\x0b\x32\x1d.weaviate.v1.ObjectProperties\x12\x43\n\x17object_array_properties\x18\x07 \x03(\x0b\x32".weaviate.v1.ObjectArrayProperties\x12\x18\n\x10\x65mpty_list_props\x18\n \x03(\t"^\n\x15ObjectArrayProperties\x12\x32\n\x06values\x18\x01 \x03(\x0b\x32".weaviate.v1.ObjectPropertiesValue\x12\x11\n\tprop_name\x18\x02 \x01(\t"X\n\x10ObjectProperties\x12\x31\n\x05value\x18\x01 \x01(\x0b\x32".weaviate.v1.ObjectPropertiesValue\x12\x11\n\tprop_name\x18\x02 \x01(\t"\x1b\n\tTextArray\x12\x0e\n\x06values\x18\x01 \x03(\t"\x1a\n\x08IntArray\x12\x0e\n\x06values\x18\x01 \x03(\x03"\x1d\n\x0bNumberArray\x12\x0e\n\x06values\x18\x01 \x03(\x01"\x1e\n\x0c\x42ooleanArray\x12\x0e\n\x06values\x18\x01 \x03(\x08"\xfc\x06\n\x07\x46ilters\x12/\n\x08operator\x18\x01 \x01(\x0e\x32\x1d.weaviate.v1.Filters.Operator\x12\x0e\n\x02on\x18\x02 \x03(\tB\x02\x18\x01\x12%\n\x07\x66ilters\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Filters\x12\x14\n\nvalue_text\x18\x04 \x01(\tH\x00\x12\x13\n\tvalue_int\x18\x05 \x01(\x03H\x00\x12\x17\n\rvalue_boolean\x18\x06 \x01(\x08H\x00\x12\x16\n\x0cvalue_number\x18\x07 \x01(\x01H\x00\x12\x32\n\x10value_text_array\x18\t \x01(\x0b\x32\x16.weaviate.v1.TextArrayH\x00\x12\x30\n\x0fvalue_int_array\x18\n \x01(\x0b\x32\x15.weaviate.v1.IntArrayH\x00\x12\x38\n\x13value_boolean_array\x18\x0b \x01(\x0b\x32\x19.weaviate.v1.BooleanArrayH\x00\x12\x36\n\x12value_number_array\x18\x0c \x01(\x0b\x32\x18.weaviate.v1.NumberArrayH\x00\x12\x36\n\tvalue_geo\x18\r \x01(\x0b\x32!.weaviate.v1.GeoCoordinatesFilterH\x00\x12)\n\x06target\x18\x14 
\x01(\x0b\x32\x19.weaviate.v1.FilterTarget"\xe3\x02\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x12\n\x0eOPERATOR_EQUAL\x10\x01\x12\x16\n\x12OPERATOR_NOT_EQUAL\x10\x02\x12\x19\n\x15OPERATOR_GREATER_THAN\x10\x03\x12\x1f\n\x1bOPERATOR_GREATER_THAN_EQUAL\x10\x04\x12\x16\n\x12OPERATOR_LESS_THAN\x10\x05\x12\x1c\n\x18OPERATOR_LESS_THAN_EQUAL\x10\x06\x12\x10\n\x0cOPERATOR_AND\x10\x07\x12\x0f\n\x0bOPERATOR_OR\x10\x08\x12\x1d\n\x19OPERATOR_WITHIN_GEO_RANGE\x10\t\x12\x11\n\rOPERATOR_LIKE\x10\n\x12\x14\n\x10OPERATOR_IS_NULL\x10\x0b\x12\x19\n\x15OPERATOR_CONTAINS_ANY\x10\x0c\x12\x19\n\x15OPERATOR_CONTAINS_ALL\x10\rB\x0c\n\ntest_value"T\n\x1b\x46ilterReferenceSingleTarget\x12\n\n\x02on\x18\x01 \x01(\t\x12)\n\x06target\x18\x02 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget"n\n\x1a\x46ilterReferenceMultiTarget\x12\n\n\x02on\x18\x01 \x01(\t\x12)\n\x06target\x18\x02 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget\x12\x19\n\x11target_collection\x18\x03 \x01(\t""\n\x14\x46ilterReferenceCount\x12\n\n\x02on\x18\x01 \x01(\t"\xe4\x01\n\x0c\x46ilterTarget\x12\x12\n\x08property\x18\x01 \x01(\tH\x00\x12\x41\n\rsingle_target\x18\x02 \x01(\x0b\x32(.weaviate.v1.FilterReferenceSingleTargetH\x00\x12?\n\x0cmulti_target\x18\x03 \x01(\x0b\x32\'.weaviate.v1.FilterReferenceMultiTargetH\x00\x12\x32\n\x05\x63ount\x18\x04 \x01(\x0b\x32!.weaviate.v1.FilterReferenceCountH\x00\x42\x08\n\x06target"M\n\x14GeoCoordinatesFilter\x12\x10\n\x08latitude\x18\x01 \x01(\x02\x12\x11\n\tlongitude\x18\x02 \x01(\x02\x12\x10\n\x08\x64istance\x18\x03 \x01(\x02"\xd3\x01\n\x07Vectors\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x11\n\x05index\x18\x02 \x01(\x04\x42\x02\x18\x01\x12\x14\n\x0cvector_bytes\x18\x03 \x01(\x0c\x12-\n\x04type\x18\x04 
\x01(\x0e\x32\x1f.weaviate.v1.Vectors.VectorType"b\n\nVectorType\x12\x1b\n\x17VECTOR_TYPE_UNSPECIFIED\x10\x00\x12\x1b\n\x17VECTOR_TYPE_SINGLE_FP32\x10\x01\x12\x1a\n\x16VECTOR_TYPE_MULTI_FP32\x10\x02*\x89\x01\n\x10\x43onsistencyLevel\x12!\n\x1d\x43ONSISTENCY_LEVEL_UNSPECIFIED\x10\x00\x12\x19\n\x15\x43ONSISTENCY_LEVEL_ONE\x10\x01\x12\x1c\n\x18\x43ONSISTENCY_LEVEL_QUORUM\x10\x02\x12\x19\n\x15\x43ONSISTENCY_LEVEL_ALL\x10\x03\x42n\n#io.weaviate.client.grpc.protocol.v1B\x11WeaviateProtoBaseZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' ) _globals = globals() @@ -39,10 +39,8 @@ _globals["_FILTERS"].fields_by_name["on"]._serialized_options = b"\030\001" _globals["_VECTORS"].fields_by_name["index"]._loaded_options = None _globals["_VECTORS"].fields_by_name["index"]._serialized_options = b"\030\001" - _globals["_CONSISTENCYLEVEL"]._serialized_start = 2673 - _globals["_CONSISTENCYLEVEL"]._serialized_end = 2810 - _globals["_VECTORTYPE"]._serialized_start = 2812 - _globals["_VECTORTYPE"]._serialized_end = 2910 + _globals["_CONSISTENCYLEVEL"]._serialized_start = 2782 + _globals["_CONSISTENCYLEVEL"]._serialized_end = 2919 _globals["_NUMBERARRAYPROPERTIES"]._serialized_start = 60 _globals["_NUMBERARRAYPROPERTIES"]._serialized_end = 144 _globals["_INTARRAYPROPERTIES"]._serialized_start = 146 @@ -79,6 +77,8 @@ _globals["_FILTERTARGET"]._serialized_end = 2486 _globals["_GEOCOORDINATESFILTER"]._serialized_start = 2488 _globals["_GEOCOORDINATESFILTER"]._serialized_end = 2565 - _globals["_VECTORS"]._serialized_start = 2567 - _globals["_VECTORS"]._serialized_end = 2670 + _globals["_VECTORS"]._serialized_start = 2568 + _globals["_VECTORS"]._serialized_end = 2779 + _globals["_VECTORS_VECTORTYPE"]._serialized_start = 2681 + _globals["_VECTORS_VECTORTYPE"]._serialized_end = 2779 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/base_pb2.pyi b/weaviate/proto/v1/base_pb2.pyi index 257244bb2..70bb769d7 100644 --- a/weaviate/proto/v1/base_pb2.pyi +++ 
b/weaviate/proto/v1/base_pb2.pyi @@ -20,19 +20,10 @@ class ConsistencyLevel(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): CONSISTENCY_LEVEL_QUORUM: _ClassVar[ConsistencyLevel] CONSISTENCY_LEVEL_ALL: _ClassVar[ConsistencyLevel] -class VectorType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = () - VECTOR_TYPE_UNSPECIFIED: _ClassVar[VectorType] - VECTOR_TYPE_SINGLE_FP32: _ClassVar[VectorType] - VECTOR_TYPE_MULTI_FP32: _ClassVar[VectorType] - CONSISTENCY_LEVEL_UNSPECIFIED: ConsistencyLevel CONSISTENCY_LEVEL_ONE: ConsistencyLevel CONSISTENCY_LEVEL_QUORUM: ConsistencyLevel CONSISTENCY_LEVEL_ALL: ConsistencyLevel -VECTOR_TYPE_UNSPECIFIED: VectorType -VECTOR_TYPE_SINGLE_FP32: VectorType -VECTOR_TYPE_MULTI_FP32: VectorType class NumberArrayProperties(_message.Message): __slots__ = ("values", "prop_name", "values_bytes") @@ -329,6 +320,16 @@ class GeoCoordinatesFilter(_message.Message): class Vectors(_message.Message): __slots__ = ("name", "index", "vector_bytes", "type") + + class VectorType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + VECTOR_TYPE_UNSPECIFIED: _ClassVar[Vectors.VectorType] + VECTOR_TYPE_SINGLE_FP32: _ClassVar[Vectors.VectorType] + VECTOR_TYPE_MULTI_FP32: _ClassVar[Vectors.VectorType] + + VECTOR_TYPE_UNSPECIFIED: Vectors.VectorType + VECTOR_TYPE_SINGLE_FP32: Vectors.VectorType + VECTOR_TYPE_MULTI_FP32: Vectors.VectorType NAME_FIELD_NUMBER: _ClassVar[int] INDEX_FIELD_NUMBER: _ClassVar[int] VECTOR_BYTES_FIELD_NUMBER: _ClassVar[int] @@ -336,11 +337,11 @@ class Vectors(_message.Message): name: str index: int vector_bytes: bytes - type: VectorType + type: Vectors.VectorType def __init__( self, name: _Optional[str] = ..., index: _Optional[int] = ..., vector_bytes: _Optional[bytes] = ..., - type: _Optional[_Union[VectorType, str]] = ..., + type: _Optional[_Union[Vectors.VectorType, str]] = ..., ) -> None: ... 
diff --git a/weaviate/proto/v1/weaviate_pb2.py b/weaviate/proto/v1/weaviate_pb2.py index 5c102ea2f..852494a3e 100644 --- a/weaviate/proto/v1/weaviate_pb2.py +++ b/weaviate/proto/v1/weaviate_pb2.py @@ -18,6 +18,7 @@ _sym_db = _symbol_database.Default() +from weaviate.proto.v1 import aggregate_pb2 as v1_dot_aggregate__pb2 from weaviate.proto.v1 import batch_pb2 as v1_dot_batch__pb2 from weaviate.proto.v1 import batch_delete_pb2 as v1_dot_batch__delete__pb2 from weaviate.proto.v1 import search_get_pb2 as v1_dot_search__get__pb2 @@ -25,7 +26,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x11v1/weaviate.proto\x12\x0bweaviate.v1\x1a\x0ev1/batch.proto\x1a\x15v1/batch_delete.proto\x1a\x13v1/search_get.proto\x1a\x10v1/tenants.proto2\xbf\x02\n\x08Weaviate\x12@\n\x06Search\x12\x1a.weaviate.v1.SearchRequest\x1a\x18.weaviate.v1.SearchReply"\x00\x12R\n\x0c\x42\x61tchObjects\x12 .weaviate.v1.BatchObjectsRequest\x1a\x1e.weaviate.v1.BatchObjectsReply"\x00\x12O\n\x0b\x42\x61tchDelete\x12\x1f.weaviate.v1.BatchDeleteRequest\x1a\x1d.weaviate.v1.BatchDeleteReply"\x00\x12L\n\nTenantsGet\x12\x1e.weaviate.v1.TenantsGetRequest\x1a\x1c.weaviate.v1.TenantsGetReply"\x00\x42j\n#io.weaviate.client.grpc.protocol.v1B\rWeaviateProtoZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' + b'\n\x11v1/weaviate.proto\x12\x0bweaviate.v1\x1a\x12v1/aggregate.proto\x1a\x0ev1/batch.proto\x1a\x15v1/batch_delete.proto\x1a\x13v1/search_get.proto\x1a\x10v1/tenants.proto2\x8a\x03\n\x08Weaviate\x12@\n\x06Search\x12\x1a.weaviate.v1.SearchRequest\x1a\x18.weaviate.v1.SearchReply"\x00\x12R\n\x0c\x42\x61tchObjects\x12 
.weaviate.v1.BatchObjectsRequest\x1a\x1e.weaviate.v1.BatchObjectsReply"\x00\x12O\n\x0b\x42\x61tchDelete\x12\x1f.weaviate.v1.BatchDeleteRequest\x1a\x1d.weaviate.v1.BatchDeleteReply"\x00\x12L\n\nTenantsGet\x12\x1e.weaviate.v1.TenantsGetRequest\x1a\x1c.weaviate.v1.TenantsGetReply"\x00\x12I\n\tAggregate\x12\x1d.weaviate.v1.AggregateRequest\x1a\x1b.weaviate.v1.AggregateReply"\x00\x42j\n#io.weaviate.client.grpc.protocol.v1B\rWeaviateProtoZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' ) _globals = globals() @@ -36,6 +37,6 @@ _globals["DESCRIPTOR"]._serialized_options = ( b"\n#io.weaviate.client.grpc.protocol.v1B\rWeaviateProtoZ4github.com/weaviate/weaviate/grpc/generated;protocol" ) - _globals["_WEAVIATE"]._serialized_start = 113 - _globals["_WEAVIATE"]._serialized_end = 432 + _globals["_WEAVIATE"]._serialized_start = 133 + _globals["_WEAVIATE"]._serialized_end = 527 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/weaviate_pb2.pyi b/weaviate/proto/v1/weaviate_pb2.pyi index f91159542..34b623594 100644 --- a/weaviate/proto/v1/weaviate_pb2.pyi +++ b/weaviate/proto/v1/weaviate_pb2.pyi @@ -1,3 +1,4 @@ +from weaviate.proto.v1 import aggregate_pb2 as _aggregate_pb2 from weaviate.proto.v1 import batch_pb2 as _batch_pb2 from weaviate.proto.v1 import batch_delete_pb2 as _batch_delete_pb2 from weaviate.proto.v1 import search_get_pb2 as _search_get_pb2 diff --git a/weaviate/proto/v1/weaviate_pb2_grpc.py b/weaviate/proto/v1/weaviate_pb2_grpc.py index a25b0fac9..a7f88b64c 100644 --- a/weaviate/proto/v1/weaviate_pb2_grpc.py +++ b/weaviate/proto/v1/weaviate_pb2_grpc.py @@ -3,6 +3,7 @@ import grpc import warnings +from weaviate.proto.v1 import aggregate_pb2 as v1_dot_aggregate__pb2 from weaviate.proto.v1 import batch_delete_pb2 as v1_dot_batch__delete__pb2 from weaviate.proto.v1 import batch_pb2 as v1_dot_batch__pb2 from weaviate.proto.v1 import search_get_pb2 as v1_dot_search__get__pb2 @@ -62,6 +63,12 @@ def __init__(self, channel): 
response_deserializer=v1_dot_tenants__pb2.TenantsGetReply.FromString, _registered_method=True, ) + self.Aggregate = channel.unary_unary( + "/weaviate.v1.Weaviate/Aggregate", + request_serializer=v1_dot_aggregate__pb2.AggregateRequest.SerializeToString, + response_deserializer=v1_dot_aggregate__pb2.AggregateReply.FromString, + _registered_method=True, + ) class WeaviateServicer(object): @@ -91,6 +98,12 @@ def TenantsGet(self, request, context): context.set_details("Method not implemented!") raise NotImplementedError("Method not implemented!") + def Aggregate(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + def add_WeaviateServicer_to_server(servicer, server): rpc_method_handlers = { @@ -114,6 +127,11 @@ def add_WeaviateServicer_to_server(servicer, server): request_deserializer=v1_dot_tenants__pb2.TenantsGetRequest.FromString, response_serializer=v1_dot_tenants__pb2.TenantsGetReply.SerializeToString, ), + "Aggregate": grpc.unary_unary_rpc_method_handler( + servicer.Aggregate, + request_deserializer=v1_dot_aggregate__pb2.AggregateRequest.FromString, + response_serializer=v1_dot_aggregate__pb2.AggregateReply.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( "weaviate.v1.Weaviate", rpc_method_handlers @@ -245,3 +263,33 @@ def TenantsGet( metadata, _registered_method=True, ) + + @staticmethod + def Aggregate( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, + target, + "/weaviate.v1.Weaviate/Aggregate", + v1_dot_aggregate__pb2.AggregateRequest.SerializeToString, + v1_dot_aggregate__pb2.AggregateReply.FromString, + options, + channel_credentials, + insecure, + 
call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True, + ) From 285be767a01fe996571f32f9cf7c3fec349ba462 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 27 Jan 2025 11:45:55 +0000 Subject: [PATCH 14/48] Make `_HybridNearVector` a native class to avoid validation issues with np, pd, pl, etc. --- weaviate/collections/classes/grpc.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index a3d249396..dc6185e07 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -273,8 +273,21 @@ class _HybridNearText(_HybridNearBase): move_away: Optional[Move] = None -class _HybridNearVector(_HybridNearBase): +class _HybridNearVector: # can't be a Pydantic model because of validation issues parsing numpy, pd, pl arrays/series vector: NearVectorInputType + distance: Optional[float] + certainty: Optional[float] + + def __init__( + self, + *, + vector: NearVectorInputType, + distance: Optional[float] = None, + certainty: Optional[float] = None, + ) -> None: + self.vector = vector + self.distance = distance + self.certainty = certainty HybridVectorType = Union[NearVectorInputType, _HybridNearText, _HybridNearVector] From 4d158a85dafa8e14597236e3282f3fe1c7e0aef0 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Tue, 28 Jan 2025 09:57:28 +0000 Subject: [PATCH 15/48] Align on acceptable UX naming of methods --- integration/test_named_vectors.py | 12 ++++++++---- weaviate/collections/classes/grpc.py | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index 39c8e0aa7..4b1e7cb08 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -695,7 +695,7 @@ def test_same_target_vector_multiple_input( ( { "first": [0, 1], - "second": wvc.query.NearVector.many_vectors([[1, 0, 0], [0, 
0, 1]]), + "second": wvc.query.NearVector.list_of_vectors([[1, 0, 0], [0, 0, 1]]), }, ["first", "second"], ), @@ -846,14 +846,14 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: assert len(collection) == 1 objs = collection.query.near_vector( - {"colbert": wvc.query.NearVector.multi_vector([[1, 2], [3, 4]])}, + {"colbert": wvc.query.NearVector.multidimensional([[1, 2], [3, 4]])}, target_vector="colbert", ).objects assert len(objs) == 1 objs = collection.query.hybrid( None, - vector={"colbert": wvc.query.NearVector.multi_vector([[1, 2], [3, 4]])}, + vector={"colbert": wvc.query.NearVector.multidimensional([[1, 2], [3, 4]])}, target_vector="colbert", ).objects assert len(objs) == 1 @@ -895,7 +895,11 @@ def test_colbert_vectors_jinaai(collection_factory: CollectionFactory) -> None: assert len(objs) == 1 objs = collection.query.near_vector( - {"colbert": wvc.query.NearVector.multi_vector([[e + 0.01 for e in vec] for vec in vecs])}, + { + "colbert": wvc.query.NearVector.multidimensional( + [[e + 0.01 for e in vec] for vec in vecs] + ) + }, target_vector="colbert", ).objects assert len(objs) == 1 diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index dc6185e07..ac606f93d 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -250,12 +250,12 @@ class NearVector: """Factory class to use when defining near vector queries with multiple vectors in `near_vector()` and `hybrid()` methods.""" @staticmethod - def multi_vector(tensor: Sequence[Sequence[float]]) -> _MultiVectorQuery: + def multidimensional(tensor: Sequence[Sequence[float]]) -> _MultiVectorQuery: """Define a multi-vector query to be used within a near vector search, i.e. 
a single vector over a multi-vector space.""" return _MultiVectorQuery(tensor=tensor) @staticmethod - def many_vectors(vectors: Sequence[Sequence[float]]) -> _ManyVectorsQuery: + def list_of_vectors(vectors: Sequence[Sequence[float]]) -> _ManyVectorsQuery: """Define a many-vectors query to be used within a near vector search, i.e. multiple vectors over a single-vector space.""" return _ManyVectorsQuery(vectors=vectors) From 68578099817cae687ee711a6efa583a191ec0aa1 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Tue, 28 Jan 2025 12:05:32 +0000 Subject: [PATCH 16/48] Respond to comment --- weaviate/collections/classes/grpc.py | 24 +++++++++++++++++------- weaviate/collections/grpc/query.py | 12 ++++++------ weaviate/outputs/query.py | 12 ++++++++++-- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index ac606f93d..0550cf004 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -228,36 +228,46 @@ class Rerank(_WeaviateInput): query: Optional[str] = Field(default=None) -class _MultiVectorQuery(_WeaviateInput): +class _MultidimensionalQuery(_WeaviateInput): tensor: Sequence[Sequence[float]] -class _ManyVectorsQuery(_WeaviateInput): +class _ListOfVectorsQuery(_WeaviateInput): vectors: Sequence[Sequence[float]] +MultidimensionalQuery = _MultidimensionalQuery +"""Define a multi-vector query to be used within a near vector search, i.e. a single vector over a multi-vector space.""" + +ListOfVectorsQuery = _ListOfVectorsQuery +"""Define a many-vectors query to be used within a near vector search, i.e. 
multiple vectors over a single-vector space.""" + + NearVectorInputType = Union[ Sequence[NUMBER], Sequence[Sequence[NUMBER]], Mapping[ str, - Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]], _MultiVectorQuery, _ManyVectorsQuery], + Union[ + Sequence[NUMBER], Sequence[Sequence[NUMBER]], MultidimensionalQuery, ListOfVectorsQuery + ], ], ] +"""Define the input types that can be used in a near vector search.""" class NearVector: """Factory class to use when defining near vector queries with multiple vectors in `near_vector()` and `hybrid()` methods.""" @staticmethod - def multidimensional(tensor: Sequence[Sequence[float]]) -> _MultiVectorQuery: + def multidimensional(tensor: Sequence[Sequence[float]]) -> _MultidimensionalQuery: """Define a multi-vector query to be used within a near vector search, i.e. a single vector over a multi-vector space.""" - return _MultiVectorQuery(tensor=tensor) + return _MultidimensionalQuery(tensor=tensor) @staticmethod - def list_of_vectors(vectors: Sequence[Sequence[float]]) -> _ManyVectorsQuery: + def list_of_vectors(vectors: Sequence[Sequence[float]]) -> _ListOfVectorsQuery: """Define a many-vectors query to be used within a near vector search, i.e. 
multiple vectors over a single-vector space.""" - return _ManyVectorsQuery(vectors=vectors) + return _ListOfVectorsQuery(vectors=vectors) class _HybridNearBase(_WeaviateInput): diff --git a/weaviate/collections/grpc/query.py b/weaviate/collections/grpc/query.py index ae8dea591..25fdeeccd 100644 --- a/weaviate/collections/grpc/query.py +++ b/weaviate/collections/grpc/query.py @@ -24,8 +24,8 @@ from weaviate.collections.classes.filters import _Filters from weaviate.collections.classes.grpc import ( _MultiTargetVectorJoin, - _MultiVectorQuery, - _ManyVectorsQuery, + _MultidimensionalQuery, + _ListOfVectorsQuery, HybridFusion, _QueryReferenceMultiTarget, _MetadataQuery, @@ -983,8 +983,8 @@ def __vector_per_target( return vector_per_target, None else: if ( - isinstance(vector, _MultiVectorQuery) - or isinstance(vector, _ManyVectorsQuery) + isinstance(vector, _MultidimensionalQuery) + or isinstance(vector, _ListOfVectorsQuery) or len(vector) == 0 ): raise invalid_nv_exception @@ -1059,7 +1059,7 @@ def add_vector(val: Sequence[float], target_name: str) -> None: val = value add_vector(val, key) target_vectors_tmp.append(key) - elif isinstance(value, _MultiVectorQuery): + elif isinstance(value, _MultidimensionalQuery): vector_for_target.append( search_get_pb2.VectorForTarget( name=key, @@ -1072,7 +1072,7 @@ def add_vector(val: Sequence[float], target_name: str) -> None: ], ) ) - elif isinstance(value, _ManyVectorsQuery): + elif isinstance(value, _ListOfVectorsQuery): for vec in value.vectors: add_vector(vec, key) target_vectors_tmp.append(key) diff --git a/weaviate/outputs/query.py b/weaviate/outputs/query.py index 113a1e474..d63e1090d 100644 --- a/weaviate/outputs/query.py +++ b/weaviate/outputs/query.py @@ -6,7 +6,13 @@ FilterByRef, FilterReturn, ) -from weaviate.collections.classes.grpc import Sorting, NearVectorInputType, TargetVectorJoinType +from weaviate.collections.classes.grpc import ( + Sorting, + NearVectorInputType, + TargetVectorJoinType, + 
MultidimensionalQuery, + ListOfVectorsQuery, +) from weaviate.collections.classes.internal import ( @@ -52,9 +58,11 @@ "GenerativeGroupByReturnType", "GenerativeSearchReturnType", "GeoCoordinate", - "NearVectorInputType", + "ListOfVectorsQuery", "MetadataReturn", "MetadataSingleObjectReturn", + "MultidimensionalQuery", + "NearVectorInputType", "Object", "ObjectSingleReturn", "GroupByObject", From 5ec9c81cb650b371cbeef2d05971d97b158b0779 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Tue, 28 Jan 2025 12:25:07 +0000 Subject: [PATCH 17/48] Fix test --- integration/test_named_vectors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index 4b1e7cb08..369675131 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -16,7 +16,7 @@ ReferenceProperty, ) from weaviate.collections.classes.data import DataObject -from weaviate.collections.classes.grpc import _MultiTargetVectorJoin, _ManyVectorsQuery +from weaviate.collections.classes.grpc import _MultiTargetVectorJoin, _ListOfVectorsQuery from weaviate.exceptions import WeaviateInvalidInputError from weaviate.types import INCLUDE_VECTOR @@ -712,7 +712,7 @@ def test_same_target_vector_multiple_input( ) def test_same_target_vector_multiple_input_combinations( collection_factory: CollectionFactory, - near_vector: Dict[str, Union[Sequence[float], Sequence[Sequence[float]], _ManyVectorsQuery]], + near_vector: Dict[str, Union[Sequence[float], Sequence[Sequence[float]], _ListOfVectorsQuery]], target_vector: List[str], ) -> None: dummy = collection_factory("dummy") From a13e6c9f00c75547be34a3881477f2762c7b5f3c Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Tue, 28 Jan 2025 13:19:15 +0000 Subject: [PATCH 18/48] Update to latest `main` image --- .github/workflows/main.yaml | 2 +- integration/test_named_vectors.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 15f8e4c95..da292881d 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -23,7 +23,7 @@ env: WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 WEAVIATE_128: 1.28.3 - WEAVIATE_129: main-f4ae2e9 + WEAVIATE_129: 1.29.0-dev-7b81c72 jobs: lint-and-format: diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index 369675131..15acf056f 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -842,7 +842,7 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: ) assert config.vector_config["colbert"].vector_index_config.multi_vector.aggregation == "maxSim" - collection.data.insert({}, vector={"colbert": [[1, 2], [4, 5]]}) + collection.data.insert_many([DataObject({}, vector={"colbert": [[1, 2], [4, 5]]})]) assert len(collection) == 1 objs = collection.query.near_vector( From 11a8cb719aa22b3acefda01943382b78daeb38e7 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Tue, 28 Jan 2025 18:04:20 +0000 Subject: [PATCH 19/48] Update to latest protos and add logic for `over_all` call using grpc --- .../collections/aggregations/aggregate.py | 135 ++++- weaviate/collections/aggregations/over_all.py | 35 +- weaviate/collections/classes/aggregate.py | 123 +++- weaviate/collections/collection/async_.py | 2 +- weaviate/collections/collection/sync.py | 2 +- weaviate/collections/grpc/aggregate.py | 41 +- weaviate/proto/v1/aggregate_pb2.py | 65 ++- weaviate/proto/v1/aggregate_pb2.pyi | 528 +++++++++++++++++- 8 files changed, 856 insertions(+), 75 deletions(-) diff --git a/weaviate/collections/aggregations/aggregate.py b/weaviate/collections/aggregations/aggregate.py index bb3305d89..1e6d3e414 100644 --- a/weaviate/collections/aggregations/aggregate.py +++ b/weaviate/collections/aggregations/aggregate.py @@ -14,7 +14,7 @@ AggregateDate, AggregateInteger, AggregateNumber, - # AggregateReference, # Aggregate references 
currently bugged on Weaviate's side + AggregateReference, AggregateText, AggregateGroup, AggregateGroupByReturn, @@ -25,7 +25,7 @@ _MetricsDate, _MetricsNumber, _MetricsInteger, - # _MetricsReference, # Aggregate references currently bugged on Weaviate's side + _MetricsReference, _MetricsText, GroupedBy, TopOccurrence, @@ -33,11 +33,13 @@ from weaviate.collections.classes.config import ConsistencyLevel from weaviate.collections.classes.filters import _Filters from weaviate.collections.classes.grpc import Move +from weaviate.collections.classes.types import GeoCoordinate from weaviate.collections.filters import _FilterToREST from weaviate.collections.grpc.aggregate import _AggregateGRPC from weaviate.connect import ConnectionV4 from weaviate.exceptions import WeaviateInvalidInputError, WeaviateQueryError from weaviate.gql.aggregate import AggregateBuilder +from weaviate.proto.v1 import aggregate_pb2 from weaviate.types import NUMBER, UUID from weaviate.util import file_encoder_b64, _decode_json_response_dict from weaviate.validator import _ValidateArgument, _validate_input @@ -85,6 +87,68 @@ def _to_aggregate_result( f"There was an error accessing the {e} key when parsing the GraphQL response: {response}" ) + def _to_result( + self, response: aggregate_pb2.AggregateReply + ) -> Union[AggregateReturn, AggregateGroupByReturn]: + if len(response.result.groups) == 0: + raise WeaviateQueryError("No results found in the aggregation query!", "gRPC") + if len(response.result.groups) == 1: + result = response.result.groups[0] + return AggregateReturn( + properties={ + aggregation.property: self.__parse_property_grpc(aggregation) + for aggregation in result.aggregations.aggregations + }, + total_count=result.objects_count, + ) + return AggregateGroupByReturn( + groups=[ + AggregateGroup( + grouped_by=self.__parse_grouped_by_value(group.grouped_by), + properties={ + aggregation.property: self.__parse_property_grpc(aggregation) + for aggregation in 
group.aggregations.aggregations + }, + total_count=group.objects_count, + ) + for group in response.result.groups + ] + ) + + def __parse_grouped_by_value( + self, grouped_by: aggregate_pb2.AggregateGroup.GroupedBy + ) -> GroupedBy: + value: Union[ + str, int, float, bool, List[str], List[int], List[float], List[bool], GeoCoordinate + ] + if grouped_by.HasField("text"): + value = grouped_by.text + elif grouped_by.HasField("int"): + value = grouped_by.int + elif grouped_by.HasField("number"): + value = grouped_by.number + elif grouped_by.HasField("boolean"): + value = grouped_by.boolean + elif grouped_by.HasField("texts"): + value = list(grouped_by.texts.values) + elif grouped_by.HasField("ints"): + value = list(grouped_by.ints.values) + elif grouped_by.HasField("numbers"): + value = list(grouped_by.numbers.values) + elif grouped_by.HasField("booleans"): + value = list(grouped_by.booleans.values) + elif grouped_by.HasField("geo"): + v = grouped_by.geo + value = GeoCoordinate( + latitude=v.latitude, + longitude=v.longitude, + ) + else: + raise ValueError( + f"Unknown grouped by type {grouped_by} encountered in _Aggregate.__parse_grouped_by_value()" + ) + return GroupedBy(prop=grouped_by.path[0], value=value) + def _to_group_by_result( self, response: dict, metrics: Optional[List[_Metrics]] ) -> AggregateGroupByReturn: @@ -116,13 +180,13 @@ def __parse_properties(self, result: dict, metrics: List[_Metrics]) -> AProperti props: AProperties = {} for metric in metrics: if metric.property_name in result: - props[metric.property_name] = self.__parse_property( + props[metric.property_name] = self.__parse_property_gql( result[metric.property_name], metric ) return props @staticmethod - def __parse_property(property_: dict, metric: _Metrics) -> AggregateResult: + def __parse_property_gql(property_: dict, metric: _Metrics) -> AggregateResult: if isinstance(metric, _MetricsText): return AggregateText( count=property_.get("count"), @@ -170,14 +234,71 @@ def 
__parse_property(property_: dict, metric: _Metrics) -> AggregateResult: minimum=property_.get("minimum"), mode=property_.get("mode"), ) - # Aggregate references currently bugged on Weaviate's side - # elif isinstance(metric, _MetricsReference): - # return AggregateReference(pointing_to=property_.get("pointingTo")) + elif isinstance(metric, _MetricsReference): + return AggregateReference(pointing_to=property_.get("pointingTo")) else: raise ValueError( f"Unknown aggregation type {metric} encountered in _Aggregate.__parse_property() for property {property_}" ) + @staticmethod + def __parse_property_grpc( + aggregation: aggregate_pb2.AggregateGroup.Aggregations.Aggregation, + ) -> AggregateResult: + if (a := aggregation.text) is not None: + return AggregateText( + count=a.count, + top_occurrences=[ + TopOccurrence( + count=top_occurrence.occurs, + value=top_occurrence.value, + ) + for top_occurrence in a.top_occurences.items + ], + ) + elif (a := aggregation.int) is not None: + return AggregateInteger( + count=a.count, + maximum=a.maximum, + mean=a.mean, + median=a.median, + minimum=a.minimum, + mode=a.mode, + sum_=a.sum, + ) + elif (a := aggregation.number) is not None: + return AggregateNumber( + count=a.count, + maximum=a.maximum, + mean=a.mean, + median=a.median, + minimum=a.minimum, + mode=a.mode, + sum_=a.sum, + ) + elif (a := aggregation.boolean) is not None: + return AggregateBoolean( + count=a.count, + percentage_false=a.percentage_false, + percentage_true=a.percentage_true, + total_false=a.total_false, + total_true=a.total_true, + ) + elif (a := aggregation.date) is not None: + return AggregateDate( + count=a.count, + maximum=a.maximum, + median=a.median, + minimum=a.minimum, + mode=a.mode, + ) + elif (a := aggregation.reference) is not None: + return AggregateReference(pointing_to=list(a.pointing_to)) + else: + raise ValueError( + f"Unknown aggregation type {aggregation} encountered in _Aggregate.__parse_property_grpc()" + ) + @staticmethod def 
_add_groupby_to_builder( builder: AggregateBuilder, group_by: Union[str, GroupByAggregate, None] diff --git a/weaviate/collections/aggregations/over_all.py b/weaviate/collections/aggregations/over_all.py index bacdac2ff..4c03e7e38 100644 --- a/weaviate/collections/aggregations/over_all.py +++ b/weaviate/collections/aggregations/over_all.py @@ -9,6 +9,7 @@ GroupByAggregate, ) from weaviate.collections.classes.filters import _Filters +from weaviate.collections.filters import _FilterToGRPC class _OverAllAsync(_AggregateAsync): @@ -46,14 +47,32 @@ async def over_all( if (return_metrics is None or isinstance(return_metrics, list)) else [return_metrics] ) - builder = self._base(return_metrics, filters, total_count) - builder = self._add_groupby_to_builder(builder, group_by) - res = await self._do(builder) - return ( - self._to_aggregate_result(res, return_metrics) - if group_by is None - else self._to_group_by_result(res, return_metrics) - ) + if isinstance(group_by, str): + group_by = GroupByAggregate(prop=group_by) + + if self._connection._weaviate_version.is_lower_than(1, 29, 0): + # use gql, remove once 1.29 is the minimum supported version + builder = self._base(return_metrics, filters, total_count) + builder = self._add_groupby_to_builder(builder, group_by) + res = await self._do(builder) + return ( + self._to_aggregate_result(res, return_metrics) + if group_by is None + else self._to_group_by_result(res, return_metrics) + ) + else: + # use grpc + reply = await self._grpc.over_all( + aggregations=( + [metric.to_grpc() for metric in return_metrics] + if return_metrics is not None + else [] + ), + filters=_FilterToGRPC.convert(filters) if filters is not None else None, + group_by=group_by._to_grpc() if group_by is not None else None, + objects_count=total_count, + ) + return self._to_result(reply) @syncify.convert diff --git a/weaviate/collections/classes/aggregate.py b/weaviate/collections/classes/aggregate.py index 38243b574..c3f66cb44 100644 --- 
a/weaviate/collections/classes/aggregate.py +++ b/weaviate/collections/classes/aggregate.py @@ -9,7 +9,8 @@ from pydantic import BaseModel, Field -from weaviate.collections.classes.types import _WeaviateInput +from weaviate.collections.classes.types import _WeaviateInput, GeoCoordinate +from weaviate.proto.v1 import aggregate_pb2 N = TypeVar("N", int, float) @@ -67,12 +68,11 @@ class AggregateBoolean: total_true: Optional[int] -# Aggregate references currently bugged on Weaviate's side -# @dataclass -# class AggregateReference: -# """The aggregation result for a cross-reference property.""" +@dataclass +class AggregateReference: + """The aggregation result for a cross-reference property.""" -# pointing_to: Optional[str] + pointing_to: Optional[List[str]] @dataclass @@ -92,7 +92,7 @@ class AggregateDate: AggregateText, AggregateBoolean, AggregateDate, - # AggregateReference, # Aggregate references currently bugged on Weaviate's side + AggregateReference, ] AProperties = Dict[str, AggregateResult] @@ -111,7 +111,9 @@ class GroupedBy: """The property that the collection was grouped by.""" prop: str - value: str + value: Union[ + str, int, float, bool, List[str], List[int], List[float], List[bool], GeoCoordinate + ] @dataclass @@ -134,6 +136,12 @@ class _MetricsBase(BaseModel): property_name: str count: bool + def to_gql(self) -> str: + raise NotImplementedError + + def to_grpc(self) -> aggregate_pb2.AggregateRequest.Aggregation: + raise NotImplementedError + class _MetricsText(_MetricsBase): top_occurrences_count: bool @@ -157,6 +165,16 @@ def to_gql(self) -> str: ) return f"{self.property_name} {{ {body} }}" + def to_grpc(self) -> aggregate_pb2.AggregateRequest.Aggregation: + return aggregate_pb2.AggregateRequest.Aggregation( + property=self.property_name, + text=aggregate_pb2.AggregateRequest.Aggregation.Text( + count=self.count, + top_occurences=self.top_occurrences_count, + top_occurences_limit=self.min_occurrences, + ), + ) + class _MetricsNum(_MetricsBase): 
maximum: bool @@ -182,11 +200,35 @@ def to_gql(self) -> str: class _MetricsInteger(_MetricsNum): - pass + def to_grpc(self) -> aggregate_pb2.AggregateRequest.Aggregation: + return aggregate_pb2.AggregateRequest.Aggregation( + property=self.property_name, + int=aggregate_pb2.AggregateRequest.Aggregation.Integer( + count=self.count, + maximum=self.maximum, + mean=self.mean, + median=self.median, + minimum=self.minimum, + mode=self.mode, + sum=self.sum_, + ), + ) class _MetricsNumber(_MetricsNum): - pass + def to_grpc(self) -> aggregate_pb2.AggregateRequest.Aggregation: + return aggregate_pb2.AggregateRequest.Aggregation( + property=self.property_name, + number=aggregate_pb2.AggregateRequest.Aggregation.Number( + count=self.count, + maximum=self.maximum, + mean=self.mean, + median=self.median, + minimum=self.minimum, + mode=self.mode, + sum=self.sum_, + ), + ) class _MetricsBoolean(_MetricsBase): @@ -207,6 +249,18 @@ def to_gql(self) -> str: ) return f"{self.property_name} {{ {body} }}" + def to_grpc(self) -> aggregate_pb2.AggregateRequest.Aggregation: + return aggregate_pb2.AggregateRequest.Aggregation( + property=self.property_name, + boolean=aggregate_pb2.AggregateRequest.Aggregation.Boolean( + count=self.count, + percentage_false=self.percentage_false, + percentage_true=self.percentage_true, + total_false=self.total_false, + total_true=self.total_true, + ), + ) + class _MetricsDate(_MetricsBase): maximum: bool @@ -226,19 +280,38 @@ def to_gql(self) -> str: ) return f"{self.property_name} {{ {body} }}" + def to_grpc(self) -> aggregate_pb2.AggregateRequest.Aggregation: + return aggregate_pb2.AggregateRequest.Aggregation( + property=self.property_name, + date=aggregate_pb2.AggregateRequest.Aggregation.Date( + count=self.count, + maximum=self.maximum, + median=self.median, + minimum=self.minimum, + mode=self.mode, + ), + ) -# Aggregate references currently bugged on Weaviate's side -# class _MetricsReference(BaseModel): -# property_name: str -# pointing_to: bool -# 
def to_gql(self) -> str: -# body = " ".join( -# [ -# "pointingTo" if self.pointing_to else "", -# ] -# ) -# return f"{self.property_name} {{ {body} }}" +class _MetricsReference(BaseModel): + property_name: str + pointing_to: bool + + def to_gql(self) -> str: + body = " ".join( + [ + "pointingTo" if self.pointing_to else "", + ] + ) + return f"{self.property_name} {{ {body} }}" + + def to_grpc(self) -> aggregate_pb2.AggregateRequest.Aggregation: + return aggregate_pb2.AggregateRequest.Aggregation( + property=self.property_name, + reference=aggregate_pb2.AggregateRequest.Aggregation.Reference( + pointing_to=self.pointing_to, + ), + ) _Metrics = Union[ @@ -247,7 +320,7 @@ def to_gql(self) -> str: _MetricsNumber, _MetricsDate, _MetricsBoolean, - # _MetricsReference, # Aggregate references currently bugged on Weaviate's side + _MetricsReference, ] PropertiesMetrics = Union[_Metrics, List[_Metrics]] @@ -259,6 +332,12 @@ class GroupByAggregate(_WeaviateInput): prop: str limit: Optional[int] = Field(default=None) + def _to_grpc(self) -> aggregate_pb2.AggregateRequest.GroupBy: + return aggregate_pb2.AggregateRequest.GroupBy( + collection="", + property=self.prop, + ) + class Metrics: """Define the metrics to be returned based on a property when aggregating over a collection. 
diff --git a/weaviate/collections/collection/async_.py b/weaviate/collections/collection/async_.py index a920b6e46..5aaf79050 100644 --- a/weaviate/collections/collection/async_.py +++ b/weaviate/collections/collection/async_.py @@ -112,7 +112,7 @@ async def length(self) -> int: assert total is not None return total else: - return await self.__aggregate_grpc.meta_count() + return await self.__aggregate_grpc.objects_count() async def to_string(self) -> str: """Return a string representation of the collection object.""" diff --git a/weaviate/collections/collection/sync.py b/weaviate/collections/collection/sync.py index 1ac6a295e..5b9e7f121 100644 --- a/weaviate/collections/collection/sync.py +++ b/weaviate/collections/collection/sync.py @@ -151,7 +151,7 @@ def __len__(self) -> int: return total else: return _EventLoopSingleton.get_instance().run_until_complete( - self.__aggregate_grpc.meta_count + self.__aggregate_grpc.objects_count ) def __str__(self) -> str: diff --git a/weaviate/collections/grpc/aggregate.py b/weaviate/collections/grpc/aggregate.py index a60e1a79e..bbd5e1225 100644 --- a/weaviate/collections/grpc/aggregate.py +++ b/weaviate/collections/grpc/aggregate.py @@ -1,4 +1,4 @@ -from typing import Optional, cast +from typing import List, Optional, cast from grpc.aio import AioRpcError # type: ignore @@ -11,7 +11,7 @@ WeaviateQueryError, WeaviateRetryError, ) -from weaviate.proto.v1 import aggregate_pb2 +from weaviate.proto.v1 import aggregate_pb2, base_pb2 class _AggregateGRPC(_BaseGRPC): @@ -28,14 +28,41 @@ def __init__( self._tenant = tenant self._validate_arguments = validate_arguments - async def meta_count(self) -> int: - res = await self.__call(self.__create_request(meta_count=True)) - return res.result.groups[0].count + async def objects_count(self) -> int: + res = await self.__call(self.__create_request(objects_count=True)) + return res.result.groups[0].objects_count - def __create_request(self, *, meta_count: bool = False) -> 
aggregate_pb2.AggregateRequest: + async def over_all( + self, + *, + aggregations: List[aggregate_pb2.AggregateRequest.Aggregation], + filters: Optional[base_pb2.Filters], + group_by: Optional[aggregate_pb2.AggregateRequest.GroupBy], + objects_count: bool = False, + ) -> aggregate_pb2.AggregateReply: + return await self.__call( + self.__create_request( + aggregations=aggregations, + filters=filters, + group_by=group_by, + objects_count=objects_count, + ) + ) + + def __create_request( + self, + *, + aggregations: Optional[List[aggregate_pb2.AggregateRequest.Aggregation]] = None, + filters: Optional[base_pb2.Filters] = None, + group_by: Optional[aggregate_pb2.AggregateRequest.GroupBy] = None, + objects_count: bool = False, + ) -> aggregate_pb2.AggregateRequest: return aggregate_pb2.AggregateRequest( collection=self._name, - meta_count=meta_count, + aggregations=aggregations, + filters=filters, + group_by=group_by, + objects_count=objects_count, tenant=self._tenant, ) diff --git a/weaviate/proto/v1/aggregate_pb2.py b/weaviate/proto/v1/aggregate_pb2.py index 2b56b9844..627806fbf 100644 --- a/weaviate/proto/v1/aggregate_pb2.py +++ b/weaviate/proto/v1/aggregate_pb2.py @@ -18,8 +18,11 @@ _sym_db = _symbol_database.Default() +from weaviate.proto.v1 import base_pb2 as v1_dot_base__pb2 + + DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x12v1/aggregate.proto\x12\x0bweaviate.v1"v\n\x10\x41ggregateRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x12\n\nmeta_count\x18\x14 \x01(\x08\x12\x19\n\x0cobject_limit\x18\x1e \x01(\rH\x00\x88\x01\x01\x12\x0e\n\x06tenant\x18\n \x01(\tB\x0f\n\r_object_limit"L\n\x0e\x41ggregateReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12,\n\x06result\x18\x02 \x01(\x0b\x32\x1c.weaviate.v1.AggregateResult"D\n\x0f\x41ggregateResult\x12\x31\n\x06groups\x18\x01 \x03(\x0b\x32!.weaviate.v1.AggregateGroupResult"%\n\x14\x41ggregateGroupResult\x12\r\n\x05\x63ount\x18\x01 
\x01(\x03\x42s\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoAggregateZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' + b'\n\x12v1/aggregate.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto"\xed\x0b\n\x10\x41ggregateRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12\x15\n\robjects_count\x18\x14 \x01(\x08\x12?\n\x0c\x61ggregations\x18\x15 \x03(\x0b\x32).weaviate.v1.AggregateRequest.Aggregation\x12\x19\n\x0cobject_limit\x18\x1e \x01(\rH\x00\x88\x01\x01\x12<\n\x08group_by\x18\x1f \x01(\x0b\x32%.weaviate.v1.AggregateRequest.GroupByH\x01\x88\x01\x01\x12\x12\n\x05limit\x18 \x01(\rH\x02\x88\x01\x01\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x03\x88\x01\x01\x1a\xde\x08\n\x0b\x41ggregation\x12\x10\n\x08property\x18\x01 \x01(\t\x12@\n\x03int\x18\x02 \x01(\x0b\x32\x31.weaviate.v1.AggregateRequest.Aggregation.IntegerH\x00\x12\x42\n\x06number\x18\x03 \x01(\x0b\x32\x30.weaviate.v1.AggregateRequest.Aggregation.NumberH\x00\x12>\n\x04text\x18\x04 \x01(\x0b\x32..weaviate.v1.AggregateRequest.Aggregation.TextH\x00\x12\x44\n\x07\x62oolean\x18\x05 \x01(\x0b\x32\x31.weaviate.v1.AggregateRequest.Aggregation.BooleanH\x00\x12>\n\x04\x64\x61te\x18\x06 \x01(\x0b\x32..weaviate.v1.AggregateRequest.Aggregation.DateH\x00\x12H\n\treference\x18\x07 \x01(\x0b\x32\x33.weaviate.v1.AggregateRequest.Aggregation.ReferenceH\x00\x1a\x81\x01\n\x07Integer\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0b\n\x03sum\x18\x03 \x01(\x08\x12\x0c\n\x04mean\x18\x04 \x01(\x08\x12\x0c\n\x04mode\x18\x05 \x01(\x08\x12\x0e\n\x06median\x18\x06 \x01(\x08\x12\x0f\n\x07maximum\x18\x07 \x01(\x08\x12\x0f\n\x07minimum\x18\x08 \x01(\x08\x1a\x80\x01\n\x06Number\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0b\n\x03sum\x18\x03 \x01(\x08\x12\x0c\n\x04mean\x18\x04 \x01(\x08\x12\x0c\n\x04mode\x18\x05 \x01(\x08\x12\x0e\n\x06median\x18\x06 \x01(\x08\x12\x0f\n\x07maximum\x18\x07 
\x01(\x08\x12\x0f\n\x07minimum\x18\x08 \x01(\x08\x1aw\n\x04Text\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x16\n\x0etop_occurences\x18\x03 \x01(\x08\x12!\n\x14top_occurences_limit\x18\x04 \x01(\rH\x00\x88\x01\x01\x42\x17\n\x15_top_occurences_limit\x1a\x82\x01\n\x07\x42oolean\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x12\n\ntotal_true\x18\x03 \x01(\x08\x12\x13\n\x0btotal_false\x18\x04 \x01(\x08\x12\x17\n\x0fpercentage_true\x18\x05 \x01(\x08\x12\x18\n\x10percentage_false\x18\x06 \x01(\x08\x1a\x63\n\x04\x44\x61te\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0e\n\x06median\x18\x03 \x01(\x08\x12\x0c\n\x04mode\x18\x04 \x01(\x08\x12\x0f\n\x07maximum\x18\x05 \x01(\x08\x12\x0f\n\x07minimum\x18\x06 \x01(\x08\x1a.\n\tReference\x12\x0c\n\x04type\x18\x01 \x01(\x08\x12\x13\n\x0bpointing_to\x18\x02 \x01(\x08\x42\r\n\x0b\x61ggregation\x1a/\n\x07GroupBy\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x10\n\x08property\x18\x02 \x01(\tB\x0f\n\r_object_limitB\x0b\n\t_group_byB\x08\n\x06_limitB\n\n\x08_filters"\x89\x01\n\x0e\x41ggregateReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12\x32\n\x06result\x18\x02 \x01(\x0b\x32".weaviate.v1.AggregateReply.Result\x1a\x35\n\x06Result\x12+\n\x06groups\x18\x01 \x03(\x0b\x32\x1b.weaviate.v1.AggregateGroup"\xed\x13\n\x0e\x41ggregateGroup\x12\x1a\n\robjects_count\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x43\n\x0c\x61ggregations\x18\x02 \x01(\x0b\x32(.weaviate.v1.AggregateGroup.AggregationsH\x01\x88\x01\x01\x12>\n\ngrouped_by\x18\x03 \x01(\x0b\x32%.weaviate.v1.AggregateGroup.GroupedByH\x02\x88\x01\x01\x1a\xc0\x0f\n\x0c\x41ggregations\x12J\n\x0c\x61ggregations\x18\x01 \x03(\x0b\x32\x34.weaviate.v1.AggregateGroup.Aggregations.Aggregation\x1a\xe3\x0e\n\x0b\x41ggregation\x12\x10\n\x08property\x18\x01 \x01(\t\x12K\n\x03int\x18\x02 \x01(\x0b\x32<.weaviate.v1.AggregateGroup.Aggregations.Aggregation.IntegerH\x00\x12M\n\x06number\x18\x03 
\x01(\x0b\x32;.weaviate.v1.AggregateGroup.Aggregations.Aggregation.NumberH\x00\x12I\n\x04text\x18\x04 \x01(\x0b\x32\x39.weaviate.v1.AggregateGroup.Aggregations.Aggregation.TextH\x00\x12O\n\x07\x62oolean\x18\x05 \x01(\x0b\x32<.weaviate.v1.AggregateGroup.Aggregations.Aggregation.BooleanH\x00\x12I\n\x04\x64\x61te\x18\x06 \x01(\x0b\x32\x39.weaviate.v1.AggregateGroup.Aggregations.Aggregation.DateH\x00\x12S\n\treference\x18\x07 \x01(\x0b\x32>.weaviate.v1.AggregateGroup.Aggregations.Aggregation.ReferenceH\x00\x1a\xf9\x01\n\x07Integer\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04mean\x18\x03 \x01(\x01H\x02\x88\x01\x01\x12\x13\n\x06median\x18\x04 \x01(\x01H\x03\x88\x01\x01\x12\x11\n\x04mode\x18\x05 \x01(\x03H\x04\x88\x01\x01\x12\x14\n\x07maximum\x18\x06 \x01(\x03H\x05\x88\x01\x01\x12\x14\n\x07minimum\x18\x07 \x01(\x03H\x06\x88\x01\x01\x12\x10\n\x03sum\x18\x08 \x01(\x03H\x07\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\x07\n\x05_meanB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimumB\x06\n\x04_sum\x1a\xf8\x01\n\x06Number\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04mean\x18\x03 \x01(\x01H\x02\x88\x01\x01\x12\x13\n\x06median\x18\x04 \x01(\x01H\x03\x88\x01\x01\x12\x11\n\x04mode\x18\x05 \x01(\x01H\x04\x88\x01\x01\x12\x14\n\x07maximum\x18\x06 \x01(\x01H\x05\x88\x01\x01\x12\x14\n\x07minimum\x18\x07 \x01(\x01H\x06\x88\x01\x01\x12\x10\n\x03sum\x18\x08 \x01(\x01H\x07\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\x07\n\x05_meanB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimumB\x06\n\x04_sum\x1a\xe4\x02\n\x04Text\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x65\n\x0etop_occurences\x18\x03 
\x01(\x0b\x32H.weaviate.v1.AggregateGroup.Aggregations.Aggregation.Text.TopOccurrencesH\x02\x88\x01\x01\x1a\xa7\x01\n\x0eTopOccurrences\x12\x65\n\x05items\x18\x01 \x03(\x0b\x32V.weaviate.v1.AggregateGroup.Aggregations.Aggregation.Text.TopOccurrences.TopOccurrence\x1a.\n\rTopOccurrence\x12\r\n\x05value\x18\x01 \x01(\t\x12\x0e\n\x06occurs\x18\x02 \x01(\x03\x42\x08\n\x06_countB\x07\n\x05_typeB\x11\n\x0f_top_occurences\x1a\xfb\x01\n\x07\x42oolean\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ntotal_true\x18\x03 \x01(\x03H\x02\x88\x01\x01\x12\x18\n\x0btotal_false\x18\x04 \x01(\x03H\x03\x88\x01\x01\x12\x1c\n\x0fpercentage_true\x18\x05 \x01(\x01H\x04\x88\x01\x01\x12\x1d\n\x10percentage_false\x18\x06 \x01(\x01H\x05\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\r\n\x0b_total_trueB\x0e\n\x0c_total_falseB\x12\n\x10_percentage_trueB\x13\n\x11_percentage_false\x1a\xc0\x01\n\x04\x44\x61te\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x13\n\x06median\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x11\n\x04mode\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x14\n\x07maximum\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x14\n\x07minimum\x18\x06 \x01(\tH\x05\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimum\x1a<\n\tReference\x12\x11\n\x04type\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x0bpointing_to\x18\x02 \x03(\tB\x07\n\x05_typeB\r\n\x0b\x61ggregation\x1a\xc4\x02\n\tGroupedBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x0e\n\x04text\x18\x02 \x01(\tH\x00\x12\r\n\x03int\x18\x03 \x01(\x03H\x00\x12\x11\n\x07\x62oolean\x18\x04 \x01(\x08H\x00\x12\x10\n\x06number\x18\x05 \x01(\x01H\x00\x12\'\n\x05texts\x18\x06 \x01(\x0b\x32\x16.weaviate.v1.TextArrayH\x00\x12%\n\x04ints\x18\x07 \x01(\x0b\x32\x15.weaviate.v1.IntArrayH\x00\x12-\n\x08\x62ooleans\x18\x08 \x01(\x0b\x32\x19.weaviate.v1.BooleanArrayH\x00\x12+\n\x07numbers\x18\t 
\x01(\x0b\x32\x18.weaviate.v1.NumberArrayH\x00\x12\x30\n\x03geo\x18\n \x01(\x0b\x32!.weaviate.v1.GeoCoordinatesFilterH\x00\x42\x07\n\x05valueB\x10\n\x0e_objects_countB\x0f\n\r_aggregationsB\r\n\x0b_grouped_byBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoAggregateZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' ) _globals = globals() @@ -30,12 +33,56 @@ _globals["DESCRIPTOR"]._serialized_options = ( b"\n#io.weaviate.client.grpc.protocol.v1B\026WeaviateProtoAggregateZ4github.com/weaviate/weaviate/grpc/generated;protocol" ) - _globals["_AGGREGATEREQUEST"]._serialized_start = 35 - _globals["_AGGREGATEREQUEST"]._serialized_end = 153 - _globals["_AGGREGATEREPLY"]._serialized_start = 155 - _globals["_AGGREGATEREPLY"]._serialized_end = 231 - _globals["_AGGREGATERESULT"]._serialized_start = 233 - _globals["_AGGREGATERESULT"]._serialized_end = 301 - _globals["_AGGREGATEGROUPRESULT"]._serialized_start = 303 - _globals["_AGGREGATEGROUPRESULT"]._serialized_end = 340 + _globals["_AGGREGATEREQUEST"]._serialized_start = 51 + _globals["_AGGREGATEREQUEST"]._serialized_end = 1568 + _globals["_AGGREGATEREQUEST_AGGREGATION"]._serialized_start = 349 + _globals["_AGGREGATEREQUEST_AGGREGATION"]._serialized_end = 1467 + _globals["_AGGREGATEREQUEST_AGGREGATION_INTEGER"]._serialized_start = 789 + _globals["_AGGREGATEREQUEST_AGGREGATION_INTEGER"]._serialized_end = 918 + _globals["_AGGREGATEREQUEST_AGGREGATION_NUMBER"]._serialized_start = 921 + _globals["_AGGREGATEREQUEST_AGGREGATION_NUMBER"]._serialized_end = 1049 + _globals["_AGGREGATEREQUEST_AGGREGATION_TEXT"]._serialized_start = 1051 + _globals["_AGGREGATEREQUEST_AGGREGATION_TEXT"]._serialized_end = 1170 + _globals["_AGGREGATEREQUEST_AGGREGATION_BOOLEAN"]._serialized_start = 1173 + _globals["_AGGREGATEREQUEST_AGGREGATION_BOOLEAN"]._serialized_end = 1303 + _globals["_AGGREGATEREQUEST_AGGREGATION_DATE"]._serialized_start = 1305 + _globals["_AGGREGATEREQUEST_AGGREGATION_DATE"]._serialized_end = 1404 + 
_globals["_AGGREGATEREQUEST_AGGREGATION_REFERENCE"]._serialized_start = 1406 + _globals["_AGGREGATEREQUEST_AGGREGATION_REFERENCE"]._serialized_end = 1452 + _globals["_AGGREGATEREQUEST_GROUPBY"]._serialized_start = 1469 + _globals["_AGGREGATEREQUEST_GROUPBY"]._serialized_end = 1516 + _globals["_AGGREGATEREPLY"]._serialized_start = 1571 + _globals["_AGGREGATEREPLY"]._serialized_end = 1708 + _globals["_AGGREGATEREPLY_RESULT"]._serialized_start = 1655 + _globals["_AGGREGATEREPLY_RESULT"]._serialized_end = 1708 + _globals["_AGGREGATEGROUP"]._serialized_start = 1711 + _globals["_AGGREGATEGROUP"]._serialized_end = 4252 + _globals["_AGGREGATEGROUP_AGGREGATIONS"]._serialized_start = 1891 + _globals["_AGGREGATEGROUP_AGGREGATIONS"]._serialized_end = 3875 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION"]._serialized_start = 1984 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION"]._serialized_end = 3875 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_INTEGER"]._serialized_start = 2490 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_INTEGER"]._serialized_end = 2739 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_NUMBER"]._serialized_start = 2742 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_NUMBER"]._serialized_end = 2990 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT"]._serialized_start = 2993 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT"]._serialized_end = 3349 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES"]._serialized_start = ( + 3144 + ) + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES"]._serialized_end = 3311 + _globals[ + "_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES_TOPOCCURRENCE" + ]._serialized_start = 3265 + _globals[ + "_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES_TOPOCCURRENCE" + ]._serialized_end = 3311 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_BOOLEAN"]._serialized_start = 3352 + 
_globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_BOOLEAN"]._serialized_end = 3603 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_DATE"]._serialized_start = 3606 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_DATE"]._serialized_end = 3798 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_REFERENCE"]._serialized_start = 3800 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_REFERENCE"]._serialized_end = 3860 + _globals["_AGGREGATEGROUP_GROUPEDBY"]._serialized_start = 3878 + _globals["_AGGREGATEGROUP_GROUPEDBY"]._serialized_end = 4202 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/aggregate_pb2.pyi b/weaviate/proto/v1/aggregate_pb2.pyi index ede2d6bc6..4d2b10726 100644 --- a/weaviate/proto/v1/aggregate_pb2.pyi +++ b/weaviate/proto/v1/aggregate_pb2.pyi @@ -1,3 +1,4 @@ +from weaviate.proto.v1 import base_pb2 as _base_pb2 from google.protobuf.internal import containers as _containers from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message @@ -12,45 +13,532 @@ from typing import ( DESCRIPTOR: _descriptor.FileDescriptor class AggregateRequest(_message.Message): - __slots__ = ("collection", "meta_count", "object_limit", "tenant") + __slots__ = ( + "collection", + "tenant", + "objects_count", + "aggregations", + "object_limit", + "group_by", + "limit", + "filters", + ) + + class Aggregation(_message.Message): + __slots__ = ("property", "int", "number", "text", "boolean", "date", "reference") + + class Integer(_message.Message): + __slots__ = ("count", "type", "sum", "mean", "mode", "median", "maximum", "minimum") + COUNT_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + SUM_FIELD_NUMBER: _ClassVar[int] + MEAN_FIELD_NUMBER: _ClassVar[int] + MODE_FIELD_NUMBER: _ClassVar[int] + MEDIAN_FIELD_NUMBER: _ClassVar[int] + MAXIMUM_FIELD_NUMBER: _ClassVar[int] + MINIMUM_FIELD_NUMBER: _ClassVar[int] + count: bool + type: bool + sum: bool + mean: bool + mode: bool + median: bool + 
maximum: bool + minimum: bool + def __init__( + self, + count: bool = ..., + type: bool = ..., + sum: bool = ..., + mean: bool = ..., + mode: bool = ..., + median: bool = ..., + maximum: bool = ..., + minimum: bool = ..., + ) -> None: ... + + class Number(_message.Message): + __slots__ = ("count", "type", "sum", "mean", "mode", "median", "maximum", "minimum") + COUNT_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + SUM_FIELD_NUMBER: _ClassVar[int] + MEAN_FIELD_NUMBER: _ClassVar[int] + MODE_FIELD_NUMBER: _ClassVar[int] + MEDIAN_FIELD_NUMBER: _ClassVar[int] + MAXIMUM_FIELD_NUMBER: _ClassVar[int] + MINIMUM_FIELD_NUMBER: _ClassVar[int] + count: bool + type: bool + sum: bool + mean: bool + mode: bool + median: bool + maximum: bool + minimum: bool + def __init__( + self, + count: bool = ..., + type: bool = ..., + sum: bool = ..., + mean: bool = ..., + mode: bool = ..., + median: bool = ..., + maximum: bool = ..., + minimum: bool = ..., + ) -> None: ... + + class Text(_message.Message): + __slots__ = ("count", "type", "top_occurences", "top_occurences_limit") + COUNT_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + TOP_OCCURENCES_FIELD_NUMBER: _ClassVar[int] + TOP_OCCURENCES_LIMIT_FIELD_NUMBER: _ClassVar[int] + count: bool + type: bool + top_occurences: bool + top_occurences_limit: int + def __init__( + self, + count: bool = ..., + type: bool = ..., + top_occurences: bool = ..., + top_occurences_limit: _Optional[int] = ..., + ) -> None: ... 
+ + class Boolean(_message.Message): + __slots__ = ( + "count", + "type", + "total_true", + "total_false", + "percentage_true", + "percentage_false", + ) + COUNT_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + TOTAL_TRUE_FIELD_NUMBER: _ClassVar[int] + TOTAL_FALSE_FIELD_NUMBER: _ClassVar[int] + PERCENTAGE_TRUE_FIELD_NUMBER: _ClassVar[int] + PERCENTAGE_FALSE_FIELD_NUMBER: _ClassVar[int] + count: bool + type: bool + total_true: bool + total_false: bool + percentage_true: bool + percentage_false: bool + def __init__( + self, + count: bool = ..., + type: bool = ..., + total_true: bool = ..., + total_false: bool = ..., + percentage_true: bool = ..., + percentage_false: bool = ..., + ) -> None: ... + + class Date(_message.Message): + __slots__ = ("count", "type", "median", "mode", "maximum", "minimum") + COUNT_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + MEDIAN_FIELD_NUMBER: _ClassVar[int] + MODE_FIELD_NUMBER: _ClassVar[int] + MAXIMUM_FIELD_NUMBER: _ClassVar[int] + MINIMUM_FIELD_NUMBER: _ClassVar[int] + count: bool + type: bool + median: bool + mode: bool + maximum: bool + minimum: bool + def __init__( + self, + count: bool = ..., + type: bool = ..., + median: bool = ..., + mode: bool = ..., + maximum: bool = ..., + minimum: bool = ..., + ) -> None: ... + + class Reference(_message.Message): + __slots__ = ("type", "pointing_to") + TYPE_FIELD_NUMBER: _ClassVar[int] + POINTING_TO_FIELD_NUMBER: _ClassVar[int] + type: bool + pointing_to: bool + def __init__(self, type: bool = ..., pointing_to: bool = ...) -> None: ... 
+ + PROPERTY_FIELD_NUMBER: _ClassVar[int] + INT_FIELD_NUMBER: _ClassVar[int] + NUMBER_FIELD_NUMBER: _ClassVar[int] + TEXT_FIELD_NUMBER: _ClassVar[int] + BOOLEAN_FIELD_NUMBER: _ClassVar[int] + DATE_FIELD_NUMBER: _ClassVar[int] + REFERENCE_FIELD_NUMBER: _ClassVar[int] + property: str + int: AggregateRequest.Aggregation.Integer + number: AggregateRequest.Aggregation.Number + text: AggregateRequest.Aggregation.Text + boolean: AggregateRequest.Aggregation.Boolean + date: AggregateRequest.Aggregation.Date + reference: AggregateRequest.Aggregation.Reference + def __init__( + self, + property: _Optional[str] = ..., + int: _Optional[_Union[AggregateRequest.Aggregation.Integer, _Mapping]] = ..., + number: _Optional[_Union[AggregateRequest.Aggregation.Number, _Mapping]] = ..., + text: _Optional[_Union[AggregateRequest.Aggregation.Text, _Mapping]] = ..., + boolean: _Optional[_Union[AggregateRequest.Aggregation.Boolean, _Mapping]] = ..., + date: _Optional[_Union[AggregateRequest.Aggregation.Date, _Mapping]] = ..., + reference: _Optional[_Union[AggregateRequest.Aggregation.Reference, _Mapping]] = ..., + ) -> None: ... + + class GroupBy(_message.Message): + __slots__ = ("collection", "property") + COLLECTION_FIELD_NUMBER: _ClassVar[int] + PROPERTY_FIELD_NUMBER: _ClassVar[int] + collection: str + property: str + def __init__( + self, collection: _Optional[str] = ..., property: _Optional[str] = ... + ) -> None: ... 
+ COLLECTION_FIELD_NUMBER: _ClassVar[int] - META_COUNT_FIELD_NUMBER: _ClassVar[int] - OBJECT_LIMIT_FIELD_NUMBER: _ClassVar[int] TENANT_FIELD_NUMBER: _ClassVar[int] + OBJECTS_COUNT_FIELD_NUMBER: _ClassVar[int] + AGGREGATIONS_FIELD_NUMBER: _ClassVar[int] + OBJECT_LIMIT_FIELD_NUMBER: _ClassVar[int] + GROUP_BY_FIELD_NUMBER: _ClassVar[int] + LIMIT_FIELD_NUMBER: _ClassVar[int] + FILTERS_FIELD_NUMBER: _ClassVar[int] collection: str - meta_count: bool - object_limit: int tenant: str + objects_count: bool + aggregations: _containers.RepeatedCompositeFieldContainer[AggregateRequest.Aggregation] + object_limit: int + group_by: AggregateRequest.GroupBy + limit: int + filters: _base_pb2.Filters def __init__( self, collection: _Optional[str] = ..., - meta_count: bool = ..., - object_limit: _Optional[int] = ..., tenant: _Optional[str] = ..., + objects_count: bool = ..., + aggregations: _Optional[_Iterable[_Union[AggregateRequest.Aggregation, _Mapping]]] = ..., + object_limit: _Optional[int] = ..., + group_by: _Optional[_Union[AggregateRequest.GroupBy, _Mapping]] = ..., + limit: _Optional[int] = ..., + filters: _Optional[_Union[_base_pb2.Filters, _Mapping]] = ..., ) -> None: ... class AggregateReply(_message.Message): __slots__ = ("took", "result") + + class Result(_message.Message): + __slots__ = ("groups",) + GROUPS_FIELD_NUMBER: _ClassVar[int] + groups: _containers.RepeatedCompositeFieldContainer[AggregateGroup] + def __init__( + self, groups: _Optional[_Iterable[_Union[AggregateGroup, _Mapping]]] = ... + ) -> None: ... + TOOK_FIELD_NUMBER: _ClassVar[int] RESULT_FIELD_NUMBER: _ClassVar[int] took: float - result: AggregateResult + result: AggregateReply.Result def __init__( self, took: _Optional[float] = ..., - result: _Optional[_Union[AggregateResult, _Mapping]] = ..., + result: _Optional[_Union[AggregateReply.Result, _Mapping]] = ..., ) -> None: ... 
-class AggregateResult(_message.Message): - __slots__ = ("groups",) - GROUPS_FIELD_NUMBER: _ClassVar[int] - groups: _containers.RepeatedCompositeFieldContainer[AggregateGroupResult] +class AggregateGroup(_message.Message): + __slots__ = ("objects_count", "aggregations", "grouped_by") + + class Aggregations(_message.Message): + __slots__ = ("aggregations",) + + class Aggregation(_message.Message): + __slots__ = ("property", "int", "number", "text", "boolean", "date", "reference") + + class Integer(_message.Message): + __slots__ = ("count", "type", "mean", "median", "mode", "maximum", "minimum", "sum") + COUNT_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + MEAN_FIELD_NUMBER: _ClassVar[int] + MEDIAN_FIELD_NUMBER: _ClassVar[int] + MODE_FIELD_NUMBER: _ClassVar[int] + MAXIMUM_FIELD_NUMBER: _ClassVar[int] + MINIMUM_FIELD_NUMBER: _ClassVar[int] + SUM_FIELD_NUMBER: _ClassVar[int] + count: int + type: str + mean: float + median: float + mode: int + maximum: int + minimum: int + sum: int + def __init__( + self, + count: _Optional[int] = ..., + type: _Optional[str] = ..., + mean: _Optional[float] = ..., + median: _Optional[float] = ..., + mode: _Optional[int] = ..., + maximum: _Optional[int] = ..., + minimum: _Optional[int] = ..., + sum: _Optional[int] = ..., + ) -> None: ... 
+ + class Number(_message.Message): + __slots__ = ("count", "type", "mean", "median", "mode", "maximum", "minimum", "sum") + COUNT_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + MEAN_FIELD_NUMBER: _ClassVar[int] + MEDIAN_FIELD_NUMBER: _ClassVar[int] + MODE_FIELD_NUMBER: _ClassVar[int] + MAXIMUM_FIELD_NUMBER: _ClassVar[int] + MINIMUM_FIELD_NUMBER: _ClassVar[int] + SUM_FIELD_NUMBER: _ClassVar[int] + count: int + type: str + mean: float + median: float + mode: float + maximum: float + minimum: float + sum: float + def __init__( + self, + count: _Optional[int] = ..., + type: _Optional[str] = ..., + mean: _Optional[float] = ..., + median: _Optional[float] = ..., + mode: _Optional[float] = ..., + maximum: _Optional[float] = ..., + minimum: _Optional[float] = ..., + sum: _Optional[float] = ..., + ) -> None: ... + + class Text(_message.Message): + __slots__ = ("count", "type", "top_occurences") + + class TopOccurrences(_message.Message): + __slots__ = ("items",) + + class TopOccurrence(_message.Message): + __slots__ = ("value", "occurs") + VALUE_FIELD_NUMBER: _ClassVar[int] + OCCURS_FIELD_NUMBER: _ClassVar[int] + value: str + occurs: int + def __init__( + self, value: _Optional[str] = ..., occurs: _Optional[int] = ... + ) -> None: ... + + ITEMS_FIELD_NUMBER: _ClassVar[int] + items: _containers.RepeatedCompositeFieldContainer[ + AggregateGroup.Aggregations.Aggregation.Text.TopOccurrences.TopOccurrence + ] + def __init__( + self, + items: _Optional[ + _Iterable[ + _Union[ + AggregateGroup.Aggregations.Aggregation.Text.TopOccurrences.TopOccurrence, + _Mapping, + ] + ] + ] = ..., + ) -> None: ... 
+ + COUNT_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + TOP_OCCURENCES_FIELD_NUMBER: _ClassVar[int] + count: int + type: str + top_occurences: AggregateGroup.Aggregations.Aggregation.Text.TopOccurrences + def __init__( + self, + count: _Optional[int] = ..., + type: _Optional[str] = ..., + top_occurences: _Optional[ + _Union[ + AggregateGroup.Aggregations.Aggregation.Text.TopOccurrences, _Mapping + ] + ] = ..., + ) -> None: ... + + class Boolean(_message.Message): + __slots__ = ( + "count", + "type", + "total_true", + "total_false", + "percentage_true", + "percentage_false", + ) + COUNT_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + TOTAL_TRUE_FIELD_NUMBER: _ClassVar[int] + TOTAL_FALSE_FIELD_NUMBER: _ClassVar[int] + PERCENTAGE_TRUE_FIELD_NUMBER: _ClassVar[int] + PERCENTAGE_FALSE_FIELD_NUMBER: _ClassVar[int] + count: int + type: str + total_true: int + total_false: int + percentage_true: float + percentage_false: float + def __init__( + self, + count: _Optional[int] = ..., + type: _Optional[str] = ..., + total_true: _Optional[int] = ..., + total_false: _Optional[int] = ..., + percentage_true: _Optional[float] = ..., + percentage_false: _Optional[float] = ..., + ) -> None: ... + + class Date(_message.Message): + __slots__ = ("count", "type", "median", "mode", "maximum", "minimum") + COUNT_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + MEDIAN_FIELD_NUMBER: _ClassVar[int] + MODE_FIELD_NUMBER: _ClassVar[int] + MAXIMUM_FIELD_NUMBER: _ClassVar[int] + MINIMUM_FIELD_NUMBER: _ClassVar[int] + count: int + type: str + median: str + mode: str + maximum: str + minimum: str + def __init__( + self, + count: _Optional[int] = ..., + type: _Optional[str] = ..., + median: _Optional[str] = ..., + mode: _Optional[str] = ..., + maximum: _Optional[str] = ..., + minimum: _Optional[str] = ..., + ) -> None: ... 
+ + class Reference(_message.Message): + __slots__ = ("type", "pointing_to") + TYPE_FIELD_NUMBER: _ClassVar[int] + POINTING_TO_FIELD_NUMBER: _ClassVar[int] + type: str + pointing_to: _containers.RepeatedScalarFieldContainer[str] + def __init__( + self, type: _Optional[str] = ..., pointing_to: _Optional[_Iterable[str]] = ... + ) -> None: ... + + PROPERTY_FIELD_NUMBER: _ClassVar[int] + INT_FIELD_NUMBER: _ClassVar[int] + NUMBER_FIELD_NUMBER: _ClassVar[int] + TEXT_FIELD_NUMBER: _ClassVar[int] + BOOLEAN_FIELD_NUMBER: _ClassVar[int] + DATE_FIELD_NUMBER: _ClassVar[int] + REFERENCE_FIELD_NUMBER: _ClassVar[int] + property: str + int: AggregateGroup.Aggregations.Aggregation.Integer + number: AggregateGroup.Aggregations.Aggregation.Number + text: AggregateGroup.Aggregations.Aggregation.Text + boolean: AggregateGroup.Aggregations.Aggregation.Boolean + date: AggregateGroup.Aggregations.Aggregation.Date + reference: AggregateGroup.Aggregations.Aggregation.Reference + def __init__( + self, + property: _Optional[str] = ..., + int: _Optional[ + _Union[AggregateGroup.Aggregations.Aggregation.Integer, _Mapping] + ] = ..., + number: _Optional[ + _Union[AggregateGroup.Aggregations.Aggregation.Number, _Mapping] + ] = ..., + text: _Optional[ + _Union[AggregateGroup.Aggregations.Aggregation.Text, _Mapping] + ] = ..., + boolean: _Optional[ + _Union[AggregateGroup.Aggregations.Aggregation.Boolean, _Mapping] + ] = ..., + date: _Optional[ + _Union[AggregateGroup.Aggregations.Aggregation.Date, _Mapping] + ] = ..., + reference: _Optional[ + _Union[AggregateGroup.Aggregations.Aggregation.Reference, _Mapping] + ] = ..., + ) -> None: ... + + AGGREGATIONS_FIELD_NUMBER: _ClassVar[int] + aggregations: _containers.RepeatedCompositeFieldContainer[ + AggregateGroup.Aggregations.Aggregation + ] + def __init__( + self, + aggregations: _Optional[ + _Iterable[_Union[AggregateGroup.Aggregations.Aggregation, _Mapping]] + ] = ..., + ) -> None: ... 
+ + class GroupedBy(_message.Message): + __slots__ = ( + "path", + "text", + "int", + "boolean", + "number", + "texts", + "ints", + "booleans", + "numbers", + "geo", + ) + PATH_FIELD_NUMBER: _ClassVar[int] + TEXT_FIELD_NUMBER: _ClassVar[int] + INT_FIELD_NUMBER: _ClassVar[int] + BOOLEAN_FIELD_NUMBER: _ClassVar[int] + NUMBER_FIELD_NUMBER: _ClassVar[int] + TEXTS_FIELD_NUMBER: _ClassVar[int] + INTS_FIELD_NUMBER: _ClassVar[int] + BOOLEANS_FIELD_NUMBER: _ClassVar[int] + NUMBERS_FIELD_NUMBER: _ClassVar[int] + GEO_FIELD_NUMBER: _ClassVar[int] + path: _containers.RepeatedScalarFieldContainer[str] + text: str + int: int + boolean: bool + number: float + texts: _base_pb2.TextArray + ints: _base_pb2.IntArray + booleans: _base_pb2.BooleanArray + numbers: _base_pb2.NumberArray + geo: _base_pb2.GeoCoordinatesFilter + def __init__( + self, + path: _Optional[_Iterable[str]] = ..., + text: _Optional[str] = ..., + int: _Optional[int] = ..., + boolean: bool = ..., + number: _Optional[float] = ..., + texts: _Optional[_Union[_base_pb2.TextArray, _Mapping]] = ..., + ints: _Optional[_Union[_base_pb2.IntArray, _Mapping]] = ..., + booleans: _Optional[_Union[_base_pb2.BooleanArray, _Mapping]] = ..., + numbers: _Optional[_Union[_base_pb2.NumberArray, _Mapping]] = ..., + geo: _Optional[_Union[_base_pb2.GeoCoordinatesFilter, _Mapping]] = ..., + ) -> None: ... + + OBJECTS_COUNT_FIELD_NUMBER: _ClassVar[int] + AGGREGATIONS_FIELD_NUMBER: _ClassVar[int] + GROUPED_BY_FIELD_NUMBER: _ClassVar[int] + objects_count: int + aggregations: AggregateGroup.Aggregations + grouped_by: AggregateGroup.GroupedBy def __init__( - self, groups: _Optional[_Iterable[_Union[AggregateGroupResult, _Mapping]]] = ... + self, + objects_count: _Optional[int] = ..., + aggregations: _Optional[_Union[AggregateGroup.Aggregations, _Mapping]] = ..., + grouped_by: _Optional[_Union[AggregateGroup.GroupedBy, _Mapping]] = ..., ) -> None: ... 
- -class AggregateGroupResult(_message.Message): - __slots__ = ("count",) - COUNT_FIELD_NUMBER: _ClassVar[int] - count: int - def __init__(self, count: _Optional[int] = ...) -> None: ... From 148ae4eb680dd5b000d242b727daeabcb60eb075 Mon Sep 17 00:00:00 2001 From: Nate Wilkinson Date: Wed, 29 Jan 2025 02:48:42 -0700 Subject: [PATCH 20/48] do we append to tmp on every iteration no matter the type? --- weaviate/collections/grpc/query.py | 1 + 1 file changed, 1 insertion(+) diff --git a/weaviate/collections/grpc/query.py b/weaviate/collections/grpc/query.py index 25fdeeccd..8ee8871bc 100644 --- a/weaviate/collections/grpc/query.py +++ b/weaviate/collections/grpc/query.py @@ -1072,6 +1072,7 @@ def add_vector(val: Sequence[float], target_name: str) -> None: ], ) ) + target_vectors_tmp.append(key) elif isinstance(value, _ListOfVectorsQuery): for vec in value.vectors: add_vector(vec, key) From 07b537a67d545a5e201cea4821ab20cee5531548 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Wed, 29 Jan 2025 15:59:57 +0000 Subject: [PATCH 21/48] Add support for all searches using grpc aggregate --- .github/workflows/main.yaml | 2 +- integration/test_collection_aggregate.py | 21 +- .../collections/aggregations/aggregate.py | 84 +-- weaviate/collections/aggregations/hybrid.py | 65 +- .../collections/aggregations/near_image.py | 52 +- .../collections/aggregations/near_object.py | 49 +- .../collections/aggregations/near_text.py | 65 +- .../collections/aggregations/near_vector.py | 72 ++- weaviate/collections/aggregations/over_all.py | 5 +- .../collections/batch/grpc_batch_delete.py | 2 +- .../collections/batch/grpc_batch_objects.py | 2 +- weaviate/collections/classes/aggregate.py | 42 +- weaviate/collections/classes/grpc.py | 26 +- weaviate/collections/grpc/aggregate.py | 238 +++++++- weaviate/collections/grpc/query.py | 561 ++--------------- weaviate/collections/grpc/shared.py | 562 +++++++++++++++++- weaviate/collections/grpc/tenants.py | 2 +- weaviate/proto/v1/aggregate_pb2.py | 95 
+-- weaviate/proto/v1/aggregate_pb2.pyi | 41 ++ weaviate/proto/v1/base_search_pb2.py | 105 ++++ weaviate/proto/v1/base_search_pb2.pyi | 391 ++++++++++++ weaviate/proto/v1/base_search_pb2_grpc.py | 25 + weaviate/proto/v1/search_get_pb2.py | 135 +---- weaviate/proto/v1/search_get_pb2.pyi | 424 +------------ 24 files changed, 1829 insertions(+), 1237 deletions(-) create mode 100644 weaviate/proto/v1/base_search_pb2.py create mode 100644 weaviate/proto/v1/base_search_pb2.pyi create mode 100644 weaviate/proto/v1/base_search_pb2_grpc.py diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 3b192b31a..ad34d6b43 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -22,7 +22,7 @@ env: WEAVIATE_125: 1.25.29 WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 - WEAVIATE_128: 1.28.2-2c00437 + WEAVIATE_128: 1.28.4-73ecf75 jobs: lint-and-format: diff --git a/integration/test_collection_aggregate.py b/integration/test_collection_aggregate.py index 4ee34245d..fb3739d2a 100644 --- a/integration/test_collection_aggregate.py +++ b/integration/test_collection_aggregate.py @@ -201,17 +201,24 @@ def test_over_all_with_filters_ref(collection_factory: CollectionFactory) -> Non assert res.properties["text"].count == 1 assert res.properties["text"].top_occurrences[0].value == "two" - with pytest.raises(WeaviateInvalidInputError): - res = collection.aggregate.over_all( - filters=Filter.by_ref("ref") - .by_property("text") - .equal("one"), # gRPC-compat API not support by GQL aggregation - return_metrics=[Metrics("text").text(count=True, top_occurrences_value=True)], - ) + query = lambda: collection.aggregate.over_all( + filters=Filter.by_ref("ref").by_property("text").equal("one"), + return_metrics=[Metrics("text").text(count=True, top_occurrences_value=True)], + ) + if collection._connection._weaviate_version.is_lower_than(1, 28, 4): + with pytest.raises(WeaviateInvalidInputError): + query() + else: + res = query() + assert 
isinstance(res.properties["text"], AggregateText) + assert res.properties["text"].count == 1 + assert res.properties["text"].top_occurrences[0].value == "two" def test_wrong_aggregation(collection_factory: CollectionFactory) -> None: collection = collection_factory(properties=[Property(name="text", data_type=DataType.TEXT)]) + if collection._connection._weaviate_version.is_at_least(1, 28, 4): + pytest.skip("GQL is only used for versions 1.28.4 and lower") with pytest.raises(WeaviateQueryError) as e: collection.aggregate.over_all(total_count=False) assert ( diff --git a/weaviate/collections/aggregations/aggregate.py b/weaviate/collections/aggregations/aggregate.py index 1e6d3e414..9994043d8 100644 --- a/weaviate/collections/aggregations/aggregate.py +++ b/weaviate/collections/aggregations/aggregate.py @@ -88,11 +88,25 @@ def _to_aggregate_result( ) def _to_result( - self, response: aggregate_pb2.AggregateReply + self, response: aggregate_pb2.AggregateReply, is_group_by: bool ) -> Union[AggregateReturn, AggregateGroupByReturn]: if len(response.result.groups) == 0: raise WeaviateQueryError("No results found in the aggregation query!", "gRPC") - if len(response.result.groups) == 1: + if is_group_by: + AggregateGroupByReturn( + groups=[ + AggregateGroup( + grouped_by=self.__parse_grouped_by_value(group.grouped_by), + properties={ + aggregation.property: self.__parse_property_grpc(aggregation) + for aggregation in group.aggregations.aggregations + }, + total_count=group.objects_count, + ) + for group in response.result.groups + ] + ) + else: result = response.result.groups[0] return AggregateReturn( properties={ @@ -245,55 +259,55 @@ def __parse_property_gql(property_: dict, metric: _Metrics) -> AggregateResult: def __parse_property_grpc( aggregation: aggregate_pb2.AggregateGroup.Aggregations.Aggregation, ) -> AggregateResult: - if (a := aggregation.text) is not None: + if aggregation.HasField("text"): return AggregateText( - count=a.count, + 
count=aggregation.text.count, top_occurrences=[ TopOccurrence( count=top_occurrence.occurs, value=top_occurrence.value, ) - for top_occurrence in a.top_occurences.items + for top_occurrence in aggregation.text.top_occurences.items ], ) - elif (a := aggregation.int) is not None: + elif aggregation.HasField("int"): return AggregateInteger( - count=a.count, - maximum=a.maximum, - mean=a.mean, - median=a.median, - minimum=a.minimum, - mode=a.mode, - sum_=a.sum, + count=aggregation.int.count, + maximum=aggregation.int.maximum, + mean=aggregation.int.mean, + median=aggregation.int.median, + minimum=aggregation.int.minimum, + mode=aggregation.int.mode, + sum_=aggregation.int.sum, ) - elif (a := aggregation.number) is not None: + elif aggregation.HasField("number"): return AggregateNumber( - count=a.count, - maximum=a.maximum, - mean=a.mean, - median=a.median, - minimum=a.minimum, - mode=a.mode, - sum_=a.sum, + count=aggregation.number.count, + maximum=aggregation.number.maximum, + mean=aggregation.number.mean, + median=aggregation.number.median, + minimum=aggregation.number.minimum, + mode=aggregation.number.mode, + sum_=aggregation.number.sum, ) - elif (a := aggregation.boolean) is not None: + elif aggregation.HasField("boolean"): return AggregateBoolean( - count=a.count, - percentage_false=a.percentage_false, - percentage_true=a.percentage_true, - total_false=a.total_false, - total_true=a.total_true, + count=aggregation.boolean.count, + percentage_false=aggregation.boolean.percentage_false, + percentage_true=aggregation.boolean.percentage_true, + total_false=aggregation.boolean.total_false, + total_true=aggregation.boolean.total_true, ) - elif (a := aggregation.date) is not None: + elif aggregation.HasField("date"): return AggregateDate( - count=a.count, - maximum=a.maximum, - median=a.median, - minimum=a.minimum, - mode=a.mode, + count=aggregation.date.count, + maximum=aggregation.date.maximum, + median=aggregation.date.median, + minimum=aggregation.date.minimum, + 
mode=aggregation.date.mode, ) - elif (a := aggregation.reference) is not None: - return AggregateReference(pointing_to=list(a.pointing_to)) + elif aggregation.HasField("reference"): + return AggregateReference(pointing_to=list(aggregation.reference.pointing_to)) else: raise ValueError( f"Unknown aggregation type {aggregation} encountered in _Aggregate.__parse_property_grpc()" diff --git a/weaviate/collections/aggregations/hybrid.py b/weaviate/collections/aggregations/hybrid.py index 1d25091ad..4a9f1e28a 100644 --- a/weaviate/collections/aggregations/hybrid.py +++ b/weaviate/collections/aggregations/hybrid.py @@ -9,6 +9,7 @@ GroupByAggregate, ) from weaviate.collections.classes.filters import _Filters +from weaviate.collections.filters import _FilterToGRPC from weaviate.exceptions import WeaviateUnsupportedFeatureError from weaviate.types import NUMBER @@ -69,24 +70,52 @@ async def hybrid( if (return_metrics is None or isinstance(return_metrics, list)) else [return_metrics] ) - builder = self._base(return_metrics, filters, total_count) - builder = self._add_hybrid_to_builder( - builder, - query, - alpha, - vector, - query_properties, - object_limit, - target_vector, - max_vector_distance, - ) - builder = self._add_groupby_to_builder(builder, group_by) - res = await self._do(builder) - return ( - self._to_aggregate_result(res, return_metrics) - if group_by is None - else self._to_group_by_result(res, return_metrics) - ) + + if isinstance(group_by, str): + group_by = GroupByAggregate(prop=group_by) + + if self._connection._weaviate_version.is_lower_than(1, 28, 4): + # use gql, remove once 1.29 is the minimum supported version + + builder = self._base(return_metrics, filters, total_count) + builder = self._add_hybrid_to_builder( + builder, + query, + alpha, + vector, + query_properties, + object_limit, + target_vector, + max_vector_distance, + ) + builder = self._add_groupby_to_builder(builder, group_by) + res = await self._do(builder) + return ( + 
self._to_aggregate_result(res, return_metrics) + if group_by is None + else self._to_group_by_result(res, return_metrics) + ) + else: + # use grpc + reply = await self._grpc.hybrid( + query=query, + alpha=alpha, + vector=vector, + properties=query_properties, + object_limit=object_limit, + target_vector=target_vector, + distance=max_vector_distance, + aggregations=( + [metric.to_grpc() for metric in return_metrics] + if return_metrics is not None + else [] + ), + filters=_FilterToGRPC.convert(filters) if filters is not None else None, + group_by=group_by._to_grpc() if group_by is not None else None, + limit=group_by.limit if group_by is not None else None, + objects_count=total_count, + ) + return self._to_result(reply, group_by is not None) @syncify.convert diff --git a/weaviate/collections/aggregations/near_image.py b/weaviate/collections/aggregations/near_image.py index 9d4e64f10..bbdea27c6 100644 --- a/weaviate/collections/aggregations/near_image.py +++ b/weaviate/collections/aggregations/near_image.py @@ -3,7 +3,7 @@ from typing import Optional, Union from weaviate import syncify -from weaviate.collections.aggregations.aggregate import _AggregateAsync +from weaviate.collections.aggregations.aggregate import _AggregateAsync, _parse_media from weaviate.collections.classes.aggregate import ( PropertiesMetrics, AggregateReturn, @@ -11,6 +11,7 @@ GroupByAggregate, ) from weaviate.collections.classes.filters import _Filters +from weaviate.collections.filters import _FilterToGRPC from weaviate.types import NUMBER @@ -66,17 +67,44 @@ async def near_image( if (return_metrics is None or isinstance(return_metrics, list)) else [return_metrics] ) - builder = self._base(return_metrics, filters, total_count) - builder = self._add_groupby_to_builder(builder, group_by) - builder = self._add_near_image_to_builder( - builder, near_image, certainty, distance, object_limit, target_vector - ) - res = await self._do(builder) - return ( - self._to_aggregate_result(res, 
return_metrics) - if group_by is None - else self._to_group_by_result(res, return_metrics) - ) + + if isinstance(group_by, str): + group_by = GroupByAggregate(prop=group_by) + + if self._connection._weaviate_version.is_lower_than(1, 28, 4): + # use gql, remove once 1.29 is the minimum supported version + + builder = self._base(return_metrics, filters, total_count) + builder = self._add_groupby_to_builder(builder, group_by) + builder = self._add_near_image_to_builder( + builder, near_image, certainty, distance, object_limit, target_vector + ) + res = await self._do(builder) + return ( + self._to_aggregate_result(res, return_metrics) + if group_by is None + else self._to_group_by_result(res, return_metrics) + ) + else: + # use grpc + reply = await self._grpc.near_media( + media=_parse_media(near_image), + type_="image", + certainty=certainty, + distance=distance, + target_vector=target_vector, + aggregations=( + [metric.to_grpc() for metric in return_metrics] + if return_metrics is not None + else [] + ), + filters=_FilterToGRPC.convert(filters) if filters is not None else None, + group_by=group_by._to_grpc() if group_by is not None else None, + limit=group_by.limit if group_by is not None else None, + objects_count=total_count, + object_limit=object_limit, + ) + return self._to_result(reply, group_by is not None) @syncify.convert diff --git a/weaviate/collections/aggregations/near_object.py b/weaviate/collections/aggregations/near_object.py index ab8db7a10..4cff1be04 100644 --- a/weaviate/collections/aggregations/near_object.py +++ b/weaviate/collections/aggregations/near_object.py @@ -9,6 +9,7 @@ GroupByAggregate, ) from weaviate.collections.classes.filters import _Filters +from weaviate.collections.filters import _FilterToGRPC from weaviate.types import NUMBER, UUID @@ -64,17 +65,43 @@ async def near_object( if (return_metrics is None or isinstance(return_metrics, list)) else [return_metrics] ) - builder = self._base(return_metrics, filters, total_count) - builder 
= self._add_groupby_to_builder(builder, group_by) - builder = self._add_near_object_to_builder( - builder, near_object, certainty, distance, object_limit, target_vector - ) - res = await self._do(builder) - return ( - self._to_aggregate_result(res, return_metrics) - if group_by is None - else self._to_group_by_result(res, return_metrics) - ) + + if isinstance(group_by, str): + group_by = GroupByAggregate(prop=group_by) + + if self._connection._weaviate_version.is_lower_than(1, 28, 4): + # use gql, remove once 1.29 is the minimum supported version + + builder = self._base(return_metrics, filters, total_count) + builder = self._add_groupby_to_builder(builder, group_by) + builder = self._add_near_object_to_builder( + builder, near_object, certainty, distance, object_limit, target_vector + ) + res = await self._do(builder) + return ( + self._to_aggregate_result(res, return_metrics) + if group_by is None + else self._to_group_by_result(res, return_metrics) + ) + else: + # use grpc + reply = await self._grpc.near_object( + near_object=near_object, + certainty=certainty, + distance=distance, + target_vector=target_vector, + aggregations=( + [metric.to_grpc() for metric in return_metrics] + if return_metrics is not None + else [] + ), + filters=_FilterToGRPC.convert(filters) if filters is not None else None, + group_by=group_by._to_grpc() if group_by is not None else None, + limit=group_by.limit if group_by is not None else None, + objects_count=total_count, + object_limit=object_limit, + ) + return self._to_result(reply, group_by is not None) @syncify.convert diff --git a/weaviate/collections/aggregations/near_text.py b/weaviate/collections/aggregations/near_text.py index 01925ccb7..a8f380217 100644 --- a/weaviate/collections/aggregations/near_text.py +++ b/weaviate/collections/aggregations/near_text.py @@ -10,6 +10,7 @@ ) from weaviate.collections.classes.filters import _Filters from weaviate.collections.classes.grpc import Move +from weaviate.collections.filters import 
_FilterToGRPC from weaviate.types import NUMBER @@ -71,24 +72,52 @@ async def near_text( if (return_metrics is None or isinstance(return_metrics, list)) else [return_metrics] ) - builder = self._base(return_metrics, filters, total_count) - builder = self._add_groupby_to_builder(builder, group_by) - builder = self._add_near_text_to_builder( - builder=builder, - query=query, - certainty=certainty, - distance=distance, - move_to=move_to, - move_away=move_away, - object_limit=object_limit, - target_vector=target_vector, - ) - res = await self._do(builder) - return ( - self._to_aggregate_result(res, return_metrics) - if group_by is None - else self._to_group_by_result(res, return_metrics) - ) + + if isinstance(group_by, str): + group_by = GroupByAggregate(prop=group_by) + + if self._connection._weaviate_version.is_lower_than(1, 28, 4): + # use gql, remove once 1.29 is the minimum supported version + + builder = self._base(return_metrics, filters, total_count) + builder = self._add_groupby_to_builder(builder, group_by) + builder = self._add_near_text_to_builder( + builder=builder, + query=query, + certainty=certainty, + distance=distance, + move_to=move_to, + move_away=move_away, + object_limit=object_limit, + target_vector=target_vector, + ) + res = await self._do(builder) + return ( + self._to_aggregate_result(res, return_metrics) + if group_by is None + else self._to_group_by_result(res, return_metrics) + ) + else: + # use grpc + reply = await self._grpc.near_text( + near_text=query, + certainty=certainty, + distance=distance, + move_away=move_away, + move_to=move_to, + target_vector=target_vector, + aggregations=( + [metric.to_grpc() for metric in return_metrics] + if return_metrics is not None + else [] + ), + filters=_FilterToGRPC.convert(filters) if filters is not None else None, + group_by=group_by._to_grpc() if group_by is not None else None, + limit=group_by.limit if group_by is not None else None, + objects_count=total_count, + object_limit=object_limit, + ) + 
return self._to_result(reply, group_by is not None) @syncify.convert diff --git a/weaviate/collections/aggregations/near_vector.py b/weaviate/collections/aggregations/near_vector.py index ef782eabb..18e5fc974 100644 --- a/weaviate/collections/aggregations/near_vector.py +++ b/weaviate/collections/aggregations/near_vector.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Union +from typing import Optional, Union from weaviate import syncify from weaviate.collections.aggregations.aggregate import _AggregateAsync @@ -9,20 +9,26 @@ GroupByAggregate, ) from weaviate.collections.classes.filters import _Filters +from weaviate.collections.classes.grpc import ( + TargetVectorJoinType, + NearVectorInputType, +) +from weaviate.collections.filters import _FilterToGRPC +from weaviate.exceptions import WeaviateUnsupportedFeatureError from weaviate.types import NUMBER class _NearVectorAsync(_AggregateAsync): async def near_vector( self, - near_vector: List[float], + near_vector: NearVectorInputType, *, certainty: Optional[NUMBER] = None, distance: Optional[NUMBER] = None, object_limit: Optional[int] = None, filters: Optional[_Filters] = None, group_by: Optional[Union[str, GroupByAggregate]] = None, - target_vector: Optional[str] = None, + target_vector: Optional[TargetVectorJoinType] = None, total_count: bool = True, return_metrics: Optional[PropertiesMetrics] = None, ) -> Union[AggregateReturn, AggregateGroupByReturn]: @@ -64,17 +70,55 @@ async def near_vector( if (return_metrics is None or isinstance(return_metrics, list)) else [return_metrics] ) - builder = self._base(return_metrics, filters, total_count) - builder = self._add_groupby_to_builder(builder, group_by) - builder = self._add_near_vector_to_builder( - builder, near_vector, certainty, distance, object_limit, target_vector - ) - res = await self._do(builder) - return ( - self._to_aggregate_result(res, return_metrics) - if group_by is None - else self._to_group_by_result(res, return_metrics) - ) + if 
isinstance(group_by, str): + group_by = GroupByAggregate(prop=group_by) + + if self._connection._weaviate_version.is_lower_than(1, 28, 4): + # use gql, remove once 1.29 is the minimum supported version + + if not isinstance(near_vector, list): + raise WeaviateUnsupportedFeatureError( + "A `near_vector` argument other than a list of floats", + str(self._connection._weaviate_version), + "1.29.0", + ) + if target_vector is not None and not isinstance(target_vector, str): + raise WeaviateUnsupportedFeatureError( + "A `target_vector` argument other than a string", + str(self._connection._weaviate_version), + "1.29.0", + ) + + builder = self._base(return_metrics, filters, total_count) + builder = self._add_groupby_to_builder(builder, group_by) + builder = self._add_near_vector_to_builder( + builder, near_vector, certainty, distance, object_limit, target_vector + ) + res = await self._do(builder) + return ( + self._to_aggregate_result(res, return_metrics) + if group_by is None + else self._to_group_by_result(res, return_metrics) + ) + else: + # use grpc + reply = await self._grpc.near_vector( + near_vector=near_vector, + certainty=certainty, + distance=distance, + target_vector=target_vector, + aggregations=( + [metric.to_grpc() for metric in return_metrics] + if return_metrics is not None + else [] + ), + filters=_FilterToGRPC.convert(filters) if filters is not None else None, + group_by=group_by._to_grpc() if group_by is not None else None, + limit=group_by.limit if group_by is not None else None, + objects_count=total_count, + object_limit=object_limit, + ) + return self._to_result(reply, group_by is not None) @syncify.convert diff --git a/weaviate/collections/aggregations/over_all.py b/weaviate/collections/aggregations/over_all.py index 4c03e7e38..26a9a7a9b 100644 --- a/weaviate/collections/aggregations/over_all.py +++ b/weaviate/collections/aggregations/over_all.py @@ -50,7 +50,7 @@ async def over_all( if isinstance(group_by, str): group_by = GroupByAggregate(prop=group_by) - if 
self._connection._weaviate_version.is_lower_than(1, 29, 0): + if self._connection._weaviate_version.is_lower_than(1, 28, 4): # use gql, remove once 1.29 is the minimum supported version builder = self._base(return_metrics, filters, total_count) builder = self._add_groupby_to_builder(builder, group_by) @@ -70,9 +70,10 @@ async def over_all( ), filters=_FilterToGRPC.convert(filters) if filters is not None else None, group_by=group_by._to_grpc() if group_by is not None else None, + limit=group_by.limit if group_by is not None else None, objects_count=total_count, ) - return self._to_result(reply) + return self._to_result(reply, group_by is not None) @syncify.convert diff --git a/weaviate/collections/batch/grpc_batch_delete.py b/weaviate/collections/batch/grpc_batch_delete.py index 279fca8fc..47d1dbab1 100644 --- a/weaviate/collections/batch/grpc_batch_delete.py +++ b/weaviate/collections/batch/grpc_batch_delete.py @@ -20,7 +20,7 @@ class _BatchDeleteGRPC(_BaseGRPC): """This class is used to delete multiple objects from Weaviate using the gRPC API.""" def __init__(self, connection: ConnectionV4, consistency_level: Optional[ConsistencyLevel]): - super().__init__(connection, consistency_level) + super().__init__(connection, consistency_level, False) async def batch_delete( self, name: str, filters: _Filters, verbose: bool, dry_run: bool, tenant: Optional[str] diff --git a/weaviate/collections/batch/grpc_batch_objects.py b/weaviate/collections/batch/grpc_batch_objects.py index 907135bdc..0453c9e60 100644 --- a/weaviate/collections/batch/grpc_batch_objects.py +++ b/weaviate/collections/batch/grpc_batch_objects.py @@ -47,7 +47,7 @@ class _BatchGRPC(_BaseGRPC): """ def __init__(self, connection: ConnectionV4, consistency_level: Optional[ConsistencyLevel]): - super().__init__(connection, consistency_level) + super().__init__(connection, consistency_level, False) def __grpc_objects(self, objects: List[_BatchObject]) -> List[batch_pb2.BatchObject]: def pack_vector(vector: Any) 
-> bytes: diff --git a/weaviate/collections/classes/aggregate.py b/weaviate/collections/classes/aggregate.py index c3f66cb44..324a2049e 100644 --- a/weaviate/collections/classes/aggregate.py +++ b/weaviate/collections/classes/aggregate.py @@ -576,24 +576,24 @@ def date_( ) # Aggregate references currently bugged on Weaviate's side - # def reference( - # self, - # pointing_to: bool = False, - # ) -> _MetricsReference: - # """Define the metrics to be returned for a cross-reference property when aggregating over a collection. - - # If none of the arguments are provided then all metrics will be returned. - - # Arguments: - # `pointing_to` - # Whether to include the collection names that this property references. - - # Returns: - # A `_MetricsReference` object that includes the metrics to be returned. - # """ - # if not any([pointing_to]): - # pointing_to = True - # return _MetricsReference( - # property_name=self.__property, - # pointing_to=pointing_to, - # ) + def reference( + self, + pointing_to: bool = False, + ) -> _MetricsReference: + """Define the metrics to be returned for a cross-reference property when aggregating over a collection. + + If none of the arguments are provided then all metrics will be returned. + + Arguments: + `pointing_to` + Whether to include the collection names that this property references. + + Returns: + A `_MetricsReference` object that includes the metrics to be returned. 
+ """ + if not any([pointing_to]): + pointing_to = True + return _MetricsReference( + property_name=self.__property, + pointing_to=pointing_to, + ) diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index c053aca42..505ea3ce8 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -5,7 +5,7 @@ from pydantic import ConfigDict, Field from weaviate.collections.classes.types import _WeaviateInput -from weaviate.proto.v1 import search_get_pb2 +from weaviate.proto.v1 import base_search_pb2 from weaviate.str_enum import BaseEnum from weaviate.types import INCLUDE_VECTOR, UUID, NUMBER from weaviate.util import _ServerVersion, _get_vector_v4, _is_1d_vector @@ -269,19 +269,19 @@ class _MultiTargetVectorJoin: target_vectors: List[str] weights: Optional[Dict[str, Union[float, List[float]]]] = None - def to_grpc_target_vector(self, version: _ServerVersion) -> search_get_pb2.Targets: + def to_grpc_target_vector(self, version: _ServerVersion) -> base_search_pb2.Targets: combination = self.combination if combination == _MultiTargetVectorJoinEnum.AVERAGE: - combination_grpc = search_get_pb2.COMBINATION_METHOD_TYPE_AVERAGE + combination_grpc = base_search_pb2.COMBINATION_METHOD_TYPE_AVERAGE elif combination == _MultiTargetVectorJoinEnum.SUM: - combination_grpc = search_get_pb2.COMBINATION_METHOD_TYPE_SUM + combination_grpc = base_search_pb2.COMBINATION_METHOD_TYPE_SUM elif combination == _MultiTargetVectorJoinEnum.RELATIVE_SCORE: - combination_grpc = search_get_pb2.COMBINATION_METHOD_TYPE_RELATIVE_SCORE + combination_grpc = base_search_pb2.COMBINATION_METHOD_TYPE_RELATIVE_SCORE elif combination == _MultiTargetVectorJoinEnum.MANUAL_WEIGHTS: - combination_grpc = search_get_pb2.COMBINATION_METHOD_TYPE_MANUAL + combination_grpc = base_search_pb2.COMBINATION_METHOD_TYPE_MANUAL else: assert combination == _MultiTargetVectorJoinEnum.MINIMUM - combination_grpc = search_get_pb2.COMBINATION_METHOD_TYPE_MIN + 
combination_grpc = base_search_pb2.COMBINATION_METHOD_TYPE_MIN if version.is_lower_than(1, 27, 0): if self.weights is not None and any(isinstance(w, list) for w in self.weights.values()): @@ -291,28 +291,30 @@ def to_grpc_target_vector(self, version: _ServerVersion) -> search_get_pb2.Targe # mypy does not seem to understand the type narrowing right above weights_typed = cast(Optional[Dict[str, float]], self.weights) - return search_get_pb2.Targets( + return base_search_pb2.Targets( target_vectors=self.target_vectors, weights=weights_typed, combination=combination_grpc, ) else: - weights: List[search_get_pb2.WeightsForTarget] = [] + weights: List[base_search_pb2.WeightsForTarget] = [] target_vectors: List[str] = self.target_vectors if self.weights is not None: target_vectors = [] for target, weight in self.weights.items(): if isinstance(weight, list): for w in weight: - weights.append(search_get_pb2.WeightsForTarget(target=target, weight=w)) + weights.append( + base_search_pb2.WeightsForTarget(target=target, weight=w) + ) target_vectors.append(target) else: weights.append( - search_get_pb2.WeightsForTarget(target=target, weight=weight) + base_search_pb2.WeightsForTarget(target=target, weight=weight) ) target_vectors.append(target) - return search_get_pb2.Targets( + return base_search_pb2.Targets( target_vectors=target_vectors, weights_for_targets=weights, combination=combination_grpc, diff --git a/weaviate/collections/grpc/aggregate.py b/weaviate/collections/grpc/aggregate.py index bbd5e1225..4573eaf49 100644 --- a/weaviate/collections/grpc/aggregate.py +++ b/weaviate/collections/grpc/aggregate.py @@ -1,8 +1,14 @@ -from typing import List, Optional, cast +from typing import Awaitable, List, Literal, Optional, Union, cast from grpc.aio import AioRpcError # type: ignore from weaviate.collections.classes.config import ConsistencyLevel +from weaviate.collections.classes.grpc import ( + TargetVectorJoinType, + NearVectorInputType, + Move, + HybridVectorType, +) from 
weaviate.collections.grpc.retry import _Retry from weaviate.collections.grpc.shared import _BaseGRPC, PERMISSION_DENIED from weaviate.connect.v4 import ConnectionV4 @@ -10,8 +16,10 @@ InsufficientPermissionsError, WeaviateQueryError, WeaviateRetryError, + WeaviateInvalidInputError, ) -from weaviate.proto.v1 import aggregate_pb2, base_pb2 +from weaviate.proto.v1 import aggregate_pb2, base_pb2, base_search_pb2 +from weaviate.types import NUMBER, UUID class _AggregateGRPC(_BaseGRPC): @@ -23,31 +31,223 @@ def __init__( consistency_level: Optional[ConsistencyLevel], validate_arguments: bool, ): - super().__init__(connection, consistency_level) + super().__init__(connection, consistency_level, validate_arguments) self._name: str = name self._tenant = tenant - self._validate_arguments = validate_arguments async def objects_count(self) -> int: res = await self.__call(self.__create_request(objects_count=True)) return res.result.groups[0].objects_count - async def over_all( + def hybrid( self, *, + query: Optional[str], + alpha: Optional[float], + vector: Optional[HybridVectorType], + properties: Optional[List[str]], + distance: Optional[NUMBER] = None, + target_vector: Optional[TargetVectorJoinType], aggregations: List[aggregate_pb2.AggregateRequest.Aggregation], filters: Optional[base_pb2.Filters], group_by: Optional[aggregate_pb2.AggregateRequest.GroupBy], - objects_count: bool = False, - ) -> aggregate_pb2.AggregateReply: - return await self.__call( - self.__create_request( - aggregations=aggregations, - filters=filters, - group_by=group_by, - objects_count=objects_count, + limit: Optional[int], + object_limit: Optional[int], + objects_count: bool, + ) -> Awaitable[aggregate_pb2.AggregateReply]: + request = self.__create_request( + aggregations=aggregations, + filters=filters, + group_by=group_by, + hybrid=self._parse_hybrid( + query, + alpha, + vector, + properties, + None, + distance, + target_vector, + ), + limit=limit, + object_limit=object_limit, + 
objects_count=objects_count, + ) + return self.__call(request) + + def near_media( + self, + *, + media: str, + type_: Literal["audio", "depth", "image", "imu", "thermal", "video"], + certainty: Optional[NUMBER] = None, + distance: Optional[NUMBER] = None, + target_vector: Optional[TargetVectorJoinType], + aggregations: List[aggregate_pb2.AggregateRequest.Aggregation], + filters: Optional[base_pb2.Filters], + group_by: Optional[aggregate_pb2.AggregateRequest.GroupBy], + limit: Optional[int], + object_limit: Optional[int], + objects_count: bool, + ) -> Awaitable[aggregate_pb2.AggregateReply]: + if self._validate_arguments: + self.__check_vector_search_args( + certainty=certainty, + distance=distance, + object_limit=object_limit, ) + request = self.__create_request( + aggregations=aggregations, + filters=filters, + group_by=group_by, + limit=limit, + **self._parse_media( + media, + type_, + certainty, + distance, + target_vector, + ), + object_limit=object_limit, + objects_count=objects_count, ) + return self.__call(request) + + def near_object( + self, + *, + near_object: UUID, + certainty: Optional[NUMBER] = None, + distance: Optional[NUMBER] = None, + target_vector: Optional[TargetVectorJoinType], + aggregations: List[aggregate_pb2.AggregateRequest.Aggregation], + filters: Optional[base_pb2.Filters], + group_by: Optional[aggregate_pb2.AggregateRequest.GroupBy], + limit: Optional[int], + object_limit: Optional[int], + objects_count: bool, + ) -> Awaitable[aggregate_pb2.AggregateReply]: + if self._validate_arguments: + self.__check_vector_search_args( + certainty=certainty, + distance=distance, + object_limit=object_limit, + ) + request = self.__create_request( + aggregations=aggregations, + filters=filters, + group_by=group_by, + limit=limit, + near_object=self._parse_near_object(near_object, certainty, distance, target_vector), + object_limit=object_limit, + objects_count=objects_count, + ) + return self.__call(request) + + def near_text( + self, + *, + near_text: 
Union[List[str], str], + certainty: Optional[NUMBER], + distance: Optional[NUMBER], + move_to: Optional[Move], + move_away: Optional[Move], + target_vector: Optional[TargetVectorJoinType], + aggregations: List[aggregate_pb2.AggregateRequest.Aggregation], + filters: Optional[base_pb2.Filters], + group_by: Optional[aggregate_pb2.AggregateRequest.GroupBy], + limit: Optional[int], + object_limit: Optional[int], + objects_count: bool, + ) -> Awaitable[aggregate_pb2.AggregateReply]: + if self._validate_arguments: + self.__check_vector_search_args( + certainty=certainty, + distance=distance, + object_limit=object_limit, + ) + request = self.__create_request( + aggregations=aggregations, + filters=filters, + group_by=group_by, + limit=limit, + near_text=self._parse_near_text( + near_text, + certainty, + distance, + move_away=move_away, + move_to=move_to, + target_vector=target_vector, + ), + object_limit=object_limit, + objects_count=objects_count, + ) + return self.__call(request) + + def near_vector( + self, + *, + near_vector: NearVectorInputType, + certainty: Optional[NUMBER], + distance: Optional[NUMBER], + target_vector: Optional[TargetVectorJoinType], + aggregations: List[aggregate_pb2.AggregateRequest.Aggregation], + filters: Optional[base_pb2.Filters], + group_by: Optional[aggregate_pb2.AggregateRequest.GroupBy], + limit: Optional[int], + object_limit: Optional[int], + objects_count: bool, + ) -> Awaitable[aggregate_pb2.AggregateReply]: + if self._validate_arguments: + self.__check_vector_search_args( + certainty=certainty, + distance=distance, + object_limit=object_limit, + ) + req = self.__create_request( + aggregations=aggregations, + filters=filters, + group_by=group_by, + limit=limit, + near_vector=self._parse_near_vector( + near_vector=near_vector, + certainty=certainty, + distance=distance, + target_vector=target_vector, + ), + object_limit=object_limit, + objects_count=objects_count, + ) + return self.__call(req) + + def over_all( + self, + *, + 
aggregations: List[aggregate_pb2.AggregateRequest.Aggregation], + filters: Optional[base_pb2.Filters], + group_by: Optional[aggregate_pb2.AggregateRequest.GroupBy], + limit: Optional[int], + objects_count: bool = False, + ) -> Awaitable[aggregate_pb2.AggregateReply]: + req = self.__create_request( + aggregations=aggregations, + filters=filters, + group_by=group_by, + limit=limit, + objects_count=objects_count, + ) + return self.__call(req) + + def __check_vector_search_args( + self, + *, + certainty: Optional[NUMBER], + distance: Optional[NUMBER], + object_limit: Optional[int], + ) -> None: + if all([certainty is None, distance is None, object_limit is None]): + raise WeaviateInvalidInputError( + "You must provide at least one of the following arguments: certainty, distance, object_limit when vector searching" + ) def __create_request( self, @@ -55,6 +255,12 @@ def __create_request( aggregations: Optional[List[aggregate_pb2.AggregateRequest.Aggregation]] = None, filters: Optional[base_pb2.Filters] = None, group_by: Optional[aggregate_pb2.AggregateRequest.GroupBy] = None, + hybrid: Optional[base_search_pb2.Hybrid] = None, + limit: Optional[int] = None, + near_object: Optional[base_search_pb2.NearObject] = None, + near_text: Optional[base_search_pb2.NearTextSearch] = None, + near_vector: Optional[base_search_pb2.NearVector] = None, + object_limit: Optional[int] = None, objects_count: bool = False, ) -> aggregate_pb2.AggregateRequest: return aggregate_pb2.AggregateRequest( @@ -62,6 +268,12 @@ def __create_request( aggregations=aggregations, filters=filters, group_by=group_by, + hybrid=hybrid, + limit=limit, + near_object=near_object, + near_text=near_text, + near_vector=near_vector, + object_limit=object_limit, objects_count=objects_count, tenant=self._tenant, ) diff --git a/weaviate/collections/grpc/query.py b/weaviate/collections/grpc/query.py index 287576b06..d3b675b75 100644 --- a/weaviate/collections/grpc/query.py +++ b/weaviate/collections/grpc/query.py @@ -1,8 
+1,6 @@ -import struct import uuid as uuid_lib from dataclasses import dataclass from typing import ( - Any, Awaitable, Dict, List, @@ -13,8 +11,6 @@ TypeVar, Union, cast, - Tuple, - get_args, ) from grpc.aio import AioRpcError # type: ignore @@ -23,12 +19,9 @@ from weaviate.collections.classes.config import ConsistencyLevel from weaviate.collections.classes.filters import _Filters from weaviate.collections.classes.grpc import ( - _MultiTargetVectorJoin, HybridFusion, _QueryReferenceMultiTarget, _MetadataQuery, - _HybridNearText, - _HybridNearVector, HybridVectorType, Move, QueryNested, @@ -53,14 +46,11 @@ from weaviate.exceptions import ( InsufficientPermissionsError, WeaviateQueryError, - WeaviateUnsupportedFeatureError, - WeaviateInvalidInputError, WeaviateRetryError, ) -from weaviate.proto.v1 import search_get_pb2 +from weaviate.proto.v1 import base_search_pb2, search_get_pb2 from weaviate.types import NUMBER, UUID -from weaviate.util import _get_vector_v4, _is_1d_vector -from weaviate.validator import _ValidateArgument, _validate_input, _ExtraTypes +from weaviate.validator import _ValidateArgument, _validate_input # Can be found in the google.protobuf.internal.well_known_types.pyi stub file but is defined explicitly here for clarity. 
_PyValue: TypeAlias = Union[ @@ -98,29 +88,11 @@ def __init__( validate_arguments: bool, uses_125_api: bool, ): - super().__init__(connection, consistency_level) + super().__init__(connection, consistency_level, validate_arguments) self._name: str = name self._tenant = tenant - self._validate_arguments = validate_arguments self.__uses_125_api = uses_125_api - def __parse_near_options( - self, - certainty: Optional[NUMBER] = None, - distance: Optional[NUMBER] = None, - ) -> Tuple[Optional[float], Optional[float]]: - if self._validate_arguments: - _validate_input( - [ - _ValidateArgument([float, int, None], "certainty", certainty), - _ValidateArgument([float, int, None], "distance", distance), - ] - ) - return ( - float(certainty) if certainty is not None else None, - float(distance) if distance is not None else None, - ) - def get( self, limit: Optional[int] = None, @@ -180,141 +152,6 @@ def hybrid( rerank: Optional[Rerank] = None, target_vector: Optional[TargetVectorJoinType] = None, ) -> Awaitable[search_get_pb2.SearchReply]: - if self._connection._weaviate_version.is_lower_than(1, 25, 0) and ( - isinstance(vector, _HybridNearText) or isinstance(vector, _HybridNearVector) - ): - raise WeaviateUnsupportedFeatureError( - "Hybrid search with NearText or NearVector", - str(self._connection._weaviate_version), - "1.25.0", - ) - if self._validate_arguments: - _validate_input( - [ - _ValidateArgument([None, str], "query", query), - _ValidateArgument([float, int, None], "alpha", alpha), - _ValidateArgument( - [ - List, - Dict, - _ExtraTypes.PANDAS, - _ExtraTypes.POLARS, - _ExtraTypes.NUMPY, - _ExtraTypes.TF, - _HybridNearText, - _HybridNearVector, - None, - ], - "vector", - vector, - ), - _ValidateArgument([List, None], "properties", properties), - _ValidateArgument([HybridFusion, None], "fusion_type", fusion_type), - _ValidateArgument( - [str, None, List, _MultiTargetVectorJoin], "target_vector", target_vector - ), - ] - ) - - # Set hybrid search to only query the other 
search-type if one of the two is not set - if query is None: - alpha = 1 - - targets, target_vectors = self.__target_vector_to_grpc(target_vector) - - near_text, near_vector, vector_bytes = None, None, None - - if vector is None: - pass - elif isinstance(vector, list) and len(vector) > 0 and isinstance(vector[0], float): - # fast path for simple vector - vector_bytes = struct.pack("{}f".format(len(vector)), *vector) - elif isinstance(vector, _HybridNearText): - near_text = search_get_pb2.NearTextSearch( - query=[vector.text] if isinstance(vector.text, str) else vector.text, - certainty=vector.certainty, - distance=vector.distance, - move_away=self.__parse_move(vector.move_away), - move_to=self.__parse_move(vector.move_to), - ) - elif isinstance(vector, _HybridNearVector): - if self._connection._weaviate_version.is_lower_than(1, 27, 0): - vector_per_target_tmp, vector_bytes_tmp = self.__vector_per_target( - vector.vector, targets, "vector" - ) - vector_for_targets_tmp = None - else: - ( - vector_for_targets_tmp, - vector_bytes_tmp, - target_vectors_tmp, - ) = self.__vector_for_target(vector.vector, targets, "vector") - vector_per_target_tmp = None - if target_vectors_tmp is not None: - targets, target_vectors = self.__recompute_target_vector_to_grpc( - target_vector, target_vectors_tmp - ) - - near_vector = search_get_pb2.NearVector( - vector_bytes=vector_bytes_tmp, - certainty=vector.certainty, - distance=vector.distance, - vector_per_target=vector_per_target_tmp, - vector_for_targets=vector_for_targets_tmp, - ) - else: - if self._connection._weaviate_version.is_lower_than(1, 27, 0): - vector_per_target_tmp, vector_bytes_tmp = self.__vector_per_target( - vector, targets, "vector" - ) - vector_for_targets_tmp = None - else: - ( - vector_for_targets_tmp, - vector_bytes_tmp, - target_vectors_tmp, - ) = self.__vector_for_target(vector, targets, "vector") - vector_per_target_tmp = None - if target_vectors_tmp is not None: - targets, target_vectors = 
self.__recompute_target_vector_to_grpc( - target_vector, target_vectors_tmp - ) - else: - targets, target_vectors = self.__target_vector_to_grpc(target_vector) - - if vector_per_target_tmp is not None or vector_for_targets_tmp is not None: - near_vector = search_get_pb2.NearVector( - vector_bytes=vector_bytes_tmp, - vector_per_target=vector_per_target_tmp, - vector_for_targets=vector_for_targets_tmp, - ) - else: - vector_bytes = vector_bytes_tmp - - hybrid_search = ( - search_get_pb2.Hybrid( - properties=properties, - query=query, - alpha=float(alpha) if alpha is not None else None, - fusion_type=( - cast( - search_get_pb2.Hybrid.FusionType, - search_get_pb2.Hybrid.FusionType.Value(fusion_type.value), - ) - if fusion_type is not None - else None - ), - target_vectors=target_vectors, - targets=targets, - near_text=near_text, - near_vector=near_vector, - vector_bytes=vector_bytes, - vector_distance=distance, - ) - if query is not None or vector is not None - else None - ) - request = self.__create_request( limit=limit, offset=offset, @@ -326,9 +163,16 @@ def hybrid( generative=generative, rerank=rerank, autocut=autocut, - hybrid_search=hybrid_search, + hybrid_search=self._parse_hybrid( + query, + alpha, + vector, + properties, + fusion_type, + distance, + target_vector, + ), ) - return self.__call(request) def bm25( @@ -366,7 +210,7 @@ def bm25( rerank=rerank, autocut=autocut, bm25=( - search_get_pb2.BM25( + base_search_pb2.BM25( query=query, properties=properties if properties is not None else [] ) if query is not None @@ -392,54 +236,6 @@ def near_vector( return_properties: Union[PROPERTIES, bool, None] = None, return_references: Optional[REFERENCES] = None, ) -> Awaitable[search_get_pb2.SearchReply]: - if self._validate_arguments: - _validate_input( - [ - _ValidateArgument( - [ - List, - Dict, - _ExtraTypes.PANDAS, - _ExtraTypes.POLARS, - _ExtraTypes.NUMPY, - _ExtraTypes.TF, - ], - "near_vector", - near_vector, - ), - _ValidateArgument( - [str, None, List, 
_MultiTargetVectorJoin], "target_vector", target_vector - ), - ] - ) - - certainty, distance = self.__parse_near_options(certainty, distance) - - targets, target_vectors = self.__target_vector_to_grpc(target_vector) - - if _is_1d_vector(near_vector) and len(near_vector) > 0: - # fast path for simple vector - near_vector_grpc: Optional[bytes] = struct.pack( - "{}f".format(len(near_vector)), *near_vector - ) - vector_per_target_tmp = None - vector_for_targets = None - else: - if self._connection._weaviate_version.is_lower_than(1, 27, 0): - vector_per_target_tmp, near_vector_grpc = self.__vector_per_target( - near_vector, targets, "near_vector" - ) - vector_for_targets = None - else: - vector_for_targets, near_vector_grpc, target_vectors_tmp = self.__vector_for_target( - near_vector, targets, "near_vector" - ) - vector_per_target_tmp = None - if target_vectors_tmp is not None: - targets, target_vectors = self.__recompute_target_vector_to_grpc( - target_vector, target_vectors_tmp - ) - request = self.__create_request( limit=limit, offset=offset, @@ -451,17 +247,10 @@ def near_vector( rerank=rerank, autocut=autocut, group_by=group_by, - near_vector=search_get_pb2.NearVector( - certainty=certainty, - distance=distance, - targets=targets, - target_vectors=target_vectors, - vector_per_target=vector_per_target_tmp, - vector_for_targets=vector_for_targets, - vector_bytes=near_vector_grpc, + near_vector=self._parse_near_vector( + near_vector, certainty, distance, target_vector=target_vector ), ) - return self.__call(request) def near_object( @@ -481,21 +270,7 @@ def near_object( return_properties: Union[PROPERTIES, bool, None] = None, return_references: Optional[REFERENCES] = None, ) -> Awaitable[search_get_pb2.SearchReply]: - if self._validate_arguments: - _validate_input( - [ - _ValidateArgument([str, uuid_lib.UUID], "near_object", near_object), - _ValidateArgument( - [str, None, List, _MultiTargetVectorJoin], "target_vector", target_vector - ), - ] - ) - - certainty, 
distance = self.__parse_near_options(certainty, distance) - - targets, target_vector = self.__target_vector_to_grpc(target_vector) - - base_request = self.__create_request( + request = self.__create_request( limit=limit, offset=offset, filters=filters, @@ -506,16 +281,9 @@ def near_object( rerank=rerank, autocut=autocut, group_by=group_by, - near_object=search_get_pb2.NearObject( - id=str(near_object), - certainty=certainty, - distance=distance, - target_vectors=target_vector, - targets=targets, - ), + near_object=self._parse_near_object(near_object, certainty, distance, target_vector), ) - - return self.__call(base_request) + return self.__call(request) def near_text( self, @@ -536,33 +304,6 @@ def near_text( return_properties: Union[PROPERTIES, bool, None] = None, return_references: Optional[REFERENCES] = None, ) -> Awaitable[search_get_pb2.SearchReply]: - if self._validate_arguments: - _validate_input( - [ - _ValidateArgument([List, str], "near_text", near_text), - _ValidateArgument([Move, None], "move_away", move_away), - _ValidateArgument([Move, None], "move_to", move_to), - _ValidateArgument( - [str, List, _MultiTargetVectorJoin, None], "target_vector", target_vector - ), - ] - ) - - if isinstance(near_text, str): - near_text = [near_text] - certainty, distance = self.__parse_near_options(certainty, distance) - targets, target_vector = self.__target_vector_to_grpc(target_vector) - - near_text_req = search_get_pb2.NearTextSearch( - query=near_text, - certainty=certainty, - distance=distance, - move_away=self.__parse_move(move_away), - move_to=self.__parse_move(move_to), - targets=targets, - target_vectors=target_vector, - ) - request = self.__create_request( limit=limit, offset=offset, @@ -574,9 +315,15 @@ def near_text( rerank=rerank, autocut=autocut, group_by=group_by, - near_text=near_text_req, + near_text=self._parse_near_text( + near_text, + certainty, + distance, + move_away=move_away, + move_to=move_to, + target_vector=target_vector, + ), ) - return 
self.__call(request) def near_media( @@ -597,72 +344,6 @@ def near_media( return_properties: Union[PROPERTIES, bool, None] = None, return_references: Optional[REFERENCES] = None, ) -> Awaitable[search_get_pb2.SearchReply]: - if self._validate_arguments: - _validate_input( - [ - _ValidateArgument([str], "media", media), - _ValidateArgument( - [str, None, List, _MultiTargetVectorJoin], "target_vector", target_vector - ), - ] - ) - - certainty, distance = self.__parse_near_options(certainty, distance) - - kwargs: Dict[str, Any] = {} - targets, target_vector = self.__target_vector_to_grpc(target_vector) - if type_ == "audio": - kwargs["near_audio"] = search_get_pb2.NearAudioSearch( - audio=media, - distance=distance, - certainty=certainty, - target_vectors=target_vector, - targets=targets, - ) - elif type_ == "depth": - kwargs["near_depth"] = search_get_pb2.NearDepthSearch( - depth=media, - distance=distance, - certainty=certainty, - target_vectors=target_vector, - targets=targets, - ) - elif type_ == "image": - kwargs["near_image"] = search_get_pb2.NearImageSearch( - image=media, - distance=distance, - certainty=certainty, - target_vectors=target_vector, - targets=targets, - ) - elif type_ == "imu": - kwargs["near_imu"] = search_get_pb2.NearIMUSearch( - imu=media, - distance=distance, - certainty=certainty, - target_vectors=target_vector, - targets=targets, - ) - elif type_ == "thermal": - kwargs["near_thermal"] = search_get_pb2.NearThermalSearch( - thermal=media, - distance=distance, - certainty=certainty, - target_vectors=target_vector, - targets=targets, - ) - elif type_ == "video": - kwargs["near_video"] = search_get_pb2.NearVideoSearch( - video=media, - distance=distance, - certainty=certainty, - target_vectors=target_vector, - targets=targets, - ) - else: - raise ValueError( - f"type_ must be one of ['audio', 'depth', 'image', 'imu', 'thermal', 'video'], but got {type_}" - ) request = self.__create_request( limit=limit, offset=offset, @@ -674,22 +355,16 @@ def 
near_media( rerank=rerank, autocut=autocut, group_by=group_by, - **kwargs, + **self._parse_media( + media, + type_, + certainty, + distance, + target_vector, + ), ) return self.__call(request) - @staticmethod - def __parse_move(move: Optional[Move]) -> Optional[search_get_pb2.NearTextSearch.Move]: - return ( - search_get_pb2.NearTextSearch.Move( - force=move.force, - concepts=move._concepts_list, - uuids=move._objects_list, - ) - if move is not None - else None - ) - def __create_request( self, limit: Optional[int] = None, @@ -703,18 +378,18 @@ def __create_request( rerank: Optional[Rerank] = None, autocut: Optional[int] = None, group_by: Optional[_GroupBy] = None, - near_vector: Optional[search_get_pb2.NearVector] = None, + near_vector: Optional[base_search_pb2.NearVector] = None, sort_by: Optional[Sequence[search_get_pb2.SortBy]] = None, - hybrid_search: Optional[search_get_pb2.Hybrid] = None, - bm25: Optional[search_get_pb2.BM25] = None, - near_object: Optional[search_get_pb2.NearObject] = None, - near_text: Optional[search_get_pb2.NearTextSearch] = None, - near_audio: Optional[search_get_pb2.NearAudioSearch] = None, - near_depth: Optional[search_get_pb2.NearDepthSearch] = None, - near_image: Optional[search_get_pb2.NearImageSearch] = None, - near_imu: Optional[search_get_pb2.NearIMUSearch] = None, - near_thermal: Optional[search_get_pb2.NearThermalSearch] = None, - near_video: Optional[search_get_pb2.NearVideoSearch] = None, + hybrid_search: Optional[base_search_pb2.Hybrid] = None, + bm25: Optional[base_search_pb2.BM25] = None, + near_object: Optional[base_search_pb2.NearObject] = None, + near_text: Optional[base_search_pb2.NearTextSearch] = None, + near_audio: Optional[base_search_pb2.NearAudioSearch] = None, + near_depth: Optional[base_search_pb2.NearDepthSearch] = None, + near_image: Optional[base_search_pb2.NearImageSearch] = None, + near_imu: Optional[base_search_pb2.NearIMUSearch] = None, + near_thermal: Optional[base_search_pb2.NearThermalSearch] = None, 
+ near_video: Optional[base_search_pb2.NearVideoSearch] = None, ) -> search_get_pb2.SearchRequest: if self._validate_arguments: _validate_input( @@ -906,151 +581,3 @@ def __convert_to_set(args: Union[A, Sequence[A]]) -> Set[A]: return set(args) else: return {cast(A, args)} - - def __recompute_target_vector_to_grpc( - self, target_vector: Optional[TargetVectorJoinType], target_vectors_tmp: List[str] - ) -> Tuple[Optional[search_get_pb2.Targets], Optional[List[str]]]: - # reorder input for targets so they match the vectors - if isinstance(target_vector, _MultiTargetVectorJoin): - target_vector.target_vectors = target_vectors_tmp - if target_vector.weights is not None: - target_vector.weights = { - target: target_vector.weights[target] for target in target_vectors_tmp - } - else: - target_vector = target_vectors_tmp - return self.__target_vector_to_grpc(target_vector) - - def __target_vector_to_grpc( - self, target_vector: Optional[TargetVectorJoinType] - ) -> Tuple[Optional[search_get_pb2.Targets], Optional[List[str]]]: - if target_vector is None: - return None, None - - if self._connection._weaviate_version.is_lower_than(1, 26, 0): - if isinstance(target_vector, str): - return None, [target_vector] - elif isinstance(target_vector, list) and len(target_vector) == 1: - return None, target_vector - else: - raise WeaviateUnsupportedFeatureError( - "Multiple target vectors in search", - str(self._connection._weaviate_version), - "1.26.0", - ) - - if isinstance(target_vector, str): - return search_get_pb2.Targets(target_vectors=[target_vector]), None - elif isinstance(target_vector, list): - return search_get_pb2.Targets(target_vectors=target_vector), None - else: - return target_vector.to_grpc_target_vector(self._connection._weaviate_version), None - - @staticmethod - def __vector_per_target( - vector: NearVectorInputType, targets: Optional[search_get_pb2.Targets], argument_name: str - ) -> Tuple[Optional[Dict[str, bytes]], Optional[bytes]]: - invalid_nv_exception = 
WeaviateInvalidInputError( - f"""{argument_name} argument can be: - - a list of numbers - - a dictionary with target names as keys and lists of numbers as values - received: {vector}""" - ) - if isinstance(vector, dict): - if targets is None or len(targets.target_vectors) != len(vector): - raise WeaviateInvalidInputError( - "The number of target vectors must be equal to the number of vectors." - ) - - vector_per_target: Dict[str, bytes] = {} - for key, value in vector.items(): - nv = _get_vector_v4(value) - - if ( - not isinstance(nv, list) - or len(nv) == 0 - or not isinstance(nv[0], get_args(NUMBER)) - ): - raise invalid_nv_exception - - vector_per_target[key] = struct.pack("{}f".format(len(nv)), *nv) - - return vector_per_target, None - else: - if len(vector) == 0: - raise invalid_nv_exception - - if _is_1d_vector(vector): - near_vector = _get_vector_v4(vector) - if not isinstance(near_vector, list): - raise invalid_nv_exception - return None, struct.pack("{}f".format(len(near_vector)), *near_vector) - else: - raise WeaviateInvalidInputError( - """Providing lists of lists has been deprecated. Please provide a dictionary with target names as - keys and lists of numbers as values.""" - ) - - @staticmethod - def __vector_for_target( - vector: NearVectorInputType, targets: Optional[search_get_pb2.Targets], argument_name: str - ) -> Tuple[ - Optional[List[search_get_pb2.VectorForTarget]], Optional[bytes], Optional[List[str]] - ]: - invalid_nv_exception = WeaviateInvalidInputError( - f"""{argument_name} argument can be: - - a list of numbers - - a dictionary with target names as keys and lists of numbers as values for multi target search. 
The keys must match the given target vectors - received: {vector} and {targets}.""" - ) - - vector_for_target: List[search_get_pb2.VectorForTarget] = [] - - def add_vector(val: List[float], target_name: str) -> None: - vec = _get_vector_v4(val) - - if ( - not isinstance(vec, list) - or len(vec) == 0 - or not isinstance(vec[0], get_args(NUMBER)) - ): - raise invalid_nv_exception - - vector_for_target.append( - search_get_pb2.VectorForTarget( - name=target_name, vector_bytes=struct.pack("{}f".format(len(vec)), *vec) - ) - ) - - if isinstance(vector, dict): - if ( - len(vector) == 0 - or targets is None - or len(set(targets.target_vectors)) != len(vector) - ): - raise invalid_nv_exception - target_vectors_tmp: List[str] = [] - for key, value in vector.items(): - # typing tools do not understand the type narrowing here - if _is_1d_vector(value): - val: List[float] = cast(List[float], value) - add_vector(val, key) - target_vectors_tmp.append(key) - else: - vals: List[List[float]] = cast(List[List[float]], value) - for inner_vector in vals: - add_vector(inner_vector, key) - target_vectors_tmp.append(key) - - return vector_for_target, None, target_vectors_tmp - else: - if _is_1d_vector(vector): - near_vector = _get_vector_v4(vector) - if not isinstance(near_vector, list): - raise invalid_nv_exception - return None, struct.pack("{}f".format(len(near_vector)), *near_vector), None - else: - raise WeaviateInvalidInputError( - """Providing lists of lists has been deprecated. 
Please provide a dictionary with target names as - keys and lists of numbers as values.""" - ) diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index d1d8f1775..61b8dc499 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -1,8 +1,37 @@ -from typing import Optional +import struct +import uuid as uuid_lib +from typing import ( + Any, + Dict, + List, + Literal, + Optional, + Union, + cast, + Tuple, + get_args, +) from weaviate.collections.classes.config import ConsistencyLevel +from weaviate.collections.classes.grpc import ( + _MultiTargetVectorJoin, + _HybridNearText, + _HybridNearVector, + HybridFusion, + HybridVectorType, + Move, + TargetVectorJoinType, + NearVectorInputType, +) from weaviate.connect import ConnectionV4 -from weaviate.proto.v1 import base_pb2 +from weaviate.exceptions import ( + WeaviateUnsupportedFeatureError, + WeaviateInvalidInputError, +) +from weaviate.proto.v1 import base_search_pb2, base_pb2 +from weaviate.types import NUMBER, UUID +from weaviate.util import _get_vector_v4, _is_1d_vector +from weaviate.validator import _ValidateArgument, _validate_input, _ExtraTypes PERMISSION_DENIED = "PERMISSION_DENIED" @@ -12,9 +41,11 @@ def __init__( self, connection: ConnectionV4, consistency_level: Optional[ConsistencyLevel], + validate_arguments: bool, ): self._connection = connection self._consistency_level = self._get_consistency_level(consistency_level) + self._validate_arguments = validate_arguments @staticmethod def _get_consistency_level( @@ -30,3 +61,530 @@ def _get_consistency_level( else: assert consistency_level.value == ConsistencyLevel.ALL return base_pb2.ConsistencyLevel.CONSISTENCY_LEVEL_ALL + + def _recompute_target_vector_to_grpc( + self, target_vector: Optional[TargetVectorJoinType], target_vectors_tmp: List[str] + ) -> Tuple[Optional[base_search_pb2.Targets], Optional[List[str]]]: + # reorder input for targets so they match the vectors + if 
isinstance(target_vector, _MultiTargetVectorJoin): + target_vector.target_vectors = target_vectors_tmp + if target_vector.weights is not None: + target_vector.weights = { + target: target_vector.weights[target] for target in target_vectors_tmp + } + else: + target_vector = target_vectors_tmp + return self.__target_vector_to_grpc(target_vector) + + def __target_vector_to_grpc( + self, target_vector: Optional[TargetVectorJoinType] + ) -> Tuple[Optional[base_search_pb2.Targets], Optional[List[str]]]: + if target_vector is None: + return None, None + + if self._connection._weaviate_version.is_lower_than(1, 26, 0): + if isinstance(target_vector, str): + return None, [target_vector] + elif isinstance(target_vector, list) and len(target_vector) == 1: + return None, target_vector + else: + raise WeaviateUnsupportedFeatureError( + "Multiple target vectors in search", + str(self._connection._weaviate_version), + "1.26.0", + ) + + if isinstance(target_vector, str): + return base_search_pb2.Targets(target_vectors=[target_vector]), None + elif isinstance(target_vector, list): + return base_search_pb2.Targets(target_vectors=target_vector), None + else: + return target_vector.to_grpc_target_vector(self._connection._weaviate_version), None + + @staticmethod + def _vector_per_target( + vector: NearVectorInputType, targets: Optional[base_search_pb2.Targets], argument_name: str + ) -> Tuple[Optional[Dict[str, bytes]], Optional[bytes]]: + invalid_nv_exception = WeaviateInvalidInputError( + f"""{argument_name} argument can be: + - a list of numbers + - a dictionary with target names as keys and lists of numbers as values + received: {vector}""" + ) + if isinstance(vector, dict): + if targets is None or len(targets.target_vectors) != len(vector): + raise WeaviateInvalidInputError( + "The number of target vectors must be equal to the number of vectors." 
+ ) + + vector_per_target: Dict[str, bytes] = {} + for key, value in vector.items(): + nv = _get_vector_v4(value) + + if ( + not isinstance(nv, list) + or len(nv) == 0 + or not isinstance(nv[0], get_args(NUMBER)) + ): + raise invalid_nv_exception + + vector_per_target[key] = struct.pack("{}f".format(len(nv)), *nv) + + return vector_per_target, None + else: + if len(vector) == 0: + raise invalid_nv_exception + + if _is_1d_vector(vector): + near_vector = _get_vector_v4(vector) + if not isinstance(near_vector, list): + raise invalid_nv_exception + return None, struct.pack("{}f".format(len(near_vector)), *near_vector) + else: + raise WeaviateInvalidInputError( + """Providing lists of lists has been deprecated. Please provide a dictionary with target names as + keys and lists of numbers as values.""" + ) + + @staticmethod + def _vector_for_target( + vector: NearVectorInputType, targets: Optional[base_search_pb2.Targets], argument_name: str + ) -> Tuple[ + Optional[List[base_search_pb2.VectorForTarget]], Optional[bytes], Optional[List[str]] + ]: + invalid_nv_exception = WeaviateInvalidInputError( + f"""{argument_name} argument can be: + - a list of numbers + - a dictionary with target names as keys and lists of numbers as values for multi target search. 
The keys must match the given target vectors + received: {vector} and {targets}.""" + ) + + vector_for_target: List[base_search_pb2.VectorForTarget] = [] + + def add_vector(val: List[float], target_name: str) -> None: + vec = _get_vector_v4(val) + + if ( + not isinstance(vec, list) + or len(vec) == 0 + or not isinstance(vec[0], get_args(NUMBER)) + ): + raise invalid_nv_exception + + vector_for_target.append( + base_search_pb2.VectorForTarget( + name=target_name, vector_bytes=struct.pack("{}f".format(len(vec)), *vec) + ) + ) + + if isinstance(vector, dict): + if ( + len(vector) == 0 + or targets is None + or len(set(targets.target_vectors)) != len(vector) + ): + raise invalid_nv_exception + target_vectors_tmp: List[str] = [] + for key, value in vector.items(): + # typing tools do not understand the type narrowing here + if _is_1d_vector(value): + val: List[float] = cast(List[float], value) + add_vector(val, key) + target_vectors_tmp.append(key) + else: + vals: List[List[float]] = cast(List[List[float]], value) + for inner_vector in vals: + add_vector(inner_vector, key) + target_vectors_tmp.append(key) + + return vector_for_target, None, target_vectors_tmp + else: + if _is_1d_vector(vector): + near_vector = _get_vector_v4(vector) + if not isinstance(near_vector, list): + raise invalid_nv_exception + return None, struct.pack("{}f".format(len(near_vector)), *near_vector), None + else: + raise WeaviateInvalidInputError( + """Providing lists of lists has been deprecated. 
Please provide a dictionary with target names as + keys and lists of numbers as values.""" + ) + + def _parse_near_options( + self, + certainty: Optional[NUMBER] = None, + distance: Optional[NUMBER] = None, + ) -> Tuple[Optional[float], Optional[float]]: + if self._validate_arguments: + _validate_input( + [ + _ValidateArgument([float, int, None], "certainty", certainty), + _ValidateArgument([float, int, None], "distance", distance), + ] + ) + return ( + float(certainty) if certainty is not None else None, + float(distance) if distance is not None else None, + ) + + def _parse_near_vector( + self, + near_vector: NearVectorInputType, + certainty: Optional[NUMBER], + distance: Optional[NUMBER], + target_vector: Optional[TargetVectorJoinType], + ) -> base_search_pb2.NearVector: + if self._validate_arguments: + _validate_input( + [ + _ValidateArgument( + [ + List, + Dict, + _ExtraTypes.PANDAS, + _ExtraTypes.POLARS, + _ExtraTypes.NUMPY, + _ExtraTypes.TF, + ], + "near_vector", + near_vector, + ), + _ValidateArgument( + [str, None, List, _MultiTargetVectorJoin], "target_vector", target_vector + ), + ] + ) + + certainty, distance = self._parse_near_options(certainty, distance) + + targets, target_vectors = self.__target_vector_to_grpc(target_vector) + + if _is_1d_vector(near_vector) and len(near_vector) > 0: + # fast path for simple vector + near_vector_grpc: Optional[bytes] = struct.pack( + "{}f".format(len(near_vector)), *near_vector + ) + vector_per_target_tmp = None + vector_for_targets = None + else: + if self._connection._weaviate_version.is_lower_than(1, 27, 0): + vector_per_target_tmp, near_vector_grpc = self._vector_per_target( + near_vector, targets, "near_vector" + ) + vector_for_targets = None + else: + vector_for_targets, near_vector_grpc, target_vectors_tmp = self._vector_for_target( + near_vector, targets, "near_vector" + ) + vector_per_target_tmp = None + if target_vectors_tmp is not None: + targets, target_vectors = self._recompute_target_vector_to_grpc( + 
target_vector, target_vectors_tmp + ) + return base_search_pb2.NearVector( + vector_bytes=near_vector_grpc, + certainty=certainty, + distance=distance, + targets=targets, + target_vectors=target_vectors, + vector_per_target=vector_per_target_tmp, + vector_for_targets=vector_for_targets, + ) + + @staticmethod + def __parse_move(move: Optional[Move]) -> Optional[base_search_pb2.NearTextSearch.Move]: + return ( + base_search_pb2.NearTextSearch.Move( + force=move.force, + concepts=move._concepts_list, + uuids=move._objects_list, + ) + if move is not None + else None + ) + + def _parse_near_text( + self, + near_text: Union[List[str], str], + certainty: Optional[NUMBER], + distance: Optional[NUMBER], + move_to: Optional[Move], + move_away: Optional[Move], + target_vector: Optional[TargetVectorJoinType], + ) -> base_search_pb2.NearTextSearch: + if self._validate_arguments: + _validate_input( + [ + _ValidateArgument([List, str], "near_text", near_text), + _ValidateArgument([Move, None], "move_away", move_away), + _ValidateArgument([Move, None], "move_to", move_to), + _ValidateArgument( + [str, List, _MultiTargetVectorJoin, None], "target_vector", target_vector + ), + ] + ) + + if isinstance(near_text, str): + near_text = [near_text] + certainty, distance = self._parse_near_options(certainty, distance) + targets, target_vector = self.__target_vector_to_grpc(target_vector) + + return base_search_pb2.NearTextSearch( + query=near_text, + certainty=certainty, + distance=distance, + move_away=self.__parse_move(move_away), + move_to=self.__parse_move(move_to), + targets=targets, + target_vectors=target_vector, + ) + + def _parse_near_object( + self, + near_object: UUID, + certainty: Optional[NUMBER], + distance: Optional[NUMBER], + target_vector: Optional[TargetVectorJoinType], + ) -> base_search_pb2.NearObject: + if self._validate_arguments: + _validate_input( + [ + _ValidateArgument([str, uuid_lib.UUID], "near_object", near_object), + _ValidateArgument( + [str, None, List, 
_MultiTargetVectorJoin], "target_vector", target_vector + ), + ] + ) + + certainty, distance = self._parse_near_options(certainty, distance) + + targets, target_vector = self.__target_vector_to_grpc(target_vector) + + return base_search_pb2.NearObject( + id=str(near_object), + certainty=certainty, + distance=distance, + targets=targets, + target_vectors=target_vector, + ) + + def _parse_media( + self, + media: str, + type_: Literal["audio", "depth", "image", "imu", "thermal", "video"], + certainty: Optional[NUMBER], + distance: Optional[NUMBER], + target_vector: Optional[TargetVectorJoinType], + ) -> dict: + if self._validate_arguments: + _validate_input( + [ + _ValidateArgument([str], "media", media), + _ValidateArgument( + [str, None, List, _MultiTargetVectorJoin], "target_vector", target_vector + ), + ] + ) + + certainty, distance = self._parse_near_options(certainty, distance) + + kwargs: Dict[str, Any] = {} + targets, target_vector = self.__target_vector_to_grpc(target_vector) + if type_ == "audio": + kwargs["near_audio"] = base_search_pb2.NearAudioSearch( + audio=media, + distance=distance, + certainty=certainty, + target_vectors=target_vector, + targets=targets, + ) + elif type_ == "depth": + kwargs["near_depth"] = base_search_pb2.NearDepthSearch( + depth=media, + distance=distance, + certainty=certainty, + target_vectors=target_vector, + targets=targets, + ) + elif type_ == "image": + kwargs["near_image"] = base_search_pb2.NearImageSearch( + image=media, + distance=distance, + certainty=certainty, + target_vectors=target_vector, + targets=targets, + ) + elif type_ == "imu": + kwargs["near_imu"] = base_search_pb2.NearIMUSearch( + imu=media, + distance=distance, + certainty=certainty, + target_vectors=target_vector, + targets=targets, + ) + elif type_ == "thermal": + kwargs["near_thermal"] = base_search_pb2.NearThermalSearch( + thermal=media, + distance=distance, + certainty=certainty, + target_vectors=target_vector, + targets=targets, + ) + elif type_ == 
"video": + kwargs["near_video"] = base_search_pb2.NearVideoSearch( + video=media, + distance=distance, + certainty=certainty, + target_vectors=target_vector, + targets=targets, + ) + else: + raise ValueError( + f"type_ must be one of ['audio', 'depth', 'image', 'imu', 'thermal', 'video'], but got {type_}" + ) + return kwargs + + def _parse_hybrid( + self, + query: Optional[str], + alpha: Optional[float], + vector: Optional[HybridVectorType], + properties: Optional[List[str]], + fusion_type: Optional[HybridFusion], + distance: Optional[NUMBER], + target_vector: Optional[TargetVectorJoinType], + ) -> Union[base_search_pb2.Hybrid, None]: + if self._connection._weaviate_version.is_lower_than(1, 25, 0) and ( + isinstance(vector, _HybridNearText) or isinstance(vector, _HybridNearVector) + ): + raise WeaviateUnsupportedFeatureError( + "Hybrid search with NearText or NearVector", + str(self._connection._weaviate_version), + "1.25.0", + ) + if self._validate_arguments: + _validate_input( + [ + _ValidateArgument([None, str], "query", query), + _ValidateArgument([float, int, None], "alpha", alpha), + _ValidateArgument( + [ + List, + Dict, + _ExtraTypes.PANDAS, + _ExtraTypes.POLARS, + _ExtraTypes.NUMPY, + _ExtraTypes.TF, + _HybridNearText, + _HybridNearVector, + None, + ], + "vector", + vector, + ), + _ValidateArgument([List, None], "properties", properties), + _ValidateArgument([HybridFusion, None], "fusion_type", fusion_type), + _ValidateArgument( + [str, None, List, _MultiTargetVectorJoin], "target_vector", target_vector + ), + ] + ) + + # Set hybrid search to only query the other search-type if one of the two is not set + if query is None: + alpha = 1 + + targets, target_vectors = self.__target_vector_to_grpc(target_vector) + + near_text, near_vector, vector_bytes = None, None, None + + if vector is None: + pass + elif isinstance(vector, list) and len(vector) > 0 and isinstance(vector[0], float): + # fast path for simple vector + vector_bytes = 
struct.pack("{}f".format(len(vector)), *vector) + elif isinstance(vector, _HybridNearText): + near_text = base_search_pb2.NearTextSearch( + query=[vector.text] if isinstance(vector.text, str) else vector.text, + certainty=vector.certainty, + distance=vector.distance, + move_away=self.__parse_move(vector.move_away), + move_to=self.__parse_move(vector.move_to), + ) + elif isinstance(vector, _HybridNearVector): + if self._connection._weaviate_version.is_lower_than(1, 27, 0): + vector_per_target_tmp, vector_bytes_tmp = self._vector_per_target( + vector.vector, targets, "vector" + ) + vector_for_targets_tmp = None + else: + ( + vector_for_targets_tmp, + vector_bytes_tmp, + target_vectors_tmp, + ) = self._vector_for_target(vector.vector, targets, "vector") + vector_per_target_tmp = None + if target_vectors_tmp is not None: + targets, target_vectors = self._recompute_target_vector_to_grpc( + target_vector, target_vectors_tmp + ) + + near_vector = base_search_pb2.NearVector( + vector_bytes=vector_bytes_tmp, + certainty=vector.certainty, + distance=vector.distance, + vector_per_target=vector_per_target_tmp, + vector_for_targets=vector_for_targets_tmp, + ) + else: + if self._connection._weaviate_version.is_lower_than(1, 27, 0): + vector_per_target_tmp, vector_bytes_tmp = self._vector_per_target( + vector, targets, "vector" + ) + vector_for_targets_tmp = None + else: + ( + vector_for_targets_tmp, + vector_bytes_tmp, + target_vectors_tmp, + ) = self._vector_for_target(vector, targets, "vector") + vector_per_target_tmp = None + if target_vectors_tmp is not None: + targets, target_vectors = self._recompute_target_vector_to_grpc( + target_vector, target_vectors_tmp + ) + else: + targets, target_vectors = self.__target_vector_to_grpc(target_vector) + + if vector_per_target_tmp is not None or vector_for_targets_tmp is not None: + near_vector = base_search_pb2.NearVector( + vector_bytes=vector_bytes_tmp, + vector_per_target=vector_per_target_tmp, + 
vector_for_targets=vector_for_targets_tmp, + ) + else: + vector_bytes = vector_bytes_tmp + + return ( + base_search_pb2.Hybrid( + properties=properties, + query=query, + alpha=float(alpha) if alpha is not None else None, + fusion_type=( + cast( + base_search_pb2.Hybrid.FusionType, + base_search_pb2.Hybrid.FusionType.Value(fusion_type.value), + ) + if fusion_type is not None + else None + ), + target_vectors=target_vectors, + targets=targets, + near_text=near_text, + near_vector=near_vector, + vector_bytes=vector_bytes, + vector_distance=distance, + ) + if query is not None or vector is not None + else None + ) diff --git a/weaviate/collections/grpc/tenants.py b/weaviate/collections/grpc/tenants.py index d160bfc12..2bb77927b 100644 --- a/weaviate/collections/grpc/tenants.py +++ b/weaviate/collections/grpc/tenants.py @@ -18,7 +18,7 @@ def __init__( name: str, consistency_level: Optional[ConsistencyLevel], ): - super().__init__(connection, consistency_level) + super().__init__(connection, consistency_level, False) self._name: str = name async def get(self, names: Optional[Sequence[str]]) -> tenants_pb2.TenantsGetReply: diff --git a/weaviate/proto/v1/aggregate_pb2.py b/weaviate/proto/v1/aggregate_pb2.py index 627806fbf..5c8a81d04 100644 --- a/weaviate/proto/v1/aggregate_pb2.py +++ b/weaviate/proto/v1/aggregate_pb2.py @@ -19,10 +19,11 @@ from weaviate.proto.v1 import base_pb2 as v1_dot_base__pb2 +from weaviate.proto.v1 import base_search_pb2 as v1_dot_base__search__pb2 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x12v1/aggregate.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto"\xed\x0b\n\x10\x41ggregateRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12\x15\n\robjects_count\x18\x14 \x01(\x08\x12?\n\x0c\x61ggregations\x18\x15 \x03(\x0b\x32).weaviate.v1.AggregateRequest.Aggregation\x12\x19\n\x0cobject_limit\x18\x1e \x01(\rH\x00\x88\x01\x01\x12<\n\x08group_by\x18\x1f 
\x01(\x0b\x32%.weaviate.v1.AggregateRequest.GroupByH\x01\x88\x01\x01\x12\x12\n\x05limit\x18 \x01(\rH\x02\x88\x01\x01\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x03\x88\x01\x01\x1a\xde\x08\n\x0b\x41ggregation\x12\x10\n\x08property\x18\x01 \x01(\t\x12@\n\x03int\x18\x02 \x01(\x0b\x32\x31.weaviate.v1.AggregateRequest.Aggregation.IntegerH\x00\x12\x42\n\x06number\x18\x03 \x01(\x0b\x32\x30.weaviate.v1.AggregateRequest.Aggregation.NumberH\x00\x12>\n\x04text\x18\x04 \x01(\x0b\x32..weaviate.v1.AggregateRequest.Aggregation.TextH\x00\x12\x44\n\x07\x62oolean\x18\x05 \x01(\x0b\x32\x31.weaviate.v1.AggregateRequest.Aggregation.BooleanH\x00\x12>\n\x04\x64\x61te\x18\x06 \x01(\x0b\x32..weaviate.v1.AggregateRequest.Aggregation.DateH\x00\x12H\n\treference\x18\x07 \x01(\x0b\x32\x33.weaviate.v1.AggregateRequest.Aggregation.ReferenceH\x00\x1a\x81\x01\n\x07Integer\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0b\n\x03sum\x18\x03 \x01(\x08\x12\x0c\n\x04mean\x18\x04 \x01(\x08\x12\x0c\n\x04mode\x18\x05 \x01(\x08\x12\x0e\n\x06median\x18\x06 \x01(\x08\x12\x0f\n\x07maximum\x18\x07 \x01(\x08\x12\x0f\n\x07minimum\x18\x08 \x01(\x08\x1a\x80\x01\n\x06Number\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0b\n\x03sum\x18\x03 \x01(\x08\x12\x0c\n\x04mean\x18\x04 \x01(\x08\x12\x0c\n\x04mode\x18\x05 \x01(\x08\x12\x0e\n\x06median\x18\x06 \x01(\x08\x12\x0f\n\x07maximum\x18\x07 \x01(\x08\x12\x0f\n\x07minimum\x18\x08 \x01(\x08\x1aw\n\x04Text\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x16\n\x0etop_occurences\x18\x03 \x01(\x08\x12!\n\x14top_occurences_limit\x18\x04 \x01(\rH\x00\x88\x01\x01\x42\x17\n\x15_top_occurences_limit\x1a\x82\x01\n\x07\x42oolean\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x12\n\ntotal_true\x18\x03 \x01(\x08\x12\x13\n\x0btotal_false\x18\x04 \x01(\x08\x12\x17\n\x0fpercentage_true\x18\x05 \x01(\x08\x12\x18\n\x10percentage_false\x18\x06 
\x01(\x08\x1a\x63\n\x04\x44\x61te\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0e\n\x06median\x18\x03 \x01(\x08\x12\x0c\n\x04mode\x18\x04 \x01(\x08\x12\x0f\n\x07maximum\x18\x05 \x01(\x08\x12\x0f\n\x07minimum\x18\x06 \x01(\x08\x1a.\n\tReference\x12\x0c\n\x04type\x18\x01 \x01(\x08\x12\x13\n\x0bpointing_to\x18\x02 \x01(\x08\x42\r\n\x0b\x61ggregation\x1a/\n\x07GroupBy\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x10\n\x08property\x18\x02 \x01(\tB\x0f\n\r_object_limitB\x0b\n\t_group_byB\x08\n\x06_limitB\n\n\x08_filters"\x89\x01\n\x0e\x41ggregateReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12\x32\n\x06result\x18\x02 \x01(\x0b\x32".weaviate.v1.AggregateReply.Result\x1a\x35\n\x06Result\x12+\n\x06groups\x18\x01 \x03(\x0b\x32\x1b.weaviate.v1.AggregateGroup"\xed\x13\n\x0e\x41ggregateGroup\x12\x1a\n\robjects_count\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x43\n\x0c\x61ggregations\x18\x02 \x01(\x0b\x32(.weaviate.v1.AggregateGroup.AggregationsH\x01\x88\x01\x01\x12>\n\ngrouped_by\x18\x03 \x01(\x0b\x32%.weaviate.v1.AggregateGroup.GroupedByH\x02\x88\x01\x01\x1a\xc0\x0f\n\x0c\x41ggregations\x12J\n\x0c\x61ggregations\x18\x01 \x03(\x0b\x32\x34.weaviate.v1.AggregateGroup.Aggregations.Aggregation\x1a\xe3\x0e\n\x0b\x41ggregation\x12\x10\n\x08property\x18\x01 \x01(\t\x12K\n\x03int\x18\x02 \x01(\x0b\x32<.weaviate.v1.AggregateGroup.Aggregations.Aggregation.IntegerH\x00\x12M\n\x06number\x18\x03 \x01(\x0b\x32;.weaviate.v1.AggregateGroup.Aggregations.Aggregation.NumberH\x00\x12I\n\x04text\x18\x04 \x01(\x0b\x32\x39.weaviate.v1.AggregateGroup.Aggregations.Aggregation.TextH\x00\x12O\n\x07\x62oolean\x18\x05 \x01(\x0b\x32<.weaviate.v1.AggregateGroup.Aggregations.Aggregation.BooleanH\x00\x12I\n\x04\x64\x61te\x18\x06 \x01(\x0b\x32\x39.weaviate.v1.AggregateGroup.Aggregations.Aggregation.DateH\x00\x12S\n\treference\x18\x07 \x01(\x0b\x32>.weaviate.v1.AggregateGroup.Aggregations.Aggregation.ReferenceH\x00\x1a\xf9\x01\n\x07Integer\x12\x12\n\x05\x63ount\x18\x01 
\x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04mean\x18\x03 \x01(\x01H\x02\x88\x01\x01\x12\x13\n\x06median\x18\x04 \x01(\x01H\x03\x88\x01\x01\x12\x11\n\x04mode\x18\x05 \x01(\x03H\x04\x88\x01\x01\x12\x14\n\x07maximum\x18\x06 \x01(\x03H\x05\x88\x01\x01\x12\x14\n\x07minimum\x18\x07 \x01(\x03H\x06\x88\x01\x01\x12\x10\n\x03sum\x18\x08 \x01(\x03H\x07\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\x07\n\x05_meanB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimumB\x06\n\x04_sum\x1a\xf8\x01\n\x06Number\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04mean\x18\x03 \x01(\x01H\x02\x88\x01\x01\x12\x13\n\x06median\x18\x04 \x01(\x01H\x03\x88\x01\x01\x12\x11\n\x04mode\x18\x05 \x01(\x01H\x04\x88\x01\x01\x12\x14\n\x07maximum\x18\x06 \x01(\x01H\x05\x88\x01\x01\x12\x14\n\x07minimum\x18\x07 \x01(\x01H\x06\x88\x01\x01\x12\x10\n\x03sum\x18\x08 \x01(\x01H\x07\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\x07\n\x05_meanB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimumB\x06\n\x04_sum\x1a\xe4\x02\n\x04Text\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x65\n\x0etop_occurences\x18\x03 \x01(\x0b\x32H.weaviate.v1.AggregateGroup.Aggregations.Aggregation.Text.TopOccurrencesH\x02\x88\x01\x01\x1a\xa7\x01\n\x0eTopOccurrences\x12\x65\n\x05items\x18\x01 \x03(\x0b\x32V.weaviate.v1.AggregateGroup.Aggregations.Aggregation.Text.TopOccurrences.TopOccurrence\x1a.\n\rTopOccurrence\x12\r\n\x05value\x18\x01 \x01(\t\x12\x0e\n\x06occurs\x18\x02 \x01(\x03\x42\x08\n\x06_countB\x07\n\x05_typeB\x11\n\x0f_top_occurences\x1a\xfb\x01\n\x07\x42oolean\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ntotal_true\x18\x03 \x01(\x03H\x02\x88\x01\x01\x12\x18\n\x0btotal_false\x18\x04 
\x01(\x03H\x03\x88\x01\x01\x12\x1c\n\x0fpercentage_true\x18\x05 \x01(\x01H\x04\x88\x01\x01\x12\x1d\n\x10percentage_false\x18\x06 \x01(\x01H\x05\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\r\n\x0b_total_trueB\x0e\n\x0c_total_falseB\x12\n\x10_percentage_trueB\x13\n\x11_percentage_false\x1a\xc0\x01\n\x04\x44\x61te\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x13\n\x06median\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x11\n\x04mode\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x14\n\x07maximum\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x14\n\x07minimum\x18\x06 \x01(\tH\x05\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimum\x1a<\n\tReference\x12\x11\n\x04type\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x0bpointing_to\x18\x02 \x03(\tB\x07\n\x05_typeB\r\n\x0b\x61ggregation\x1a\xc4\x02\n\tGroupedBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x0e\n\x04text\x18\x02 \x01(\tH\x00\x12\r\n\x03int\x18\x03 \x01(\x03H\x00\x12\x11\n\x07\x62oolean\x18\x04 \x01(\x08H\x00\x12\x10\n\x06number\x18\x05 \x01(\x01H\x00\x12\'\n\x05texts\x18\x06 \x01(\x0b\x32\x16.weaviate.v1.TextArrayH\x00\x12%\n\x04ints\x18\x07 \x01(\x0b\x32\x15.weaviate.v1.IntArrayH\x00\x12-\n\x08\x62ooleans\x18\x08 \x01(\x0b\x32\x19.weaviate.v1.BooleanArrayH\x00\x12+\n\x07numbers\x18\t \x01(\x0b\x32\x18.weaviate.v1.NumberArrayH\x00\x12\x30\n\x03geo\x18\n \x01(\x0b\x32!.weaviate.v1.GeoCoordinatesFilterH\x00\x42\x07\n\x05valueB\x10\n\x0e_objects_countB\x0f\n\r_aggregationsB\r\n\x0b_grouped_byBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoAggregateZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' + b'\n\x12v1/aggregate.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto"\xe8\x0f\n\x10\x41ggregateRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12\x15\n\robjects_count\x18\x14 \x01(\x08\x12?\n\x0c\x61ggregations\x18\x15 
\x03(\x0b\x32).weaviate.v1.AggregateRequest.Aggregation\x12\x19\n\x0cobject_limit\x18\x1e \x01(\rH\x01\x88\x01\x01\x12<\n\x08group_by\x18\x1f \x01(\x0b\x32%.weaviate.v1.AggregateRequest.GroupByH\x02\x88\x01\x01\x12\x12\n\x05limit\x18 \x01(\rH\x03\x88\x01\x01\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12%\n\x06hybrid\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x00\x12.\n\x0bnear_vector\x18* \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x00\x12.\n\x0bnear_object\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x00\x12\x30\n\tnear_text\x18, \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\x00\x12\x32\n\nnear_image\x18- \x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\x00\x12\x32\n\nnear_audio\x18. \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x00\x12\x32\n\nnear_video\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x00\x12\x32\n\nnear_depth\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\x00\x12\x36\n\x0cnear_thermal\x18\x31 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x00\x12.\n\x08near_imu\x18\x32 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x00\x1a\xde\x08\n\x0b\x41ggregation\x12\x10\n\x08property\x18\x01 \x01(\t\x12@\n\x03int\x18\x02 \x01(\x0b\x32\x31.weaviate.v1.AggregateRequest.Aggregation.IntegerH\x00\x12\x42\n\x06number\x18\x03 \x01(\x0b\x32\x30.weaviate.v1.AggregateRequest.Aggregation.NumberH\x00\x12>\n\x04text\x18\x04 \x01(\x0b\x32..weaviate.v1.AggregateRequest.Aggregation.TextH\x00\x12\x44\n\x07\x62oolean\x18\x05 \x01(\x0b\x32\x31.weaviate.v1.AggregateRequest.Aggregation.BooleanH\x00\x12>\n\x04\x64\x61te\x18\x06 \x01(\x0b\x32..weaviate.v1.AggregateRequest.Aggregation.DateH\x00\x12H\n\treference\x18\x07 \x01(\x0b\x32\x33.weaviate.v1.AggregateRequest.Aggregation.ReferenceH\x00\x1a\x81\x01\n\x07Integer\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0b\n\x03sum\x18\x03 \x01(\x08\x12\x0c\n\x04mean\x18\x04 \x01(\x08\x12\x0c\n\x04mode\x18\x05 \x01(\x08\x12\x0e\n\x06median\x18\x06 
\x01(\x08\x12\x0f\n\x07maximum\x18\x07 \x01(\x08\x12\x0f\n\x07minimum\x18\x08 \x01(\x08\x1a\x80\x01\n\x06Number\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0b\n\x03sum\x18\x03 \x01(\x08\x12\x0c\n\x04mean\x18\x04 \x01(\x08\x12\x0c\n\x04mode\x18\x05 \x01(\x08\x12\x0e\n\x06median\x18\x06 \x01(\x08\x12\x0f\n\x07maximum\x18\x07 \x01(\x08\x12\x0f\n\x07minimum\x18\x08 \x01(\x08\x1aw\n\x04Text\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x16\n\x0etop_occurences\x18\x03 \x01(\x08\x12!\n\x14top_occurences_limit\x18\x04 \x01(\rH\x00\x88\x01\x01\x42\x17\n\x15_top_occurences_limit\x1a\x82\x01\n\x07\x42oolean\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x12\n\ntotal_true\x18\x03 \x01(\x08\x12\x13\n\x0btotal_false\x18\x04 \x01(\x08\x12\x17\n\x0fpercentage_true\x18\x05 \x01(\x08\x12\x18\n\x10percentage_false\x18\x06 \x01(\x08\x1a\x63\n\x04\x44\x61te\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0e\n\x06median\x18\x03 \x01(\x08\x12\x0c\n\x04mode\x18\x04 \x01(\x08\x12\x0f\n\x07maximum\x18\x05 \x01(\x08\x12\x0f\n\x07minimum\x18\x06 \x01(\x08\x1a.\n\tReference\x12\x0c\n\x04type\x18\x01 \x01(\x08\x12\x13\n\x0bpointing_to\x18\x02 \x01(\x08\x42\r\n\x0b\x61ggregation\x1a/\n\x07GroupBy\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x10\n\x08property\x18\x02 \x01(\tB\x08\n\x06searchB\x0f\n\r_object_limitB\x0b\n\t_group_byB\x08\n\x06_limitB\n\n\x08_filters"\x89\x01\n\x0e\x41ggregateReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12\x32\n\x06result\x18\x02 \x01(\x0b\x32".weaviate.v1.AggregateReply.Result\x1a\x35\n\x06Result\x12+\n\x06groups\x18\x01 \x03(\x0b\x32\x1b.weaviate.v1.AggregateGroup"\xed\x13\n\x0e\x41ggregateGroup\x12\x1a\n\robjects_count\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x43\n\x0c\x61ggregations\x18\x02 \x01(\x0b\x32(.weaviate.v1.AggregateGroup.AggregationsH\x01\x88\x01\x01\x12>\n\ngrouped_by\x18\x03 
\x01(\x0b\x32%.weaviate.v1.AggregateGroup.GroupedByH\x02\x88\x01\x01\x1a\xc0\x0f\n\x0c\x41ggregations\x12J\n\x0c\x61ggregations\x18\x01 \x03(\x0b\x32\x34.weaviate.v1.AggregateGroup.Aggregations.Aggregation\x1a\xe3\x0e\n\x0b\x41ggregation\x12\x10\n\x08property\x18\x01 \x01(\t\x12K\n\x03int\x18\x02 \x01(\x0b\x32<.weaviate.v1.AggregateGroup.Aggregations.Aggregation.IntegerH\x00\x12M\n\x06number\x18\x03 \x01(\x0b\x32;.weaviate.v1.AggregateGroup.Aggregations.Aggregation.NumberH\x00\x12I\n\x04text\x18\x04 \x01(\x0b\x32\x39.weaviate.v1.AggregateGroup.Aggregations.Aggregation.TextH\x00\x12O\n\x07\x62oolean\x18\x05 \x01(\x0b\x32<.weaviate.v1.AggregateGroup.Aggregations.Aggregation.BooleanH\x00\x12I\n\x04\x64\x61te\x18\x06 \x01(\x0b\x32\x39.weaviate.v1.AggregateGroup.Aggregations.Aggregation.DateH\x00\x12S\n\treference\x18\x07 \x01(\x0b\x32>.weaviate.v1.AggregateGroup.Aggregations.Aggregation.ReferenceH\x00\x1a\xf9\x01\n\x07Integer\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04mean\x18\x03 \x01(\x01H\x02\x88\x01\x01\x12\x13\n\x06median\x18\x04 \x01(\x01H\x03\x88\x01\x01\x12\x11\n\x04mode\x18\x05 \x01(\x03H\x04\x88\x01\x01\x12\x14\n\x07maximum\x18\x06 \x01(\x03H\x05\x88\x01\x01\x12\x14\n\x07minimum\x18\x07 \x01(\x03H\x06\x88\x01\x01\x12\x10\n\x03sum\x18\x08 \x01(\x03H\x07\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\x07\n\x05_meanB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimumB\x06\n\x04_sum\x1a\xf8\x01\n\x06Number\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04mean\x18\x03 \x01(\x01H\x02\x88\x01\x01\x12\x13\n\x06median\x18\x04 \x01(\x01H\x03\x88\x01\x01\x12\x11\n\x04mode\x18\x05 \x01(\x01H\x04\x88\x01\x01\x12\x14\n\x07maximum\x18\x06 \x01(\x01H\x05\x88\x01\x01\x12\x14\n\x07minimum\x18\x07 \x01(\x01H\x06\x88\x01\x01\x12\x10\n\x03sum\x18\x08 
\x01(\x01H\x07\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\x07\n\x05_meanB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimumB\x06\n\x04_sum\x1a\xe4\x02\n\x04Text\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x65\n\x0etop_occurences\x18\x03 \x01(\x0b\x32H.weaviate.v1.AggregateGroup.Aggregations.Aggregation.Text.TopOccurrencesH\x02\x88\x01\x01\x1a\xa7\x01\n\x0eTopOccurrences\x12\x65\n\x05items\x18\x01 \x03(\x0b\x32V.weaviate.v1.AggregateGroup.Aggregations.Aggregation.Text.TopOccurrences.TopOccurrence\x1a.\n\rTopOccurrence\x12\r\n\x05value\x18\x01 \x01(\t\x12\x0e\n\x06occurs\x18\x02 \x01(\x03\x42\x08\n\x06_countB\x07\n\x05_typeB\x11\n\x0f_top_occurences\x1a\xfb\x01\n\x07\x42oolean\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ntotal_true\x18\x03 \x01(\x03H\x02\x88\x01\x01\x12\x18\n\x0btotal_false\x18\x04 \x01(\x03H\x03\x88\x01\x01\x12\x1c\n\x0fpercentage_true\x18\x05 \x01(\x01H\x04\x88\x01\x01\x12\x1d\n\x10percentage_false\x18\x06 \x01(\x01H\x05\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\r\n\x0b_total_trueB\x0e\n\x0c_total_falseB\x12\n\x10_percentage_trueB\x13\n\x11_percentage_false\x1a\xc0\x01\n\x04\x44\x61te\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x13\n\x06median\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x11\n\x04mode\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x14\n\x07maximum\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x14\n\x07minimum\x18\x06 \x01(\tH\x05\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimum\x1a<\n\tReference\x12\x11\n\x04type\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x0bpointing_to\x18\x02 \x03(\tB\x07\n\x05_typeB\r\n\x0b\x61ggregation\x1a\xc4\x02\n\tGroupedBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x0e\n\x04text\x18\x02 \x01(\tH\x00\x12\r\n\x03int\x18\x03 
\x01(\x03H\x00\x12\x11\n\x07\x62oolean\x18\x04 \x01(\x08H\x00\x12\x10\n\x06number\x18\x05 \x01(\x01H\x00\x12\'\n\x05texts\x18\x06 \x01(\x0b\x32\x16.weaviate.v1.TextArrayH\x00\x12%\n\x04ints\x18\x07 \x01(\x0b\x32\x15.weaviate.v1.IntArrayH\x00\x12-\n\x08\x62ooleans\x18\x08 \x01(\x0b\x32\x19.weaviate.v1.BooleanArrayH\x00\x12+\n\x07numbers\x18\t \x01(\x0b\x32\x18.weaviate.v1.NumberArrayH\x00\x12\x30\n\x03geo\x18\n \x01(\x0b\x32!.weaviate.v1.GeoCoordinatesFilterH\x00\x42\x07\n\x05valueB\x10\n\x0e_objects_countB\x0f\n\r_aggregationsB\r\n\x0b_grouped_byBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoAggregateZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' ) _globals = globals() @@ -33,56 +34,56 @@ _globals["DESCRIPTOR"]._serialized_options = ( b"\n#io.weaviate.client.grpc.protocol.v1B\026WeaviateProtoAggregateZ4github.com/weaviate/weaviate/grpc/generated;protocol" ) - _globals["_AGGREGATEREQUEST"]._serialized_start = 51 - _globals["_AGGREGATEREQUEST"]._serialized_end = 1568 - _globals["_AGGREGATEREQUEST_AGGREGATION"]._serialized_start = 349 - _globals["_AGGREGATEREQUEST_AGGREGATION"]._serialized_end = 1467 - _globals["_AGGREGATEREQUEST_AGGREGATION_INTEGER"]._serialized_start = 789 - _globals["_AGGREGATEREQUEST_AGGREGATION_INTEGER"]._serialized_end = 918 - _globals["_AGGREGATEREQUEST_AGGREGATION_NUMBER"]._serialized_start = 921 - _globals["_AGGREGATEREQUEST_AGGREGATION_NUMBER"]._serialized_end = 1049 - _globals["_AGGREGATEREQUEST_AGGREGATION_TEXT"]._serialized_start = 1051 - _globals["_AGGREGATEREQUEST_AGGREGATION_TEXT"]._serialized_end = 1170 - _globals["_AGGREGATEREQUEST_AGGREGATION_BOOLEAN"]._serialized_start = 1173 - _globals["_AGGREGATEREQUEST_AGGREGATION_BOOLEAN"]._serialized_end = 1303 - _globals["_AGGREGATEREQUEST_AGGREGATION_DATE"]._serialized_start = 1305 - _globals["_AGGREGATEREQUEST_AGGREGATION_DATE"]._serialized_end = 1404 - _globals["_AGGREGATEREQUEST_AGGREGATION_REFERENCE"]._serialized_start = 1406 - 
_globals["_AGGREGATEREQUEST_AGGREGATION_REFERENCE"]._serialized_end = 1452 - _globals["_AGGREGATEREQUEST_GROUPBY"]._serialized_start = 1469 - _globals["_AGGREGATEREQUEST_GROUPBY"]._serialized_end = 1516 - _globals["_AGGREGATEREPLY"]._serialized_start = 1571 - _globals["_AGGREGATEREPLY"]._serialized_end = 1708 - _globals["_AGGREGATEREPLY_RESULT"]._serialized_start = 1655 - _globals["_AGGREGATEREPLY_RESULT"]._serialized_end = 1708 - _globals["_AGGREGATEGROUP"]._serialized_start = 1711 - _globals["_AGGREGATEGROUP"]._serialized_end = 4252 - _globals["_AGGREGATEGROUP_AGGREGATIONS"]._serialized_start = 1891 - _globals["_AGGREGATEGROUP_AGGREGATIONS"]._serialized_end = 3875 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION"]._serialized_start = 1984 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION"]._serialized_end = 3875 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_INTEGER"]._serialized_start = 2490 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_INTEGER"]._serialized_end = 2739 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_NUMBER"]._serialized_start = 2742 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_NUMBER"]._serialized_end = 2990 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT"]._serialized_start = 2993 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT"]._serialized_end = 3349 + _globals["_AGGREGATEREQUEST"]._serialized_start = 73 + _globals["_AGGREGATEREQUEST"]._serialized_end = 2097 + _globals["_AGGREGATEREQUEST_AGGREGATION"]._serialized_start = 868 + _globals["_AGGREGATEREQUEST_AGGREGATION"]._serialized_end = 1986 + _globals["_AGGREGATEREQUEST_AGGREGATION_INTEGER"]._serialized_start = 1308 + _globals["_AGGREGATEREQUEST_AGGREGATION_INTEGER"]._serialized_end = 1437 + _globals["_AGGREGATEREQUEST_AGGREGATION_NUMBER"]._serialized_start = 1440 + _globals["_AGGREGATEREQUEST_AGGREGATION_NUMBER"]._serialized_end = 1568 + _globals["_AGGREGATEREQUEST_AGGREGATION_TEXT"]._serialized_start = 1570 + 
_globals["_AGGREGATEREQUEST_AGGREGATION_TEXT"]._serialized_end = 1689 + _globals["_AGGREGATEREQUEST_AGGREGATION_BOOLEAN"]._serialized_start = 1692 + _globals["_AGGREGATEREQUEST_AGGREGATION_BOOLEAN"]._serialized_end = 1822 + _globals["_AGGREGATEREQUEST_AGGREGATION_DATE"]._serialized_start = 1824 + _globals["_AGGREGATEREQUEST_AGGREGATION_DATE"]._serialized_end = 1923 + _globals["_AGGREGATEREQUEST_AGGREGATION_REFERENCE"]._serialized_start = 1925 + _globals["_AGGREGATEREQUEST_AGGREGATION_REFERENCE"]._serialized_end = 1971 + _globals["_AGGREGATEREQUEST_GROUPBY"]._serialized_start = 1988 + _globals["_AGGREGATEREQUEST_GROUPBY"]._serialized_end = 2035 + _globals["_AGGREGATEREPLY"]._serialized_start = 2100 + _globals["_AGGREGATEREPLY"]._serialized_end = 2237 + _globals["_AGGREGATEREPLY_RESULT"]._serialized_start = 2184 + _globals["_AGGREGATEREPLY_RESULT"]._serialized_end = 2237 + _globals["_AGGREGATEGROUP"]._serialized_start = 2240 + _globals["_AGGREGATEGROUP"]._serialized_end = 4781 + _globals["_AGGREGATEGROUP_AGGREGATIONS"]._serialized_start = 2420 + _globals["_AGGREGATEGROUP_AGGREGATIONS"]._serialized_end = 4404 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION"]._serialized_start = 2513 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION"]._serialized_end = 4404 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_INTEGER"]._serialized_start = 3019 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_INTEGER"]._serialized_end = 3268 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_NUMBER"]._serialized_start = 3271 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_NUMBER"]._serialized_end = 3519 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT"]._serialized_start = 3522 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT"]._serialized_end = 3878 _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES"]._serialized_start = ( - 3144 + 3673 ) - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES"]._serialized_end = 
3311 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES"]._serialized_end = 3840 _globals[ "_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES_TOPOCCURRENCE" - ]._serialized_start = 3265 + ]._serialized_start = 3794 _globals[ "_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES_TOPOCCURRENCE" - ]._serialized_end = 3311 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_BOOLEAN"]._serialized_start = 3352 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_BOOLEAN"]._serialized_end = 3603 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_DATE"]._serialized_start = 3606 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_DATE"]._serialized_end = 3798 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_REFERENCE"]._serialized_start = 3800 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_REFERENCE"]._serialized_end = 3860 - _globals["_AGGREGATEGROUP_GROUPEDBY"]._serialized_start = 3878 - _globals["_AGGREGATEGROUP_GROUPEDBY"]._serialized_end = 4202 + ]._serialized_end = 3840 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_BOOLEAN"]._serialized_start = 3881 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_BOOLEAN"]._serialized_end = 4132 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_DATE"]._serialized_start = 4135 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_DATE"]._serialized_end = 4327 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_REFERENCE"]._serialized_start = 4329 + _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_REFERENCE"]._serialized_end = 4389 + _globals["_AGGREGATEGROUP_GROUPEDBY"]._serialized_start = 4407 + _globals["_AGGREGATEGROUP_GROUPEDBY"]._serialized_end = 4731 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/aggregate_pb2.pyi b/weaviate/proto/v1/aggregate_pb2.pyi index 4d2b10726..fcadbcc75 100644 --- a/weaviate/proto/v1/aggregate_pb2.pyi +++ b/weaviate/proto/v1/aggregate_pb2.pyi @@ -1,4 +1,5 @@ from weaviate.proto.v1 import base_pb2 as _base_pb2 
+from weaviate.proto.v1 import base_search_pb2 as _base_search_pb2 from google.protobuf.internal import containers as _containers from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message @@ -22,6 +23,16 @@ class AggregateRequest(_message.Message): "group_by", "limit", "filters", + "hybrid", + "near_vector", + "near_object", + "near_text", + "near_image", + "near_audio", + "near_video", + "near_depth", + "near_thermal", + "near_imu", ) class Aggregation(_message.Message): @@ -211,6 +222,16 @@ class AggregateRequest(_message.Message): GROUP_BY_FIELD_NUMBER: _ClassVar[int] LIMIT_FIELD_NUMBER: _ClassVar[int] FILTERS_FIELD_NUMBER: _ClassVar[int] + HYBRID_FIELD_NUMBER: _ClassVar[int] + NEAR_VECTOR_FIELD_NUMBER: _ClassVar[int] + NEAR_OBJECT_FIELD_NUMBER: _ClassVar[int] + NEAR_TEXT_FIELD_NUMBER: _ClassVar[int] + NEAR_IMAGE_FIELD_NUMBER: _ClassVar[int] + NEAR_AUDIO_FIELD_NUMBER: _ClassVar[int] + NEAR_VIDEO_FIELD_NUMBER: _ClassVar[int] + NEAR_DEPTH_FIELD_NUMBER: _ClassVar[int] + NEAR_THERMAL_FIELD_NUMBER: _ClassVar[int] + NEAR_IMU_FIELD_NUMBER: _ClassVar[int] collection: str tenant: str objects_count: bool @@ -219,6 +240,16 @@ class AggregateRequest(_message.Message): group_by: AggregateRequest.GroupBy limit: int filters: _base_pb2.Filters + hybrid: _base_search_pb2.Hybrid + near_vector: _base_search_pb2.NearVector + near_object: _base_search_pb2.NearObject + near_text: _base_search_pb2.NearTextSearch + near_image: _base_search_pb2.NearImageSearch + near_audio: _base_search_pb2.NearAudioSearch + near_video: _base_search_pb2.NearVideoSearch + near_depth: _base_search_pb2.NearDepthSearch + near_thermal: _base_search_pb2.NearThermalSearch + near_imu: _base_search_pb2.NearIMUSearch def __init__( self, collection: _Optional[str] = ..., @@ -229,6 +260,16 @@ class AggregateRequest(_message.Message): group_by: _Optional[_Union[AggregateRequest.GroupBy, _Mapping]] = ..., limit: _Optional[int] = ..., filters: 
_Optional[_Union[_base_pb2.Filters, _Mapping]] = ..., + hybrid: _Optional[_Union[_base_search_pb2.Hybrid, _Mapping]] = ..., + near_vector: _Optional[_Union[_base_search_pb2.NearVector, _Mapping]] = ..., + near_object: _Optional[_Union[_base_search_pb2.NearObject, _Mapping]] = ..., + near_text: _Optional[_Union[_base_search_pb2.NearTextSearch, _Mapping]] = ..., + near_image: _Optional[_Union[_base_search_pb2.NearImageSearch, _Mapping]] = ..., + near_audio: _Optional[_Union[_base_search_pb2.NearAudioSearch, _Mapping]] = ..., + near_video: _Optional[_Union[_base_search_pb2.NearVideoSearch, _Mapping]] = ..., + near_depth: _Optional[_Union[_base_search_pb2.NearDepthSearch, _Mapping]] = ..., + near_thermal: _Optional[_Union[_base_search_pb2.NearThermalSearch, _Mapping]] = ..., + near_imu: _Optional[_Union[_base_search_pb2.NearIMUSearch, _Mapping]] = ..., ) -> None: ... class AggregateReply(_message.Message): diff --git a/weaviate/proto/v1/base_search_pb2.py b/weaviate/proto/v1/base_search_pb2.py new file mode 100644 index 000000000..da0341806 --- /dev/null +++ b/weaviate/proto/v1/base_search_pb2.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE +# source: v1/base_search.proto +# Protobuf Python Version: 5.27.2 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, 5, 27, 2, "", "v1/base_search.proto" +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02"\xfa\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12\x36\n\x07weights\x18\x03 \x03(\x0b\x32!.weaviate.v1.Targets.WeightsEntryB\x02\x18\x01\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTarget\x1a.\n\x0cWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01"5\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cvector_bytes\x18\x02 \x01(\x0c"\xc5\x03\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\r\n\x05\x61lpha\x18\x04 \x01(\x02\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x14\n\x0cvector_bytes\x18\x06 \x01(\x0c\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n 
\x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthreshold"\x82\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x14\n\x0cvector_bytes\x18\x04 \x01(\x0c\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 
\x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 
\x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance")\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t*\xee\x01\n\x11\x43ombinationMethod\x12"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "v1.base_search_pb2", _globals) +if not _descriptor._USE_C_DESCRIPTORS: + _globals["DESCRIPTOR"]._loaded_options = None + _globals["DESCRIPTOR"]._serialized_options = ( + b"\n#io.weaviate.client.grpc.protocol.v1B\027WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocol" + ) + _globals["_TARGETS_WEIGHTSENTRY"]._loaded_options = None + _globals["_TARGETS_WEIGHTSENTRY"]._serialized_options = b"8\001" + _globals["_TARGETS"].fields_by_name["weights"]._loaded_options = None + _globals["_TARGETS"].fields_by_name["weights"]._serialized_options = b"\030\001" + _globals["_HYBRID"].fields_by_name["vector"]._loaded_options = None + _globals["_HYBRID"].fields_by_name["vector"]._serialized_options = b"\030\001" + _globals["_HYBRID"].fields_by_name["target_vectors"]._loaded_options = None + _globals["_HYBRID"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" + _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._loaded_options = None + _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_options = b"8\001" + _globals["_NEARVECTOR"].fields_by_name["vector"]._loaded_options = None + 
_globals["_NEARVECTOR"].fields_by_name["vector"]._serialized_options = b"\030\001" + _globals["_NEARVECTOR"].fields_by_name["target_vectors"]._loaded_options = None + _globals["_NEARVECTOR"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" + _globals["_NEARVECTOR"].fields_by_name["vector_per_target"]._loaded_options = None + _globals["_NEARVECTOR"].fields_by_name["vector_per_target"]._serialized_options = b"\030\001" + _globals["_NEAROBJECT"].fields_by_name["target_vectors"]._loaded_options = None + _globals["_NEAROBJECT"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" + _globals["_NEARTEXTSEARCH"].fields_by_name["target_vectors"]._loaded_options = None + _globals["_NEARTEXTSEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" + _globals["_NEARIMAGESEARCH"].fields_by_name["target_vectors"]._loaded_options = None + _globals["_NEARIMAGESEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" + _globals["_NEARAUDIOSEARCH"].fields_by_name["target_vectors"]._loaded_options = None + _globals["_NEARAUDIOSEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" + _globals["_NEARVIDEOSEARCH"].fields_by_name["target_vectors"]._loaded_options = None + _globals["_NEARVIDEOSEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" + _globals["_NEARDEPTHSEARCH"].fields_by_name["target_vectors"]._loaded_options = None + _globals["_NEARDEPTHSEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" + _globals["_NEARTHERMALSEARCH"].fields_by_name["target_vectors"]._loaded_options = None + _globals["_NEARTHERMALSEARCH"].fields_by_name[ + "target_vectors" + ]._serialized_options = b"\030\001" + _globals["_NEARIMUSEARCH"].fields_by_name["target_vectors"]._loaded_options = None + _globals["_NEARIMUSEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" + _globals["_COMBINATIONMETHOD"]._serialized_start = 2881 + 
_globals["_COMBINATIONMETHOD"]._serialized_end = 3119 + _globals["_WEIGHTSFORTARGET"]._serialized_start = 37 + _globals["_WEIGHTSFORTARGET"]._serialized_end = 87 + _globals["_TARGETS"]._serialized_start = 90 + _globals["_TARGETS"]._serialized_end = 340 + _globals["_TARGETS_WEIGHTSENTRY"]._serialized_start = 294 + _globals["_TARGETS_WEIGHTSENTRY"]._serialized_end = 340 + _globals["_VECTORFORTARGET"]._serialized_start = 342 + _globals["_VECTORFORTARGET"]._serialized_end = 395 + _globals["_HYBRID"]._serialized_start = 398 + _globals["_HYBRID"]._serialized_end = 851 + _globals["_HYBRID_FUSIONTYPE"]._serialized_start = 741 + _globals["_HYBRID_FUSIONTYPE"]._serialized_end = 838 + _globals["_NEARVECTOR"]._serialized_start = 854 + _globals["_NEARVECTOR"]._serialized_end = 1240 + _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_start = 1159 + _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_end = 1213 + _globals["_NEAROBJECT"]._serialized_start = 1243 + _globals["_NEAROBJECT"]._serialized_end = 1408 + _globals["_NEARTEXTSEARCH"]._serialized_start = 1411 + _globals["_NEARTEXTSEARCH"]._serialized_end = 1779 + _globals["_NEARTEXTSEARCH_MOVE"]._serialized_start = 1672 + _globals["_NEARTEXTSEARCH_MOVE"]._serialized_end = 1726 + _globals["_NEARIMAGESEARCH"]._serialized_start = 1782 + _globals["_NEARIMAGESEARCH"]._serialized_end = 1955 + _globals["_NEARAUDIOSEARCH"]._serialized_start = 1958 + _globals["_NEARAUDIOSEARCH"]._serialized_end = 2131 + _globals["_NEARVIDEOSEARCH"]._serialized_start = 2134 + _globals["_NEARVIDEOSEARCH"]._serialized_end = 2307 + _globals["_NEARDEPTHSEARCH"]._serialized_start = 2310 + _globals["_NEARDEPTHSEARCH"]._serialized_end = 2483 + _globals["_NEARTHERMALSEARCH"]._serialized_start = 2486 + _globals["_NEARTHERMALSEARCH"]._serialized_end = 2663 + _globals["_NEARIMUSEARCH"]._serialized_start = 2666 + _globals["_NEARIMUSEARCH"]._serialized_end = 2835 + _globals["_BM25"]._serialized_start = 2837 + _globals["_BM25"]._serialized_end = 2878 +# 
@@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/base_search_pb2.pyi b/weaviate/proto/v1/base_search_pb2.pyi new file mode 100644 index 000000000..1760f1485 --- /dev/null +++ b/weaviate/proto/v1/base_search_pb2.pyi @@ -0,0 +1,391 @@ +from google.protobuf.internal import containers as _containers +from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ( + ClassVar as _ClassVar, + Iterable as _Iterable, + Mapping as _Mapping, + Optional as _Optional, + Union as _Union, +) + +DESCRIPTOR: _descriptor.FileDescriptor + +class CombinationMethod(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + COMBINATION_METHOD_UNSPECIFIED: _ClassVar[CombinationMethod] + COMBINATION_METHOD_TYPE_SUM: _ClassVar[CombinationMethod] + COMBINATION_METHOD_TYPE_MIN: _ClassVar[CombinationMethod] + COMBINATION_METHOD_TYPE_AVERAGE: _ClassVar[CombinationMethod] + COMBINATION_METHOD_TYPE_RELATIVE_SCORE: _ClassVar[CombinationMethod] + COMBINATION_METHOD_TYPE_MANUAL: _ClassVar[CombinationMethod] + +COMBINATION_METHOD_UNSPECIFIED: CombinationMethod +COMBINATION_METHOD_TYPE_SUM: CombinationMethod +COMBINATION_METHOD_TYPE_MIN: CombinationMethod +COMBINATION_METHOD_TYPE_AVERAGE: CombinationMethod +COMBINATION_METHOD_TYPE_RELATIVE_SCORE: CombinationMethod +COMBINATION_METHOD_TYPE_MANUAL: CombinationMethod + +class WeightsForTarget(_message.Message): + __slots__ = ("target", "weight") + TARGET_FIELD_NUMBER: _ClassVar[int] + WEIGHT_FIELD_NUMBER: _ClassVar[int] + target: str + weight: float + def __init__(self, target: _Optional[str] = ..., weight: _Optional[float] = ...) -> None: ... 
+ +class Targets(_message.Message): + __slots__ = ("target_vectors", "combination", "weights", "weights_for_targets") + + class WeightsEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: float + def __init__(self, key: _Optional[str] = ..., value: _Optional[float] = ...) -> None: ... + + TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] + COMBINATION_FIELD_NUMBER: _ClassVar[int] + WEIGHTS_FIELD_NUMBER: _ClassVar[int] + WEIGHTS_FOR_TARGETS_FIELD_NUMBER: _ClassVar[int] + target_vectors: _containers.RepeatedScalarFieldContainer[str] + combination: CombinationMethod + weights: _containers.ScalarMap[str, float] + weights_for_targets: _containers.RepeatedCompositeFieldContainer[WeightsForTarget] + def __init__( + self, + target_vectors: _Optional[_Iterable[str]] = ..., + combination: _Optional[_Union[CombinationMethod, str]] = ..., + weights: _Optional[_Mapping[str, float]] = ..., + weights_for_targets: _Optional[_Iterable[_Union[WeightsForTarget, _Mapping]]] = ..., + ) -> None: ... + +class VectorForTarget(_message.Message): + __slots__ = ("name", "vector_bytes") + NAME_FIELD_NUMBER: _ClassVar[int] + VECTOR_BYTES_FIELD_NUMBER: _ClassVar[int] + name: str + vector_bytes: bytes + def __init__( + self, name: _Optional[str] = ..., vector_bytes: _Optional[bytes] = ... + ) -> None: ... 
+ +class Hybrid(_message.Message): + __slots__ = ( + "query", + "properties", + "vector", + "alpha", + "fusion_type", + "vector_bytes", + "target_vectors", + "near_text", + "near_vector", + "targets", + "vector_distance", + ) + + class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + FUSION_TYPE_UNSPECIFIED: _ClassVar[Hybrid.FusionType] + FUSION_TYPE_RANKED: _ClassVar[Hybrid.FusionType] + FUSION_TYPE_RELATIVE_SCORE: _ClassVar[Hybrid.FusionType] + + FUSION_TYPE_UNSPECIFIED: Hybrid.FusionType + FUSION_TYPE_RANKED: Hybrid.FusionType + FUSION_TYPE_RELATIVE_SCORE: Hybrid.FusionType + QUERY_FIELD_NUMBER: _ClassVar[int] + PROPERTIES_FIELD_NUMBER: _ClassVar[int] + VECTOR_FIELD_NUMBER: _ClassVar[int] + ALPHA_FIELD_NUMBER: _ClassVar[int] + FUSION_TYPE_FIELD_NUMBER: _ClassVar[int] + VECTOR_BYTES_FIELD_NUMBER: _ClassVar[int] + TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] + NEAR_TEXT_FIELD_NUMBER: _ClassVar[int] + NEAR_VECTOR_FIELD_NUMBER: _ClassVar[int] + TARGETS_FIELD_NUMBER: _ClassVar[int] + VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] + query: str + properties: _containers.RepeatedScalarFieldContainer[str] + vector: _containers.RepeatedScalarFieldContainer[float] + alpha: float + fusion_type: Hybrid.FusionType + vector_bytes: bytes + target_vectors: _containers.RepeatedScalarFieldContainer[str] + near_text: NearTextSearch + near_vector: NearVector + targets: Targets + vector_distance: float + def __init__( + self, + query: _Optional[str] = ..., + properties: _Optional[_Iterable[str]] = ..., + vector: _Optional[_Iterable[float]] = ..., + alpha: _Optional[float] = ..., + fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., + vector_bytes: _Optional[bytes] = ..., + target_vectors: _Optional[_Iterable[str]] = ..., + near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., + near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., + targets: _Optional[_Union[Targets, _Mapping]] = ..., + vector_distance: _Optional[float] = ..., + ) 
-> None: ... + +class NearVector(_message.Message): + __slots__ = ( + "vector", + "certainty", + "distance", + "vector_bytes", + "target_vectors", + "targets", + "vector_per_target", + "vector_for_targets", + ) + + class VectorPerTargetEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: bytes + def __init__(self, key: _Optional[str] = ..., value: _Optional[bytes] = ...) -> None: ... + + VECTOR_FIELD_NUMBER: _ClassVar[int] + CERTAINTY_FIELD_NUMBER: _ClassVar[int] + DISTANCE_FIELD_NUMBER: _ClassVar[int] + VECTOR_BYTES_FIELD_NUMBER: _ClassVar[int] + TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] + TARGETS_FIELD_NUMBER: _ClassVar[int] + VECTOR_PER_TARGET_FIELD_NUMBER: _ClassVar[int] + VECTOR_FOR_TARGETS_FIELD_NUMBER: _ClassVar[int] + vector: _containers.RepeatedScalarFieldContainer[float] + certainty: float + distance: float + vector_bytes: bytes + target_vectors: _containers.RepeatedScalarFieldContainer[str] + targets: Targets + vector_per_target: _containers.ScalarMap[str, bytes] + vector_for_targets: _containers.RepeatedCompositeFieldContainer[VectorForTarget] + def __init__( + self, + vector: _Optional[_Iterable[float]] = ..., + certainty: _Optional[float] = ..., + distance: _Optional[float] = ..., + vector_bytes: _Optional[bytes] = ..., + target_vectors: _Optional[_Iterable[str]] = ..., + targets: _Optional[_Union[Targets, _Mapping]] = ..., + vector_per_target: _Optional[_Mapping[str, bytes]] = ..., + vector_for_targets: _Optional[_Iterable[_Union[VectorForTarget, _Mapping]]] = ..., + ) -> None: ... 
+ +class NearObject(_message.Message): + __slots__ = ("id", "certainty", "distance", "target_vectors", "targets") + ID_FIELD_NUMBER: _ClassVar[int] + CERTAINTY_FIELD_NUMBER: _ClassVar[int] + DISTANCE_FIELD_NUMBER: _ClassVar[int] + TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] + TARGETS_FIELD_NUMBER: _ClassVar[int] + id: str + certainty: float + distance: float + target_vectors: _containers.RepeatedScalarFieldContainer[str] + targets: Targets + def __init__( + self, + id: _Optional[str] = ..., + certainty: _Optional[float] = ..., + distance: _Optional[float] = ..., + target_vectors: _Optional[_Iterable[str]] = ..., + targets: _Optional[_Union[Targets, _Mapping]] = ..., + ) -> None: ... + +class NearTextSearch(_message.Message): + __slots__ = ( + "query", + "certainty", + "distance", + "move_to", + "move_away", + "target_vectors", + "targets", + ) + + class Move(_message.Message): + __slots__ = ("force", "concepts", "uuids") + FORCE_FIELD_NUMBER: _ClassVar[int] + CONCEPTS_FIELD_NUMBER: _ClassVar[int] + UUIDS_FIELD_NUMBER: _ClassVar[int] + force: float + concepts: _containers.RepeatedScalarFieldContainer[str] + uuids: _containers.RepeatedScalarFieldContainer[str] + def __init__( + self, + force: _Optional[float] = ..., + concepts: _Optional[_Iterable[str]] = ..., + uuids: _Optional[_Iterable[str]] = ..., + ) -> None: ... 
+ + QUERY_FIELD_NUMBER: _ClassVar[int] + CERTAINTY_FIELD_NUMBER: _ClassVar[int] + DISTANCE_FIELD_NUMBER: _ClassVar[int] + MOVE_TO_FIELD_NUMBER: _ClassVar[int] + MOVE_AWAY_FIELD_NUMBER: _ClassVar[int] + TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] + TARGETS_FIELD_NUMBER: _ClassVar[int] + query: _containers.RepeatedScalarFieldContainer[str] + certainty: float + distance: float + move_to: NearTextSearch.Move + move_away: NearTextSearch.Move + target_vectors: _containers.RepeatedScalarFieldContainer[str] + targets: Targets + def __init__( + self, + query: _Optional[_Iterable[str]] = ..., + certainty: _Optional[float] = ..., + distance: _Optional[float] = ..., + move_to: _Optional[_Union[NearTextSearch.Move, _Mapping]] = ..., + move_away: _Optional[_Union[NearTextSearch.Move, _Mapping]] = ..., + target_vectors: _Optional[_Iterable[str]] = ..., + targets: _Optional[_Union[Targets, _Mapping]] = ..., + ) -> None: ... + +class NearImageSearch(_message.Message): + __slots__ = ("image", "certainty", "distance", "target_vectors", "targets") + IMAGE_FIELD_NUMBER: _ClassVar[int] + CERTAINTY_FIELD_NUMBER: _ClassVar[int] + DISTANCE_FIELD_NUMBER: _ClassVar[int] + TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] + TARGETS_FIELD_NUMBER: _ClassVar[int] + image: str + certainty: float + distance: float + target_vectors: _containers.RepeatedScalarFieldContainer[str] + targets: Targets + def __init__( + self, + image: _Optional[str] = ..., + certainty: _Optional[float] = ..., + distance: _Optional[float] = ..., + target_vectors: _Optional[_Iterable[str]] = ..., + targets: _Optional[_Union[Targets, _Mapping]] = ..., + ) -> None: ... 
+ +class NearAudioSearch(_message.Message): + __slots__ = ("audio", "certainty", "distance", "target_vectors", "targets") + AUDIO_FIELD_NUMBER: _ClassVar[int] + CERTAINTY_FIELD_NUMBER: _ClassVar[int] + DISTANCE_FIELD_NUMBER: _ClassVar[int] + TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] + TARGETS_FIELD_NUMBER: _ClassVar[int] + audio: str + certainty: float + distance: float + target_vectors: _containers.RepeatedScalarFieldContainer[str] + targets: Targets + def __init__( + self, + audio: _Optional[str] = ..., + certainty: _Optional[float] = ..., + distance: _Optional[float] = ..., + target_vectors: _Optional[_Iterable[str]] = ..., + targets: _Optional[_Union[Targets, _Mapping]] = ..., + ) -> None: ... + +class NearVideoSearch(_message.Message): + __slots__ = ("video", "certainty", "distance", "target_vectors", "targets") + VIDEO_FIELD_NUMBER: _ClassVar[int] + CERTAINTY_FIELD_NUMBER: _ClassVar[int] + DISTANCE_FIELD_NUMBER: _ClassVar[int] + TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] + TARGETS_FIELD_NUMBER: _ClassVar[int] + video: str + certainty: float + distance: float + target_vectors: _containers.RepeatedScalarFieldContainer[str] + targets: Targets + def __init__( + self, + video: _Optional[str] = ..., + certainty: _Optional[float] = ..., + distance: _Optional[float] = ..., + target_vectors: _Optional[_Iterable[str]] = ..., + targets: _Optional[_Union[Targets, _Mapping]] = ..., + ) -> None: ... 
+ +class NearDepthSearch(_message.Message): + __slots__ = ("depth", "certainty", "distance", "target_vectors", "targets") + DEPTH_FIELD_NUMBER: _ClassVar[int] + CERTAINTY_FIELD_NUMBER: _ClassVar[int] + DISTANCE_FIELD_NUMBER: _ClassVar[int] + TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] + TARGETS_FIELD_NUMBER: _ClassVar[int] + depth: str + certainty: float + distance: float + target_vectors: _containers.RepeatedScalarFieldContainer[str] + targets: Targets + def __init__( + self, + depth: _Optional[str] = ..., + certainty: _Optional[float] = ..., + distance: _Optional[float] = ..., + target_vectors: _Optional[_Iterable[str]] = ..., + targets: _Optional[_Union[Targets, _Mapping]] = ..., + ) -> None: ... + +class NearThermalSearch(_message.Message): + __slots__ = ("thermal", "certainty", "distance", "target_vectors", "targets") + THERMAL_FIELD_NUMBER: _ClassVar[int] + CERTAINTY_FIELD_NUMBER: _ClassVar[int] + DISTANCE_FIELD_NUMBER: _ClassVar[int] + TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] + TARGETS_FIELD_NUMBER: _ClassVar[int] + thermal: str + certainty: float + distance: float + target_vectors: _containers.RepeatedScalarFieldContainer[str] + targets: Targets + def __init__( + self, + thermal: _Optional[str] = ..., + certainty: _Optional[float] = ..., + distance: _Optional[float] = ..., + target_vectors: _Optional[_Iterable[str]] = ..., + targets: _Optional[_Union[Targets, _Mapping]] = ..., + ) -> None: ... 
+ +class NearIMUSearch(_message.Message): + __slots__ = ("imu", "certainty", "distance", "target_vectors", "targets") + IMU_FIELD_NUMBER: _ClassVar[int] + CERTAINTY_FIELD_NUMBER: _ClassVar[int] + DISTANCE_FIELD_NUMBER: _ClassVar[int] + TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] + TARGETS_FIELD_NUMBER: _ClassVar[int] + imu: str + certainty: float + distance: float + target_vectors: _containers.RepeatedScalarFieldContainer[str] + targets: Targets + def __init__( + self, + imu: _Optional[str] = ..., + certainty: _Optional[float] = ..., + distance: _Optional[float] = ..., + target_vectors: _Optional[_Iterable[str]] = ..., + targets: _Optional[_Union[Targets, _Mapping]] = ..., + ) -> None: ... + +class BM25(_message.Message): + __slots__ = ("query", "properties") + QUERY_FIELD_NUMBER: _ClassVar[int] + PROPERTIES_FIELD_NUMBER: _ClassVar[int] + query: str + properties: _containers.RepeatedScalarFieldContainer[str] + def __init__( + self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ... + ) -> None: ... diff --git a/weaviate/proto/v1/base_search_pb2_grpc.py b/weaviate/proto/v1/base_search_pb2_grpc.py new file mode 100644 index 000000000..0861cb8c9 --- /dev/null +++ b/weaviate/proto/v1/base_search_pb2_grpc.py @@ -0,0 +1,25 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc +import warnings + + +GRPC_GENERATED_VERSION = "1.66.2" +GRPC_VERSION = grpc.__version__ +_version_not_supported = False + +try: + from grpc._utilities import first_version_is_lower + + _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) +except ImportError: + _version_not_supported = True + +if _version_not_supported: + raise RuntimeError( + f"The grpc package installed is at version {GRPC_VERSION}," + + f" but the generated code in v1/base_search_pb2_grpc.py depends on" + + f" grpcio>={GRPC_GENERATED_VERSION}." 
+ + f" Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}" + + f" or downgrade your generated code using grpcio-tools<={GRPC_VERSION}." + ) diff --git a/weaviate/proto/v1/search_get_pb2.py b/weaviate/proto/v1/search_get_pb2.py index c2ae33ba5..3690a683d 100644 --- a/weaviate/proto/v1/search_get_pb2.py +++ b/weaviate/proto/v1/search_get_pb2.py @@ -20,12 +20,13 @@ from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2 from weaviate.proto.v1 import base_pb2 as v1_dot_base__pb2 +from weaviate.proto.v1 import base_search_pb2 as v1_dot_base__search__pb2 from weaviate.proto.v1 import generative_pb2 as v1_dot_generative__pb2 from weaviate.proto.v1 import properties_pb2 as v1_dot_properties__pb2 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\x1cgoogle/protobuf/struct.proto\x1a\rv1/base.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! 
\x01(\t\x12$\n\x07sort_by\x18" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. \x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 
\x01(\x05")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t"\xdd\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 \x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02"\xfa\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12\x36\n\x07weights\x18\x03 \x03(\x0b\x32!.weaviate.v1.Targets.WeightsEntryB\x02\x18\x01\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTarget\x1a.\n\x0cWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01"\xc5\x03\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\r\n\x05\x61lpha\x18\x04 \x01(\x02\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x14\n\x0cvector_bytes\x18\x06 \x01(\x0c\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 
\x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthreshold"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 
\x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance")\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t"5\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cvector_bytes\x18\x02 \x01(\x0c"\x82\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x14\n\x0cvector_bytes\x18\x04 \x01(\x0c\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 
\x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query"\xae\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_results"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 
\x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f \x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent"\xba\x05\n\x10PropertiesResult\x12\x37\n\x12non_ref_properties\x18\x01 \x01(\x0b\x32\x17.google.protobuf.StructB\x02\x18\x01\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12G\n\x17number_array_properties\x18\x05 
\x03(\x0b\x32".weaviate.v1.NumberArrayPropertiesB\x02\x18\x01\x12\x41\n\x14int_array_properties\x18\x06 \x03(\x0b\x32\x1f.weaviate.v1.IntArrayPropertiesB\x02\x18\x01\x12\x43\n\x15text_array_properties\x18\x07 \x03(\x0b\x32 .weaviate.v1.TextArrayPropertiesB\x02\x18\x01\x12I\n\x18\x62oolean_array_properties\x18\x08 \x03(\x0b\x32#.weaviate.v1.BooleanArrayPropertiesB\x02\x18\x01\x12<\n\x11object_properties\x18\t \x03(\x0b\x32\x1d.weaviate.v1.ObjectPropertiesB\x02\x18\x01\x12G\n\x17object_array_properties\x18\n \x03(\x0b\x32".weaviate.v1.ObjectArrayPropertiesB\x02\x18\x01\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c \x01(\x08"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\t*\xee\x01\n\x11\x43ombinationMethod\x12"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42s\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' + b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\x1cgoogle/protobuf/struct.proto\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 
\x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. \x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 
\x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t"\xdd\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 \x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 
\x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query"\xae\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_results"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t 
\x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f \x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent"\xba\x05\n\x10PropertiesResult\x12\x37\n\x12non_ref_properties\x18\x01 \x01(\x0b\x32\x17.google.protobuf.StructB\x02\x18\x01\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12G\n\x17number_array_properties\x18\x05 \x03(\x0b\x32".weaviate.v1.NumberArrayPropertiesB\x02\x18\x01\x12\x41\n\x14int_array_properties\x18\x06 \x03(\x0b\x32\x1f.weaviate.v1.IntArrayPropertiesB\x02\x18\x01\x12\x43\n\x15text_array_properties\x18\x07 \x03(\x0b\x32 .weaviate.v1.TextArrayPropertiesB\x02\x18\x01\x12I\n\x18\x62oolean_array_properties\x18\x08 \x03(\x0b\x32#.weaviate.v1.BooleanArrayPropertiesB\x02\x18\x01\x12<\n\x11object_properties\x18\t \x03(\x0b\x32\x1d.weaviate.v1.ObjectPropertiesB\x02\x18\x01\x12G\n\x17object_array_properties\x18\n \x03(\x0b\x32".weaviate.v1.ObjectArrayPropertiesB\x02\x18\x01\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c \x01(\x08"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 
\x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' ) _globals = globals() @@ -40,40 +41,6 @@ _globals["_SEARCHREQUEST"].fields_by_name["uses_123_api"]._serialized_options = b"\030\001" _globals["_SEARCHREQUEST"].fields_by_name["uses_125_api"]._loaded_options = None _globals["_SEARCHREQUEST"].fields_by_name["uses_125_api"]._serialized_options = b"\030\001" - _globals["_TARGETS_WEIGHTSENTRY"]._loaded_options = None - _globals["_TARGETS_WEIGHTSENTRY"]._serialized_options = b"8\001" - _globals["_TARGETS"].fields_by_name["weights"]._loaded_options = None - _globals["_TARGETS"].fields_by_name["weights"]._serialized_options = b"\030\001" - _globals["_HYBRID"].fields_by_name["vector"]._loaded_options = None - _globals["_HYBRID"].fields_by_name["vector"]._serialized_options = b"\030\001" - _globals["_HYBRID"].fields_by_name["target_vectors"]._loaded_options = None - _globals["_HYBRID"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" - _globals["_NEARTEXTSEARCH"].fields_by_name["target_vectors"]._loaded_options = None - _globals["_NEARTEXTSEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" - _globals["_NEARIMAGESEARCH"].fields_by_name["target_vectors"]._loaded_options = None - _globals["_NEARIMAGESEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" - _globals["_NEARAUDIOSEARCH"].fields_by_name["target_vectors"]._loaded_options = None - _globals["_NEARAUDIOSEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" - _globals["_NEARVIDEOSEARCH"].fields_by_name["target_vectors"]._loaded_options = None - _globals["_NEARVIDEOSEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" - _globals["_NEARDEPTHSEARCH"].fields_by_name["target_vectors"]._loaded_options = None - _globals["_NEARDEPTHSEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" - 
_globals["_NEARTHERMALSEARCH"].fields_by_name["target_vectors"]._loaded_options = None - _globals["_NEARTHERMALSEARCH"].fields_by_name[ - "target_vectors" - ]._serialized_options = b"\030\001" - _globals["_NEARIMUSEARCH"].fields_by_name["target_vectors"]._loaded_options = None - _globals["_NEARIMUSEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" - _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._loaded_options = None - _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_options = b"8\001" - _globals["_NEARVECTOR"].fields_by_name["vector"]._loaded_options = None - _globals["_NEARVECTOR"].fields_by_name["vector"]._serialized_options = b"\030\001" - _globals["_NEARVECTOR"].fields_by_name["target_vectors"]._loaded_options = None - _globals["_NEARVECTOR"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" - _globals["_NEARVECTOR"].fields_by_name["vector_per_target"]._loaded_options = None - _globals["_NEARVECTOR"].fields_by_name["vector_per_target"]._serialized_options = b"\030\001" - _globals["_NEAROBJECT"].fields_by_name["target_vectors"]._loaded_options = None - _globals["_NEAROBJECT"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" _globals["_SEARCHREPLY"].fields_by_name["generative_grouped_result"]._loaded_options = None _globals["_SEARCHREPLY"].fields_by_name[ "generative_grouped_result" @@ -116,72 +83,34 @@ _globals["_PROPERTIESRESULT"].fields_by_name[ "object_array_properties" ]._serialized_options = b"\030\001" - _globals["_COMBINATIONMETHOD"]._serialized_start = 7656 - _globals["_COMBINATIONMETHOD"]._serialized_end = 7894 - _globals["_SEARCHREQUEST"]._serialized_start = 124 - _globals["_SEARCHREQUEST"]._serialized_end = 1560 - _globals["_GROUPBY"]._serialized_start = 1562 - _globals["_GROUPBY"]._serialized_end = 1638 - _globals["_SORTBY"]._serialized_start = 1640 - _globals["_SORTBY"]._serialized_end = 1681 - _globals["_METADATAREQUEST"]._serialized_start = 1684 - 
_globals["_METADATAREQUEST"]._serialized_end = 1905 - _globals["_PROPERTIESREQUEST"]._serialized_start = 1908 - _globals["_PROPERTIESREQUEST"]._serialized_end = 2117 - _globals["_OBJECTPROPERTIESREQUEST"]._serialized_start = 2120 - _globals["_OBJECTPROPERTIESREQUEST"]._serialized_end = 2259 - _globals["_WEIGHTSFORTARGET"]._serialized_start = 2261 - _globals["_WEIGHTSFORTARGET"]._serialized_end = 2311 - _globals["_TARGETS"]._serialized_start = 2314 - _globals["_TARGETS"]._serialized_end = 2564 - _globals["_TARGETS_WEIGHTSENTRY"]._serialized_start = 2518 - _globals["_TARGETS_WEIGHTSENTRY"]._serialized_end = 2564 - _globals["_HYBRID"]._serialized_start = 2567 - _globals["_HYBRID"]._serialized_end = 3020 - _globals["_HYBRID_FUSIONTYPE"]._serialized_start = 2910 - _globals["_HYBRID_FUSIONTYPE"]._serialized_end = 3007 - _globals["_NEARTEXTSEARCH"]._serialized_start = 3023 - _globals["_NEARTEXTSEARCH"]._serialized_end = 3391 - _globals["_NEARTEXTSEARCH_MOVE"]._serialized_start = 3284 - _globals["_NEARTEXTSEARCH_MOVE"]._serialized_end = 3338 - _globals["_NEARIMAGESEARCH"]._serialized_start = 3394 - _globals["_NEARIMAGESEARCH"]._serialized_end = 3567 - _globals["_NEARAUDIOSEARCH"]._serialized_start = 3570 - _globals["_NEARAUDIOSEARCH"]._serialized_end = 3743 - _globals["_NEARVIDEOSEARCH"]._serialized_start = 3746 - _globals["_NEARVIDEOSEARCH"]._serialized_end = 3919 - _globals["_NEARDEPTHSEARCH"]._serialized_start = 3922 - _globals["_NEARDEPTHSEARCH"]._serialized_end = 4095 - _globals["_NEARTHERMALSEARCH"]._serialized_start = 4098 - _globals["_NEARTHERMALSEARCH"]._serialized_end = 4275 - _globals["_NEARIMUSEARCH"]._serialized_start = 4278 - _globals["_NEARIMUSEARCH"]._serialized_end = 4447 - _globals["_BM25"]._serialized_start = 4449 - _globals["_BM25"]._serialized_end = 4490 - _globals["_REFPROPERTIESREQUEST"]._serialized_start = 4493 - _globals["_REFPROPERTIESREQUEST"]._serialized_end = 4670 - _globals["_VECTORFORTARGET"]._serialized_start = 4672 - 
_globals["_VECTORFORTARGET"]._serialized_end = 4725 - _globals["_NEARVECTOR"]._serialized_start = 4728 - _globals["_NEARVECTOR"]._serialized_end = 5114 - _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_start = 5033 - _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_end = 5087 - _globals["_NEAROBJECT"]._serialized_start = 5117 - _globals["_NEAROBJECT"]._serialized_end = 5282 - _globals["_RERANK"]._serialized_start = 5284 - _globals["_RERANK"]._serialized_end = 5340 - _globals["_SEARCHREPLY"]._serialized_start = 5343 - _globals["_SEARCHREPLY"]._serialized_end = 5645 - _globals["_RERANKREPLY"]._serialized_start = 5647 - _globals["_RERANKREPLY"]._serialized_end = 5675 - _globals["_GROUPBYRESULT"]._serialized_start = 5678 - _globals["_GROUPBYRESULT"]._serialized_end = 6039 - _globals["_SEARCHRESULT"]._serialized_start = 6042 - _globals["_SEARCHRESULT"]._serialized_end = 6225 - _globals["_METADATARESULT"]._serialized_start = 6228 - _globals["_METADATARESULT"]._serialized_end = 6859 - _globals["_PROPERTIESRESULT"]._serialized_start = 6862 - _globals["_PROPERTIESRESULT"]._serialized_end = 7560 - _globals["_REFPROPERTIESRESULT"]._serialized_start = 7562 - _globals["_REFPROPERTIESRESULT"]._serialized_end = 7653 + _globals["_SEARCHREQUEST"]._serialized_start = 146 + _globals["_SEARCHREQUEST"]._serialized_end = 1582 + _globals["_GROUPBY"]._serialized_start = 1584 + _globals["_GROUPBY"]._serialized_end = 1660 + _globals["_SORTBY"]._serialized_start = 1662 + _globals["_SORTBY"]._serialized_end = 1703 + _globals["_METADATAREQUEST"]._serialized_start = 1706 + _globals["_METADATAREQUEST"]._serialized_end = 1927 + _globals["_PROPERTIESREQUEST"]._serialized_start = 1930 + _globals["_PROPERTIESREQUEST"]._serialized_end = 2139 + _globals["_OBJECTPROPERTIESREQUEST"]._serialized_start = 2142 + _globals["_OBJECTPROPERTIESREQUEST"]._serialized_end = 2281 + _globals["_REFPROPERTIESREQUEST"]._serialized_start = 2284 + _globals["_REFPROPERTIESREQUEST"]._serialized_end = 2461 
+ _globals["_RERANK"]._serialized_start = 2463 + _globals["_RERANK"]._serialized_end = 2519 + _globals["_SEARCHREPLY"]._serialized_start = 2522 + _globals["_SEARCHREPLY"]._serialized_end = 2824 + _globals["_RERANKREPLY"]._serialized_start = 2826 + _globals["_RERANKREPLY"]._serialized_end = 2854 + _globals["_GROUPBYRESULT"]._serialized_start = 2857 + _globals["_GROUPBYRESULT"]._serialized_end = 3218 + _globals["_SEARCHRESULT"]._serialized_start = 3221 + _globals["_SEARCHRESULT"]._serialized_end = 3404 + _globals["_METADATARESULT"]._serialized_start = 3407 + _globals["_METADATARESULT"]._serialized_end = 4038 + _globals["_PROPERTIESRESULT"]._serialized_start = 4041 + _globals["_PROPERTIESRESULT"]._serialized_end = 4739 + _globals["_REFPROPERTIESRESULT"]._serialized_start = 4741 + _globals["_REFPROPERTIESRESULT"]._serialized_end = 4832 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/search_get_pb2.pyi b/weaviate/proto/v1/search_get_pb2.pyi index eaaa6f20b..e06d54274 100644 --- a/weaviate/proto/v1/search_get_pb2.pyi +++ b/weaviate/proto/v1/search_get_pb2.pyi @@ -1,9 +1,9 @@ from google.protobuf import struct_pb2 as _struct_pb2 from weaviate.proto.v1 import base_pb2 as _base_pb2 +from weaviate.proto.v1 import base_search_pb2 as _base_search_pb2 from weaviate.proto.v1 import generative_pb2 as _generative_pb2 from weaviate.proto.v1 import properties_pb2 as _properties_pb2 from google.protobuf.internal import containers as _containers -from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from typing import ( @@ -16,22 +16,6 @@ from typing import ( DESCRIPTOR: _descriptor.FileDescriptor -class CombinationMethod(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = () - COMBINATION_METHOD_UNSPECIFIED: _ClassVar[CombinationMethod] - COMBINATION_METHOD_TYPE_SUM: _ClassVar[CombinationMethod] - 
COMBINATION_METHOD_TYPE_MIN: _ClassVar[CombinationMethod] - COMBINATION_METHOD_TYPE_AVERAGE: _ClassVar[CombinationMethod] - COMBINATION_METHOD_TYPE_RELATIVE_SCORE: _ClassVar[CombinationMethod] - COMBINATION_METHOD_TYPE_MANUAL: _ClassVar[CombinationMethod] - -COMBINATION_METHOD_UNSPECIFIED: CombinationMethod -COMBINATION_METHOD_TYPE_SUM: CombinationMethod -COMBINATION_METHOD_TYPE_MIN: CombinationMethod -COMBINATION_METHOD_TYPE_AVERAGE: CombinationMethod -COMBINATION_METHOD_TYPE_RELATIVE_SCORE: CombinationMethod -COMBINATION_METHOD_TYPE_MANUAL: CombinationMethod - class SearchRequest(_message.Message): __slots__ = ( "collection", @@ -103,17 +87,17 @@ class SearchRequest(_message.Message): after: str sort_by: _containers.RepeatedCompositeFieldContainer[SortBy] filters: _base_pb2.Filters - hybrid_search: Hybrid - bm25_search: BM25 - near_vector: NearVector - near_object: NearObject - near_text: NearTextSearch - near_image: NearImageSearch - near_audio: NearAudioSearch - near_video: NearVideoSearch - near_depth: NearDepthSearch - near_thermal: NearThermalSearch - near_imu: NearIMUSearch + hybrid_search: _base_search_pb2.Hybrid + bm25_search: _base_search_pb2.BM25 + near_vector: _base_search_pb2.NearVector + near_object: _base_search_pb2.NearObject + near_text: _base_search_pb2.NearTextSearch + near_image: _base_search_pb2.NearImageSearch + near_audio: _base_search_pb2.NearAudioSearch + near_video: _base_search_pb2.NearVideoSearch + near_depth: _base_search_pb2.NearDepthSearch + near_thermal: _base_search_pb2.NearThermalSearch + near_imu: _base_search_pb2.NearIMUSearch generative: _generative_pb2.GenerativeSearch rerank: Rerank uses_123_api: bool @@ -133,17 +117,17 @@ class SearchRequest(_message.Message): after: _Optional[str] = ..., sort_by: _Optional[_Iterable[_Union[SortBy, _Mapping]]] = ..., filters: _Optional[_Union[_base_pb2.Filters, _Mapping]] = ..., - hybrid_search: _Optional[_Union[Hybrid, _Mapping]] = ..., - bm25_search: _Optional[_Union[BM25, _Mapping]] = 
..., - near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., - near_object: _Optional[_Union[NearObject, _Mapping]] = ..., - near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., - near_image: _Optional[_Union[NearImageSearch, _Mapping]] = ..., - near_audio: _Optional[_Union[NearAudioSearch, _Mapping]] = ..., - near_video: _Optional[_Union[NearVideoSearch, _Mapping]] = ..., - near_depth: _Optional[_Union[NearDepthSearch, _Mapping]] = ..., - near_thermal: _Optional[_Union[NearThermalSearch, _Mapping]] = ..., - near_imu: _Optional[_Union[NearIMUSearch, _Mapping]] = ..., + hybrid_search: _Optional[_Union[_base_search_pb2.Hybrid, _Mapping]] = ..., + bm25_search: _Optional[_Union[_base_search_pb2.BM25, _Mapping]] = ..., + near_vector: _Optional[_Union[_base_search_pb2.NearVector, _Mapping]] = ..., + near_object: _Optional[_Union[_base_search_pb2.NearObject, _Mapping]] = ..., + near_text: _Optional[_Union[_base_search_pb2.NearTextSearch, _Mapping]] = ..., + near_image: _Optional[_Union[_base_search_pb2.NearImageSearch, _Mapping]] = ..., + near_audio: _Optional[_Union[_base_search_pb2.NearAudioSearch, _Mapping]] = ..., + near_video: _Optional[_Union[_base_search_pb2.NearVideoSearch, _Mapping]] = ..., + near_depth: _Optional[_Union[_base_search_pb2.NearDepthSearch, _Mapping]] = ..., + near_thermal: _Optional[_Union[_base_search_pb2.NearThermalSearch, _Mapping]] = ..., + near_imu: _Optional[_Union[_base_search_pb2.NearIMUSearch, _Mapping]] = ..., generative: _Optional[_Union[_generative_pb2.GenerativeSearch, _Mapping]] = ..., rerank: _Optional[_Union[Rerank, _Mapping]] = ..., uses_123_api: bool = ..., @@ -259,289 +243,6 @@ class ObjectPropertiesRequest(_message.Message): object_properties: _Optional[_Iterable[_Union[ObjectPropertiesRequest, _Mapping]]] = ..., ) -> None: ... 
-class WeightsForTarget(_message.Message): - __slots__ = ("target", "weight") - TARGET_FIELD_NUMBER: _ClassVar[int] - WEIGHT_FIELD_NUMBER: _ClassVar[int] - target: str - weight: float - def __init__(self, target: _Optional[str] = ..., weight: _Optional[float] = ...) -> None: ... - -class Targets(_message.Message): - __slots__ = ("target_vectors", "combination", "weights", "weights_for_targets") - - class WeightsEntry(_message.Message): - __slots__ = ("key", "value") - KEY_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - key: str - value: float - def __init__(self, key: _Optional[str] = ..., value: _Optional[float] = ...) -> None: ... - - TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] - COMBINATION_FIELD_NUMBER: _ClassVar[int] - WEIGHTS_FIELD_NUMBER: _ClassVar[int] - WEIGHTS_FOR_TARGETS_FIELD_NUMBER: _ClassVar[int] - target_vectors: _containers.RepeatedScalarFieldContainer[str] - combination: CombinationMethod - weights: _containers.ScalarMap[str, float] - weights_for_targets: _containers.RepeatedCompositeFieldContainer[WeightsForTarget] - def __init__( - self, - target_vectors: _Optional[_Iterable[str]] = ..., - combination: _Optional[_Union[CombinationMethod, str]] = ..., - weights: _Optional[_Mapping[str, float]] = ..., - weights_for_targets: _Optional[_Iterable[_Union[WeightsForTarget, _Mapping]]] = ..., - ) -> None: ... 
- -class Hybrid(_message.Message): - __slots__ = ( - "query", - "properties", - "vector", - "alpha", - "fusion_type", - "vector_bytes", - "target_vectors", - "near_text", - "near_vector", - "targets", - "vector_distance", - ) - - class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = () - FUSION_TYPE_UNSPECIFIED: _ClassVar[Hybrid.FusionType] - FUSION_TYPE_RANKED: _ClassVar[Hybrid.FusionType] - FUSION_TYPE_RELATIVE_SCORE: _ClassVar[Hybrid.FusionType] - - FUSION_TYPE_UNSPECIFIED: Hybrid.FusionType - FUSION_TYPE_RANKED: Hybrid.FusionType - FUSION_TYPE_RELATIVE_SCORE: Hybrid.FusionType - QUERY_FIELD_NUMBER: _ClassVar[int] - PROPERTIES_FIELD_NUMBER: _ClassVar[int] - VECTOR_FIELD_NUMBER: _ClassVar[int] - ALPHA_FIELD_NUMBER: _ClassVar[int] - FUSION_TYPE_FIELD_NUMBER: _ClassVar[int] - VECTOR_BYTES_FIELD_NUMBER: _ClassVar[int] - TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] - NEAR_TEXT_FIELD_NUMBER: _ClassVar[int] - NEAR_VECTOR_FIELD_NUMBER: _ClassVar[int] - TARGETS_FIELD_NUMBER: _ClassVar[int] - VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] - query: str - properties: _containers.RepeatedScalarFieldContainer[str] - vector: _containers.RepeatedScalarFieldContainer[float] - alpha: float - fusion_type: Hybrid.FusionType - vector_bytes: bytes - target_vectors: _containers.RepeatedScalarFieldContainer[str] - near_text: NearTextSearch - near_vector: NearVector - targets: Targets - vector_distance: float - def __init__( - self, - query: _Optional[str] = ..., - properties: _Optional[_Iterable[str]] = ..., - vector: _Optional[_Iterable[float]] = ..., - alpha: _Optional[float] = ..., - fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., - vector_bytes: _Optional[bytes] = ..., - target_vectors: _Optional[_Iterable[str]] = ..., - near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., - near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., - targets: _Optional[_Union[Targets, _Mapping]] = ..., - vector_distance: _Optional[float] = ..., - ) 
-> None: ... - -class NearTextSearch(_message.Message): - __slots__ = ( - "query", - "certainty", - "distance", - "move_to", - "move_away", - "target_vectors", - "targets", - ) - - class Move(_message.Message): - __slots__ = ("force", "concepts", "uuids") - FORCE_FIELD_NUMBER: _ClassVar[int] - CONCEPTS_FIELD_NUMBER: _ClassVar[int] - UUIDS_FIELD_NUMBER: _ClassVar[int] - force: float - concepts: _containers.RepeatedScalarFieldContainer[str] - uuids: _containers.RepeatedScalarFieldContainer[str] - def __init__( - self, - force: _Optional[float] = ..., - concepts: _Optional[_Iterable[str]] = ..., - uuids: _Optional[_Iterable[str]] = ..., - ) -> None: ... - - QUERY_FIELD_NUMBER: _ClassVar[int] - CERTAINTY_FIELD_NUMBER: _ClassVar[int] - DISTANCE_FIELD_NUMBER: _ClassVar[int] - MOVE_TO_FIELD_NUMBER: _ClassVar[int] - MOVE_AWAY_FIELD_NUMBER: _ClassVar[int] - TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] - TARGETS_FIELD_NUMBER: _ClassVar[int] - query: _containers.RepeatedScalarFieldContainer[str] - certainty: float - distance: float - move_to: NearTextSearch.Move - move_away: NearTextSearch.Move - target_vectors: _containers.RepeatedScalarFieldContainer[str] - targets: Targets - def __init__( - self, - query: _Optional[_Iterable[str]] = ..., - certainty: _Optional[float] = ..., - distance: _Optional[float] = ..., - move_to: _Optional[_Union[NearTextSearch.Move, _Mapping]] = ..., - move_away: _Optional[_Union[NearTextSearch.Move, _Mapping]] = ..., - target_vectors: _Optional[_Iterable[str]] = ..., - targets: _Optional[_Union[Targets, _Mapping]] = ..., - ) -> None: ... 
- -class NearImageSearch(_message.Message): - __slots__ = ("image", "certainty", "distance", "target_vectors", "targets") - IMAGE_FIELD_NUMBER: _ClassVar[int] - CERTAINTY_FIELD_NUMBER: _ClassVar[int] - DISTANCE_FIELD_NUMBER: _ClassVar[int] - TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] - TARGETS_FIELD_NUMBER: _ClassVar[int] - image: str - certainty: float - distance: float - target_vectors: _containers.RepeatedScalarFieldContainer[str] - targets: Targets - def __init__( - self, - image: _Optional[str] = ..., - certainty: _Optional[float] = ..., - distance: _Optional[float] = ..., - target_vectors: _Optional[_Iterable[str]] = ..., - targets: _Optional[_Union[Targets, _Mapping]] = ..., - ) -> None: ... - -class NearAudioSearch(_message.Message): - __slots__ = ("audio", "certainty", "distance", "target_vectors", "targets") - AUDIO_FIELD_NUMBER: _ClassVar[int] - CERTAINTY_FIELD_NUMBER: _ClassVar[int] - DISTANCE_FIELD_NUMBER: _ClassVar[int] - TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] - TARGETS_FIELD_NUMBER: _ClassVar[int] - audio: str - certainty: float - distance: float - target_vectors: _containers.RepeatedScalarFieldContainer[str] - targets: Targets - def __init__( - self, - audio: _Optional[str] = ..., - certainty: _Optional[float] = ..., - distance: _Optional[float] = ..., - target_vectors: _Optional[_Iterable[str]] = ..., - targets: _Optional[_Union[Targets, _Mapping]] = ..., - ) -> None: ... 
- -class NearVideoSearch(_message.Message): - __slots__ = ("video", "certainty", "distance", "target_vectors", "targets") - VIDEO_FIELD_NUMBER: _ClassVar[int] - CERTAINTY_FIELD_NUMBER: _ClassVar[int] - DISTANCE_FIELD_NUMBER: _ClassVar[int] - TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] - TARGETS_FIELD_NUMBER: _ClassVar[int] - video: str - certainty: float - distance: float - target_vectors: _containers.RepeatedScalarFieldContainer[str] - targets: Targets - def __init__( - self, - video: _Optional[str] = ..., - certainty: _Optional[float] = ..., - distance: _Optional[float] = ..., - target_vectors: _Optional[_Iterable[str]] = ..., - targets: _Optional[_Union[Targets, _Mapping]] = ..., - ) -> None: ... - -class NearDepthSearch(_message.Message): - __slots__ = ("depth", "certainty", "distance", "target_vectors", "targets") - DEPTH_FIELD_NUMBER: _ClassVar[int] - CERTAINTY_FIELD_NUMBER: _ClassVar[int] - DISTANCE_FIELD_NUMBER: _ClassVar[int] - TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] - TARGETS_FIELD_NUMBER: _ClassVar[int] - depth: str - certainty: float - distance: float - target_vectors: _containers.RepeatedScalarFieldContainer[str] - targets: Targets - def __init__( - self, - depth: _Optional[str] = ..., - certainty: _Optional[float] = ..., - distance: _Optional[float] = ..., - target_vectors: _Optional[_Iterable[str]] = ..., - targets: _Optional[_Union[Targets, _Mapping]] = ..., - ) -> None: ... 
- -class NearThermalSearch(_message.Message): - __slots__ = ("thermal", "certainty", "distance", "target_vectors", "targets") - THERMAL_FIELD_NUMBER: _ClassVar[int] - CERTAINTY_FIELD_NUMBER: _ClassVar[int] - DISTANCE_FIELD_NUMBER: _ClassVar[int] - TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] - TARGETS_FIELD_NUMBER: _ClassVar[int] - thermal: str - certainty: float - distance: float - target_vectors: _containers.RepeatedScalarFieldContainer[str] - targets: Targets - def __init__( - self, - thermal: _Optional[str] = ..., - certainty: _Optional[float] = ..., - distance: _Optional[float] = ..., - target_vectors: _Optional[_Iterable[str]] = ..., - targets: _Optional[_Union[Targets, _Mapping]] = ..., - ) -> None: ... - -class NearIMUSearch(_message.Message): - __slots__ = ("imu", "certainty", "distance", "target_vectors", "targets") - IMU_FIELD_NUMBER: _ClassVar[int] - CERTAINTY_FIELD_NUMBER: _ClassVar[int] - DISTANCE_FIELD_NUMBER: _ClassVar[int] - TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] - TARGETS_FIELD_NUMBER: _ClassVar[int] - imu: str - certainty: float - distance: float - target_vectors: _containers.RepeatedScalarFieldContainer[str] - targets: Targets - def __init__( - self, - imu: _Optional[str] = ..., - certainty: _Optional[float] = ..., - distance: _Optional[float] = ..., - target_vectors: _Optional[_Iterable[str]] = ..., - targets: _Optional[_Union[Targets, _Mapping]] = ..., - ) -> None: ... - -class BM25(_message.Message): - __slots__ = ("query", "properties") - QUERY_FIELD_NUMBER: _ClassVar[int] - PROPERTIES_FIELD_NUMBER: _ClassVar[int] - query: str - properties: _containers.RepeatedScalarFieldContainer[str] - def __init__( - self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ... - ) -> None: ... 
- class RefPropertiesRequest(_message.Message): __slots__ = ("reference_property", "properties", "metadata", "target_collection") REFERENCE_PROPERTY_FIELD_NUMBER: _ClassVar[int] @@ -560,85 +261,6 @@ class RefPropertiesRequest(_message.Message): target_collection: _Optional[str] = ..., ) -> None: ... -class VectorForTarget(_message.Message): - __slots__ = ("name", "vector_bytes") - NAME_FIELD_NUMBER: _ClassVar[int] - VECTOR_BYTES_FIELD_NUMBER: _ClassVar[int] - name: str - vector_bytes: bytes - def __init__( - self, name: _Optional[str] = ..., vector_bytes: _Optional[bytes] = ... - ) -> None: ... - -class NearVector(_message.Message): - __slots__ = ( - "vector", - "certainty", - "distance", - "vector_bytes", - "target_vectors", - "targets", - "vector_per_target", - "vector_for_targets", - ) - - class VectorPerTargetEntry(_message.Message): - __slots__ = ("key", "value") - KEY_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - key: str - value: bytes - def __init__(self, key: _Optional[str] = ..., value: _Optional[bytes] = ...) -> None: ... 
- - VECTOR_FIELD_NUMBER: _ClassVar[int] - CERTAINTY_FIELD_NUMBER: _ClassVar[int] - DISTANCE_FIELD_NUMBER: _ClassVar[int] - VECTOR_BYTES_FIELD_NUMBER: _ClassVar[int] - TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] - TARGETS_FIELD_NUMBER: _ClassVar[int] - VECTOR_PER_TARGET_FIELD_NUMBER: _ClassVar[int] - VECTOR_FOR_TARGETS_FIELD_NUMBER: _ClassVar[int] - vector: _containers.RepeatedScalarFieldContainer[float] - certainty: float - distance: float - vector_bytes: bytes - target_vectors: _containers.RepeatedScalarFieldContainer[str] - targets: Targets - vector_per_target: _containers.ScalarMap[str, bytes] - vector_for_targets: _containers.RepeatedCompositeFieldContainer[VectorForTarget] - def __init__( - self, - vector: _Optional[_Iterable[float]] = ..., - certainty: _Optional[float] = ..., - distance: _Optional[float] = ..., - vector_bytes: _Optional[bytes] = ..., - target_vectors: _Optional[_Iterable[str]] = ..., - targets: _Optional[_Union[Targets, _Mapping]] = ..., - vector_per_target: _Optional[_Mapping[str, bytes]] = ..., - vector_for_targets: _Optional[_Iterable[_Union[VectorForTarget, _Mapping]]] = ..., - ) -> None: ... - -class NearObject(_message.Message): - __slots__ = ("id", "certainty", "distance", "target_vectors", "targets") - ID_FIELD_NUMBER: _ClassVar[int] - CERTAINTY_FIELD_NUMBER: _ClassVar[int] - DISTANCE_FIELD_NUMBER: _ClassVar[int] - TARGET_VECTORS_FIELD_NUMBER: _ClassVar[int] - TARGETS_FIELD_NUMBER: _ClassVar[int] - id: str - certainty: float - distance: float - target_vectors: _containers.RepeatedScalarFieldContainer[str] - targets: Targets - def __init__( - self, - id: _Optional[str] = ..., - certainty: _Optional[float] = ..., - distance: _Optional[float] = ..., - target_vectors: _Optional[_Iterable[str]] = ..., - targets: _Optional[_Union[Targets, _Mapping]] = ..., - ) -> None: ... 
- class Rerank(_message.Message): __slots__ = ("property", "query") PROPERTY_FIELD_NUMBER: _ClassVar[int] From 2a010e1cb05ecf20b80e4e882219ea68b7591cd5 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Wed, 29 Jan 2025 16:10:00 +0000 Subject: [PATCH 22/48] Amend colbert test to validate fix --- integration/test_named_vectors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index 15acf056f..f41979440 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -831,6 +831,9 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: multi_vector=wvc.config.Configure.VectorIndex.MultiVector.multi_vector() ), ), + wvc.config.Configure.NamedVectors.none( + name="regular", + ), ], ) From a0aa6d3f00e3b747e4c8014a799e2ea350d11125 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 30 Jan 2025 09:40:03 +0000 Subject: [PATCH 23/48] Change any reranker tests to exclusively use cohere --- ci/docker-compose-modules.yml | 2 +- integration/test_collection_openai.py | 12 ++++++------ integration/test_collection_rerank.py | 27 +++++++++++++-------------- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/ci/docker-compose-modules.yml b/ci/docker-compose-modules.yml index ec09fdafd..ae378b9e1 100644 --- a/ci/docker-compose-modules.yml +++ b/ci/docker-compose-modules.yml @@ -19,6 +19,6 @@ services: AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' PERSISTENCE_DATA_PATH: '/var/lib/weaviate' DEFAULT_VECTORIZER_MODULE: 'text2vec-openai' - ENABLE_MODULES: 'text2colbert-jinaai,text2vec-openai,generative-openai,text2vec-cohere,generative-cohere' + ENABLE_MODULES: 'text2colbert-jinaai,text2vec-openai,generative-openai,text2vec-cohere,generative-cohere,reranker-cohere' CLUSTER_HOSTNAME: 'node1' DISABLE_TELEMETRY: 'true' diff --git a/integration/test_collection_openai.py b/integration/test_collection_openai.py index bd3319955..676af6d58 100644 --- 
a/integration/test_collection_openai.py +++ b/integration/test_collection_openai.py @@ -578,18 +578,18 @@ def test_openai_batch_upload(openai_collection: OpenAICollection, request: SubRe def test_queries_with_rerank_and_generative(collection_factory: CollectionFactory) -> None: - api_key = os.environ.get("OPENAI_APIKEY") + api_key = os.environ.get("COHERE_APIKEY") if api_key is None: - pytest.skip("No OpenAI API key found.") + pytest.skip("No Cohere API key found.") collection = collection_factory( name="Test_test_queries_with_rerank_and_generative", - generative_config=Configure.Generative.openai(), - reranker_config=Configure.Reranker.transformers(), - vectorizer_config=Configure.Vectorizer.text2vec_openai(), + generative_config=Configure.Generative.cohere(), + reranker_config=Configure.Reranker.cohere(), + vectorizer_config=Configure.Vectorizer.text2vec_cohere(), properties=[Property(name="text", data_type=DataType.TEXT)], ports=(8086, 50057), - headers={"X-OpenAI-Api-Key": api_key}, + headers={"X-Cohere-Api-Key": api_key}, ) if collection._connection._weaviate_version < _ServerVersion(1, 23, 1): pytest.skip("Generative reranking requires Weaviate 1.23.1 or higher") diff --git a/integration/test_collection_rerank.py b/integration/test_collection_rerank.py index f201a3344..dc4f362d0 100644 --- a/integration/test_collection_rerank.py +++ b/integration/test_collection_rerank.py @@ -3,7 +3,6 @@ import pytest import weaviate.classes as wvc -from weaviate.util import _ServerVersion from .conftest import CollectionFactory @@ -13,7 +12,7 @@ def test_query_using_rerank_with_old_server(collection_factory: CollectionFactor vectorizer_config=wvc.config.Configure.Vectorizer.none(), properties=[wvc.config.Property(name="text", data_type=wvc.config.DataType.TEXT)], ) - if collection._connection._weaviate_version >= _ServerVersion(1, 23, 1): + if collection._connection._weaviate_version.is_at_least(1, 23, 1): pytest.skip("Reranking works with 1.23.1 or higher so no need to test 
this") collection.data.insert_many([{"text": "This is a test"}, {"text": "This is another test"}]) @@ -28,19 +27,19 @@ def test_query_using_rerank_with_old_server(collection_factory: CollectionFactor def test_queries_with_rerank(collection_factory: CollectionFactory) -> None: - api_key = os.environ.get("OPENAI_APIKEY") + api_key = os.environ.get("COHERE_APIKEY") if api_key is None: - pytest.skip("No OpenAI API key found.") + pytest.skip("No Cohere API key found.") collection = collection_factory( name="Test_test_queries_with_rerank", - reranker_config=wvc.config.Configure.Reranker.custom("reranker-dummy"), - vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_openai(), + reranker_config=wvc.config.Configure.Reranker.cohere(), + vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_cohere(), properties=[wvc.config.Property(name="text", data_type=wvc.config.DataType.TEXT)], - headers={"X-OpenAI-Api-Key": api_key}, + headers={"X-Cohere-Api-Key": api_key}, ports=(8086, 50057), ) - if collection._connection._weaviate_version < _ServerVersion(1, 23, 1): + if collection._connection._weaviate_version.is_lower_than(1, 23, 1): pytest.skip("Reranking requires Weaviate 1.23.1 or higher") insert = collection.data.insert_many( @@ -84,21 +83,21 @@ def test_queries_with_rerank(collection_factory: CollectionFactory) -> None: def test_queries_with_rerank_and_group_by(collection_factory: CollectionFactory) -> None: - api_key = os.environ.get("OPENAI_APIKEY") + api_key = os.environ.get("COHERE_APIKEY") if api_key is None: - pytest.skip("No OpenAI API key found.") + pytest.skip("No Cohere API key found.") collection = collection_factory( name="Test_test_queries_with_rerank_and_group_by", - reranker_config=wvc.config.Configure.Reranker.custom("reranker-dummy"), - vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_openai( + reranker_config=wvc.config.Configure.Reranker.cohere(), + vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_cohere( 
vectorize_collection_name=False ), properties=[wvc.config.Property(name="text", data_type=wvc.config.DataType.TEXT)], - headers={"X-OpenAI-Api-Key": api_key}, + headers={"X-Cohere-Api-Key": api_key}, ports=(8086, 50057), ) - if collection._connection._weaviate_version < _ServerVersion(1, 23, 1): + if collection._connection._weaviate_version.is_lower_than(1, 23, 1): pytest.skip("Reranking requires Weaviate 1.23.1 or higher") insert = collection.data.insert_many( From a1957503022da02d37f8d9e9f75c53bd2f2696c4 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 30 Jan 2025 15:43:27 +0000 Subject: [PATCH 24/48] Merge dev/1.29 --- .github/workflows/main.yaml | 33 ++-- ...erative.yml => docker-compose-modules.yml} | 4 +- integration/test_collection_aggregate.py | 4 +- integration/test_collection_openai.py | 2 +- integration/test_collection_rerank.py | 2 + integration/test_named_vectors.py | 112 ++++++++++++- mock_tests/test_collection.py | 1 + requirements-devel.txt | 2 +- test/collection/test_byteops.py | 2 +- weaviate/classes/query.py | 2 + .../collections/aggregations/aggregate.py | 3 +- .../collections/aggregations/near_vector.py | 11 +- .../collections/batch/grpc_batch_objects.py | 49 +++--- weaviate/collections/classes/config.py | 34 +++- .../collections/classes/config_methods.py | 9 + .../classes/config_named_vectors.py | 46 ++++- .../classes/config_vector_index.py | 9 + .../collections/classes/config_vectorizers.py | 10 ++ weaviate/collections/classes/grpc.py | 66 ++++++-- weaviate/collections/classes/internal.py | 2 +- weaviate/collections/collection/async_.py | 10 +- weaviate/collections/collection/sync.py | 1 + weaviate/collections/grpc/shared.py | 157 ++++++++++++++++-- weaviate/collections/queries/base.py | 20 ++- weaviate/collections/queries/byteops.py | 16 -- weaviate/connect/v4.py | 4 +- weaviate/outputs/query.py | 12 +- weaviate/proto/v1/base_pb2.py | 14 +- weaviate/proto/v1/base_pb2.pyi | 15 +- weaviate/proto/v1/base_search_pb2.py | 87 +++++----- 
weaviate/proto/v1/base_search_pb2.pyi | 18 +- weaviate/types.py | 4 +- weaviate/util.py | 16 +- 33 files changed, 608 insertions(+), 169 deletions(-) rename ci/{docker-compose-generative.yml => docker-compose-modules.yml} (80%) delete mode 100644 weaviate/collections/queries/byteops.py diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index ad34d6b43..39af591f8 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -22,7 +22,8 @@ env: WEAVIATE_125: 1.25.29 WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 - WEAVIATE_128: 1.28.4-73ecf75 + WEAVIATE_128: 1.28.3 + WEAVIATE_129: 1.29.0-dev-5dc00ba jobs: lint-and-format: @@ -54,7 +55,7 @@ jobs: strategy: fail-fast: false matrix: - version: ["3.9", "3.10", "3.11", "3.12"] + version: ["3.9", "3.10", "3.11", "3.12", "3.13"] folder: ["weaviate"] steps: - uses: actions/checkout@v4 @@ -75,7 +76,7 @@ jobs: strategy: fail-fast: false matrix: - version: ["3.9", "3.10", "3.11", "3.12"] + version: ["3.9", "3.10", "3.11", "3.12", "3.13"] folder: ["test", "mock_tests"] steps: - uses: actions/checkout@v4 @@ -98,7 +99,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - version: ["3.9", "3.10", "3.11", "3.12"] + version: ["3.9", "3.10", "3.11", "3.12", "3.13"] optional_dependencies: [false] steps: - uses: actions/checkout@v4 @@ -129,11 +130,11 @@ jobs: fail-fast: false matrix: versions: [ - { py: "3.9", weaviate: $WEAVIATE_124}, - { py: "3.10", weaviate: $WEAVIATE_125}, - { py: "3.11", weaviate: $WEAVIATE_126}, - { py: "3.12", weaviate: $WEAVIATE_127}, - { py: "3.13", weaviate: $WEAVIATE_128} + { py: "3.9", weaviate: $WEAVIATE_129}, + { py: "3.10", weaviate: $WEAVIATE_129}, + { py: "3.11", weaviate: $WEAVIATE_129}, + { py: "3.12", weaviate: $WEAVIATE_129}, + { py: "3.13", weaviate: $WEAVIATE_129} ] optional_dependencies: [false] steps: @@ -186,11 +187,11 @@ jobs: fail-fast: false matrix: versions: [ - { py: "3.9", weaviate: $WEAVIATE_128}, - { py: "3.10", weaviate: $WEAVIATE_128}, - { py: "3.11", 
weaviate: $WEAVIATE_128}, - { py: "3.12", weaviate: $WEAVIATE_128}, - { py: "3.13", weaviate: $WEAVIATE_128} + { py: "3.9", weaviate: $WEAVIATE_129}, + { py: "3.10", weaviate: $WEAVIATE_129}, + { py: "3.11", weaviate: $WEAVIATE_129}, + { py: "3.12", weaviate: $WEAVIATE_129}, + { py: "3.13", weaviate: $WEAVIATE_129} ] optional_dependencies: [false] steps: @@ -278,10 +279,12 @@ jobs: fail-fast: false matrix: server: [ + $WEAVIATE_124, $WEAVIATE_125, $WEAVIATE_126, $WEAVIATE_127, - $WEAVIATE_128 + $WEAVIATE_128, + $WEAVIATE_129 ] steps: - name: Checkout diff --git a/ci/docker-compose-generative.yml b/ci/docker-compose-modules.yml similarity index 80% rename from ci/docker-compose-generative.yml rename to ci/docker-compose-modules.yml index 67034ae7a..ec09fdafd 100644 --- a/ci/docker-compose-generative.yml +++ b/ci/docker-compose-modules.yml @@ -1,7 +1,7 @@ --- version: '3.4' services: - weaviate_openai: + weaviate_modules: command: - --host - 0.0.0.0 @@ -19,6 +19,6 @@ services: AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' PERSISTENCE_DATA_PATH: '/var/lib/weaviate' DEFAULT_VECTORIZER_MODULE: 'text2vec-openai' - ENABLE_MODULES: 'text2vec-openai,generative-openai,text2vec-cohere,generative-cohere' + ENABLE_MODULES: 'text2colbert-jinaai,text2vec-openai,generative-openai,text2vec-cohere,generative-cohere' CLUSTER_HOSTNAME: 'node1' DISABLE_TELEMETRY: 'true' diff --git a/integration/test_collection_aggregate.py b/integration/test_collection_aggregate.py index fb3739d2a..d2fc1289a 100644 --- a/integration/test_collection_aggregate.py +++ b/integration/test_collection_aggregate.py @@ -665,13 +665,13 @@ def test_group_by_aggregation_argument(collection_factory: CollectionFactory) -> groups = res.groups assert len(groups) == 2 assert groups[0].grouped_by.prop == "int" - assert groups[0].grouped_by.value == "1" or groups[1].grouped_by.value == "1" + assert groups[0].grouped_by.value == 1 or groups[1].grouped_by.value == 1 assert isinstance(groups[0].properties["text"], 
AggregateText) assert groups[0].properties["text"].count == 1 assert isinstance(groups[0].properties["int"], AggregateInteger) assert groups[0].properties["int"].count == 1 assert groups[1].grouped_by.prop == "int" - assert groups[1].grouped_by.value == "2" or groups[0].grouped_by.value == "2" + assert groups[1].grouped_by.value == 2 or groups[0].grouped_by.value == 2 assert isinstance(groups[1].properties["text"], AggregateText) assert groups[1].properties["text"].count == 1 assert isinstance(groups[1].properties["int"], AggregateInteger) diff --git a/integration/test_collection_openai.py b/integration/test_collection_openai.py index f53ae5567..bd3319955 100644 --- a/integration/test_collection_openai.py +++ b/integration/test_collection_openai.py @@ -588,7 +588,7 @@ def test_queries_with_rerank_and_generative(collection_factory: CollectionFactor reranker_config=Configure.Reranker.transformers(), vectorizer_config=Configure.Vectorizer.text2vec_openai(), properties=[Property(name="text", data_type=DataType.TEXT)], - ports=(8079, 50050), + ports=(8086, 50057), headers={"X-OpenAI-Api-Key": api_key}, ) if collection._connection._weaviate_version < _ServerVersion(1, 23, 1): diff --git a/integration/test_collection_rerank.py b/integration/test_collection_rerank.py index 4363dc5fb..f201a3344 100644 --- a/integration/test_collection_rerank.py +++ b/integration/test_collection_rerank.py @@ -38,6 +38,7 @@ def test_queries_with_rerank(collection_factory: CollectionFactory) -> None: vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_openai(), properties=[wvc.config.Property(name="text", data_type=wvc.config.DataType.TEXT)], headers={"X-OpenAI-Api-Key": api_key}, + ports=(8086, 50057), ) if collection._connection._weaviate_version < _ServerVersion(1, 23, 1): pytest.skip("Reranking requires Weaviate 1.23.1 or higher") @@ -95,6 +96,7 @@ def test_queries_with_rerank_and_group_by(collection_factory: CollectionFactory) ), properties=[wvc.config.Property(name="text", 
data_type=wvc.config.DataType.TEXT)], headers={"X-OpenAI-Api-Key": api_key}, + ports=(8086, 50057), ) if collection._connection._weaviate_version < _ServerVersion(1, 23, 1): pytest.skip("Reranking requires Weaviate 1.23.1 or higher") diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index 21be17251..f41979440 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -1,3 +1,4 @@ +import os import uuid from typing import List, Union, Dict, Sequence @@ -10,11 +11,12 @@ PQConfig, _VectorIndexConfigHNSW, _VectorIndexConfigFlat, + _MultiVectorConfig, Vectorizers, ReferenceProperty, ) from weaviate.collections.classes.data import DataObject -from weaviate.collections.classes.grpc import _MultiTargetVectorJoin +from weaviate.collections.classes.grpc import _MultiTargetVectorJoin, _ListOfVectorsQuery from weaviate.exceptions import WeaviateInvalidInputError from weaviate.types import INCLUDE_VECTOR @@ -690,6 +692,13 @@ def test_same_target_vector_multiple_input( "near_vector,target_vector", [ ({"first": [0, 1], "second": [[1, 0, 0], [0, 0, 1]]}, ["first", "second"]), + ( + { + "first": [0, 1], + "second": wvc.query.NearVector.list_of_vectors([[1, 0, 0], [0, 0, 1]]), + }, + ["first", "second"], + ), ({"first": [[0, 1], [0, 1]], "second": [1, 0, 0]}, ["first", "second"]), ( {"first": [[0, 1], [0, 1]], "second": [[1, 0, 0], [0, 0, 1]]}, @@ -703,7 +712,7 @@ def test_same_target_vector_multiple_input( ) def test_same_target_vector_multiple_input_combinations( collection_factory: CollectionFactory, - near_vector: Dict[str, Union[Sequence[float], Sequence[Sequence[float]]]], + near_vector: Dict[str, Union[Sequence[float], Sequence[Sequence[float]], _ListOfVectorsQuery]], target_vector: List[str], ) -> None: dummy = collection_factory("dummy") @@ -801,3 +810,102 @@ def test_include_vector_on_references( ).objects assert objs[0].references["hasRef"].objects[0].vector == expected + + +def 
test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: + dummy = collection_factory() + if dummy._connection._weaviate_version.is_lower_than(1, 29, 0): + pytest.skip("ColBERT vectors are only supported in Weaviate v1.29.0 and higher.") + + collection = collection_factory( + properties=[ + wvc.config.Property( + name="title", + data_type=wvc.config.DataType.TEXT, + ) + ], + vectorizer_config=[ + wvc.config.Configure.NamedVectors.none( + name="colbert", + vector_index_config=wvc.config.Configure.VectorIndex.hnsw( + multi_vector=wvc.config.Configure.VectorIndex.MultiVector.multi_vector() + ), + ), + wvc.config.Configure.NamedVectors.none( + name="regular", + ), + ], + ) + + config = collection.config.get() + assert config.vector_config is not None + assert isinstance(config.vector_config["colbert"].vector_index_config, _VectorIndexConfigHNSW) + assert isinstance( + config.vector_config["colbert"].vector_index_config.multi_vector, _MultiVectorConfig + ) + assert config.vector_config["colbert"].vector_index_config.multi_vector.aggregation == "maxSim" + + collection.data.insert_many([DataObject({}, vector={"colbert": [[1, 2], [4, 5]]})]) + assert len(collection) == 1 + + objs = collection.query.near_vector( + {"colbert": wvc.query.NearVector.multidimensional([[1, 2], [3, 4]])}, + target_vector="colbert", + ).objects + assert len(objs) == 1 + + objs = collection.query.hybrid( + None, + vector={"colbert": wvc.query.NearVector.multidimensional([[1, 2], [3, 4]])}, + target_vector="colbert", + ).objects + assert len(objs) == 1 + + +def test_colbert_vectors_jinaai(collection_factory: CollectionFactory) -> None: + api_key = os.environ.get("JINAAI_APIKEY") + if api_key is None: + pytest.skip("No JinaAI API key found.") + + dummy = collection_factory(ports=(8086, 50057), headers={"X-Jinaai-Api-Key": api_key}) + if dummy._connection._weaviate_version.is_lower_than(1, 29, 0): + pytest.skip("ColBERT vectors are only supported in Weaviate v1.29.0 and higher.") + + 
collection = collection_factory( + properties=[ + wvc.config.Property( + name="title", + data_type=wvc.config.DataType.TEXT, + ) + ], + vectorizer_config=[ + wvc.config.Configure.NamedVectors.text2colbert_jinaai( + name="colbert", + ) + ], + ) + + uuid = collection.data.insert({"title": "Hello World"}) + assert len(collection) == 1 + obj = collection.query.fetch_object_by_id(uuid, include_vector=["colbert"]) + vecs = obj.vector["colbert"] + assert isinstance(vecs[0], list) + + objs = collection.query.near_text("Hello", target_vector="colbert").objects + assert len(objs) == 1 + + objs = collection.query.hybrid("Hello", target_vector="colbert").objects + assert len(objs) == 1 + + objs = collection.query.near_vector( + { + "colbert": wvc.query.NearVector.multidimensional( + [[e + 0.01 for e in vec] for vec in vecs] + ) + }, + target_vector="colbert", + ).objects + assert len(objs) == 1 + + objs = collection.query.near_object(uuid, target_vector="colbert").objects + assert len(objs) == 1 diff --git a/mock_tests/test_collection.py b/mock_tests/test_collection.py index b885bea93..6091b8b44 100644 --- a/mock_tests/test_collection.py +++ b/mock_tests/test_collection.py @@ -170,6 +170,7 @@ def test_missing_multi_tenancy_config( quantizer=None, distance_metric=VectorDistances.COSINE, vector_cache_max_objects=10, + multi_vector=None, ) vic.distance = vic.distance_metric response_json = CollectionConfig( diff --git a/requirements-devel.txt b/requirements-devel.txt index 6daf22839..39f07f7a3 100644 --- a/requirements-devel.txt +++ b/requirements-devel.txt @@ -24,7 +24,7 @@ pytest-profiling==1.8.1 coverage==7.6.10 pytest-xdist==3.6.1 werkzeug==3.1.3 -pytest-httpserver==1.1.0 +pytest-httpserver==1.1.1 py-spy==0.4.0 numpy>=1.24.4,<3.0.0 diff --git a/test/collection/test_byteops.py b/test/collection/test_byteops.py index 508845c3a..f847e179c 100644 --- a/test/collection/test_byteops.py +++ b/test/collection/test_byteops.py @@ -1,4 +1,4 @@ -from weaviate.collections.queries.byteops 
import _ByteOps +from weaviate.collections.grpc.shared import _ByteOps def test_decode_float32s(): diff --git a/weaviate/classes/query.py b/weaviate/classes/query.py index e48ef20cb..00939ddce 100644 --- a/weaviate/classes/query.py +++ b/weaviate/classes/query.py @@ -10,6 +10,7 @@ NearMediaType, QueryNested, QueryReference, + NearVector, Rerank, Sort, ) @@ -27,6 +28,7 @@ "NearMediaType", "QueryNested", "QueryReference", + "NearVector", "Rerank", "Sort", "TargetVectors", diff --git a/weaviate/collections/aggregations/aggregate.py b/weaviate/collections/aggregations/aggregate.py index 9994043d8..d985b20d9 100644 --- a/weaviate/collections/aggregations/aggregate.py +++ b/weaviate/collections/aggregations/aggregate.py @@ -55,6 +55,7 @@ def __init__( name: str, consistency_level: Optional[ConsistencyLevel], tenant: Optional[str], + validate_arguments: bool, ): self._connection = connection self.__name = name @@ -65,7 +66,7 @@ def __init__( name=name, tenant=tenant, consistency_level=consistency_level, - validate_arguments=False, + validate_arguments=validate_arguments, ) def _query(self) -> AggregateBuilder: diff --git a/weaviate/collections/aggregations/near_vector.py b/weaviate/collections/aggregations/near_vector.py index 18e5fc974..e10d40a31 100644 --- a/weaviate/collections/aggregations/near_vector.py +++ b/weaviate/collections/aggregations/near_vector.py @@ -1,4 +1,4 @@ -from typing import Optional, Union +from typing import List, Optional, Union, cast from weaviate import syncify from weaviate.collections.aggregations.aggregate import _AggregateAsync @@ -82,6 +82,15 @@ async def near_vector( str(self._connection._weaviate_version), "1.29.0", ) + if isinstance(near_vector[0], list): + raise WeaviateUnsupportedFeatureError( + "A `near_vector` argument other than a list of floats", + str(self._connection._weaviate_version), + "1.29.0", + ) + near_vector = cast( + List[float], near_vector + ) # pylance cannot type narrow the immediately above check if not 
isinstance(target_vector, str): raise WeaviateUnsupportedFeatureError( "A `target_vector` argument other than a string", diff --git a/weaviate/collections/batch/grpc_batch_objects.py b/weaviate/collections/batch/grpc_batch_objects.py index 0453c9e60..0eb2ba2eb 100644 --- a/weaviate/collections/batch/grpc_batch_objects.py +++ b/weaviate/collections/batch/grpc_batch_objects.py @@ -2,7 +2,7 @@ import struct import time import uuid as uuid_package -from typing import Any, Dict, List, Optional, Union, cast +from typing import Any, Dict, List, Mapping, Optional, Sequence, Union, cast from google.protobuf.struct_pb2 import Struct from grpc.aio import AioRpcError # type: ignore @@ -16,7 +16,7 @@ from weaviate.collections.classes.config import ConsistencyLevel from weaviate.collections.classes.internal import ReferenceToMulti, ReferenceInputs from weaviate.collections.classes.types import GeoCoordinate, PhoneNumber -from weaviate.collections.grpc.shared import _BaseGRPC, PERMISSION_DENIED +from weaviate.collections.grpc.shared import _BaseGRPC, _Pack, PERMISSION_DENIED from weaviate.connect import ConnectionV4 from weaviate.exceptions import ( WeaviateBatchError, @@ -26,17 +26,8 @@ InsufficientPermissionsError, ) from weaviate.proto.v1 import batch_pb2, base_pb2 -from weaviate.util import _datetime_to_string, _get_vector_v4 - - -def _pack_named_vectors(vectors: Dict[str, List[float]]) -> List[base_pb2.Vectors]: - return [ - base_pb2.Vectors( - name=name, - vector_bytes=struct.pack("{}f".format(len(vector)), *vector), - ) - for name, vector in vectors.items() - ] +from weaviate.types import VECTORS +from weaviate.util import _datetime_to_string, _is_1d_vector class _BatchGRPC(_BaseGRPC): @@ -49,19 +40,26 @@ class _BatchGRPC(_BaseGRPC): def __init__(self, connection: ConnectionV4, consistency_level: Optional[ConsistencyLevel]): super().__init__(connection, consistency_level, False) - def __grpc_objects(self, objects: List[_BatchObject]) -> List[batch_pb2.BatchObject]: - def 
pack_vector(vector: Any) -> bytes: - vector_list = _get_vector_v4(vector) - return struct.pack("{}f".format(len(vector_list)), *vector_list) + def __single_vec(self, vectors: Optional[VECTORS]) -> Optional[bytes]: + if not _is_1d_vector(vectors): + return None + return _Pack.single(vectors) + + def __multi_vec(self, vectors: Optional[VECTORS]) -> Optional[List[base_pb2.Vectors]]: + if vectors is None or _is_1d_vector(vectors): + return None + # pylance fails to type narrow TypeGuard in _is_1d_vector properly + vectors = cast(Mapping[str, Union[Sequence[float], Sequence[Sequence[float]]]], vectors) + return [ + base_pb2.Vectors(name=name, vector_bytes=packing.bytes_, type=packing.type_) + for name, vec_or_vecs in vectors.items() + if (packing := _Pack.parse_single_or_multi_vec(vec_or_vecs)) + ] + def __grpc_objects(self, objects: List[_BatchObject]) -> List[batch_pb2.BatchObject]: return [ batch_pb2.BatchObject( collection=obj.collection, - vector_bytes=( - pack_vector(obj.vector) - if obj.vector is not None and isinstance(obj.vector, list) - else None - ), uuid=str(obj.uuid) if obj.uuid is not None else str(uuid_package.uuid4()), properties=( self.__translate_properties_from_python_to_grpc( @@ -72,11 +70,8 @@ def pack_vector(vector: Any) -> bytes: else None ), tenant=obj.tenant, - vectors=( - _pack_named_vectors(obj.vector) - if obj.vector is not None and isinstance(obj.vector, dict) - else None - ), + vector_bytes=self.__single_vec(obj.vector), + vectors=self.__multi_vec(obj.vector), ) for obj in objects ] diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 341812ff8..2dd733afc 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -31,6 +31,7 @@ _NamedVectorsUpdate, ) from weaviate.collections.classes.config_vector_index import ( + _MultiVectorConfigCreate, VectorIndexType as VectorIndexTypeAlias, VectorFilterStrategy, ) @@ -1507,8 +1508,17 @@ class _SQConfig(_ConfigBase): 
SQConfig = _SQConfig +@dataclass +class _MultiVectorConfig(_ConfigBase): + aggregation: str + + +MultiVector = _MultiVectorConfig + + @dataclass class _VectorIndexConfig(_ConfigBase): + multi_vector: Optional[_MultiVectorConfig] quantizer: Optional[Union[PQConfig, BQConfig, SQConfig]] def to_dict(self) -> Dict[str, Any]: @@ -1519,6 +1529,8 @@ def to_dict(self) -> Dict[str, Any]: out["bq"] = {**out.pop("quantizer"), "enabled": True} elif isinstance(self.quantizer, _SQConfig): out["sq"] = {**out.pop("quantizer"), "enabled": True} + if self.multi_vector is not None: + out["multivector"] = self.multi_vector.to_dict() return out @@ -1975,6 +1987,16 @@ def __add_props( ret_dict["properties"] = existing_props +class _VectorIndexMultiVector: + @staticmethod + def multi_vector( + aggregation: Union[Literal["maxSim"], str, None] = None, + ) -> _MultiVectorConfigCreate: + return _MultiVectorConfigCreate( + aggregation=aggregation, + ) + + class _VectorIndexQuantizer: @staticmethod def pq( @@ -2039,6 +2061,7 @@ def sq( class _VectorIndex: + MultiVector = _VectorIndexMultiVector Quantizer = _VectorIndexQuantizer @staticmethod @@ -2050,6 +2073,7 @@ def none() -> _VectorIndexConfigSkipCreate: return _VectorIndexConfigSkipCreate( distance=None, quantizer=None, + multivector=None, ) @staticmethod @@ -2066,6 +2090,7 @@ def hnsw( max_connections: Optional[int] = None, vector_cache_max_objects: Optional[int] = None, quantizer: Optional[_QuantizerConfigCreate] = None, + multi_vector: Optional[_MultiVectorConfigCreate] = None, ) -> _VectorIndexConfigHNSWCreate: """Create a `_VectorIndexConfigHNSWCreate` object to be used when defining the HNSW vector index configuration of Weaviate. 
@@ -2087,6 +2112,7 @@ def hnsw( maxConnections=max_connections, vectorCacheMaxObjects=vector_cache_max_objects, quantizer=quantizer, + multivector=multi_vector, ) @staticmethod @@ -2106,6 +2132,7 @@ def flat( distance=distance_metric, vectorCacheMaxObjects=vector_cache_max_objects, quantizer=quantizer, + multivector=None, ) @staticmethod @@ -2123,7 +2150,12 @@ def dynamic( See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hnsw) for a more detailed view! """ # noqa: D417 (missing argument descriptions in the docstring) return _VectorIndexConfigDynamicCreate( - distance=distance_metric, threshold=threshold, hnsw=hnsw, flat=flat, quantizer=None + distance=distance_metric, + threshold=threshold, + hnsw=hnsw, + flat=flat, + quantizer=None, + multivector=None, ) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index f0d39ebd4..5d21af419 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -7,6 +7,7 @@ _CollectionConfigSimple, _NamedVectorConfig, _NamedVectorizerConfig, + _MultiVectorConfig, _PQConfig, _VectorIndexConfigFlat, _VectorIndexConfigDynamic, @@ -168,6 +169,13 @@ def __get_hnsw_config(config: Dict[str, Any]) -> _VectorIndexConfigHNSW: quantizer=quantizer, skip=config["skip"], vector_cache_max_objects=config["vectorCacheMaxObjects"], + multi_vector=( + None + if config.get("multivector") is None + else _MultiVectorConfig( + aggregation=config["multivector"]["aggregation"], + ) + ), ) @@ -177,6 +185,7 @@ def __get_flat_config(config: Dict[str, Any]) -> _VectorIndexConfigFlat: distance_metric=VectorDistances(config["distance"]), quantizer=quantizer, vector_cache_max_objects=config["vectorCacheMaxObjects"], + multi_vector=None, ) diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index e0847cb99..416063bfc 100644 --- 
a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -24,6 +24,7 @@ _Multi2VecVoyageaiConfig, _Multi2VecGoogleConfig, _Ref2VecCentroidConfig, + _Text2ColbertJinaAIConfig, _Text2VecAWSConfig, _Text2VecAzureOpenAIConfig, _Text2VecCohereConfig, @@ -155,6 +156,46 @@ def custom( vector_index_config=vector_index_config, ) + @staticmethod + def text2colbert_jinaai( + name: str, + *, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + model: Optional[str] = None, + dimensions: Optional[int] = None, + ) -> _NamedVectorConfigCreate: + """Create a named vector using the `text2colbert_jinaai` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/colbert) + for detailed usage. + + Arguments: + `name` + The name of the named vector. + `source_properties` + Which properties should be included when vectorizing. By default all text properties are included. + `vector_index_config` + The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. + `model` + The model to use. Defaults to `None`, which uses the server-defined default. + `dimensions` + Number of dimensions. Applicable to v3 OpenAI models only. Defaults to `None`, which uses the server-defined default. 
+ """ + return _NamedVectorConfigCreate( + name=name, + source_properties=source_properties, + vector_index_config=vector_index_config, + vectorizer=_Text2ColbertJinaAIConfig( + model=model, dimensions=dimensions, vectorizeClassName=vectorize_collection_name + ), + ) + @staticmethod def text2vec_cohere( name: str, @@ -1205,8 +1246,7 @@ def text2vec_jinaai( ) -> _NamedVectorConfigCreate: """Create a named vector using the `text2vec-jinaai` model. - See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings) - for detailed usage. + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings) for detailed usage. Arguments: `name` @@ -1223,8 +1263,6 @@ def text2vec_jinaai( The number of dimensions for the generated embeddings. Defaults to `None`, which uses the server-defined default. `model` The model to use. Defaults to `None`, which uses the server-defined default. - See the - [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings#available-models) for more details. 
""" return _NamedVectorConfigCreate( name=name, diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index a620e9c2c..3c36bbf32 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -38,8 +38,17 @@ class VectorIndexType(str, Enum): DYNAMIC = "dynamic" +class _MultiVectorConfigCreateBase(_ConfigCreateModel): + enabled: bool = Field(default=True) + + +class _MultiVectorConfigCreate(_MultiVectorConfigCreateBase): + aggregation: Optional[str] + + class _VectorIndexConfigCreate(_ConfigCreateModel): distance: Optional[VectorDistances] + multivector: Optional[_MultiVectorConfigCreate] quantizer: Optional[_QuantizerConfigCreate] = Field(exclude=True) @staticmethod diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 96074d969..77464b287 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -114,6 +114,7 @@ class Vectorizers(str, Enum): """ NONE = "none" + TEXT2COLBERT_JINAAI = "text2colbert-jinaai" TEXT2VEC_AWS = "text2vec-aws" TEXT2VEC_COHERE = "text2vec-cohere" TEXT2VEC_CONTEXTIONARY = "text2vec-contextionary" @@ -168,6 +169,15 @@ class _VectorizerConfigCreate(_ConfigCreateModel): vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(default=..., exclude=True) +class _Text2ColbertJinaAIConfig(_VectorizerConfigCreate): + vectorizer: Union[Vectorizers, _EnumLikeStr] = Field( + default=Vectorizers.TEXT2COLBERT_JINAAI, frozen=True, exclude=True + ) + vectorizeClassName: bool + model: Optional[str] + dimensions: Optional[int] + + class _Text2VecContextionaryConfig(_VectorizerConfigCreate): vectorizer: Union[Vectorizers, _EnumLikeStr] = Field( default=Vectorizers.TEXT2VEC_CONTEXTIONARY, frozen=True, exclude=True diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index 
505ea3ce8..480c34cf5 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from enum import Enum, auto -from typing import ClassVar, List, Literal, Optional, Sequence, Type, Union, Dict, cast +from typing import ClassVar, List, Literal, Mapping, Optional, Sequence, Type, Union, Dict, cast from pydantic import ConfigDict, Field @@ -8,7 +8,7 @@ from weaviate.proto.v1 import base_search_pb2 from weaviate.str_enum import BaseEnum from weaviate.types import INCLUDE_VECTOR, UUID, NUMBER -from weaviate.util import _ServerVersion, _get_vector_v4, _is_1d_vector +from weaviate.util import _ServerVersion class HybridFusion(str, BaseEnum): @@ -228,9 +228,46 @@ class Rerank(_WeaviateInput): query: Optional[str] = Field(default=None) +class _MultidimensionalQuery(_WeaviateInput): + tensor: Sequence[Sequence[float]] + + +class _ListOfVectorsQuery(_WeaviateInput): + vectors: Sequence[Sequence[float]] + + +MultidimensionalQuery = _MultidimensionalQuery +"""Define a multi-vector query to be used within a near vector search, i.e. a single vector over a multi-vector space.""" + +ListOfVectorsQuery = _ListOfVectorsQuery +"""Define a many-vectors query to be used within a near vector search, i.e. 
multiple vectors over a single-vector space.""" + + NearVectorInputType = Union[ - Sequence[NUMBER], Dict[str, Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]]] + Sequence[NUMBER], + Sequence[Sequence[NUMBER]], + Mapping[ + str, + Union[ + Sequence[NUMBER], Sequence[Sequence[NUMBER]], MultidimensionalQuery, ListOfVectorsQuery + ], + ], ] +"""Define the input types that can be used in a near vector search.""" + + +class NearVector: + """Factory class to use when defining near vector queries with multiple vectors in `near_vector()` and `hybrid()` methods.""" + + @staticmethod + def multidimensional(tensor: Sequence[Sequence[float]]) -> _MultidimensionalQuery: + """Define a multi-vector query to be used within a near vector search, i.e. a single vector over a multi-vector space.""" + return _MultidimensionalQuery(tensor=tensor) + + @staticmethod + def list_of_vectors(vectors: Sequence[Sequence[float]]) -> _ListOfVectorsQuery: + """Define a many-vectors query to be used within a near vector search, i.e. multiple vectors over a single-vector space.""" + return _ListOfVectorsQuery(vectors=vectors) class _HybridNearBase(_WeaviateInput): @@ -246,8 +283,21 @@ class _HybridNearText(_HybridNearBase): move_away: Optional[Move] = None -class _HybridNearVector(_HybridNearBase): +class _HybridNearVector: # can't be a Pydantic model because of validation issues parsing numpy, pd, pl arrays/series vector: NearVectorInputType + distance: Optional[float] + certainty: Optional[float] + + def __init__( + self, + *, + vector: NearVectorInputType, + distance: Optional[float] = None, + certainty: Optional[float] = None, + ) -> None: + self.vector = vector + self.distance = distance + self.certainty = certainty HybridVectorType = Union[NearVectorInputType, _HybridNearText, _HybridNearVector] @@ -422,14 +472,6 @@ def near_vector( Returns: A `_HybridNearVector` object to be used in the `vector` parameter of the `query.hybrid` and `generate.hybrid` search methods. 
""" - if isinstance(vector, dict): - for key, val in vector.items(): - if _is_1d_vector(val): - vector[key] = _get_vector_v4(val) - else: - vector[key] = [_get_vector_v4(v) for v in val] - else: - vector = _get_vector_v4(vector) return _HybridNearVector(vector=vector, distance=distance, certainty=certainty) diff --git a/weaviate/collections/classes/internal.py b/weaviate/collections/classes/internal.py index d1e343a7f..4e4e81c40 100644 --- a/weaviate/collections/classes/internal.py +++ b/weaviate/collections/classes/internal.py @@ -94,7 +94,7 @@ class _Object(Generic[P, R, M]): metadata: M properties: P references: R - vector: Dict[str, List[float]] + vector: Dict[str, Union[List[float], List[List[float]]]] collection: str diff --git a/weaviate/collections/collection/async_.py b/weaviate/collections/collection/async_.py index 5aaf79050..a5331098b 100644 --- a/weaviate/collections/collection/async_.py +++ b/weaviate/collections/collection/async_.py @@ -18,7 +18,6 @@ from weaviate.collections.classes.types import Properties, TProperties from weaviate.collections.data import _DataCollectionAsync from weaviate.collections.generate import _GenerateCollectionAsync -from weaviate.collections.grpc.aggregate import _AggregateGRPC from weaviate.collections.iterator import _IteratorInputs, _ObjectAIterator from weaviate.collections.tenants import _TenantsAsync from weaviate.connect import ConnectionV4 @@ -75,12 +74,11 @@ def __init__( references, ) - self.__aggregate_grpc = _AggregateGRPC( - connection, name, tenant, consistency_level, validate_arguments - ) self.__cluster = _ClusterAsync(connection) - self.aggregate = _AggregateCollectionAsync(connection, name, consistency_level, tenant) + self.aggregate = _AggregateCollectionAsync( + connection, name, consistency_level, tenant, validate_arguments + ) """This namespace includes all the querying methods available to you when using Weaviate's standard aggregation capabilities.""" self.backup = 
_CollectionBackupAsync(connection, name) """This namespace includes all the backup methods available to you when backing up a collection in Weaviate.""" @@ -112,7 +110,7 @@ async def length(self) -> int: assert total is not None return total else: - return await self.__aggregate_grpc.objects_count() + return await self.aggregate._grpc.objects_count() async def to_string(self) -> str: """Return a string representation of the collection object.""" diff --git a/weaviate/collections/collection/sync.py b/weaviate/collections/collection/sync.py index 5b9e7f121..01ca737a3 100644 --- a/weaviate/collections/collection/sync.py +++ b/weaviate/collections/collection/sync.py @@ -95,6 +95,7 @@ def __init__( name=name, consistency_level=consistency_level, tenant=tenant, + validate_arguments=validate_arguments, ) """This namespace includes all the querying methods available to you when using Weaviate's standard aggregation capabilities.""" self.backup = _CollectionBackup( diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index 61b8dc499..6b28eee6b 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -6,14 +6,19 @@ List, Literal, Optional, + Sequence, Union, cast, Tuple, get_args, ) +from dataclasses import dataclass +from typing_extensions import TypeGuard from weaviate.collections.classes.config import ConsistencyLevel from weaviate.collections.classes.grpc import ( + _MultidimensionalQuery, + _ListOfVectorsQuery, _MultiTargetVectorJoin, _HybridNearText, _HybridNearVector, @@ -33,8 +38,12 @@ from weaviate.util import _get_vector_v4, _is_1d_vector from weaviate.validator import _ValidateArgument, _validate_input, _ExtraTypes + PERMISSION_DENIED = "PERMISSION_DENIED" +UINT32_LEN = 4 +UINT64_LEN = 8 + class _BaseGRPC: def __init__( @@ -101,10 +110,13 @@ def __target_vector_to_grpc( else: return target_vector.to_grpc_target_vector(self._connection._weaviate_version), None - @staticmethod def 
_vector_per_target( - vector: NearVectorInputType, targets: Optional[base_search_pb2.Targets], argument_name: str + self, + vector: NearVectorInputType, + targets: Optional[base_search_pb2.Targets], + argument_name: str, ) -> Tuple[Optional[Dict[str, bytes]], Optional[bytes]]: + """@deprecated in 1.27.0, included for BC until 1.27.0 is no longer supported.""" # noqa: D401 invalid_nv_exception = WeaviateInvalidInputError( f"""{argument_name} argument can be: - a list of numbers @@ -132,7 +144,11 @@ def _vector_per_target( return vector_per_target, None else: - if len(vector) == 0: + if ( + isinstance(vector, _MultidimensionalQuery) + or isinstance(vector, _ListOfVectorsQuery) + or len(vector) == 0 + ): raise invalid_nv_exception if _is_1d_vector(vector): @@ -146,9 +162,11 @@ def _vector_per_target( keys and lists of numbers as values.""" ) - @staticmethod def _vector_for_target( - vector: NearVectorInputType, targets: Optional[base_search_pb2.Targets], argument_name: str + self, + vector: NearVectorInputType, + targets: Optional[base_search_pb2.Targets], + argument_name: str, ) -> Tuple[ Optional[List[base_search_pb2.VectorForTarget]], Optional[bytes], Optional[List[str]] ]: @@ -161,7 +179,7 @@ def _vector_for_target( vector_for_target: List[base_search_pb2.VectorForTarget] = [] - def add_vector(val: List[float], target_name: str) -> None: + def add_vector(val: Sequence[float], target_name: str) -> None: vec = _get_vector_v4(val) if ( @@ -171,11 +189,25 @@ def add_vector(val: List[float], target_name: str) -> None: ): raise invalid_nv_exception - vector_for_target.append( - base_search_pb2.VectorForTarget( - name=target_name, vector_bytes=struct.pack("{}f".format(len(vec)), *vec) + if self._connection._weaviate_version.is_lower_than(1, 29, 0): + vector_for_target.append( + base_search_pb2.VectorForTarget( + name=target_name, vector_bytes=_Pack.single(vec) + ) + ) + else: + vector_for_target.append( + base_search_pb2.VectorForTarget( + name=target_name, + vectors=[ + 
base_pb2.Vectors( + name=target_name, + vector_bytes=_Pack.single(vec), + type=base_pb2.Vectors.VECTOR_TYPE_SINGLE_FP32, + ) + ], + ) ) - ) if isinstance(vector, dict): if ( @@ -188,15 +220,32 @@ def add_vector(val: List[float], target_name: str) -> None: for key, value in vector.items(): # typing tools do not understand the type narrowing here if _is_1d_vector(value): - val: List[float] = cast(List[float], value) + val = value add_vector(val, key) target_vectors_tmp.append(key) + elif isinstance(value, _MultidimensionalQuery): + vector_for_target.append( + base_search_pb2.VectorForTarget( + name=key, + vectors=[ + base_pb2.Vectors( + name=key, + vector_bytes=_Pack.multi(value.tensor), + type=base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32, + ) + ], + ) + ) + target_vectors_tmp.append(key) + elif isinstance(value, _ListOfVectorsQuery): + for vec in value.vectors: + add_vector(vec, key) + target_vectors_tmp.append(key) else: - vals: List[List[float]] = cast(List[List[float]], value) + vals = cast(Sequence[Sequence[NUMBER]], value) for inner_vector in vals: add_vector(inner_vector, key) target_vectors_tmp.append(key) - return vector_for_target, None, target_vectors_tmp else: if _is_1d_vector(vector): @@ -588,3 +637,85 @@ def _parse_hybrid( if query is not None or vector is not None else None ) + + +class _ByteOps: + @staticmethod + def decode_float32s(byte_vector: bytes) -> List[float]: + return [ + float(val) for val in struct.unpack(f"{len(byte_vector)//UINT32_LEN}f", byte_vector) + ] + + @staticmethod + def decode_float64s(byte_vector: bytes) -> List[float]: + return [ + float(val) for val in struct.unpack(f"{len(byte_vector)//UINT64_LEN}d", byte_vector) + ] + + @staticmethod + def decode_int64s(byte_vector: bytes) -> List[int]: + return [int(val) for val in struct.unpack(f"{len(byte_vector)//UINT64_LEN}q", byte_vector)] + + +@dataclass +class _Packing: + bytes_: bytes + type_: base_pb2.Vectors.VectorType + + +class _Pack: + @staticmethod + def is_multi( + v: 
Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]] + ) -> TypeGuard[List[List[NUMBER]]]: + return len(v) > 0 and isinstance(v[0], list) + + @staticmethod + def is_single( + v: Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]] + ) -> TypeGuard[List[NUMBER]]: + return len(v) > 0 and (isinstance(v[0], float) or isinstance(v[0], int)) + + @staticmethod + def parse_single_or_multi_vec( + vector: Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]] + ) -> _Packing: + if _Pack.is_multi(vector): + return _Packing( + bytes_=_Pack.multi(vector), type_=base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32 + ) + elif _Pack.is_single(vector): + return _Packing( + bytes_=_Pack.single(vector), type_=base_pb2.Vectors.VECTOR_TYPE_SINGLE_FP32 + ) + else: + raise WeaviateInvalidInputError(f"Invalid vectors: {vector}") + + @staticmethod + def single(vector: Sequence[NUMBER]) -> bytes: + vector_list = _get_vector_v4(vector) + return struct.pack("{}f".format(len(vector_list)), *vector_list) + + @staticmethod + def multi(vector: Sequence[Sequence[NUMBER]]) -> bytes: + vector_list = [item for sublist in vector for item in sublist] + return struct.pack(" List[float]: + return _ByteOps.decode_float32s(byte_vector) + + @staticmethod + def multi(byte_vector: bytes) -> List[List[float]]: + dim_bytes = byte_vector[:2] + dim = int(struct.unpack(" Dict[str, List[float]]: + ) -> Dict[str, Union[List[float], List[List[float]]]]: if ( len(add_props.vector_bytes) == 0 and len(add_props.vector) == 0 @@ -160,9 +157,14 @@ def __extract_vector_for_object( if len(add_props.vector_bytes) > 0: return {"default": _ByteOps.decode_float32s(add_props.vector_bytes)} - vecs = {} + vecs: Dict[str, Union[List[float], List[List[float]]]] = {} for vec in add_props.vectors: - vecs[vec.name] = _ByteOps.decode_float32s(vec.vector_bytes) + if vec.type == base_pb2.Vectors.VECTOR_TYPE_SINGLE_FP32: + vecs[vec.name] = _Unpack.single(vec.vector_bytes) + elif vec.type == base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32: + vecs[vec.name] = 
_Unpack.multi(vec.vector_bytes) + else: + vecs[vec.name] = _Unpack.single(vec.vector_bytes) return vecs def __extract_generated_for_object( diff --git a/weaviate/collections/queries/byteops.py b/weaviate/collections/queries/byteops.py deleted file mode 100644 index 93c97fac8..000000000 --- a/weaviate/collections/queries/byteops.py +++ /dev/null @@ -1,16 +0,0 @@ -import struct -from typing import List - - -class _ByteOps: - @staticmethod - def decode_float32s(byte_vector: bytes) -> List[float]: - return [float(val) for val in struct.unpack(f"{len(byte_vector)//4}f", byte_vector)] - - @staticmethod - def decode_float64s(byte_vector: bytes) -> List[float]: - return [float(val) for val in struct.unpack(f"{len(byte_vector)//8}d", byte_vector)] - - @staticmethod - def decode_int64s(byte_vector: bytes) -> List[int]: - return [int(val) for val in struct.unpack(f"{len(byte_vector)//8}q", byte_vector)] diff --git a/weaviate/connect/v4.py b/weaviate/connect/v4.py index b3bec7f8e..4ff482e2a 100644 --- a/weaviate/connect/v4.py +++ b/weaviate/connect/v4.py @@ -359,7 +359,9 @@ def periodic_refresh_token(refresh_time: int, _auth: Optional[_Auth]) -> None: ): # use refresh token when available try: - if "refresh_token" in cast(AsyncOAuth2Client, self._client).token: + if self._client is None: + pass + elif "refresh_token" in cast(AsyncOAuth2Client, self._client).token: assert isinstance(self._client, AsyncOAuth2Client) self._client.token = asyncio.run_coroutine_threadsafe( self._client.refresh_token( diff --git a/weaviate/outputs/query.py b/weaviate/outputs/query.py index 113a1e474..d63e1090d 100644 --- a/weaviate/outputs/query.py +++ b/weaviate/outputs/query.py @@ -6,7 +6,13 @@ FilterByRef, FilterReturn, ) -from weaviate.collections.classes.grpc import Sorting, NearVectorInputType, TargetVectorJoinType +from weaviate.collections.classes.grpc import ( + Sorting, + NearVectorInputType, + TargetVectorJoinType, + MultidimensionalQuery, + ListOfVectorsQuery, +) from 
weaviate.collections.classes.internal import ( @@ -52,9 +58,11 @@ "GenerativeGroupByReturnType", "GenerativeSearchReturnType", "GeoCoordinate", - "NearVectorInputType", + "ListOfVectorsQuery", "MetadataReturn", "MetadataSingleObjectReturn", + "MultidimensionalQuery", + "NearVectorInputType", "Object", "ObjectSingleReturn", "GroupByObject", diff --git a/weaviate/proto/v1/base_pb2.py b/weaviate/proto/v1/base_pb2.py index b9d3886de..242b6f0a6 100644 --- a/weaviate/proto/v1/base_pb2.py +++ b/weaviate/proto/v1/base_pb2.py @@ -22,7 +22,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\rv1/base.proto\x12\x0bweaviate.v1\x1a\x1cgoogle/protobuf/struct.proto"T\n\x15NumberArrayProperties\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x18\x01\x12\x11\n\tprop_name\x18\x02 \x01(\t\x12\x14\n\x0cvalues_bytes\x18\x03 \x01(\x0c"7\n\x12IntArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\x03\x12\x11\n\tprop_name\x18\x02 \x01(\t"8\n\x13TextArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\t\x12\x11\n\tprop_name\x18\x02 \x01(\t";\n\x16\x42ooleanArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\x08\x12\x11\n\tprop_name\x18\x02 \x01(\t"\xf1\x03\n\x15ObjectPropertiesValue\x12\x33\n\x12non_ref_properties\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x43\n\x17number_array_properties\x18\x02 \x03(\x0b\x32".weaviate.v1.NumberArrayProperties\x12=\n\x14int_array_properties\x18\x03 \x03(\x0b\x32\x1f.weaviate.v1.IntArrayProperties\x12?\n\x15text_array_properties\x18\x04 \x03(\x0b\x32 .weaviate.v1.TextArrayProperties\x12\x45\n\x18\x62oolean_array_properties\x18\x05 \x03(\x0b\x32#.weaviate.v1.BooleanArrayProperties\x12\x38\n\x11object_properties\x18\x06 \x03(\x0b\x32\x1d.weaviate.v1.ObjectProperties\x12\x43\n\x17object_array_properties\x18\x07 \x03(\x0b\x32".weaviate.v1.ObjectArrayProperties\x12\x18\n\x10\x65mpty_list_props\x18\n \x03(\t"^\n\x15ObjectArrayProperties\x12\x32\n\x06values\x18\x01 \x03(\x0b\x32".weaviate.v1.ObjectPropertiesValue\x12\x11\n\tprop_name\x18\x02 
\x01(\t"X\n\x10ObjectProperties\x12\x31\n\x05value\x18\x01 \x01(\x0b\x32".weaviate.v1.ObjectPropertiesValue\x12\x11\n\tprop_name\x18\x02 \x01(\t"\x1b\n\tTextArray\x12\x0e\n\x06values\x18\x01 \x03(\t"\x1a\n\x08IntArray\x12\x0e\n\x06values\x18\x01 \x03(\x03"\x1d\n\x0bNumberArray\x12\x0e\n\x06values\x18\x01 \x03(\x01"\x1e\n\x0c\x42ooleanArray\x12\x0e\n\x06values\x18\x01 \x03(\x08"\xfc\x06\n\x07\x46ilters\x12/\n\x08operator\x18\x01 \x01(\x0e\x32\x1d.weaviate.v1.Filters.Operator\x12\x0e\n\x02on\x18\x02 \x03(\tB\x02\x18\x01\x12%\n\x07\x66ilters\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Filters\x12\x14\n\nvalue_text\x18\x04 \x01(\tH\x00\x12\x13\n\tvalue_int\x18\x05 \x01(\x03H\x00\x12\x17\n\rvalue_boolean\x18\x06 \x01(\x08H\x00\x12\x16\n\x0cvalue_number\x18\x07 \x01(\x01H\x00\x12\x32\n\x10value_text_array\x18\t \x01(\x0b\x32\x16.weaviate.v1.TextArrayH\x00\x12\x30\n\x0fvalue_int_array\x18\n \x01(\x0b\x32\x15.weaviate.v1.IntArrayH\x00\x12\x38\n\x13value_boolean_array\x18\x0b \x01(\x0b\x32\x19.weaviate.v1.BooleanArrayH\x00\x12\x36\n\x12value_number_array\x18\x0c \x01(\x0b\x32\x18.weaviate.v1.NumberArrayH\x00\x12\x36\n\tvalue_geo\x18\r \x01(\x0b\x32!.weaviate.v1.GeoCoordinatesFilterH\x00\x12)\n\x06target\x18\x14 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget"\xe3\x02\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x12\n\x0eOPERATOR_EQUAL\x10\x01\x12\x16\n\x12OPERATOR_NOT_EQUAL\x10\x02\x12\x19\n\x15OPERATOR_GREATER_THAN\x10\x03\x12\x1f\n\x1bOPERATOR_GREATER_THAN_EQUAL\x10\x04\x12\x16\n\x12OPERATOR_LESS_THAN\x10\x05\x12\x1c\n\x18OPERATOR_LESS_THAN_EQUAL\x10\x06\x12\x10\n\x0cOPERATOR_AND\x10\x07\x12\x0f\n\x0bOPERATOR_OR\x10\x08\x12\x1d\n\x19OPERATOR_WITHIN_GEO_RANGE\x10\t\x12\x11\n\rOPERATOR_LIKE\x10\n\x12\x14\n\x10OPERATOR_IS_NULL\x10\x0b\x12\x19\n\x15OPERATOR_CONTAINS_ANY\x10\x0c\x12\x19\n\x15OPERATOR_CONTAINS_ALL\x10\rB\x0c\n\ntest_value"T\n\x1b\x46ilterReferenceSingleTarget\x12\n\n\x02on\x18\x01 \x01(\t\x12)\n\x06target\x18\x02 
\x01(\x0b\x32\x19.weaviate.v1.FilterTarget"n\n\x1a\x46ilterReferenceMultiTarget\x12\n\n\x02on\x18\x01 \x01(\t\x12)\n\x06target\x18\x02 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget\x12\x19\n\x11target_collection\x18\x03 \x01(\t""\n\x14\x46ilterReferenceCount\x12\n\n\x02on\x18\x01 \x01(\t"\xe4\x01\n\x0c\x46ilterTarget\x12\x12\n\x08property\x18\x01 \x01(\tH\x00\x12\x41\n\rsingle_target\x18\x02 \x01(\x0b\x32(.weaviate.v1.FilterReferenceSingleTargetH\x00\x12?\n\x0cmulti_target\x18\x03 \x01(\x0b\x32\'.weaviate.v1.FilterReferenceMultiTargetH\x00\x12\x32\n\x05\x63ount\x18\x04 \x01(\x0b\x32!.weaviate.v1.FilterReferenceCountH\x00\x42\x08\n\x06target"M\n\x14GeoCoordinatesFilter\x12\x10\n\x08latitude\x18\x01 \x01(\x02\x12\x11\n\tlongitude\x18\x02 \x01(\x02\x12\x10\n\x08\x64istance\x18\x03 \x01(\x02"<\n\x07Vectors\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05index\x18\x02 \x01(\x04\x12\x14\n\x0cvector_bytes\x18\x03 \x01(\x0c*\x89\x01\n\x10\x43onsistencyLevel\x12!\n\x1d\x43ONSISTENCY_LEVEL_UNSPECIFIED\x10\x00\x12\x19\n\x15\x43ONSISTENCY_LEVEL_ONE\x10\x01\x12\x1c\n\x18\x43ONSISTENCY_LEVEL_QUORUM\x10\x02\x12\x19\n\x15\x43ONSISTENCY_LEVEL_ALL\x10\x03\x42n\n#io.weaviate.client.grpc.protocol.v1B\x11WeaviateProtoBaseZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' + b'\n\rv1/base.proto\x12\x0bweaviate.v1\x1a\x1cgoogle/protobuf/struct.proto"T\n\x15NumberArrayProperties\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x18\x01\x12\x11\n\tprop_name\x18\x02 \x01(\t\x12\x14\n\x0cvalues_bytes\x18\x03 \x01(\x0c"7\n\x12IntArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\x03\x12\x11\n\tprop_name\x18\x02 \x01(\t"8\n\x13TextArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\t\x12\x11\n\tprop_name\x18\x02 \x01(\t";\n\x16\x42ooleanArrayProperties\x12\x0e\n\x06values\x18\x01 \x03(\x08\x12\x11\n\tprop_name\x18\x02 \x01(\t"\xf1\x03\n\x15ObjectPropertiesValue\x12\x33\n\x12non_ref_properties\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x43\n\x17number_array_properties\x18\x02 
\x03(\x0b\x32".weaviate.v1.NumberArrayProperties\x12=\n\x14int_array_properties\x18\x03 \x03(\x0b\x32\x1f.weaviate.v1.IntArrayProperties\x12?\n\x15text_array_properties\x18\x04 \x03(\x0b\x32 .weaviate.v1.TextArrayProperties\x12\x45\n\x18\x62oolean_array_properties\x18\x05 \x03(\x0b\x32#.weaviate.v1.BooleanArrayProperties\x12\x38\n\x11object_properties\x18\x06 \x03(\x0b\x32\x1d.weaviate.v1.ObjectProperties\x12\x43\n\x17object_array_properties\x18\x07 \x03(\x0b\x32".weaviate.v1.ObjectArrayProperties\x12\x18\n\x10\x65mpty_list_props\x18\n \x03(\t"^\n\x15ObjectArrayProperties\x12\x32\n\x06values\x18\x01 \x03(\x0b\x32".weaviate.v1.ObjectPropertiesValue\x12\x11\n\tprop_name\x18\x02 \x01(\t"X\n\x10ObjectProperties\x12\x31\n\x05value\x18\x01 \x01(\x0b\x32".weaviate.v1.ObjectPropertiesValue\x12\x11\n\tprop_name\x18\x02 \x01(\t"\x1b\n\tTextArray\x12\x0e\n\x06values\x18\x01 \x03(\t"\x1a\n\x08IntArray\x12\x0e\n\x06values\x18\x01 \x03(\x03"\x1d\n\x0bNumberArray\x12\x0e\n\x06values\x18\x01 \x03(\x01"\x1e\n\x0c\x42ooleanArray\x12\x0e\n\x06values\x18\x01 \x03(\x08"\xfc\x06\n\x07\x46ilters\x12/\n\x08operator\x18\x01 \x01(\x0e\x32\x1d.weaviate.v1.Filters.Operator\x12\x0e\n\x02on\x18\x02 \x03(\tB\x02\x18\x01\x12%\n\x07\x66ilters\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Filters\x12\x14\n\nvalue_text\x18\x04 \x01(\tH\x00\x12\x13\n\tvalue_int\x18\x05 \x01(\x03H\x00\x12\x17\n\rvalue_boolean\x18\x06 \x01(\x08H\x00\x12\x16\n\x0cvalue_number\x18\x07 \x01(\x01H\x00\x12\x32\n\x10value_text_array\x18\t \x01(\x0b\x32\x16.weaviate.v1.TextArrayH\x00\x12\x30\n\x0fvalue_int_array\x18\n \x01(\x0b\x32\x15.weaviate.v1.IntArrayH\x00\x12\x38\n\x13value_boolean_array\x18\x0b \x01(\x0b\x32\x19.weaviate.v1.BooleanArrayH\x00\x12\x36\n\x12value_number_array\x18\x0c \x01(\x0b\x32\x18.weaviate.v1.NumberArrayH\x00\x12\x36\n\tvalue_geo\x18\r \x01(\x0b\x32!.weaviate.v1.GeoCoordinatesFilterH\x00\x12)\n\x06target\x18\x14 
\x01(\x0b\x32\x19.weaviate.v1.FilterTarget"\xe3\x02\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x12\n\x0eOPERATOR_EQUAL\x10\x01\x12\x16\n\x12OPERATOR_NOT_EQUAL\x10\x02\x12\x19\n\x15OPERATOR_GREATER_THAN\x10\x03\x12\x1f\n\x1bOPERATOR_GREATER_THAN_EQUAL\x10\x04\x12\x16\n\x12OPERATOR_LESS_THAN\x10\x05\x12\x1c\n\x18OPERATOR_LESS_THAN_EQUAL\x10\x06\x12\x10\n\x0cOPERATOR_AND\x10\x07\x12\x0f\n\x0bOPERATOR_OR\x10\x08\x12\x1d\n\x19OPERATOR_WITHIN_GEO_RANGE\x10\t\x12\x11\n\rOPERATOR_LIKE\x10\n\x12\x14\n\x10OPERATOR_IS_NULL\x10\x0b\x12\x19\n\x15OPERATOR_CONTAINS_ANY\x10\x0c\x12\x19\n\x15OPERATOR_CONTAINS_ALL\x10\rB\x0c\n\ntest_value"T\n\x1b\x46ilterReferenceSingleTarget\x12\n\n\x02on\x18\x01 \x01(\t\x12)\n\x06target\x18\x02 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget"n\n\x1a\x46ilterReferenceMultiTarget\x12\n\n\x02on\x18\x01 \x01(\t\x12)\n\x06target\x18\x02 \x01(\x0b\x32\x19.weaviate.v1.FilterTarget\x12\x19\n\x11target_collection\x18\x03 \x01(\t""\n\x14\x46ilterReferenceCount\x12\n\n\x02on\x18\x01 \x01(\t"\xe4\x01\n\x0c\x46ilterTarget\x12\x12\n\x08property\x18\x01 \x01(\tH\x00\x12\x41\n\rsingle_target\x18\x02 \x01(\x0b\x32(.weaviate.v1.FilterReferenceSingleTargetH\x00\x12?\n\x0cmulti_target\x18\x03 \x01(\x0b\x32\'.weaviate.v1.FilterReferenceMultiTargetH\x00\x12\x32\n\x05\x63ount\x18\x04 \x01(\x0b\x32!.weaviate.v1.FilterReferenceCountH\x00\x42\x08\n\x06target"M\n\x14GeoCoordinatesFilter\x12\x10\n\x08latitude\x18\x01 \x01(\x02\x12\x11\n\tlongitude\x18\x02 \x01(\x02\x12\x10\n\x08\x64istance\x18\x03 \x01(\x02"\xd3\x01\n\x07Vectors\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x11\n\x05index\x18\x02 \x01(\x04\x42\x02\x18\x01\x12\x14\n\x0cvector_bytes\x18\x03 \x01(\x0c\x12-\n\x04type\x18\x04 
\x01(\x0e\x32\x1f.weaviate.v1.Vectors.VectorType"b\n\nVectorType\x12\x1b\n\x17VECTOR_TYPE_UNSPECIFIED\x10\x00\x12\x1b\n\x17VECTOR_TYPE_SINGLE_FP32\x10\x01\x12\x1a\n\x16VECTOR_TYPE_MULTI_FP32\x10\x02*\x89\x01\n\x10\x43onsistencyLevel\x12!\n\x1d\x43ONSISTENCY_LEVEL_UNSPECIFIED\x10\x00\x12\x19\n\x15\x43ONSISTENCY_LEVEL_ONE\x10\x01\x12\x1c\n\x18\x43ONSISTENCY_LEVEL_QUORUM\x10\x02\x12\x19\n\x15\x43ONSISTENCY_LEVEL_ALL\x10\x03\x42n\n#io.weaviate.client.grpc.protocol.v1B\x11WeaviateProtoBaseZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' ) _globals = globals() @@ -37,8 +37,10 @@ _globals["_NUMBERARRAYPROPERTIES"].fields_by_name["values"]._serialized_options = b"\030\001" _globals["_FILTERS"].fields_by_name["on"]._loaded_options = None _globals["_FILTERS"].fields_by_name["on"]._serialized_options = b"\030\001" - _globals["_CONSISTENCYLEVEL"]._serialized_start = 2630 - _globals["_CONSISTENCYLEVEL"]._serialized_end = 2767 + _globals["_VECTORS"].fields_by_name["index"]._loaded_options = None + _globals["_VECTORS"].fields_by_name["index"]._serialized_options = b"\030\001" + _globals["_CONSISTENCYLEVEL"]._serialized_start = 2782 + _globals["_CONSISTENCYLEVEL"]._serialized_end = 2919 _globals["_NUMBERARRAYPROPERTIES"]._serialized_start = 60 _globals["_NUMBERARRAYPROPERTIES"]._serialized_end = 144 _globals["_INTARRAYPROPERTIES"]._serialized_start = 146 @@ -75,6 +77,8 @@ _globals["_FILTERTARGET"]._serialized_end = 2486 _globals["_GEOCOORDINATESFILTER"]._serialized_start = 2488 _globals["_GEOCOORDINATESFILTER"]._serialized_end = 2565 - _globals["_VECTORS"]._serialized_start = 2567 - _globals["_VECTORS"]._serialized_end = 2627 + _globals["_VECTORS"]._serialized_start = 2568 + _globals["_VECTORS"]._serialized_end = 2779 + _globals["_VECTORS_VECTORTYPE"]._serialized_start = 2681 + _globals["_VECTORS_VECTORTYPE"]._serialized_end = 2779 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/base_pb2.pyi b/weaviate/proto/v1/base_pb2.pyi index 
f2b2e8f31..70bb769d7 100644 --- a/weaviate/proto/v1/base_pb2.pyi +++ b/weaviate/proto/v1/base_pb2.pyi @@ -319,16 +319,29 @@ class GeoCoordinatesFilter(_message.Message): ) -> None: ... class Vectors(_message.Message): - __slots__ = ("name", "index", "vector_bytes") + __slots__ = ("name", "index", "vector_bytes", "type") + + class VectorType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + VECTOR_TYPE_UNSPECIFIED: _ClassVar[Vectors.VectorType] + VECTOR_TYPE_SINGLE_FP32: _ClassVar[Vectors.VectorType] + VECTOR_TYPE_MULTI_FP32: _ClassVar[Vectors.VectorType] + + VECTOR_TYPE_UNSPECIFIED: Vectors.VectorType + VECTOR_TYPE_SINGLE_FP32: Vectors.VectorType + VECTOR_TYPE_MULTI_FP32: Vectors.VectorType NAME_FIELD_NUMBER: _ClassVar[int] INDEX_FIELD_NUMBER: _ClassVar[int] VECTOR_BYTES_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] name: str index: int vector_bytes: bytes + type: Vectors.VectorType def __init__( self, name: _Optional[str] = ..., index: _Optional[int] = ..., vector_bytes: _Optional[bytes] = ..., + type: _Optional[_Union[Vectors.VectorType, str]] = ..., ) -> None: ... 
diff --git a/weaviate/proto/v1/base_search_pb2.py b/weaviate/proto/v1/base_search_pb2.py index da0341806..b58cb6c9a 100644 --- a/weaviate/proto/v1/base_search_pb2.py +++ b/weaviate/proto/v1/base_search_pb2.py @@ -18,8 +18,11 @@ _sym_db = _symbol_database.Default() +from weaviate.proto.v1 import base_pb2 as v1_dot_base__pb2 + + DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02"\xfa\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12\x36\n\x07weights\x18\x03 \x03(\x0b\x32!.weaviate.v1.Targets.WeightsEntryB\x02\x18\x01\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTarget\x1a.\n\x0cWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01"5\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cvector_bytes\x18\x02 \x01(\x0c"\xc5\x03\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\r\n\x05\x61lpha\x18\x04 \x01(\x02\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x14\n\x0cvector_bytes\x18\x06 \x01(\x0c\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthreshold"\x82\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 
\x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x14\n\x0cvector_bytes\x18\x04 \x01(\x0c\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 
\x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance")\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 
\x03(\t*\xee\x01\n\x11\x43ombinationMethod\x12"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' + b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02"\xfa\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12\x36\n\x07weights\x18\x03 \x03(\x0b\x32!.weaviate.v1.Targets.WeightsEntryB\x02\x18\x01\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTarget\x1a.\n\x0cWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors"\xf0\x03\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\r\n\x05\x61lpha\x18\x04 \x01(\x02\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 
\x03(\x0b\x32\x14.weaviate.v1.Vectors"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthreshold"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 
\x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 
\x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance")\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t*\xee\x01\n\x11\x43ombinationMethod\x12"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' ) _globals = globals() @@ -34,14 +37,20 @@ _globals["_TARGETS_WEIGHTSENTRY"]._serialized_options = b"8\001" _globals["_TARGETS"].fields_by_name["weights"]._loaded_options = None _globals["_TARGETS"].fields_by_name["weights"]._serialized_options = b"\030\001" + _globals["_VECTORFORTARGET"].fields_by_name["vector_bytes"]._loaded_options = None + _globals["_VECTORFORTARGET"].fields_by_name["vector_bytes"]._serialized_options = b"\030\001" _globals["_HYBRID"].fields_by_name["vector"]._loaded_options = None _globals["_HYBRID"].fields_by_name["vector"]._serialized_options = b"\030\001" + _globals["_HYBRID"].fields_by_name["vector_bytes"]._loaded_options = None + _globals["_HYBRID"].fields_by_name["vector_bytes"]._serialized_options = b"\030\001" _globals["_HYBRID"].fields_by_name["target_vectors"]._loaded_options = None _globals["_HYBRID"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._loaded_options = None _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_options = b"8\001" _globals["_NEARVECTOR"].fields_by_name["vector"]._loaded_options = None _globals["_NEARVECTOR"].fields_by_name["vector"]._serialized_options = b"\030\001" + _globals["_NEARVECTOR"].fields_by_name["vector_bytes"]._loaded_options = None + 
_globals["_NEARVECTOR"].fields_by_name["vector_bytes"]._serialized_options = b"\030\001" _globals["_NEARVECTOR"].fields_by_name["target_vectors"]._loaded_options = None _globals["_NEARVECTOR"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" _globals["_NEARVECTOR"].fields_by_name["vector_per_target"]._loaded_options = None @@ -64,42 +73,42 @@ ]._serialized_options = b"\030\001" _globals["_NEARIMUSEARCH"].fields_by_name["target_vectors"]._loaded_options = None _globals["_NEARIMUSEARCH"].fields_by_name["target_vectors"]._serialized_options = b"\030\001" - _globals["_COMBINATIONMETHOD"]._serialized_start = 2881 - _globals["_COMBINATIONMETHOD"]._serialized_end = 3119 - _globals["_WEIGHTSFORTARGET"]._serialized_start = 37 - _globals["_WEIGHTSFORTARGET"]._serialized_end = 87 - _globals["_TARGETS"]._serialized_start = 90 - _globals["_TARGETS"]._serialized_end = 340 - _globals["_TARGETS_WEIGHTSENTRY"]._serialized_start = 294 - _globals["_TARGETS_WEIGHTSENTRY"]._serialized_end = 340 - _globals["_VECTORFORTARGET"]._serialized_start = 342 - _globals["_VECTORFORTARGET"]._serialized_end = 395 - _globals["_HYBRID"]._serialized_start = 398 - _globals["_HYBRID"]._serialized_end = 851 - _globals["_HYBRID_FUSIONTYPE"]._serialized_start = 741 - _globals["_HYBRID_FUSIONTYPE"]._serialized_end = 838 - _globals["_NEARVECTOR"]._serialized_start = 854 - _globals["_NEARVECTOR"]._serialized_end = 1240 - _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_start = 1159 - _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_end = 1213 - _globals["_NEAROBJECT"]._serialized_start = 1243 - _globals["_NEAROBJECT"]._serialized_end = 1408 - _globals["_NEARTEXTSEARCH"]._serialized_start = 1411 - _globals["_NEARTEXTSEARCH"]._serialized_end = 1779 - _globals["_NEARTEXTSEARCH_MOVE"]._serialized_start = 1672 - _globals["_NEARTEXTSEARCH_MOVE"]._serialized_end = 1726 - _globals["_NEARIMAGESEARCH"]._serialized_start = 1782 - _globals["_NEARIMAGESEARCH"]._serialized_end = 1955 - 
_globals["_NEARAUDIOSEARCH"]._serialized_start = 1958 - _globals["_NEARAUDIOSEARCH"]._serialized_end = 2131 - _globals["_NEARVIDEOSEARCH"]._serialized_start = 2134 - _globals["_NEARVIDEOSEARCH"]._serialized_end = 2307 - _globals["_NEARDEPTHSEARCH"]._serialized_start = 2310 - _globals["_NEARDEPTHSEARCH"]._serialized_end = 2483 - _globals["_NEARTHERMALSEARCH"]._serialized_start = 2486 - _globals["_NEARTHERMALSEARCH"]._serialized_end = 2663 - _globals["_NEARIMUSEARCH"]._serialized_start = 2666 - _globals["_NEARIMUSEARCH"]._serialized_end = 2835 - _globals["_BM25"]._serialized_start = 2837 - _globals["_BM25"]._serialized_end = 2878 + _globals["_COMBINATIONMETHOD"]._serialized_start = 3025 + _globals["_COMBINATIONMETHOD"]._serialized_end = 3263 + _globals["_WEIGHTSFORTARGET"]._serialized_start = 52 + _globals["_WEIGHTSFORTARGET"]._serialized_end = 102 + _globals["_TARGETS"]._serialized_start = 105 + _globals["_TARGETS"]._serialized_end = 355 + _globals["_TARGETS_WEIGHTSENTRY"]._serialized_start = 309 + _globals["_TARGETS_WEIGHTSENTRY"]._serialized_end = 355 + _globals["_VECTORFORTARGET"]._serialized_start = 357 + _globals["_VECTORFORTARGET"]._serialized_end = 453 + _globals["_HYBRID"]._serialized_start = 456 + _globals["_HYBRID"]._serialized_end = 952 + _globals["_HYBRID_FUSIONTYPE"]._serialized_start = 842 + _globals["_HYBRID_FUSIONTYPE"]._serialized_end = 939 + _globals["_NEARVECTOR"]._serialized_start = 955 + _globals["_NEARVECTOR"]._serialized_end = 1384 + _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_start = 1303 + _globals["_NEARVECTOR_VECTORPERTARGETENTRY"]._serialized_end = 1357 + _globals["_NEAROBJECT"]._serialized_start = 1387 + _globals["_NEAROBJECT"]._serialized_end = 1552 + _globals["_NEARTEXTSEARCH"]._serialized_start = 1555 + _globals["_NEARTEXTSEARCH"]._serialized_end = 1923 + _globals["_NEARTEXTSEARCH_MOVE"]._serialized_start = 1816 + _globals["_NEARTEXTSEARCH_MOVE"]._serialized_end = 1870 + _globals["_NEARIMAGESEARCH"]._serialized_start = 
1926 + _globals["_NEARIMAGESEARCH"]._serialized_end = 2099 + _globals["_NEARAUDIOSEARCH"]._serialized_start = 2102 + _globals["_NEARAUDIOSEARCH"]._serialized_end = 2275 + _globals["_NEARVIDEOSEARCH"]._serialized_start = 2278 + _globals["_NEARVIDEOSEARCH"]._serialized_end = 2451 + _globals["_NEARDEPTHSEARCH"]._serialized_start = 2454 + _globals["_NEARDEPTHSEARCH"]._serialized_end = 2627 + _globals["_NEARTHERMALSEARCH"]._serialized_start = 2630 + _globals["_NEARTHERMALSEARCH"]._serialized_end = 2807 + _globals["_NEARIMUSEARCH"]._serialized_start = 2810 + _globals["_NEARIMUSEARCH"]._serialized_end = 2979 + _globals["_BM25"]._serialized_start = 2981 + _globals["_BM25"]._serialized_end = 3022 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/base_search_pb2.pyi b/weaviate/proto/v1/base_search_pb2.pyi index 1760f1485..8d7ac2d66 100644 --- a/weaviate/proto/v1/base_search_pb2.pyi +++ b/weaviate/proto/v1/base_search_pb2.pyi @@ -1,3 +1,4 @@ +from weaviate.proto.v1 import base_pb2 as _base_pb2 from google.protobuf.internal import containers as _containers from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor @@ -64,13 +65,18 @@ class Targets(_message.Message): ) -> None: ... class VectorForTarget(_message.Message): - __slots__ = ("name", "vector_bytes") + __slots__ = ("name", "vector_bytes", "vectors") NAME_FIELD_NUMBER: _ClassVar[int] VECTOR_BYTES_FIELD_NUMBER: _ClassVar[int] + VECTORS_FIELD_NUMBER: _ClassVar[int] name: str vector_bytes: bytes + vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] def __init__( - self, name: _Optional[str] = ..., vector_bytes: _Optional[bytes] = ... + self, + name: _Optional[str] = ..., + vector_bytes: _Optional[bytes] = ..., + vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ..., ) -> None: ... 
class Hybrid(_message.Message): @@ -86,6 +92,7 @@ class Hybrid(_message.Message): "near_vector", "targets", "vector_distance", + "vectors", ) class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): @@ -108,6 +115,7 @@ class Hybrid(_message.Message): NEAR_VECTOR_FIELD_NUMBER: _ClassVar[int] TARGETS_FIELD_NUMBER: _ClassVar[int] VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] + VECTORS_FIELD_NUMBER: _ClassVar[int] query: str properties: _containers.RepeatedScalarFieldContainer[str] vector: _containers.RepeatedScalarFieldContainer[float] @@ -119,6 +127,7 @@ class Hybrid(_message.Message): near_vector: NearVector targets: Targets vector_distance: float + vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] def __init__( self, query: _Optional[str] = ..., @@ -132,6 +141,7 @@ class Hybrid(_message.Message): near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., vector_distance: _Optional[float] = ..., + vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ..., ) -> None: ... 
class NearVector(_message.Message): @@ -144,6 +154,7 @@ class NearVector(_message.Message): "targets", "vector_per_target", "vector_for_targets", + "vectors", ) class VectorPerTargetEntry(_message.Message): @@ -162,6 +173,7 @@ class NearVector(_message.Message): TARGETS_FIELD_NUMBER: _ClassVar[int] VECTOR_PER_TARGET_FIELD_NUMBER: _ClassVar[int] VECTOR_FOR_TARGETS_FIELD_NUMBER: _ClassVar[int] + VECTORS_FIELD_NUMBER: _ClassVar[int] vector: _containers.RepeatedScalarFieldContainer[float] certainty: float distance: float @@ -170,6 +182,7 @@ class NearVector(_message.Message): targets: Targets vector_per_target: _containers.ScalarMap[str, bytes] vector_for_targets: _containers.RepeatedCompositeFieldContainer[VectorForTarget] + vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] def __init__( self, vector: _Optional[_Iterable[float]] = ..., @@ -180,6 +193,7 @@ class NearVector(_message.Message): targets: _Optional[_Union[Targets, _Mapping]] = ..., vector_per_target: _Optional[_Mapping[str, bytes]] = ..., vector_for_targets: _Optional[_Iterable[_Union[VectorForTarget, _Mapping]]] = ..., + vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ..., ) -> None: ... 
class NearObject(_message.Message): diff --git a/weaviate/types.py b/weaviate/types.py index eba8c2664..c389f7f2b 100644 --- a/weaviate/types.py +++ b/weaviate/types.py @@ -1,13 +1,13 @@ import datetime import uuid as uuid_package -from typing import Dict, Union, List, Sequence, Tuple +from typing import Dict, Union, Mapping, List, Sequence, Tuple DATE = datetime.datetime UUID = Union[str, uuid_package.UUID] UUIDS = Union[Sequence[UUID], UUID] NUMBER = Union[int, float] GEO_COORDINATES = Tuple[float, float] -VECTORS = Union[Dict[str, List[float]], List[float]] +VECTORS = Union[Mapping[str, Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]]], Sequence[NUMBER]] INCLUDE_VECTOR = Union[bool, str, List[str]] BEACON = "weaviate://localhost/" diff --git a/weaviate/util.py b/weaviate/util.py index bbf9743c9..59208693a 100644 --- a/weaviate/util.py +++ b/weaviate/util.py @@ -10,6 +10,7 @@ import uuid as uuid_lib from pathlib import Path from typing import Union, Sequence, Any, Optional, List, Dict, Generator, Tuple, cast +from typing_extensions import TypeGuard import httpx import validators @@ -883,12 +884,23 @@ def __is_list_type(inputs: Any) -> bool: ) -def _is_1d_vector(inputs: Any) -> bool: +def _is_1d_vector(inputs: Any) -> TypeGuard[Sequence[float]]: try: if len(inputs) == 0: return False except TypeError: return False if __is_list_type(inputs): - return not __is_list_type(inputs[0]) # 2D vectors are not 1D vectors + return not __is_list_type(inputs[0]) + return False + + +def _is_2d_vector(inputs: Any) -> TypeGuard[List[List[float]]]: + try: + if len(inputs) == 0: + return False + except TypeError: + return False + if __is_list_type(inputs): + return __is_list_type(inputs[0]) return False From 7415183300ff822017759bc6e8848da60c8114f5 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 30 Jan 2025 15:54:20 +0000 Subject: [PATCH 25/48] Fix broken unit test --- test/collection/test_aggregates.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/test/collection/test_aggregates.py b/test/collection/test_aggregates.py index 42d18b70f..c2881de9b 100644 --- a/test/collection/test_aggregates.py +++ b/test/collection/test_aggregates.py @@ -1,18 +1,18 @@ import pytest -from typing import Awaitable +from typing import Awaitable, Callable from weaviate.connect import ConnectionV4 from weaviate.collections.aggregate import _AggregateCollectionAsync from weaviate.exceptions import WeaviateInvalidInputError -async def _test_aggregate(aggregate: Awaitable) -> None: +async def _test_aggregate(aggregate: Callable[[], Awaitable]) -> None: with pytest.raises(WeaviateInvalidInputError): await aggregate() @pytest.mark.asyncio async def test_bad_aggregate_inputs(connection: ConnectionV4) -> None: - aggregate = _AggregateCollectionAsync(connection, "dummy", None, None) + aggregate = _AggregateCollectionAsync(connection, "dummy", None, None, False) # over_all await _test_aggregate(lambda: aggregate.over_all(filters="wrong")) await _test_aggregate(lambda: aggregate.over_all(group_by=42)) From 23aaba7a55d3854733a480462ab539a40deefc5a Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 30 Jan 2025 16:03:12 +0000 Subject: [PATCH 26/48] Actually fix unit test --- .../collections/aggregations/near_vector.py | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/weaviate/collections/aggregations/near_vector.py b/weaviate/collections/aggregations/near_vector.py index e10d40a31..a06e9c609 100644 --- a/weaviate/collections/aggregations/near_vector.py +++ b/weaviate/collections/aggregations/near_vector.py @@ -14,7 +14,7 @@ NearVectorInputType, ) from weaviate.collections.filters import _FilterToGRPC -from weaviate.exceptions import WeaviateUnsupportedFeatureError +from weaviate.exceptions import WeaviateInvalidInputError from weaviate.types import NUMBER @@ -77,25 +77,19 @@ async def near_vector( # use gql, remove once 1.29 is the minimum supported version if not isinstance(near_vector, list): - raise 
WeaviateUnsupportedFeatureError( - "A `near_vector` argument other than a list of floats", - str(self._connection._weaviate_version), - "1.29.0", + raise WeaviateInvalidInputError( + "A `near_vector` argument other than a list of float is not supported in Date: Thu, 30 Jan 2025 16:14:35 +0000 Subject: [PATCH 27/48] Fix logic for BC target vector check in aggregate --- weaviate/collections/aggregations/near_vector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaviate/collections/aggregations/near_vector.py b/weaviate/collections/aggregations/near_vector.py index a06e9c609..9a6649fc0 100644 --- a/weaviate/collections/aggregations/near_vector.py +++ b/weaviate/collections/aggregations/near_vector.py @@ -87,7 +87,7 @@ async def near_vector( near_vector = cast( List[float], near_vector ) # pylance cannot type narrow the immediately above check - if not isinstance(target_vector, str): + if target_vector is not None and not isinstance(target_vector, str): raise WeaviateInvalidInputError( "A `target_vector` argument other than a string is not supported in Date: Thu, 30 Jan 2025 16:31:58 +0000 Subject: [PATCH 28/48] Fix integration test for previous Weaviate versions --- integration/test_collection_aggregate.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/integration/test_collection_aggregate.py b/integration/test_collection_aggregate.py index d2fc1289a..56b600faf 100644 --- a/integration/test_collection_aggregate.py +++ b/integration/test_collection_aggregate.py @@ -665,13 +665,19 @@ def test_group_by_aggregation_argument(collection_factory: CollectionFactory) -> groups = res.groups assert len(groups) == 2 assert groups[0].grouped_by.prop == "int" - assert groups[0].grouped_by.value == 1 or groups[1].grouped_by.value == 1 + if collection._connection._weaviate_version.is_lower_than(1, 28, 4): + assert groups[0].grouped_by.value == "1" or groups[1].grouped_by.value == "1" + else: + assert groups[0].grouped_by.value == 1 
or groups[1].grouped_by.value == 1 assert isinstance(groups[0].properties["text"], AggregateText) assert groups[0].properties["text"].count == 1 assert isinstance(groups[0].properties["int"], AggregateInteger) assert groups[0].properties["int"].count == 1 assert groups[1].grouped_by.prop == "int" - assert groups[1].grouped_by.value == 2 or groups[0].grouped_by.value == 2 + if collection._connection._weaviate_version.is_lower_than(1, 28, 4): + assert groups[1].grouped_by.value == "2" or groups[0].grouped_by.value == "2" + else: + assert groups[1].grouped_by.value == 2 or groups[0].grouped_by.value == 2 assert isinstance(groups[1].properties["text"], AggregateText) assert groups[1].properties["text"].count == 1 assert isinstance(groups[1].properties["int"], AggregateInteger) From 3cd76987f0d7ea6bd58b275f004e8db1adda2f01 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 31 Jan 2025 09:34:35 +0000 Subject: [PATCH 29/48] Make smaller changes in response to review --- .../collections/aggregations/aggregate.py | 32 ++++++++----------- weaviate/collections/classes/aggregate.py | 4 +-- weaviate/collections/collection/async_.py | 9 ++---- weaviate/collections/collection/sync.py | 16 ++-------- 4 files changed, 22 insertions(+), 39 deletions(-) diff --git a/weaviate/collections/aggregations/aggregate.py b/weaviate/collections/aggregations/aggregate.py index d985b20d9..24747b5df 100644 --- a/weaviate/collections/aggregations/aggregate.py +++ b/weaviate/collections/aggregations/aggregate.py @@ -43,6 +43,7 @@ from weaviate.types import NUMBER, UUID from weaviate.util import file_encoder_b64, _decode_json_response_dict from weaviate.validator import _ValidateArgument, _validate_input +from weaviate.warnings import _Warnings P = ParamSpec("P") T = TypeVar("T") @@ -94,7 +95,7 @@ def _to_result( if len(response.result.groups) == 0: raise WeaviateQueryError("No results found in the aggregation query!", "gRPC") if is_group_by: - AggregateGroupByReturn( + return 
AggregateGroupByReturn( groups=[ AggregateGroup( grouped_by=self.__parse_grouped_by_value(group.grouped_by), @@ -116,25 +117,21 @@ def _to_result( }, total_count=result.objects_count, ) - return AggregateGroupByReturn( - groups=[ - AggregateGroup( - grouped_by=self.__parse_grouped_by_value(group.grouped_by), - properties={ - aggregation.property: self.__parse_property_grpc(aggregation) - for aggregation in group.aggregations.aggregations - }, - total_count=group.objects_count, - ) - for group in response.result.groups - ] - ) def __parse_grouped_by_value( self, grouped_by: aggregate_pb2.AggregateGroup.GroupedBy ) -> GroupedBy: value: Union[ - str, int, float, bool, List[str], List[int], List[float], List[bool], GeoCoordinate + str, + int, + float, + bool, + List[str], + List[int], + List[float], + List[bool], + GeoCoordinate, + None, ] if grouped_by.HasField("text"): value = grouped_by.text @@ -159,9 +156,8 @@ def __parse_grouped_by_value( longitude=v.longitude, ) else: - raise ValueError( - f"Unknown grouped by type {grouped_by} encountered in _Aggregate.__parse_grouped_by_value()" - ) + value = None + _Warnings.unknown_type_encountered(grouped_by.WhichOneof("GroupedBy")) return GroupedBy(prop=grouped_by.path[0], value=value) def _to_group_by_result( diff --git a/weaviate/collections/classes/aggregate.py b/weaviate/collections/classes/aggregate.py index 324a2049e..9f6cef5a9 100644 --- a/weaviate/collections/classes/aggregate.py +++ b/weaviate/collections/classes/aggregate.py @@ -112,7 +112,7 @@ class GroupedBy: prop: str value: Union[ - str, int, float, bool, List[str], List[int], List[float], List[bool], GeoCoordinate + str, int, float, bool, List[str], List[int], List[float], List[bool], GeoCoordinate, None ] @@ -586,7 +586,7 @@ def reference( Arguments: `pointing_to` - Whether to include the collection names that this property references. + The UUIDs of the objects that are being pointed to. 
Returns: A `_MetricsReference` object that includes the metrics to be returned. diff --git a/weaviate/collections/collection/async_.py b/weaviate/collections/collection/async_.py index a5331098b..dc8eceeda 100644 --- a/weaviate/collections/collection/async_.py +++ b/weaviate/collections/collection/async_.py @@ -105,12 +105,9 @@ def __init__( async def length(self) -> int: """Get the total number of objects in the collection.""" - if self._connection._weaviate_version.is_lower_than(1, 28, 4): - total = (await self.aggregate.over_all(total_count=True)).total_count - assert total is not None - return total - else: - return await self.aggregate._grpc.objects_count() + total = (await self.aggregate.over_all(total_count=True)).total_count + assert total is not None + return total async def to_string(self) -> str: """Return a string representation of the collection object.""" diff --git a/weaviate/collections/collection/sync.py b/weaviate/collections/collection/sync.py index 01ca737a3..2aee0c19f 100644 --- a/weaviate/collections/collection/sync.py +++ b/weaviate/collections/collection/sync.py @@ -20,12 +20,10 @@ from weaviate.collections.config import _ConfigCollection from weaviate.collections.data import _DataCollection from weaviate.collections.generate import _GenerateCollection -from weaviate.collections.grpc.aggregate import _AggregateGRPC from weaviate.collections.iterator import _IteratorInputs, _ObjectIterator from weaviate.collections.query import _QueryCollection from weaviate.collections.tenants import _Tenants from weaviate.connect import ConnectionV4 -from weaviate.event_loop import _EventLoopSingleton from weaviate.types import UUID from .base import _CollectionBase @@ -79,9 +77,6 @@ def __init__( references, ) - self.__aggregate_grpc = _AggregateGRPC( - connection, name, tenant, consistency_level, validate_arguments - ) self.__cluster = _Cluster(connection) config = _ConfigCollection( @@ -146,14 +141,9 @@ def __init__( """This namespace includes all the 
CRUD methods available to you when modifying the tenants of a multi-tenancy-enabled collection in Weaviate.""" def __len__(self) -> int: - if self._connection._weaviate_version.is_lower_than(1, 28, 4): - total = self.aggregate.over_all(total_count=True).total_count - assert total is not None - return total - else: - return _EventLoopSingleton.get_instance().run_until_complete( - self.__aggregate_grpc.objects_count - ) + total = self.aggregate.over_all(total_count=True).total_count + assert total is not None + return total def __str__(self) -> str: config = self.config.get() From 93139787703eddd5b8b9371b21d5af130436d51f Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 31 Jan 2025 14:53:14 +0000 Subject: [PATCH 30/48] Update to use latest proto --- .../collections/aggregations/aggregate.py | 32 +-- weaviate/collections/aggregations/hybrid.py | 2 +- .../collections/aggregations/near_image.py | 2 +- .../collections/aggregations/near_object.py | 2 +- .../collections/aggregations/near_text.py | 2 +- .../collections/aggregations/near_vector.py | 2 +- weaviate/collections/aggregations/over_all.py | 2 +- weaviate/collections/grpc/aggregate.py | 2 +- weaviate/collections/queries/base.py | 4 +- weaviate/proto/v1/aggregate_pb2.py | 64 +++--- weaviate/proto/v1/aggregate_pb2.pyi | 199 ++++++++++-------- 11 files changed, 165 insertions(+), 148 deletions(-) diff --git a/weaviate/collections/aggregations/aggregate.py b/weaviate/collections/aggregations/aggregate.py index 24747b5df..2f6703de9 100644 --- a/weaviate/collections/aggregations/aggregate.py +++ b/weaviate/collections/aggregations/aggregate.py @@ -90,11 +90,17 @@ def _to_aggregate_result( ) def _to_result( - self, response: aggregate_pb2.AggregateReply, is_group_by: bool + self, response: aggregate_pb2.AggregateReply ) -> Union[AggregateReturn, AggregateGroupByReturn]: - if len(response.result.groups) == 0: - raise WeaviateQueryError("No results found in the aggregation query!", "gRPC") - if is_group_by: + if 
response.HasField("single_result"): + return AggregateReturn( + properties={ + aggregation.property: self.__parse_property_grpc(aggregation) + for aggregation in response.single_result.aggregations.aggregations + }, + total_count=response.single_result.objects_count, + ) + if response.HasField("grouped_results"): return AggregateGroupByReturn( groups=[ AggregateGroup( @@ -105,21 +111,15 @@ def _to_result( }, total_count=group.objects_count, ) - for group in response.result.groups + for group in response.grouped_results.groups ] ) else: - result = response.result.groups[0] - return AggregateReturn( - properties={ - aggregation.property: self.__parse_property_grpc(aggregation) - for aggregation in result.aggregations.aggregations - }, - total_count=result.objects_count, - ) + _Warnings.unknown_type_encountered(response.WhichOneof("result")) + return AggregateReturn(properties={}, total_count=None) def __parse_grouped_by_value( - self, grouped_by: aggregate_pb2.AggregateGroup.GroupedBy + self, grouped_by: aggregate_pb2.AggregateReply.Group.GroupedBy ) -> GroupedBy: value: Union[ str, @@ -157,7 +157,7 @@ def __parse_grouped_by_value( ) else: value = None - _Warnings.unknown_type_encountered(grouped_by.WhichOneof("GroupedBy")) + _Warnings.unknown_type_encountered(grouped_by.WhichOneof("value")) return GroupedBy(prop=grouped_by.path[0], value=value) def _to_group_by_result( @@ -254,7 +254,7 @@ def __parse_property_gql(property_: dict, metric: _Metrics) -> AggregateResult: @staticmethod def __parse_property_grpc( - aggregation: aggregate_pb2.AggregateGroup.Aggregations.Aggregation, + aggregation: aggregate_pb2.AggregateReply.Aggregations.Aggregation, ) -> AggregateResult: if aggregation.HasField("text"): return AggregateText( diff --git a/weaviate/collections/aggregations/hybrid.py b/weaviate/collections/aggregations/hybrid.py index 4a9f1e28a..1426184e7 100644 --- a/weaviate/collections/aggregations/hybrid.py +++ b/weaviate/collections/aggregations/hybrid.py @@ -115,7 
+115,7 @@ async def hybrid( limit=group_by.limit if group_by is not None else None, objects_count=total_count, ) - return self._to_result(reply, group_by is not None) + return self._to_result(reply) @syncify.convert diff --git a/weaviate/collections/aggregations/near_image.py b/weaviate/collections/aggregations/near_image.py index bbdea27c6..d957e652c 100644 --- a/weaviate/collections/aggregations/near_image.py +++ b/weaviate/collections/aggregations/near_image.py @@ -104,7 +104,7 @@ async def near_image( objects_count=total_count, object_limit=object_limit, ) - return self._to_result(reply, group_by is not None) + return self._to_result(reply) @syncify.convert diff --git a/weaviate/collections/aggregations/near_object.py b/weaviate/collections/aggregations/near_object.py index 4cff1be04..12ce4cd12 100644 --- a/weaviate/collections/aggregations/near_object.py +++ b/weaviate/collections/aggregations/near_object.py @@ -101,7 +101,7 @@ async def near_object( objects_count=total_count, object_limit=object_limit, ) - return self._to_result(reply, group_by is not None) + return self._to_result(reply) @syncify.convert diff --git a/weaviate/collections/aggregations/near_text.py b/weaviate/collections/aggregations/near_text.py index a8f380217..07822de11 100644 --- a/weaviate/collections/aggregations/near_text.py +++ b/weaviate/collections/aggregations/near_text.py @@ -117,7 +117,7 @@ async def near_text( objects_count=total_count, object_limit=object_limit, ) - return self._to_result(reply, group_by is not None) + return self._to_result(reply) @syncify.convert diff --git a/weaviate/collections/aggregations/near_vector.py b/weaviate/collections/aggregations/near_vector.py index 9a6649fc0..c6567d3c5 100644 --- a/weaviate/collections/aggregations/near_vector.py +++ b/weaviate/collections/aggregations/near_vector.py @@ -121,7 +121,7 @@ async def near_vector( objects_count=total_count, object_limit=object_limit, ) - return self._to_result(reply, group_by is not None) + return 
self._to_result(reply) @syncify.convert diff --git a/weaviate/collections/aggregations/over_all.py b/weaviate/collections/aggregations/over_all.py index 26a9a7a9b..e44c2a826 100644 --- a/weaviate/collections/aggregations/over_all.py +++ b/weaviate/collections/aggregations/over_all.py @@ -73,7 +73,7 @@ async def over_all( limit=group_by.limit if group_by is not None else None, objects_count=total_count, ) - return self._to_result(reply, group_by is not None) + return self._to_result(reply) @syncify.convert diff --git a/weaviate/collections/grpc/aggregate.py b/weaviate/collections/grpc/aggregate.py index 4573eaf49..398229c37 100644 --- a/weaviate/collections/grpc/aggregate.py +++ b/weaviate/collections/grpc/aggregate.py @@ -37,7 +37,7 @@ def __init__( async def objects_count(self) -> int: res = await self.__call(self.__create_request(objects_count=True)) - return res.result.groups[0].objects_count + return res.single_result.objects_count def hybrid( self, diff --git a/weaviate/collections/queries/base.py b/weaviate/collections/queries/base.py index f0f634d89..6d709e46c 100644 --- a/weaviate/collections/queries/base.py +++ b/weaviate/collections/queries/base.py @@ -192,7 +192,7 @@ def __deserialize_list_value_prop_125( return [ self.__parse_nonref_properties_result(val) for val in value.object_values.values ] - _Warnings.unknown_type_encountered(value.WhichOneof("Value")) + _Warnings.unknown_type_encountered(value.WhichOneof("value")) return None def __deserialize_list_value_prop_123(self, value: properties_pb2.ListValue) -> List[Any]: @@ -248,7 +248,7 @@ def __deserialize_non_ref_prop(self, value: properties_pb2.Value) -> Any: if value.HasField("null_value"): return None - _Warnings.unknown_type_encountered(value.WhichOneof("Value")) + _Warnings.unknown_type_encountered(value.WhichOneof("value")) return None def __parse_nonref_properties_result( diff --git a/weaviate/proto/v1/aggregate_pb2.py b/weaviate/proto/v1/aggregate_pb2.py index 5c8a81d04..7e11cf434 100644 --- 
a/weaviate/proto/v1/aggregate_pb2.py +++ b/weaviate/proto/v1/aggregate_pb2.py @@ -23,7 +23,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x12v1/aggregate.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto"\xe8\x0f\n\x10\x41ggregateRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12\x15\n\robjects_count\x18\x14 \x01(\x08\x12?\n\x0c\x61ggregations\x18\x15 \x03(\x0b\x32).weaviate.v1.AggregateRequest.Aggregation\x12\x19\n\x0cobject_limit\x18\x1e \x01(\rH\x01\x88\x01\x01\x12<\n\x08group_by\x18\x1f \x01(\x0b\x32%.weaviate.v1.AggregateRequest.GroupByH\x02\x88\x01\x01\x12\x12\n\x05limit\x18 \x01(\rH\x03\x88\x01\x01\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12%\n\x06hybrid\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x00\x12.\n\x0bnear_vector\x18* \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x00\x12.\n\x0bnear_object\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x00\x12\x30\n\tnear_text\x18, \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\x00\x12\x32\n\nnear_image\x18- \x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\x00\x12\x32\n\nnear_audio\x18. 
\x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x00\x12\x32\n\nnear_video\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x00\x12\x32\n\nnear_depth\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\x00\x12\x36\n\x0cnear_thermal\x18\x31 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x00\x12.\n\x08near_imu\x18\x32 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x00\x1a\xde\x08\n\x0b\x41ggregation\x12\x10\n\x08property\x18\x01 \x01(\t\x12@\n\x03int\x18\x02 \x01(\x0b\x32\x31.weaviate.v1.AggregateRequest.Aggregation.IntegerH\x00\x12\x42\n\x06number\x18\x03 \x01(\x0b\x32\x30.weaviate.v1.AggregateRequest.Aggregation.NumberH\x00\x12>\n\x04text\x18\x04 \x01(\x0b\x32..weaviate.v1.AggregateRequest.Aggregation.TextH\x00\x12\x44\n\x07\x62oolean\x18\x05 \x01(\x0b\x32\x31.weaviate.v1.AggregateRequest.Aggregation.BooleanH\x00\x12>\n\x04\x64\x61te\x18\x06 \x01(\x0b\x32..weaviate.v1.AggregateRequest.Aggregation.DateH\x00\x12H\n\treference\x18\x07 \x01(\x0b\x32\x33.weaviate.v1.AggregateRequest.Aggregation.ReferenceH\x00\x1a\x81\x01\n\x07Integer\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0b\n\x03sum\x18\x03 \x01(\x08\x12\x0c\n\x04mean\x18\x04 \x01(\x08\x12\x0c\n\x04mode\x18\x05 \x01(\x08\x12\x0e\n\x06median\x18\x06 \x01(\x08\x12\x0f\n\x07maximum\x18\x07 \x01(\x08\x12\x0f\n\x07minimum\x18\x08 \x01(\x08\x1a\x80\x01\n\x06Number\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0b\n\x03sum\x18\x03 \x01(\x08\x12\x0c\n\x04mean\x18\x04 \x01(\x08\x12\x0c\n\x04mode\x18\x05 \x01(\x08\x12\x0e\n\x06median\x18\x06 \x01(\x08\x12\x0f\n\x07maximum\x18\x07 \x01(\x08\x12\x0f\n\x07minimum\x18\x08 \x01(\x08\x1aw\n\x04Text\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x16\n\x0etop_occurences\x18\x03 \x01(\x08\x12!\n\x14top_occurences_limit\x18\x04 \x01(\rH\x00\x88\x01\x01\x42\x17\n\x15_top_occurences_limit\x1a\x82\x01\n\x07\x42oolean\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 
\x01(\x08\x12\x12\n\ntotal_true\x18\x03 \x01(\x08\x12\x13\n\x0btotal_false\x18\x04 \x01(\x08\x12\x17\n\x0fpercentage_true\x18\x05 \x01(\x08\x12\x18\n\x10percentage_false\x18\x06 \x01(\x08\x1a\x63\n\x04\x44\x61te\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0e\n\x06median\x18\x03 \x01(\x08\x12\x0c\n\x04mode\x18\x04 \x01(\x08\x12\x0f\n\x07maximum\x18\x05 \x01(\x08\x12\x0f\n\x07minimum\x18\x06 \x01(\x08\x1a.\n\tReference\x12\x0c\n\x04type\x18\x01 \x01(\x08\x12\x13\n\x0bpointing_to\x18\x02 \x01(\x08\x42\r\n\x0b\x61ggregation\x1a/\n\x07GroupBy\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x10\n\x08property\x18\x02 \x01(\tB\x08\n\x06searchB\x0f\n\r_object_limitB\x0b\n\t_group_byB\x08\n\x06_limitB\n\n\x08_filters"\x89\x01\n\x0e\x41ggregateReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12\x32\n\x06result\x18\x02 \x01(\x0b\x32".weaviate.v1.AggregateReply.Result\x1a\x35\n\x06Result\x12+\n\x06groups\x18\x01 \x03(\x0b\x32\x1b.weaviate.v1.AggregateGroup"\xed\x13\n\x0e\x41ggregateGroup\x12\x1a\n\robjects_count\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x43\n\x0c\x61ggregations\x18\x02 \x01(\x0b\x32(.weaviate.v1.AggregateGroup.AggregationsH\x01\x88\x01\x01\x12>\n\ngrouped_by\x18\x03 \x01(\x0b\x32%.weaviate.v1.AggregateGroup.GroupedByH\x02\x88\x01\x01\x1a\xc0\x0f\n\x0c\x41ggregations\x12J\n\x0c\x61ggregations\x18\x01 \x03(\x0b\x32\x34.weaviate.v1.AggregateGroup.Aggregations.Aggregation\x1a\xe3\x0e\n\x0b\x41ggregation\x12\x10\n\x08property\x18\x01 \x01(\t\x12K\n\x03int\x18\x02 \x01(\x0b\x32<.weaviate.v1.AggregateGroup.Aggregations.Aggregation.IntegerH\x00\x12M\n\x06number\x18\x03 \x01(\x0b\x32;.weaviate.v1.AggregateGroup.Aggregations.Aggregation.NumberH\x00\x12I\n\x04text\x18\x04 \x01(\x0b\x32\x39.weaviate.v1.AggregateGroup.Aggregations.Aggregation.TextH\x00\x12O\n\x07\x62oolean\x18\x05 \x01(\x0b\x32<.weaviate.v1.AggregateGroup.Aggregations.Aggregation.BooleanH\x00\x12I\n\x04\x64\x61te\x18\x06 
\x01(\x0b\x32\x39.weaviate.v1.AggregateGroup.Aggregations.Aggregation.DateH\x00\x12S\n\treference\x18\x07 \x01(\x0b\x32>.weaviate.v1.AggregateGroup.Aggregations.Aggregation.ReferenceH\x00\x1a\xf9\x01\n\x07Integer\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04mean\x18\x03 \x01(\x01H\x02\x88\x01\x01\x12\x13\n\x06median\x18\x04 \x01(\x01H\x03\x88\x01\x01\x12\x11\n\x04mode\x18\x05 \x01(\x03H\x04\x88\x01\x01\x12\x14\n\x07maximum\x18\x06 \x01(\x03H\x05\x88\x01\x01\x12\x14\n\x07minimum\x18\x07 \x01(\x03H\x06\x88\x01\x01\x12\x10\n\x03sum\x18\x08 \x01(\x03H\x07\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\x07\n\x05_meanB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimumB\x06\n\x04_sum\x1a\xf8\x01\n\x06Number\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04mean\x18\x03 \x01(\x01H\x02\x88\x01\x01\x12\x13\n\x06median\x18\x04 \x01(\x01H\x03\x88\x01\x01\x12\x11\n\x04mode\x18\x05 \x01(\x01H\x04\x88\x01\x01\x12\x14\n\x07maximum\x18\x06 \x01(\x01H\x05\x88\x01\x01\x12\x14\n\x07minimum\x18\x07 \x01(\x01H\x06\x88\x01\x01\x12\x10\n\x03sum\x18\x08 \x01(\x01H\x07\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\x07\n\x05_meanB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimumB\x06\n\x04_sum\x1a\xe4\x02\n\x04Text\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x65\n\x0etop_occurences\x18\x03 \x01(\x0b\x32H.weaviate.v1.AggregateGroup.Aggregations.Aggregation.Text.TopOccurrencesH\x02\x88\x01\x01\x1a\xa7\x01\n\x0eTopOccurrences\x12\x65\n\x05items\x18\x01 \x03(\x0b\x32V.weaviate.v1.AggregateGroup.Aggregations.Aggregation.Text.TopOccurrences.TopOccurrence\x1a.\n\rTopOccurrence\x12\r\n\x05value\x18\x01 \x01(\t\x12\x0e\n\x06occurs\x18\x02 
\x01(\x03\x42\x08\n\x06_countB\x07\n\x05_typeB\x11\n\x0f_top_occurences\x1a\xfb\x01\n\x07\x42oolean\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ntotal_true\x18\x03 \x01(\x03H\x02\x88\x01\x01\x12\x18\n\x0btotal_false\x18\x04 \x01(\x03H\x03\x88\x01\x01\x12\x1c\n\x0fpercentage_true\x18\x05 \x01(\x01H\x04\x88\x01\x01\x12\x1d\n\x10percentage_false\x18\x06 \x01(\x01H\x05\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\r\n\x0b_total_trueB\x0e\n\x0c_total_falseB\x12\n\x10_percentage_trueB\x13\n\x11_percentage_false\x1a\xc0\x01\n\x04\x44\x61te\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x13\n\x06median\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x11\n\x04mode\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x14\n\x07maximum\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x14\n\x07minimum\x18\x06 \x01(\tH\x05\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimum\x1a<\n\tReference\x12\x11\n\x04type\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x0bpointing_to\x18\x02 \x03(\tB\x07\n\x05_typeB\r\n\x0b\x61ggregation\x1a\xc4\x02\n\tGroupedBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x0e\n\x04text\x18\x02 \x01(\tH\x00\x12\r\n\x03int\x18\x03 \x01(\x03H\x00\x12\x11\n\x07\x62oolean\x18\x04 \x01(\x08H\x00\x12\x10\n\x06number\x18\x05 \x01(\x01H\x00\x12\'\n\x05texts\x18\x06 \x01(\x0b\x32\x16.weaviate.v1.TextArrayH\x00\x12%\n\x04ints\x18\x07 \x01(\x0b\x32\x15.weaviate.v1.IntArrayH\x00\x12-\n\x08\x62ooleans\x18\x08 \x01(\x0b\x32\x19.weaviate.v1.BooleanArrayH\x00\x12+\n\x07numbers\x18\t \x01(\x0b\x32\x18.weaviate.v1.NumberArrayH\x00\x12\x30\n\x03geo\x18\n \x01(\x0b\x32!.weaviate.v1.GeoCoordinatesFilterH\x00\x42\x07\n\x05valueB\x10\n\x0e_objects_countB\x0f\n\r_aggregationsB\r\n\x0b_grouped_byBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoAggregateZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' + 
b'\n\x12v1/aggregate.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto"\xe8\x0f\n\x10\x41ggregateRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12\x15\n\robjects_count\x18\x14 \x01(\x08\x12?\n\x0c\x61ggregations\x18\x15 \x03(\x0b\x32).weaviate.v1.AggregateRequest.Aggregation\x12\x19\n\x0cobject_limit\x18\x1e \x01(\rH\x01\x88\x01\x01\x12<\n\x08group_by\x18\x1f \x01(\x0b\x32%.weaviate.v1.AggregateRequest.GroupByH\x02\x88\x01\x01\x12\x12\n\x05limit\x18 \x01(\rH\x03\x88\x01\x01\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12%\n\x06hybrid\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x00\x12.\n\x0bnear_vector\x18* \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x00\x12.\n\x0bnear_object\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x00\x12\x30\n\tnear_text\x18, \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\x00\x12\x32\n\nnear_image\x18- \x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\x00\x12\x32\n\nnear_audio\x18. 
\x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x00\x12\x32\n\nnear_video\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x00\x12\x32\n\nnear_depth\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\x00\x12\x36\n\x0cnear_thermal\x18\x31 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x00\x12.\n\x08near_imu\x18\x32 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x00\x1a\xde\x08\n\x0b\x41ggregation\x12\x10\n\x08property\x18\x01 \x01(\t\x12@\n\x03int\x18\x02 \x01(\x0b\x32\x31.weaviate.v1.AggregateRequest.Aggregation.IntegerH\x00\x12\x42\n\x06number\x18\x03 \x01(\x0b\x32\x30.weaviate.v1.AggregateRequest.Aggregation.NumberH\x00\x12>\n\x04text\x18\x04 \x01(\x0b\x32..weaviate.v1.AggregateRequest.Aggregation.TextH\x00\x12\x44\n\x07\x62oolean\x18\x05 \x01(\x0b\x32\x31.weaviate.v1.AggregateRequest.Aggregation.BooleanH\x00\x12>\n\x04\x64\x61te\x18\x06 \x01(\x0b\x32..weaviate.v1.AggregateRequest.Aggregation.DateH\x00\x12H\n\treference\x18\x07 \x01(\x0b\x32\x33.weaviate.v1.AggregateRequest.Aggregation.ReferenceH\x00\x1a\x81\x01\n\x07Integer\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0b\n\x03sum\x18\x03 \x01(\x08\x12\x0c\n\x04mean\x18\x04 \x01(\x08\x12\x0c\n\x04mode\x18\x05 \x01(\x08\x12\x0e\n\x06median\x18\x06 \x01(\x08\x12\x0f\n\x07maximum\x18\x07 \x01(\x08\x12\x0f\n\x07minimum\x18\x08 \x01(\x08\x1a\x80\x01\n\x06Number\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0b\n\x03sum\x18\x03 \x01(\x08\x12\x0c\n\x04mean\x18\x04 \x01(\x08\x12\x0c\n\x04mode\x18\x05 \x01(\x08\x12\x0e\n\x06median\x18\x06 \x01(\x08\x12\x0f\n\x07maximum\x18\x07 \x01(\x08\x12\x0f\n\x07minimum\x18\x08 \x01(\x08\x1aw\n\x04Text\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x16\n\x0etop_occurences\x18\x03 \x01(\x08\x12!\n\x14top_occurences_limit\x18\x04 \x01(\rH\x00\x88\x01\x01\x42\x17\n\x15_top_occurences_limit\x1a\x82\x01\n\x07\x42oolean\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 
\x01(\x08\x12\x12\n\ntotal_true\x18\x03 \x01(\x08\x12\x13\n\x0btotal_false\x18\x04 \x01(\x08\x12\x17\n\x0fpercentage_true\x18\x05 \x01(\x08\x12\x18\n\x10percentage_false\x18\x06 \x01(\x08\x1a\x63\n\x04\x44\x61te\x12\r\n\x05\x63ount\x18\x01 \x01(\x08\x12\x0c\n\x04type\x18\x02 \x01(\x08\x12\x0e\n\x06median\x18\x03 \x01(\x08\x12\x0c\n\x04mode\x18\x04 \x01(\x08\x12\x0f\n\x07maximum\x18\x05 \x01(\x08\x12\x0f\n\x07minimum\x18\x06 \x01(\x08\x1a.\n\tReference\x12\x0c\n\x04type\x18\x01 \x01(\x08\x12\x13\n\x0bpointing_to\x18\x02 \x01(\x08\x42\r\n\x0b\x61ggregation\x1a/\n\x07GroupBy\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x10\n\x08property\x18\x02 \x01(\tB\x08\n\x06searchB\x0f\n\r_object_limitB\x0b\n\t_group_byB\x08\n\x06_limitB\n\n\x08_filters"\xdf\x16\n\x0e\x41ggregateReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12;\n\rsingle_result\x18\x02 \x01(\x0b\x32".weaviate.v1.AggregateReply.SingleH\x00\x12>\n\x0fgrouped_results\x18\x03 \x01(\x0b\x32#.weaviate.v1.AggregateReply.GroupedH\x00\x1a\xc0\x0f\n\x0c\x41ggregations\x12J\n\x0c\x61ggregations\x18\x01 \x03(\x0b\x32\x34.weaviate.v1.AggregateReply.Aggregations.Aggregation\x1a\xe3\x0e\n\x0b\x41ggregation\x12\x10\n\x08property\x18\x01 \x01(\t\x12K\n\x03int\x18\x02 \x01(\x0b\x32<.weaviate.v1.AggregateReply.Aggregations.Aggregation.IntegerH\x00\x12M\n\x06number\x18\x03 \x01(\x0b\x32;.weaviate.v1.AggregateReply.Aggregations.Aggregation.NumberH\x00\x12I\n\x04text\x18\x04 \x01(\x0b\x32\x39.weaviate.v1.AggregateReply.Aggregations.Aggregation.TextH\x00\x12O\n\x07\x62oolean\x18\x05 \x01(\x0b\x32<.weaviate.v1.AggregateReply.Aggregations.Aggregation.BooleanH\x00\x12I\n\x04\x64\x61te\x18\x06 \x01(\x0b\x32\x39.weaviate.v1.AggregateReply.Aggregations.Aggregation.DateH\x00\x12S\n\treference\x18\x07 \x01(\x0b\x32>.weaviate.v1.AggregateReply.Aggregations.Aggregation.ReferenceH\x00\x1a\xf9\x01\n\x07Integer\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04mean\x18\x03 
\x01(\x01H\x02\x88\x01\x01\x12\x13\n\x06median\x18\x04 \x01(\x01H\x03\x88\x01\x01\x12\x11\n\x04mode\x18\x05 \x01(\x03H\x04\x88\x01\x01\x12\x14\n\x07maximum\x18\x06 \x01(\x03H\x05\x88\x01\x01\x12\x14\n\x07minimum\x18\x07 \x01(\x03H\x06\x88\x01\x01\x12\x10\n\x03sum\x18\x08 \x01(\x03H\x07\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\x07\n\x05_meanB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimumB\x06\n\x04_sum\x1a\xf8\x01\n\x06Number\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04mean\x18\x03 \x01(\x01H\x02\x88\x01\x01\x12\x13\n\x06median\x18\x04 \x01(\x01H\x03\x88\x01\x01\x12\x11\n\x04mode\x18\x05 \x01(\x01H\x04\x88\x01\x01\x12\x14\n\x07maximum\x18\x06 \x01(\x01H\x05\x88\x01\x01\x12\x14\n\x07minimum\x18\x07 \x01(\x01H\x06\x88\x01\x01\x12\x10\n\x03sum\x18\x08 \x01(\x01H\x07\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\x07\n\x05_meanB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimumB\x06\n\x04_sum\x1a\xe4\x02\n\x04Text\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x65\n\x0etop_occurences\x18\x03 \x01(\x0b\x32H.weaviate.v1.AggregateReply.Aggregations.Aggregation.Text.TopOccurrencesH\x02\x88\x01\x01\x1a\xa7\x01\n\x0eTopOccurrences\x12\x65\n\x05items\x18\x01 \x03(\x0b\x32V.weaviate.v1.AggregateReply.Aggregations.Aggregation.Text.TopOccurrences.TopOccurrence\x1a.\n\rTopOccurrence\x12\r\n\x05value\x18\x01 \x01(\t\x12\x0e\n\x06occurs\x18\x02 \x01(\x03\x42\x08\n\x06_countB\x07\n\x05_typeB\x11\n\x0f_top_occurences\x1a\xfb\x01\n\x07\x42oolean\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ntotal_true\x18\x03 \x01(\x03H\x02\x88\x01\x01\x12\x18\n\x0btotal_false\x18\x04 \x01(\x03H\x03\x88\x01\x01\x12\x1c\n\x0fpercentage_true\x18\x05 \x01(\x01H\x04\x88\x01\x01\x12\x1d\n\x10percentage_false\x18\x06 
\x01(\x01H\x05\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\r\n\x0b_total_trueB\x0e\n\x0c_total_falseB\x12\n\x10_percentage_trueB\x13\n\x11_percentage_false\x1a\xc0\x01\n\x04\x44\x61te\x12\x12\n\x05\x63ount\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x13\n\x06median\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x11\n\x04mode\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x14\n\x07maximum\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x14\n\x07minimum\x18\x06 \x01(\tH\x05\x88\x01\x01\x42\x08\n\x06_countB\x07\n\x05_typeB\t\n\x07_medianB\x07\n\x05_modeB\n\n\x08_maximumB\n\n\x08_minimum\x1a<\n\tReference\x12\x11\n\x04type\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x0bpointing_to\x18\x02 \x03(\tB\x07\n\x05_typeB\r\n\x0b\x61ggregation\x1a\x8c\x01\n\x06Single\x12\x1a\n\robjects_count\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x43\n\x0c\x61ggregations\x18\x02 \x01(\x0b\x32(.weaviate.v1.AggregateReply.AggregationsH\x01\x88\x01\x01\x42\x10\n\x0e_objects_countB\x0f\n\r_aggregations\x1a\xa7\x04\n\x05Group\x12\x1a\n\robjects_count\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x43\n\x0c\x61ggregations\x18\x02 \x01(\x0b\x32(.weaviate.v1.AggregateReply.AggregationsH\x01\x88\x01\x01\x12\x44\n\ngrouped_by\x18\x03 \x01(\x0b\x32+.weaviate.v1.AggregateReply.Group.GroupedByH\x02\x88\x01\x01\x1a\xc4\x02\n\tGroupedBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x0e\n\x04text\x18\x02 \x01(\tH\x00\x12\r\n\x03int\x18\x03 \x01(\x03H\x00\x12\x11\n\x07\x62oolean\x18\x04 \x01(\x08H\x00\x12\x10\n\x06number\x18\x05 \x01(\x01H\x00\x12\'\n\x05texts\x18\x06 \x01(\x0b\x32\x16.weaviate.v1.TextArrayH\x00\x12%\n\x04ints\x18\x07 \x01(\x0b\x32\x15.weaviate.v1.IntArrayH\x00\x12-\n\x08\x62ooleans\x18\x08 \x01(\x0b\x32\x19.weaviate.v1.BooleanArrayH\x00\x12+\n\x07numbers\x18\t \x01(\x0b\x32\x18.weaviate.v1.NumberArrayH\x00\x12\x30\n\x03geo\x18\n 
\x01(\x0b\x32!.weaviate.v1.GeoCoordinatesFilterH\x00\x42\x07\n\x05valueB\x10\n\x0e_objects_countB\x0f\n\r_aggregationsB\r\n\x0b_grouped_by\x1a<\n\x07Grouped\x12\x31\n\x06groups\x18\x01 \x03(\x0b\x32!.weaviate.v1.AggregateReply.GroupB\x08\n\x06resultBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoAggregateZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3' ) _globals = globals() @@ -53,37 +53,39 @@ _globals["_AGGREGATEREQUEST_GROUPBY"]._serialized_start = 1988 _globals["_AGGREGATEREQUEST_GROUPBY"]._serialized_end = 2035 _globals["_AGGREGATEREPLY"]._serialized_start = 2100 - _globals["_AGGREGATEREPLY"]._serialized_end = 2237 - _globals["_AGGREGATEREPLY_RESULT"]._serialized_start = 2184 - _globals["_AGGREGATEREPLY_RESULT"]._serialized_end = 2237 - _globals["_AGGREGATEGROUP"]._serialized_start = 2240 - _globals["_AGGREGATEGROUP"]._serialized_end = 4781 - _globals["_AGGREGATEGROUP_AGGREGATIONS"]._serialized_start = 2420 - _globals["_AGGREGATEGROUP_AGGREGATIONS"]._serialized_end = 4404 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION"]._serialized_start = 2513 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION"]._serialized_end = 4404 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_INTEGER"]._serialized_start = 3019 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_INTEGER"]._serialized_end = 3268 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_NUMBER"]._serialized_start = 3271 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_NUMBER"]._serialized_end = 3519 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT"]._serialized_start = 3522 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT"]._serialized_end = 3878 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES"]._serialized_start = ( - 3673 + _globals["_AGGREGATEREPLY"]._serialized_end = 5011 + _globals["_AGGREGATEREPLY_AGGREGATIONS"]._serialized_start = 2258 + _globals["_AGGREGATEREPLY_AGGREGATIONS"]._serialized_end = 4242 + 
_globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION"]._serialized_start = 2351 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION"]._serialized_end = 4242 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_INTEGER"]._serialized_start = 2857 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_INTEGER"]._serialized_end = 3106 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_NUMBER"]._serialized_start = 3109 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_NUMBER"]._serialized_end = 3357 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_TEXT"]._serialized_start = 3360 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_TEXT"]._serialized_end = 3716 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES"]._serialized_start = ( + 3511 ) - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES"]._serialized_end = 3840 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES"]._serialized_end = 3678 _globals[ - "_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES_TOPOCCURRENCE" - ]._serialized_start = 3794 + "_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES_TOPOCCURRENCE" + ]._serialized_start = 3632 _globals[ - "_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES_TOPOCCURRENCE" - ]._serialized_end = 3840 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_BOOLEAN"]._serialized_start = 3881 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_BOOLEAN"]._serialized_end = 4132 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_DATE"]._serialized_start = 4135 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_DATE"]._serialized_end = 4327 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_REFERENCE"]._serialized_start = 4329 - _globals["_AGGREGATEGROUP_AGGREGATIONS_AGGREGATION_REFERENCE"]._serialized_end = 4389 - _globals["_AGGREGATEGROUP_GROUPEDBY"]._serialized_start = 4407 - _globals["_AGGREGATEGROUP_GROUPEDBY"]._serialized_end = 4731 + 
"_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_TEXT_TOPOCCURRENCES_TOPOCCURRENCE" + ]._serialized_end = 3678 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_BOOLEAN"]._serialized_start = 3719 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_BOOLEAN"]._serialized_end = 3970 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_DATE"]._serialized_start = 3973 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_DATE"]._serialized_end = 4165 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_REFERENCE"]._serialized_start = 4167 + _globals["_AGGREGATEREPLY_AGGREGATIONS_AGGREGATION_REFERENCE"]._serialized_end = 4227 + _globals["_AGGREGATEREPLY_SINGLE"]._serialized_start = 4245 + _globals["_AGGREGATEREPLY_SINGLE"]._serialized_end = 4385 + _globals["_AGGREGATEREPLY_GROUP"]._serialized_start = 4388 + _globals["_AGGREGATEREPLY_GROUP"]._serialized_end = 4939 + _globals["_AGGREGATEREPLY_GROUP_GROUPEDBY"]._serialized_start = 4565 + _globals["_AGGREGATEREPLY_GROUP_GROUPEDBY"]._serialized_end = 4889 + _globals["_AGGREGATEREPLY_GROUPED"]._serialized_start = 4941 + _globals["_AGGREGATEREPLY_GROUPED"]._serialized_end = 5001 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/aggregate_pb2.pyi b/weaviate/proto/v1/aggregate_pb2.pyi index fcadbcc75..cb50ce6b7 100644 --- a/weaviate/proto/v1/aggregate_pb2.pyi +++ b/weaviate/proto/v1/aggregate_pb2.pyi @@ -273,28 +273,7 @@ class AggregateRequest(_message.Message): ) -> None: ... class AggregateReply(_message.Message): - __slots__ = ("took", "result") - - class Result(_message.Message): - __slots__ = ("groups",) - GROUPS_FIELD_NUMBER: _ClassVar[int] - groups: _containers.RepeatedCompositeFieldContainer[AggregateGroup] - def __init__( - self, groups: _Optional[_Iterable[_Union[AggregateGroup, _Mapping]]] = ... - ) -> None: ... 
- - TOOK_FIELD_NUMBER: _ClassVar[int] - RESULT_FIELD_NUMBER: _ClassVar[int] - took: float - result: AggregateReply.Result - def __init__( - self, - took: _Optional[float] = ..., - result: _Optional[_Union[AggregateReply.Result, _Mapping]] = ..., - ) -> None: ... - -class AggregateGroup(_message.Message): - __slots__ = ("objects_count", "aggregations", "grouped_by") + __slots__ = ("took", "single_result", "grouped_results") class Aggregations(_message.Message): __slots__ = ("aggregations",) @@ -380,14 +359,14 @@ class AggregateGroup(_message.Message): ITEMS_FIELD_NUMBER: _ClassVar[int] items: _containers.RepeatedCompositeFieldContainer[ - AggregateGroup.Aggregations.Aggregation.Text.TopOccurrences.TopOccurrence + AggregateReply.Aggregations.Aggregation.Text.TopOccurrences.TopOccurrence ] def __init__( self, items: _Optional[ _Iterable[ _Union[ - AggregateGroup.Aggregations.Aggregation.Text.TopOccurrences.TopOccurrence, + AggregateReply.Aggregations.Aggregation.Text.TopOccurrences.TopOccurrence, _Mapping, ] ] @@ -399,14 +378,14 @@ class AggregateGroup(_message.Message): TOP_OCCURENCES_FIELD_NUMBER: _ClassVar[int] count: int type: str - top_occurences: AggregateGroup.Aggregations.Aggregation.Text.TopOccurrences + top_occurences: AggregateReply.Aggregations.Aggregation.Text.TopOccurrences def __init__( self, count: _Optional[int] = ..., type: _Optional[str] = ..., top_occurences: _Optional[ _Union[ - AggregateGroup.Aggregations.Aggregation.Text.TopOccurrences, _Mapping + AggregateReply.Aggregations.Aggregation.Text.TopOccurrences, _Mapping ] ] = ..., ) -> None: ... 
@@ -484,102 +463,138 @@ class AggregateGroup(_message.Message): DATE_FIELD_NUMBER: _ClassVar[int] REFERENCE_FIELD_NUMBER: _ClassVar[int] property: str - int: AggregateGroup.Aggregations.Aggregation.Integer - number: AggregateGroup.Aggregations.Aggregation.Number - text: AggregateGroup.Aggregations.Aggregation.Text - boolean: AggregateGroup.Aggregations.Aggregation.Boolean - date: AggregateGroup.Aggregations.Aggregation.Date - reference: AggregateGroup.Aggregations.Aggregation.Reference + int: AggregateReply.Aggregations.Aggregation.Integer + number: AggregateReply.Aggregations.Aggregation.Number + text: AggregateReply.Aggregations.Aggregation.Text + boolean: AggregateReply.Aggregations.Aggregation.Boolean + date: AggregateReply.Aggregations.Aggregation.Date + reference: AggregateReply.Aggregations.Aggregation.Reference def __init__( self, property: _Optional[str] = ..., int: _Optional[ - _Union[AggregateGroup.Aggregations.Aggregation.Integer, _Mapping] + _Union[AggregateReply.Aggregations.Aggregation.Integer, _Mapping] ] = ..., number: _Optional[ - _Union[AggregateGroup.Aggregations.Aggregation.Number, _Mapping] + _Union[AggregateReply.Aggregations.Aggregation.Number, _Mapping] ] = ..., text: _Optional[ - _Union[AggregateGroup.Aggregations.Aggregation.Text, _Mapping] + _Union[AggregateReply.Aggregations.Aggregation.Text, _Mapping] ] = ..., boolean: _Optional[ - _Union[AggregateGroup.Aggregations.Aggregation.Boolean, _Mapping] + _Union[AggregateReply.Aggregations.Aggregation.Boolean, _Mapping] ] = ..., date: _Optional[ - _Union[AggregateGroup.Aggregations.Aggregation.Date, _Mapping] + _Union[AggregateReply.Aggregations.Aggregation.Date, _Mapping] ] = ..., reference: _Optional[ - _Union[AggregateGroup.Aggregations.Aggregation.Reference, _Mapping] + _Union[AggregateReply.Aggregations.Aggregation.Reference, _Mapping] ] = ..., ) -> None: ... 
AGGREGATIONS_FIELD_NUMBER: _ClassVar[int] aggregations: _containers.RepeatedCompositeFieldContainer[ - AggregateGroup.Aggregations.Aggregation + AggregateReply.Aggregations.Aggregation ] def __init__( self, aggregations: _Optional[ - _Iterable[_Union[AggregateGroup.Aggregations.Aggregation, _Mapping]] + _Iterable[_Union[AggregateReply.Aggregations.Aggregation, _Mapping]] ] = ..., ) -> None: ... - class GroupedBy(_message.Message): - __slots__ = ( - "path", - "text", - "int", - "boolean", - "number", - "texts", - "ints", - "booleans", - "numbers", - "geo", - ) - PATH_FIELD_NUMBER: _ClassVar[int] - TEXT_FIELD_NUMBER: _ClassVar[int] - INT_FIELD_NUMBER: _ClassVar[int] - BOOLEAN_FIELD_NUMBER: _ClassVar[int] - NUMBER_FIELD_NUMBER: _ClassVar[int] - TEXTS_FIELD_NUMBER: _ClassVar[int] - INTS_FIELD_NUMBER: _ClassVar[int] - BOOLEANS_FIELD_NUMBER: _ClassVar[int] - NUMBERS_FIELD_NUMBER: _ClassVar[int] - GEO_FIELD_NUMBER: _ClassVar[int] - path: _containers.RepeatedScalarFieldContainer[str] - text: str - int: int - boolean: bool - number: float - texts: _base_pb2.TextArray - ints: _base_pb2.IntArray - booleans: _base_pb2.BooleanArray - numbers: _base_pb2.NumberArray - geo: _base_pb2.GeoCoordinatesFilter + class Single(_message.Message): + __slots__ = ("objects_count", "aggregations") + OBJECTS_COUNT_FIELD_NUMBER: _ClassVar[int] + AGGREGATIONS_FIELD_NUMBER: _ClassVar[int] + objects_count: int + aggregations: AggregateReply.Aggregations def __init__( self, - path: _Optional[_Iterable[str]] = ..., - text: _Optional[str] = ..., - int: _Optional[int] = ..., - boolean: bool = ..., - number: _Optional[float] = ..., - texts: _Optional[_Union[_base_pb2.TextArray, _Mapping]] = ..., - ints: _Optional[_Union[_base_pb2.IntArray, _Mapping]] = ..., - booleans: _Optional[_Union[_base_pb2.BooleanArray, _Mapping]] = ..., - numbers: _Optional[_Union[_base_pb2.NumberArray, _Mapping]] = ..., - geo: _Optional[_Union[_base_pb2.GeoCoordinatesFilter, _Mapping]] = ..., + objects_count: _Optional[int] = 
..., + aggregations: _Optional[_Union[AggregateReply.Aggregations, _Mapping]] = ..., ) -> None: ... - OBJECTS_COUNT_FIELD_NUMBER: _ClassVar[int] - AGGREGATIONS_FIELD_NUMBER: _ClassVar[int] - GROUPED_BY_FIELD_NUMBER: _ClassVar[int] - objects_count: int - aggregations: AggregateGroup.Aggregations - grouped_by: AggregateGroup.GroupedBy + class Group(_message.Message): + __slots__ = ("objects_count", "aggregations", "grouped_by") + + class GroupedBy(_message.Message): + __slots__ = ( + "path", + "text", + "int", + "boolean", + "number", + "texts", + "ints", + "booleans", + "numbers", + "geo", + ) + PATH_FIELD_NUMBER: _ClassVar[int] + TEXT_FIELD_NUMBER: _ClassVar[int] + INT_FIELD_NUMBER: _ClassVar[int] + BOOLEAN_FIELD_NUMBER: _ClassVar[int] + NUMBER_FIELD_NUMBER: _ClassVar[int] + TEXTS_FIELD_NUMBER: _ClassVar[int] + INTS_FIELD_NUMBER: _ClassVar[int] + BOOLEANS_FIELD_NUMBER: _ClassVar[int] + NUMBERS_FIELD_NUMBER: _ClassVar[int] + GEO_FIELD_NUMBER: _ClassVar[int] + path: _containers.RepeatedScalarFieldContainer[str] + text: str + int: int + boolean: bool + number: float + texts: _base_pb2.TextArray + ints: _base_pb2.IntArray + booleans: _base_pb2.BooleanArray + numbers: _base_pb2.NumberArray + geo: _base_pb2.GeoCoordinatesFilter + def __init__( + self, + path: _Optional[_Iterable[str]] = ..., + text: _Optional[str] = ..., + int: _Optional[int] = ..., + boolean: bool = ..., + number: _Optional[float] = ..., + texts: _Optional[_Union[_base_pb2.TextArray, _Mapping]] = ..., + ints: _Optional[_Union[_base_pb2.IntArray, _Mapping]] = ..., + booleans: _Optional[_Union[_base_pb2.BooleanArray, _Mapping]] = ..., + numbers: _Optional[_Union[_base_pb2.NumberArray, _Mapping]] = ..., + geo: _Optional[_Union[_base_pb2.GeoCoordinatesFilter, _Mapping]] = ..., + ) -> None: ... 
+ + OBJECTS_COUNT_FIELD_NUMBER: _ClassVar[int] + AGGREGATIONS_FIELD_NUMBER: _ClassVar[int] + GROUPED_BY_FIELD_NUMBER: _ClassVar[int] + objects_count: int + aggregations: AggregateReply.Aggregations + grouped_by: AggregateReply.Group.GroupedBy + def __init__( + self, + objects_count: _Optional[int] = ..., + aggregations: _Optional[_Union[AggregateReply.Aggregations, _Mapping]] = ..., + grouped_by: _Optional[_Union[AggregateReply.Group.GroupedBy, _Mapping]] = ..., + ) -> None: ... + + class Grouped(_message.Message): + __slots__ = ("groups",) + GROUPS_FIELD_NUMBER: _ClassVar[int] + groups: _containers.RepeatedCompositeFieldContainer[AggregateReply.Group] + def __init__( + self, groups: _Optional[_Iterable[_Union[AggregateReply.Group, _Mapping]]] = ... + ) -> None: ... + + TOOK_FIELD_NUMBER: _ClassVar[int] + SINGLE_RESULT_FIELD_NUMBER: _ClassVar[int] + GROUPED_RESULTS_FIELD_NUMBER: _ClassVar[int] + took: float + single_result: AggregateReply.Single + grouped_results: AggregateReply.Grouped def __init__( self, - objects_count: _Optional[int] = ..., - aggregations: _Optional[_Union[AggregateGroup.Aggregations, _Mapping]] = ..., - grouped_by: _Optional[_Union[AggregateGroup.GroupedBy, _Mapping]] = ..., + took: _Optional[float] = ..., + single_result: _Optional[_Union[AggregateReply.Single, _Mapping]] = ..., + grouped_results: _Optional[_Union[AggregateReply.Grouped, _Mapping]] = ..., ) -> None: ... 
From 769a354f6ba1cca32fcbb5f38ef020fe130f5234 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 31 Jan 2025 15:46:30 +0000 Subject: [PATCH 31/48] Update CI to latest main build --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 39af591f8..b97f74503 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -23,7 +23,7 @@ env: WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 WEAVIATE_128: 1.28.3 - WEAVIATE_129: 1.29.0-dev-5dc00ba + WEAVIATE_129: main-63d1d6c jobs: lint-and-format: From a7ec88f1aa5c872cfae37d484396c08077098520 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 31 Jan 2025 16:13:36 +0000 Subject: [PATCH 32/48] Gate aggregate grpc behind `1.29.0` rather than `1.28.4` --- integration/test_collection_aggregate.py | 8 ++++---- weaviate/collections/aggregations/hybrid.py | 2 +- weaviate/collections/aggregations/near_image.py | 2 +- weaviate/collections/aggregations/near_object.py | 2 +- weaviate/collections/aggregations/near_text.py | 2 +- weaviate/collections/aggregations/near_vector.py | 2 +- weaviate/collections/aggregations/over_all.py | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/integration/test_collection_aggregate.py b/integration/test_collection_aggregate.py index 56b600faf..48ba0535c 100644 --- a/integration/test_collection_aggregate.py +++ b/integration/test_collection_aggregate.py @@ -205,7 +205,7 @@ def test_over_all_with_filters_ref(collection_factory: CollectionFactory) -> Non filters=Filter.by_ref("ref").by_property("text").equal("one"), return_metrics=[Metrics("text").text(count=True, top_occurrences_value=True)], ) - if collection._connection._weaviate_version.is_lower_than(1, 28, 4): + if collection._connection._weaviate_version.is_lower_than(1, 29, 0): with pytest.raises(WeaviateInvalidInputError): query() else: @@ -217,7 +217,7 @@ def 
test_over_all_with_filters_ref(collection_factory: CollectionFactory) -> Non def test_wrong_aggregation(collection_factory: CollectionFactory) -> None: collection = collection_factory(properties=[Property(name="text", data_type=DataType.TEXT)]) - if collection._connection._weaviate_version.is_at_least(1, 28, 4): + if collection._connection._weaviate_version.is_at_least(1, 29, 0): pytest.skip("GQL is only used for versions 1.28.4 and lower") with pytest.raises(WeaviateQueryError) as e: collection.aggregate.over_all(total_count=False) @@ -665,7 +665,7 @@ def test_group_by_aggregation_argument(collection_factory: CollectionFactory) -> groups = res.groups assert len(groups) == 2 assert groups[0].grouped_by.prop == "int" - if collection._connection._weaviate_version.is_lower_than(1, 28, 4): + if collection._connection._weaviate_version.is_lower_than(1, 29, 0): assert groups[0].grouped_by.value == "1" or groups[1].grouped_by.value == "1" else: assert groups[0].grouped_by.value == 1 or groups[1].grouped_by.value == 1 @@ -674,7 +674,7 @@ def test_group_by_aggregation_argument(collection_factory: CollectionFactory) -> assert isinstance(groups[0].properties["int"], AggregateInteger) assert groups[0].properties["int"].count == 1 assert groups[1].grouped_by.prop == "int" - if collection._connection._weaviate_version.is_lower_than(1, 28, 4): + if collection._connection._weaviate_version.is_lower_than(1, 29, 0): assert groups[1].grouped_by.value == "2" or groups[0].grouped_by.value == "2" else: assert groups[1].grouped_by.value == 2 or groups[0].grouped_by.value == 2 diff --git a/weaviate/collections/aggregations/hybrid.py b/weaviate/collections/aggregations/hybrid.py index 1426184e7..1d719fcbc 100644 --- a/weaviate/collections/aggregations/hybrid.py +++ b/weaviate/collections/aggregations/hybrid.py @@ -74,7 +74,7 @@ async def hybrid( if isinstance(group_by, str): group_by = GroupByAggregate(prop=group_by) - if self._connection._weaviate_version.is_lower_than(1, 28, 4): + if 
self._connection._weaviate_version.is_lower_than(1, 29, 0): # use gql, remove once 1.29 is the minimum supported version builder = self._base(return_metrics, filters, total_count) diff --git a/weaviate/collections/aggregations/near_image.py b/weaviate/collections/aggregations/near_image.py index d957e652c..2b5134416 100644 --- a/weaviate/collections/aggregations/near_image.py +++ b/weaviate/collections/aggregations/near_image.py @@ -71,7 +71,7 @@ async def near_image( if isinstance(group_by, str): group_by = GroupByAggregate(prop=group_by) - if self._connection._weaviate_version.is_lower_than(1, 28, 4): + if self._connection._weaviate_version.is_lower_than(1, 29, 0): # use gql, remove once 1.29 is the minimum supported version builder = self._base(return_metrics, filters, total_count) diff --git a/weaviate/collections/aggregations/near_object.py b/weaviate/collections/aggregations/near_object.py index 12ce4cd12..0be2e4af9 100644 --- a/weaviate/collections/aggregations/near_object.py +++ b/weaviate/collections/aggregations/near_object.py @@ -69,7 +69,7 @@ async def near_object( if isinstance(group_by, str): group_by = GroupByAggregate(prop=group_by) - if self._connection._weaviate_version.is_lower_than(1, 28, 4): + if self._connection._weaviate_version.is_lower_than(1, 29, 0): # use gql, remove once 1.29 is the minimum supported version builder = self._base(return_metrics, filters, total_count) diff --git a/weaviate/collections/aggregations/near_text.py b/weaviate/collections/aggregations/near_text.py index 07822de11..17845a90f 100644 --- a/weaviate/collections/aggregations/near_text.py +++ b/weaviate/collections/aggregations/near_text.py @@ -76,7 +76,7 @@ async def near_text( if isinstance(group_by, str): group_by = GroupByAggregate(prop=group_by) - if self._connection._weaviate_version.is_lower_than(1, 28, 4): + if self._connection._weaviate_version.is_lower_than(1, 29, 0): # use gql, remove once 1.29 is the minimum supported version builder = 
self._base(return_metrics, filters, total_count) diff --git a/weaviate/collections/aggregations/near_vector.py b/weaviate/collections/aggregations/near_vector.py index c6567d3c5..02584490d 100644 --- a/weaviate/collections/aggregations/near_vector.py +++ b/weaviate/collections/aggregations/near_vector.py @@ -73,7 +73,7 @@ async def near_vector( if isinstance(group_by, str): group_by = GroupByAggregate(prop=group_by) - if self._connection._weaviate_version.is_lower_than(1, 28, 4): + if self._connection._weaviate_version.is_lower_than(1, 29, 0): # use gql, remove once 1.29 is the minimum supported version if not isinstance(near_vector, list): diff --git a/weaviate/collections/aggregations/over_all.py b/weaviate/collections/aggregations/over_all.py index e44c2a826..d36b1edb2 100644 --- a/weaviate/collections/aggregations/over_all.py +++ b/weaviate/collections/aggregations/over_all.py @@ -50,7 +50,7 @@ async def over_all( if isinstance(group_by, str): group_by = GroupByAggregate(prop=group_by) - if self._connection._weaviate_version.is_lower_than(1, 28, 4): + if self._connection._weaviate_version.is_lower_than(1, 29, 0): # use gql, remove once 1.29 is the minimum supported version builder = self._base(return_metrics, filters, total_count) builder = self._add_groupby_to_builder(builder, group_by) From 0707594896a14f98ad410a363faab7aa8ca17fc6 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 3 Feb 2025 11:17:42 +0000 Subject: [PATCH 33/48] Update CI to use latest dev image --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index b97f74503..b3e10bc22 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -23,7 +23,7 @@ env: WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 WEAVIATE_128: 1.28.3 - WEAVIATE_129: main-63d1d6c + WEAVIATE_129: 1.29.0-dev-35036a8 jobs: lint-and-format: From b94abbb4f1a0096b9b74d72af783b109b197e423 Mon Sep 17 00:00:00 2001 
From: Tommy Smith Date: Tue, 4 Feb 2025 16:03:12 +0000 Subject: [PATCH 34/48] Alter passing of near vector args in queries ready for server change --- integration/test_named_vectors.py | 28 +++- .../collections/batch/grpc_batch_objects.py | 4 +- weaviate/collections/classes/grpc.py | 67 ++++++-- weaviate/collections/grpc/shared.py | 143 +++++++++++++----- weaviate/util.py | 43 ------ 5 files changed, 190 insertions(+), 95 deletions(-) diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index f41979440..948a7ef15 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -695,7 +695,7 @@ def test_same_target_vector_multiple_input( ( { "first": [0, 1], - "second": wvc.query.NearVector.list_of_vectors([[1, 0, 0], [0, 0, 1]]), + "second": wvc.query.NearVector.list_of_vectors([1, 0, 0], [0, 0, 1]), }, ["first", "second"], ), @@ -848,12 +848,31 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: collection.data.insert_many([DataObject({}, vector={"colbert": [[1, 2], [4, 5]]})]) assert len(collection) == 1 + objs = collection.query.near_vector( + {"regular": [[1, 2], [3, 4]]}, + target_vector="regular", + ).objects + assert len(objs) == 1 + objs = collection.query.near_vector( {"colbert": wvc.query.NearVector.multidimensional([[1, 2], [3, 4]])}, target_vector="colbert", ).objects assert len(objs) == 1 + objs = collection.query.near_vector( + {"colbert": wvc.query.NearVector.list_of_vectors([[1, 2], [3, 4]])}, + target_vector="colbert", + ).objects + assert len(objs) == 1 + + objs = collection.query.hybrid( + None, + vector={"colbert": [[1, 2], [3, 4]]}, + target_vector="colbert", + ).objects + assert len(objs) == 1 + objs = collection.query.hybrid( None, vector={"colbert": wvc.query.NearVector.multidimensional([[1, 2], [3, 4]])}, @@ -861,6 +880,13 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: ).objects assert len(objs) == 1 + objs = 
collection.query.hybrid( + None, + vector={"colbert": wvc.query.NearVector.list_of_vectors([[1, 2], [3, 4]])}, + target_vector="colbert", + ).objects + assert len(objs) == 1 + def test_colbert_vectors_jinaai(collection_factory: CollectionFactory) -> None: api_key = os.environ.get("JINAAI_APIKEY") diff --git a/weaviate/collections/batch/grpc_batch_objects.py b/weaviate/collections/batch/grpc_batch_objects.py index 0eb2ba2eb..fa0393013 100644 --- a/weaviate/collections/batch/grpc_batch_objects.py +++ b/weaviate/collections/batch/grpc_batch_objects.py @@ -16,7 +16,7 @@ from weaviate.collections.classes.config import ConsistencyLevel from weaviate.collections.classes.internal import ReferenceToMulti, ReferenceInputs from weaviate.collections.classes.types import GeoCoordinate, PhoneNumber -from weaviate.collections.grpc.shared import _BaseGRPC, _Pack, PERMISSION_DENIED +from weaviate.collections.grpc.shared import _BaseGRPC, _Pack, PERMISSION_DENIED, _is_1d_vector from weaviate.connect import ConnectionV4 from weaviate.exceptions import ( WeaviateBatchError, @@ -27,7 +27,7 @@ ) from weaviate.proto.v1 import batch_pb2, base_pb2 from weaviate.types import VECTORS -from weaviate.util import _datetime_to_string, _is_1d_vector +from weaviate.util import _datetime_to_string class _BatchGRPC(_BaseGRPC): diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index 480c34cf5..1eec10043 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -1,6 +1,19 @@ from dataclasses import dataclass from enum import Enum, auto -from typing import ClassVar, List, Literal, Mapping, Optional, Sequence, Type, Union, Dict, cast +from typing import ( + ClassVar, + Generic, + List, + Literal, + Mapping, + Optional, + Sequence, + Type, + Union, + Dict, + cast, +) +from typing_extensions import TypeGuard, TypeVar from pydantic import ConfigDict, Field @@ -228,12 +241,38 @@ class Rerank(_WeaviateInput): query: Optional[str] = 
Field(default=None) +OneDimensionalVectorType = Sequence[NUMBER] +"""Represents a one-dimensional vector, e.g. one produced by `text2vec-jinaai`""" +TwoDimensionalVectorType = Sequence[Sequence[NUMBER]] +"""Represents a two-dimensional vector, e.g. one produced by `text2colbert-jinaai""" + + +V = TypeVar("V", OneDimensionalVectorType, TwoDimensionalVectorType) + + class _MultidimensionalQuery(_WeaviateInput): - tensor: Sequence[Sequence[float]] + tensor: TwoDimensionalVectorType -class _ListOfVectorsQuery(_WeaviateInput): - vectors: Sequence[Sequence[float]] +class _ListOfVectorsQuery(Generic[V], _WeaviateInput): + vectors: Sequence[V] + + @staticmethod + def is_one_dimensional( + self_: "_ListOfVectorsQuery", + ) -> TypeGuard["_ListOfVectorsQuery[OneDimensionalVectorType]"]: + return len(self_.vectors) > 0 and isinstance(self_.vectors[0], Sequence) + + @staticmethod + def is_two_dimensional( + self_: "_ListOfVectorsQuery", + ) -> TypeGuard["_ListOfVectorsQuery[TwoDimensionalVectorType]"]: + return ( + len(self_.vectors) > 0 + and isinstance(self_.vectors[0], Sequence) + and len(self_.vectors[0]) > 0 + and isinstance(self_.vectors[0][0], Sequence) + ) MultidimensionalQuery = _MultidimensionalQuery @@ -243,13 +282,19 @@ class _ListOfVectorsQuery(_WeaviateInput): """Define a many-vectors query to be used within a near vector search, i.e. 
multiple vectors over a single-vector space.""" +PrimitiveVectorType = Union[OneDimensionalVectorType, TwoDimensionalVectorType] + NearVectorInputType = Union[ - Sequence[NUMBER], - Sequence[Sequence[NUMBER]], + OneDimensionalVectorType, + TwoDimensionalVectorType, Mapping[ str, Union[ - Sequence[NUMBER], Sequence[Sequence[NUMBER]], MultidimensionalQuery, ListOfVectorsQuery + OneDimensionalVectorType, + TwoDimensionalVectorType, + MultidimensionalQuery, + ListOfVectorsQuery[OneDimensionalVectorType], + ListOfVectorsQuery[TwoDimensionalVectorType], ], ], ] @@ -260,14 +305,14 @@ class NearVector: """Factory class to use when defining near vector queries with multiple vectors in `near_vector()` and `hybrid()` methods.""" @staticmethod - def multidimensional(tensor: Sequence[Sequence[float]]) -> _MultidimensionalQuery: + def multidimensional(vectors: TwoDimensionalVectorType) -> _MultidimensionalQuery: """Define a multi-vector query to be used within a near vector search, i.e. a single vector over a multi-vector space.""" - return _MultidimensionalQuery(tensor=tensor) + return _MultidimensionalQuery(tensor=vectors) @staticmethod - def list_of_vectors(vectors: Sequence[Sequence[float]]) -> _ListOfVectorsQuery: + def list_of_vectors(*vectors: V) -> _ListOfVectorsQuery[V]: """Define a many-vectors query to be used within a near vector search, i.e. 
multiple vectors over a single-vector space.""" - return _ListOfVectorsQuery(vectors=vectors) + return _ListOfVectorsQuery[V](vectors=vectors) class _HybridNearBase(_WeaviateInput): diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index 6b28eee6b..50ebe664b 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -6,7 +6,6 @@ List, Literal, Optional, - Sequence, Union, cast, Tuple, @@ -27,6 +26,9 @@ Move, TargetVectorJoinType, NearVectorInputType, + OneDimensionalVectorType, + TwoDimensionalVectorType, + PrimitiveVectorType, ) from weaviate.connect import ConnectionV4 from weaviate.exceptions import ( @@ -35,8 +37,8 @@ ) from weaviate.proto.v1 import base_search_pb2, base_pb2 from weaviate.types import NUMBER, UUID -from weaviate.util import _get_vector_v4, _is_1d_vector -from weaviate.validator import _ValidateArgument, _validate_input, _ExtraTypes +from weaviate.util import _get_vector_v4 +from weaviate.validator import _is_valid, _ValidateArgument, _validate_input, _ExtraTypes PERMISSION_DENIED = "PERMISSION_DENIED" @@ -179,7 +181,7 @@ def _vector_for_target( vector_for_target: List[base_search_pb2.VectorForTarget] = [] - def add_vector(val: Sequence[float], target_name: str) -> None: + def add_1d_vector(val: OneDimensionalVectorType, target_name: str) -> None: vec = _get_vector_v4(val) if ( @@ -209,6 +211,53 @@ def add_vector(val: Sequence[float], target_name: str) -> None: ) ) + def add_2d_vector( + value: Union[_MultidimensionalQuery, TwoDimensionalVectorType], key: str + ) -> None: + if isinstance(value, _MultidimensionalQuery): + vals = [_get_vector_v4(v) for v in value.tensor] + else: + vals = [_get_vector_v4(v) for v in value] + vector_for_target.append( + base_search_pb2.VectorForTarget( + name=key, + vectors=[ + base_pb2.Vectors( + name=key, + vector_bytes=_Pack.multi(vals), + type=base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32, + ) + ], + ) + ) + + def add_list_of_vectors(value: 
_ListOfVectorsQuery, key: str) -> None: + if _ListOfVectorsQuery.is_one_dimensional(value): + vectors = [ + base_pb2.Vectors( + name=key, + vector_bytes=_Pack.multi([_get_vector_v4(v) for v in value.vectors]), + type=base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32, + ) + ] + elif _ListOfVectorsQuery.is_two_dimensional(value): + vectors = [ + base_pb2.Vectors( + name=key, + vector_bytes=_Pack.multi([_get_vector_v4(v) for v in vecs]), + type=base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32, + ) + for vecs in value.vectors + ] + else: + raise WeaviateInvalidInputError(f"Invalid list of vectors: {value}") + vector_for_target.append( + base_search_pb2.VectorForTarget( + name=key, + vectors=vectors, + ) + ) + if isinstance(vector, dict): if ( len(vector) == 0 @@ -218,34 +267,17 @@ def add_vector(val: Sequence[float], target_name: str) -> None: raise invalid_nv_exception target_vectors_tmp: List[str] = [] for key, value in vector.items(): - # typing tools do not understand the type narrowing here if _is_1d_vector(value): - val = value - add_vector(val, key) + add_1d_vector(value, key) target_vectors_tmp.append(key) - elif isinstance(value, _MultidimensionalQuery): - vector_for_target.append( - base_search_pb2.VectorForTarget( - name=key, - vectors=[ - base_pb2.Vectors( - name=key, - vector_bytes=_Pack.multi(value.tensor), - type=base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32, - ) - ], - ) - ) + elif _is_2d_vector(value) or isinstance(value, _MultidimensionalQuery): + add_2d_vector(value, key) target_vectors_tmp.append(key) elif isinstance(value, _ListOfVectorsQuery): - for vec in value.vectors: - add_vector(vec, key) - target_vectors_tmp.append(key) + add_list_of_vectors(value, key) + target_vectors_tmp.append(key) else: - vals = cast(Sequence[Sequence[NUMBER]], value) - for inner_vector in vals: - add_vector(inner_vector, key) - target_vectors_tmp.append(key) + raise invalid_nv_exception return vector_for_target, None, target_vectors_tmp else: if _is_1d_vector(vector): @@ -665,21 +697,15 @@ 
class _Packing: class _Pack: @staticmethod - def is_multi( - v: Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]] - ) -> TypeGuard[List[List[NUMBER]]]: + def is_multi(v: PrimitiveVectorType) -> TypeGuard[List[List[NUMBER]]]: return len(v) > 0 and isinstance(v[0], list) @staticmethod - def is_single( - v: Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]] - ) -> TypeGuard[List[NUMBER]]: + def is_single(v: PrimitiveVectorType) -> TypeGuard[List[NUMBER]]: return len(v) > 0 and (isinstance(v[0], float) or isinstance(v[0], int)) @staticmethod - def parse_single_or_multi_vec( - vector: Union[Sequence[NUMBER], Sequence[Sequence[NUMBER]]] - ) -> _Packing: + def parse_single_or_multi_vec(vector: PrimitiveVectorType) -> _Packing: if _Pack.is_multi(vector): return _Packing( bytes_=_Pack.multi(vector), type_=base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32 @@ -692,12 +718,12 @@ def parse_single_or_multi_vec( raise WeaviateInvalidInputError(f"Invalid vectors: {vector}") @staticmethod - def single(vector: Sequence[NUMBER]) -> bytes: + def single(vector: OneDimensionalVectorType) -> bytes: vector_list = _get_vector_v4(vector) return struct.pack("{}f".format(len(vector_list)), *vector_list) @staticmethod - def multi(vector: Sequence[Sequence[NUMBER]]) -> bytes: + def multi(vector: TwoDimensionalVectorType) -> bytes: vector_list = [item for sublist in vector for item in sublist] return struct.pack(" List[List[float]]: _ByteOps.decode_float32s(byte_vector[i * dim * UINT32_LEN : (i + 1) * dim * UINT32_LEN]) for i in range(how_many) ] + + +def _is_1d_vector(inputs: Any) -> TypeGuard[OneDimensionalVectorType]: + try: + if len(inputs) == 0: + return False + except TypeError: + return False + if __is_list_type(inputs): + return not __is_list_type(inputs[0]) + return False + + +def _is_2d_vector(inputs: Any) -> TypeGuard[TwoDimensionalVectorType]: + try: + if len(inputs) == 0: + return False + except TypeError: + return False + if __is_list_type(inputs): + return __is_list_type(inputs[0]) + 
return False + + +def __is_list_type(inputs: Any) -> bool: + try: + if len(inputs) == 0: + return False + except TypeError: + return False + + return any( + _is_valid(types, inputs) + for types in [ + List, + _ExtraTypes.TF, + _ExtraTypes.PANDAS, + _ExtraTypes.NUMPY, + _ExtraTypes.POLARS, + ] + ) diff --git a/weaviate/util.py b/weaviate/util.py index 59208693a..6af2fff8d 100644 --- a/weaviate/util.py +++ b/weaviate/util.py @@ -10,7 +10,6 @@ import uuid as uuid_lib from pathlib import Path from typing import Union, Sequence, Any, Optional, List, Dict, Generator, Tuple, cast -from typing_extensions import TypeGuard import httpx import validators @@ -23,7 +22,6 @@ WeaviateUnsupportedFeatureError, ) from weaviate.types import NUMBER, UUIDS, TIME -from weaviate.validator import _is_valid, _ExtraTypes from weaviate.warnings import _Warnings PYPI_PACKAGE_URL = "https://pypi.org/pypi/weaviate-client/json" @@ -863,44 +861,3 @@ def _datetime_from_weaviate_str(string: str) -> datetime.datetime: "".join(string.rsplit(":", 1) if string[-1] != "Z" else string), "%Y-%m-%dT%H:%M:%S%z", ) - - -def __is_list_type(inputs: Any) -> bool: - try: - if len(inputs) == 0: - return False - except TypeError: - return False - - return any( - _is_valid(types, inputs) - for types in [ - List, - _ExtraTypes.TF, - _ExtraTypes.PANDAS, - _ExtraTypes.NUMPY, - _ExtraTypes.POLARS, - ] - ) - - -def _is_1d_vector(inputs: Any) -> TypeGuard[Sequence[float]]: - try: - if len(inputs) == 0: - return False - except TypeError: - return False - if __is_list_type(inputs): - return not __is_list_type(inputs[0]) - return False - - -def _is_2d_vector(inputs: Any) -> TypeGuard[List[List[float]]]: - try: - if len(inputs) == 0: - return False - except TypeError: - return False - if __is_list_type(inputs): - return __is_list_type(inputs[0]) - return False From ce971491dba8ed5efea0f7d74341c6e8b9f04081 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 6 Feb 2025 15:44:36 +0000 Subject: [PATCH 35/48] Update CI pipeline 
images --- .github/workflows/main.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 28cd70169..3e607f742 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -22,8 +22,8 @@ env: WEAVIATE_125: 1.25.29 WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 - WEAVIATE_128: 1.28.4-6553adc - WEAVIATE_129: 1.29.0-dev-35036a8 + WEAVIATE_128: 1.28.4-1a67582 + WEAVIATE_129: 1.29.0-rc.0-1660e54 jobs: lint-and-format: From 6123c1a6361672e2586c69b2a0af2786e1aeabe5 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 6 Feb 2025 16:25:29 +0000 Subject: [PATCH 36/48] Commit forgotten stashed changes --- .github/workflows/main.yaml | 2 +- integration/test_named_vectors.py | 15 ++++------- weaviate/collections/classes/grpc.py | 40 +++++++++++----------------- weaviate/collections/grpc/shared.py | 20 ++++---------- weaviate/outputs/query.py | 2 -- 5 files changed, 27 insertions(+), 52 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 3e607f742..140d14245 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -22,7 +22,7 @@ env: WEAVIATE_125: 1.25.29 WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 - WEAVIATE_128: 1.28.4-1a67582 + WEAVIATE_128: 1.28.4-c6f2693 WEAVIATE_129: 1.29.0-rc.0-1660e54 jobs: diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index 948a7ef15..e941dfda7 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -845,17 +845,19 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: ) assert config.vector_config["colbert"].vector_index_config.multi_vector.aggregation == "maxSim" - collection.data.insert_many([DataObject({}, vector={"colbert": [[1, 2], [4, 5]]})]) + collection.data.insert_many( + [DataObject({}, vector={"regular": [1, 2], "colbert": [[1, 2], [4, 5]]})] + ) assert len(collection) == 1 objs = 
collection.query.near_vector( - {"regular": [[1, 2], [3, 4]]}, + {"regular": [[1, 2], [2, 1]]}, target_vector="regular", ).objects assert len(objs) == 1 objs = collection.query.near_vector( - {"colbert": wvc.query.NearVector.multidimensional([[1, 2], [3, 4]])}, + {"colbert": [[1, 2], [3, 4]]}, target_vector="colbert", ).objects assert len(objs) == 1 @@ -873,13 +875,6 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: ).objects assert len(objs) == 1 - objs = collection.query.hybrid( - None, - vector={"colbert": wvc.query.NearVector.multidimensional([[1, 2], [3, 4]])}, - target_vector="colbert", - ).objects - assert len(objs) == 1 - objs = collection.query.hybrid( None, vector={"colbert": wvc.query.NearVector.list_of_vectors([[1, 2], [3, 4]])}, diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index 1eec10043..d14a88d4d 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -18,6 +18,7 @@ from pydantic import ConfigDict, Field from weaviate.collections.classes.types import _WeaviateInput +from weaviate.exceptions import WeaviateInvalidInputError from weaviate.proto.v1 import base_search_pb2 from weaviate.str_enum import BaseEnum from weaviate.types import INCLUDE_VECTOR, UUID, NUMBER @@ -246,44 +247,33 @@ class Rerank(_WeaviateInput): TwoDimensionalVectorType = Sequence[Sequence[NUMBER]] """Represents a two-dimensional vector, e.g. 
one produced by `text2colbert-jinaai""" - -V = TypeVar("V", OneDimensionalVectorType, TwoDimensionalVectorType) +PrimitiveVectorType = Union[OneDimensionalVectorType, TwoDimensionalVectorType] -class _MultidimensionalQuery(_WeaviateInput): - tensor: TwoDimensionalVectorType +V = TypeVar("V", OneDimensionalVectorType, TwoDimensionalVectorType) class _ListOfVectorsQuery(Generic[V], _WeaviateInput): + dimensionality: Literal["1D", "2D"] vectors: Sequence[V] @staticmethod def is_one_dimensional( self_: "_ListOfVectorsQuery", ) -> TypeGuard["_ListOfVectorsQuery[OneDimensionalVectorType]"]: - return len(self_.vectors) > 0 and isinstance(self_.vectors[0], Sequence) + return self_.dimensionality == "1D" @staticmethod def is_two_dimensional( self_: "_ListOfVectorsQuery", ) -> TypeGuard["_ListOfVectorsQuery[TwoDimensionalVectorType]"]: - return ( - len(self_.vectors) > 0 - and isinstance(self_.vectors[0], Sequence) - and len(self_.vectors[0]) > 0 - and isinstance(self_.vectors[0][0], Sequence) - ) + return self_.dimensionality == "2D" -MultidimensionalQuery = _MultidimensionalQuery -"""Define a multi-vector query to be used within a near vector search, i.e. a single vector over a multi-vector space.""" - ListOfVectorsQuery = _ListOfVectorsQuery """Define a many-vectors query to be used within a near vector search, i.e. 
multiple vectors over a single-vector space.""" -PrimitiveVectorType = Union[OneDimensionalVectorType, TwoDimensionalVectorType] - NearVectorInputType = Union[ OneDimensionalVectorType, TwoDimensionalVectorType, @@ -292,27 +282,29 @@ def is_two_dimensional( Union[ OneDimensionalVectorType, TwoDimensionalVectorType, - MultidimensionalQuery, ListOfVectorsQuery[OneDimensionalVectorType], ListOfVectorsQuery[TwoDimensionalVectorType], ], ], ] -"""Define the input types that can be used in a near vector search.""" +"""Define the input types that can be used in a near vector search""" class NearVector: """Factory class to use when defining near vector queries with multiple vectors in `near_vector()` and `hybrid()` methods.""" - @staticmethod - def multidimensional(vectors: TwoDimensionalVectorType) -> _MultidimensionalQuery: - """Define a multi-vector query to be used within a near vector search, i.e. a single vector over a multi-vector space.""" - return _MultidimensionalQuery(tensor=vectors) - @staticmethod def list_of_vectors(*vectors: V) -> _ListOfVectorsQuery[V]: """Define a many-vectors query to be used within a near vector search, i.e. 
multiple vectors over a single-vector space.""" - return _ListOfVectorsQuery[V](vectors=vectors) + if len(vectors) > 0 and len(vectors[0]) > 0: + try: + len(cast(Sequence[TwoDimensionalVectorType], vectors)[0][0]) + dimensionality: Literal["1D", "2D"] = "2D" + except TypeError: + dimensionality = "1D" + return _ListOfVectorsQuery[V](dimensionality=dimensionality, vectors=vectors) + else: + raise WeaviateInvalidInputError(f"At least one vector must be given, got: {vectors}") class _HybridNearBase(_WeaviateInput): diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index 50ebe664b..ace4eafc8 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -16,7 +16,6 @@ from weaviate.collections.classes.config import ConsistencyLevel from weaviate.collections.classes.grpc import ( - _MultidimensionalQuery, _ListOfVectorsQuery, _MultiTargetVectorJoin, _HybridNearText, @@ -146,11 +145,7 @@ def _vector_per_target( return vector_per_target, None else: - if ( - isinstance(vector, _MultidimensionalQuery) - or isinstance(vector, _ListOfVectorsQuery) - or len(vector) == 0 - ): + if isinstance(vector, _ListOfVectorsQuery) or len(vector) == 0: raise invalid_nv_exception if _is_1d_vector(vector): @@ -211,20 +206,14 @@ def add_1d_vector(val: OneDimensionalVectorType, target_name: str) -> None: ) ) - def add_2d_vector( - value: Union[_MultidimensionalQuery, TwoDimensionalVectorType], key: str - ) -> None: - if isinstance(value, _MultidimensionalQuery): - vals = [_get_vector_v4(v) for v in value.tensor] - else: - vals = [_get_vector_v4(v) for v in value] + def add_2d_vector(value: TwoDimensionalVectorType, key: str) -> None: vector_for_target.append( base_search_pb2.VectorForTarget( name=key, vectors=[ base_pb2.Vectors( name=key, - vector_bytes=_Pack.multi(vals), + vector_bytes=_Pack.multi([_get_vector_v4(v) for v in value]), type=base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32, ) ], @@ -270,7 +259,7 @@ def 
add_list_of_vectors(value: _ListOfVectorsQuery, key: str) -> None: if _is_1d_vector(value): add_1d_vector(value, key) target_vectors_tmp.append(key) - elif _is_2d_vector(value) or isinstance(value, _MultidimensionalQuery): + elif _is_2d_vector(value): add_2d_vector(value, key) target_vectors_tmp.append(key) elif isinstance(value, _ListOfVectorsQuery): @@ -362,6 +351,7 @@ def _parse_near_vector( targets, target_vectors = self._recompute_target_vector_to_grpc( target_vector, target_vectors_tmp ) + print(targets, target_vectors) return base_search_pb2.NearVector( vector_bytes=near_vector_grpc, certainty=certainty, diff --git a/weaviate/outputs/query.py b/weaviate/outputs/query.py index d63e1090d..fb03d6f13 100644 --- a/weaviate/outputs/query.py +++ b/weaviate/outputs/query.py @@ -10,7 +10,6 @@ Sorting, NearVectorInputType, TargetVectorJoinType, - MultidimensionalQuery, ListOfVectorsQuery, ) @@ -61,7 +60,6 @@ "ListOfVectorsQuery", "MetadataReturn", "MetadataSingleObjectReturn", - "MultidimensionalQuery", "NearVectorInputType", "Object", "ObjectSingleReturn", From ecb193c5436c5eb224dbbfac4ea65075d233c7fd Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Thu, 6 Feb 2025 17:05:16 +0000 Subject: [PATCH 37/48] Update CI images, adjust logic to handle BC for 1.28/1.29 changes --- .github/workflows/main.yaml | 4 ++-- weaviate/collections/grpc/shared.py | 19 +++++++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 140d14245..9391abd35 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -22,8 +22,8 @@ env: WEAVIATE_125: 1.25.29 WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 - WEAVIATE_128: 1.28.4-c6f2693 - WEAVIATE_129: 1.29.0-rc.0-1660e54 + WEAVIATE_128: stable-v1.28-1a67582 + WEAVIATE_129: 1.29.0-rc.0-a6afdb7 jobs: lint-and-format: diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index ace4eafc8..e205b4956 100644 --- 
a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -205,8 +205,13 @@ def add_1d_vector(val: OneDimensionalVectorType, target_name: str) -> None: ], ) ) + target_vectors_tmp.append(key) def add_2d_vector(value: TwoDimensionalVectorType, key: str) -> None: + if self._connection._weaviate_version.is_lower_than(1, 29, 0): + for v in value: + add_1d_vector(v, key) + return vector_for_target.append( base_search_pb2.VectorForTarget( name=key, @@ -219,9 +224,18 @@ def add_2d_vector(value: TwoDimensionalVectorType, key: str) -> None: ], ) ) + target_vectors_tmp.append(key) def add_list_of_vectors(value: _ListOfVectorsQuery, key: str) -> None: - if _ListOfVectorsQuery.is_one_dimensional(value): + if _ListOfVectorsQuery.is_one_dimensional( + value + ) and self._connection._weaviate_version.is_lower_than(1, 29, 0): + for v in value.vectors: + add_1d_vector(v, key) + return + elif _ListOfVectorsQuery.is_one_dimensional( + value + ) and self._connection._weaviate_version.is_at_least(1, 29, 0): vectors = [ base_pb2.Vectors( name=key, @@ -258,13 +272,10 @@ def add_list_of_vectors(value: _ListOfVectorsQuery, key: str) -> None: for key, value in vector.items(): if _is_1d_vector(value): add_1d_vector(value, key) - target_vectors_tmp.append(key) elif _is_2d_vector(value): add_2d_vector(value, key) - target_vectors_tmp.append(key) elif isinstance(value, _ListOfVectorsQuery): add_list_of_vectors(value, key) - target_vectors_tmp.append(key) else: raise invalid_nv_exception return vector_for_target, None, target_vectors_tmp From 1450f59231b2c112479fcf2460a017b643eced04 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 7 Feb 2025 10:30:09 +0000 Subject: [PATCH 38/48] Update CI, make small fixes --- .github/workflows/main.yaml | 2 +- integration/test_named_vectors.py | 7 +------ weaviate/collections/grpc/query.py | 5 ++--- weaviate/collections/grpc/shared.py | 1 + weaviate/collections/queries/base.py | 4 +--- 5 files changed, 6 insertions(+), 13 
deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 9391abd35..36c709251 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -23,7 +23,7 @@ env: WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 WEAVIATE_128: stable-v1.28-1a67582 - WEAVIATE_129: 1.29.0-rc.0-a6afdb7 + WEAVIATE_129: 1.29.0-rc.0-201e63d jobs: lint-and-format: diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index e941dfda7..c6c388915 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -919,12 +919,7 @@ def test_colbert_vectors_jinaai(collection_factory: CollectionFactory) -> None: assert len(objs) == 1 objs = collection.query.near_vector( - { - "colbert": wvc.query.NearVector.multidimensional( - [[e + 0.01 for e in vec] for vec in vecs] - ) - }, - target_vector="colbert", + {"colbert": [[e + 0.01 for e in vec] for vec in vecs]}, target_vector="colbert" ).objects assert len(objs) == 1 diff --git a/weaviate/collections/grpc/query.py b/weaviate/collections/grpc/query.py index d3b675b75..fa082d564 100644 --- a/weaviate/collections/grpc/query.py +++ b/weaviate/collections/grpc/query.py @@ -86,12 +86,10 @@ def __init__( tenant: Optional[str], consistency_level: Optional[ConsistencyLevel], validate_arguments: bool, - uses_125_api: bool, ): super().__init__(connection, consistency_level, validate_arguments) self._name: str = name self._tenant = tenant - self.__uses_125_api = uses_125_api def get( self, @@ -440,7 +438,8 @@ def __create_request( return search_get_pb2.SearchRequest( uses_123_api=True, - uses_125_api=self.__uses_125_api, + uses_125_api=self._connection._weaviate_version.is_at_least(1, 25, 0), + uses_127_api=self._connection._weaviate_version.is_at_least(1, 27, 0), collection=self._name, limit=limit, offset=offset, diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index e205b4956..75bdec455 100644 --- 
a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -260,6 +260,7 @@ def add_list_of_vectors(value: _ListOfVectorsQuery, key: str) -> None: vectors=vectors, ) ) + target_vectors_tmp.append(key) if isinstance(vector, dict): if ( diff --git a/weaviate/collections/queries/base.py b/weaviate/collections/queries/base.py index 6d709e46c..6e4c679d8 100644 --- a/weaviate/collections/queries/base.py +++ b/weaviate/collections/queries/base.py @@ -84,14 +84,12 @@ def __init__( self._references = references self._validate_arguments = validate_arguments - self.__uses_125_api = self._connection._weaviate_version.is_at_least(1, 25, 0) self._query = _QueryGRPC( self._connection, self._name, self.__tenant, self.__consistency_level, validate_arguments=self._validate_arguments, - uses_125_api=self.__uses_125_api, ) def __retrieve_timestamp( @@ -224,7 +222,7 @@ def __deserialize_non_ref_prop(self, value: properties_pb2.Value) -> Any: if value.HasField("list_value"): return ( self.__deserialize_list_value_prop_125(value.list_value) - if self.__uses_125_api + if self._connection._weaviate_version.is_at_least(1, 25, 0) else self.__deserialize_list_value_prop_123(value.list_value) ) if value.HasField("object_value"): From 7a64e407c15c97730d84a397a8d2520a5e18920b Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 7 Feb 2025 12:30:08 +0000 Subject: [PATCH 39/48] Fix order of inheritance with pydantic and generics --- weaviate/collections/classes/grpc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index d14a88d4d..6cc8386a6 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -253,7 +253,7 @@ class Rerank(_WeaviateInput): V = TypeVar("V", OneDimensionalVectorType, TwoDimensionalVectorType) -class _ListOfVectorsQuery(Generic[V], _WeaviateInput): +class _ListOfVectorsQuery(_WeaviateInput, Generic[V]): dimensionality: 
Literal["1D", "2D"] vectors: Sequence[V] From 44f572caf423fb499bd5ec51ea3e29f332cf749e Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 7 Feb 2025 12:38:10 +0000 Subject: [PATCH 40/48] Remove incorrectly added `uses_127_api=True` --- weaviate/collections/grpc/query.py | 1 - 1 file changed, 1 deletion(-) diff --git a/weaviate/collections/grpc/query.py b/weaviate/collections/grpc/query.py index fa082d564..131945769 100644 --- a/weaviate/collections/grpc/query.py +++ b/weaviate/collections/grpc/query.py @@ -439,7 +439,6 @@ def __create_request( return search_get_pb2.SearchRequest( uses_123_api=True, uses_125_api=self._connection._weaviate_version.is_at_least(1, 25, 0), - uses_127_api=self._connection._weaviate_version.is_at_least(1, 27, 0), collection=self._name, limit=limit, offset=offset, From 090138d5af70cfa0fa592a1fde7d1c8ab5bab7fc Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 7 Feb 2025 12:53:05 +0000 Subject: [PATCH 41/48] Modify parsing to support raw `[[float]]` in `near_vector` again --- integration/test_named_vectors.py | 12 ++++++++++ weaviate/collections/grpc/shared.py | 34 +++++++++++++++-------------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index c6c388915..3700e6c9e 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -850,6 +850,18 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: ) assert len(collection) == 1 + objs = collection.query.near_vector( + [1, 2], + target_vector="regular", + ).objects + assert len(objs) == 1 + + objs = collection.query.near_vector( + [[1, 2], [3, 4]], + target_vector="colbert", + ).objects + assert len(objs) == 1 + objs = collection.query.near_vector( {"regular": [[1, 2], [2, 1]]}, target_vector="regular", diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index 75bdec455..991c67b91 100644 --- 
a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -175,8 +175,9 @@ def _vector_for_target( ) vector_for_target: List[base_search_pb2.VectorForTarget] = [] + target_vectors: List[str] = [] - def add_1d_vector(val: OneDimensionalVectorType, target_name: str) -> None: + def add_1d_vector(val: OneDimensionalVectorType, key: str) -> None: vec = _get_vector_v4(val) if ( @@ -188,24 +189,22 @@ def add_1d_vector(val: OneDimensionalVectorType, target_name: str) -> None: if self._connection._weaviate_version.is_lower_than(1, 29, 0): vector_for_target.append( - base_search_pb2.VectorForTarget( - name=target_name, vector_bytes=_Pack.single(vec) - ) + base_search_pb2.VectorForTarget(name=key, vector_bytes=_Pack.single(vec)) ) else: vector_for_target.append( base_search_pb2.VectorForTarget( - name=target_name, + name=key, vectors=[ base_pb2.Vectors( - name=target_name, + name=key, vector_bytes=_Pack.single(vec), type=base_pb2.Vectors.VECTOR_TYPE_SINGLE_FP32, ) ], ) ) - target_vectors_tmp.append(key) + target_vectors.append(key) def add_2d_vector(value: TwoDimensionalVectorType, key: str) -> None: if self._connection._weaviate_version.is_lower_than(1, 29, 0): @@ -224,7 +223,7 @@ def add_2d_vector(value: TwoDimensionalVectorType, key: str) -> None: ], ) ) - target_vectors_tmp.append(key) + target_vectors.append(key) def add_list_of_vectors(value: _ListOfVectorsQuery, key: str) -> None: if _ListOfVectorsQuery.is_one_dimensional( @@ -260,7 +259,7 @@ def add_list_of_vectors(value: _ListOfVectorsQuery, key: str) -> None: vectors=vectors, ) ) - target_vectors_tmp.append(key) + target_vectors.append(key) if isinstance(vector, dict): if ( @@ -269,7 +268,6 @@ def add_list_of_vectors(value: _ListOfVectorsQuery, key: str) -> None: or len(set(targets.target_vectors)) != len(vector) ): raise invalid_nv_exception - target_vectors_tmp: List[str] = [] for key, value in vector.items(): if _is_1d_vector(value): add_1d_vector(value, key) @@ -279,7 +277,7 @@ def 
add_list_of_vectors(value: _ListOfVectorsQuery, key: str) -> None: add_list_of_vectors(value, key) else: raise invalid_nv_exception - return vector_for_target, None, target_vectors_tmp + return vector_for_target, None, target_vectors else: if _is_1d_vector(vector): near_vector = _get_vector_v4(vector) @@ -287,10 +285,15 @@ def add_list_of_vectors(value: _ListOfVectorsQuery, key: str) -> None: raise invalid_nv_exception return None, struct.pack("{}f".format(len(near_vector)), *near_vector), None else: - raise WeaviateInvalidInputError( - """Providing lists of lists has been deprecated. Please provide a dictionary with target names as - keys and lists of numbers as values.""" - ) + if self._connection._weaviate_version.is_lower_than(1, 29, 0): + raise WeaviateInvalidInputError( + """Providing lists of lists has been deprecated. Please provide a dictionary with target names as + keys and lists of numbers as values.""" + ) + assert _is_2d_vector(vector) + assert targets is not None + add_2d_vector(vector, targets.target_vectors[0]) + return vector_for_target, None, target_vectors def _parse_near_options( self, @@ -363,7 +366,6 @@ def _parse_near_vector( targets, target_vectors = self._recompute_target_vector_to_grpc( target_vector, target_vectors_tmp ) - print(targets, target_vectors) return base_search_pb2.NearVector( vector_bytes=near_vector_grpc, certainty=certainty, From 4f0e7b0539c2c55353af14a9bc301e14c02413ba Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 7 Feb 2025 13:30:33 +0000 Subject: [PATCH 42/48] Add logic to pass simple single/multi vec searches into new Vectors message --- integration/test_named_vectors.py | 15 ++++++--- weaviate/collections/grpc/shared.py | 48 +++++++++++++++++++++-------- 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index 3700e6c9e..fc918b9f2 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -738,8 
+738,10 @@ def test_same_target_vector_multiple_input_combinations( def test_deprecated_syntax(collection_factory: CollectionFactory): dummy = collection_factory("dummy") - if dummy._connection._weaviate_version.is_lower_than(1, 27, 0): - pytest.skip("Multi vector per target is not supported in versions lower than 1.27.0") + if dummy._connection._weaviate_version.is_lower_than(1, 29, 0): + pytest.skip( + "Syntax was deprecated between 1.27 and 1.29. Now it's allowed for multivector (colbert) searches" + ) collection = collection_factory( properties=[], @@ -924,10 +926,10 @@ def test_colbert_vectors_jinaai(collection_factory: CollectionFactory) -> None: vecs = obj.vector["colbert"] assert isinstance(vecs[0], list) - objs = collection.query.near_text("Hello", target_vector="colbert").objects + objs = collection.query.near_text("Hello").objects assert len(objs) == 1 - objs = collection.query.hybrid("Hello", target_vector="colbert").objects + objs = collection.query.hybrid("Hello").objects assert len(objs) == 1 objs = collection.query.near_vector( @@ -935,5 +937,8 @@ def test_colbert_vectors_jinaai(collection_factory: CollectionFactory) -> None: ).objects assert len(objs) == 1 - objs = collection.query.near_object(uuid, target_vector="colbert").objects + objs = collection.query.near_vector([[e + 0.01 for e in vec] for vec in vecs]).objects + assert len(objs) == 1 + + objs = collection.query.near_object(uuid).objects assert len(objs) == 1 diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index 991c67b91..6d841c49a 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -285,15 +285,10 @@ def add_list_of_vectors(value: _ListOfVectorsQuery, key: str) -> None: raise invalid_nv_exception return None, struct.pack("{}f".format(len(near_vector)), *near_vector), None else: - if self._connection._weaviate_version.is_lower_than(1, 29, 0): - raise WeaviateInvalidInputError( - """Providing lists of lists has 
been deprecated. Please provide a dictionary with target names as - keys and lists of numbers as values.""" - ) - assert _is_2d_vector(vector) - assert targets is not None - add_2d_vector(vector, targets.target_vectors[0]) - return vector_for_target, None, target_vectors + raise WeaviateInvalidInputError( + """Providing lists of lists has been deprecated. Please provide a dictionary with target names as + keys and lists of numbers as values.""" + ) def _parse_near_options( self, @@ -345,12 +340,37 @@ def _parse_near_vector( targets, target_vectors = self.__target_vector_to_grpc(target_vector) if _is_1d_vector(near_vector) and len(near_vector) > 0: - # fast path for simple vector - near_vector_grpc: Optional[bytes] = struct.pack( - "{}f".format(len(near_vector)), *near_vector - ) + # fast path for simple single-vector + if self._connection._weaviate_version.is_lower_than(1, 29, 0): + near_vector_grpc: Optional[bytes] = struct.pack( + "{}f".format(len(near_vector)), *near_vector + ) + vector_per_target_tmp = None + vector_for_targets = None + vectors = None + else: + near_vector_grpc = None + vector_per_target_tmp = None + vector_for_targets = None + vectors = [ + base_pb2.Vectors( + vector_bytes=_Pack.single(near_vector), + type=base_pb2.Vectors.VECTOR_TYPE_SINGLE_FP32, + ) + ] + elif _is_2d_vector(near_vector) and self._connection._weaviate_version.is_at_least( + 1, 29, 0 + ): + # fast path for simple multi-vector + near_vector_grpc = None vector_per_target_tmp = None vector_for_targets = None + vectors = [ + base_pb2.Vectors( + vector_bytes=_Pack.multi(near_vector), + type=base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32, + ) + ] else: if self._connection._weaviate_version.is_lower_than(1, 27, 0): vector_per_target_tmp, near_vector_grpc = self._vector_per_target( @@ -366,6 +386,7 @@ def _parse_near_vector( targets, target_vectors = self._recompute_target_vector_to_grpc( target_vector, target_vectors_tmp ) + vectors = None return base_search_pb2.NearVector( 
vector_bytes=near_vector_grpc, certainty=certainty, @@ -374,6 +395,7 @@ def _parse_near_vector( target_vectors=target_vectors, vector_per_target=vector_per_target_tmp, vector_for_targets=vector_for_targets, + vectors=vectors, ) @staticmethod From 23e12998471e002f6819567bff804bd1b248e01e Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 7 Feb 2025 16:19:01 +0000 Subject: [PATCH 43/48] Update CI images, make small changes to fix tests --- .github/workflows/main.yaml | 4 ++-- integration/test_named_vectors.py | 19 ++++++++++++------- weaviate/collections/grpc/shared.py | 14 +++----------- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 36c709251..f87b25f4e 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -22,8 +22,8 @@ env: WEAVIATE_125: 1.25.29 WEAVIATE_126: 1.26.13 WEAVIATE_127: 1.27.9 - WEAVIATE_128: stable-v1.28-1a67582 - WEAVIATE_129: 1.29.0-rc.0-201e63d + WEAVIATE_128: 1.28.4-8246d6e + WEAVIATE_129: 1.29.0-rc.0-f1a96dc jobs: lint-and-format: diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index fc918b9f2..9319afddf 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -17,7 +17,7 @@ ) from weaviate.collections.classes.data import DataObject from weaviate.collections.classes.grpc import _MultiTargetVectorJoin, _ListOfVectorsQuery -from weaviate.exceptions import WeaviateInvalidInputError +from weaviate.exceptions import WeaviateInvalidInputError, WeaviateQueryError from weaviate.types import INCLUDE_VECTOR @@ -633,8 +633,16 @@ def test_multi_query_error_no_target_vector(collection_factory: CollectionFactor ], ) - with pytest.raises(WeaviateInvalidInputError): - collection.query.near_vector([[1.0, 0.0], [1.0, 0.0, 0.0]]) + if dummy._connection._weaviate_version.is_lower_than(1, 29, 0): + # gets checked in the client for validity + with pytest.raises(WeaviateInvalidInputError): + 
collection.query.near_vector([[1.0, 0.0], [1.0, 0.0, 0.0]]) + with pytest.raises(WeaviateInvalidInputError): + collection.query.near_vector([[[1.0, 0.0], [1.0, 0.0]], [1.0, 0.0, 0.0]]) + else: + # throws an error in the server instead as implicit multi vector is understood now as using multi-vectors + with pytest.raises(WeaviateQueryError): + collection.query.near_vector([[1.0, 0.0], [1.0, 0.0, 0.0]]) with pytest.raises(WeaviateInvalidInputError): collection.query.near_vector({"first": [1.0, 0.0], "second": [1.0, 0.0, 0.0]}) @@ -642,9 +650,6 @@ def test_multi_query_error_no_target_vector(collection_factory: CollectionFactor with pytest.raises(WeaviateInvalidInputError): collection.query.near_vector({"first": [[1.0, 0.0], [1.0, 0.0]], "second": [1.0, 0.0, 0.0]}) - with pytest.raises(WeaviateInvalidInputError): - collection.query.near_vector([[[1.0, 0.0], [1.0, 0.0]], [1.0, 0.0, 0.0]]) - @pytest.mark.parametrize( "target_vector, distances", @@ -738,7 +743,7 @@ def test_same_target_vector_multiple_input_combinations( def test_deprecated_syntax(collection_factory: CollectionFactory): dummy = collection_factory("dummy") - if dummy._connection._weaviate_version.is_lower_than(1, 29, 0): + if dummy._connection._weaviate_version.is_at_least(1, 29, 0): pytest.skip( "Syntax was deprecated between 1.27 and 1.29. 
Now it's allowed for multivector (colbert) searches" ) diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index 6d841c49a..4928445d7 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -722,21 +722,13 @@ class _Packing: class _Pack: - @staticmethod - def is_multi(v: PrimitiveVectorType) -> TypeGuard[List[List[NUMBER]]]: - return len(v) > 0 and isinstance(v[0], list) - - @staticmethod - def is_single(v: PrimitiveVectorType) -> TypeGuard[List[NUMBER]]: - return len(v) > 0 and (isinstance(v[0], float) or isinstance(v[0], int)) - @staticmethod def parse_single_or_multi_vec(vector: PrimitiveVectorType) -> _Packing: - if _Pack.is_multi(vector): + if _is_2d_vector(vector): return _Packing( bytes_=_Pack.multi(vector), type_=base_pb2.Vectors.VECTOR_TYPE_MULTI_FP32 ) - elif _Pack.is_single(vector): + elif _is_1d_vector(vector): return _Packing( bytes_=_Pack.single(vector), type_=base_pb2.Vectors.VECTOR_TYPE_SINGLE_FP32 ) @@ -791,7 +783,7 @@ def _is_2d_vector(inputs: Any) -> TypeGuard[TwoDimensionalVectorType]: except TypeError: return False if __is_list_type(inputs): - return __is_list_type(inputs[0]) + return _is_1d_vector(inputs[0]) return False From 9d34726b92c10e1d0debb6f997be67171edf884e Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 7 Feb 2025 16:28:04 +0000 Subject: [PATCH 44/48] Fix skip in test --- integration/test_named_vectors.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index 9319afddf..dea27b50c 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -743,7 +743,9 @@ def test_same_target_vector_multiple_input_combinations( def test_deprecated_syntax(collection_factory: CollectionFactory): dummy = collection_factory("dummy") - if dummy._connection._weaviate_version.is_at_least(1, 29, 0): + if dummy._connection._weaviate_version.is_at_least( + 1, 29, 0 
+ ) and dummy._connection._weaviate_version.is_lower_than(1, 27, 0): pytest.skip( "Syntax was deprecated between 1.27 and 1.29. Now it's allowed for multivector (colbert) searches" ) From 5105704d3e2794920c46b2b24afa7367d05c61e5 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 7 Feb 2025 16:40:46 +0000 Subject: [PATCH 45/48] Actually fix deprecated test --- integration/test_named_vectors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index dea27b50c..865a88e7c 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -745,7 +745,7 @@ def test_deprecated_syntax(collection_factory: CollectionFactory): dummy = collection_factory("dummy") if dummy._connection._weaviate_version.is_at_least( 1, 29, 0 - ) and dummy._connection._weaviate_version.is_lower_than(1, 27, 0): + ) or dummy._connection._weaviate_version.is_lower_than(1, 27, 0): pytest.skip( "Syntax was deprecated between 1.27 and 1.29. 
Now it's allowed for multivector (colbert) searches" ) From 24feb2ac9371819ed230c2bfdd680787e8677f6d Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 14 Feb 2025 13:18:26 +0000 Subject: [PATCH 46/48] Make multi vector aggregation an enum, fix other enum exports --- integration/test_named_vectors.py | 4 +++- weaviate/classes/config.py | 8 +++++++- weaviate/collections/classes/config.py | 15 +++++++++++++-- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/integration/test_named_vectors.py b/integration/test_named_vectors.py index 865a88e7c..db08ae7d1 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_named_vectors.py @@ -837,7 +837,9 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: wvc.config.Configure.NamedVectors.none( name="colbert", vector_index_config=wvc.config.Configure.VectorIndex.hnsw( - multi_vector=wvc.config.Configure.VectorIndex.MultiVector.multi_vector() + multi_vector=wvc.config.Configure.VectorIndex.MultiVector.multi_vector( + aggregation=wvc.config.MultiVectorAggregation.MAX_SIM + ) ), ), wvc.config.Configure.NamedVectors.none( diff --git a/weaviate/classes/config.py b/weaviate/classes/config.py index 48eb1b38a..0e8c87069 100644 --- a/weaviate/classes/config.py +++ b/weaviate/classes/config.py @@ -4,7 +4,10 @@ Reconfigure, DataType, GenerativeSearches, + MultiVectorAggregation, ReplicationDeletionStrategy, + PQEncoderDistribution, + PQEncoderType, Property, ReferenceProperty, Rerankers, @@ -22,15 +25,18 @@ "Reconfigure", "DataType", "GenerativeSearches", - "VectorFilterStrategy", "Integrations", "Multi2VecField", + "MultiVectorAggregation", "ReplicationDeletionStrategy", "Property", + "PQEncoderDistribution", + "PQEncoderType", "ReferenceProperty", "Rerankers", "StopwordsPreset", "Tokenization", "Vectorizers", "VectorDistances", + "VectorFilterStrategy", ] diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 2dd733afc..78b90a380 100644 
--- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -278,6 +278,17 @@ class PQEncoderDistribution(str, Enum): NORMAL = "normal" +class MultiVectorAggregation(str, Enum): + """Aggregation type to use for multivector indices. + + Attributes: + `MAX_SIM` + Maximum similarity. + """ + + MAX_SIM = "maxSim" + + class _PQEncoderConfigCreate(_ConfigCreateModel): type_: Optional[PQEncoderType] = Field(serialization_alias="type") distribution: Optional[PQEncoderDistribution] @@ -1990,10 +2001,10 @@ def __add_props( class _VectorIndexMultiVector: @staticmethod def multi_vector( - aggregation: Union[Literal["maxSim"], str, None] = None, + aggregation: Optional[MultiVectorAggregation] = None, ) -> _MultiVectorConfigCreate: return _MultiVectorConfigCreate( - aggregation=aggregation, + aggregation=aggregation.value if aggregation is not None else None, ) From e97bf267a75e84357054e3918132367445934f88 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 14 Feb 2025 13:30:00 +0000 Subject: [PATCH 47/48] Remove redundant field in stub --- weaviate/collections/queries/near_text/query.pyi | 1 - 1 file changed, 1 deletion(-) diff --git a/weaviate/collections/queries/near_text/query.pyi b/weaviate/collections/queries/near_text/query.pyi index d80de6d24..3507c73b4 100644 --- a/weaviate/collections/queries/near_text/query.pyi +++ b/weaviate/collections/queries/near_text/query.pyi @@ -321,7 +321,6 @@ class _NearTextQuery(Generic[Properties, References], _Base[Properties, Referenc group_by: Literal[None] = None, rerank: Optional[Rerank] = None, target_vector: Optional[TargetVectorJoinType] = None, - multi_target_fusion_method: Optional[Literal["Sum", "Average", "Minimum"]] = None, include_vector: INCLUDE_VECTOR = False, return_metadata: Optional[METADATA] = None, return_properties: Union[PROPERTIES, bool, None] = None, From ac6f0eab3d5630e74d79082f6b54d5016f178d6f Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 14 Feb 2025 13:33:26 +0000 
Subject: [PATCH 48/48] Change enums in `config.py` to use `BaseEnum` --- weaviate/collections/classes/config.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 78b90a380..dcb0bb47a 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from enum import Enum from typing import ( Any, ClassVar, @@ -52,8 +51,9 @@ from weaviate.collections.classes.config_vectorizers import Vectorizers as VectorizersAlias from weaviate.collections.classes.config_vectorizers import _Vectorizer, _VectorizerConfigCreate from weaviate.exceptions import WeaviateInvalidInputError +from weaviate.str_enum import BaseEnum from weaviate.util import _capitalize_first_letter -from ...warnings import _Warnings +from weaviate.warnings import _Warnings # BC for direct imports Vectorizers: TypeAlias = VectorizersAlias @@ -66,7 +66,7 @@ ] -class ConsistencyLevel(str, Enum): +class ConsistencyLevel(str, BaseEnum): """The consistency levels when writing to Weaviate with replication enabled. Attributes: @@ -80,7 +80,7 @@ class ConsistencyLevel(str, Enum): QUORUM = "QUORUM" -class DataType(str, Enum): +class DataType(str, BaseEnum): """The available primitive data types in Weaviate. Attributes: @@ -122,7 +122,7 @@ class DataType(str, Enum): OBJECT_ARRAY = "object[]" -class Tokenization(str, Enum): +class Tokenization(str, BaseEnum): """The available inverted index tokenization methods for text properties in Weaviate. Attributes: @@ -154,7 +154,7 @@ class Tokenization(str, Enum): KAGOME_KR = "kagome_kr" -class GenerativeSearches(str, Enum): +class GenerativeSearches(str, BaseEnum): """The available generative search modules in Weaviate. These modules generate text from text-based inputs. 
@@ -195,7 +195,7 @@ class GenerativeSearches(str, Enum): PALM = "generative-palm" # rename to google once all versions support it -class Rerankers(str, Enum): +class Rerankers(str, BaseEnum): """The available reranker modules in Weaviate. These modules rerank the results of a search query. @@ -221,7 +221,7 @@ class Rerankers(str, Enum): JINAAI = "reranker-jinaai" -class StopwordsPreset(str, Enum): +class StopwordsPreset(str, BaseEnum): """Preset stopwords to use in the `Stopwords` class. Attributes: @@ -235,7 +235,7 @@ class StopwordsPreset(str, Enum): EN = "en" -class ReplicationDeletionStrategy(str, Enum): +class ReplicationDeletionStrategy(str, BaseEnum): """How object deletions in multi node environments should be resolved. Attributes: @@ -250,7 +250,7 @@ class ReplicationDeletionStrategy(str, Enum): TIME_BASED_RESOLUTION = "TimeBasedResolution" -class PQEncoderType(str, Enum): +class PQEncoderType(str, BaseEnum): """Type of the PQ encoder. Attributes: @@ -264,7 +264,7 @@ class PQEncoderType(str, Enum): TILE = "tile" -class PQEncoderDistribution(str, Enum): +class PQEncoderDistribution(str, BaseEnum): """Distribution of the PQ encoder. Attributes: @@ -278,7 +278,7 @@ class PQEncoderDistribution(str, Enum): NORMAL = "normal" -class MultiVectorAggregation(str, Enum): +class MultiVectorAggregation(str, BaseEnum): """Aggregation type to use for multivector indices. Attributes: