From 97fd0ec4e4f6a1fda7113c9334221ef1f35dcfee Mon Sep 17 00:00:00 2001 From: wd0517 Date: Fri, 15 Nov 2024 09:59:59 +0000 Subject: [PATCH] examples: use new vector index syntax --- examples/image_search/example.ipynb | 6 +-- .../jina-ai-embeddings-demo.py | 1 - examples/orm-peewee-quickstart/.env.example | 5 ++- .../peewee-quickstart.py | 17 +++++--- .../orm-sqlalchemy-quickstart/.env.example | 9 +++- .../sqlalchemy-quickstart.py | 43 ++++++++++++++++--- examples/semantic-cache/cache.py | 1 - 7 files changed, 61 insertions(+), 21 deletions(-) diff --git a/examples/image_search/example.ipynb b/examples/image_search/example.ipynb index ddca260..7c7ba74 100644 --- a/examples/image_search/example.ipynb +++ b/examples/image_search/example.ipynb @@ -89,11 +89,7 @@ "\n", " id = Column(Integer, primary_key=True)\n", " image_id = Column(Integer)\n", - " embedding = Column(\n", - " VectorType(CLIP_DIMENSION),\n", - " # using hnsw index with cosine distance\n", - " comment=\"hnsw(distance=cosine)\"\n", - " )\n", + " embedding = Column(VectorType(CLIP_DIMENSION))\n", "\n", "Base.metadata.drop_all(engine)\n", "Base.metadata.create_all(engine)" diff --git a/examples/jina-ai-embeddings-demo/jina-ai-embeddings-demo.py b/examples/jina-ai-embeddings-demo/jina-ai-embeddings-demo.py index 987f3da..44e7ae9 100644 --- a/examples/jina-ai-embeddings-demo/jina-ai-embeddings-demo.py +++ b/examples/jina-ai-embeddings-demo/jina-ai-embeddings-demo.py @@ -47,7 +47,6 @@ class Document(Base): # DIMENSIONS is determined by the embedding model, # for Jina AI's jina-embeddings-v2-base-en model it's 768. VectorType(dim=768), - comment="hnsw(distance=cosine)" ) diff --git a/examples/orm-peewee-quickstart/.env.example b/examples/orm-peewee-quickstart/.env.example index e5bd2ae..a1d8448 100644 --- a/examples/orm-peewee-quickstart/.env.example +++ b/examples/orm-peewee-quickstart/.env.example @@ -3,5 +3,6 @@ TIDB_PORT=4000 TIDB_USERNAME=******.root TIDB_PASSWORD=******** TIDB_DATABASE=test -# For macOS. 
For other platforms, please refer https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-clusters#root-certificate-default-path. -TIDB_CA_PATH=/etc/ssl/cert.pem \ No newline at end of file +# TiDB Serverless Cluster requires SSL connection for public network access. +# For local TiDB cluster, please set TIDB_SSL=false to disable SSL. +TIDB_SSL=true \ No newline at end of file diff --git a/examples/orm-peewee-quickstart/peewee-quickstart.py b/examples/orm-peewee-quickstart/peewee-quickstart.py index a46b47f..5e77613 100644 --- a/examples/orm-peewee-quickstart/peewee-quickstart.py +++ b/examples/orm-peewee-quickstart/peewee-quickstart.py @@ -1,21 +1,22 @@ import os import dotenv -from peewee import Model, MySQLDatabase, SQL, TextField -from tidb_vector.peewee import VectorField +from tidb_vector.peewee import VectorField, VectorAdaptor +from tidb_vector.constants import DistanceMetric +from peewee import Model, MySQLDatabase, TextField dotenv.load_dotenv() # Step 1: Connect to TiDB using Peewee. # Using `pymysql` as the driver. -connect_kwargs = { +ssl_kwargs = { 'ssl_verify_cert': True, 'ssl_verify_identity': True, } # Using `mysqlclient` as the driver. -# connect_kwargs = { +# ssl_kwargs = { # 'ssl_mode': 'VERIFY_IDENTITY', # 'ssl': { # # Root certificate default path @@ -30,7 +31,7 @@ password=os.environ.get('TIDB_PASSWORD', ''), host=os.environ.get('TIDB_HOST', 'localhost'), port=int(os.environ.get('TIDB_PORT', '4000')), - **connect_kwargs, + **ssl_kwargs if os.environ.get('TIDB_SSL', 'false').lower() == 'true' else {}, ) @@ -53,12 +54,16 @@ class Meta: table_name = 'peewee_demo_documents_with_index' content = TextField() - embedding = VectorField(3, constraints=[SQL("COMMENT 'hnsw(distance=cosine)'")]) + embedding = VectorField(3) db.connect() db.drop_tables([Document, DocumentWithIndex]) db.create_tables([Document, DocumentWithIndex]) +VectorAdaptor(db).create_vector_index( + DocumentWithIndex.embedding, + DistanceMetric.COSINE, +) # Step 3. 
Insert embeddings into the table. Document.create(content='dog', embedding=[1, 2, 1]) diff --git a/examples/orm-sqlalchemy-quickstart/.env.example b/examples/orm-sqlalchemy-quickstart/.env.example index 5c55f22..a1d8448 100644 --- a/examples/orm-sqlalchemy-quickstart/.env.example +++ b/examples/orm-sqlalchemy-quickstart/.env.example @@ -1 +1,8 @@ -TIDB_DATABASE_URL=mysql+pymysql://:@:4000/?ssl_ca=&ssl_verify_cert=true&ssl_verify_identity=true \ No newline at end of file +TIDB_HOST=gateway01.****.prod.aws.tidbcloud.com +TIDB_PORT=4000 +TIDB_USERNAME=******.root +TIDB_PASSWORD=******** +TIDB_DATABASE=test +# TiDB Serverless Cluster requires SSL connection for public network access. +# For local TiDB cluster, please set TIDB_SSL=false to disable SSL. +TIDB_SSL=true \ No newline at end of file diff --git a/examples/orm-sqlalchemy-quickstart/sqlalchemy-quickstart.py b/examples/orm-sqlalchemy-quickstart/sqlalchemy-quickstart.py index 1883a59..ec13061 100644 --- a/examples/orm-sqlalchemy-quickstart/sqlalchemy-quickstart.py +++ b/examples/orm-sqlalchemy-quickstart/sqlalchemy-quickstart.py @@ -1,15 +1,43 @@ import os import dotenv -from sqlalchemy import Column, Integer, create_engine, Text +from sqlalchemy import Column, Integer, create_engine, Text, URL from sqlalchemy.orm import declarative_base, Session -from tidb_vector.sqlalchemy import VectorType +from tidb_vector.sqlalchemy import VectorType, VectorAdaptor +from tidb_vector.constants import DistanceMetric dotenv.load_dotenv() # Step 1: Connect to TiDB using SQLAlchemy. -tidb_connection_string = os.environ['TIDB_DATABASE_URL'] -engine = create_engine(tidb_connection_string) + +# Using `pymysql` as the driver. +drivername = 'mysql+pymysql' +ssl_kwargs = { + 'ssl_verify_cert': 'true', + 'ssl_verify_identity': 'true', +} + +# Using `mysqlclient` as the driver. 
+# drivername = 'mysql+mysqldb' +# ssl_kwargs = { +# 'ssl_mode': 'VERIFY_IDENTITY', +# # URL query values must be strings, so pass the CA path as `ssl_ca` +# # rather than a nested `ssl` dict. +# # Root certificate default path +# # https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-clusters/#root-certificate-default-path +# 'ssl_ca': os.environ.get('TIDB_CA_PATH', '/path/to/ca.pem'), +# } + +engine = create_engine(URL.create( + drivername=drivername, + username=os.environ['TIDB_USERNAME'], + password=os.environ['TIDB_PASSWORD'], + host=os.environ['TIDB_HOST'], + port=os.environ['TIDB_PORT'], + database=os.environ['TIDB_DATABASE'], + query=ssl_kwargs if os.environ.get('TIDB_SSL', 'false').lower() == 'true' else {}, +)) + # Step 2: Define a table with a vector column. Base = declarative_base() @@ -27,11 +55,16 @@ class DocumentWithIndex(Base): __tablename__ = 'sqlalchemy_demo_documents_with_index' id = Column(Integer, primary_key=True) content = Column(Text) - embedding = Column(VectorType(3), comment="hnsw(distance=cosine)") + embedding = Column(VectorType(3)) Base.metadata.drop_all(engine) Base.metadata.create_all(engine) +VectorAdaptor(engine).create_vector_index( + DocumentWithIndex.embedding, + DistanceMetric.COSINE, + skip_existing=True, +) # Step 3: Insert embeddings into the table. diff --git a/examples/semantic-cache/cache.py b/examples/semantic-cache/cache.py index 413d28e..6935041 100644 --- a/examples/semantic-cache/cache.py +++ b/examples/semantic-cache/cache.py @@ -57,7 +57,6 @@ class Cache(SQLModel, table=True): sa_column=Column( VectorType(768), default=None, - comment="hnsw(distance=l2)", nullable=False, ) )