Commit
Add connect notebook for Milvus Lite
Signed-off-by: Christy Bergman <[email protected]>
christy committed Jun 7, 2024
1 parent 57fe4f7 commit 28ae22d
Showing 24 changed files with 4,151 additions and 853 deletions.
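
The notebook added here walks through connecting to Milvus Lite, the embedded, file-backed mode of Milvus. As a rough sketch of that connection pattern (assuming pymilvus >= 2.4.2, which bundles Milvus Lite; the file path, collection name, and embedding dimension below mirror values visible in the diff, not the notebook cells verbatim):

```python
from pymilvus import MilvusClient

# Passing a local file path starts an embedded Milvus Lite instance;
# no Docker container or standalone server is required.
client = MilvusClient("./milvus_demo.db")

COLLECTION_NAME = "movies"
EMBEDDING_DIM = 256

# Drop any existing collection with the same name, then recreate it.
if client.has_collection(COLLECTION_NAME):
    client.drop_collection(COLLECTION_NAME)

client.create_collection(
    collection_name=COLLECTION_NAME,
    dimension=EMBEDDING_DIM,
    metric_type="COSINE",
    auto_id=True,
    consistency_level="Eventually",
)
print(client.list_collections())
```

The same client code works unchanged if the uri is later pointed at a Dockerized or managed Milvus server instead of a local .db file.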
353 changes: 0 additions & 353 deletions bootcamp/RAG/rtdocs/.html

This file was deleted.

Large diffs are not rendered by default.

147 changes: 95 additions & 52 deletions bootcamp/RAG/rtdocs/rbac.html → bootcamp/RAG/rtdocs_new/rbac.html

Large diffs are not rendered by default.

85 changes: 53 additions & 32 deletions bootcamp/milvus_connect.ipynb
@@ -357,7 +357,7 @@
],
"source": [
"COLLECTION_NAME = \"movies\"\n",
"EMBEDDING_LENGTH = 256\n",
"EMBEDDING_DIM = 256\n",
"\n",
"# Check if collection already exists, if so drop it.\n",
"has = utility.has_collection(COLLECTION_NAME)\n",
@@ -368,7 +368,7 @@
"# Create a collection with flexible schema and AUTOINDEX.\n",
"mc.create_collection(\n",
" COLLECTION_NAME, \n",
" EMBEDDING_LENGTH, \n",
" EMBEDDING_DIM, \n",
" consistency_level=\"Eventually\", \n",
" auto_id=True, \n",
" overwrite=True,\n",
@@ -380,10 +380,26 @@
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33mWARN\u001b[0m[0000] /Users/christy/Documents/bootcamp_scratch/bootcamp/docker-compose.yml: `version` is obsolete \n",
"Successfully disconnected from the server.\n"
]
}
],
"source": [
"# Stop local milvus.\n",
"!docker compose down"
"!docker compose down\n",
"\n",
"# Disconnect from the server.\n",
"try:\n",
" connections.disconnect(alias=\"default\")\n",
" print(\"Successfully disconnected from the server.\")\n",
"except:\n",
" pass"
]
},
{
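
For readability outside the notebook JSON, the updated teardown cell in the hunk above amounts to roughly the following (a sketch: it assumes the notebook previously connected with connections.connect(alias="default", ...) to the Docker-based Milvus, and the shell magic !docker compose down is rendered here via subprocess):

```python
import subprocess

from pymilvus import connections

# Stop the local Docker-based Milvus stack (the notebook uses `!docker compose down`).
subprocess.run(["docker", "compose", "down"], check=False)

# Disconnect the default pymilvus alias; ignore the error if nothing is connected.
try:
    connections.disconnect(alias="default")
    print("Successfully disconnected from the server.")
except Exception:
    pass
```
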
@@ -464,14 +480,15 @@
"output_type": "stream",
"text": [
"EMBEDDING_DIM: 1024\n",
"Created Milvus collection from 22 docs in 7.84 seconds\n"
"Created Milvus collection from 22 docs in 7.64 seconds\n"
]
}
],
"source": [
"from langchain_milvus import Milvus\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"import time\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"import time, pprint\n",
"\n",
"# Define the embedding model.\n",
"model_name = \"BAAI/bge-large-en-v1.5\"\n",
@@ -485,15 +502,19 @@
"EMBEDDING_DIM = embed_model.dict()['client'].get_sentence_embedding_dimension()\n",
"print(f\"EMBEDDING_DIM: {EMBEDDING_DIM}\")\n",
"\n",
"# # Chunking\n",
"# text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=51)\n",
"\n",
"# Create a Milvus collection from the documents and embeddings.\n",
"start_time = time.time()\n",
"# docs = text_splitter.split_documents(docs)\n",
"vectorstore = Milvus.from_documents(\n",
" documents=docs,\n",
" embedding=embed_model,\n",
" connection_args={\n",
" \"uri\": \"./milvus_demo.db\",\n",
" },\n",
" # Override LangChain default values.\n",
" # Override LangChain default values for Milvus.\n",
" consistency_level=\"Eventually\",\n",
" drop_old=True,\n",
" index_params = {\n",
@@ -524,8 +545,6 @@
}
],
"source": [
"import pprint\n",
"\n",
"# Describe the collection.\n",
"print(f\"collection_name: {vectorstore.collection_name}\")\n",
"print(f\"schema: {vectorstore.fields}\")\n",
@@ -606,14 +625,14 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/vn/4v5_m9mx69x3h7jcl1chb7nr0000gn/T/ipykernel_11915/2544016635.py:13: DeprecationWarning: Call to deprecated class method from_defaults. (ServiceContext is deprecated, please use `llama_index.settings.Settings` instead.) -- Deprecated since version 0.10.0.\n",
"/var/folders/vn/4v5_m9mx69x3h7jcl1chb7nr0000gn/T/ipykernel_28726/1337788918.py:12: DeprecationWarning: Call to deprecated class method from_defaults. (ServiceContext is deprecated, please use `llama_index.settings.Settings` instead.) -- Deprecated since version 0.10.0.\n",
" service_context = ServiceContext.from_defaults(\n",
"/opt/miniconda3/envs/py311-unum/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n"
@@ -635,12 +654,11 @@
" 'text_instruction': None}\n",
"\n",
"Start chunking, embedding, inserting...\n",
"Created LlamaIndex collection from 1 docs in 109.35 seconds\n"
"Created LlamaIndex collection from 1 docs in 98.19 seconds\n"
]
}
],
"source": [
"from pymilvus import MilvusClient\n",
"from llama_index.core import (\n",
" Settings,\n",
" ServiceContext,\n",
@@ -649,11 +667,11 @@
")\n",
"from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
"from llama_index.vector_stores.milvus import MilvusVectorStore\n",
"import time, pprint\n",
"\n",
"# Define the embedding model.\n",
"milvus_client = MilvusClient()\n",
"service_context = ServiceContext.from_defaults(\n",
" # LlamaIndex local location is same as default HF cache location.\n",
" # LlamaIndex local: translates to the same location as default HF cache.\n",
" embed_model=\"local:BAAI/bge-large-en-v1.5\",\n",
")\n",
"# Display what LlamaIndex exposes.\n",
@@ -665,13 +683,20 @@
"EMBEDDING_DIM = 1024\n",
"\n",
"# Create a Milvus collection from the documents and embeddings.\n",
"vector_store = MilvusVectorStore(\n",
" client=milvus_client, \n",
"vectorstore = MilvusVectorStore(\n",
" uri=\"./milvus_llamaindex.db\",\n",
" dim=EMBEDDING_DIM,\n",
" overwrite=True\n",
" # Override LlamaIndex default values for Milvus.\n",
" consistency_level=\"Eventually\",\n",
" drop_old=True,\n",
" index_params = {\n",
" \"metric_type\": \"COSINE\",\n",
" \"index_type\": \"AUTOINDEX\",\n",
" \"params\": {},}\n",
")\n",
"storage_context = StorageContext.from_defaults(\n",
" vector_store=vector_store)\n",
" vector_store=vectorstore\n",
")\n",
"\n",
"print(f\"Start chunking, embedding, inserting...\")\n",
"start_time = time.time()\n",
@@ -683,19 +708,18 @@
")\n",
"end_time = time.time()\n",
"print(f\"Created LlamaIndex collection from {len(docs[:1])} docs in {end_time - start_time:.2f} seconds\")\n",
"# Created LlamaIndex Milvus collection in 109.35 seconds"
"# Created LlamaIndex collection from 1 docs in 106.32 seconds"
]
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Milvus vector database:\n",
"stores_text: True\n",
"is_embedding_query: True\n",
"stores_node: True\n",
@@ -706,28 +730,25 @@
"embedding_field: embedding\n",
"doc_id_field: doc_id\n",
"similarity_metric: IP\n",
"consistency_level: Strong\n",
"overwrite: True\n",
"consistency_level: Eventually\n",
"overwrite: False\n",
"text_key: None\n",
"output_fields: []\n",
"index_config: {}\n"
]
}
],
"source": [
"import pprint\n",
"\n",
"# Describe the collection.\n",
"print(\"Milvus vector database:\")\n",
"temp = vector_store.to_dict()\n",
"first_10_keys = list(temp.keys())[:15]\n",
"for key in first_10_keys:\n",
"temp = llamaindex.storage_context.vector_store.to_dict()\n",
"first_15_keys = list(temp.keys())[:15]\n",
"for key in first_15_keys:\n",
" print(f\"{key}: {temp[key]}\")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
@@ -737,7 +758,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 23,
"metadata": {},
"outputs": [
{
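
Likewise, the LlamaIndex hunks reduce to roughly the cell below (a sketch: docs is assumed to hold LlamaIndex Document objects loaded earlier, the VectorStoreIndex.from_documents call sits in a collapsed region so its arguments follow the usual LlamaIndex 0.10.x pattern rather than the notebook verbatim, and the MilvusVectorStore keyword values simply mirror the diff):

```python
import time

from llama_index.core import ServiceContext, StorageContext, VectorStoreIndex
from llama_index.vector_stores.milvus import MilvusVectorStore

# Local HuggingFace embedding model; "local:" resolves to the default HF cache location.
service_context = ServiceContext.from_defaults(
    embed_model="local:BAAI/bge-large-en-v1.5",
)
EMBEDDING_DIM = 1024  # output dimension of BAAI/bge-large-en-v1.5

# Milvus Lite vector store backed by a local file.
vectorstore = MilvusVectorStore(
    uri="./milvus_llamaindex.db",
    dim=EMBEDDING_DIM,
    # Override LlamaIndex default values for Milvus (values as written in the diff).
    consistency_level="Eventually",
    drop_old=True,
    index_params={
        "metric_type": "COSINE",
        "index_type": "AUTOINDEX",
        "params": {},
    },
)
storage_context = StorageContext.from_defaults(vector_store=vectorstore)

print("Start chunking, embedding, inserting...")
start_time = time.time()
llamaindex = VectorStoreIndex.from_documents(
    docs[:1],  # assumed: documents loaded earlier in the notebook
    storage_context=storage_context,
    service_context=service_context,
)
print(f"Created LlamaIndex collection from {len(docs[:1])} docs in {time.time() - start_time:.2f} seconds")
```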
