Commit
Add connect notebook for Milvus Lite
Signed-off-by: Christy Bergman <[email protected]>
christy committed Jun 7, 2024
1 parent 57fe4f7 commit 28ae22d
Showing 24 changed files with 4,151 additions and 853 deletions.
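
The notebook added here walks through connecting to Milvus Lite, the embedded, file-backed mode of Milvus. As a rough sketch of that connection pattern (assuming pymilvus >= 2.4.2, which bundles Milvus Lite; the file path, collection name, and embedding dimension below mirror values visible in the diff, not the notebook cells verbatim):

```python
from pymilvus import MilvusClient

# Passing a local file path starts an embedded Milvus Lite instance;
# no Docker container or standalone server is required.
client = MilvusClient("./milvus_demo.db")

COLLECTION_NAME = "movies"
EMBEDDING_DIM = 256

# Drop any existing collection with the same name, then recreate it.
if client.has_collection(COLLECTION_NAME):
    client.drop_collection(COLLECTION_NAME)

client.create_collection(
    collection_name=COLLECTION_NAME,
    dimension=EMBEDDING_DIM,
    metric_type="COSINE",
    auto_id=True,
    consistency_level="Eventually",
)
print(client.list_collections())
```

The same client code works unchanged if the uri is later pointed at a Dockerized or managed Milvus server instead of a local .db file.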
353 changes: 0 additions & 353 deletions bootcamp/RAG/rtdocs/.html

This file was deleted.

Large diffs are not rendered by default.

147 changes: 95 additions & 52 deletions bootcamp/RAG/rtdocs/rbac.html → bootcamp/RAG/rtdocs_new/rbac.html

Large diffs are not rendered by default.

85 changes: 53 additions & 32 deletions bootcamp/milvus_connect.ipynb
@@ -357,7 +357,7 @@
],
"source": [
"COLLECTION_NAME = \"movies\"\n",
"EMBEDDING_LENGTH = 256\n",
"EMBEDDING_DIM = 256\n",
"\n",
"# Check if collection already exists, if so drop it.\n",
"has = utility.has_collection(COLLECTION_NAME)\n",
@@ -368,7 +368,7 @@
"# Create a collection with flexible schema and AUTOINDEX.\n",
"mc.create_collection(\n",
" COLLECTION_NAME, \n",
" EMBEDDING_LENGTH, \n",
" EMBEDDING_DIM, \n",
" consistency_level=\"Eventually\", \n",
" auto_id=True, \n",
" overwrite=True,\n",
@@ -380,10 +380,26 @@
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33mWARN\u001b[0m[0000] /Users/christy/Documents/bootcamp_scratch/bootcamp/docker-compose.yml: `version` is obsolete \n",
"Successfully disconnected from the server.\n"
]
}
],
"source": [
"# Stop local milvus.\n",
"!docker compose down"
"!docker compose down\n",
"\n",
"# Disconnect from the server.\n",
"try:\n",
" connections.disconnect(alias=\"default\")\n",
" print(\"Successfully disconnected from the server.\")\n",
"except:\n",
" pass"
]
},
{
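
For readability outside the notebook JSON, the updated teardown cell in the hunk above amounts to roughly the following (a sketch: it assumes the notebook previously connected with connections.connect(alias="default", ...) to the Docker-based Milvus, and the shell magic !docker compose down is rendered here via subprocess):

```python
import subprocess

from pymilvus import connections

# Stop the local Docker-based Milvus stack (the notebook uses `!docker compose down`).
subprocess.run(["docker", "compose", "down"], check=False)

# Disconnect the default pymilvus alias; ignore the error if nothing is connected.
try:
    connections.disconnect(alias="default")
    print("Successfully disconnected from the server.")
except Exception:
    pass
```
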
@@ -464,14 +480,15 @@
"output_type": "stream",
"text": [
"EMBEDDING_DIM: 1024\n",
"Created Milvus collection from 22 docs in 7.84 seconds\n"
"Created Milvus collection from 22 docs in 7.64 seconds\n"
]
}
],
"source": [
"from langchain_milvus import Milvus\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"import time\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"import time, pprint\n",
"\n",
"# Define the embedding model.\n",
"model_name = \"BAAI/bge-large-en-v1.5\"\n",
@@ -485,15 +502,19 @@
"EMBEDDING_DIM = embed_model.dict()['client'].get_sentence_embedding_dimension()\n",
"print(f\"EMBEDDING_DIM: {EMBEDDING_DIM}\")\n",
"\n",
"# # Chunking\n",
"# text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=51)\n",
"\n",
"# Create a Milvus collection from the documents and embeddings.\n",
"start_time = time.time()\n",
"# docs = text_splitter.split_documents(docs)\n",
"vectorstore = Milvus.from_documents(\n",
" documents=docs,\n",
" embedding=embed_model,\n",
" connection_args={\n",
" \"uri\": \"./milvus_demo.db\",\n",
" },\n",
" # Override LangChain default values.\n",
" # Override LangChain default values for Milvus.\n",
" consistency_level=\"Eventually\",\n",
" drop_old=True,\n",
" index_params = {\n",
@@ -524,8 +545,6 @@
}
],
"source": [
"import pprint\n",
"\n",
"# Describe the collection.\n",
"print(f\"collection_name: {vectorstore.collection_name}\")\n",
"print(f\"schema: {vectorstore.fields}\")\n",
@@ -606,14 +625,14 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/vn/4v5_m9mx69x3h7jcl1chb7nr0000gn/T/ipykernel_11915/2544016635.py:13: DeprecationWarning: Call to deprecated class method from_defaults. (ServiceContext is deprecated, please use `llama_index.settings.Settings` instead.) -- Deprecated since version 0.10.0.\n",
"/var/folders/vn/4v5_m9mx69x3h7jcl1chb7nr0000gn/T/ipykernel_28726/1337788918.py:12: DeprecationWarning: Call to deprecated class method from_defaults. (ServiceContext is deprecated, please use `llama_index.settings.Settings` instead.) -- Deprecated since version 0.10.0.\n",
" service_context = ServiceContext.from_defaults(\n",
"/opt/miniconda3/envs/py311-unum/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n"
@@ -635,12 +654,11 @@
" 'text_instruction': None}\n",
"\n",
"Start chunking, embedding, inserting...\n",
"Created LlamaIndex collection from 1 docs in 109.35 seconds\n"
"Created LlamaIndex collection from 1 docs in 98.19 seconds\n"
]
}
],
"source": [
"from pymilvus import MilvusClient\n",
"from llama_index.core import (\n",
" Settings,\n",
" ServiceContext,\n",
@@ -649,11 +667,11 @@
")\n",
"from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
"from llama_index.vector_stores.milvus import MilvusVectorStore\n",
"import time, pprint\n",
"\n",
"# Define the embedding model.\n",
"milvus_client = MilvusClient()\n",
"service_context = ServiceContext.from_defaults(\n",
" # LlamaIndex local location is same as default HF cache location.\n",
" # LlamaIndex local: translates to the same location as default HF cache.\n",
" embed_model=\"local:BAAI/bge-large-en-v1.5\",\n",
")\n",
"# Display what LlamaIndex exposes.\n",
@@ -665,13 +683,20 @@
"EMBEDDING_DIM = 1024\n",
"\n",
"# Create a Milvus collection from the documents and embeddings.\n",
"vector_store = MilvusVectorStore(\n",
" client=milvus_client, \n",
"vectorstore = MilvusVectorStore(\n",
" uri=\"./milvus_llamaindex.db\",\n",
" dim=EMBEDDING_DIM,\n",
" overwrite=True\n",
" # Override LlamaIndex default values for Milvus.\n",
" consistency_level=\"Eventually\",\n",
" drop_old=True,\n",
" index_params = {\n",
" \"metric_type\": \"COSINE\",\n",
" \"index_type\": \"AUTOINDEX\",\n",
" \"params\": {},}\n",
")\n",
"storage_context = StorageContext.from_defaults(\n",
" vector_store=vector_store)\n",
" vector_store=vectorstore\n",
")\n",
"\n",
"print(f\"Start chunking, embedding, inserting...\")\n",
"start_time = time.time()\n",
@@ -683,19 +708,18 @@
")\n",
"end_time = time.time()\n",
"print(f\"Created LlamaIndex collection from {len(docs[:1])} docs in {end_time - start_time:.2f} seconds\")\n",
"# Created LlamaIndex Milvus collection in 109.35 seconds"
"# Created LlamaIndex collection from 1 docs in 106.32 seconds"
]
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Milvus vector database:\n",
"stores_text: True\n",
"is_embedding_query: True\n",
"stores_node: True\n",
@@ -706,28 +730,25 @@
"embedding_field: embedding\n",
"doc_id_field: doc_id\n",
"similarity_metric: IP\n",
"consistency_level: Strong\n",
"overwrite: True\n",
"consistency_level: Eventually\n",
"overwrite: False\n",
"text_key: None\n",
"output_fields: []\n",
"index_config: {}\n"
]
}
],
"source": [
"import pprint\n",
"\n",
"# Describe the collection.\n",
"print(\"Milvus vector database:\")\n",
"temp = vector_store.to_dict()\n",
"first_10_keys = list(temp.keys())[:15]\n",
"for key in first_10_keys:\n",
"temp = llamaindex.storage_context.vector_store.to_dict()\n",
"first_15_keys = list(temp.keys())[:15]\n",
"for key in first_15_keys:\n",
" print(f\"{key}: {temp[key]}\")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
@@ -737,7 +758,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 23,
"metadata": {},
"outputs": [
{
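
Likewise, the LlamaIndex hunks reduce to roughly the cell below (a sketch: docs is assumed to hold LlamaIndex Document objects loaded earlier, the VectorStoreIndex.from_documents call sits in a collapsed region so its arguments follow the usual LlamaIndex 0.10.x pattern rather than the notebook verbatim, and the MilvusVectorStore keyword values simply mirror the diff):

```python
import time

from llama_index.core import ServiceContext, StorageContext, VectorStoreIndex
from llama_index.vector_stores.milvus import MilvusVectorStore

# Local HuggingFace embedding model; "local:" resolves to the default HF cache location.
service_context = ServiceContext.from_defaults(
    embed_model="local:BAAI/bge-large-en-v1.5",
)
EMBEDDING_DIM = 1024  # output dimension of BAAI/bge-large-en-v1.5

# Milvus Lite vector store backed by a local file.
vectorstore = MilvusVectorStore(
    uri="./milvus_llamaindex.db",
    dim=EMBEDDING_DIM,
    # Override LlamaIndex default values for Milvus (values as written in the diff).
    consistency_level="Eventually",
    drop_old=True,
    index_params={
        "metric_type": "COSINE",
        "index_type": "AUTOINDEX",
        "params": {},
    },
)
storage_context = StorageContext.from_defaults(vector_store=vectorstore)

print("Start chunking, embedding, inserting...")
start_time = time.time()
llamaindex = VectorStoreIndex.from_documents(
    docs[:1],  # assumed: documents loaded earlier in the notebook
    storage_context=storage_context,
    service_context=service_context,
)
print(f"Created LlamaIndex collection from {len(docs[:1])} docs in {time.time() - start_time:.2f} seconds")
```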
