diff --git a/bootcamp/RAG/readthedocs_zilliz_langchain.ipynb b/bootcamp/RAG/readthedocs_zilliz_langchain.ipynb index 7bd67b3e4..34202c9ca 100755 --- a/bootcamp/RAG/readthedocs_zilliz_langchain.ipynb +++ b/bootcamp/RAG/readthedocs_zilliz_langchain.ipynb @@ -34,7 +34,7 @@ "outputs": [], "source": [ "# For colab install these libraries in this order:\n", - "# !pip install pymilvus, langchain, torch, transformers, python-dotenv, accelerate\n", + "# !pip install pymilvus langchain torch transformers python-dotenv\n", "\n", "# Import common libraries.\n", "import time\n", @@ -74,20 +74,17 @@ "# !wget -r -A.html -P rtdocs --header=\"Accept-Charset: UTF-8\" $DOCS_PAGE" ] }, - { - "cell_type": "markdown", - "id": "8a67e382", - "metadata": {}, - "source": [ - "## Start up a Zilliz free tier cluster." - ] - }, { "cell_type": "markdown", "id": "fb844837", "metadata": {}, "source": [ - "Code in this notebook uses fully-managed Milvus on [Ziliz Cloud free trial](https://cloud.zilliz.com/login). Choose the default \"Starter\" option when you provision > Create collection > Give it a name > Create cluster and collection.\n", + "## Start up a Zilliz free tier cluster.\n", + "\n", + "Code in this notebook uses fully-managed Milvus on [Zilliz Cloud free trial](https://cloud.zilliz.com/login). \n", + " 1. Choose the default \"Starter\" option when you provision > Create collection > Give it a name > Create cluster and collection. \n", + " 2. On the Cluster main page, copy your `API Key` and store it locally in a .env variable. See the note below on how to do that.\n", + " 3. Also on the Cluster main page, copy the `Public Endpoint URI`.\n", "\n", "πŸ’‘ Note: To keep your tokens private, best practice is to use an **env variable**. See [how to save api key in env variable](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety).
\n", "\n", @@ -118,10 +115,9 @@ "load_dotenv()\n", "TOKEN = os.getenv(\"ZILLIZ_API_KEY\")\n", "\n", - "# Connect to Zilliz cloud using enpoint URI and API key TOKEN.\n", + "# Connect to Zilliz cloud using endpoint URI and API key TOKEN.\n", "# TODO change this before checking into github.\n", - "# CLUSTER_ENDPOINT=\"https://in03-xxxx.api.gcp-us-west1.zillizcloud.com:443\"\n", - "\n", + "CLUSTER_ENDPOINT=\"https://in03-xxxx.api.gcp-us-west1.zillizcloud.com:443\"\n", "connections.connect(\n", " alias='default',\n", " # Public endpoint obtained from Zilliz Cloud\n", @@ -165,7 +161,7 @@ ")\n", "model_name: BAAI/bge-base-en-v1.5\n", "EMBEDDING_LENGTH: 768\n", - "MAX_SEQ_LENGTH: 1536\n" + "MAX_SEQ_LENGTH: 512\n" ] } ], @@ -189,9 +185,13 @@ "# Get the model parameters and save for later.\n", "EMBEDDING_LENGTH = encoder.get_sentence_embedding_dimension()\n", "MAX_SEQ_LENGTH_IN_TOKENS = encoder.get_max_seq_length() \n", - "# Assume tokens are 3 characters long.\n", - "MAX_SEQ_LENGTH = MAX_SEQ_LENGTH_IN_TOKENS * 3\n", - "HF_EOS_TOKEN_LENGTH = 1 * 3\n", + "# # Assume tokens are 3 characters long.\n", + "# MAX_SEQ_LENGTH = MAX_SEQ_LENGTH_IN_TOKENS * 3\n", + "# HF_EOS_TOKEN_LENGTH = 1 * 3\n", + "# Test with 512 sequence length.\n", + "MAX_SEQ_LENGTH = MAX_SEQ_LENGTH_IN_TOKENS\n", + "HF_EOS_TOKEN_LENGTH = 1\n", + "\n", "\n", "# Inspect model parameters.\n", "print(f\"model_name: {model_name}\")\n", @@ -383,23 +383,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "chunking time: 0.013296842575073242\n", - "docs: 8, split into: 8\n", - "split into chunks: 55, type: list of \n", - "\n", - "Looking at a sample chunk...\n", - "InstallationΒΆ Installing via pipΒΆ PyMilvus is in the Python Package Index. 
PyMilvus only support pyt\n", - "{'h1': 'Installation', 'h2': 'Installing via pip', 'source': 'rtdocs/pymilvus.readthedocs.io/en/latest/install.html'}\n" - ] - } - ], + "outputs": [], "source": [ "from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter\n", "from bs4 import BeautifulSoup\n", @@ -415,6 +401,7 @@ "# Use the embedding model parameters.\n", "chunk_size = MAX_SEQ_LENGTH - HF_EOS_TOKEN_LENGTH\n", "chunk_overlap = np.round(chunk_size * 0.10, 0)\n", + "print(f\"chunk_size: {chunk_size}, chunk_overlap: {chunk_overlap}\")\n", "\n", "# Create an instance of the RecursiveCharacterTextSplitter\n", "child_splitter = RecursiveCharacterTextSplitter(\n", @@ -550,11 +537,7 @@ " 'h1': chunk.metadata['h1'][:50],\n", " 'h2': h2,\n", " }\n", - " chunk_list.append(chunk_dict)\n", - "\n", - "# # TODO - remove this before saving in github.\n", - "# for chunk in chunk_list[:1]:\n", - "# print(chunk)" + " chunk_list.append(chunk_dict)" ] }, { @@ -568,7 +551,7 @@ "output_type": "stream", "text": [ "Start inserting entities\n", - "Milvus insert time for 55 vectors: 0.19388794898986816 seconds\n", + "Milvus insert time for 156 vectors: 1.3558049201965332 seconds\n", "[{\"name\":\"_default\",\"collection_name\":\"MilvusDocs\",\"description\":\"\"}]\n" ] } @@ -633,7 +616,7 @@ "# Define a sample question about your data.\n", "QUESTION1 = \"What do the parameters for HNSW mean?\"\n", "QUESTION2 = \"What are good default values for HNSW parameters with 25K vectors dim 768?\"\n", - "QUESTION3 = \"Default distance metric used in AUTOINDEX?\"\n", + "QUESTION3 = \"What is the default AUTOINDEX distance metric in Milvus Client?\"\n", "QUERY = [QUESTION1, QUESTION2, QUESTION3]\n", "\n", "# Inspect the length of the query.\n", @@ -678,7 +661,7 @@ "output_type": "stream", "text": [ "Loaded milvus collection into memory.\n", - "Milvus search time: 0.0818781852722168 sec\n", + "Milvus search time: 0.07079720497131348 sec\n", "type: , count: 3\n" ] } @@ -700,6 +683,9 @@ "# Return top k results with AUTOINDEX.\n", "TOP_K = 3\n", "\n", + "# Define output fields to return.\n", + "OUTPUT_FIELDS = [\"h1\", \"h2\", \"source\", \"text\"]\n", + "\n", "# Run semantic vector search using your query and the vector database.\n", "start_time = time.time()\n", "results = mc.search(\n", @@ -709,7 +695,7 @@ " param={},\n", " # Milvus can utilize metadata to enhance the search experience in boolean expressions.\n", " # expr=\"\",\n", - " output_fields=[\"h1\", \"h2\", \"text\", \"source\"], \n", + " output_fields=OUTPUT_FIELDS, \n", " limit=TOP_K,\n", " consistency_level=\"Eventually\"\n", " )\n", @@ -732,29 +718,23 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Length of context: 1533\n", - "1 1\n" - ] - } - ], + "outputs": [], "source": [ - "# # TODO - remove printing before saving in github.\n", - "# for n, hits in enumerate(results):\n", - "# print(f\"{n}th query result\")\n", - "# for hit in hits:\n", - "# print(hit)\n", - "\n", - "# Assemble the context and context metadata.\n", - "# context, context_metadata = _utils.assemble_retrieved_context(results, num_shot_answers=TOP_K)\n", - "context, context_metadata = _utils.assemble_retrieved_context(results, num_shot_answers=1)\n", - "print(len(context_metadata), len(context_metadata))" + "# Assemble just the top retrieved 1st context and context metadata.\n", + "# context, context_metadata = 
_utils.assemble_retrieved_context(results, METADATA_FIELDS, num_shot_answers=TOP_K)\n", + "\n", + "context, context_metadata = _utils.assemble_retrieved_context(results, OUTPUT_FIELDS, num_shot_answers=1)\n", + "print(f\"Stuffing {len(context)} contexts and {len(context_metadata)} metadata entries into the prompt.\")\n", + "print(f\"Length context: {len(context[0])}\")\n", + "\n", + "# TODO - remove printing before saving in github.\n", + "# Loop through each context and its metadata and print.\n", + "for i in range(len(context)):\n", + " print(f\"Context: {context[i][:200]}\")\n", + " print(f\"Metadata: {context_metadata[i]}\")\n", + " print()" ] }, { @@ -769,7 +749,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "id": "3e7fa0b6", "metadata": {}, "outputs": [ @@ -811,7 +791,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 17, "id": "a68e87b1", "metadata": {}, "outputs": [ @@ -831,7 +811,7 @@ "# NOW ASK THE SAME LLM THE SAME QUESTION USING THE RETRIEVED CONTEXT.\n", "QA_input = {\n", " 'question': SAMPLE_QUESTION,\n", - " 'context': context,\n", + " 'context': context[0],\n", "}\n", "\n", "nlp = pipeline('question-answering', \n", @@ -862,7 +842,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -872,6 +852,8 @@ "\n", "# Define the generation llm model to use.\n", "LLM_NAME = \"gpt-3.5-turbo-1106\"\n", + "TEMPERATURE = 0.1\n", + "RANDOM_SEED = 415\n", "\n", "# See how to save api key in env variable.\n", "# https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety\n", @@ -883,7 +865,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Question: What do the parameters for HNSW mean?\n", - "('Answer: The parameters for HNSW are M, which denotes the maximum degree of '\n", - " 'nodes on each layer of the graph, and efConstruction, which specifies a '\n", - " 'search range during index construction. M has values ranging from 4 to 64, '\n", - " 'and efConstruction ranges from 8 to 512. The ef parameter is used during the '\n", - " 'search stage and should be larger than top_k, with values from top_k to '\n", - " '32768. These parameters help control the graph structure and search scope '\n", - " 'for HNSW indexing. (Source: Milvus support to create index to accelerate '\n", - " 'vector search - https://pymilvus.readthedocs.io/en/latest/param.html)')\n", + "('Answer: Answer: The parameter M represents the maximum degree of nodes on '\n", + " 'each layer of the graph in the HNSW index, while efConstruction (when '\n", + " 'building index) or ef (when searching targets) specifies the search range. M '\n", + " 'ranges from 4 to 64, and efConstruction ranges from 8 to 512.')\n", "\n", "\n" ] } ], "source": [ "# CAREFUL!! THIS COSTS MONEY!!\n", "# Generate response\n", "\n", - "prompt = f\"\"\"Answer the question using the context provided. Be succinct.\n", - "Echo in the answer the Grounding Sources.\n", + "PROMPT = f\"\"\"Answer the question using the context provided. Be concise. 
Use the format below.\n", + "Answer: The answer to the question.\n", "Grounding sources: {context_metadata}\n", "\"\"\"\n", "\n", "# response = openai_client.chat.completions.create(\n", + "# # response_format={\n", + "# # \"type\": \"json_object\", \n", + "# # \"schema\": Result.schema_json()\n", + "# # },\n", "# messages=[\n", - "# {\n", - "# \"role\": \"system\",\n", - "# \"content\": prompt,\n", - "# },\n", - "# {\n", - "# \"role\": \"user\",\n", - "# \"content\": f\"question: {SAMPLE_QUESTION}, context: {context}\",\n", - "# }\n", + "# {\"role\": \"system\", \"content\": PROMPT,},\n", + "# {\"role\": \"user\", \"content\": f\"question: {SAMPLE_QUESTION}, context: {context[0]}\",}\n", "# ],\n", "# model=LLM_NAME,\n", + "# temperature=TEMPERATURE,\n", + "# seed=RANDOM_SEED,\n", "# )\n", "\n", "# Print the question and answer along with grounding sources and citations.\n", @@ -938,6 +916,14 @@ "\n", "# Question1: What do the parameters for HNSW mean?\n", "# Answer: Perfect!\n", + "# ('Answer: The parameters for HNSW are M, which limits the maximum degree of '\n", + "# 'nodes on each layer of the graph, and efConstruction, which specifies the '\n", + "# 'search range during index construction. Additionally, when searching '\n", + "# 'targets, the parameter ef is used to specify the search range. \\n'\n", + "# '\\n'\n", + "# \"Grounding sources: {'h1': 'Index', 'h2': 'Milvus support to create index to \"\n", + "# \"accelerate vecto', 'source': \"\n", + "# \"'https://pymilvus.readthedocs.io/en/latest/param.html'}\")\n", "# Best answer: M: maximum degree of nodes in a layer of the graph. \n", "# efConstruction: number of nearest neighbors to consider when connecting nodes in the graph.\n", "# ef: number of nearest neighbors to consider when searching for similar vectors. 
\n", @@ -974,22 +960,15 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Length context: 4229, Len metadata: 3\n" - ] - } - ], + "outputs": [], "source": [ "import requests, json\n", "\n", "# Define the URL, headers, and data\n", - "url = \"https://controller.api.gcp-us-west1.zillizcloud.com/v1/pipelines/pipe-458714b66ffc8ff3ab1bf1/run\"\n", + "# TODO change this before checking into github.\n", + "url = \"https://controller.api.gcp-us-west1.zillizcloud.com/v1/pipelines/pipe-xxx/run\"\n", "headers = {\n", " \"Content-Type\": \"application/json\",\n", " \"Authorization\": f\"Bearer {TOKEN}\",\n", @@ -1002,15 +981,17 @@ " \"limit\": TOP_K,\n", " \"offset\": 0,\n", " \"outputFields\": [\"chunk_text\", \"chunk_id\", \"doc_name\", \"source\"],\n", - " \"filter\": \"chunk_id >= 0\"\n", + " \"filter\": \"chunk_id >= 0 && doc_name == 'param.html'\",\n", " }\n", "}\n", "\n", "# Send the POST request\n", "response = requests.post(url, headers=headers, json=data)\n", - "# print(type(response))\n", - "# # Print the response\n", - "# pprint.pprint(response.json())\n", + "print(type(response))\n", + "\n", + "# TODO: Remove this before saving in github.\n", + "# Print the response\n", + "pprint.pprint(response.json())\n", "\n", "# Assemble context from Pipeline retriever.\n", "pipeline_context = \"\"\n", @@ -1031,7 +1012,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1039,13 +1020,10 @@ "output_type": "stream", "text": [ "Question: What do the parameters for HNSW mean?\n", - "('Answer: The parameters for HNSW are used to control the structure of the '\n", - " 'multi-layer navigation graph. \"M\" determines the maximum degree of nodes on '\n", - " 'each layer, and \"efConstruction\" specifies the search range during index '\n", - " 'construction. When searching for targets, the parameter \"ef\" should be set '\n", - " 'and be larger than \"top_k\".\\n'\n", - " 'Sources: (https://pymilvus.readthedocs.io/en/latest/param.html), '\n", - " '(https://pymilvus.readthedocs.io/en/latest/api.html)')\n", + "('Answer: The parameters for HNSW are M, which is the maximum degree of the '\n", + " 'node, and efConstruction, which specifies the search range during index '\n", + " 'construction. The M parameter ranges from 4 to 64, and efConstruction ranges '\n", + " 'from 8 to 512. Source: https://pymilvus.readthedocs.io/en/latest/param.html')\n", "\n", "\n" ] @@ -1053,24 +1031,15 @@ ], "source": [ "# CAREFUL!! THIS COSTS MONEY!!\n", - "# Generate response\n", - "prompt = f\"\"\"Answer the question using the context provided. 
Be succinct.\n", - "Echo in the answer the Grounding sources.\n", - "Grounding sources: {pipeline_context_metadata}\n", - "\"\"\"\n", "\n", "# response_pipeline = openai_client.chat.completions.create(\n", "# messages=[\n", - "# {\n", - "# \"role\": \"system\",\n", - "# \"content\": prompt,\n", - "# },\n", - "# {\n", - "# \"role\": \"user\",\n", - "# \"content\": f\"question: {SAMPLE_QUESTION}, context: {pipeline_context}\",\n", - "# }\n", - "# ],\n", + "# {\"role\": \"system\", \"content\": PROMPT,},\n", + "# {\"role\": \"user\", \"content\": f\"question: {SAMPLE_QUESTION}, context: {pipeline_context}\"}\n", + "# ],\n", "# model=LLM_NAME,\n", + "# temperature=TEMPERATURE,\n", + "# seed=RANDOM_SEED,\n", "# )\n", "\n", "# Print the question and answer along with grounding sources and citations.\n", @@ -1082,7 +1051,15 @@ " print(\"\\n\")\n", "\n", "# Question1: What do the parameters for HNSW mean?\n", - "# Answer: Perfect!\n", + "# Answer: Good, but missing 'ef', even though it was given 3 contexts instead of the 1 used by the manual method.\n", + "# ('Answer: Answer: The parameters for HNSW in Milvus refer to the configuration '\n", + "# 'options for the Hierarchical Navigable Small World index, which is used to '\n", + "# 'accelerate vector similarity search. The parameters include \"M\" for the '\n", + "# 'number of edges per point and \"efConstruction\" for the size of the dynamic '\n", + "# 'list for the nearest neighbors during index construction.\\n'\n", + "# \"Grounding sources: [{'h1': 'Index', 'h2': 'Milvus support to create index to \"\n", + "# \"accelerate vecto', 'source': \"\n", + "# \"'https://pymilvus.readthedocs.io/en/latest/param.html'}]\")\n", "# Best answer: M: maximum degree of nodes in a layer of the graph. \n", "# efConstruction: number of nearest neighbors to consider when connecting nodes in the graph.\n", "# ef: number of nearest neighbors to consider when searching for similar vectors. \n" ] }, { @@ -1098,7 +1075,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "id": "d0e81e68", "metadata": {}, "outputs": [], "source": [ "# Drop collection\n", "utility.drop_collection(COLLECTION_NAME)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "id": "c777937e", "metadata": {}, "outputs": [ diff --git a/bootcamp/Retrieval/imdb_milvus_client.ipynb b/bootcamp/Retrieval/imdb_milvus_client.ipynb index b9a0df3dd..87627f202 100755 --- a/bootcamp/Retrieval/imdb_milvus_client.ipynb +++ b/bootcamp/Retrieval/imdb_milvus_client.ipynb @@ -27,21 +27,12 @@ "# !pip install milvus, pymilvus, langchain, torch, transformers, python-dotenv\n", "\n", "# Import common libraries.\n", - "from typing import List\n", "import time\n", "import pandas as pd\n", "import numpy as np\n", "\n", "# Import custom functions for splitting and search.\n", - "import imdb_utilities" - ] - }, - { - "cell_type": "markdown", - "id": "8a67e382", - "metadata": {}, - "source": [ - "## Start up a local Milvus server." + "import imdb_utilities as _utils" ] }, { "cell_type": "markdown", "id": "fb844837", "metadata": {}, "source": [ - "Code in this notebook uses [Milvus client](https://milvus.io/docs/using_milvusclient.md) with [Milvus lite](https://milvus.io/docs/milvus_lite.md), which runs a local server. ⛔️ Milvus lite is only meant for demos and local testing.\n", - "- pip install milvus pymilvus\n", + "## Start up a Zilliz free tier cluster.\n", + "\n", + "Code in this notebook uses fully-managed Milvus on [Zilliz Cloud free trial](https://cloud.zilliz.com/login). \n", + " 1. 
Choose the default \"Starter\" option when you provision > Create collection > Give it a name > Create cluster and collection. \n", + " 2. On the Cluster main page, copy your `API Key` and store it locally in a .env variable. See the note below on how to do that.\n", + " 3. Also on the Cluster main page, copy the `Public Endpoint URI`.\n", + "\n", + "πŸ’‘ Note: To keep your tokens private, best practice is to use an **env variable**. See [how to save api key in env variable](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety).
\n", "\n", - "πŸ’‘ **For production purposes**, use a local Milvus docker, Milvus clusters, or fully-managed Milvus on Zilliz Cloud.\n", - "- [Local Milvus docker](https://milvus.io/docs/install_standalone-docker.md) requires local docker installed and running.\n", - "- [Milvus clusters](https://milvus.io/docs/install_cluster-milvusoperator.md) requires a K8s cluster up and running.\n", - "- [Ziliz Cloud free trial](https://cloud.zilliz.com/login) choose a \"Default\" option when you provision.\n" + "In Jupyter, you also need a .env file (in same dir as notebooks) containing lines like this:\n", + "- VARIABLE_NAME=value\n" ] }, { @@ -68,40 +63,32 @@ "name": "stdout", "output_type": "stream", "text": [ - "Milvus server startup time: 7.601609945297241 sec\n", - "v2.3.3-lite\n" + "Type of server: zilliz_cloud\n" ] } ], "source": [ - "from milvus import default_server\n", - "from pymilvus import (\n", - " connections, utility\n", + "# !pip install pymilvus #python sdk for milvus\n", + "from pymilvus import connections, utility\n", + "\n", + "import os\n", + "from dotenv import load_dotenv\n", + "load_dotenv()\n", + "TOKEN = os.getenv(\"ZILLIZ_API_KEY\")\n", + "\n", + "# Connect to Zilliz cloud using endpoint URI and API key TOKEN.\n", + "# TODO change this before checking into github.\n", + "CLUSTER_ENDPOINT=\"https://in03-xxxx.api.gcp-us-west1.zillizcloud.com:443\"\n", + "connections.connect(\n", + " alias='default',\n", + " # Public endpoint obtained from Zilliz Cloud\n", + " uri=CLUSTER_ENDPOINT,\n", + " # API key or a colon-separated cluster username and password\n", + " token=TOKEN,\n", ")\n", "\n", - "# Cleanup previous data and stop server in case it is still running.\n", - "default_server.stop()\n", - "default_server.cleanup()\n", - "\n", - "# Start a new milvus-lite local server.\n", - "start_time = time.time()\n", - "default_server.start()\n", - "\n", - "end_time = time.time()\n", - "print(f\"Milvus server startup time: {end_time - start_time} sec\")\n", - "# startup time: 5.6739208698272705\n", - "\n", - "# Add wait to avoid error message from trying to connect.\n", - "time.sleep(15)\n", - "\n", - "# Now you could connect with localhost and the given port.\n", - "# Port is defined by default_server.listen_port.\n", - "connections.connect(host='127.0.0.1', \n", - " port=default_server.listen_port,\n", - " show_startup_banner=True)\n", - "\n", - "# Check if the server is ready.\n", - "print(utility.get_server_version())" + "# Check if the server is ready and get colleciton name.\n", + "print(f\"Type of server: {utility.get_server_version()}\")" ] }, { @@ -110,7 +97,7 @@ "metadata": {}, "source": [ "## Load the Embedding Model checkpoint and use it to create vector embeddings\n", - "**Embedding model:** We will use the open-source [sentence transformers](https://www.sbert.net/docs/pretrained_models.html) hosted on HuggingFace to encode the movie review text. We will save the embeddings to a pandas dataframe and then into the milvus database.\n", + "**Embedding model:** We will use the open-source [sentence transformers](https://www.sbert.net/docs/pretrained_models.html) available on HuggingFace to encode the documentation text. We will download the model from HuggingFace and run it locally. \n", "\n", "Two model parameters of note below:\n", "1. EMBEDDING_LENGTH refers to the dimensionality or length of the embedding vector. In this case, the embeddings generated for EACH token in the input text will have the SAME length = 768. 
This size of embedding is often associated with BERT-based models, where the embeddings are used for downstream tasks such as classification, question answering, or text generation.

\n", @@ -157,9 +144,14 @@ "print(encoder)\n", "\n", "# Get the model parameters and save for later.\n", - "MAX_SEQ_LENGTH = encoder.get_max_seq_length() \n", - "HF_EOS_TOKEN_LENGTH = 1\n", "EMBEDDING_LENGTH = encoder.get_sentence_embedding_dimension()\n", + "MAX_SEQ_LENGTH_IN_TOKENS = encoder.get_max_seq_length() \n", + "# # Assume tokens are 3 characters long.\n", + "# MAX_SEQ_LENGTH = MAX_SEQ_LENGTH_IN_TOKENS * 3\n", + "# HF_EOS_TOKEN_LENGTH = 1 * 3\n", + "# Test with 512 sequence length.\n", + "MAX_SEQ_LENGTH = MAX_SEQ_LENGTH_IN_TOKENS\n", + "HF_EOS_TOKEN_LENGTH = 1\n", "\n", "# Inspect model parameters.\n", "print(f\"model_name: {model_name}\")\n", @@ -175,30 +167,19 @@ "## Create a Milvus collection\n", "\n", "You can think of a collection in Milvus like a \"table\" in SQL databases. The **collection** will contain the \n", - "- **Schema** (or no-schema Milvus Client). \n", + "- **Schema** (or [no-schema Milvus client](https://milvus.io/docs/using_milvusclient.md)). \n", "πŸ’‘ You'll need the vector `EMBEDDING_LENGTH` parameter from your embedding model.\n", + "Typical values are:\n", + " - 768 for sbert embedding models\n", + " - 1536 for ada-002 OpenAI embedding models\n", "- **Vector index** for efficient vector search\n", "- **Vector distance metric** for measuring nearest neighbor vectors\n", "- **Consistency level**\n", "In Milvus, transactional consistency is possible; however, according to the [CAP theorem](https://en.wikipedia.org/wiki/CAP_theorem), some latency must be sacrificed. πŸ’‘ Searching movie reviews is not mission-critical, so [`eventually`](https://milvus.io/docs/consistency.md) consistent is fine here.\n", "\n", - "## Add a Vector Index\n", - "\n", - "The vector index determines the vector **search algorithm** used to find the closest vectors in your data to the query a user submits. Most vector indexes use different sets of parameters depending on whether the database is:\n", - "- **inserting vectors** (creation mode) - vs - \n", - "- **searching vectors** (search mode) \n", - "\n", - "Scroll down the [docs page](https://milvus.io/docs/index.md) to see a table listing different vector indexes available on Milvus. For example:\n", - "- FLAT - deterministic exhaustive search\n", - "- IVF_FLAT or IVF_SQ8 - Hash index (stochastic approximate search)\n", - "- HNSW - Graph index (stochastic approximate search)\n", - "\n", - "Besides a search algorithm, we also need to specify a **distance metric**, that is, a definition of what is considered \"close\" in vector space. In the cell below, the [`HNSW`](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) search index is chosen. Its possible distance metrics are one of:\n", - "- L2 - L2-norm\n", - "- IP - Dot-product\n", - "- COSINE - Angular distance\n", - "\n", - "πŸ’‘ Most use cases work better with normalized embeddings, in which case L2 is useless (every vector has length=1) and IP and COSINE are the same. Only choose L2 if you plan to keep your embeddings unnormalized." 
+ "Some supported [data types](https://milvus.io/docs/schema.md) for Milvus schemas are:\n", + "- INT64 - primary key\n", + "- FLOAT_VECTOR - embedings = list of `numpy.ndarray` of `numpy.float32` numbers\n" ] }, { @@ -213,27 +194,58 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "20a05d59", "metadata": {}, "outputs": [], "source": [ + "from pymilvus import MilvusClient\n", + "\n", "# Set the Milvus collection name.\n", "COLLECTION_NAME = # TODO (exercise): code here\n", "\n", - "# Use no-schema Milvus client (uses flexible json key:value format).\n", + "# Use no-schema Milvus client uses flexible json key:value format.\n", "# https://milvus.io/docs/using_milvusclient.md\n", - "mc = MilvusClient(uri=\"http://localhost\")\n", + "mc = MilvusClient(\n", + " uri=CLUSTER_ENDPOINT,\n", + " # API key or a colon-separated cluster username and password\n", + " token=TOKEN)\n", + "\n", "mc.drop_collection(COLLECTION_NAME)\n", "mc.create_collection(COLLECTION_NAME, \n", " EMBEDDING_LENGTH, \n", - " #params=index_params # Omit params to use AUTOINDEX.\n", " )\n", "\n", "print(mc.describe_collection(COLLECTION_NAME))\n", "print(f\"Created collection: {COLLECTION_NAME}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add a Vector Index\n", + "\n", + "The vector index determines the vector **search algorithm** used to find the closest vectors in your data to the query a user submits. \n", + "\n", + "Most vector indexes use different sets of parameters depending on whether the database is:\n", + "- **inserting vectors** (creation mode) - vs - \n", + "- **searching vectors** (search mode) \n", + "\n", + "Scroll down the [docs page](https://milvus.io/docs/index.md) to see a table listing different vector indexes available on Milvus. For example:\n", + "- FLAT - deterministic exhaustive search\n", + "- IVF_FLAT or IVF_SQ8 - Hash index (stochastic approximate search)\n", + "- HNSW - Graph index (stochastic approximate search)\n", + "- AUTOINDEX - Automatically determined based on OSS vs [Zilliz cloud](https://docs.zilliz.com/docs/autoindex-explained), type of GPU, size of data.\n", + "\n", + "Besides a search algorithm, we also need to specify a **distance metric**, that is, a definition of what is considered \"close\" in vector space. In the cell below, the [`HNSW`](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) search index is chosen (Milvus OSS default AUTOINDEX). Its possible distance metrics are one of:\n", + "- L2 - L2-norm\n", + "- IP - Dot-product\n", + "- COSINE - Angular distance\n", + "\n", + "πŸ’‘ Most use cases work better with normalized embeddings, in which case L2 is useless (every vector has length=1) and IP and COSINE are the same. Only choose L2 if you plan to keep your embeddings unnormalized." 
+ ] + }, { "cell_type": "code", "execution_count": 5, @@ -246,12 +258,13 @@ "text": [ "Embedding length: 768\n", "Created collection: movies\n", - "{'collection_name': 'movies', 'auto_id': True, 'num_shards': 1, 'description': '', 'fields': [{'field_id': 100, 'name': 'id', 'description': '', 'type': 5, 'params': {}, 'element_type': 0, 'auto_id': True, 'is_primary': True}, {'field_id': 101, 'name': 'vector', 'description': '', 'type': 101, 'params': {'dim': 768}, 'element_type': 0}], 'aliases': [], 'collection_id': 445754962278875466, 'consistency_level': 3, 'properties': {}, 'num_partitions': 1, 'enable_dynamic_field': True}\n" + "{'collection_name': 'movies', 'auto_id': True, 'num_shards': 1, 'description': '', 'fields': [{'field_id': 100, 'name': 'id', 'description': '', 'type': 5, 'params': {}, 'element_type': 0, 'auto_id': True, 'is_primary': True}, {'field_id': 101, 'name': 'vector', 'description': '', 'type': 101, 'params': {'dim': 768}, 'element_type': 0}], 'aliases': [], 'collection_id': 446268198612080098, 'consistency_level': 3, 'properties': {}, 'num_partitions': 1, 'enable_dynamic_field': True}\n" ] } ], "source": [ "# Re-run create collection and add vector index specifying custom params.\n", + "from pymilvus import MilvusClient\n", "\n", "# For vector length, use the embedding length from the embedding model.\n", "print(f\"Embedding length: {EMBEDDING_LENGTH}\")\n", @@ -277,10 +290,18 @@ " \"params\": INDEX_PARAMS\n", " }\n", "\n", + "# Check if collection already exists, if so drop it.\n", + "has = utility.has_collection(COLLECTION_NAME)\n", + "if has:\n", + " drop_result = utility.drop_collection(COLLECTION_NAME)\n", + " print(f\"Successfully dropped collection: `{COLLECTION_NAME}`\")\n", + "\n", "# Below example uses no-schema Milvus client (flexible json key:value format).\n", "# https://milvus.io/docs/using_milvusclient.md\n", - "mc = MilvusClient(uri=\"http://localhost\")\n", - "mc.drop_collection(COLLECTION_NAME)\n", + "mc = MilvusClient(\n", + " uri=CLUSTER_ENDPOINT,\n", + " # API key or a colon-separated cluster username and password\n", + " token=TOKEN)\n", "mc.create_collection(\n", " COLLECTION_NAME, \n", " EMBEDDING_LENGTH, \n", @@ -329,7 +350,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "6a381e57", "metadata": {}, "outputs": [ @@ -416,10 +437,11 @@ "df.columns = ['text', 'label_int']\n", "\n", "# Map numbers to text 'Postive' and 'Negative' for sentiment labels.\n", - "df[\"label\"] = df[\"label_int\"].apply(imdb_utilities.sentiment_score_to_name)\n", + "df[\"label\"] = df[\"label_int\"].apply(_utils.sentiment_score_to_name)\n", "\n", "# Split data into train/valid/test.\n", - "df, df_train, df_val, df_test = imdb_utilities.partition_dataset(df, smoke_test=False)\n", + "columns = ['movie_index', 'text', 'label_int', 'label']\n", + "df, df_train, df_val, df_test = _utils.partition_dataset(df, columns, smoke_test=False)\n", "print(f\"original df shape: {df.shape}\")\n", "print(f\"df_train shape: {df_train.shape}, df_val shape: {df_val.shape}, df_test shape: {df_test.shape}\")\n", "assert df_train.shape[0] + df_val.shape[0] + df_test.shape[0] == df.shape[0]\n", @@ -432,7 +454,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "654dd135", "metadata": {}, "outputs": [ @@ -455,7 +477,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -479,79 +501,6 @@ "- **Function** = Langchain's convenient 
`RecursiveCharacterTextSplitter` to split up long reviews recursively.\n" ] }, - { - "cell_type": "code", - "execution_count": 11, - "id": "a53595fa", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", - "\n", - "def recursive_splitter_wrapper(text, chunk_size):\n", - "\n", - " # Default chunk overlap is 10% chunk_size.\n", - " chunk_overlap = np.round(chunk_size * 0.10, 0)\n", - "\n", - " # Use langchain's convenient recursive chunking method.\n", - " text_splitter = RecursiveCharacterTextSplitter(\n", - " chunk_size=chunk_size,\n", - " chunk_overlap=chunk_overlap,\n", - " length_function=len,\n", - " )\n", - " chunks: List[str] = text_splitter.split_text(text)\n", - "\n", - " # Replace special characters with spaces.\n", - " chunks = [text.replace(\"

\", \" \") for text in chunks]\n", - "\n", - " return chunks\n", - "\n", - "# Use recursive splitter to chunk text.\n", - "def imdb_chunk_text(batch_size, df, chunk_size):\n", - "\n", - " batch = df.head(batch_size).copy()\n", - " print(f\"chunk size: {chunk_size}\")\n", - " print(f\"original shape: {batch.shape}\")\n", - " \n", - " start_time = time.time()\n", - " # 1. Change primary key type to string.\n", - " batch[\"movie_index\"] = batch[\"movie_index\"].apply(lambda x: str(x))\n", - "\n", - " # 2. Truncate reviews to 512 characters.\n", - " batch['chunk'] = batch['text'].apply(recursive_splitter_wrapper, chunk_size=chunk_size)\n", - " # Explode the 'chunk' column to create new rows for each chunk.\n", - " batch = batch.explode('chunk', ignore_index=True)\n", - " print(f\"new shape: {batch.shape}\")\n", - "\n", - " # 3. Add embeddings as new column in df.\n", - " review_embeddings = torch.tensor(encoder.encode(batch['chunk']))\n", - " # Normalize embeddings to unit length.\n", - " review_embeddings = F.normalize(review_embeddings, p=2, dim=1)\n", - " # Quick check if embeddings are normalized.\n", - " norms = np.linalg.norm(review_embeddings, axis=1)\n", - " assert np.allclose(norms, 1.0, atol=1e-5) == True\n", - "\n", - " # 4. Convert embeddings to list of `numpy.ndarray`, each containing `numpy.float32` numbers.\n", - " converted_values = list(map(np.float32, review_embeddings))\n", - " batch['vector'] = converted_values\n", - "\n", - " # 5. Reorder columns for conveneince, so index first, labels at end.\n", - " new_order = [\"movie_index\", \"text\", \"chunk\", \"vector\", \"label_int\", \"label\"]\n", - " batch = batch[new_order]\n", - "\n", - " end_time = time.time()\n", - " print(f\"Chunking + embedding time for {batch_size} docs: {end_time - start_time} sec\")\n", - "\n", - " # Inspect the batch of data.\n", - " display(batch.head())\n", - " assert len(batch.chunk[0]) <= MAX_SEQ_LENGTH-1\n", - " assert len(batch.vector[0]) == EMBEDDING_LENGTH\n", - " print(f\"type embeddings: {type(batch.vector)} of {type(batch.vector[0])}\")\n", - " print(f\"of numbers: {type(batch.vector[0][0])}\")\n", - "\n", - " return batch" - ] - }, { "cell_type": "markdown", "id": "249e9c74", @@ -564,7 +513,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "id": "68917def", "metadata": {}, "outputs": [ @@ -575,7 +524,7 @@ "chunk size: 511\n", "original shape: (100, 4)\n", "new shape: (290, 5)\n", - "Chunking + embedding time for 100 docs: 8.375767946243286 sec\n" + "Chunking + embedding time for 100 docs: 7.053884267807007 sec\n" ] }, { @@ -613,7 +562,7 @@ " 80\n", " The whole town of Blackstone is afraid, becaus...\n", " The whole town of Blackstone is afraid, becaus...\n", - " [-0.075508565, -0.022925325, 0.022277957, 0.03...\n", + " [-0.075508595, -0.022925273, 0.022277843, 0.03...\n", " 1\n", " Positive\n", " \n", @@ -622,7 +571,7 @@ " 80\n", " The whole town of Blackstone is afraid, becaus...\n", " Mexican bandits (fighting the Gringos that too...\n", - " [0.0059213955, 0.0042556957, -0.028471153, 0.0...\n", + " [0.005921386, 0.0042556874, -0.028471047, 0.00...\n", " 1\n", " Positive\n", " \n", @@ -631,7 +580,7 @@ " 80\n", " The whole town of Blackstone is afraid, becaus...\n", " and definitely everybody is bad to the bone......\n", - " [-0.004301766, -0.03188503, -0.0051136613, -0....\n", + " [-0.004301741, -0.03188501, -0.005113593, -0.0...\n", " 1\n", " Positive\n", " \n", @@ -640,7 +589,7 @@ " 84\n", " This Harold Lloyd short wasn't really much; no...\n", " This 
Harold Lloyd short wasn't really much; no...\n", - " [-0.007607854, -0.033714272, -0.0077492087, 0....\n", + " [-0.0076078353, -0.033714227, -0.0077491296, 0...\n", " 0\n", " Negative\n", " \n", @@ -649,7 +598,7 @@ " 84\n", " This Harold Lloyd short wasn't really much; no...\n", " part was the last four or five minutes when th...\n", - " [0.014139466, -0.04540589, 0.012334436, 0.0192...\n", + " [0.014139436, -0.045405857, 0.012334452, 0.019...\n", " 0\n", " Negative\n", " \n", @@ -673,11 +622,11 @@ "4 part was the last four or five minutes when th... \n", "\n", " vector label_int label \n", - "0 [-0.075508565, -0.022925325, 0.022277957, 0.03... 1 Positive \n", - "1 [0.0059213955, 0.0042556957, -0.028471153, 0.0... 1 Positive \n", - "2 [-0.004301766, -0.03188503, -0.0051136613, -0.... 1 Positive \n", - "3 [-0.007607854, -0.033714272, -0.0077492087, 0.... 0 Negative \n", - "4 [0.014139466, -0.04540589, 0.012334436, 0.0192... 0 Negative " + "0 [-0.075508595, -0.022925273, 0.022277843, 0.03... 1 Positive \n", + "1 [0.005921386, 0.0042556874, -0.028471047, 0.00... 1 Positive \n", + "2 [-0.004301741, -0.03188501, -0.005113593, -0.0... 1 Positive \n", + "3 [-0.0076078353, -0.033714227, -0.0077491296, 0... 0 Negative \n", + "4 [0.014139436, -0.045405857, 0.012334452, 0.019... 0 Negative " ] }, "metadata": {}, @@ -695,12 +644,20 @@ "source": [ "## Prepare df for insertion into Milvus index.\n", "\n", - "# Use the embedding model parameters to calculate chunk_size and overlap.\n", + "# Use the embedding model parameters.\n", "chunk_size = MAX_SEQ_LENGTH - HF_EOS_TOKEN_LENGTH\n", + "chunk_overlap = np.round(chunk_size * 0.10, 0)\n", "\n", "# Chunk a batch of data from pandas DataFrame and inspect it.\n", "BATCH_SIZE = 100\n", - "batch = imdb_chunk_text(BATCH_SIZE, df, chunk_size)\n", + "batch = _utils.imdb_chunk_text(encoder, BATCH_SIZE, df, chunk_size)\n", + "\n", + "# Inspect the batch of data.\n", + "display(batch.head())\n", + "assert len(batch.chunk[0]) <= MAX_SEQ_LENGTH-1\n", + "assert len(batch.vector[0]) == EMBEDDING_LENGTH\n", + "print(f\"type embeddings: {type(batch.vector)} of {type(batch.vector[0])}\")\n", + "print(f\"of numbers: {type(batch.vector[0][0])}\")\n", "\n", "# Chunking looks good, drop the original text column.\n", "batch.drop(columns=[\"text\"], inplace=True)" @@ -720,7 +677,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "1954c96d", "metadata": {}, "outputs": [], @@ -740,7 +697,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "id": "470a93c7", "metadata": {}, "outputs": [ @@ -751,120 +708,7 @@ "chunk size: 511\n", "original shape: (100, 4)\n", "new shape: (290, 5)\n", - "Chunking + embedding time for 100 docs: 8.245778799057007 sec\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movie_indextextchunkvectorlabel_intlabel
080The whole town of Blackstone is afraid, becaus...The whole town of Blackstone is afraid, becaus...[-0.075508565, -0.022925325, 0.022277957, 0.03...1Positive
180The whole town of Blackstone is afraid, becaus...Mexican bandits (fighting the Gringos that too...[0.0059213955, 0.0042556957, -0.028471153, 0.0...1Positive
280The whole town of Blackstone is afraid, becaus...and definitely everybody is bad to the bone......[-0.004301766, -0.03188503, -0.0051136613, -0....1Positive
384This Harold Lloyd short wasn't really much; no...This Harold Lloyd short wasn't really much; no...[-0.007607854, -0.033714272, -0.0077492087, 0....0Negative
484This Harold Lloyd short wasn't really much; no...part was the last four or five minutes when th...[0.014139466, -0.04540589, 0.012334436, 0.0192...0Negative
\n", - "
" - ], - "text/plain": [ - " movie_index text \\\n", - "0 80 The whole town of Blackstone is afraid, becaus... \n", - "1 80 The whole town of Blackstone is afraid, becaus... \n", - "2 80 The whole town of Blackstone is afraid, becaus... \n", - "3 84 This Harold Lloyd short wasn't really much; no... \n", - "4 84 This Harold Lloyd short wasn't really much; no... \n", - "\n", - " chunk \\\n", - "0 The whole town of Blackstone is afraid, becaus... \n", - "1 Mexican bandits (fighting the Gringos that too... \n", - "2 and definitely everybody is bad to the bone...... \n", - "3 This Harold Lloyd short wasn't really much; no... \n", - "4 part was the last four or five minutes when th... \n", - "\n", - " vector label_int label \n", - "0 [-0.075508565, -0.022925325, 0.022277957, 0.03... 1 Positive \n", - "1 [0.0059213955, 0.0042556957, -0.028471153, 0.0... 1 Positive \n", - "2 [-0.004301766, -0.03188503, -0.0051136613, -0.... 1 Positive \n", - "3 [-0.007607854, -0.033714272, -0.0077492087, 0.... 0 Negative \n", - "4 [0.014139466, -0.04540589, 0.012334436, 0.0192... 0 Negative " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type embeddings: of \n", - "of numbers: \n" + "Chunking + embedding time for 100 docs: 6.648707866668701 sec\n" ] } ], @@ -876,7 +720,7 @@ "\n", "# Chunk a batch of data from pandas DataFrame and inspect it.\n", "BATCH_SIZE = 100\n", - "batch = imdb_chunk_text(BATCH_SIZE, df, chunk_size)\n", + "batch = _utils.imdb_chunk_text(encoder, BATCH_SIZE, df, chunk_size)\n", "\n", "# Chunking looks good, drop the original text column.\n", "batch.drop(columns=[\"text\"], inplace=True)" @@ -889,16 +733,22 @@ "source": [ "## Insert data into Milvus\n", "\n", - "Milvus and Milvus Lite support loading pandas dataframes directly.\n", + "For each original text chunk, we'll write the quadruplet (`vector, text, source, h1, h2`) into the database.\n", "\n", - "Milvus Client, however, requires conerting pandas df into a list of dictionaries first.\n", + "
\n", + "\n", + "
\n", + "\n", + "Milvus and Milvus Lite support loading data from:\n", + "- pandas dataframes \n", + "- list of dictionaries\n", "\n", "πŸ€” TODO: This would be a good place to demonstrate Milvus' scalability by using Ray together with Milvus to run batches in parallel. I'll do this in a future tutorial." ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "id": "b51ff139", "metadata": {}, "outputs": [ @@ -913,21 +763,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [00:00<00:00, 29.92it/s]" + "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [00:02<00:00, 2.06s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Milvus insert time for 290 vectors: 0.03497195243835449 seconds\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" + "Milvus insert time for 290 vectors: 2.066472053527832 seconds\n" ] } ], @@ -968,7 +811,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "id": "eb7bc132", "metadata": {}, "outputs": [], @@ -1003,7 +846,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "id": "5e7f41f4", "metadata": {}, "outputs": [ @@ -1037,7 +880,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "id": "a6863a32", "metadata": {}, "outputs": [ @@ -1093,7 +936,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 17, "id": "2ace8d04", "metadata": {}, "outputs": [ @@ -1101,7 +944,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Search time: 0.003979921340942383 sec\n", + "Search time: 0.07567000389099121 sec\n", "type: , count: 10\n" ] } @@ -1124,7 +967,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 18, "id": "c5d98e28", "metadata": {}, "outputs": [ @@ -1132,7 +975,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Search time: 0.0022897720336914062 sec\n", + "Milvus search time: 0.055058956146240234 sec\n", "type: , count: 3\n" ] } @@ -1142,6 +985,7 @@ "\n", "# Return top k results with HNSW index.\n", "TOP_K = 3\n", + "OUTPUT_FIELDS=[\"movie_index\", \"chunk\", \"label\"]\n", "SEARCH_PARAMS = dict({\n", " # Re-use index param for num_candidate_nearest_neighbors.\n", " \"ef\": INDEX_PARAMS['efConstruction']\n", @@ -1153,16 +997,16 @@ " COLLECTION_NAME,\n", " data=query_embeddings, \n", " search_params=SEARCH_PARAMS,\n", - " output_fields=[\"movie_index\", \"chunk\", \"label\"], \n", + " output_fields=OUTPUT_FIELDS, \n", " limit=TOP_K,\n", " consistency_level=\"Eventually\",\n", " )\n", "\n", "elapsed_time = time.time() - start_time\n", - "print(f\"Search time: {elapsed_time} sec\")\n", + "print(f\"Milvus search time: {elapsed_time} sec\")\n", "\n", "# Inspect search result.\n", - "print(f\"type: {type(results)}, count: {len(results[0])}\")\n" + "print(f\"type: {type(results)}, count: {len(results[0])}\")" ] }, { @@ -1177,52 +1021,21 @@ }, { "cell_type": "code", - "execution_count": 21, - "id": "d3dfa33a", - "metadata": {}, - "outputs": [], - "source": [ - "## Results returned from MilvusClient are in the form list of lists of dicts.\n", - "\n", - "# Get the movie_indexes, review texts, and labels.\n", - "distances = []\n", - "texts = []\n", - "movie_indexes = []\n", - "labels = []\n", - "for result in results[0]:\n", - " distances.append(result['distance'])\n", - " texts.append(result['entity']['chunk'])\n", - " movie_indexes.append(result['entity']['movie_index'])\n", - " labels.append(result['entity']['label'])\n", - 
"\n", - "# Assemble all the results in a zipped list.\n", - "formatted_results = list(zip(distances, movie_indexes, texts, labels))" - ] - }, - { - "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "22d65363", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0: 0.541, 56, Negative, Dr. K(David H Hickey)has been trying to master a formula that would end all disease and handicaps, b\n", - "1: 0.54, 44, Positive, is not a horror movie, although it does contain some violent scenes, but is rather a comedy. A satir\n", - "2: 0.535, 67, Positive, a good movie with a real good story. The fact that there are so many other big stars who all also ha\n" - ] - } - ], + "outputs": [], "source": [ - "# Print the results.\n", - "# k: distance, movie_index, label, review text\n", + "# Assemble just the top retrieved 1st context and context metadata.\n", + "results = _utils.client_assemble_retrieved_context(results)\n", + "# formatted_results = _utils.client_assemble_retrieved_context(results)\n", + "# print(f\"Length context: {len(context[0])}\")\n", "\n", - "i = 0\n", - "for row in formatted_results:\n", - " print(f\"{i}: {np.round(row[0],3)}, {row[1]}, {row[3]}, {row[2][:100]}\")\n", - " i += 1\n", + "# TODO - remove printing before saving in github.\n", + "# Loop throught each formatted result and print.\n", + "print(f\"#i: (distance, movie_id, label, chunk_text)\")\n", + "for i, result in enumerate(results):\n", + " print(f\"#{i}: {result}\")\n", "\n", "#1: 2006, Serum, \n", "# 0: 0.541, 931, Negative, Dr. K(David H Hickey)has been trying to master a formula that would end all disease and handicaps, b\n", @@ -1246,39 +1059,29 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "922073f2", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0: 0.561, 67, Positive, a good movie with a real good story. The fact that there are so many other big stars who all also ha\n", - "1: 0.56, 13, Positive, the stories but helps Malkovich to provoke some thought. 
I'd say it is worth seeing and the best of \n", - "2: 0.549, 12, Positive, the mini-bio on Woody Strode here as a primer: http://imdb.com/name/nm0834754/bio The film does a g\n" - ] - } - ], + "outputs": [], "source": [ - "# Take as input a user question and conduct semantic vector search using the question.\n", + "# # Take as input a user question and conduct semantic vector search using the question.\n", "question = \"I'm a medical doctor, what movie should I watch?\"\n", - "new_question = \"I'm a medical doctor, suggest only good movies to watch?\"\n", - "new_results = \\\n", - " imdb_utilities.mc_search_imdb([new_question],\n", - " encoder,\n", - " mc,\n", - " SEARCH_PARAMS, 3, \n", - " milvus_client=True,\n", - " COLLECTION_NAME=COLLECTION_NAME,\n", - " )\n", - "\n", - "# Print the results.\n", - "# k: distance, movie_index, label, review text\n", - "i = 0\n", - "for row in new_results:\n", - " print(f\"{i}: {np.round(row[0],3)}, {row[1]}, {row[3]}, {row[2][:100]}\")\n", - " i += 1\n", + "new_question = \"I'm a medical doctor, suggest only positive movies to watch?\"\n", + "print(f\"Question: {new_question}\")\n", + "\n", + "# Run the search and time it.\n", + "start_time = time.time()\n", + "new_results = _utils.mc_client_search_imdb(\n", + " [new_question], encoder, mc, SEARCH_PARAMS, OUTPUT_FIELDS, TOP_K)\n", + " \n", + "elapsed_time = time.time() - start_time\n", + "print(f\"Milvus search time: {elapsed_time} sec\")\n", + "\n", + "# TODO - remove printing before saving in github.\n", + "# Loop throught each formatted result and print.\n", + "print(f\"#i: (distance, movie_id, label, chunk_text)\")\n", + "for i, result in enumerate(new_results):\n", + " print(f\"#{i}: {result}\")\n", "\n", "# As expected, new_question answers are slightly different!\n", "# 0: 0.562, 45719, Positive, the stories but helps Malkovich to provoke some thought.

I'd say it is worth seeing and t\n", @@ -1288,19 +1091,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "d0e81e68", "metadata": {}, "outputs": [], "source": [ - "# Shut down and cleanup the milvus server.\n", - "default_server.stop()\n", - "default_server.cleanup()" + "# Drop collection\n", + "utility.drop_collection(COLLECTION_NAME)" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 22, "id": "c777937e", "metadata": {}, "outputs": [ @@ -1311,16 +1113,16 @@ "Author: Christy Bergman\n", "\n", "Python implementation: CPython\n", - "Python version : 3.10.12\n", - "IPython version : 8.15.0\n", + "Python version : 3.11.6\n", + "IPython version : 8.18.1\n", "\n", - "torch : 2.0.1\n", - "transformers: 4.34.1\n", + "torch : 2.1.1\n", + "transformers: 4.35.2\n", "milvus : 2.3.3\n", - "pymilvus : 2.3.3\n", + "pymilvus : 2.3.4\n", "langchain : 0.0.322\n", "\n", - "conda environment: py310\n", + "conda environment: py311\n", "\n" ] } @@ -1358,7 +1160,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.6" } }, "nbformat": 4,