rag changes suggestions (#7655)
GitOrigin-RevId: e40a7d91f5e6ba6dad7e082f196d5c88ffff2dab
KamilPiechowiak authored and Manul from Pathway committed Nov 20, 2024
1 parent d77c9d3 commit 976cc83
Showing 4 changed files with 25 additions and 27 deletions.
@@ -37,7 +37,7 @@
# ---
# ::
#
-# Pathway Vectorstore enables building a document index on top of you documents without the
+# Pathway Vectorstore enables building a document index on top of your documents without the
# complexity of ETL pipelines, managing different containers for storing, embedding, and serving.
# It allows for easy-to-manage, always up-to-date LLM pipelines accessible using a RESTful API
# and with integrations to popular LLM toolkits such as Langchain and LlamaIndex.
@@ -62,7 +62,7 @@

# %%
# _MD_SHOW_!pip install pathway litellm
-# # !pip install unstructured[all-docs]
+# _MD_SHOW_ !pip install unstructured[all-docs]
# _MD_SHOW_!mkdir -p sample_documents
# _MD_SHOW_![ -f sample_documents/repo_readme.md ] || wget 'https://gist.githubusercontent.com/janchorowski/dd22a293f3d99d1b726eedc7d46d2fc0/raw/pathway_readme.md' -O 'sample_documents/repo_readme.md'

@@ -321,9 +321,6 @@
# The vectorization pipeline supports pluggable parsers. If not provided, defaults to `UTF-8` parser. You can find available parsers [here](https://github.com/pathwaycom/pathway/blob/main/python/pathway/xpacks/llm/parsers.py).
# An example parser that can read PDFs, Word documents and other formats is provided with `parsers.ParseUnstructured`:

-# %%
-# # !pip install unstructured[all-docs] # if you will need to parse complex documents
-
# %% [markdown]
# ```python
# from pathway.xpacks.llm import parsers
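For reference, a minimal sketch of the parser usage this section introduces; the document source and the `VectorStoreServer` wiring are assumptions based on the rest of this tutorial, not part of the diff:

```python
import pathway as pw
from pathway.xpacks.llm import embedders, parsers
from pathway.xpacks.llm.vector_store import VectorStoreServer

# ParseUnstructured can read PDFs, Word documents and other formats;
# it needs the `unstructured[all-docs]` package installed above.
parser = parsers.ParseUnstructured()

# Assumed wiring: read raw bytes from a folder and serve a vector index
# that parses each file with ParseUnstructured before embedding it.
documents = pw.io.fs.read("./sample_documents", format="binary", with_metadata=True)
embedder = embedders.OpenAIEmbedder(model="text-embedding-ada-002")
server = VectorStoreServer(documents, embedder=embedder, parser=parser)
```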
15 changes: 6 additions & 9 deletions docs/2.developers/4.user-guide/50.llm-xpack/10.overview.md
@@ -80,9 +80,7 @@ model = HFPipelineChat(
responses = query.select(result=model(prompt_chat_single_qa(pw.this.prompt)))
```

-You can check an example with HuggingFace running locally in the [llm-app repository](https://github.com/pathwaycom/llm-app/blob/main/examples/pipelines/local/app.py).
-
-Pathway also comes with wrappers for embedding models - [`OpenAIEmbedder`](/developers/api-docs/pathway-xpacks-llm/embedders#pathway.xpacks.llm.embedders.OpenAIEmbedder), [`LiteLLMEmbedder`](/developers/api-docs/pathway-xpacks-llm/embedders#pathway.xpacks.llm.embedders.LiteLLMEmbedder) and [`SentenceTransformersEmbedder`](/developers/api-docs/pathway-xpacks-llm/embedders#pathway.xpacks.llm.embedders.SentenceTransformerEmbedder). Each of them can be applied to a column of strings and returns a column with a list of floats - the embeddings.
+Pathway also comes with wrappers for embedding models - [`OpenAIEmbedder`](/developers/api-docs/pathway-xpacks-llm/embedders#pathway.xpacks.llm.embedders.OpenAIEmbedder), [`LiteLLMEmbedder`](/developers/api-docs/pathway-xpacks-llm/embedders#pathway.xpacks.llm.embedders.LiteLLMEmbedder) and [`SentenceTransformersEmbedder`](/developers/api-docs/pathway-xpacks-llm/embedders#pathway.xpacks.llm.embedders.SentenceTransformerEmbedder). Each of them can be applied to a column of strings and returns a column with numpy arrays - the embeddings.

```python
embedder = OpenAIEmbedder(
@@ -98,25 +96,24 @@ Wrapper for OpenAI and LiteLLM, both for chat and embedding, are asynchronous, a
- `capacity`, which sets the number of concurrent operations allowed,
- `retry_strategy`, which sets the strategy for handling retries in case of failures,
- `cache_strategy`, which defines the cache mechanism.
-<!-- TODO: do we have a link describing them further? -->

-These three parameters need to be set during the initialization of the wrapper.
+These three parameters need to be set during the initialization of the wrapper. You can read more about them in the [UDFs guide](/developers/user-guide/data-transformation/user-defined-functions#asyncexecutor).

```python
model = OpenAIChat(
capacity=5, # maximum concurrent operations is 5
# in case of failure, retry 5 times, each time waiting twice as long before retrying
-    retry_strategy=pw.asynchronous.ExponentialBackoffRetryStrategy(max_retries=5, initial_delay=1000, backoff_factor=2),
+    retry_strategy=pw.udfs.ExponentialBackoffRetryStrategy(max_retries=5, initial_delay=1000, backoff_factor=2),
# if PATHWAY_PERSISTENT_STORAGE is set, then it is used to cache the calls
-    cache_strategy=pw.asynchronous.DefaultCache(),
+    cache_strategy=pw.udfs.DefaultCache(),
model="gpt-3.5-turbo",
api_key=os.environ["OPENAI_API_KEY"], # Read OpenAI API key from environmental variables
)
responses = query.select(result=model(prompt_chat_single_qa(pw.this.prompt)))
```

### Creating a Pathway LLM pipeline
-You can now combine these wrappers to create a LLM pipeline using Pathway. To learn how to do this, read [our tutorial](/developers/user-guide/llm-xpack/llm-app-pathway).
+You can now combine these wrappers to create an LLM pipeline using Pathway. To learn how to do this, read [our tutorial](/developers/user-guide/llm-xpack/llm-app-pathway).

## Preparing documents for LLMs

@@ -179,7 +176,7 @@ Vector Store offer integrations with both LlamaIndex and LangChain. These allow

## Rerankers

-Rerankers allow you to evaluate whether a document is relevant to the question asked. A typical application of rerankers is to implement a two-stage retrieval. First, some number of documents from a vector store, purposely more than you wish to embed in the query as a context. Then all these documents are ranked with a reranker, and then only the best of them are left, while the rest are discarded.
+Rerankers allow you to evaluate whether a document is relevant to the question asked. A typical application of rerankers is to implement two-stage retrieval. First, some number of documents is retrieved from a vector store, purposely more than you wish to embed in the query as context. Then all these documents are ranked with a reranker, and only the best of them are kept, while the rest are discarded.
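For illustration, a minimal sketch of the second stage, adapted from the `rerank_topk_filter` doctest updated later in this commit (the scores would normally come from one of the rerankers listed below):

```python
import pandas as pd
import pathway as pw
from pathway.xpacks.llm import rerankers

# First-stage retrieval results with reranker scores already attached.
retrieved_docs = [{"text": "Something"}, {"text": "Something else"}, {"text": "Pathway"}]
df = pd.DataFrame({"docs": retrieved_docs, "reranker_scores": [1.0, 3.0, 2.0]})
table = pw.debug.table_from_pandas(df)

# Gather documents and scores into tuples, then keep only the 2 best-scoring.
docs_table = table.reduce(
    doc_list=pw.reducers.tuple(pw.this.docs),
    score_list=pw.reducers.tuple(pw.this.reranker_scores),
)
docs_table = docs_table.select(
    docs_scores_tuple=rerankers.rerank_topk_filter(
        pw.this.doc_list, pw.this.score_list, 2
    )
)
```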

Pathway offers three rerankers:
- [`LLMReranker`](/developers/api-docs/pathway-xpacks-llm/rerankers#pathway.xpacks.llm.rerankers.LLMReranker) asks an LLM chat of your choice to rank the relevance of a document against a query on a scale from 1 to 5,
@@ -175,7 +175,7 @@ wget 'https://public-pathway-releases.s3.eu-central-1.amazonaws.com/data/pathway
```


-For each document and each query, embeddings are computed using a pre-trained language model. These embeddings are numerical representations of the documents: they are used to find the documents that are most relevant to each query. Pathway offers API integration with premier LLM service providers, including but not limited to OpenAI and HuggingFace. You can import the model interface for the provider of your choice and specify the API key and the model ID to call. By default, the embedder is `text-embedding-ada-002` from OpenAI, which returns vectors of dimension `1536`. Please check out [openai-model-endpoint-compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) for more information on the available models.
+For each document and each query, embeddings are computed using a pre-trained language model. These embeddings are numerical representations of the documents: they are used to find the documents that are most relevant to each query. Pathway offers API integration with premier LLM service providers, including but not limited to OpenAI and HuggingFace. You can import the model interface for the provider of your choice and specify the API key and the model ID to call. By default, the embedder is `text-embedding-ada-002` from OpenAI, which returns vectors of dimension `1536`. Please check out [openai-model-endpoint-compatibility](https://platform.openai.com/docs/models#model-endpoint-compatibility) for more information on the available models.
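For reference, a minimal sketch of the embedding step this paragraph describes; the sample `documents` table is a made-up illustration, not part of the tutorial:

```python
import os

import pandas as pd
import pathway as pw
from pathway.xpacks.llm.embedders import OpenAIEmbedder

# The default model is text-embedding-ada-002, returning 1536-dimensional vectors.
embedder = OpenAIEmbedder(api_key=os.environ["OPENAI_API_KEY"])

# Made-up single-document table for illustration.
df = pd.DataFrame({"doc": ["Pathway is a data processing framework."]})
documents = pw.debug.table_from_pandas(df)

# Applying the embedder to a column of strings yields a column of embeddings.
documents = documents.select(pw.this.doc, vector=embedder(pw.this.doc))
```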

To implement this, remove the LLM query at the end of the program you obtained in the last section: you first need to retrieve context before querying the LLM. You should be left with the following code:
```python [app.py]
28 changes: 16 additions & 12 deletions python/pathway/xpacks/llm/rerankers.py
@@ -21,26 +21,30 @@ def rerank_topk_filter(
- docs: A column with lists of documents or chunks to rank. Each row in this column
is filtered separately.
- scores: A column with lists of re-ranking scores for chunks.
-- k: Number of documents to keep after filtering.
+- k: The number of documents to keep after filtering.
>>> import pathway as pw
>>> from pathway.xpacks.llm import rerankers
>>> import pandas as pd
->>> retrieved_docs = [{"text": "Something"}, {"text": "Something else"}, {"text": "Pathway"}]
+>>> retrieved_docs = [
+...     {"text": "Something"},
+...     {"text": "Something else"},
+...     {"text": "Pathway"},
+... ]
>>> df = pd.DataFrame({"docs": retrieved_docs, "reranker_scores": [1.0, 3.0, 2.0]})
>>> table = pw.debug.table_from_pandas(df)
>>> docs_table = table.reduce(
-...    doc_list=pw.reducers.tuple(pw.this.docs),
-...    score_list=pw.reducers.tuple(pw.this.reranker_scores),
+...     doc_list=pw.reducers.tuple(pw.this.docs),
+...     score_list=pw.reducers.tuple(pw.this.reranker_scores),
... )
>>> docs_table = docs_table.select(
-...    docs_scores_tuple=rerankers.rerank_topk_filter(
-...        pw.this.doc_list, pw.this.score_list, 2
-...    )
+...     docs_scores_tuple=rerankers.rerank_topk_filter(
+...         pw.this.doc_list, pw.this.score_list, 2
+...     )
... )
>>> docs_table = docs_table.select(
-...    doc_list=pw.this.docs_scores_tuple[0],
-...    score_list=pw.this.docs_scores_tuple[1],
+...     doc_list=pw.this.docs_scores_tuple[0],
+...     score_list=pw.this.docs_scores_tuple[1],
... )
>>> pw.debug.compute_and_print(docs_table, include_id=False)
doc_list | score_list
@@ -79,7 +83,7 @@ class LLMReranker(pw.UDF):
>>> df = pd.DataFrame({"docs": docs, "prompt": "query text"})
>>> table = pw.debug.table_from_pandas(df)
>>> table += table.select(
-...    reranker_scores=reranker(pw.this.docs["text"], pw.this.prompt)
+...     reranker_scores=reranker(pw.this.docs["text"], pw.this.prompt)
... )
>>> table
<pathway.Table schema={'docs': <class 'pathway.internals.json.Json'>, 'prompt': <class 'str'>, 'reranker_scores': <class 'float'>}>
@@ -204,7 +208,7 @@ class CrossEncoderReranker(pw.UDF):
>>> df = pd.DataFrame({"docs": docs, "prompt": "query text"})
>>> table = pw.debug.table_from_pandas(df)
>>> table += table.select(
-...    reranker_scores=reranker(pw.this.docs["text"], pw.this.prompt)
+...     reranker_scores=reranker(pw.this.docs["text"], pw.this.prompt)
... )
>>> table
<pathway.Table schema={'docs': <class 'pathway.internals.json.Json'>, 'prompt': <class 'str'>, 'reranker_scores': <class 'float'>}>
@@ -269,7 +273,7 @@ class EncoderReranker(pw.UDF):
>>> df = pd.DataFrame({"docs": docs, "prompt": "query text"})
>>> table = pw.debug.table_from_pandas(df)
>>> table += table.select(
-...    reranker_scores=reranker(pw.this.docs["text"], pw.this.prompt)
+...     reranker_scores=reranker(pw.this.docs["text"], pw.this.prompt)
... )
>>> table
<pathway.Table schema={'docs': <class 'pathway.internals.json.Json'>, 'prompt': <class 'str'>, 'reranker_scores': <class 'float'>}>
