From c31390edfcdac16ed1d0fc893890519dae028eb2 Mon Sep 17 00:00:00 2001 From: Szymon Dudycz Date: Tue, 29 Oct 2024 17:22:31 +0100 Subject: [PATCH] Update llm app readmes (#7536) Co-authored-by: berkecanrizai <63911408+berkecanrizai@users.noreply.github.com> GitOrigin-RevId: 697024be46cecfa892e9a40fddbb4ebbed1f6fcd --- examples/pipelines/adaptive-rag/README.md | 101 +++++++++++++-- examples/pipelines/adaptive-rag/app.py | 7 +- examples/pipelines/adaptive-rag/app.yaml | 7 +- .../demo-document-indexing/README.md | 85 +++++++----- .../pipelines/demo-document-indexing/app.py | 7 +- .../pipelines/demo-document-indexing/app.yaml | 13 +- .../demo-question-answering/README.md | 118 ++++++++--------- .../pipelines/demo-question-answering/app.py | 7 +- .../demo-question-answering/app.yaml | 5 + .../pipelines/gpt_4o_multimodal_rag/README.md | 112 ++++++++++++---- .../pipelines/gpt_4o_multimodal_rag/app.py | 7 +- .../pipelines/gpt_4o_multimodal_rag/app.yaml | 20 +++ examples/pipelines/private-rag/README.md | 121 +++++++++++++----- examples/pipelines/private-rag/app.py | 7 +- examples/pipelines/private-rag/app.yaml | 5 + examples/pipelines/slides_ai_search/README.md | 40 +++--- examples/pipelines/slides_ai_search/app.py | 12 +- examples/pipelines/slides_ai_search/app.yaml | 12 ++ 18 files changed, 483 insertions(+), 203 deletions(-) diff --git a/examples/pipelines/adaptive-rag/README.md b/examples/pipelines/adaptive-rag/README.md index 40fce00..c773c6f 100644 --- a/examples/pipelines/adaptive-rag/README.md +++ b/examples/pipelines/adaptive-rag/README.md @@ -32,27 +32,106 @@ We also set `strict_prompt=True`. This adjusts the prompt with additional instru We encourage you to check the implementation of `answer_with_geometric_rag_strategy_from_index`. -## Modifying the code +## Customizing the pipeline -Under the main function, we define: -- input folders +The code can be modified by changing the `app.yaml` configuration file. To read more about YAML files used in Pathway templates, read [our guide](https://pathway.com/developers/user-guide/llm-xpack/yaml-templates). + +In the `app.yaml` file we define: +- input connectors - LLM - embedder - index -- host and port to run the app -- run options (caching, cache folder) +and any of these can be replaced or, if no longer needed, removed. For components that can be used check +Pathway [LLM xpack](https://pathway.com/developers/user-guide/llm-xpack/overview), or you can implement your own. + +You can also check our other templates - [demo-question-answering](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/demo-question-answering), +[Multimodal RAG](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/gpt_4o_multimodal_rag) or +[Private RAG](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/private-rag). As all of these only differ +in the YAML configuration file, you can also use them as an inspiration for your custom pipeline. + +Here some examples of what can be modified. + +### LLM Model + +You can choose any of the GPT-3.5 Turbo, GPT-4, or GPT-4 Turbo models proposed by Open AI. +You can find the whole list on their [models page](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo). 
+ +You simply need to change the `model` to the one you want to use: +```yaml +$llm: !pw.xpacks.llm.llms.OpenAIChat + model: "gpt-3.5-turbo" + retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy + max_retries: 6 + cache_strategy: !pw.udfs.DiskCache + temperature: 0.05 + capacity: 8 +``` + +The default model is `gpt-3.5-turbo` + +You can also use different provider, by using different class from [Pathway LLM xpack](https://pathway.com/developers/user-guide/llm-xpack/overview), +e.g. here is configuration for locally run Mistral model. + +```yaml +$llm: !pw.xpacks.llm.llms.LiteLLMChat + model: "ollama/mistral" + retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy + max_retries: 6 + cache_strategy: !pw.udfs.DiskCache + temperature: 0 + top_p: 1 + api_base: "http://localhost:11434" +``` + +### Webserver + +You can configure the host and the port of the webserver. +Here is the default configuration: +```yaml +host: "0.0.0.0" +port: 8000 +``` + +### Cache + +You can configure whether you want to enable cache, to avoid repeated API accesses, and where the cache is stored. +Default values: +```yaml +with_cache: True +cache_backend: !pw.persistence.Backend.filesystem + path: ".Cache" +``` + +### Data sources + +You can configure the data sources by changing `$sources` in `app.yaml`. +You can add as many data sources as you want. You can have several sources of the same kind, for instance, several local sources from different folders. +The sections below describe how to configure local, Google Drive and Sharepoint source, but you can use any input [connector](https://pathway.com/developers/user-guide/connecting-to-data/connectors) from Pathway package. + +By default, the app uses a local data source to read documents from the `data` folder. + +#### Local Data Source + +The local data source is configured by using map with tag `!pw.io.fs.read`. Then set `path` to denote the path to a folder with files to be indexed. + +#### Google Drive Data Source + +The Google Drive data source is enabled by using map with tag `!pw.io.gdrive.read`. The map must contain two main parameters: +- `object_id`, containing the ID of the folder that needs to be indexed. It can be found from the URL in the web interface, where it's the last part of the address. For example, the publicly available demo folder in Google Drive has the URL `https://drive.google.com/drive/folders/1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs`. Consequently, the last part of this address is `1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs`, hence this is the `object_id` you would need to specify. +- `service_user_credentials_file`, containing the path to the credentials files for the Google [service account](https://cloud.google.com/iam/docs/service-account-overview). To get more details on setting up the service account and getting credentials, you can also refer to [this tutorial](https://pathway.com/developers/user-guide/connectors/gdrive-connector/#setting-up-google-drive). + +Besides, to speed up the indexing process you may want to specify the `refresh_interval` parameter, denoted by an integer number of seconds. It corresponds to the frequency between two sequential folder scans. If unset, it defaults to 30 seconds. -By default, we used OpenAI `gpt-3.5-turbo`. However, as done in the showcase, it is possible to use any LLM, including locally deployed LLMs. +For the full list of the available parameters, please refer to the Google Drive connector [documentation](https://pathway.com/developers/api-docs/pathway-io/gdrive#pathway.io.gdrive.read). 
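+
+As an illustration, here is a minimal sketch of a Google Drive entry in `$sources`; the folder ID below is the public demo folder mentioned above, and the credentials file name `gdrive_credentials.json` is only a placeholder:
+```yaml
+$sources:
+  - !pw.io.gdrive.read
+    object_id: "1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs"
+    service_user_credentials_file: "gdrive_credentials.json"
+    with_metadata: true
+```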
-If you are interested in building this app in a fully private & local setup, check out the [private RAG example](../private-rag/README.md) that uses `Mistral 7B` as the LLM with a local embedding model.
+#### SharePoint Data Source
 
-You can modify any of the used components by checking the options from: `from pathway.xpacks.llm import embedders, llms, parsers, splitters`.
-It is also possible to easily create new components by extending the [`pw.UDF`](https://pathway.com/developers/user-guide/data-transformation/user-defined-functions) class and implementing the `__wrapped__` function.
+This data source requires a Scale or Enterprise [license key](https://pathway.com/pricing) - you can obtain a free Scale key on the [Pathway website](https://pathway.com/get-license).
 
-To see the setup used in our work, check [the showcase](https://pathway.com/developers/templates/private-rag-ollama-mistral).
+To use it, set the map tag to `!pw.xpacks.connectors.sharepoint.read`, and then provide values for `url`, `tenant`, `client_id`, `cert_path`, `thumbprint` and `root_path`. To read about the meaning of these arguments, check the SharePoint connector [documentation](https://pathway.com/developers/api-docs/pathway-xpacks-sharepoint/#pathway.xpacks.connectors.sharepoint.read).
 
 ## Running the app
 
-To run the app you need to set your OpenAI API key, by setting the environmental variable `OPENAI_API_KEY` or creating an `.env` file in this directory with line `OPENAI_API_KEY=sk-...`. If you modify the code to use another LLM provider, you may need to set a relevant API key.
+To run the app, depending on the configuration, you may need to set environment variables with LLM provider keys. By default, this template uses the OpenAI API, so you need to set the `OPENAI_API_KEY` environment variable or create an `.env` file in this directory with your key: `OPENAI_API_KEY=sk-...`. If you modify the code to use another LLM provider, you may need to set a relevant API key.
 
 ### With Docker
 In order to let the pipeline get updated with each change in local files, you need to mount the folder into the Docker container. The following commands show how to do that.
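+
+As a sketch (the image tag `adaptive-rag` is only a placeholder name), the build and run steps typically look like this:
+```bash
+# Build the image from this folder
+docker build -t adaptive-rag .
+
+# Run it, mounting the local data folder so file changes are picked up, and exposing the REST API on port 8000
+docker run -v `pwd`/data:/app/data -p 8000:8000 adaptive-rag
+```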
diff --git a/examples/pipelines/adaptive-rag/app.py b/examples/pipelines/adaptive-rag/app.py index 68f224c..1a4599a 100644 --- a/examples/pipelines/adaptive-rag/app.py +++ b/examples/pipelines/adaptive-rag/app.py @@ -26,12 +26,17 @@ class App(BaseModel): port: int = 8000 with_cache: bool = True + cache_backend: InstanceOf[pw.persistence.Backend] = ( + pw.persistence.Backend.filesystem("./Cache") + ) terminate_on_error: bool = False def run(self) -> None: server = QASummaryRestServer(self.host, self.port, self.question_answerer) server.run( - with_cache=self.with_cache, terminate_on_error=self.terminate_on_error + with_cache=self.with_cache, + cache_backend=self.cache_backend, + terminate_on_error=self.terminate_on_error, ) model_config = ConfigDict(extra="forbid") diff --git a/examples/pipelines/adaptive-rag/app.yaml b/examples/pipelines/adaptive-rag/app.yaml index 6fb2fab..3380770 100644 --- a/examples/pipelines/adaptive-rag/app.yaml +++ b/examples/pipelines/adaptive-rag/app.yaml @@ -63,9 +63,14 @@ question_answerer: !pw.xpacks.llm.question_answering.AdaptiveRAGQuestionAnswerer strict_prompt: true -# Change host and port by uncommenting these files +# Change host and port by uncommenting these lines # host: "0.0.0.0" # port: 8000 +# Cache configuration # with_cache: true +# cache_backend: !pw.persistence.Backend.filesystem +# path: ".Cache" + +# Set `terminate_on_error` to true if you want the program to terminate whenever any error is encountered # terminate_on_error: false diff --git a/examples/pipelines/demo-document-indexing/README.md b/examples/pipelines/demo-document-indexing/README.md index 5a7e0d4..9b251e2 100644 --- a/examples/pipelines/demo-document-indexing/README.md +++ b/examples/pipelines/demo-document-indexing/README.md @@ -39,69 +39,84 @@ Finally, the embeddings are indexed with the capabilities of Pathway's machine-l This folder contains several objects: - `main.py`, the pipeline code using Pathway and written in Python; -- `sources_configuration.yaml`, the file containing configuration stubs for the data sources. It needs to be customized if you want to use the Google Drive data source or to change the filesystem directories that will be indexed; +- `app.yaml`, the file containing configuration of the pipeline, like embedding model, sources, or the server address; - `requirements.txt`, the textfile denoting the pip dependencies for running this pipeline. It can be passed to `pip install -r ...` to install everything that is needed to launch the pipeline locally; - `Dockerfile`, the Docker configuration for running the pipeline in the container; - `docker-compose.yml`, the docker-compose configuration for running the pipeline along with the chat UI; - `.env`, a short environment variables configuration file where the OpenAI key must be stored; - `files-for-indexing/`, a folder with exemplary files that can be used for the test runs. -## OpenAPI Key Configuration +## Customizing the pipeline -This example relies on the usage of OpenAI API, which is crucial to perform the embedding part. +The code can be modified by changing the `app.yaml` configuration file. To read more about YAML files used in Pathway templates, read [our guide](https://pathway.com/developers/user-guide/llm-xpack/yaml-templates). -**You need to have a working OpenAI key stored in the environment variable OPENAI_API_KEY**. +In the `app.yaml` file we define: +- input connectors +- embedder +- index +and any of these can be replaced or, if no longer needed, removed. 
For components that can be used check +Pathway [LLM xpack](https://pathway.com/developers/user-guide/llm-xpack/overview), or you can implement your own. -Please configure your key in a `.env` file by providing it as follows: `OPENAI_API_KEY=sk-*******`. You can refer to the stub file `.env` in this repository, where you will need to paste your key instead of `sk-*******`. +Here some examples of what can be modified. -## Sources configuration +### Embedding Model -You can configure data sources used for indexing by editing the configuration file. Here we provide the template config `sources_configuration.yaml` for these purposes. It contains stubs for the three possible input types - please refer to the examples. +By default this template uses locally run model `mixedbread-ai/mxbai-embed-large-v1`. If you wish, you can replace this with any other model, by changing +`$embedder` in `app.yaml`. For example, to use OpenAI embedder, set: +```yaml +$embedder: !pw.xpacks.llm.embedders.OpenAIEmbedder + model: "text-embedding-ada-002" + cache_strategy: !pw.udfs.DiskCache +``` -Each section of the config requires the specification of a data source type along with its parameters, such as the filesystem path, credentials, etc. The available kinds are `local`, `gdrive`, and `sharepoint`. The sections below describe the essential parameters that need to be specified for each of those sources. +If you choose to use a provider, that requires API key, remember to set appropriate environmental values (you can also set them in `.env` file). -### Local Data Source +### Webserver -The local data source is configured by setting the `kind` parameter to `local`. +You can configure the host and the port of the webserver. +Here is the default configuration: +```yaml +host: "0.0.0.0" +port: 8000 +``` -The section `config` must contain the string parameter `path` denoting the path to a folder with files to be indexed. +### Cache -For the full list of the available configuration options, please refer to the filesystem connector [documentation](https://pathway.com/developers/api-docs/pathway-io/gdrive#pathway.io.fs.read). +You can configure whether you want to enable cache, to avoid repeated API accesses, and where the cache is stored. +Default values: +```yaml +with_cache: True +cache_backend: !pw.persistence.Backend.filesystem + path: ".Cache" +``` -### Google Drive Data Source +### Data sources -The Google Drive data source is enabled by setting the `kind` parameter to `gdrive`. +You can configure the data sources by changing `$sources` in `app.yaml`. +You can add as many data sources as you want. You can have several sources of the same kind, for instance, several local sources from different folders. +The sections below describe how to configure local, Google Drive and Sharepoint source, but you can use any input [connector](https://pathway.com/developers/user-guide/connecting-to-data/connectors) from Pathway package. -The section `config` must contain two main parameters: -- `object_id`, containing the ID of the folder that needs to be indexed. It can be found from the URL in the web interface, where it's the last part of the address. For example, the publicly available demo folder in Google Drive has the URL `https://drive.google.com/drive/folders/1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs`. Consequently, the last part of this address is `1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs`, hence this is the `object_id` you would need to specify. 
-- `service_user_credentials_file`, containing the path to the credentials files for the Google [service account](https://cloud.google.com/iam/docs/service-account-overview). To get more details on setting up the service account and getting credentials, you can also refer to [this tutorial](https://pathway.com/developers/user-guide/connectors/gdrive-connector/#setting-up-google-drive). +By default, the app uses a local data source to read documents from the `data` folder. -Besides, to speed up the indexing process you may want to specify the `refresh_interval` parameter, denoted by an integer number of seconds. It corresponds to the frequency between two sequential folder scans. If unset, it defaults to 30 seconds. +#### Local Data Source -For the full list of the available parameters, please refer to the Google Drive connector [documentation](https://pathway.com/developers/api-docs/pathway-io/gdrive#pathway.io.gdrive.read). +The local data source is configured by using map with tag `!pw.io.fs.read`. Then set `path` to denote the path to a folder with files to be indexed. -#### Using Provided Demo Folder +#### Google Drive Data Source -We provide a publicly available folder in Google Drive for demo purposes; you can access it [here](https://drive.google.com/drive/folders/1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs). - -A default configuration for the Google Drive source in `sources_configuration.yaml` is available and connects to the folder: uncomment the corresponding part and replace `SERVICE_CREDENTIALS` with the path to the credentials file. - -Once connected, you can upload files to the folder, which will be indexed by Pathway. -Note that this folder is publicly available, and you cannot remove anything: **please be careful not to upload files containing any sensitive information**. +The Google Drive data source is enabled by using map with tag `!pw.io.gdrive.read`. The map must contain two main parameters: +- `object_id`, containing the ID of the folder that needs to be indexed. It can be found from the URL in the web interface, where it's the last part of the address. For example, the publicly available demo folder in Google Drive has the URL `https://drive.google.com/drive/folders/1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs`. Consequently, the last part of this address is `1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs`, hence this is the `object_id` you would need to specify. +- `service_user_credentials_file`, containing the path to the credentials files for the Google [service account](https://cloud.google.com/iam/docs/service-account-overview). To get more details on setting up the service account and getting credentials, you can also refer to [this tutorial](https://pathway.com/developers/user-guide/connectors/gdrive-connector/#setting-up-google-drive). -#### Using a Custom Folder +Besides, to speed up the indexing process you may want to specify the `refresh_interval` parameter, denoted by an integer number of seconds. It corresponds to the frequency between two sequential folder scans. If unset, it defaults to 30 seconds. -If you want to test the indexing pipeline with the data you wouldn't like to share with others, it's possible: with your service account, you won't have to share the folders you've created in your private Google Drive. +For the full list of the available parameters, please refer to the Google Drive connector [documentation](https://pathway.com/developers/api-docs/pathway-io/gdrive#pathway.io.gdrive.read). 
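+
+Putting the above together, a `$sources` list that indexes two local folders and the public demo Drive folder could be sketched as follows (the folder paths and the credentials file name are placeholders):
+```yaml
+$sources:
+  - !pw.io.fs.read
+    path: "data"
+    format: "binary"
+    with_metadata: true
+  - !pw.io.fs.read
+    path: "more-documents"
+    format: "binary"
+    with_metadata: true
+  - !pw.io.gdrive.read
+    object_id: "1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs"
+    service_user_credentials_file: "gdrive_credentials.json"
+    refresh_interval: 30
+```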
-Therefore, all you would need to do is the following: -- Create a service account and download the credentials that will be used; -- For running the demo, create your folder in Google Drive and don't share it. +#### SharePoint Data Source -### SharePoint Data Source +This data source requires Scale or Enterprise [license key](https://pathway.com/pricing) - you can obtain free Scale key on [Pathway website](https://pathway.com/get-license). -This data source is the part of commercial Pathway offering. You can try it online in one of the following demos: -- The real-time document indexing pipeline with similarity search, available on the [Hosted Pipelines](https://pathway.com/solutions/ai-pipelines) webpage; -- The chatbot answering questions about the uploaded files, available on [Streamlit](https://chat-realtime-sharepoint-gdrive.demo.pathway.com/). +To use it, set the map tag to be `!pw.xpacks.connectors.sharepoint.read`, and then provide values of `url`, `tenant`, `client_id`, `cert_path`, `thumbprint` and `root_path`. To read about the meaning of these arguments, check the Sharepoint connector [documentation](https://pathway.com/developers/api-docs/pathway-xpacks-sharepoint/#pathway.xpacks.connectors.sharepoint.read). ## Running the Example diff --git a/examples/pipelines/demo-document-indexing/app.py b/examples/pipelines/demo-document-indexing/app.py index cee2cfb..70ca7a0 100644 --- a/examples/pipelines/demo-document-indexing/app.py +++ b/examples/pipelines/demo-document-indexing/app.py @@ -26,12 +26,17 @@ class App(BaseModel): port: int = 8000 with_cache: bool = True + cache_backend: InstanceOf[pw.persistence.Backend] = ( + pw.persistence.Backend.filesystem("./Cache") + ) terminate_on_error: bool = False def run(self) -> None: server = DocumentStoreServer(self.host, self.port, self.document_store) server.run( - with_cache=self.with_cache, terminate_on_error=self.terminate_on_error + with_cache=self.with_cache, + cache_backend=self.cache_backend, + terminate_on_error=self.terminate_on_error, ) model_config = ConfigDict(extra="forbid") diff --git a/examples/pipelines/demo-document-indexing/app.yaml b/examples/pipelines/demo-document-indexing/app.yaml index faf40a1..3ac1ba3 100644 --- a/examples/pipelines/demo-document-indexing/app.yaml +++ b/examples/pipelines/demo-document-indexing/app.yaml @@ -24,14 +24,6 @@ $sources: # with_metadata: true # refresh_interval: 30 -$llm: !pw.xpacks.llm.llms.OpenAIChat - model: "gpt-3.5-turbo" - retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy - max_retries: 6 - cache_strategy: !pw.udfs.DiskCache - temperature: 0.05 - capacity: 8 - $embedding_model: "mixedbread-ai/mxbai-embed-large-v1" $embedder: !pw.xpacks.llm.embedders.SentenceTransformerEmbedder @@ -62,5 +54,10 @@ document_store: !pw.xpacks.llm.document_store.DocumentStore # host: "0.0.0.0" # port: 8000 +# Cache configuration # with_cache: true +# cache_backend: !pw.persistence.Backend.filesystem +# path: ".Cache" + +# Set `terminate_on_error` to true if you want the program to terminate whenever any error is encountered # terminate_on_error: false diff --git a/examples/pipelines/demo-question-answering/README.md b/examples/pipelines/demo-question-answering/README.md index e8a8637..5cf959c 100644 --- a/examples/pipelines/demo-question-answering/README.md +++ b/examples/pipelines/demo-question-answering/README.md @@ -31,7 +31,7 @@ Note: This app relies on [Pathway Vector store](https://pathway.com/developers/a # Table of content - [Summary of available 
endpoints](#Summary-of-available-endpoints) - [How it works](#How-it-works) -- [Configuring the app](#Configuration) +- [Customizing the pipeline](#Customizing-the-pipeline) - [How to run the project](#How-to-run-the-project) - [Using the app](#Query-the-documents) @@ -63,7 +63,7 @@ Finally, the embeddings are indexed with the capabilities of Pathway's machine-l This folder contains several objects: - `app.py`, the application code using Pathway and written in Python; -- `config.yaml`, the file containing configuration stubs for the data sources, the OpenAI LLM model, and the web server. It needs to be customized if you want to change the LLM model, use the Google Drive data source or change the filesystem directories that will be indexed; +- `app.yaml`, the file containing configuration of the pipeline, like LLM models, sources or server address; - `requirements.txt`, the dependencies for the pipeline. It can be passed to `pip install -r ...` to install everything that is needed to launch the pipeline locally; - `Dockerfile`, the Docker configuration for running the pipeline in the container; - `.env`, a short environment variables configuration file where the OpenAI key must be stored; @@ -88,53 +88,66 @@ Don't hesitate to take a look at our [documentation](https://pathway.com/develop ## OpenAI API Key Configuration -This example relies on the usage of OpenAI API, which is crucial to perform the embedding part. +Default LLM provider in this template is OpenAI, so, unless you change the configuration, you need to provide OpenAI API key. Please configure your key in a `.env` file by providing it as follows: `OPENAI_API_KEY=sk-*******`. You can refer to the stub file `.env` in this repository, where you will need to paste your key instead of `sk-*******`. -**You need to have a working OpenAI key stored in the environment variable OPENAI_API_KEY**. +## Customizing the pipeline -Please configure your key in a `.env` file by providing it as follows: `OPENAI_API_KEY=sk-*******`. You can refer to the stub file `.env` in this repository, where you will need to paste your key instead of `sk-*******`. +The code can be modified by changing the `app.yaml` configuration file. To read more about YAML files used in Pathway templates, read [our guide](https://pathway.com/developers/user-guide/llm-xpack/yaml-templates). -You can also set the key in the `app.py` while initializing the embedder and chat instances as follows; +In the `app.yaml` file we define: +- input connectors +- LLM +- embedder +- index +and any of these can be replaced or, if no longer needed, removed. For components that can be used check +Pathway [LLM xpack](https://pathway.com/developers/user-guide/llm-xpack/overview), or you can implement your own. + +You can also check our other templates - [demo-question-answering](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/demo-question-answering), +[Multimodal RAG](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/gpt_4o_multimodal_rag) or +[Private RAG](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/private-rag). As all of these only differ +in the YAML configuration file, you can also use them as an inspiration for your custom pipeline. -```python -chat = llms.OpenAIChat(api_key='sk-...', ...) +Here some examples of what can be modified. -embedder = embedders.OpenAIEmbedder(api_key='sk-...', ...) -``` - -If you want to use another model, you should put the associated key here. 
- -## Configuration - -By modifying the `conf.yaml` file, you can configure the following options: -- the Open AI LLM model -- the webserver -- the cache options -- the data sources - -### Model +### LLM Model You can choose any of the GPT-3.5 Turbo, GPT-4, or GPT-4 Turbo models proposed by Open AI. You can find the whole list on their [models page](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo). -You simply need to change the model to the one you want to use: +You simply need to change the `model` to the one you want to use: ```yaml -llm_config: - model: "gpt-4-0613" +$llm: !pw.xpacks.llm.llms.OpenAIChat + model: "gpt-3.5-turbo" + retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy + max_retries: 6 + cache_strategy: !pw.udfs.DiskCache + temperature: 0.05 + capacity: 8 ``` The default model is `gpt-3.5-turbo` -Note that if you want to use different models, such as the ones provided by HuggingFace, you will need to change the `run` function in `app.py`. You can use [Pathway LLM xpack](https://pathway.com/developers/user-guide/llm-xpack/overview) to access the model of your choice. Don't forget to update your key. +You can also use different provider, by using different class from [Pathway LLM xpack](https://pathway.com/developers/user-guide/llm-xpack/overview), +e.g. here is configuration for locally run Mistral model. + +```yaml +$llm: !pw.xpacks.llm.llms.LiteLLMChat + model: "ollama/mistral" + retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy + max_retries: 6 + cache_strategy: !pw.udfs.DiskCache + temperature: 0 + top_p: 1 + api_base: "http://localhost:11434" +``` ### Webserver You can configure the host and the port of the webserver. Here is the default configuration: ```yaml -host_config: - host: "0.0.0.0" - port: 8000 +host: "0.0.0.0" +port: 8000 ``` ### Cache @@ -142,34 +155,26 @@ host_config: You can configure whether you want to enable cache, to avoid repeated API accesses, and where the cache is stored. Default values: ```yaml -cache_options: - with_cache: True - cache_folder: "./Cache" +with_cache: True +cache_backend: !pw.persistence.Backend.filesystem + path: ".Cache" ``` ### Data sources -You can configure the data sources in the `config.source` part of the `conf.yaml`. -You can add as many data sources as you want, but the demo supports only three kinds: `local`, `gdrive`, and `sharepoint`. You can have several sources of the same kind, for instance, several local sources from different folders. -The sections below describe the essential parameters that need to be specified for each of those sources. +You can configure the data sources by changing `$sources` in `app.yaml`. +You can add as many data sources as you want. You can have several sources of the same kind, for instance, several local sources from different folders. +The sections below describe how to configure local, Google Drive and Sharepoint source, but you can use any input [connector](https://pathway.com/developers/user-guide/connecting-to-data/connectors) from Pathway package. By default, the app uses a local data source to read documents from the `data` folder. -You can use other kind of data sources using the different [connectors](https://pathway.com/developers/user-guide/connecting-to-data/connectors) provided by Pathway. -To do so, you need to add them in `data_sources` in `app.py` - - #### Local Data Source -The local data source is configured by setting the `kind` parameter to `local`. 
- -The section `config` must contain the string parameter `path` denoting the path to a folder with files to be indexed. +The local data source is configured by using map with tag `!pw.io.fs.read`. Then set `path` to denote the path to a folder with files to be indexed. #### Google Drive Data Source -The Google Drive data source is enabled by setting the `kind` parameter to `gdrive`. - -The section `config` must contain two main parameters: +The Google Drive data source is enabled by using map with tag `!pw.io.gdrive.read`. The map must contain two main parameters: - `object_id`, containing the ID of the folder that needs to be indexed. It can be found from the URL in the web interface, where it's the last part of the address. For example, the publicly available demo folder in Google Drive has the URL `https://drive.google.com/drive/folders/1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs`. Consequently, the last part of this address is `1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs`, hence this is the `object_id` you would need to specify. - `service_user_credentials_file`, containing the path to the credentials files for the Google [service account](https://cloud.google.com/iam/docs/service-account-overview). To get more details on setting up the service account and getting credentials, you can also refer to [this tutorial](https://pathway.com/developers/user-guide/connectors/gdrive-connector/#setting-up-google-drive). @@ -177,28 +182,11 @@ Besides, to speed up the indexing process you may want to specify the `refresh_i For the full list of the available parameters, please refer to the Google Drive connector [documentation](https://pathway.com/developers/api-docs/pathway-io/gdrive#pathway.io.gdrive.read). -#### Using the Provided Demo Folder - -We provide a publicly available folder in Google Drive for demo purposes; you can access it [here](https://drive.google.com/drive/folders/1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs). - -A default configuration for the Google Drive source in `config.yaml` is available and connects to the folder: uncomment the corresponding part and replace `SERVICE_CREDENTIALS` with the path to the credentials file. - -Once connected, you can upload files to the folder, which will be indexed by Pathway. -Note that this folder is publicly available, and you cannot remove anything: **please be careful not to upload files containing any sensitive information**. - -#### Using a Custom Folder - -If you want to test the indexing pipeline with the data you wouldn't like to share with others, it's possible: with your service account, you won't have to share the folders you've created in your private Google Drive. - -Therefore, all you would need to do is the following: -- Create a service account and download the credentials that will be used; -- For running the demo, create your folder in Google Drive and don't share it. - #### SharePoint Data Source -This data source is the part of commercial Pathway offering. You can try it online in one of the following demos: -- The real-time document indexing pipeline with similarity search, available on the [Hosted Pipelines](https://pathway.com/solutions/ai-pipelines) webpage; -- The chatbot answering questions about the uploaded files, available on [Streamlit](https://chat-realtime-sharepoint-gdrive.demo.pathway.com/). +This data source requires Scale or Enterprise [license key](https://pathway.com/pricing) - you can obtain free Scale key on [Pathway website](https://pathway.com/get-license). 
+ +To use it, set the map tag to be `!pw.xpacks.connectors.sharepoint.read`, and then provide values of `url`, `tenant`, `client_id`, `cert_path`, `thumbprint` and `root_path`. To read about the meaning of these arguments, check the Sharepoint connector [documentation](https://pathway.com/developers/api-docs/pathway-xpacks-sharepoint/#pathway.xpacks.connectors.sharepoint.read). ## How to run the project diff --git a/examples/pipelines/demo-question-answering/app.py b/examples/pipelines/demo-question-answering/app.py index 68f224c..1a4599a 100644 --- a/examples/pipelines/demo-question-answering/app.py +++ b/examples/pipelines/demo-question-answering/app.py @@ -26,12 +26,17 @@ class App(BaseModel): port: int = 8000 with_cache: bool = True + cache_backend: InstanceOf[pw.persistence.Backend] = ( + pw.persistence.Backend.filesystem("./Cache") + ) terminate_on_error: bool = False def run(self) -> None: server = QASummaryRestServer(self.host, self.port, self.question_answerer) server.run( - with_cache=self.with_cache, terminate_on_error=self.terminate_on_error + with_cache=self.with_cache, + cache_backend=self.cache_backend, + terminate_on_error=self.terminate_on_error, ) model_config = ConfigDict(extra="forbid") diff --git a/examples/pipelines/demo-question-answering/app.yaml b/examples/pipelines/demo-question-answering/app.yaml index a293544..5cbe8a2 100644 --- a/examples/pipelines/demo-question-answering/app.yaml +++ b/examples/pipelines/demo-question-answering/app.yaml @@ -63,5 +63,10 @@ question_answerer: !pw.xpacks.llm.question_answering.BaseRAGQuestionAnswerer # host: "0.0.0.0" # port: 8000 +# Cache configuration # with_cache: true +# cache_backend: !pw.persistence.Backend.filesystem +# path: ".Cache" + +# Set `terminate_on_error` to true if you want the program to terminate whenever any error is encountered # terminate_on_error: false diff --git a/examples/pipelines/gpt_4o_multimodal_rag/README.md b/examples/pipelines/gpt_4o_multimodal_rag/README.md index 2061fcb..e0a8fdc 100644 --- a/examples/pipelines/gpt_4o_multimodal_rag/README.md +++ b/examples/pipelines/gpt_4o_multimodal_rag/README.md @@ -32,8 +32,8 @@ This includes the technical details to the steps to create a REST Endpoint to ru - [Overview](#Overview) - [Architecture](#Architecture) - [Pipeline Organization](#Pipeline-Organization) +- [Customizing the pipeline](#Customizing-the-pipeline) - [Running the app](#Running-the-app) -- [Modifying the code](#Modifying-the-code) - [Conclusion](#Conclusion) @@ -59,20 +59,94 @@ For more advanced RAG options, make sure to check out [rerankers](https://pathwa ## Pipeline Organization This folder contains several objects: -- `app.py`, the main application code using Pathway and written in Python. This script sets up the document processing pipeline, including data ingestion, LLM configuration, and server initialization. - - **Input Sources**: The `folder` variable specifies the local folders and files to be processed. This can be extended to include other sources like Google Drive or SharePoint. - - **LLM Configuration**: Utilizes `GPT-4o` for chat-based question answering, configured with retry and cache strategies. - - **Document Parsing and Embedding**: Uses `OpenParse` for parsing documents and `OpenAIEmbedder` for embedding text. - - **table_args**: Configures table parsing with algorithms like "llm", "unitable", "pymupdf", or "table-transformers". - - **parse_images**: Handles and processes images within PDFs, enabling work with tables, charts, and images. 
- - **Vector Store**: The `VectorStoreServer` handles indexing the documents and retrieving relevant chunks for answering questions. - - **Server Setup**: The `BaseRAGQuestionAnswerer` class sets up the REST endpoint for serving the RAG application. - - **Running Options**: The pipeline includes options for caching and parallel processing to optimize performance. +- `app.py`, the main application code using Pathway and written in Python. It reads configuration from `app.yaml`, and runs a server answering queries to the defined pipeline. +- `app.yaml`, YAML configuration file, that defines components of the pipeline. - `Dockerfile`, the Docker configuration for running the pipeline in a container. It includes instructions for installing dependencies and setting up the runtime environment. - `requirements.txt`, the dependencies for the pipeline. This file can be passed to `pip install -r requirements.txt` to install everything needed to launch the pipeline locally. - `.env`, a short environment variables configuration file where the OpenAI key must be stored. This file ensures secure handling of sensitive information. - `data/`, a folder with exemplary files that can be used for test runs. It includes sample financial documents to demonstrate the pipeline's capabilities. +## Customizing the pipeline + +The code can be modified by changing the `app.yaml` configuration file. To read more about YAML files used in Pathway templates, read [our guide](https://pathway.com/developers/user-guide/llm-xpack/yaml-templates). + +In the `app.yaml` file we define: +- input connectors +- LLM +- embedder +- index +and any of these can be replaced or, if no longer needed, removed. For components that can be used check +Pathway [LLM xpack](https://pathway.com/developers/user-guide/llm-xpack/overview), or you can implement your own. + +You can also check our other templates - [demo-question-answering](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/demo-question-answering), +[Multimodal RAG](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/gpt_4o_multimodal_rag) or +[Private RAG](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/private-rag). As all of these only differ +in the YAML configuration file, you can also use them as an inspiration for your custom pipeline. + +Here some examples of what can be modified. + +### LLM Model + +This template by default uses two llm models - GPT-3.5 Turbo for answering queries and GPT-4o for parsing tables and images. + +You can replace GPT-3.5 Turbo with other Open AI models, like GPT-4, or GPT-4 Turbo. +You can find the whole list on their [models page](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo). + +You simply need to change the `model` to the one you want to use: +```yaml +$llm: !pw.xpacks.llm.llms.OpenAIChat + model: "gpt-3.5-turbo" + retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy + max_retries: 6 + cache_strategy: !pw.udfs.DiskCache + temperature: 0.05 + capacity: 8 +``` + +You can also use different provider, by using different class from [Pathway LLM xpack](https://pathway.com/developers/user-guide/llm-xpack/overview), +e.g. here is configuration for locally run Mistral model. 
+ +```yaml +$llm: !pw.xpacks.llm.llms.LiteLLMChat + model: "ollama/mistral" + retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy + max_retries: 6 + cache_strategy: !pw.udfs.DiskCache + temperature: 0 + top_p: 1 + api_base: "http://localhost:11434" +``` + +You can also change LLM used for parsing in the same way, by changing `!parsing_llm` in `app.yaml`, just keep in mind to use a multimodal model. + +### Webserver + +You can configure the host and the port of the webserver. +Here is the default configuration: +```yaml +host: "0.0.0.0" +port: 8000 +``` + +### Cache + +You can configure whether you want to enable cache, to avoid repeated API accesses, and where the cache is stored. +Default values: +```yaml +with_cache: True +cache_backend: !pw.persistence.Backend.filesystem + path: ".Cache" +``` + +### Data sources + +You can configure the data sources by changing `$sources` in `app.yaml`. +You can add as many data sources as you want. You can have several sources of the same kind, for instance, several local sources from different folders. +The sections below describe how to configure local, Google Drive and Sharepoint source, but you can use any input [connector](https://pathway.com/developers/user-guide/connecting-to-data/connectors) from Pathway package. + +By default, the app uses a local data source to read documents from the `data` folder. + + ## Running the app @@ -103,8 +177,8 @@ You can omit the ```-v `pwd`/data:/app/data``` part if you are not using local f # Make sure you are in the right directory. cd examples/pipelines/gpt_4o_multimodal_rag/ -# Build the image in this folder, make sure you have the latest Pathway image -docker build --pull -t rag . +# Build the image in this folder +docker build -t rag . # Run the image, mount the `data` folder into image and expose the port `8000` docker run -v `pwd`/data:/app/data -p 8000:8000 rag @@ -162,20 +236,6 @@ curl -X 'POST' 'http://0.0.0.0:8000/v1/pw_ai_answer' -H 'accept: */*' -H ' Looking good! -## Modifying the pipeline - -This template is easily configurable in the `app.yaml` file. In there you can define: -- input sources -- LLM -- embedder -- index -- host and port to run the app - -You can modify any of the components by checking the options from the [Pathway LLM xpack](https://pathway.com/developers/api-docs/pathway-xpacks-llm). - -It is also possible to easily create new components by extending the [`pw.UDF`](https://pathway.com/developers/user-guide/data-transformation/user-defined-functions) class and implementing the `__wrapped__` function. - - ## Conclusion This showcase demonstrates setting up a powerful RAG pipeline with advanced table parsing capabilities, unlocking new finance use cases. 
While we've only scratched the surface, there's more to explore: diff --git a/examples/pipelines/gpt_4o_multimodal_rag/app.py b/examples/pipelines/gpt_4o_multimodal_rag/app.py index 623c38a..3550c8c 100644 --- a/examples/pipelines/gpt_4o_multimodal_rag/app.py +++ b/examples/pipelines/gpt_4o_multimodal_rag/app.py @@ -36,12 +36,17 @@ class App(BaseModel): port: int = 8000 with_cache: bool = True + cache_backend: InstanceOf[pw.persistence.Backend] = ( + pw.persistence.Backend.filesystem("./Cache") + ) terminate_on_error: bool = False def run(self) -> None: server = QASummaryRestServer(self.host, self.port, self.question_answerer) server.run( - with_cache=self.with_cache, terminate_on_error=self.terminate_on_error + with_cache=self.with_cache, + cache_backend=self.cache_backend, + terminate_on_error=self.terminate_on_error, ) model_config = ConfigDict(extra="forbid") diff --git a/examples/pipelines/gpt_4o_multimodal_rag/app.yaml b/examples/pipelines/gpt_4o_multimodal_rag/app.yaml index 42bed4a..a00f1c5 100644 --- a/examples/pipelines/gpt_4o_multimodal_rag/app.yaml +++ b/examples/pipelines/gpt_4o_multimodal_rag/app.yaml @@ -36,8 +36,22 @@ $embedder: !pw.xpacks.llm.embedders.OpenAIEmbedder model: "text-embedding-ada-002" cache_strategy: !pw.udfs.DiskCache +$parsing_llm: !pw.xpacks.llm.llms.OpenAIChat + model: "gpt-4o" + retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy + max_retries: 6 + cache_strategy: !pw.udfs.DiskCache + $parser: !pw.xpacks.llm.parsers.OpenParse cache_strategy: !pw.udfs.DiskCache + table_args: + parsing_algorithm: "llm" + llm: $parsing_llm + prompt: pw.xpacks.llm.prompts.DEFAULT_MD_TABLE_PARSE_PROMPT + image_args: + parsing_algorithm: "llm" + llm: $parsing_llm + prompt: pw.xpacks.llm.prompts.DEFAULT_IMAGE_PARSE_PROMPT $retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory reserved_space: 1000 @@ -60,5 +74,11 @@ question_answerer: !pw.xpacks.llm.question_answering.BaseRAGQuestionAnswerer # host: "0.0.0.0" # port: 8000 + +# Cache configuration # with_cache: true +# cache_backend: !pw.persistence.Backend.filesystem +# path: ".Cache" + +# Set `terminate_on_error` to true if you want the program to terminate whenever any error is encountered # terminate_on_error: false diff --git a/examples/pipelines/private-rag/README.md b/examples/pipelines/private-rag/README.md index fc1e64b..39838a6 100644 --- a/examples/pipelines/private-rag/README.md +++ b/examples/pipelines/private-rag/README.md @@ -45,20 +45,91 @@ The architecture consists of two connected technology bricks, which will run as - Pathway brings support for real-time data synchronization pipelines out of the box, and the possibility of secure private document handling with enterprise connectors for synchronizing Sharepoint and Google Drive incrementally. The Pathway service you'll run performs live document indexing pipeline, and will use Pathway’s built-in vector store. - The language model you use will be a Mistral 7B, which you will locally deploy as an Ollama service. This model was chosen for its performance and compact size. +## Customizing the pipeline -## Deploying and using a local LLM +The code can be modified by changing the `app.yaml` configuration file. To read more about YAML files used in Pathway templates, read [our guide](https://pathway.com/developers/user-guide/llm-xpack/yaml-templates). + +In the `app.yaml` file we define: +- input connectors +- LLM +- embedder +- index +and any of these can be replaced or, if no longer needed, removed. 
For components that can be used check +Pathway [LLM xpack](https://pathway.com/developers/user-guide/llm-xpack/overview), or you can implement your own. + +You can also check our other templates - [demo-question-answering](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/demo-question-answering), +[Multimodal RAG](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/gpt_4o_multimodal_rag) or +[Private RAG](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/private-rag). As all of these only differ +in the YAML configuration file, you can also use them as an inspiration for your custom pipeline. + +Here some examples of what can be modified. + +### LLM Model + +This template is prepared to run by default locally. However, the pipeline is LLM model agnostic, so you can change them to use other locally deployed model, or even +use LLM model available through API calls. For discussion on models used in this template check [the dedicated Section](#deploying-and-using-a-local-LLM). + +### Webserver + +You can configure the host and the port of the webserver. +Here is the default configuration: +```yaml +host: "0.0.0.0" +port: 8000 +``` + +### Cache + +You can configure whether you want to enable cache, to avoid repeated API accesses, and where the cache is stored. +Default values: +```yaml +with_cache: True +cache_backend: !pw.persistence.Backend.filesystem + path: ".Cache" +``` + +### Data sources + +You can configure the data sources by changing `$sources` in `app.yaml`. +You can add as many data sources as you want. You can have several sources of the same kind, for instance, several local sources from different folders. +The sections below describe how to configure local, Google Drive and Sharepoint source, but you can use any input [connector](https://pathway.com/developers/user-guide/connecting-to-data/connectors) from Pathway package. + +By default, the app uses a local data source to read documents from the `data` folder. + +#### Local Data Source +The local data source is configured by using map with tag `!pw.io.fs.read`. Then set `path` to denote the path to a folder with files to be indexed. + +#### Google Drive Data Source + +The Google Drive data source is enabled by using map with tag `!pw.io.gdrive.read`. The map must contain two main parameters: +- `object_id`, containing the ID of the folder that needs to be indexed. It can be found from the URL in the web interface, where it's the last part of the address. For example, the publicly available demo folder in Google Drive has the URL `https://drive.google.com/drive/folders/1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs`. Consequently, the last part of this address is `1cULDv2OaViJBmOfG5WB0oWcgayNrGtVs`, hence this is the `object_id` you would need to specify. +- `service_user_credentials_file`, containing the path to the credentials files for the Google [service account](https://cloud.google.com/iam/docs/service-account-overview). To get more details on setting up the service account and getting credentials, you can also refer to [this tutorial](https://pathway.com/developers/user-guide/connectors/gdrive-connector/#setting-up-google-drive). + +Besides, to speed up the indexing process you may want to specify the `refresh_interval` parameter, denoted by an integer number of seconds. It corresponds to the frequency between two sequential folder scans. If unset, it defaults to 30 seconds. 
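+
+For instance, a Drive source that rescans the folder every 30 seconds could be sketched like this (the folder ID and the credentials path are placeholders):
+```yaml
+$sources:
+  - !pw.io.gdrive.read
+    object_id: "YOUR_FOLDER_ID"
+    service_user_credentials_file: "credentials.json"
+    refresh_interval: 30
+    with_metadata: true
+```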
+ +For the full list of the available parameters, please refer to the Google Drive connector [documentation](https://pathway.com/developers/api-docs/pathway-io/gdrive#pathway.io.gdrive.read). + +#### SharePoint Data Source + +This data source requires Scale or Enterprise [license key](https://pathway.com/pricing) - you can obtain free Scale key on [Pathway website](https://pathway.com/get-license). + +To use it, set the map tag to be `!pw.xpacks.connectors.sharepoint.read`, and then provide values of `url`, `tenant`, `client_id`, `cert_path`, `thumbprint` and `root_path`. To read about the meaning of these arguments, check the Sharepoint connector [documentation](https://pathway.com/developers/api-docs/pathway-xpacks-sharepoint/#pathway.xpacks.connectors.sharepoint.read). + + +## Deploying and using a local LLM ### Embedding Model Selection You will use `pathway.xpacks.llm.embedders` module to load open-source embedding models from the HuggingFace model library. For this showcase, pick the `avsolatorio/GIST-small-Embedding-v0` model which has a dimension of 384 as it is compact and performed well in our tests. -```python -embedding_model = "avsolatorio/GIST-small-Embedding-v0" +```yaml +$embedding_model: "avsolatorio/GIST-small-Embedding-v0" -embedder = embedders.SentenceTransformerEmbedder( - embedding_model, call_kwargs={"show_progress_bar": False} -) +$embedder: !pw.xpacks.llm.embedders.SentenceTransformerEmbedder + model: $embedding_model + call_kwargs: + show_progress_bar: False ``` If you would like to use a higher-dimensional model, here are some possible alternatives you could use instead: @@ -92,19 +163,23 @@ curl -X POST http://localhost:11434/api/generate -d '{ Now you will initialize the LLM instance that will call the local model. -```python -model = LiteLLMChat( - model="ollama/mistral", - temperature=0, - top_p=1, - api_base="http://localhost:11434", # local deployment - format="json", # only available in Ollama local deploy, do not use in Mistral API -) +```yaml +$llm_model: "ollama/mistral" + +$llm: !pw.xpacks.llm.llms.LiteLLMChat + model: $llm_model + retry_strategy: !pw.udfs.ExponentialBackoffRetryStrategy + max_retries: 6 + cache_strategy: !pw.udfs.DiskCache + temperature: 0 + top_p: 1 + format: "json" # only available in Ollama local deploy, not usable in Mistral API + api_base: "http://localhost:11434" ``` ## Running the app -First, make sure your local LLM is up and running. By default, the pipeline tries to access the LLM at `http://localhost:11434`. You can change that by setting `LLM_API_BASE` environmental variable or creating `.env` file which sets its value. +First, make sure your local LLM is up and running. By default, the pipeline tries to access the LLM at `http://localhost:11434`. You can change that by setting `api_base` value in the app.yaml file. ### With Docker In order to let the pipeline get updated with each change in local files, you need to mount the folder onto the docker. The following commands show how to do that. @@ -141,22 +216,6 @@ curl -X 'POST' 'http://0.0.0.0:8000/v1/pw_ai_answer' -H 'accept: */*' -H ' > `December 21, 2015 [6]` -## Modifying the code - -Under the main function, we define: -- input folders -- LLM -- embedder -- index -- host and port to run the app -- run options (caching, cache folder) - -By default, we used locally deployed `Mistral 7B`. App is LLM agnostic and, it is possible to use any LLM. 
-You can modify any of the components by checking the options from the imported modules: `from pathway.xpacks.llm import embedders, llms, parsers, splitters`. - -It is also possible to easily create new components by extending the [`pw.UDF`](https://pathway.com/developers/user-guide/data-transformation/user-defined-functions) class and implementing the `__wrapped__` function. - - ## Conclusion: Now you have a fully private RAG set up with Pathway and Ollama. All your data remains safe on your system. Moreover, the set-up is optimized for speed, thanks to how Ollama runs the LLM, and how Pathway’s adaptive retrieval mechanism reduces token consumption while preserving the accuracy of the RAG. diff --git a/examples/pipelines/private-rag/app.py b/examples/pipelines/private-rag/app.py index 68f224c..1a4599a 100644 --- a/examples/pipelines/private-rag/app.py +++ b/examples/pipelines/private-rag/app.py @@ -26,12 +26,17 @@ class App(BaseModel): port: int = 8000 with_cache: bool = True + cache_backend: InstanceOf[pw.persistence.Backend] = ( + pw.persistence.Backend.filesystem("./Cache") + ) terminate_on_error: bool = False def run(self) -> None: server = QASummaryRestServer(self.host, self.port, self.question_answerer) server.run( - with_cache=self.with_cache, terminate_on_error=self.terminate_on_error + with_cache=self.with_cache, + cache_backend=self.cache_backend, + terminate_on_error=self.terminate_on_error, ) model_config = ConfigDict(extra="forbid") diff --git a/examples/pipelines/private-rag/app.yaml b/examples/pipelines/private-rag/app.yaml index f6aa160..8cf856a 100644 --- a/examples/pipelines/private-rag/app.yaml +++ b/examples/pipelines/private-rag/app.yaml @@ -74,5 +74,10 @@ question_answerer: !pw.xpacks.llm.question_answering.AdaptiveRAGQuestionAnswerer # host: "0.0.0.0" # port: 8000 +# Cache configuration # with_cache: true +# cache_backend: !pw.persistence.Backend.filesystem +# path: ".Cache" + +# Set `terminate_on_error` to true if you want the program to terminate whenever any error is encountered # terminate_on_error: false diff --git a/examples/pipelines/slides_ai_search/README.md b/examples/pipelines/slides_ai_search/README.md index a77e64c..0c053be 100644 --- a/examples/pipelines/slides_ai_search/README.md +++ b/examples/pipelines/slides_ai_search/README.md @@ -70,7 +70,7 @@ This demo consists of three parts: 1. **Data Sources**: * The application reads slide files (PPTX and PDF) from a specified directory. The directory is set to `./data/`in the `app.py` file. - * In the default app setup, the connected folder is a local file folder. You can add more folders and file sources, such as [Google Drive](https://pathway.com/developers/user-guide/connectors/gdrive-connector/#google-drive-connector) or [Sharepoint](https://pathway.com/developers/user-guide/connecting-to-data/connectors/#tutorials), by adding a line of code to the template. + * In the default app setup, the connected folder is a local file folder. You can add more folders and file sources, such as [Google Drive](https://pathway.com/developers/user-guide/connectors/gdrive-connector/#google-drive-connector) or [Sharepoint](https://pathway.com/developers/user-guide/connecting-to-data/connectors/#tutorials), by changing configuration in `app.yaml`. * More inputs can be added by configuring the `sources` list in the `app.yaml`. 
@@ -268,18 +268,18 @@ curl http://localhost:8000/v1/completions \ns \ }' ``` -### Set the LLM Instance in the app - -```python -chat = llms.OpenAIChat( - model="microsoft/Phi-3-vision-128k-instruct", - temperature=0.0, - capacity=1, - base_url="http://localhost:8000/v1", - api_key="ignore the key, not needed", - cache_strategy=DiskCache(), - retry_strategy=ExponentialBackoffRetryStrategy(max_retries=3), -) +### Set the LLM Instance in the configuration file + +```yaml +llm: !pw.xpack.llm.llms.OpenAIChat + model: "microsoft/Phi-3-vision-128k-instruct" + temperature: 0.0 + capacity: 1 + base_url: "http://localhost:8000/v1" + api_key: "ignore the key, not needed" + cache_strategy: !DiskCache + retry_strategy: !ExponentialBackoffRetryStrategy + max_retries: 3 ``` This will use your local Phi 3 vision model as the LLM for parsing the slides. @@ -292,16 +292,16 @@ From performance/computational-cost standpoint, `avsolatorio/GIST-Embedding-v0`, Here, we go with the `avsolatorio/GIST-small-Embedding-v0`. Note that, larger models may take longer to process the inputs. -We replace the `embedder` with the following embedding model in `app.py`: +We replace the `embedder` with the following embedding model in `app.yaml`: -```python -embedding_model = "avsolatorio/GIST-small-Embedding-v0" +```yaml +$embedding_model: "avsolatorio/GIST-small-Embedding-v0" -embedder = embedders.SentenceTransformerEmbedder( - embedding_model, call_kwargs={"show_progress_bar": False} -) +embedder: !pw.xpack.llms.embedders.SentenceTransformerEmbedder + model: $embedding_model + call_kwargs: + show_progress_bar: false ``` -Alternatively you can also specify this embedder configuration through `app.yaml`. ## Advanced variant: Running without Docker Running the whole demo without Docker is a bit tricky as there are three components. diff --git a/examples/pipelines/slides_ai_search/app.py b/examples/pipelines/slides_ai_search/app.py index c955b3d..f29bd64 100755 --- a/examples/pipelines/slides_ai_search/app.py +++ b/examples/pipelines/slides_ai_search/app.py @@ -27,6 +27,12 @@ class App(BaseModel): details_schema: FilePath | dict[str, Any] | None = None + with_cache: bool = True + cache_backend: InstanceOf[pw.persistence.Backend] = ( + pw.persistence.Backend.filesystem("./Cache") + ) + terminate_on_error: bool = False + def run(self) -> None: if self.details_schema is not None: detail_schema = get_model(self.details_schema) @@ -56,7 +62,11 @@ def run(self) -> None: app.build_server(host=self.host, port=self.port) - app.run_server(with_cache=True, terminate_on_error=False) + app.run_server( + with_cache=self.with_cache, + cache_backend=self.cache_backend, + terminate_on_error=self.terminate_on_error, + ) model_config = ConfigDict(extra="forbid") diff --git a/examples/pipelines/slides_ai_search/app.yaml b/examples/pipelines/slides_ai_search/app.yaml index 7182ab7..83d946a 100644 --- a/examples/pipelines/slides_ai_search/app.yaml +++ b/examples/pipelines/slides_ai_search/app.yaml @@ -71,3 +71,15 @@ details_schema: has_images: type: bool description: "Whether the slide contains photographs" + +# Change host and port by uncommenting these lines +# host: "0.0.0.0" +# port: 8000 + +# Cache configuration +# with_cache: true +# cache_backend: !pw.persistence.Backend.filesystem +# path: ".Cache" + +# Set `terminate_on_error` to true if you want the program to terminate whenever any error is encountered +# terminate_on_error: false