From b3696aef73dcb4daf7fa7e190d9c1f5527a35aa2 Mon Sep 17 00:00:00 2001 From: Michele Riva Date: Thu, 30 Jan 2025 16:31:38 +0100 Subject: [PATCH] docs: adds configuration docs --- .../docs/getting-started/configuration.mdx | 78 +++++++++++++------ .../docs/getting-started/javascript-hooks.mdx | 6 +- 2 files changed, 59 insertions(+), 25 deletions(-) diff --git a/docs/content/docs/getting-started/configuration.mdx b/docs/content/docs/getting-started/configuration.mdx index 2a47f53..3dd08a2 100644 --- a/docs/content/docs/getting-started/configuration.mdx +++ b/docs/content/docs/getting-started/configuration.mdx @@ -27,9 +27,9 @@ writer_side: # The maximum number of embeddings that can be stored in the queue # before the writer starts to be blocked # NB: the elements are in memory, so be careful with this value - embedding_queue_limit: 50 + embedding_queue_limit: 50000 # The number of the document insertions after the write side will commit the changes - insert_batch_commit_size: 5000 + insert_batch_commit_size: 50000 # The default embedding model used to calculate the embeddings # if not specified in the collection creation default_embedding_model: MultilingualE5Small @@ -39,7 +39,7 @@ reader_side: config: data_dir: ./.data/reader # The number of the write operation after the read side will commit the changes - insert_batch_commit_size: 300 + insert_batch_commit_size: 50000 ai_server: scheme: http @@ -50,27 +50,61 @@ ai_server: total_threads: 12 embeddings: - default_model_group: small + default_model_group: multilingual dynamically_load_models: false execution_providers: - CUDAExecutionProvider - CPUExecutionProvider total_threads: 8 - LLMs: - google_query_translator: - id: "Qwen/Qwen2.5-3B-Instruct" - tensor_parallel_size: 1 - use_cpu: false - sampling_params: - temperature: 0.2 - top_p: 0.95 - max_tokens: 20 - answer: - id: "Qwen/Qwen2.5-3B-Instruct" - tensor_parallel_size: 1 - use_cpu: false - sampling_params: - temperature: 0 - top_p: 0.95 - max_tokens: 2048 -``` 
\ No newline at end of file +``` + +All the options above are optional, and you can customize them as needed. + +Let's break them down one section at a time. + +## `http` + +The `http` section configures the HTTP server that serves the OramaCore API. Here are the available options: + +- `host`: The host where the HTTP server will listen. By default, it listens on all interfaces (`0.0.0.0`). +- `port`: The port where the HTTP server will listen. By default, it listens on port `8080`. +- `allow_cors`: Whether to allow Cross-Origin Resource Sharing (CORS) requests. By default, it's set to `true`. We recommend keeping it enabled. +- `with_prometheus`: Whether to expose Prometheus metrics. By default, it's set to `true`. + +## `writer_side` + +The `writer_side` section configures the writer side of OramaCore. Here are the available options: + +- `output`: The output where the writer side will store the data. By default, it's set to `in-memory`. +- `config`: The configuration options for the writer side. Here are the available options: + - `data_dir`: The directory where the writer side will persist the data on disk. By default, it's set to `./.data/writer`. + - `embedding_queue_limit`: The maximum number of embeddings that can be stored in the queue before the writer starts to be blocked. By default, it's set to `50000`. + - `insert_batch_commit_size`: The number of document insertions after which the write side will commit the changes. By default, it's set to `50000`. + - `default_embedding_model`: The default embedding model used to calculate the embeddings if not specified in the collection creation. By default, it's set to `MultilingualE5Small`. See more about the available models in the [Embedding Models](/docs/getting-started/text-embeddings) section. + +## `reader_side` + +The `reader_side` section configures the reader side of OramaCore. Here are the available options: + +- `input`: The input where the reader side will store the data. 
By default, it's set to `in-memory`. +- `config`: The configuration options for the reader side. Here are the available options: + - `data_dir`: The directory where the reader side will persist the data on disk. By default, it's set to `./.data/reader`. + - `insert_batch_commit_size`: The number of write operations after which the read side will commit the changes. By default, it's set to `50000`. + +## `ai_server` + +The `ai_server` section configures the Python gRPC server that is responsible for calculating the embeddings and managing LLMs. Here are the available options: + +- `scheme`: The scheme where the AI server will listen. By default, it's set to `http`. +- `host`: The host where the AI server will listen. By default, it listens on all interfaces (`0.0.0.0`). +- `port`: The port where the AI server will listen. By default, it listens on port `50051`. +- `api_key`: The API key used to authenticate the requests to the AI server. By default, it's set to an empty string - no authentication is required since it's not recommended to expose the AI server to the public internet. +- `max_connections`: The maximum number of connections that the AI server will accept. By default, it's set to `15`. +- `total_threads`: The total number of threads that the AI server will use. By default, it's set to `12`. + +The `embeddings` section configures the embeddings calculation. Here are the available options: + +- `default_model_group`: The default model group used to calculate the embeddings if not specified in the collection creation. By default, it's set to `multilingual`. See more about the available models in the [Embedding Models](/docs/getting-started/text-embeddings) section. +- `dynamically_load_models`: Whether to dynamically load the models. By default, it's set to `false`. +- `execution_providers`: The execution providers used to calculate the embeddings. By default, it's set to `CUDAExecutionProvider` and `CPUExecutionProvider`. 
+- `total_threads`: The total number of threads used to calculate the embeddings. By default, it's set to `8`. diff --git a/docs/content/docs/getting-started/javascript-hooks.mdx b/docs/content/docs/getting-started/javascript-hooks.mdx index af6f7bc..9217b1d 100644 --- a/docs/content/docs/getting-started/javascript-hooks.mdx +++ b/docs/content/docs/getting-started/javascript-hooks.mdx @@ -76,7 +76,7 @@ Let's take the following documents as an example: As you can see, the structure of the documents is different. With the `selectEmbeddingProperties` hook, you can customize the text extraction process for each document. -### Returning a single string +#### Returning a single string You could write a JavaScript function like this: @@ -110,7 +110,7 @@ Which will return the following strings for the documents: This way, you can easily produce highly optimized embeddings for each document. -### Returning a single markdown string +#### Returning a single markdown string Another approach is to return a single markdown string that will be used for embeddings: @@ -162,7 +162,7 @@ This will produce the following outputs for the two documents: This approach allows you to generate complete markdown documents rich in information that can be used for embeddings. -### Returning an array of strings +#### Returning an array of strings Finally, you can return an array of strings with the properties name to use for each document. OramaCore will then concatenate the values of these properties to generate the embeddings.