Skip to content

Commit

Permalink
Merge pull request #122 from Sunbird-AIAssistant/release-3.0.0
Browse files Browse the repository at this point in the history
Merge Release 3.0.0 into main
  • Loading branch information
sajeshkayyath authored May 14, 2024
2 parents 5138e1f + 9eeaadf commit f0ed8c8
Show file tree
Hide file tree
Showing 12 changed files with 437 additions and 274 deletions.
72 changes: 42 additions & 30 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,15 @@ To use the code, you need to follow these steps:
3. To ingest data to marqo

```bash
python3 index_documents.py --marqo_url=<MARQO_URL> --index_name=<MARQO_INDEX_NAME> --folder_path=<PATH_TO_INPUT_FILE_DIRECTORY> --fresh_index
python3 index_documents.py --folder_path=<PATH_TO_INPUT_FILE_DIRECTORY> --fresh_index
```
--fresh_index: This flag creates a new index or overwrites an existing one. Fresh indexing typically starts from scratch without using existing data.
PATH_TO_INPUT_FILE_DIRECTORY should contain only PDF, audio, video and txt files.

e.g.
```bash
python3 index_documents.py --marqo_url=http://0.0.0.0:8882 --index_name=sakhi_parent_activities --folder_path=parent_pdfs --fresh_index
python3 index_documents.py --marqo_url=http://0.0.0.0:8882 --index_name=sakhi_teacher_activities --folder_path=teacher_pfs --fresh_index
python3 index_documents.py --folder_path=parent_pdfs --fresh_index
python3 index_documents.py --folder_path=teacher_pfs --fresh_index
```
Create the index by using the above command. After creating the index add the index name in `config.ini` file.

Expand All @@ -82,32 +82,45 @@ To use the code, you need to follow these steps:

5. create another file **.env** which will hold the development credentials and add the following variables. Update the Azure OpenAI details, OCI details, Bhashini endpoint URL and API key.

```bash
SERVICE_ENVIRONMENT=<name_of_the_environment>
LOG_LEVEL=<log_level> # INFO, DEBUG, ERROR
CONFIG_INI_PATH=<your_config.ini_file_path>
OPENAI_TYPE=<openai_type> # openai, azure
OPENAI_API_BASE=<your_azure_openai_api_base_url>
OPENAI_API_VERSION=<your_openai_api_version>
OPENAI_API_KEY=<your_openai_api_key>
GPT_MODEL=<your_gpt_model>
TRANSLATION_TYPE=<translation_type> #bhashini, google, dhruva
BHASHINI_ENDPOINT_URL=<your_bhashini_api_endpoint>
BHASHINI_API_KEY=<your_bhashini_api_key>
GCP_CONFIG_PATH=<your_gcp.json_file_path>
BUCKET_TYPE=<bucket_type> #oci, gcp, aws
BUCKET_ENDPOINT_URL=<your_bucket_endpoint_url>
BUCKET_REGION_NAME=<your_bucket_region_name>
BUCKET_NAME=<your_bucket_name> #ai-assistent-prod
BUCKET_SECRET_ACCESS_KEY=<your_bucket_secret_access_key>
BUCKET_ACCESS_KEY_ID=<your_bucket_access_key_id>
MARQO_URL=<your_marqo_db_url>
TELEMETRY_ENDPOINT_URL=<telemetry_endpoint_url>
TELEMETRY_LOG_ENABLED=<telemetry_enable_or_disable> # true or false
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_DB=0
```
```bash
SERVICE_ENVIRONMENT=<name_of_the_environment>
LOG_LEVEL=<log_level>
CONFIG_INI_PATH=<your_config.ini_file_path>
#Redis Database
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_DB=0
#Telemetry
TELEMETRY_ENDPOINT_URL=<telemetry_endpoint_url>
TELEMETRY_LOG_ENABLED=<telemetry_enable_or_disable>
#LLM - openai, azure, ollama
LLM_TYPE=<llm_type>
OPENAI_API_KEY=<your_openai_api_key>
GPT_MODEL=<your_gpt_model>
#Translation - bhashini, google, dhruva
TRANSLATION_TYPE=<translation_type>
BHASHINI_ENDPOINT_URL=<your_bhashini_api_endpoint>
BHASHINI_API_KEY=<your_bhashini_api_key>
#Storage - oci, gcp, aws
BUCKET_TYPE=<bucket_type>
BUCKET_ENDPOINT_URL=<your_bucket_endpoint_url>
BUCKET_REGION_NAME=<your_bucket_region_name>
BUCKET_NAME=<your_bucket_name>
BUCKET_SECRET_ACCESS_KEY=<your_bucket_secret_access_key>
BUCKET_ACCESS_KEY_ID=<your_bucket_access_key_id>
#Vector Store - marqo
VECTOR_STORE_TYPE=marqo
VECTOR_STORE_ENDPOINT=http://localhost:8882
EMBEDDING_MODEL=flax-sentence-embeddings/all_datasets_v4_mpnet-base
VECTOR_COLLECTION_NAME=test
```

# 🏃🏻 2. Running

Expand Down Expand Up @@ -291,7 +304,6 @@ Make the necessary changes to your dockerfile with respect to your new changes.
| request.supported_response_format | Supported response formats | text,audio |
| request.supported_context | index name to be referred to from vector database based on context type | teacher, parent (Default) |
| llm.max_messages | Maximum number of messages to include in conversation history | 4 |
| llm.gpt_model | Gen AI GPT Model value | |
| llm.enable_bot_intent | Flag to enable or disable verification of user's query to check if it is referring to bot | false |
| llm.intent_prompt | System prompt to Gen AI to verify if the user's query is referring to the bot | |
| llm.bot_prompt | System prompt to Gen AI to generate responses for user's query related to bot | |
Expand Down
3 changes: 1 addition & 2 deletions config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ supported_context = parent,teacher

[llm]
max_messages=4
gpt_model=gpt-4
temperature=0.3
enable_bot_intent=false
intent_prompt=Identify if the user's query is about the bot's persona or 'Teacher Tara' or 'Parent Tara'. If yes, return the answer as 'Yes' else return answer as 'No' only.
bot_prompt = {
Expand Down Expand Up @@ -131,7 +131,6 @@ activity_prompt = {
chat_intent_prompt=Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history and that can be used to find the most relevant documents. Do NOT answer the question, just reformulate it if needed and otherwise return it as is.
[telemetry]
telemetry_log_enabled = true
environment = dev
service_id = api.djp.telemetry
service_ver = 3.1
actor_id = sakhi-api-service
Expand Down
9 changes: 3 additions & 6 deletions config_util.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
import os
from configparser import ConfigParser

from fastapi import HTTPException, status

from logger import logger

config_file_path = os.getenv('CONFIG_INI_PATH') # Update with your config.ini file path
config_file_path = os.getenv('CONFIG_INI_PATH', 'config.ini') # Update with your config.ini file path
config = ConfigParser()
config.read(config_file_path)


def get_config_value(section, key, default=None):
def get_config_value(section: str, key: str, default=None):
# Check if the key exists in the environment variables
value = os.getenv(key, default)
value = os.getenv(key.upper(), default)

# If the key is not in the environment variables, try reading from a config file
if value is None or value == "":
Expand Down
48 changes: 34 additions & 14 deletions env_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,25 @@
from translation.utils import (
BhashiniTranslationClass,
GoogleCloudTranslationClass,
DhruvaTranslationClass
DhruvaTranslationClass,
TranslationClass
)
from storage.utils import (
AwsS3MainClass,
GoogleBucketClass,
OciBucketClass
OciBucketClass,
StorageClass
)
from llm.utils import (
AzureAiClass,
OpenAiClass
BaseChatClient,
OpenAIChatClient,
AzureChatClient,
OllamaChatClient
)

from vectorstores.utils import (
MarqoVectorStore,
BaseVectorStore
)

class EnvironmentManager():
Expand All @@ -28,10 +37,11 @@ def __init__(self):
self.indexes = {
"llm": {
"class": {
"openai": OpenAiClass,
"azure": AzureAiClass
"openai": OpenAIChatClient,
"azure": AzureChatClient,
"ollama": OllamaChatClient
},
"env_key": "OPENAI_TYPE"
"env_key": "LLM_TYPE"
},
"translate": {
"class": {
Expand All @@ -48,21 +58,31 @@ def __init__(self):
"aws": AwsS3MainClass
},
"env_key": "BUCKET_TYPE"
},
"vectorstore": {
"class": {
"marqo": MarqoVectorStore
},
"env_key": "VECTOR_STORE_TYPE"
}
}

def create_instance(self, env_key):
env_var = self.indexes[env_key]["env_key"]
type_value = os.getenv(env_var)
logger.info(f"Init {env_key} class for: {type_value}")
if type_value is not None:
return self.indexes[env_key]["class"].get(type_value)()

if type_value is None:
raise ValueError(
f"Missing credentials. Please pass the `{env_var}` environment variable"
)

logger.info(f"Init {env_key} class for: {type_value}")
return self.indexes[env_key]["class"].get(type_value)()

env_class = EnvironmentManager()

# create instances of functions
logger.info(f"Initializing required classes for components")
ai_class = env_class.create_instance("llm")
translate_class = env_class.create_instance("translate")
storage_class = env_class.create_instance("storage")
ai_class: BaseChatClient = env_class.create_instance("llm")
translate_class: TranslationClass = env_class.create_instance("translate")
storage_class: StorageClass = env_class.create_instance("storage")
vectorstore_class: BaseVectorStore = env_class.create_instance("vectorstore")
Loading

0 comments on commit f0ed8c8

Please sign in to comment.