Merge pull request #117 from Sunbird-AIAssistant/release-2.0.0
Release 2.0.0
sajeshkayyath authored Apr 23, 2024
2 parents 7764f63 + 99b7a3d commit 5138e1f
Showing 5 changed files with 21 additions and 31 deletions.
16 changes: 8 additions & 8 deletions README.md
@@ -1,11 +1,11 @@
# Activity Sakhi API :

-A powerful service designed to enhance the educational experience for both parents and teachers. Our service revolves around a curated collection of documents focused on children's activities and curriculum frameworks. With simplicity at its core, "Activity Sakhi" empowers parents and teachers to effortlessly discover relevant content and find answers to audience-specific questions.
+A powerful service designed to enhance the educational experience for both parents and teachers. Our service revolves around a curated collection of documents focused on children's activities and curriculum frameworks. With simplicity at its core, "Activity Sakhi" empowers parents and teachers to effortlessly discover relevant content and find answers to context-specific questions.

### Key Features:
#### Rich Content Repository:
Explore a predefined set of documents tailored to children's activities and curriculum frameworks, ensuring a wealth of valuable information at your fingertips.
-#### Audience-Centric:
+#### Context-Centric:
Targeted specifically for parents and teachers, "Activity Sakhi" caters to their unique needs, providing insights and resources tailored to enhance the learning journey.
Discover and Learn: Seamlessly discover engaging content and obtain answers to your specific questions, making the educational process more accessible and enjoyable.

@@ -147,7 +147,7 @@ curl -X 'POST' \
"language": "en",
"text": "string",
"audio": "string",
-"audienceType": "teacher"
+"context": "teacher"
},
"output": {
"format": "text"
@@ -161,8 +161,8 @@ curl -X 'POST' \
| `input.language` | en,bn,gu,hi,kn,ml,mr,or,pa,ta,te |
| `input.text` | User entered question (any of the above language) |
| `input.audio` | Public file URL Or Base64 encoded audio |
-| `input.audienceType` | parent, teacher (default value is parent, if not passing) |
-| `output.format` | text or audio |
+| `input.context` | parent, teacher (default value is parent, if not passing) |
+| `output.format` | text or audio |

Required inputs are `text`, `audio` and `language`.
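
A minimal client sketch (not part of this commit) showing the renamed `context` field in a complete request body; the base URL, port, and `/v1/query` path are assumptions for illustration, and the sample question text is invented:

```python
# Hypothetical call to the query API after the 2.0.0 rename (audienceType -> context).
# Host, port, endpoint path, and header values below are assumptions, not repo values.
import requests

payload = {
    "input": {
        "language": "en",
        "text": "Suggest an indoor activity for a five-year-old",
        "audio": "",
        "context": "teacher",  # "parent" is the documented default if omitted
    },
    "output": {"format": "text"},
}

response = requests.post(
    "http://localhost:8000/v1/query",            # assumed base URL and path
    json=payload,
    headers={"X-Request-ID": "demo-request-1"},  # header name appears in main.py
    timeout=60,
)
print(response.status_code, response.json())
```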

@@ -283,13 +283,13 @@ Make the necessary changes to your dockerfile with respect to your new changes.

| Variable | Description | Default Value |
|:--------------------------------|------------------------------------------------------------------------------------------------|--------------------------------------|
-| database.indices | index or collection name to be referred to from vector database based on input audienceType | |
+| database.indices | index or collection name to be referred to from vector database based on input context | |
| database.top_docs_to_fetch | Number of filtered documents retrieved from vector database to be passed to Gen AI as contexts | 5 |
| database.docs_min_score | Minimum score of the documents based on which filtration happens on retrieved documents | 0.4 |
| redis.ttl | Redis cache expiration time for a key in seconds. (Only applicable for `/v1/chat` API.) | 43200 |
| request.supported_lang_codes | Supported languages by the service | en,bn,gu,hi,kn,ml,mr,or,pa,ta,te |
-| request.support_response_format | Supported response formats | text,audio |
-| request.support_audience_type | index name to be referred to from vector database based on audience type | teacher, parent (Default) |
+| request.supported_response_format | Supported response formats | text,audio |
+| request.supported_context | index name to be referred to from vector database based on context type | teacher, parent (Default) |
| llm.max_messages | Maximum number of messages to include in conversation history | 4 |
| llm.gpt_model | Gen AI GPT Model value | |
| llm.enable_bot_intent | Flag to enable or disable verification of user's query to check if it is referring to bot | false |
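
As context for the `database.indices` row above, a hedged sketch of how the configured mapping is typically resolved per request; the JSON value and index names are illustrative assumptions (the real values are not part of this commit), but the parse-and-lookup pattern mirrors the `json.loads(...)` and `indices.get(context.lower())` calls visible in main.py below:

```python
# Illustrative only: index names are assumptions, not values from the repository.
import json

# Example shape of the [database] indices entry in config.ini:
# indices = {"parent": "parent_activity_index", "teacher": "teacher_activity_index"}
indices_config = '{"parent": "parent_activity_index", "teacher": "teacher_activity_index"}'

def resolve_index(context: str) -> str:
    """Parse the configured mapping and pick the vector-store index for a context."""
    indices = json.loads(indices_config)
    # Fall back to "parent", matching the documented default context.
    return indices.get(context.lower(), indices["parent"])

print(resolve_index("teacher"))  # -> teacher_activity_index
```
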
4 changes: 2 additions & 2 deletions config.ini
@@ -11,8 +11,8 @@ ttl=43200

[request]
supported_lang_codes = en,bn,gu,hi,kn,ml,mr,or,pa,ta,te
-support_response_format = text,audio
-support_audience_type = parent,teacher
+supported_response_format = text,audio
+supported_context = parent,teacher

[llm]
max_messages=4
1 change: 0 additions & 1 deletion indexed_documents.txt

This file was deleted.

25 changes: 8 additions & 17 deletions main.py
@@ -44,8 +44,8 @@ async def shutdown_event():
logger.info('Invoking shutdown_event')
logger.info('shutdown_event : Engine closed')

-AudienceType = Enum("AudienceType", {type: type for type in get_config_value('request', 'support_audience_type', None).split(',')})
-DropdownOutputFormat = Enum("DropdownOutputFormat", {type: type for type in get_config_value('request', 'support_response_format', None).split(',')})
+Context = Enum("Context", {type: type for type in get_config_value('request', 'supported_context', None).split(',')})
+DropdownOutputFormat = Enum("DropdownOutputFormat", {type: type for type in get_config_value('request', 'supported_response_format', None).split(',')})
DropDownInputLanguage = Enum("DropDownInputLanguage", {type: type for type in get_config_value('request', 'supported_lang_codes', None).split(',')})

class OutputResponse(BaseModel):
@@ -67,25 +67,16 @@ class QueryInputModel(BaseModel):
language: DropDownInputLanguage # type: ignore
text: str = ""
audio: str = ""
-audienceType: AudienceType # type: ignore
+context: Context # type: ignore


class QueryOuputModel(BaseModel):
format: DropdownOutputFormat # type: ignore


class QueryModel(BaseModel):
input: QueryInputModel
output: QueryOuputModel

-class ChatInputModel(BaseModel):
-language: DropDownInputLanguage # type: ignore
-text: str = ""
-audio: str = ""
-context: AudienceType # type: ignore
-class ChatModel(QueryModel):
-input: ChatInputModel

# Telemetry API logs middleware
app.add_middleware(TelemetryMiddleware)

@@ -122,16 +113,16 @@ async def query(request: QueryModel, x_request_id: str = Header(None, alias="X-R
load_dotenv()
indices = json.loads(get_config_value('database', 'indices', None))
language = request.input.language.name
-audience_type = request.input.audienceType.name
+context = request.input.context.name
output_format = request.output.format.name
-index_id = indices.get(audience_type.lower())
+index_id = indices.get(context.lower())
audio_url = request.input.audio
query_text = request.input.text
is_audio = False
text = None
regional_answer = None
audio_output_url = None
-logger.info({"label": "query", "query_text": query_text, "index_id": index_id, "audience_type": audience_type, "input_language": language, "output_format": output_format, "audio_url": audio_url})
+logger.info({"label": "query", "query_text": query_text, "index_id": index_id, "context": context, "input_language": language, "output_format": output_format, "audio_url": audio_url})
if not query_text and not audio_url:
raise HTTPException(status_code=422, detail="Either 'text' or 'audio' should be present!")

@@ -147,7 +147,7 @@ async def query(request: QueryModel, x_request_id: str = Header(None, alias="X-R
is_audio = True

if text is not None:
-answer, error_message, status_code = querying_with_langchain_gpt3(index_id, text, audience_type)
+answer, error_message, status_code = querying_with_langchain_gpt3(index_id, text, context)
if len(answer) != 0:
regional_answer, error_message = process_outgoing_text(answer, language)
logger.info({"regional_answer": regional_answer})
@@ -178,7 +169,7 @@ async def query(request: QueryModel, x_request_id: str = Header(None, alias="X-R
return response

@app.post("/v1/chat", tags=["Conversation chat over Document Store"], include_in_schema=True)
-async def chat(request: ChatModel, x_request_id: str = Header(None, alias="X-Request-ID"),
+async def chat(request: QueryModel, x_request_id: str = Header(None, alias="X-Request-ID"),
x_source: str = Header(None, alias="x-source"),
x_consumer_id: str = Header(None, alias="x-consumer-id")) -> ResponseForQuery:
load_dotenv()
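
As a side note on the dynamic `Enum` construction in main.py above, a self-contained sketch of the pattern; the `supported_context` value is the one set in config.ini in this commit, while the inline string stands in for the `get_config_value` helper:

```python
# Minimal sketch of the functional Enum construction used for request dropdowns.
from enum import Enum

supported_context = "parent,teacher"  # [request] supported_context in config.ini

# Same pattern as main.py: each configured option becomes a member whose name equals its value.
Context = Enum("Context", {name: name for name in supported_context.split(",")})

print([member.name for member in Context])  # ['parent', 'teacher']
print(Context["teacher"].value)             # 'teacher'
```
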
6 changes: 3 additions & 3 deletions query_with_langchain.py
@@ -25,12 +25,12 @@
marqoClient = marqo.Client(url=marqo_url)


-def querying_with_langchain_gpt3(index_id, query, audience_type):
+def querying_with_langchain_gpt3(index_id, query, context):
logger.debug(f"gpt_model: {gpt_model}")
if gpt_model is None or gpt_model.strip() == "":
raise HTTPException(status_code=422, detail="Please configure gpt_model under llm section in config file!")

-intent_response = check_bot_intent(query, audience_type)
+intent_response = check_bot_intent(query, context)
if intent_response:
return intent_response, None, 200

@@ -40,7 +40,7 @@ def querying_with_langchain_gpt3(index_id, query, audience_type):
logger.debug(f"activity_prompt_config: {activity_prompt_config}")
if activity_prompt_config:
activity_prompt_dict = ast.literal_eval(activity_prompt_config)
-system_rules = activity_prompt_dict.get(audience_type)
+system_rules = activity_prompt_dict.get(context)

search_index = Marqo(marqoClient, index_id, searchable_attributes=["text"])
top_docs_to_fetch = get_config_value("database", "top_docs_to_fetch", None)
