diff --git a/config.json b/config.json index aea350c..1a9332d 100644 --- a/config.json +++ b/config.json @@ -1,48 +1,45 @@ { "models": [ - { - "serviceName": "token_counter_openai", - "modelBasePath": "src/token_counter/openai/local/.", - "apiBasePath": "token_counter/openai/local/", + { + "serviceName": "embedding_instructor_gpu", + "modelBasePath": "src/embeddings/instructor_gpu/local/.", + "apiBasePath": "embeddings/instructor_gpu/local/", "containerPort": 8000, - "environment": {}, - "nginx": [], - "build": true - }, - { + "environment": { + "OPENAI_API_KEY": "${OPENAI_API_KEY}", + "NVIDIA_VISIBLE_DEVICES": "${NVIDIA_VISIBLE_DEVICES}" + }, + "nginx": ["client_max_body_size 100M;", "proxy_read_timeout 600;", "proxy_connect_timeout 600;", "proxy_send_timeout 600;"], + "constraints": ["node.labels.node_vm_type==gpu"], + "build": false + }, + { "serviceName": "asr_whisper_en", "modelBasePath": "src/asr/whisper_en/local/.", "apiBasePath": "asr/whisper_en/local/", "containerPort": 8000, - "environment": { - "NVIDIA_VISIBLE_DEVICES": "${NVIDIA_VISIBLE_DEVICES}" - }, + "environment": {}, "nginx": ["client_max_body_size 100M;", "proxy_read_timeout 600;", "proxy_connect_timeout 600;", "proxy_send_timeout 600;"], "constraints": ["node.labels.node_vm_type==gpu"], "build": false }, - { + { "serviceName": "asr_lang_detect", "modelBasePath": "src/asr/whisper_lang_rec/local/.", "apiBasePath": "asr/whisper_lang_rec/local/", "containerPort": 8000, - "environment": { - "NVIDIA_VISIBLE_DEVICES": "${NVIDIA_VISIBLE_DEVICES}" - }, + "environment": {}, "nginx": ["client_max_body_size 100M;", "proxy_read_timeout 600;", "proxy_connect_timeout 600;", "proxy_send_timeout 600;"], "constraints": ["node.labels.node_vm_type==gpu"], "build": false - }, + }, { "serviceName": "ner", "modelBasePath": "src/ner/agri_ner_akai/local/.", "apiBasePath": "ner/agri_ner_akai/local/", "containerPort": 8000, - "environment": { - "NVIDIA_VISIBLE_DEVICES": 
"${NVIDIA_VISIBLE_DEVICES}" - }, + "environment": {}, "nginx": [], - "constraints": ["node.labels.node_vm_type==gpu"], "build": false }, { @@ -71,7 +70,7 @@ "environment": {}, "nginx": [], "constraints": ["node.role==worker"], - "build": true + "build": false }, { "serviceName": "text_translation_azure_dict", @@ -93,16 +92,27 @@ "OPENAI_API_KEY": "${OPENAI_API_KEY}" }, "nginx": [], + "constraints": ["node.labels.node_vm_type==gpu"], "build": false }, + { + "serviceName": "text_translation_azure", + "modelBasePath": "src/text_translation/azure/remote/.", + "apiBasePath": "/text_translation/azure/remote", + "containerPort": 8000, + "environment": { + "AZURE_TRANSLATE_KEY": "${AZURE_TRANSLATE_KEY}" + }, + "nginx": [], + "constraints": ["node.labels.node_vm_type==gpu"], + "build": false + }, { "serviceName": "asr_mms", "modelBasePath": "src/asr/fairseq_mms/local/.", "apiBasePath": "/asr/fairseq_mms/local", "containerPort": 8000, - "environment": { - "NVIDIA_VISIBLE_DEVICES": "${NVIDIA_VISIBLE_DEVICES}" - }, + "environment": {}, "nginx": ["client_max_body_size 100M;", "proxy_read_timeout 600;", "proxy_connect_timeout 600;", "proxy_send_timeout 600;"], "constraints": ["node.labels.node_vm_type==gpu"], "build": false @@ -112,21 +122,46 @@ "modelBasePath": "src/coref/fcoref/local/.", "apiBasePath": "/coref/fcoref/local", "containerPort": 8000, - "environment": { - "NVIDIA_VISIBLE_DEVICES": "${NVIDIA_VISIBLE_DEVICES}" - }, + "environment": {}, + "nginx": [], + "constraints": ["node.labels.node_vm_type==gpu"], + "build": false + }, + { + "serviceName": "text_translation_bhashini", + "modelBasePath": "src/text_translation/bhashini/remote/.", + "apiBasePath": "/text_translation/bhashini/remote", + "containerPort": 8000, + "environment": {}, "nginx": [], "constraints": ["node.labels.node_vm_type==gpu"], "build": false }, + { + "serviceName": "text_translation_ai4bharat", + "modelBasePath": "src/text_translation/ai4bharat/remote/.", + "apiBasePath": 
"/text_translation/ai4bharat/remote", + "containerPort": 8000, + "environment": {}, + "constraints": ["node.labels.node_vm_type==gpu"], + "build": false + }, + { + "serviceName": "text_lang_detection_bhashini", + "modelBasePath": "src/text_lang_detection/bhashini/remote/.", + "apiBasePath": "/text_lang_detection/bhashini/remote", + "containerPort": 8000, + "environment": {}, + "constraints": ["node.labels.node_vm_type==gpu"], + "build": false + }, { "serviceName": "chunking_mpnet", "modelBasePath": "src/chunking/MPNet/local/.", "apiBasePath": "chunking/MPNet/local", "containerPort": 8000, "environment": { - "OPENAI_API_KEY": "${OPENAI_API_KEY}", - "NVIDIA_VISIBLE_DEVICES": "${NVIDIA_VISIBLE_DEVICES}" + "OPENAI_API_KEY": "${OPENAI_API_KEY}" }, "nginx": ["client_max_body_size 100M;", "proxy_read_timeout 600;", "proxy_connect_timeout 600;", "proxy_send_timeout 600;"], "constraints": ["node.labels.node_vm_type==gpu"], @@ -138,8 +173,7 @@ "apiBasePath": "/embeddings/instructor/local", "containerPort": 8000, "environment": { - "OPENAI_API_KEY": "${OPENAI_API_KEY}", - "NVIDIA_VISIBLE_DEVICES": "${NVIDIA_VISIBLE_DEVICES}" + "OPENAI_API_KEY": "${OPENAI_API_KEY}" }, "nginx": ["client_max_body_size 100M;", "proxy_read_timeout 600;", "proxy_connect_timeout 600;", "proxy_send_timeout 600;"], "constraints": ["node.labels.node_vm_type==gpu"], @@ -151,12 +185,11 @@ "apiBasePath": "/llm/openai/chatgpt3", "containerPort": 8000, "environment": { - "OPENAI_API_KEY": "${OPENAI_API_KEY}", - "NVIDIA_VISIBLE_DEVICES": "${NVIDIA_VISIBLE_DEVICES}" + "OPENAI_API_KEY": "${OPENAI_API_KEY}" }, "nginx": [], "constraints": ["node.labels.node_vm_type==gpu"], - "build": false + "build": true } ] -} +} diff --git a/src/llm/openai/chatgpt3/model.py b/src/llm/openai/chatgpt3/model.py index 3fd2cc0..ea51e55 100644 --- a/src/llm/openai/chatgpt3/model.py +++ b/src/llm/openai/chatgpt3/model.py @@ -15,14 +15,13 @@ def __new__(cls, context): cls.instance = super(Model, 
cls).__new__(cls) return cls.instance - @AsyncTTL(time_to_live=600000, maxsize=1024) @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) async def inference(self, request: ModelRequest): response = await openai_async.chat_complete( openai.api_key, timeout=20000, payload={ - "model": "gpt-3.5-turbo-0301", + "model": "gpt-3.5-turbo-0613", "temperature": 0, "messages": request.prompt, }, diff --git a/src/llm/openai/chatgpt3/requirements.txt b/src/llm/openai/chatgpt3/requirements.txt index 753c69a..94b74f5 100644 --- a/src/llm/openai/chatgpt3/requirements.txt +++ b/src/llm/openai/chatgpt3/requirements.txt @@ -1,5 +1,5 @@ aiohttp==3.8.4 -quart==0.18.3 +quart async-cache==1.1.1 requests openai