From ccfbaa35503c73f18b14dac3f6caf09150d1d403 Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 15:01:29 -0400 Subject: [PATCH 01/20] added solutions --- solution/helm/Dockerfile | 22 +++++ solution/helm/main.py | 164 +++++++++++++++++++++++++++++++++ solution/helm/requirements.txt | 5 + 3 files changed, 191 insertions(+) create mode 100644 solution/helm/Dockerfile create mode 100644 solution/helm/main.py create mode 100644 solution/helm/requirements.txt diff --git a/solution/helm/Dockerfile b/solution/helm/Dockerfile new file mode 100644 index 0000000..dda84db --- /dev/null +++ b/solution/helm/Dockerfile @@ -0,0 +1,22 @@ +# Base image +FROM python:3.9 + +# Copy project files into the Docker image +COPY . /app +COPY requirements.txt /app + +# Set working directory +WORKDIR /app + +# Install dependencies +RUN pip install -r requirements.txt + +# Set environment variables +ENV FASTAPI_APP=main.py + +# Expose port +EXPOSE 8000 + +RUN python3 models.py +# Command to run the Flask application +CMD ["uvicorn", "main:app", "--reload"] diff --git a/solution/helm/main.py b/solution/helm/main.py new file mode 100644 index 0000000..729af5e --- /dev/null +++ b/solution/helm/main.py @@ -0,0 +1,164 @@ +import asyncio +import time +import numpy as np +from transformers import AutoConfig , AutoTokenizer +from onnxruntime.transformers.io_binding_helper import IOBindingHelper +from onnxruntime import InferenceSession, GraphOptimizationLevel +import torch +from fastapi import FastAPI, Request +from cachetools import TTLCache + +app = FastAPI() + +# Load the NLP models +models = { + "cardiffnlp": { + "model_name": "cardiffnlp/twitter-xlm-roberta-base-sentiment", + "tokenizer": AutoTokenizer.from_pretrained("cardiffnlp/twitter-xlm-roberta-base-sentiment"), + "model": InferenceSession("models/optimized_cardiffnlp/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), + "id2label": AutoConfig.from_pretrained("cardiffnlp/twitter-xlm-roberta-base-sentiment").id2label + }, + "ivanlau": { + "model_name": "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base", + "tokenizer": AutoTokenizer.from_pretrained("ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"), + "model": InferenceSession("models/optimized_ivanlau/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), + "id2label": AutoConfig.from_pretrained("ivanlau/language-detection-fine-tuned-on-xlm-roberta-base").id2label + }, + "svalabs": { + "model_name": "svalabs/twitter-xlm-roberta-crypto-spam", + "tokenizer": AutoTokenizer.from_pretrained("svalabs/twitter-xlm-roberta-crypto-spam"), + "model": InferenceSession("models/optimized_svalabs/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), + "id2label": AutoConfig.from_pretrained("svalabs/twitter-xlm-roberta-crypto-spam").id2label + }, + "EIStakovskii": { + "model_name": "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "tokenizer": AutoTokenizer.from_pretrained("EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus"), + "model": InferenceSession("models/optimized_EIStakovskii/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), + "id2label": AutoConfig.from_pretrained("EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus").id2label + }, + "jy46604790": { + "model_name": "jy46604790/Fake-News-Bert-Detect", + "tokenizer": AutoTokenizer.from_pretrained("jy46604790/Fake-News-Bert-Detect"), + "model": 
InferenceSession("models/optimized_jy46604790/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), + "id2label": AutoConfig.from_pretrained("jy46604790/Fake-News-Bert-Detect").id2label + }, +} + + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +def prepare_io_binding( + ort_session, input_dict, output_buffers, output_shapes +): + """ + Prepare the input/output binding for the provided ONNX Runtime session. + + :param ort_session: The ONNX Runtime session to bind inputs/outputs + :type ort_session: onnxruntime.InferenceSession + + :param input_dict: A dictionary containing the input names and values + :type input_dict: dict[str, numpy.ndarray] + + :param output_buffers: A dictionary containing the output buffers to store the output values + :type output_buffers: dict[str, numpy.ndarray] + + :param output_shapes: A dictionary containing the output shapes + :type output_shapes: dict[str, List[int]] + + :return: The IO binding for the provided ONNX Runtime session + :rtype: onnxruntime.OrtDevice + """ + ort_session.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL + io_binding = ort_session.io_binding() + + # Bind inputs + for name, input_val in input_dict.items(): + if input_val is not None: + input_val = torch.from_numpy(input_val) + io_binding.bind_input( + name, + input_val.device.type, + 0, + np.int64, + input_val.size(), + input_val.data_ptr(), + ) + + # Bind outputs + for output in ort_session.get_outputs(): + output_name = output.name + output_buffer = output_buffers[output_name] + io_binding.bind_output( + output_name, + "cuda", + 0, + np.float32, + output_shapes[output_name], + output_buffer.data_ptr(), + ) + + return io_binding + +async def model_inference(model, labels, inputs): + """ + Asynchronously performs inference on a PyTorch model using the provided inputs. + + :param model: The PyTorch model to perform inference on. + :type model: torch.nn.Module + :param labels: A list of labels corresponding to the model's output classes. + :type labels: List[str] + :param inputs: The inputs to be passed to the model for inference. + :type inputs: torch.Tensor + :return: A dictionary containing the highest scoring label and its corresponding score. 
+ :rtype: Dict[str, Union[str, float]] + """ + output_buffers = { + "logits": torch.empty( + (model.get_outputs()[0].shape[1],), dtype=torch.float32, device="cuda" + ), +} + output_shapes = { + "logits": [1, model.get_outputs()[0].shape[1]], +} + io_binding = prepare_io_binding( + model, + inputs, + output_buffers, + output_shapes, +) + + model.run_with_iobinding(io_binding) + outputs = IOBindingHelper.get_outputs_from_io_binding_buffer( + model, output_buffers=output_buffers, output_shapes=output_shapes + ) + outputs = torch.from_numpy(outputs[0]) + scores = torch.nn.functional.softmax(outputs)[0] + max_i = scores.argmax().item() + return {"score": scores[max_i].item(), "label": labels[max_i]} + + +@app.post("/process") +async def process(request: Request): + # initiate timer + start_time = time.time() + text = (await request.body()).decode("utf-8") + results = TTLCache(maxsize=5000, ttl=300) + tasks = [] + + for model_name, model_data in models.items(): + tokenizer = model_data["tokenizer"] + model = model_data["model"] + + inputs = tokenizer(text, return_tensors="np", padding=True, truncation=True) + inputs = {key: np.array(val, dtype=np.int64) for key, val in inputs.items()} + tasks.append(asyncio.create_task(model_inference(model, model_data["id2label"], inputs))) + results = await asyncio.gather(*tasks) + results_dict = dict(zip(list(models.keys()), results)) + end_time = time.time() + print(end_time - start_time) + + return results_dict + +if __name__ == "__main__": + app.run(host="0.0.0.0") diff --git a/solution/helm/requirements.txt b/solution/helm/requirements.txt new file mode 100644 index 0000000..a059f93 --- /dev/null +++ b/solution/helm/requirements.txt @@ -0,0 +1,5 @@ +fastapi[all] +tritonclient[all]==2.34.0 +pydantic==1.10.9 +cachetools==5.3.1 +onnxruntime_gpu==1.15.1 \ No newline at end of file From 2cce6a7f67c78c5d062f1e9e7ca3b5acc5767043 Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 15:02:45 -0400 Subject: [PATCH 02/20] added values --- autotests/helm/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autotests/helm/values.yaml b/autotests/helm/values.yaml index cda6a5e..2111b14 100644 --- a/autotests/helm/values.yaml +++ b/autotests/helm/values.yaml @@ -25,8 +25,8 @@ global: activeDeadlineSeconds: 3600 # 1h env: - PARTICIPANT_NAME: - api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/ + PARTICIPANT_NAME: jetjodh + api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/process # K6, do not edit! K6_PROMETHEUS_RW_SERVER_URL: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local:9090/api/v1/write From 5cfcae8237ada32877da6414d93ddd1676742afb Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 15:07:55 -0400 Subject: [PATCH 03/20] Delete Dockerfile --- solution/helm/Dockerfile | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 solution/helm/Dockerfile diff --git a/solution/helm/Dockerfile b/solution/helm/Dockerfile deleted file mode 100644 index dda84db..0000000 --- a/solution/helm/Dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -# Base image -FROM python:3.9 - -# Copy project files into the Docker image -COPY . 
/app -COPY requirements.txt /app - -# Set working directory -WORKDIR /app - -# Install dependencies -RUN pip install -r requirements.txt - -# Set environment variables -ENV FASTAPI_APP=main.py - -# Expose port -EXPOSE 8000 - -RUN python3 models.py -# Command to run the Flask application -CMD ["uvicorn", "main:app", "--reload"] From 71df154e6bda138585f2f52a0471e690276a2840 Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 15:08:12 -0400 Subject: [PATCH 04/20] Delete main.py --- solution/helm/main.py | 164 ------------------------------------------ 1 file changed, 164 deletions(-) delete mode 100644 solution/helm/main.py diff --git a/solution/helm/main.py b/solution/helm/main.py deleted file mode 100644 index 729af5e..0000000 --- a/solution/helm/main.py +++ /dev/null @@ -1,164 +0,0 @@ -import asyncio -import time -import numpy as np -from transformers import AutoConfig , AutoTokenizer -from onnxruntime.transformers.io_binding_helper import IOBindingHelper -from onnxruntime import InferenceSession, GraphOptimizationLevel -import torch -from fastapi import FastAPI, Request -from cachetools import TTLCache - -app = FastAPI() - -# Load the NLP models -models = { - "cardiffnlp": { - "model_name": "cardiffnlp/twitter-xlm-roberta-base-sentiment", - "tokenizer": AutoTokenizer.from_pretrained("cardiffnlp/twitter-xlm-roberta-base-sentiment"), - "model": InferenceSession("models/optimized_cardiffnlp/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), - "id2label": AutoConfig.from_pretrained("cardiffnlp/twitter-xlm-roberta-base-sentiment").id2label - }, - "ivanlau": { - "model_name": "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base", - "tokenizer": AutoTokenizer.from_pretrained("ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"), - "model": InferenceSession("models/optimized_ivanlau/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), - "id2label": AutoConfig.from_pretrained("ivanlau/language-detection-fine-tuned-on-xlm-roberta-base").id2label - }, - "svalabs": { - "model_name": "svalabs/twitter-xlm-roberta-crypto-spam", - "tokenizer": AutoTokenizer.from_pretrained("svalabs/twitter-xlm-roberta-crypto-spam"), - "model": InferenceSession("models/optimized_svalabs/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), - "id2label": AutoConfig.from_pretrained("svalabs/twitter-xlm-roberta-crypto-spam").id2label - }, - "EIStakovskii": { - "model_name": "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", - "tokenizer": AutoTokenizer.from_pretrained("EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus"), - "model": InferenceSession("models/optimized_EIStakovskii/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), - "id2label": AutoConfig.from_pretrained("EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus").id2label - }, - "jy46604790": { - "model_name": "jy46604790/Fake-News-Bert-Detect", - "tokenizer": AutoTokenizer.from_pretrained("jy46604790/Fake-News-Bert-Detect"), - "model": InferenceSession("models/optimized_jy46604790/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), - "id2label": AutoConfig.from_pretrained("jy46604790/Fake-News-Bert-Detect").id2label - }, -} - - -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - -def prepare_io_binding( - ort_session, input_dict, output_buffers, output_shapes -): - """ - Prepare the input/output binding for the provided ONNX 
Runtime session. - - :param ort_session: The ONNX Runtime session to bind inputs/outputs - :type ort_session: onnxruntime.InferenceSession - - :param input_dict: A dictionary containing the input names and values - :type input_dict: dict[str, numpy.ndarray] - - :param output_buffers: A dictionary containing the output buffers to store the output values - :type output_buffers: dict[str, numpy.ndarray] - - :param output_shapes: A dictionary containing the output shapes - :type output_shapes: dict[str, List[int]] - - :return: The IO binding for the provided ONNX Runtime session - :rtype: onnxruntime.OrtDevice - """ - ort_session.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL - io_binding = ort_session.io_binding() - - # Bind inputs - for name, input_val in input_dict.items(): - if input_val is not None: - input_val = torch.from_numpy(input_val) - io_binding.bind_input( - name, - input_val.device.type, - 0, - np.int64, - input_val.size(), - input_val.data_ptr(), - ) - - # Bind outputs - for output in ort_session.get_outputs(): - output_name = output.name - output_buffer = output_buffers[output_name] - io_binding.bind_output( - output_name, - "cuda", - 0, - np.float32, - output_shapes[output_name], - output_buffer.data_ptr(), - ) - - return io_binding - -async def model_inference(model, labels, inputs): - """ - Asynchronously performs inference on a PyTorch model using the provided inputs. - - :param model: The PyTorch model to perform inference on. - :type model: torch.nn.Module - :param labels: A list of labels corresponding to the model's output classes. - :type labels: List[str] - :param inputs: The inputs to be passed to the model for inference. - :type inputs: torch.Tensor - :return: A dictionary containing the highest scoring label and its corresponding score. 
- :rtype: Dict[str, Union[str, float]] - """ - output_buffers = { - "logits": torch.empty( - (model.get_outputs()[0].shape[1],), dtype=torch.float32, device="cuda" - ), -} - output_shapes = { - "logits": [1, model.get_outputs()[0].shape[1]], -} - io_binding = prepare_io_binding( - model, - inputs, - output_buffers, - output_shapes, -) - - model.run_with_iobinding(io_binding) - outputs = IOBindingHelper.get_outputs_from_io_binding_buffer( - model, output_buffers=output_buffers, output_shapes=output_shapes - ) - outputs = torch.from_numpy(outputs[0]) - scores = torch.nn.functional.softmax(outputs)[0] - max_i = scores.argmax().item() - return {"score": scores[max_i].item(), "label": labels[max_i]} - - -@app.post("/process") -async def process(request: Request): - # initiate timer - start_time = time.time() - text = (await request.body()).decode("utf-8") - results = TTLCache(maxsize=5000, ttl=300) - tasks = [] - - for model_name, model_data in models.items(): - tokenizer = model_data["tokenizer"] - model = model_data["model"] - - inputs = tokenizer(text, return_tensors="np", padding=True, truncation=True) - inputs = {key: np.array(val, dtype=np.int64) for key, val in inputs.items()} - tasks.append(asyncio.create_task(model_inference(model, model_data["id2label"], inputs))) - results = await asyncio.gather(*tasks) - results_dict = dict(zip(list(models.keys()), results)) - end_time = time.time() - print(end_time - start_time) - - return results_dict - -if __name__ == "__main__": - app.run(host="0.0.0.0") From 75c4e55c6577e29f52d31416dd09ebeaec720e41 Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 15:08:21 -0400 Subject: [PATCH 05/20] Delete requirements.txt --- solution/helm/requirements.txt | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 solution/helm/requirements.txt diff --git a/solution/helm/requirements.txt b/solution/helm/requirements.txt deleted file mode 100644 index a059f93..0000000 --- a/solution/helm/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -fastapi[all] -tritonclient[all]==2.34.0 -pydantic==1.10.9 -cachetools==5.3.1 -onnxruntime_gpu==1.15.1 \ No newline at end of file From 18f9dced301f926016ee49edf475574dbc0e49ff Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 15:08:55 -0400 Subject: [PATCH 06/20] added files to correct dir --- solution/Dockerfile | 22 +++++ solution/docker-compose.yml | 22 +++++ solution/main.py | 164 ++++++++++++++++++++++++++++++++++++ solution/requirements.txt | 5 ++ 4 files changed, 213 insertions(+) create mode 100644 solution/Dockerfile create mode 100644 solution/docker-compose.yml create mode 100644 solution/main.py create mode 100644 solution/requirements.txt diff --git a/solution/Dockerfile b/solution/Dockerfile new file mode 100644 index 0000000..dda84db --- /dev/null +++ b/solution/Dockerfile @@ -0,0 +1,22 @@ +# Base image +FROM python:3.9 + +# Copy project files into the Docker image +COPY . /app +COPY requirements.txt /app + +# Set working directory +WORKDIR /app + +# Install dependencies +RUN pip install -r requirements.txt + +# Set environment variables +ENV FASTAPI_APP=main.py + +# Expose port +EXPOSE 8000 + +RUN python3 models.py +# Command to run the Flask application +CMD ["uvicorn", "main:app", "--reload"] diff --git a/solution/docker-compose.yml b/solution/docker-compose.yml new file mode 100644 index 0000000..11f9a0d --- /dev/null +++ b/solution/docker-compose.yml @@ -0,0 +1,22 @@ +version: "3.9" + +services: + web: + build: . 
+ image: app + ports: + - "8000:8000" + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] + +networks: + default: + driver: bridge + ipam: + config: + - subnet: 172.16.57.0/24 \ No newline at end of file diff --git a/solution/main.py b/solution/main.py new file mode 100644 index 0000000..729af5e --- /dev/null +++ b/solution/main.py @@ -0,0 +1,164 @@ +import asyncio +import time +import numpy as np +from transformers import AutoConfig , AutoTokenizer +from onnxruntime.transformers.io_binding_helper import IOBindingHelper +from onnxruntime import InferenceSession, GraphOptimizationLevel +import torch +from fastapi import FastAPI, Request +from cachetools import TTLCache + +app = FastAPI() + +# Load the NLP models +models = { + "cardiffnlp": { + "model_name": "cardiffnlp/twitter-xlm-roberta-base-sentiment", + "tokenizer": AutoTokenizer.from_pretrained("cardiffnlp/twitter-xlm-roberta-base-sentiment"), + "model": InferenceSession("models/optimized_cardiffnlp/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), + "id2label": AutoConfig.from_pretrained("cardiffnlp/twitter-xlm-roberta-base-sentiment").id2label + }, + "ivanlau": { + "model_name": "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base", + "tokenizer": AutoTokenizer.from_pretrained("ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"), + "model": InferenceSession("models/optimized_ivanlau/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), + "id2label": AutoConfig.from_pretrained("ivanlau/language-detection-fine-tuned-on-xlm-roberta-base").id2label + }, + "svalabs": { + "model_name": "svalabs/twitter-xlm-roberta-crypto-spam", + "tokenizer": AutoTokenizer.from_pretrained("svalabs/twitter-xlm-roberta-crypto-spam"), + "model": InferenceSession("models/optimized_svalabs/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), + "id2label": AutoConfig.from_pretrained("svalabs/twitter-xlm-roberta-crypto-spam").id2label + }, + "EIStakovskii": { + "model_name": "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "tokenizer": AutoTokenizer.from_pretrained("EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus"), + "model": InferenceSession("models/optimized_EIStakovskii/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), + "id2label": AutoConfig.from_pretrained("EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus").id2label + }, + "jy46604790": { + "model_name": "jy46604790/Fake-News-Bert-Detect", + "tokenizer": AutoTokenizer.from_pretrained("jy46604790/Fake-News-Bert-Detect"), + "model": InferenceSession("models/optimized_jy46604790/model.onnx",providers=['TensorrtExecutionProvider', "CUDAExecutionProvider"]), + "id2label": AutoConfig.from_pretrained("jy46604790/Fake-News-Bert-Detect").id2label + }, +} + + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +def prepare_io_binding( + ort_session, input_dict, output_buffers, output_shapes +): + """ + Prepare the input/output binding for the provided ONNX Runtime session. 
+ + :param ort_session: The ONNX Runtime session to bind inputs/outputs + :type ort_session: onnxruntime.InferenceSession + + :param input_dict: A dictionary containing the input names and values + :type input_dict: dict[str, numpy.ndarray] + + :param output_buffers: A dictionary containing the output buffers to store the output values + :type output_buffers: dict[str, numpy.ndarray] + + :param output_shapes: A dictionary containing the output shapes + :type output_shapes: dict[str, List[int]] + + :return: The IO binding for the provided ONNX Runtime session + :rtype: onnxruntime.OrtDevice + """ + ort_session.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL + io_binding = ort_session.io_binding() + + # Bind inputs + for name, input_val in input_dict.items(): + if input_val is not None: + input_val = torch.from_numpy(input_val) + io_binding.bind_input( + name, + input_val.device.type, + 0, + np.int64, + input_val.size(), + input_val.data_ptr(), + ) + + # Bind outputs + for output in ort_session.get_outputs(): + output_name = output.name + output_buffer = output_buffers[output_name] + io_binding.bind_output( + output_name, + "cuda", + 0, + np.float32, + output_shapes[output_name], + output_buffer.data_ptr(), + ) + + return io_binding + +async def model_inference(model, labels, inputs): + """ + Asynchronously performs inference on a PyTorch model using the provided inputs. + + :param model: The PyTorch model to perform inference on. + :type model: torch.nn.Module + :param labels: A list of labels corresponding to the model's output classes. + :type labels: List[str] + :param inputs: The inputs to be passed to the model for inference. + :type inputs: torch.Tensor + :return: A dictionary containing the highest scoring label and its corresponding score. 
+ :rtype: Dict[str, Union[str, float]] + """ + output_buffers = { + "logits": torch.empty( + (model.get_outputs()[0].shape[1],), dtype=torch.float32, device="cuda" + ), +} + output_shapes = { + "logits": [1, model.get_outputs()[0].shape[1]], +} + io_binding = prepare_io_binding( + model, + inputs, + output_buffers, + output_shapes, +) + + model.run_with_iobinding(io_binding) + outputs = IOBindingHelper.get_outputs_from_io_binding_buffer( + model, output_buffers=output_buffers, output_shapes=output_shapes + ) + outputs = torch.from_numpy(outputs[0]) + scores = torch.nn.functional.softmax(outputs)[0] + max_i = scores.argmax().item() + return {"score": scores[max_i].item(), "label": labels[max_i]} + + +@app.post("/process") +async def process(request: Request): + # initiate timer + start_time = time.time() + text = (await request.body()).decode("utf-8") + results = TTLCache(maxsize=5000, ttl=300) + tasks = [] + + for model_name, model_data in models.items(): + tokenizer = model_data["tokenizer"] + model = model_data["model"] + + inputs = tokenizer(text, return_tensors="np", padding=True, truncation=True) + inputs = {key: np.array(val, dtype=np.int64) for key, val in inputs.items()} + tasks.append(asyncio.create_task(model_inference(model, model_data["id2label"], inputs))) + results = await asyncio.gather(*tasks) + results_dict = dict(zip(list(models.keys()), results)) + end_time = time.time() + print(end_time - start_time) + + return results_dict + +if __name__ == "__main__": + app.run(host="0.0.0.0") diff --git a/solution/requirements.txt b/solution/requirements.txt new file mode 100644 index 0000000..a059f93 --- /dev/null +++ b/solution/requirements.txt @@ -0,0 +1,5 @@ +fastapi[all] +tritonclient[all]==2.34.0 +pydantic==1.10.9 +cachetools==5.3.1 +onnxruntime_gpu==1.15.1 \ No newline at end of file From 7e1bd06d763c550d8cfe38c10510de6b813f76cf Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 15:16:16 -0400 Subject: [PATCH 07/20] Update requirements.txt --- solution/requirements.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/solution/requirements.txt b/solution/requirements.txt index a059f93..a123fe6 100644 --- a/solution/requirements.txt +++ b/solution/requirements.txt @@ -2,4 +2,7 @@ fastapi[all] tritonclient[all]==2.34.0 pydantic==1.10.9 cachetools==5.3.1 -onnxruntime_gpu==1.15.1 \ No newline at end of file +onnxruntime_gpu==1.15.1 +numpy +transformers==4.29.2 +torch==2.0.0 From 619c977e1c75c86769c91e72e21b5bef928121ee Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 15:22:58 -0400 Subject: [PATCH 08/20] fixed file copies --- solution/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/solution/Dockerfile b/solution/Dockerfile index dda84db..343d5d5 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -2,8 +2,9 @@ FROM python:3.9 # Copy project files into the Docker image -COPY . 
/app COPY requirements.txt /app +COPY main.py /app +COPY models.py /app # Set working directory WORKDIR /app From 416436e0892d711ad72dd103587c985355429a37 Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 15:25:29 -0400 Subject: [PATCH 09/20] Update Dockerfile --- solution/Dockerfile | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/solution/Dockerfile b/solution/Dockerfile index 343d5d5..5a8090f 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -1,14 +1,13 @@ # Base image FROM python:3.9 -# Copy project files into the Docker image -COPY requirements.txt /app -COPY main.py /app -COPY models.py /app - # Set working directory WORKDIR /app +# Copy project files into the Docker image +COPY ./ /app + + # Install dependencies RUN pip install -r requirements.txt From 8774ecd5a6e5d8b0ef7a1da5bb009e3387f646c7 Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 15:46:40 -0400 Subject: [PATCH 10/20] added explicit copies --- solution/Dockerfile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/solution/Dockerfile b/solution/Dockerfile index 5a8090f..d7cfc1c 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -5,8 +5,10 @@ FROM python:3.9 WORKDIR /app # Copy project files into the Docker image -COPY ./ /app - +COPY requirements.txt /app +COPY main.py /app +COPY models.py /app +RUN ls # Install dependencies RUN pip install -r requirements.txt From 19854affd0fa49e00fe103aea32bd6dce5da6eee Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 15:49:04 -0400 Subject: [PATCH 11/20] Add files via upload --- solution/models.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 solution/models.py diff --git a/solution/models.py b/solution/models.py new file mode 100644 index 0000000..68c799b --- /dev/null +++ b/solution/models.py @@ -0,0 +1,58 @@ +import os +from optimum.onnxruntime import ORTModelForSequenceClassification +from onnxruntime.transformers.io_binding_helper import IOBindingHelper +from onnxruntime.transformers.optimizer import optimize_model +import torch + +models = { + "cardiffnlp": { + "model_name": "cardiffnlp/twitter-xlm-roberta-base-sentiment", + "tokenizer": None, + "model": None, + }, + "ivanlau": { + "model_name": "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base", + "tokenizer": None, + "model": None, + }, + "svalabs": { + "model_name": "svalabs/twitter-xlm-roberta-crypto-spam", + "tokenizer": None, + "model": None, + }, + "EIStakovskii": { + "model_name": "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "tokenizer": None, + "model": None, + }, + "jy46604790": { + "model_name": "jy46604790/Fake-News-Bert-Detect", + "tokenizer": None, + "model": None, + }, +} + +def download_model(model_name): + optimized_onnx_path = f"optimized_{model_name}" + model_data = models[model_name] + model_data["optimized_onnx_path"] = os.path.join("models", optimized_onnx_path) + model_dir = os.path.join("models", model_name) + + if not os.path.exists(model_dir): + os.makedirs(model_dir) + + model = ORTModelForSequenceClassification.from_pretrained(model_data["model_name"], export=True, use_io_binding=False).to(device) + model.save_pretrained(str("models/" + optimized_onnx_path)) + model = optimize_model( + input=str("models/" + optimized_onnx_path + "/model.onnx"), + model_type="bert", + use_gpu=True +) + model.save_model_to_file(str("models/" + optimized_onnx_path + "/model.onnx")) + + +device = torch.device("cuda" 
if torch.cuda.is_available() else "cpu") + +for model_name in models: + download_model(model_name) + print(model_name) \ No newline at end of file From a3db4888d531977924bea108cb1f5f5c5fea8ccc Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 15:55:44 -0400 Subject: [PATCH 12/20] added missing lib --- solution/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/solution/requirements.txt b/solution/requirements.txt index a123fe6..0b9052b 100644 --- a/solution/requirements.txt +++ b/solution/requirements.txt @@ -6,3 +6,4 @@ onnxruntime_gpu==1.15.1 numpy transformers==4.29.2 torch==2.0.0 +optimum==1.8.7 From f4415283e49b1ccdc7a83f303af954bddb125d10 Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 16:02:57 -0400 Subject: [PATCH 13/20] added onnx lib --- solution/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/solution/requirements.txt b/solution/requirements.txt index 0b9052b..e9715d1 100644 --- a/solution/requirements.txt +++ b/solution/requirements.txt @@ -7,3 +7,4 @@ numpy transformers==4.29.2 torch==2.0.0 optimum==1.8.7 +onnx==1.13.1 From a3d59a5ca4bdf8120c60b72aaa3827e78992882c Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 17:23:51 -0400 Subject: [PATCH 14/20] add cache clear --- solution/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/solution/models.py b/solution/models.py index 68c799b..8f76dfc 100644 --- a/solution/models.py +++ b/solution/models.py @@ -54,5 +54,6 @@ def download_model(model_name): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") for model_name in models: + torch.cuda.empty_cache() download_model(model_name) print(model_name) \ No newline at end of file From 5f6acd0aa029db60549feb1b06511020966f6d1d Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Thu, 22 Jun 2023 17:40:24 -0400 Subject: [PATCH 15/20] moved model building to runtime --- solution/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solution/Dockerfile b/solution/Dockerfile index d7cfc1c..fc879ca 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -19,6 +19,6 @@ ENV FASTAPI_APP=main.py # Expose port EXPOSE 8000 -RUN python3 models.py +# RUN python3 models.py # Command to run the Flask application -CMD ["uvicorn", "main:app", "--reload"] +CMD ["python3", "models.py", "&&","uvicorn", "main:app", "--reload"] From 0ff7828d11e39624c6ff49140bab414340c667ca Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Fri, 23 Jun 2023 13:04:09 -0400 Subject: [PATCH 16/20] updated models.py --- solution/models.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/solution/models.py b/solution/models.py index 8f76dfc..97e6291 100644 --- a/solution/models.py +++ b/solution/models.py @@ -1,6 +1,5 @@ import os from optimum.onnxruntime import ORTModelForSequenceClassification -from onnxruntime.transformers.io_binding_helper import IOBindingHelper from onnxruntime.transformers.optimizer import optimize_model import torch @@ -38,17 +37,20 @@ def download_model(model_name): model_data["optimized_onnx_path"] = os.path.join("models", optimized_onnx_path) model_dir = os.path.join("models", model_name) - if not os.path.exists(model_dir): - os.makedirs(model_dir) - - model = ORTModelForSequenceClassification.from_pretrained(model_data["model_name"], export=True, use_io_binding=False).to(device) - model.save_pretrained(str("models/" + optimized_onnx_path)) - model = optimize_model( - input=str("models/" + optimized_onnx_path + "/model.onnx"), - 
model_type="bert", - use_gpu=True -) - model.save_model_to_file(str("models/" + optimized_onnx_path + "/model.onnx")) + os.makedirs(model_dir, exist_ok=True) + + with torch.no_grad(): + model = ORTModelForSequenceClassification.from_pretrained( + model_data["model_name"], export=True, use_io_binding=False + ).to(device) + model.save_pretrained(f"models/{optimized_onnx_path}") + + optimize_model( + input=f"models/{optimized_onnx_path}/model.onnx", + model_type="bert", + use_gpu=True, + ).save_model_to_file(f"models/{optimized_onnx_path}/model.onnx") + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") From 01d2c3b242468679eebabfb8ee1966b3efcdcf54 Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Fri, 23 Jun 2023 13:10:15 -0400 Subject: [PATCH 17/20] updated server workers --- solution/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solution/Dockerfile b/solution/Dockerfile index fc879ca..e50d4d2 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -12,7 +12,7 @@ RUN ls # Install dependencies RUN pip install -r requirements.txt - +RUN python3 models.py # Set environment variables ENV FASTAPI_APP=main.py @@ -21,4 +21,4 @@ EXPOSE 8000 # RUN python3 models.py # Command to run the Flask application -CMD ["python3", "models.py", "&&","uvicorn", "main:app", "--reload"] +CMD ["uvicorn", "main:app", "--workers=2", "--worker-class=uvicorn.workers.UvicornWorker"] From 2088950fa88b80067f6002bd5c3f9dd64b392040 Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Fri, 23 Jun 2023 13:19:45 -0400 Subject: [PATCH 18/20] check RAM --- solution/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solution/Dockerfile b/solution/Dockerfile index e50d4d2..c22f5d8 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -8,7 +8,7 @@ WORKDIR /app COPY requirements.txt /app COPY main.py /app COPY models.py /app -RUN ls +RUN free -g # Install dependencies RUN pip install -r requirements.txt From 1a95a8f102ed6752bdac428ac9b4fbdeb9c51c5a Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Fri, 23 Jun 2023 13:26:46 -0400 Subject: [PATCH 19/20] moved execution to bashfile --- solution/Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/solution/Dockerfile b/solution/Dockerfile index c22f5d8..dfe205e 100644 --- a/solution/Dockerfile +++ b/solution/Dockerfile @@ -8,11 +8,9 @@ WORKDIR /app COPY requirements.txt /app COPY main.py /app COPY models.py /app -RUN free -g # Install dependencies RUN pip install -r requirements.txt -RUN python3 models.py # Set environment variables ENV FASTAPI_APP=main.py @@ -21,4 +19,4 @@ EXPOSE 8000 # RUN python3 models.py # Command to run the Flask application -CMD ["uvicorn", "main:app", "--workers=2", "--worker-class=uvicorn.workers.UvicornWorker"] +ENTRYPOINT [ "bash", "build_start.sh" ] From c07461c2f4894549551f08aca24ee1fb01386c5c Mon Sep 17 00:00:00 2001 From: Jodh Singh Date: Fri, 23 Jun 2023 13:29:36 -0400 Subject: [PATCH 20/20] added shell script --- solution/build_start.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 solution/build_start.sh diff --git a/solution/build_start.sh b/solution/build_start.sh new file mode 100644 index 0000000..f5855fd --- /dev/null +++ b/solution/build_start.sh @@ -0,0 +1,3 @@ +#!/bin/sh +python3 models.py && \ +uvicorn main:app --workers=5 --worker-class=uvicorn.workers.UvicornWorker