tenstorrent · bgoelTT · Jan 11, 2025 · Dec 9, 2024 · Dec 9, 2024 · Dec 9, 2024
diff --git a/app/api/model_control/urls.py b/app/api/model_control/urls.py
@@ -10,5 +10,6 @@
     path("inference/", views.InferenceView.as_view()),
     path("deployed/", views.DeployedModelsView.as_view()),
     path("model_weights/", views.ModelWeightsView.as_view()),
+    path("object-detection/", views.ObjectDetectionInferenceView.as_view()),
     path("health/", views.ModelHealthView.as_view()),
 ]
diff --git a/app/api/model_control/views.py b/app/api/model_control/views.py
@@ -4,6 +4,9 @@
 
 # model_control/views.py
 from pathlib import Path
+import requests
+from PIL import Image
+import io
 
 from rest_framework import status
 from rest_framework.views import APIView
@@ -12,6 +15,7 @@
 
 from .serializers import InferenceSerializer, ModelWeightsSerializer
 from model_control.model_utils import (
+    encoded_jwt,
     get_deploy_cache,
     stream_response_from_external_api,
     health_check,
@@ -97,3 +101,39 @@ def get(self, request, *args, **kwargs):
             return Response(weights, status=status.HTTP_200_OK)
         else:
             return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
+
+
+class ObjectDetectionInferenceView(APIView):
+    def post(self, request, *args, **kwargs):
+        """Resize the uploaded image to 320x320 JPEG and relay the model's JSON reply.
+
+        Responds 400 on invalid input, 401 when the model service answers 401,
+        and 500 on any other upstream failure (timeout, connection, bad JSON).
+        """
+        data = request.data
+        logger.info(f"ObjectDetectionInferenceView data:={data}")
+        serializer = InferenceSerializer(data=data)
+        if not serializer.is_valid():
+            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
+        try:
+            deploy = get_deploy_cache()[data.get("deploy_id")]
+            # JPEG cannot store alpha/palette modes, so normalise to RGB up front
+            pil_image = Image.open(data["image"].file).convert("RGB")  # expect 1 file
+        except (KeyError, AttributeError, OSError):  # bad deploy_id, no upload, undecodable image
+            return Response(status=status.HTTP_400_BAD_REQUEST)
+        buf = io.BytesIO()
+        pil_image.resize((320, 320)).save(buf, format="JPEG")  # target dimensions
+        try:
+            inference_data = requests.post(
+                "http://" + deploy["internal_url"],
+                files={"file": buf.getvalue()},
+                headers={"Authorization": f"Bearer {encoded_jwt}"},
+                timeout=5,
+            )
+            inference_data.raise_for_status()
+            return Response(inference_data.json(), status=status.HTTP_200_OK)
+        except (requests.exceptions.RequestException, ValueError) as err:
+            upstream = getattr(err, "response", None)  # Response is falsy on 4xx/5xx, so test None
+            if upstream is not None and upstream.status_code == status.HTTP_401_UNAUTHORIZED:
+                return Response(status=status.HTTP_401_UNAUTHORIZED)
+            return Response(status=status.HTTP_500_INTERNAL_SERVER_ERROR)
diff --git a/app/api/shared_config/device_config.py b/app/api/shared_config/device_config.py
@@ -6,10 +6,13 @@
 
 
 class DeviceConfigurations(Enum):
+    """Device targets; *_WH_ARCH_YAML members make ModelImpl set WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml."""
     CPU = auto()
     E150 = auto()
     N150 = auto()
+    N150_WH_ARCH_YAML = auto()
     N300x4 = auto()
+    N300x4_WH_ARCH_YAML = auto()
 
 
 def detect_available_devices():

diff --git a/app/api/shared_config/model_config.py b/app/api/shared_config/model_config.py
@@ -41,7 +41,6 @@ class ModelImpl:
     model_id: str
     image_name: str
     image_tag: str
-    hf_model_path: str
     device_configurations: Set["DeviceConfigurations"]
     docker_config: Dict[str, Any]
     user_uid: int  # user inside docker container uid (for file permissions)
@@ -51,6 +50,7 @@ class ModelImpl:
     service_route: str
     env_file: str = ""
     health_route: str = "/health"
+    hf_model_path: str = ""
 
     def __post_init__(self):
         self.docker_config.update({"volumes": self.get_volume_mounts()})
@@ -59,18 +59,22 @@ def __post_init__(self):
         self.docker_config["environment"]["HF_HOME"] = Path(
             backend_config.model_container_cache_root
         ).joinpath("huggingface")
-
-        # Set environment variable if N150 or N300x4 is in the device configurations
-        if DeviceConfigurations.N150 in self.device_configurations or DeviceConfigurations.N300x4 in self.device_configurations:
-            self.docker_config["environment"]["WH_ARCH_YAML"] = "wormhole_b0_80_arch_eth_dispatch.yaml"
+
+        # Set environment variable if N150_WH_ARCH_YAML or N300x4_WH_ARCH_YAML is in the device configurations
+        if (
+            DeviceConfigurations.N150_WH_ARCH_YAML in self.device_configurations
+            or DeviceConfigurations.N300x4_WH_ARCH_YAML in self.device_configurations
+        ):
+            self.docker_config["environment"]["WH_ARCH_YAML"] = (
+                "wormhole_b0_80_arch_eth_dispatch.yaml"
+            )
 
         if self.env_file:
             logger.info(f"Using env file: {self.env_file}")
             # env file should be in persistent volume mounted
             env_dict = load_dotenv_dict(self.env_file)
             # env file overrides any existing docker environment variables
             self.docker_config["environment"].update(env_dict)
-
 
     @property
     def image_version(self) -> str:
@@ -155,6 +159,19 @@ def base_docker_config():
 # model_ids are unique strings to define a model, they could be uuids but
 # using friendly strings prefixed with id_ is more helpful for debugging
 model_implmentations_list = [
+    ModelImpl(
+        model_name="YOLOv4",
+        model_id="id_yolov4v0.0.1",
+        image_name="ghcr.io/tenstorrent/tt-inference-server/tt-metal-yolov4-src-base",
+        image_tag="v0.0.1-tt-metal-65d246482b3f",
+        device_configurations={DeviceConfigurations.N150},
+        docker_config=base_docker_config(),
+        user_uid=1000,
+        user_gid=1000,
+        shm_size="32G",
+        service_port=7000,
+        service_route="/objdetection_v2",
+    ),
     ModelImpl(
         model_name="Mock-Llama-3.1-70B-Instruct",
         model_id="id_mock_vllm_modelv0.0.1",
@@ -174,8 +191,8 @@ def base_docker_config():
         model_id="id_tt-metal-falcon-7bv0.0.13",
         image_name="tt-metal-falcon-7b",
         image_tag="v0.0.13",
+        device_configurations={DeviceConfigurations.N150_WH_ARCH_YAML},
         hf_model_path="tiiuae/falcon-7b-instruct",
-        device_configurations={DeviceConfigurations.N150},
         docker_config=base_docker_config(),
         user_uid=1000,
         user_gid=1000,
@@ -189,7 +206,7 @@ def base_docker_config():
         image_name="ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm",
         image_tag="v0.0.3-tt-metal-385904186f81-384f1790c3be",
         hf_model_path="meta-llama/Llama-3.1-70B-Instruct",
-        device_configurations={DeviceConfigurations.N300x4},
+        device_configurations={DeviceConfigurations.N300x4_WH_ARCH_YAML},
         docker_config=base_docker_config(),
         user_uid=1000,
         user_gid=1000,
@@ -204,7 +221,7 @@ def base_docker_config():
         image_name="ghcr.io/tenstorrent/tt-inference-server/tt-metal-mistral-7b-src-base",
         image_tag="v0.0.3-tt-metal-v0.52.0-rc33",
         hf_model_path="mistralai/Mistral-7B-Instruct-v0.2",
-        device_configurations={DeviceConfigurations.N300x4},
+        device_configurations={DeviceConfigurations.N300x4_WH_ARCH_YAML},
         docker_config=base_docker_config(),
         user_uid=1000,
         user_gid=1000,

diff --git a/app/docker-compose.yml b/app/docker-compose.yml
@@ -21,7 +21,7 @@ services:
       - "8000:8000"
     # command: bash
     # dev server can be used for breakpoint debugging, does not support streaming
-    # command: ./manage.py runserver 0.0.0.0:8000
+    # command: python ./manage.py runserver 0.0.0.0:8000
     # gunicorn is used from production, supports streaming
 
     command: gunicorn --workers 3 --bind 0.0.0.0:8000 --preload --timeout 1200 api.wsgi:application
@@ -52,7 +52,7 @@ services:
       # On first application load resources for transformers/etc
       # are downloaded. The UI should not start until these resources
       # have been downloaded. Adjust timeout if on a very slow connection
-      test: ["CMD", "curl", "-f", "http://localhost:8000/up/"]
+      test: [ "CMD", "curl", "-f", "http://localhost:8000/up/" ]
       timeout: 120s
       interval: 10s
       retries: 5
@@ -93,7 +93,7 @@ services:
       - "8111:8111"
     healthcheck:
       # Adjust below to match your container port
-      test: ["CMD", "curl", "-f", "http://localhost:8111/api/v1/heartbeat"]
+      test: [ "CMD", "curl", "-f", "http://localhost:8111/api/v1/heartbeat" ]
       interval: 10s
       timeout: 10s
       retries: 3