Merge automotive reference into main branch (#2047)
* Automotive reference implementation sketch
* WIP: automotive reference implementation
* WIP: add segmentation, dataset, and util functions to reference
* WIP: reference implementation with issues during post-processing
* WIP: update dockerfile, remove pdb breaks
* WIP: reference implementation that runs samples
* Update README.md with initial docker runs
* WIP: add accuracy checker to reference
* Fix: set lidar detector to evaluation mode
* [Automated Commit] Format Codebase
* Update README.md
* Remove unnecessary mlperf conf load and implement dataset get_item_count
* Add CPU-only version of reference
* Update dockerfile and display overall mAP
* Code cleanup
* Update README.md
* Fix package versions in dockerfile
* Handle frames if model predicts no objects
* Allow accuracy check on subset of datasets
* [Automated Commit] Format Codebase
* Updates automotive reference implementation (#2045)
  * Remove unnecessary mlperf conf load and implement dataset get_item_count
  * Add CPU-only version of reference
  * Update dockerfile and display overall mAP
  * Code cleanup
  * Update README.md
  * Fix package versions in dockerfile
  * Handle frames if model predicts no objects
  * Allow accuracy check on subset of datasets
  * [Automated Commit] Format Codebase

  Co-authored-by: mlcommons-bot <[email protected]>
* [Automated Commit] Format Codebase
* Fix whitespace merge errors
* [Automated Commit] Format Codebase
* [Automated Commit] Format Codebase
* Add removed code during merge
* [Automated Commit] Format Codebase
* Change NMS thresholds on CPU-only code

Co-authored-by: Pablo Gonzalez <[email protected]>
Co-authored-by: Arjun Suresh <[email protected]>
Co-authored-by: arjunsuresh <[email protected]>
Co-authored-by: Miro <[email protected]>
Co-authored-by: mlcommons-bot <[email protected]>
1 parent 25bba8f · commit e618c31 · 11 changed files with 800 additions and 46 deletions.
README.md:

## Reference implementation for the automotive 3D detection benchmark

## Dataset and model checkpoints
Contact MLCommons support for access to the Waymo Open Dataset and the model checkpoints for the reference implementation. You will need to accept a license agreement and will be given directions to download the data. Place the kitti_format folder under a directory named waymo. There are four checkpoints in total: two for PyTorch and two for ONNX.
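The commands below assume a layout along these lines. The checkpoint file names are the ones used in the run commands; where you keep the checkpoint directory is up to you (it is shown under waymo/ here only so that it is visible inside the container, which is an assumption rather than a requirement):
```
waymo/
├── kitti_format/
│   └── training/
└── <checkpoint_path>/
    ├── pp_ep36.pth                                  # PyTorch lidar detector
    ├── best_deeplabv3plus_resnet50_waymo_os16.pth   # PyTorch segmentor
    ├── pp.onnx                                      # ONNX lidar detector
    └── deeplabv3+.onnx                              # ONNX segmentor
```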
## Running with docker
Build the container, then run it with the inference repo and the Waymo dataset directory mounted.
```
docker build -t auto_inference -f dockerfile.gpu .
docker run --gpus=all -it -v <directory to inference repo>/inference/:/inference -v <path to waymo dataset>/waymo:/waymo --rm auto_inference
```
### Run with GPU
```
cd /inference/automotive/3d-object-detection
python main.py --dataset waymo --dataset-path /waymo/kitti_format/ --lidar-path <checkpoint_path>/pp_ep36.pth --segmentor-path <checkpoint_path>/best_deeplabv3plus_resnet50_waymo_os16.pth --mlperf_conf /inference/mlperf.conf
```

### Run with CPU and ONNX
```
python main.py --dataset waymo --dataset-path /waymo/kitti_format/ --lidar-path <checkpoint_path>/pp.onnx --segmentor-path <checkpoint_path>/deeplabv3+.onnx --mlperf_conf /inference/mlperf.conf
```

### Run the accuracy checker
```
python accuracy_waymo.py --mlperf-accuracy-file <path to accuracy file>/mlperf_log_accuracy.json --waymo-dir /waymo/kitti_format/
```
ONNX backend (new file):

```python
from typing import Optional, List, Union
import os
import logging
from collections import namedtuple

import numpy as np
import onnxruntime as ort
import torch

import backend
from model.painter import Painter
from model.pointpillars_core import PointPillarsPre, PointPillarsPos
from tools.process import keep_bbox_from_image_range
from waymo import Waymo

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("backend-onnx")


def change_calib_device(calib, cuda):
    """Move the calibration matrices to the requested device as float32."""
    device = 'cuda' if cuda else 'cpu'
    result = {}
    result['R0_rect'] = calib['R0_rect'].to(device=device, dtype=torch.float)
    for i in range(5):
        result['P' + str(i)] = calib['P' + str(i)].to(
            device=device, dtype=torch.float)
        result['Tr_velo_to_cam_' + str(i)] = calib['Tr_velo_to_cam_' + str(i)].to(
            device=device, dtype=torch.float)
    return result


def to_numpy(tensor):
    """Detach (if needed) and convert a torch tensor to a numpy array."""
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
```
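As a quick illustration of the helper above, here is a hypothetical sketch; the identity matrices are placeholders for real Waymo calibration data, which uses these same keys:

```python
# Hypothetical sketch: a dummy calibration dict with the keys that
# change_calib_device() expects. torch.eye(4) stands in for the real
# rectification/projection/extrinsic matrices.
import torch

calib = {'R0_rect': torch.eye(4)}
for i in range(5):
    calib['P' + str(i)] = torch.eye(4)
    calib['Tr_velo_to_cam_' + str(i)] = torch.eye(4)

calib_cpu = change_calib_device(calib, cuda=False)
# Every returned tensor is now float32 on the CPU.
assert all(t.device.type == 'cpu' and t.dtype == torch.float32
           for t in calib_cpu.values())
```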
```python
class BackendOnnx(backend.Backend):
    def __init__(self, segmentor_path, lidar_detector_path, data_path):
        super(BackendOnnx, self).__init__()
        self.segmentor_path = segmentor_path
        self.lidar_detector_path = lidar_detector_path
        # self.segmentation_classes = 18
        self.detection_classes = 3
        self.data_root = data_path
        CLASSES = Waymo.CLASSES
        self.LABEL2CLASSES = {v: k for k, v in CLASSES.items()}

    def version(self):
        return torch.__version__

    def name(self):
        return "python-SUT"

    def load(self):
        # The painter wraps the ONNX segmentation model used to "paint"
        # semantic class scores onto the lidar points.
        PaintArgs = namedtuple(
            'PaintArgs', ['training_path', 'model_path', 'cam_sync'])
        painting_args = PaintArgs(
            os.path.join(self.data_root, 'training'), self.segmentor_path, False)
        self.painter = Painter(painting_args, onnx=True)
        self.segmentor = self.painter.model
        # The pre/post stages of PointPillars run in PyTorch; the detector
        # network itself runs through onnxruntime.
        model_pre = PointPillarsPre()
        model_post = PointPillarsPos(self.detection_classes)
        model_pre.eval()
        model_post.eval()
        ort_sess = ort.InferenceSession(self.lidar_detector_path)
        self.lidar_detector = ort_sess
        self.model_pre = model_pre
        self.model_post = model_post
        return self

    def predict(self, inputs):
        dimensions, locations, rotation_y, box2d = [], [], [], []
        class_labels, class_scores, ids = [], [], []
        with torch.inference_mode():
            model_input = inputs[0]
            batched_pts = model_input['pts']
            # Run the ONNX segmentor on every camera image and collect the
            # per-camera semantic scores.
            scores_from_cam = []
            for i in range(len(model_input['images'])):
                input_image_name = self.segmentor.get_inputs()[0].name
                input_data = {
                    input_image_name: to_numpy(model_input['images'][i])}
                segmentation_score = self.segmentor.run(None, input_data)
                segmentation_score = [
                    torch.from_numpy(item) for item in segmentation_score]
                scores_from_cam.append(
                    self.painter.get_score(segmentation_score[0].squeeze(0)).cpu())
            # Paint the lidar points with the camera segmentation scores.
            points = self.painter.augment_lidar_class_scores_both(
                scores_from_cam, batched_pts, model_input['calib_info'])
            # Pillarize the painted points, run the ONNX detector, then
            # decode boxes with the PyTorch post-processing stage.
            pillars, coors_batch, npoints_per_pillar = self.model_pre(
                batched_pts=[points])
            input_pillars_name = self.lidar_detector.get_inputs()[0].name
            input_coors_batch_name = self.lidar_detector.get_inputs()[1].name
            input_npoints_per_pillar_name = self.lidar_detector.get_inputs()[2].name
            input_data = {
                input_pillars_name: to_numpy(pillars),
                input_coors_batch_name: to_numpy(coors_batch),
                input_npoints_per_pillar_name: to_numpy(npoints_per_pillar),
            }
            result = self.lidar_detector.run(None, input_data)
            result = [torch.from_numpy(item) for item in result]
            batch_results = self.model_post(result)
            # Convert each frame's detections to the KITTI-style result format.
            for j, result in enumerate(batch_results):
                format_result = {
                    'class': [],
                    'truncated': [],
                    'occluded': [],
                    'alpha': [],
                    'bbox': [],
                    'dimensions': [],
                    'location': [],
                    'rotation_y': [],
                    'score': [],
                    'idx': -1,
                }

                calib_info = model_input['calib_info']
                image_info = model_input['image_info']
                idx = model_input['image_info']['image_idx']
                format_result['idx'] = idx
                calib_info = change_calib_device(calib_info, False)
                # Keep only boxes that project into the image range.
                result_filter = keep_bbox_from_image_range(
                    result, calib_info, 5, image_info, False)

                lidar_bboxes = result_filter['lidar_bboxes']
                labels, scores = result_filter['labels'], result_filter['scores']
                bboxes2d, camera_bboxes = result_filter['bboxes2d'], result_filter['camera_bboxes']
                for lidar_bbox, label, score, bbox2d, camera_bbox in \
                        zip(lidar_bboxes, labels, scores, bboxes2d, camera_bboxes):
                    format_result['class'].append(label.item())
                    format_result['truncated'].append(0.0)
                    format_result['occluded'].append(0)
                    # Observation angle: rotation_y minus the azimuth of the
                    # box center in the camera frame.
                    alpha = camera_bbox[6] - \
                        np.arctan2(camera_bbox[0], camera_bbox[2])
                    format_result['alpha'].append(alpha.item())
                    format_result['bbox'].append(bbox2d.tolist())
                    format_result['dimensions'].append(camera_bbox[3:6])
                    format_result['location'].append(camera_bbox[:3])
                    format_result['rotation_y'].append(camera_bbox[6].item())
                    format_result['score'].append(score.item())

                # Stack per-frame tensors only when the model predicted at
                # least one object for this frame.
                if len(format_result['dimensions']) > 0:
                    format_result['dimensions'] = torch.stack(
                        format_result['dimensions'])
                    format_result['location'] = torch.stack(
                        format_result['location'])
                dimensions.append(format_result['dimensions'])
                locations.append(format_result['location'])
                rotation_y.append(format_result['rotation_y'])
                class_labels.append(format_result['class'])
                class_scores.append(format_result['score'])
                box2d.append(format_result['bbox'])
                ids.append(format_result['idx'])

        return dimensions, locations, rotation_y, box2d, class_labels, class_scores, ids
```
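A minimal usage sketch, assuming the container layout from the README above. The checkpoint paths are placeholders in the README's own notation, and `sample` stands in for one frame in the dict layout that `predict()` reads; in the benchmark this wiring is presumably done by the harness in main.py.

```python
# Hypothetical usage sketch, not part of the reference harness.
backend = BackendOnnx(
    segmentor_path="<checkpoint_path>/deeplabv3+.onnx",   # illustrative path
    lidar_detector_path="<checkpoint_path>/pp.onnx",      # illustrative path
    data_path="/waymo/kitti_format/",
)
backend.load()

# `sample` must be a dict with the keys predict() reads for one frame:
# 'pts', 'images', 'calib_info', and 'image_info'.
outputs = backend.predict([sample])
dimensions, locations, rotation_y, box2d, labels, scores, ids = outputs
```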