Align paddle inference configs with those of FD (#2436)
Bobholamovic authored Nov 8, 2024
1 parent 3744b71 commit f8357ca
Showing 2 changed files with 53 additions and 44 deletions.
94 changes: 51 additions & 43 deletions paddlex/inference/components/paddle_predictor/predictor.py
@@ -73,10 +73,44 @@ def _create(self):
        params_file = (self.model_dir / f"{self.model_prefix}.pdiparams").as_posix()
        config = Config(model_file, params_file)

        config.enable_memory_optim()
        if self.option.device in ("gpu", "dcu"):
            config.enable_use_gpu(200, self.option.device_id)
            if hasattr(config, "enable_new_ir"):
                config.enable_new_ir(self.option.enable_new_ir)
            if self.option.device == "gpu":
                config.exp_disable_mixed_precision_ops({"feed", "fetch"})
            config.enable_use_gpu(100, self.option.device_id)
            if self.option.device == "gpu":
                # NOTE: The pptrt settings are not aligned with those of FD.
                precision_map = {
                    "trt_int8": Config.Precision.Int8,
                    "trt_fp32": Config.Precision.Float32,
                    "trt_fp16": Config.Precision.Half,
                }
                if self.option.run_mode in precision_map.keys():
                    config.enable_tensorrt_engine(
                        workspace_size=(1 << 25) * self.option.batch_size,
                        max_batch_size=self.option.batch_size,
                        min_subgraph_size=self.option.min_subgraph_size,
                        precision_mode=precision_map[self.option.run_mode],
                        use_static=self.option.trt_use_static,
                        use_calib_mode=self.option.trt_calib_mode,
                    )

                    if self.option.shape_info_filename is not None:
                        if not os.path.exists(self.option.shape_info_filename):
                            config.collect_shape_range_info(
                                self.option.shape_info_filename
                            )
                            logging.info(
                                f"Dynamic shape info is collected into: {self.option.shape_info_filename}"
                            )
                        else:
                            logging.info(
                                f"A dynamic shape info file ( {self.option.shape_info_filename} ) already exists. \
No need to generate again."
                            )
                            config.enable_tuned_tensorrt_dynamic_shape(
                                self.option.shape_info_filename, True
                            )
        elif self.option.device == "npu":
            config.enable_custom_device("npu")
        elif self.option.device == "xpu":
@@ -86,53 +120,32 @@ def _create(self):
        else:
            assert self.option.device == "cpu"
            config.disable_gpu()
            if hasattr(config, "enable_new_ir"):
                config.enable_new_ir(self.option.enable_new_ir)
            if hasattr(config, "enable_new_executor"):
                config.enable_new_executor(True)
            if "mkldnn" in self.option.run_mode:
                try:
                    config.enable_mkldnn()
                    config.set_cpu_math_library_num_threads(self.option.cpu_threads)
                    if "bf16" in self.option.run_mode:
                        config.enable_mkldnn_bfloat16()
                except Exception as e:
                    logging.warning(
                        "MKL-DNN is not available. We will disable MKL-DNN."
                    )

        precision_map = {
            "trt_int8": Config.Precision.Int8,
            "trt_fp32": Config.Precision.Float32,
            "trt_fp16": Config.Precision.Half,
        }
        if self.option.run_mode in precision_map.keys():
            config.enable_tensorrt_engine(
                workspace_size=(1 << 25) * self.option.batch_size,
                max_batch_size=self.option.batch_size,
                min_subgraph_size=self.option.min_subgraph_size,
                precision_mode=precision_map[self.option.run_mode],
                use_static=self.option.trt_use_static,
                use_calib_mode=self.option.trt_calib_mode,
            )

            if self.option.shape_info_filename is not None:
                if not os.path.exists(self.option.shape_info_filename):
                    config.collect_shape_range_info(self.option.shape_info_filename)
                    logging.info(
                        f"Dynamic shape info is collected into: {self.option.shape_info_filename}"
                    )
                else:
                    logging.info(
                        f"A dynamic shape info file ( {self.option.shape_info_filename} ) already exists. \
No need to generate again."
                    )
                    config.enable_tuned_tensorrt_dynamic_shape(
                        self.option.shape_info_filename, True
                    )
                config.set_mkldnn_cache_capacity(-1)
            else:
                if hasattr(config, "disable_mkldnn"):
                    config.disable_mkldnn()

        # Disable paddle inference logging
        config.disable_glog_info()

        config.set_cpu_math_library_num_threads(self.option.cpu_threads)

        if not (self.option.device == "gpu" and self.option.run_mode.startswith("trt")):
            if hasattr(config, "enable_new_ir"):
                config.enable_new_ir(self.option.enable_new_ir)
            if hasattr(config, "enable_new_executor"):
                config.enable_new_executor()
            config.set_optimization_level(3)

        for del_p in self.option.delete_pass:
            config.delete_pass(del_p)

@@ -142,11 +155,6 @@ def _create(self):
        config.delete_pass("conv2d_add_act_fuse_pass")
        config.delete_pass("conv2d_add_fuse_pass")

        # Enable shared memory
        config.enable_memory_optim()
        config.switch_ir_optim(True)
        # Disable feed, fetch OP, needed by zero_copy_run
        config.switch_use_feed_fetch_ops(False)
        predictor = create_predictor(config)

        # Get input and output handlers
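Editor's note: a minimal, self-contained sketch of the GPU + TensorRT flow that this commit gathers under the `gpu` branch, using only calls that appear in the diff. The model paths, shape-info filename, batch size, and precision are stand-in assumptions, not values taken from PaddleX.

    # Sketch only: mirrors the TRT setup in the diff above, with hard-coded
    # stand-ins for the self.option.* values (paths and numbers are assumed).
    import os

    from paddle.inference import Config, create_predictor

    config = Config("inference.pdmodel", "inference.pdiparams")  # assumed paths
    config.enable_use_gpu(100, 0)  # 100 MB initial GPU memory pool, device 0

    config.enable_tensorrt_engine(
        workspace_size=1 << 25,                # assumes batch_size == 1
        max_batch_size=1,
        min_subgraph_size=3,                   # default in PaddlePredictorOption
        precision_mode=Config.Precision.Half,  # i.e. run_mode == "trt_fp16"
        use_static=False,
        use_calib_mode=False,
    )

    shape_file = "shape_range_info.pbtxt"  # assumed filename
    if not os.path.exists(shape_file):
        # First run: profile the model and record each op's min/max/opt shapes.
        config.collect_shape_range_info(shape_file)
    else:
        # Later runs: feed the recorded ranges back so TRT can build engines
        # for dynamic shapes without re-profiling.
        config.enable_tuned_tensorrt_dynamic_shape(shape_file, True)

    predictor = create_predictor(config)

The two-pass pattern is why the code logs one message when collecting and another when reusing an existing file: the shape file only exists after a collection run has completed.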
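The CPU side of the same change, again as a hedged sketch with stand-in values for `run_mode` and the thread count. Note the `hasattr` checks, which the diff uses to stay compatible with Paddle builds that predate `enable_new_ir`, `enable_new_executor`, and `disable_mkldnn`.

    # Sketch only: the CPU / MKL-DNN branch with stand-in option values.
    from paddle.inference import Config

    config = Config("inference.pdmodel", "inference.pdiparams")  # assumed paths
    config.disable_gpu()

    run_mode = "mkldnn_bf16"  # assumed; any supported mode containing "mkldnn"
    cpu_threads = 8           # the new default from pp_option.py

    if "mkldnn" in run_mode:
        try:
            config.enable_mkldnn()
            config.set_cpu_math_library_num_threads(cpu_threads)
            if "bf16" in run_mode:
                config.enable_mkldnn_bfloat16()
        except Exception:
            # Some Paddle builds ship without MKL-DNN support.
            pass
        config.set_mkldnn_cache_capacity(-1)  # value taken from the diff above
    elif hasattr(config, "disable_mkldnn"):
        config.disable_mkldnn()

    config.set_cpu_math_library_num_threads(cpu_threads)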
3 changes: 2 additions & 1 deletion paddlex/inference/utils/pp_option.py
@@ -22,6 +22,7 @@
class PaddlePredictorOption(object):
    """Paddle Inference Engine Option"""

    # NOTE: TRT modes start with `trt_`
    SUPPORT_RUN_MODE = (
        "paddle",
        "trt_fp32",
@@ -69,7 +70,7 @@ def _get_default_config(self):
            "min_subgraph_size": 3,
            "shape_info_filename": None,
            "trt_calib_mode": False,
            "cpu_threads": 1,
            "cpu_threads": 8,
            "trt_use_static": False,
            "delete_pass": [],
            "enable_new_ir": True if self.model_name not in NEWIR_BLOCKLIST else False,
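Tying the two files together: a hypothetical sketch of how a run-mode string selects the branches in predictor.py. `SUPPORT_RUN_MODE` is collapsed in the diff above, so the entries below are assumptions inferred from the visible run-mode checks, and `PaddlePredictorOption`'s real interface may differ.

    # Hypothetical helper: maps (run_mode, device) onto the predictor.py branches.
    SUPPORT_RUN_MODE = (
        "paddle",
        "trt_fp32",
        "trt_fp16",     # assumed from the precision_map keys above
        "trt_int8",     # assumed from the precision_map keys above
        "mkldnn",       # assumed from the `"mkldnn" in run_mode` check above
        "mkldnn_bf16",  # assumed from the `"bf16" in run_mode` check above
    )

    def describe(run_mode: str, device: str = "cpu", cpu_threads: int = 8) -> str:
        assert run_mode in SUPPORT_RUN_MODE, f"unsupported run mode: {run_mode}"
        if device == "gpu" and run_mode.startswith("trt_"):
            # NOTE: TRT modes start with `trt_` (see pp_option.py above).
            return f"GPU + TensorRT, precision {run_mode[len('trt_'):]}"
        if "mkldnn" in run_mode:
            suffix = " (bfloat16)" if "bf16" in run_mode else ""
            return f"CPU + MKL-DNN{suffix}, {cpu_threads} threads"
        return f"plain Paddle inference on {device}"

    print(describe("trt_fp16", device="gpu"))  # GPU + TensorRT, precision fp16
    print(describe("mkldnn_bf16"))             # CPU + MKL-DNN (bfloat16), 8 threads
    print(describe("paddle"))                  # plain Paddle inference on cpu

The other visible change here, `cpu_threads` defaulting to 8 instead of 1, means `set_cpu_math_library_num_threads` now gets a multi-threaded default out of the box.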
