Skip to content

Commit

Permalink
Deduplicate some benchmark code. (#3903)
Browse files Browse the repository at this point in the history
  • Loading branch information
csarofeen authored Feb 16, 2025
1 parent 204d795 commit 53bd0a7
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 298 deletions.
98 changes: 0 additions & 98 deletions benchmarks/cpp/heuristic_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,74 +24,6 @@

using namespace nvfuser;

// Builds a layer-norm backward fusion, runs it once through a fresh
// FusionExecutorCache, and returns the kernel runtime recorded for that run.
//
// fusion_ptr:     empty Fusion to build into; ownership moves into the
//                 executor cache created here.
// executor_cache: out-param receiving the newly created executor cache.
// aten_inputs:    out-param receiving the ATen inputs used for the run.
// shape:          full input shape (outer dims followed by normalized dims).
// norm_shape:     trailing dims that layer norm normalizes over.
static auto getLayerBackwardNormRuntime(
    std::unique_ptr<Fusion> fusion_ptr,
    std::unique_ptr<FusionExecutorCache>& executor_cache,
    std::vector<c10::IValue>& aten_inputs,
    std::vector<int64_t>& shape,
    std::vector<int64_t>& norm_shape) {
  Fusion& fusion = *fusion_ptr;

  const size_t kM = shape.size();
  const size_t kN = norm_shape.size();
  const size_t kOuterNumDims = kM - kN;

  // mean/rstd keep the outer dims of `shape` with broadcast (size-1) inner
  // dims, matching what at::native_layer_norm produces.
  std::vector<int64_t> outer_shape;
  outer_shape.reserve(kM);
  for (size_t idx = 0; idx < kOuterNumDims; ++idx) {
    outer_shape.push_back(shape[idx]);
  }
  for (size_t idx = kOuterNumDims; idx < kM; ++idx) {
    outer_shape.push_back(1);
  }

  auto grad_out = makeSymbolicTensor(shape.size());
  auto input = makeSymbolicTensor(shape.size());
  auto mean = makeConcreteTensor(outer_shape);
  auto rstd = makeConcreteTensor(outer_shape);
  auto weight = makeSymbolicTensor(norm_shape.size());
  auto bias = makeSymbolicTensor(norm_shape.size());
  fusion.addInput(grad_out);
  fusion.addInput(input);
  fusion.addInput(mean);
  fusion.addInput(rstd);
  fusion.addInput(weight);
  fusion.addInput(bias);

  // Request all three gradients (input, weight, bias).
  auto grads = layer_norm_backward(
      grad_out,
      input,
      norm_shape,
      mean,
      rstd,
      weight,
      bias,
      {true, true, true});

  fusion.addOutput(grads.grad_input);
  fusion.addOutput(grads.grad_weight);
  fusion.addOutput(grads.grad_bias);

  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor aten_grad_out = at::randn(shape, options);
  at::Tensor aten_input = at::randn(shape, options);
  at::Tensor aten_weight = at::randn(norm_shape, options);
  at::Tensor aten_bias = at::randn(norm_shape, options);

  const float kEps = 1e-5;
  auto aten_results = at::native_layer_norm(
      aten_input, norm_shape, aten_weight, aten_bias, kEps);
  // Only mean/rstd feed the backward fusion; the forward output is unused.
  auto aten_mean = std::get<1>(aten_results);
  auto aten_rstd = std::get<2>(aten_results);

  executor_cache = std::make_unique<FusionExecutorCache>(std::move(fusion_ptr));
  aten_inputs = {
      aten_grad_out, aten_input, aten_mean, aten_rstd, aten_weight, aten_bias};
  // Run once so the cache compiles the fusion and records a kernel runtime.
  executor_cache->runFusionWithInputs(aten_inputs);

  return executor_cache->getMostRecentKernelRuntime();
}

static void NvFuserScheduler_LayerNormBackward_HeuristicCache(
benchmark::State& benchmark_state) {
std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();
Expand All @@ -117,36 +49,6 @@ static void NvFuserScheduler_LayerNormBackward_HeuristicCache(
}
}

// Builds a layer-norm forward fusion (no affine weight/bias), runs it once
// through a fresh FusionExecutorCache, and returns the kernel runtime
// recorded for that run.
//
// fusion_ptr:     empty Fusion to build into; ownership moves into the
//                 executor cache created here.
// executor_cache: out-param receiving the newly created executor cache.
// aten_inputs:    out-param receiving the ATen inputs used for the run.
// shape:          full input shape of the tensor to normalize.
// norm_shape:     trailing dims that layer norm normalizes over.
static auto getLayerForwardNormRuntime(
    std::unique_ptr<Fusion> fusion_ptr,
    std::unique_ptr<FusionExecutorCache>& executor_cache,
    std::vector<c10::IValue>& aten_inputs,
    std::vector<int64_t>& shape,
    std::vector<int64_t>& norm_shape) {
  Fusion& fusion = *fusion_ptr;

  const float kEps = 1e-5;
  Val* eps_ptr = IrBuilder::create<Val>(kEps);

  auto input = makeSymbolicTensor(shape.size());
  fusion.addInput(input);

  // nullptr weight/bias: normalization only, no affine transform.
  auto result = layer_norm(input, norm_shape, nullptr, nullptr, eps_ptr);

  fusion.addOutput(result.output);
  fusion.addOutput(result.mean);
  fusion.addOutput(result.invstd);

  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor aten_input = at::randn(shape, options);

  executor_cache = std::make_unique<FusionExecutorCache>(std::move(fusion_ptr));
  aten_inputs = {aten_input};
  // Run once so the cache compiles the fusion and records a kernel runtime.
  executor_cache->runFusionWithInputs(aten_inputs);

  return executor_cache->getMostRecentKernelRuntime();
}

static void NvFuserScheduler_LayerNormForward_HeuristicCache(
benchmark::State& benchmark_state) {
std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();
Expand Down
100 changes: 0 additions & 100 deletions benchmarks/cpp/heuristic_lookup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,76 +24,6 @@

using namespace nvfuser;

// Builds a layer-norm backward fusion, runs it once through a fresh
// FusionExecutorCache, and returns the kernel runtime recorded for that run.
//
// fusion_ptr:     empty Fusion to build into; ownership moves into the
//                 executor cache created here.
// executor_cache: out-param receiving the newly created executor cache.
// aten_inputs:    out-param receiving the ATen inputs used for the run.
// shape:          full input shape (outer dims followed by normalized dims).
// norm_shape:     trailing dims that layer norm normalizes over.
static auto getLayerBackwardNormRuntime(
    std::unique_ptr<Fusion> fusion_ptr,
    std::unique_ptr<FusionExecutorCache>& executor_cache,
    std::vector<c10::IValue>& aten_inputs,
    std::vector<int64_t>& shape,
    std::vector<int64_t>& norm_shape) {
  Fusion& fusion = *fusion_ptr;

  const size_t kM = shape.size();
  const size_t kN = norm_shape.size();
  const size_t kOuterNumDims = kM - kN;

  // mean/rstd keep the outer dims of `shape` with broadcast (size-1) inner
  // dims, matching what at::native_layer_norm produces.
  std::vector<int64_t> outer_shape;
  outer_shape.reserve(kM);
  for (size_t idx = 0; idx < kOuterNumDims; ++idx) {
    outer_shape.push_back(shape[idx]);
  }
  for (size_t idx = kOuterNumDims; idx < kM; ++idx) {
    outer_shape.push_back(1);
  }

  auto grad_out = makeSymbolicTensor(shape.size());
  auto input = makeSymbolicTensor(shape.size());
  auto mean = makeConcreteTensor(outer_shape);
  auto rstd = makeConcreteTensor(outer_shape);
  auto weight = makeSymbolicTensor(norm_shape.size());
  auto bias = makeSymbolicTensor(norm_shape.size());
  fusion.addInput(grad_out);
  fusion.addInput(input);
  fusion.addInput(mean);
  fusion.addInput(rstd);
  fusion.addInput(weight);
  fusion.addInput(bias);

  // Request all three gradients (input, weight, bias).
  auto grads = layer_norm_backward(
      grad_out,
      input,
      norm_shape,
      mean,
      rstd,
      weight,
      bias,
      {true, true, true});

  fusion.addOutput(grads.grad_input);
  fusion.addOutput(grads.grad_weight);
  fusion.addOutput(grads.grad_bias);

  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor aten_grad_out = at::randn(shape, options);
  at::Tensor aten_input = at::randn(shape, options);
  at::Tensor aten_weight = at::randn(norm_shape, options);
  at::Tensor aten_bias = at::randn(norm_shape, options);

  const float kEps = 1e-5;
  // Tensors convert implicitly to c10::optional<at::Tensor>; no explicit
  // wrapper temporaries are needed.
  auto aten_results = at::native_layer_norm(
      aten_input, norm_shape, aten_weight, aten_bias, kEps);
  // Only mean/rstd feed the backward fusion; the forward output is unused.
  auto aten_mean = std::get<1>(aten_results);
  auto aten_rstd = std::get<2>(aten_results);

  executor_cache = std::make_unique<FusionExecutorCache>(std::move(fusion_ptr));
  aten_inputs = {
      aten_grad_out, aten_input, aten_mean, aten_rstd, aten_weight, aten_bias};
  // Run once so the cache compiles the fusion and records a kernel runtime.
  executor_cache->runFusionWithInputs(aten_inputs);

  return executor_cache->getMostRecentKernelRuntime();
}

static void NvFuserScheduler_LayerNormBackward_HeuristicLookup(
benchmark::State& benchmark_state) {
std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();
Expand All @@ -119,36 +49,6 @@ static void NvFuserScheduler_LayerNormBackward_HeuristicLookup(
}
}

// Builds a layer-norm forward fusion (no affine weight/bias), runs it once
// through a fresh FusionExecutorCache, and returns the kernel runtime
// recorded for that run.
//
// fusion_ptr:     empty Fusion to build into; ownership moves into the
//                 executor cache created here.
// executor_cache: out-param receiving the newly created executor cache.
// aten_inputs:    out-param receiving the ATen inputs used for the run.
// shape:          full input shape of the tensor to normalize.
// norm_shape:     trailing dims that layer norm normalizes over.
static auto getLayerForwardNormRuntime(
    std::unique_ptr<Fusion> fusion_ptr,
    std::unique_ptr<FusionExecutorCache>& executor_cache,
    std::vector<c10::IValue>& aten_inputs,
    std::vector<int64_t>& shape,
    std::vector<int64_t>& norm_shape) {
  Fusion& fusion = *fusion_ptr;

  const float kEps = 1e-5;
  Val* eps_ptr = IrBuilder::create<Val>(kEps);

  auto input = makeSymbolicTensor(shape.size());
  fusion.addInput(input);

  // nullptr weight/bias: normalization only, no affine transform.
  auto result = layer_norm(input, norm_shape, nullptr, nullptr, eps_ptr);

  fusion.addOutput(result.output);
  fusion.addOutput(result.mean);
  fusion.addOutput(result.invstd);

  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor aten_input = at::randn(shape, options);

  executor_cache = std::make_unique<FusionExecutorCache>(std::move(fusion_ptr));
  aten_inputs = {aten_input};
  // Run once so the cache compiles the fusion and records a kernel runtime.
  executor_cache->runFusionWithInputs(aten_inputs);

  return executor_cache->getMostRecentKernelRuntime();
}

static void NvFuserScheduler_LayerNormForward_HeuristicLookup(
benchmark::State& benchmark_state) {
std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();
Expand Down
100 changes: 0 additions & 100 deletions benchmarks/cpp/shape_inference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,76 +24,6 @@

using namespace nvfuser;

// Builds a layer-norm backward fusion, runs it once through a fresh
// FusionExecutorCache, and returns the kernel runtime recorded for that run.
//
// fusion_ptr:     empty Fusion to build into; ownership moves into the
//                 executor cache created here.
// executor_cache: out-param receiving the newly created executor cache.
// aten_inputs:    out-param receiving the ATen inputs used for the run.
// shape:          full input shape (outer dims followed by normalized dims).
// norm_shape:     trailing dims that layer norm normalizes over.
static auto getLayerBackwardNormRuntime(
    std::unique_ptr<Fusion> fusion_ptr,
    std::unique_ptr<FusionExecutorCache>& executor_cache,
    std::vector<c10::IValue>& aten_inputs,
    std::vector<int64_t>& shape,
    std::vector<int64_t>& norm_shape) {
  Fusion& fusion = *fusion_ptr;

  const size_t kM = shape.size();
  const size_t kN = norm_shape.size();
  const size_t kOuterNumDims = kM - kN;

  // mean/rstd keep the outer dims of `shape` with broadcast (size-1) inner
  // dims, matching what at::native_layer_norm produces.
  std::vector<int64_t> outer_shape;
  outer_shape.reserve(kM);
  for (size_t idx = 0; idx < kOuterNumDims; ++idx) {
    outer_shape.push_back(shape[idx]);
  }
  for (size_t idx = kOuterNumDims; idx < kM; ++idx) {
    outer_shape.push_back(1);
  }

  auto grad_out = makeSymbolicTensor(shape.size());
  auto input = makeSymbolicTensor(shape.size());
  auto mean = makeConcreteTensor(outer_shape);
  auto rstd = makeConcreteTensor(outer_shape);
  auto weight = makeSymbolicTensor(norm_shape.size());
  auto bias = makeSymbolicTensor(norm_shape.size());
  fusion.addInput(grad_out);
  fusion.addInput(input);
  fusion.addInput(mean);
  fusion.addInput(rstd);
  fusion.addInput(weight);
  fusion.addInput(bias);

  // Request all three gradients (input, weight, bias).
  auto grads = layer_norm_backward(
      grad_out,
      input,
      norm_shape,
      mean,
      rstd,
      weight,
      bias,
      {true, true, true});

  fusion.addOutput(grads.grad_input);
  fusion.addOutput(grads.grad_weight);
  fusion.addOutput(grads.grad_bias);

  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor aten_grad_out = at::randn(shape, options);
  at::Tensor aten_input = at::randn(shape, options);
  at::Tensor aten_weight = at::randn(norm_shape, options);
  at::Tensor aten_bias = at::randn(norm_shape, options);

  const float kEps = 1e-5;
  // Tensors convert implicitly to c10::optional<at::Tensor>; no explicit
  // wrapper temporaries are needed.
  auto aten_results = at::native_layer_norm(
      aten_input, norm_shape, aten_weight, aten_bias, kEps);
  // Only mean/rstd feed the backward fusion; the forward output is unused.
  auto aten_mean = std::get<1>(aten_results);
  auto aten_rstd = std::get<2>(aten_results);

  executor_cache = std::make_unique<FusionExecutorCache>(std::move(fusion_ptr));
  aten_inputs = {
      aten_grad_out, aten_input, aten_mean, aten_rstd, aten_weight, aten_bias};
  // Run once so the cache compiles the fusion and records a kernel runtime.
  executor_cache->runFusionWithInputs(aten_inputs);

  return executor_cache->getMostRecentKernelRuntime();
}

void LayerNormBackward_ShapeInference_Base(
benchmark::State& benchmark_state,
bool disable_launch_parameter_cache) {
Expand Down Expand Up @@ -137,36 +67,6 @@ static void NvFuserScheduler_LayerNormBackward_NoShapeInferenceCachedBaseline(
LayerNormBackward_ShapeInference_Base(benchmark_state, false);
}

// Builds a layer-norm forward fusion (no affine weight/bias), runs it once
// through a fresh FusionExecutorCache, and returns the kernel runtime
// recorded for that run.
//
// fusion_ptr:     empty Fusion to build into; ownership moves into the
//                 executor cache created here.
// executor_cache: out-param receiving the newly created executor cache.
// aten_inputs:    out-param receiving the ATen inputs used for the run.
// shape:          full input shape of the tensor to normalize.
// norm_shape:     trailing dims that layer norm normalizes over.
static auto getLayerForwardNormRuntime(
    std::unique_ptr<Fusion> fusion_ptr,
    std::unique_ptr<FusionExecutorCache>& executor_cache,
    std::vector<c10::IValue>& aten_inputs,
    std::vector<int64_t>& shape,
    std::vector<int64_t>& norm_shape) {
  Fusion& fusion = *fusion_ptr;

  const float kEps = 1e-5;
  Val* eps_ptr = IrBuilder::create<Val>(kEps);

  auto input = makeSymbolicTensor(shape.size());
  fusion.addInput(input);

  // nullptr weight/bias: normalization only, no affine transform.
  auto result = layer_norm(input, norm_shape, nullptr, nullptr, eps_ptr);

  fusion.addOutput(result.output);
  fusion.addOutput(result.mean);
  fusion.addOutput(result.invstd);

  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor aten_input = at::randn(shape, options);

  executor_cache = std::make_unique<FusionExecutorCache>(std::move(fusion_ptr));
  aten_inputs = {aten_input};
  // Run once so the cache compiles the fusion and records a kernel runtime.
  executor_cache->runFusionWithInputs(aten_inputs);

  return executor_cache->getMostRecentKernelRuntime();
}

void LayerNormForward_ShapeInferenceBase(
benchmark::State& benchmark_state,
bool disable_launch_param_cache) {
Expand Down
Loading

0 comments on commit 53bd0a7

Please sign in to comment.