Skip to content

Commit

Permalink
A bunch more moving over to nonnegative_int
Browse files Browse the repository at this point in the history
  • Loading branch information
lockshaw committed Jan 28, 2025
1 parent f8df37e commit 3728251
Show file tree
Hide file tree
Showing 262 changed files with 3,436 additions and 3,112 deletions.
14 changes: 13 additions & 1 deletion cmake/flexflow-utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ function(define_ff_vars target)
MAX_TENSOR_DIM=${FF_MAX_DIM}
MAX_NUM_TASK_REGIONS=${FF_MAX_NUM_TASK_REGIONS}
MAX_NUM_TASK_ARGUMENTS=${FF_MAX_NUM_TASK_ARGUMENTS}
# _FORTIFY_SOURCE=0
)

if (FF_GPU_BACKEND STREQUAL "cuda")
Expand All @@ -39,7 +40,18 @@ function(ff_set_cxx_properties target)
CXX_EXTENSIONS NO
)
target_compile_options(${target}
PRIVATE $<$<COMPILE_LANGUAGE:CXX>:> "-ffile-prefix-map=${CMAKE_SOURCE_DIR}=." # add C++ compile flags here
PUBLIC
$<$<COMPILE_LANGUAGE:CXX>:>
"-ffile-prefix-map=${CMAKE_SOURCE_DIR}=."
"-fsanitize=undefined"
"-fno-sanitize-recover=all"
# add C++ compile flags here
)
target_link_options(${target}
PUBLIC
$<$<COMPILE_LANGUAGE:CXX>:>
"-fsanitize=undefined"
"-fno-sanitize-recover=all"
)
endfunction()

Expand Down
14 changes: 12 additions & 2 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,15 @@
};
lib = pkgs.lib;

mkShell = pkgs.mkShell.override {
mkShell = attrs: pkgs.mkShell.override {
stdenv = pkgs.cudaPackages.backendStdenv;
};
} (attrs // {
hardeningDisable = ["all"]; # disable nixpkgs' default hardening compiler flags; otherwise ubsan doesn't catch
# signed overflows, because the `strictoverflow` hardening setting passes -fno-strict-overflow.
# for more details, see the following (long-running) nixpkgs github issues:
# - https://github.com/NixOS/nixpkgs/issues/18995
# - https://github.com/NixOS/nixpkgs/issues/60919
});

proj = proj-repo.packages.${system}.proj;
in
Expand Down Expand Up @@ -121,6 +127,8 @@

gpu-ci = mkShell {
inputsFrom = [ ci ];
hardeningDisable = [ "all" ];

buildInputs = builtins.concatLists [
(with nixGL.packages.${system}; [
nixGLDefault
Expand All @@ -135,6 +143,8 @@
"${proj-repo.packages.${system}.proj-nvim}"
];

hardeningDisable = [ "all" ];

buildInputs = builtins.concatLists [
(with pkgs; [
clang-tools
Expand Down
33 changes: 18 additions & 15 deletions lib/compiler/src/compiler/allowed_machine_views.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "utils/containers/unordered_multiset_of.h"
#include "utils/containers/unordered_set_of.h"
#include "utils/containers/zip.h"
#include "utils/nonnegative_int/nonnegative_range.h"
#include "utils/nonnegative_int/num_elements.h"
#include "utils/overload.h"

namespace FlexFlow {
Expand Down Expand Up @@ -47,24 +49,25 @@ static std::unordered_set<MachineView>
OperatorTaskSpace const &task,
DeviceType const &device_type) {

auto get_max_stride_upper_bound = [](std::vector<int> const &tensor_dims,
int total_devices) -> int {
int min_num_devices_with_full_stride_volume = product(transform(
tensor_dims, [](int const &num_devices) { return num_devices - 1; }));
return std::ceil(total_devices / min_num_devices_with_full_stride_volume);
auto get_max_stride_upper_bound = [](std::vector<nonnegative_int> const &tensor_dims,
nonnegative_int total_devices) -> nonnegative_int {
nonnegative_int min_num_devices_with_full_stride_volume = product(transform(
tensor_dims, [](nonnegative_int num_devices) { return nonnegative_int{num_devices.value() - 1}; }));
return nonnegative_int{
static_cast<int>(std::ceil(static_cast<float>(total_devices.value()) / min_num_devices_with_full_stride_volume.value()))};
};

auto candidate_strides = [&](std::vector<int> const &tensor_dims,
int total_devices)
auto candidate_strides = [&](std::vector<nonnegative_int> const &tensor_dims,
nonnegative_int total_devices)
-> std::unordered_multiset<MultiDimensionalStride> {
int max_stride_upper_bound =
nonnegative_int max_stride_upper_bound =
get_max_stride_upper_bound(tensor_dims, total_devices);

std::vector<stride_t> single_stride_range =
transform(range(1, max_stride_upper_bound + 1),
[](int stride) { return stride_t{stride}; });
transform(nonnegative_range(1_n, max_stride_upper_bound + 1_n),
[](nonnegative_int stride) { return stride_t{stride}; });
std::unordered_multiset<std::vector<stride_t>> raw_stride_vectors =
cartesian_product(replicate(nonnegative_int{tensor_dims.size()},
cartesian_product(replicate(num_elements(tensor_dims),
single_stride_range));
std::unordered_multiset<MultiDimensionalStride> strides =
transform(raw_stride_vectors, [](auto const &stride_vec) {
Expand All @@ -76,8 +79,8 @@ static std::unordered_set<MachineView>
auto candidate_starts = [](MachineSpecification const &ms,
DeviceType const &device_type) {
std::unordered_set<MachineSpaceCoordinate> result;
for (int node_idx : range(ms.num_nodes)) {
for (int device_idx : range(get_num_devices_per_node(ms, device_type))) {
for (nonnegative_int node_idx : nonnegative_range(ms.num_nodes)) {
for (nonnegative_int device_idx : nonnegative_range(get_num_devices_per_node(ms, device_type))) {
result.insert(
MachineSpaceCoordinate{node_idx, device_idx, device_type});
}
Expand All @@ -92,8 +95,8 @@ static std::unordered_set<MachineView>
return get_all_permutations_with_repetition(options, num_dims(task));
};

std::vector<int> tensor_dims = task.degrees;
int total_devices = get_num_devices(machine_spec, device_type);
std::vector<nonnegative_int> tensor_dims = task.degrees;
nonnegative_int total_devices = get_num_devices(machine_spec, device_type);

std::unordered_set<MachineView> machine_views;

Expand Down
52 changes: 26 additions & 26 deletions lib/compiler/test/src/allowed_machine_views.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,39 +15,39 @@ TEST_SUITE(FF_TEST_SUITE) {

SUBCASE("1 degree of parallelism") {
MachineSpecification ms = MachineSpecification{
/*num_nodes=*/1,
/*num_cpus_per_node=*/5,
/*num_gpus_per_node=*/5,
/*num_nodes=*/1_n,
/*num_cpus_per_node=*/5_n,
/*num_gpus_per_node=*/5_n,
/*inter_node_bandwidth=*/0,
/*intra_node_bandwidth=*/0,
};

OperatorTaskSpace task = OperatorTaskSpace{{3}};
OperatorTaskSpace task = OperatorTaskSpace{{3_n}};

std::unordered_set<MachineView> correct = {
MachineView{
MachineSpaceCoordinate{
/*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU},
{MachineViewDimension{stride_t{1},
/*node_idx=*/0_n, /*device_idx=*/0_n, DeviceType::GPU},
{MachineViewDimension{stride_t{1_n},
MachineSpecificationDimension::INTRA_NODE}},
},

MachineView{
MachineSpaceCoordinate{
/*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU},
{MachineViewDimension{stride_t{1},
/*node_idx=*/0_n, /*device_idx=*/1_n, DeviceType::GPU},
{MachineViewDimension{stride_t{1_n},
MachineSpecificationDimension::INTRA_NODE}},
},
MachineView{
MachineSpaceCoordinate{
/*node_idx=*/0, /*device_idx=*/2, DeviceType::GPU},
{MachineViewDimension{stride_t{1},
/*node_idx=*/0_n, /*device_idx=*/2_n, DeviceType::GPU},
{MachineViewDimension{stride_t{1_n},
MachineSpecificationDimension::INTRA_NODE}},
},
MachineView{
MachineSpaceCoordinate{
/*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU},
{MachineViewDimension{stride_t{2},
/*node_idx=*/0_n, /*device_idx=*/0_n, DeviceType::GPU},
{MachineViewDimension{stride_t{2_n},
MachineSpecificationDimension::INTRA_NODE}},
},
};
Expand All @@ -61,18 +61,18 @@ TEST_SUITE(FF_TEST_SUITE) {
SUBCASE("2 degrees of parallelism") {

MachineSpecification ms = MachineSpecification{
/*num_nodes=*/3,
/*num_cpus_per_node=*/3,
/*num_gpus_per_node=*/3,
/*num_nodes=*/3_n,
/*num_cpus_per_node=*/3_n,
/*num_gpus_per_node=*/3_n,
/*inter_node_bandwidth=*/0,
/*intra_node_bandwidth=*/0,
};
OperatorTaskSpace task = OperatorTaskSpace{{2, 3}};
OperatorTaskSpace task = OperatorTaskSpace{{2_n, 3_n}};

auto make_2d_view = [&](int start_node_idx,
int start_device_idx,
int stride1,
int stride2,
auto make_2d_view = [&](nonnegative_int start_node_idx,
nonnegative_int start_device_idx,
nonnegative_int stride1,
nonnegative_int stride2,
MachineSpecificationDimension m1,
MachineSpecificationDimension m2) {
return MachineView{
Expand All @@ -86,13 +86,13 @@ TEST_SUITE(FF_TEST_SUITE) {
auto intra = MachineSpecificationDimension::INTRA_NODE;
auto inter = MachineSpecificationDimension::INTER_NODE;
std::unordered_set<MachineView> correct = {
make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, inter, intra),
make_2d_view(1, 0, /*stride1=*/1, /*stride2=*/1, inter, intra),
make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, inter, intra),
make_2d_view(0_n, 0_n, /*stride1=*/1_n, /*stride2=*/1_n, inter, intra),
make_2d_view(1_n, 0_n, /*stride1=*/1_n, /*stride2=*/1_n, inter, intra),
make_2d_view(0_n, 0_n, /*stride1=*/2_n, /*stride2=*/1_n, inter, intra),

make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, intra, inter),
make_2d_view(0, 1, /*stride1=*/1, /*stride2=*/1, intra, inter),
make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, intra, inter),
make_2d_view(0_n, 0_n, /*stride1=*/1_n, /*stride2=*/1_n, intra, inter),
make_2d_view(0_n, 1_n, /*stride1=*/1_n, /*stride2=*/1_n, intra, inter),
make_2d_view(0_n, 0_n, /*stride1=*/2_n, /*stride2=*/1_n, intra, inter),
};

std::unordered_set<MachineView> result =
Expand Down
2 changes: 1 addition & 1 deletion lib/kernels/src/legion_dim.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ legion_dim_t add_to_legion_dim(legion_dim_t legion_dim, int value) {
}

/// Converts a FlexFlow dimension index to a Legion dimension index by
/// mirroring the index across [0, num_dimensions) — presumably because the
/// two systems number dimensions from opposite ends; TODO confirm against
/// legion_dim_t's documentation.
///
/// @param ff_dim          FlexFlow dimension index to convert
/// @param num_dimensions  total number of dimensions in the tensor
/// @return the corresponding legion_dim_t
legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, int num_dimensions) {
  // The diff render duplicated the pre-change line using the removed
  // `.get_value()` accessor; only the current `.value()` call is kept —
  // two consecutive returns made the second unreachable.
  return legion_dim_t(num_dimensions - ff_dim.value.value() - 1);
}

} // namespace FlexFlow
2 changes: 1 addition & 1 deletion lib/local-execution/src/legion_tensor_shape.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace FlexFlow {

/// Converts a FlexFlow dimension index to a Legion dimension index by
/// mirroring the index across [0, num_dims); matches the identical helper in
/// lib/kernels/src/legion_dim.cc.
///
/// @param ff_dim    FlexFlow dimension index to convert
/// @param num_dims  total number of dimensions in the tensor
/// @return the corresponding legion_dim_t
legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, size_t num_dims) {
  // The diff render duplicated the pre-change line using the removed
  // `.get_value()` accessor; only the current `.value()` call is kept.
  // NOTE(review): num_dims is size_t, so if ff_dim.value.value() >= num_dims
  // this subtraction wraps around — presumably callers guarantee
  // ff_dim < num_dims; verify.
  return legion_dim_t(num_dims - ff_dim.value.value() - 1);
}

legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, TensorShape const &shape) {
Expand Down
2 changes: 1 addition & 1 deletion lib/local-execution/src/ops/batch_matmul.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "local-execution/op_task_invocation.h"
#include "local-execution/op_task_signature.h"
#include "local-execution/sim_environment.h"
#include "op-attrs/ops/batch_matmul.dtg.h"
#include "op-attrs/ops/batch_matmul_attrs.dtg.h"

namespace FlexFlow {

Expand Down
2 changes: 1 addition & 1 deletion lib/local-execution/src/ops/reverse.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ static std::optional<float>
auto output_grad = acc.get_tensor_grad<Permissions::RO>(OUTPUT);
auto attrs = acc.get_argument<ReverseAttrs>(ATTRS);

int axis = input_grad.shape.get_dim() - attrs.axis.value.get_value() - 1;
int axis = input_grad.shape.get_dim() - attrs.axis.value.value() - 1;
coord_t in_blk_size = 1, reverse_dim_size = 1, num_out_blks = 1;
for (int i = 0; i < input_grad.shape.get_dim(); i++) {
if (i < axis) {
Expand Down
2 changes: 1 addition & 1 deletion lib/local-execution/src/ops/softmax.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ static DeviceSpecificDeviceStates

SoftmaxPerDeviceState per_device_state =
init_kernel(handle,
attrs.dim.value.get_value(),
attrs.dim.value.value(),
output_n,
output_c,
output_h,
Expand Down
2 changes: 1 addition & 1 deletion lib/local-execution/src/ops/split.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void calc_block_size(coord_t &num_blocks,
num_blocks = 1;
block_size = 1;
for (int d = 0; d < array_shape.num_elements(); d++) {
if (d <= axis.value.get_value()) {
if (d <= axis.value.value()) {
block_size *= array_shape.at(legion_dim_t(d));
} else {
num_blocks *= array_shape.at(legion_dim_t(d));
Expand Down
15 changes: 8 additions & 7 deletions lib/models/include/models/bert/bert_config.struct.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,28 @@ features = [

includes = [
"op-attrs/activation.dtg.h",
"utils/nonnegative_int/nonnegative_int.h",
]

[[fields]]
name = "vocab_size"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "hidden_size"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "num_encoder_layers"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "num_heads"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "dim_feedforward"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "hidden_act"
Expand Down Expand Up @@ -64,8 +65,8 @@ type = "float"

[[fields]]
name = "sequence_length"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "batch_size"
type = "size_t"
type = "::FlexFlow::nonnegative_int"
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ includes = [
"<vector>",
"<map>",
"<string>",
"utils/nonnegative_int/nonnegative_int.h",
]

src_includes = [
Expand All @@ -25,19 +26,19 @@ src_includes = [

[[fields]]
name = "batch_size"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "dense_layers"
type = "std::vector<int>"
type = "std::vector<::FlexFlow::nonnegative_int>"

[[fields]]
name = "dense_feature_layers"
type = "std::vector<int>"
type = "std::vector<::FlexFlow::nonnegative_int>"

[[fields]]
name = "feature_shapes"
type = "std::map<std::string, int>"
type = "std::map<std::string, ::FlexFlow::nonnegative_int>"

[[fields]]
name = "input_features"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,17 @@ features = [
"fmt",
]

includes = [
"utils/nonnegative_int/nonnegative_int.h",
]

[[fields]]
name = "num_classes"
type = "int"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "batch_size"
type = "int"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "aux_logits"
Expand Down
2 changes: 1 addition & 1 deletion lib/models/include/models/split_test/split_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace FlexFlow {
* @note This is a tiny model developed for testing the original Unity
* implementation. It is not a "real" model and has never been trained.
*/
ComputationGraph get_split_test_computation_graph(int batch_size);
ComputationGraph get_split_test_computation_graph(nonnegative_int batch_size);

} // namespace FlexFlow

Expand Down
Loading

0 comments on commit 3728251

Please sign in to comment.