Skip to content

Commit

Permalink
A bunch more moving over to nonnegative_int
Browse files Browse the repository at this point in the history
  • Loading branch information
lockshaw committed Jan 28, 2025
1 parent f8df37e commit 3728251
Show file tree
Hide file tree
Showing 262 changed files with 3,436 additions and 3,112 deletions.
14 changes: 13 additions & 1 deletion cmake/flexflow-utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ function(define_ff_vars target)
MAX_TENSOR_DIM=${FF_MAX_DIM}
MAX_NUM_TASK_REGIONS=${FF_MAX_NUM_TASK_REGIONS}
MAX_NUM_TASK_ARGUMENTS=${FF_MAX_NUM_TASK_ARGUMENTS}
# _FORTIFY_SOURCE=0
)

if (FF_GPU_BACKEND STREQUAL "cuda")
Expand All @@ -39,7 +40,18 @@ function(ff_set_cxx_properties target)
CXX_EXTENSIONS NO
)
target_compile_options(${target}
PRIVATE $<$<COMPILE_LANGUAGE:CXX>:> "-ffile-prefix-map=${CMAKE_SOURCE_DIR}=." # add C++ compile flags here
PUBLIC
$<$<COMPILE_LANGUAGE:CXX>:>
"-ffile-prefix-map=${CMAKE_SOURCE_DIR}=."
"-fsanitize=undefined"
"-fno-sanitize-recover=all"
# add C++ compile flags here
)
target_link_options(${target}
PUBLIC
$<$<COMPILE_LANGUAGE:CXX>:>
"-fsanitize=undefined"
"-fno-sanitize-recover=all"
)
endfunction()

Expand Down
14 changes: 12 additions & 2 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,15 @@
};
lib = pkgs.lib;

mkShell = pkgs.mkShell.override {
mkShell = attrs: pkgs.mkShell.override {
stdenv = pkgs.cudaPackages.backendStdenv;
};
} (attrs // {
hardeningDisable = ["all"]; # disable nixpkgs' default hardening compiler flags; otherwise ubsan doesn't catch
# signed overflows, because the `strictoverflow` hardening setting passes -fno-strict-overflow.
# for more details, see the following (long-running) nixpkgs github issues:
# - https://github.com/NixOS/nixpkgs/issues/18995
# - https://github.com/NixOS/nixpkgs/issues/60919
});

proj = proj-repo.packages.${system}.proj;
in
Expand Down Expand Up @@ -121,6 +127,8 @@

gpu-ci = mkShell {
inputsFrom = [ ci ];
hardeningDisable = [ "all" ];

buildInputs = builtins.concatLists [
(with nixGL.packages.${system}; [
nixGLDefault
Expand All @@ -135,6 +143,8 @@
"${proj-repo.packages.${system}.proj-nvim}"
];

hardeningDisable = [ "all" ];

buildInputs = builtins.concatLists [
(with pkgs; [
clang-tools
Expand Down
33 changes: 18 additions & 15 deletions lib/compiler/src/compiler/allowed_machine_views.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "utils/containers/unordered_multiset_of.h"
#include "utils/containers/unordered_set_of.h"
#include "utils/containers/zip.h"
#include "utils/nonnegative_int/nonnegative_range.h"
#include "utils/nonnegative_int/num_elements.h"
#include "utils/overload.h"

namespace FlexFlow {
Expand Down Expand Up @@ -47,24 +49,25 @@ static std::unordered_set<MachineView>
OperatorTaskSpace const &task,
DeviceType const &device_type) {

auto get_max_stride_upper_bound = [](std::vector<int> const &tensor_dims,
int total_devices) -> int {
int min_num_devices_with_full_stride_volume = product(transform(
tensor_dims, [](int const &num_devices) { return num_devices - 1; }));
return std::ceil(total_devices / min_num_devices_with_full_stride_volume);
auto get_max_stride_upper_bound = [](std::vector<nonnegative_int> const &tensor_dims,
nonnegative_int total_devices) -> nonnegative_int {
nonnegative_int min_num_devices_with_full_stride_volume = product(transform(
tensor_dims, [](nonnegative_int num_devices) { return nonnegative_int{num_devices.value() - 1}; }));
return nonnegative_int{
static_cast<int>(std::ceil(static_cast<float>(total_devices.value()) / min_num_devices_with_full_stride_volume.value()))};
};

auto candidate_strides = [&](std::vector<int> const &tensor_dims,
int total_devices)
auto candidate_strides = [&](std::vector<nonnegative_int> const &tensor_dims,
nonnegative_int total_devices)
-> std::unordered_multiset<MultiDimensionalStride> {
int max_stride_upper_bound =
nonnegative_int max_stride_upper_bound =
get_max_stride_upper_bound(tensor_dims, total_devices);

std::vector<stride_t> single_stride_range =
transform(range(1, max_stride_upper_bound + 1),
[](int stride) { return stride_t{stride}; });
transform(nonnegative_range(1_n, max_stride_upper_bound + 1_n),
[](nonnegative_int stride) { return stride_t{stride}; });
std::unordered_multiset<std::vector<stride_t>> raw_stride_vectors =
cartesian_product(replicate(nonnegative_int{tensor_dims.size()},
cartesian_product(replicate(num_elements(tensor_dims),
single_stride_range));
std::unordered_multiset<MultiDimensionalStride> strides =
transform(raw_stride_vectors, [](auto const &stride_vec) {
Expand All @@ -76,8 +79,8 @@ static std::unordered_set<MachineView>
auto candidate_starts = [](MachineSpecification const &ms,
DeviceType const &device_type) {
std::unordered_set<MachineSpaceCoordinate> result;
for (int node_idx : range(ms.num_nodes)) {
for (int device_idx : range(get_num_devices_per_node(ms, device_type))) {
for (nonnegative_int node_idx : nonnegative_range(ms.num_nodes)) {
for (nonnegative_int device_idx : nonnegative_range(get_num_devices_per_node(ms, device_type))) {
result.insert(
MachineSpaceCoordinate{node_idx, device_idx, device_type});
}
Expand All @@ -92,8 +95,8 @@ static std::unordered_set<MachineView>
return get_all_permutations_with_repetition(options, num_dims(task));
};

std::vector<int> tensor_dims = task.degrees;
int total_devices = get_num_devices(machine_spec, device_type);
std::vector<nonnegative_int> tensor_dims = task.degrees;
nonnegative_int total_devices = get_num_devices(machine_spec, device_type);

std::unordered_set<MachineView> machine_views;

Expand Down
52 changes: 26 additions & 26 deletions lib/compiler/test/src/allowed_machine_views.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,39 +15,39 @@ TEST_SUITE(FF_TEST_SUITE) {

SUBCASE("1 degree of parallelism") {
MachineSpecification ms = MachineSpecification{
/*num_nodes=*/1,
/*num_cpus_per_node=*/5,
/*num_gpus_per_node=*/5,
/*num_nodes=*/1_n,
/*num_cpus_per_node=*/5_n,
/*num_gpus_per_node=*/5_n,
/*inter_node_bandwidth=*/0,
/*intra_node_bandwidth=*/0,
};

OperatorTaskSpace task = OperatorTaskSpace{{3}};
OperatorTaskSpace task = OperatorTaskSpace{{3_n}};

std::unordered_set<MachineView> correct = {
MachineView{
MachineSpaceCoordinate{
/*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU},
{MachineViewDimension{stride_t{1},
/*node_idx=*/0_n, /*device_idx=*/0_n, DeviceType::GPU},
{MachineViewDimension{stride_t{1_n},
MachineSpecificationDimension::INTRA_NODE}},
},

MachineView{
MachineSpaceCoordinate{
/*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU},
{MachineViewDimension{stride_t{1},
/*node_idx=*/0_n, /*device_idx=*/1_n, DeviceType::GPU},
{MachineViewDimension{stride_t{1_n},
MachineSpecificationDimension::INTRA_NODE}},
},
MachineView{
MachineSpaceCoordinate{
/*node_idx=*/0, /*device_idx=*/2, DeviceType::GPU},
{MachineViewDimension{stride_t{1},
/*node_idx=*/0_n, /*device_idx=*/2_n, DeviceType::GPU},
{MachineViewDimension{stride_t{1_n},
MachineSpecificationDimension::INTRA_NODE}},
},
MachineView{
MachineSpaceCoordinate{
/*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU},
{MachineViewDimension{stride_t{2},
/*node_idx=*/0_n, /*device_idx=*/0_n, DeviceType::GPU},
{MachineViewDimension{stride_t{2_n},
MachineSpecificationDimension::INTRA_NODE}},
},
};
Expand All @@ -61,18 +61,18 @@ TEST_SUITE(FF_TEST_SUITE) {
SUBCASE("2 degrees of parallelism") {

MachineSpecification ms = MachineSpecification{
/*num_nodes=*/3,
/*num_cpus_per_node=*/3,
/*num_gpus_per_node=*/3,
/*num_nodes=*/3_n,
/*num_cpus_per_node=*/3_n,
/*num_gpus_per_node=*/3_n,
/*inter_node_bandwidth=*/0,
/*intra_node_bandwidth=*/0,
};
OperatorTaskSpace task = OperatorTaskSpace{{2, 3}};
OperatorTaskSpace task = OperatorTaskSpace{{2_n, 3_n}};

auto make_2d_view = [&](int start_node_idx,
int start_device_idx,
int stride1,
int stride2,
auto make_2d_view = [&](nonnegative_int start_node_idx,
nonnegative_int start_device_idx,
nonnegative_int stride1,
nonnegative_int stride2,
MachineSpecificationDimension m1,
MachineSpecificationDimension m2) {
return MachineView{
Expand All @@ -86,13 +86,13 @@ TEST_SUITE(FF_TEST_SUITE) {
auto intra = MachineSpecificationDimension::INTRA_NODE;
auto inter = MachineSpecificationDimension::INTER_NODE;
std::unordered_set<MachineView> correct = {
make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, inter, intra),
make_2d_view(1, 0, /*stride1=*/1, /*stride2=*/1, inter, intra),
make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, inter, intra),
make_2d_view(0_n, 0_n, /*stride1=*/1_n, /*stride2=*/1_n, inter, intra),
make_2d_view(1_n, 0_n, /*stride1=*/1_n, /*stride2=*/1_n, inter, intra),
make_2d_view(0_n, 0_n, /*stride1=*/2_n, /*stride2=*/1_n, inter, intra),

make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, intra, inter),
make_2d_view(0, 1, /*stride1=*/1, /*stride2=*/1, intra, inter),
make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, intra, inter),
make_2d_view(0_n, 0_n, /*stride1=*/1_n, /*stride2=*/1_n, intra, inter),
make_2d_view(0_n, 1_n, /*stride1=*/1_n, /*stride2=*/1_n, intra, inter),
make_2d_view(0_n, 0_n, /*stride1=*/2_n, /*stride2=*/1_n, intra, inter),
};

std::unordered_set<MachineView> result =
Expand Down
2 changes: 1 addition & 1 deletion lib/kernels/src/legion_dim.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ legion_dim_t add_to_legion_dim(legion_dim_t legion_dim, int value) {
}

/// Converts a FlexFlow dimension index to a Legion dimension index by
/// mirroring the index across [0, num_dimensions) — presumably because the
/// two systems number dimensions from opposite ends; TODO confirm against
/// legion_dim_t's documentation.
///
/// @param ff_dim          FlexFlow dimension index to convert
/// @param num_dimensions  total number of dimensions in the tensor
/// @return the corresponding legion_dim_t
legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, int num_dimensions) {
  // The diff render duplicated the pre-change line using the removed
  // `.get_value()` accessor; only the current `.value()` call is kept —
  // two consecutive returns made the second unreachable.
  return legion_dim_t(num_dimensions - ff_dim.value.value() - 1);
}

} // namespace FlexFlow
2 changes: 1 addition & 1 deletion lib/local-execution/src/legion_tensor_shape.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace FlexFlow {

/// Converts a FlexFlow dimension index to a Legion dimension index by
/// mirroring the index across [0, num_dims); matches the identical helper in
/// lib/kernels/src/legion_dim.cc.
///
/// @param ff_dim    FlexFlow dimension index to convert
/// @param num_dims  total number of dimensions in the tensor
/// @return the corresponding legion_dim_t
legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, size_t num_dims) {
  // The diff render duplicated the pre-change line using the removed
  // `.get_value()` accessor; only the current `.value()` call is kept.
  // NOTE(review): num_dims is size_t, so if ff_dim.value.value() >= num_dims
  // this subtraction wraps around — presumably callers guarantee
  // ff_dim < num_dims; verify.
  return legion_dim_t(num_dims - ff_dim.value.value() - 1);
}

legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, TensorShape const &shape) {
Expand Down
2 changes: 1 addition & 1 deletion lib/local-execution/src/ops/batch_matmul.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "local-execution/op_task_invocation.h"
#include "local-execution/op_task_signature.h"
#include "local-execution/sim_environment.h"
#include "op-attrs/ops/batch_matmul.dtg.h"
#include "op-attrs/ops/batch_matmul_attrs.dtg.h"

namespace FlexFlow {

Expand Down
2 changes: 1 addition & 1 deletion lib/local-execution/src/ops/reverse.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ static std::optional<float>
auto output_grad = acc.get_tensor_grad<Permissions::RO>(OUTPUT);
auto attrs = acc.get_argument<ReverseAttrs>(ATTRS);

int axis = input_grad.shape.get_dim() - attrs.axis.value.get_value() - 1;
int axis = input_grad.shape.get_dim() - attrs.axis.value.value() - 1;
coord_t in_blk_size = 1, reverse_dim_size = 1, num_out_blks = 1;
for (int i = 0; i < input_grad.shape.get_dim(); i++) {
if (i < axis) {
Expand Down
2 changes: 1 addition & 1 deletion lib/local-execution/src/ops/softmax.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ static DeviceSpecificDeviceStates

SoftmaxPerDeviceState per_device_state =
init_kernel(handle,
attrs.dim.value.get_value(),
attrs.dim.value.value(),
output_n,
output_c,
output_h,
Expand Down
2 changes: 1 addition & 1 deletion lib/local-execution/src/ops/split.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void calc_block_size(coord_t &num_blocks,
num_blocks = 1;
block_size = 1;
for (int d = 0; d < array_shape.num_elements(); d++) {
if (d <= axis.value.get_value()) {
if (d <= axis.value.value()) {
block_size *= array_shape.at(legion_dim_t(d));
} else {
num_blocks *= array_shape.at(legion_dim_t(d));
Expand Down
15 changes: 8 additions & 7 deletions lib/models/include/models/bert/bert_config.struct.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,28 @@ features = [

includes = [
"op-attrs/activation.dtg.h",
"utils/nonnegative_int/nonnegative_int.h",
]

[[fields]]
name = "vocab_size"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "hidden_size"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "num_encoder_layers"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "num_heads"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "dim_feedforward"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "hidden_act"
Expand Down Expand Up @@ -64,8 +65,8 @@ type = "float"

[[fields]]
name = "sequence_length"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "batch_size"
type = "size_t"
type = "::FlexFlow::nonnegative_int"
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ includes = [
"<vector>",
"<map>",
"<string>",
"utils/nonnegative_int/nonnegative_int.h",
]

src_includes = [
Expand All @@ -25,19 +26,19 @@ src_includes = [

[[fields]]
name = "batch_size"
type = "size_t"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "dense_layers"
type = "std::vector<int>"
type = "std::vector<::FlexFlow::nonnegative_int>"

[[fields]]
name = "dense_feature_layers"
type = "std::vector<int>"
type = "std::vector<::FlexFlow::nonnegative_int>"

[[fields]]
name = "feature_shapes"
type = "std::map<std::string, int>"
type = "std::map<std::string, ::FlexFlow::nonnegative_int>"

[[fields]]
name = "input_features"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,17 @@ features = [
"fmt",
]

includes = [
"utils/nonnegative_int/nonnegative_int.h",
]

[[fields]]
name = "num_classes"
type = "int"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "batch_size"
type = "int"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "aux_logits"
Expand Down
2 changes: 1 addition & 1 deletion lib/models/include/models/split_test/split_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace FlexFlow {
* @note This is a tiny model developed for testing the original Unity
* implementation. It is not a "real" model and has never been trained.
*/
ComputationGraph get_split_test_computation_graph(int batch_size);
ComputationGraph get_split_test_computation_graph(nonnegative_int batch_size);

} // namespace FlexFlow

Expand Down
Loading

0 comments on commit 3728251

Please sign in to comment.