From de7fa32511e77fc34cb9a9f09df12dba826a84ab Mon Sep 17 00:00:00 2001
From: Marsella8 <45826022+Marsella8@users.noreply.github.com>
Date: Fri, 24 Jan 2025 15:55:04 -0800
Subject: [PATCH 1/5] Task Simulator (#1565)

* task_simulator_forward_pass
* task simulator fixes
* additional testing + bug fix
* task simulator fix
* refactor to task simulator
* minor fix
* task simulator refactor
* added testing
* added task graph profiler
* fixes
* minor refactoring + moving things around
* interface update
* minor fix
* merge fix
* fixes
* minor fix
* fixes
* fixes
* fmt
* fixes
* uncommented test
* fmt
* test fix
* fix

---------

Co-authored-by: Pietro Max Marsella
Co-authored-by: Colin Unger
---
 .../cost_estimator/op_cost_estimate_key.h     |  19 ++
 .../op_cost_metrics.struct.toml               |   6 +-
 .../cost_estimator/tensor_set_movement.h      |  19 ++
 .../machine_mapping/machine_mapping.h         |   4 +
 .../unstructured_device_mapping.h             |  18 ++
 .../unstructured_device_mapping.struct.toml   |  26 ++
 .../in_progress_task.struct.toml              |  26 ++
 .../in_progress_task_comparator.h             |  13 +
 .../pcg_task.variant.toml                     |  20 ++
 .../task_graph_simulator/pcg_task_graph.h     |  17 ++
 .../pcg_task_graph.struct.toml                |  34 +++
 .../simulate_task_graph_execution.h           |  17 ++
 .../task_execution_constraint.struct.toml     |  15 +
 .../task_graph_execution_state.struct.toml    |  40 +++
 .../task_graph_execution_trace.h              |  12 +
 .../task_graph_execution_trace.struct.toml    |  23 ++
 .../task_profile.struct.toml                  |  26 ++
 .../task_graph_simulator/task_simulator.h     |  18 ++
 .../src/compiler/allowed_machine_views.cc     |   4 +
 .../cost_estimator/op_cost_estimate_key.cc    |  23 ++
 .../cost_estimator/tensor_set_movement.cc     |  16 ++
 .../get_optimal_machine_mapping.cc            |   5 +-
 .../machine_mapping/machine_mapping.cc        |  13 +-
 .../machine_mapping_with_memory_result.cc     |  13 +-
 .../unstructured_device_mapping.cc            |  28 ++
 .../in_progress_task_comparator.cc            |  11 +
 .../task_graph_simulator/pcg_task_graph.cc    |  59 ++++
 .../simulate_task_graph_execution.cc          | 107 +++++++
 .../task_graph_execution_trace.cc             |  27 ++
 .../task_graph_simulator/task_simulator.cc    |  71 +++++
 .../cost_estimator_for_test.cc                |  13 +
 .../cost_estimator_for_test.h                 |  10 +-
 .../get_optimal_machine_mapping.cc            |  19 +-
 .../get_tensor_set_movement_across_split.cc   |   2 +-
 .../machine_mapping/machine_mapping.cc        |   1 -
 ...get_optimal_machine_mapping_with_memory.cc |  48 +++-
 .../machine_mapping_result_with_memory.cc     |  51 +++-
 .../simulate_task_graph_execution.cc          | 211 ++++++++++++++
 .../task_graph_simulator/task_simulator.cc    | 265 ++++++++++++++++++
 lib/pcg/include/pcg/machine_specification.h   |   1 +
 lib/pcg/include/pcg/machine_view.h            |   8 +
 lib/pcg/include/pcg/operator_task_space.h     |   5 +
 .../parallel_computation_graph.h              |  18 ++
 lib/pcg/src/pcg/machine_specification.cc      |   1 +
 lib/pcg/src/pcg/machine_view.cc               |  57 +++-
 lib/pcg/src/pcg/operator_task_space.cc        |  21 +-
 .../parallel_computation_graph.cc             |  44 +++
 .../parallel_computation_graph_edge.cc        |   1 +
 lib/pcg/test/src/pcg/machine_view.cc          |  91 ++++++
 .../parallel_computation_graph.cc             |  80 +++++-
 lib/runtime/src/parallel_compuation_graph.cc  |   7 -
 .../include/utils/archetypes/value_type.h     |  13 +
 .../include/utils/containers/lookup_in_map.h  |  27 ++
 lib/utils/include/utils/containers/minimum.h  |  21 ++
 .../utils/deduplicated_priority_queue.h       |  11 +
 .../algorithms/get_outgoing_edges.h           |  16 ++
 .../src/utils/containers/lookup_in_map.cc     |  12 +
 lib/utils/src/utils/containers/minimum.cc     |   1 +
 .../algorithms/get_outgoing_edges.cc          |  28 ++
 .../src/utils/containers/lookup_in_map.cc     |  31 ++
 .../algorithms/get_incoming_edges.cc          |  51 ++++
 .../algorithms/get_outgoing_edges.cc          |  90 ++++++
 62 files changed, 1923 insertions(+), 62 deletions(-)
 create mode 100644 lib/compiler/include/compiler/cost_estimator/op_cost_estimate_key.h
 create mode 100644 lib/compiler/include/compiler/cost_estimator/tensor_set_movement.h
 create mode 100644 lib/compiler/include/compiler/machine_mapping/unstructured_device_mapping.h
 create mode 100644 lib/compiler/include/compiler/machine_mapping/unstructured_device_mapping.struct.toml
 create mode 100644 lib/compiler/include/compiler/task_graph_simulator/in_progress_task.struct.toml
 create mode 100644 lib/compiler/include/compiler/task_graph_simulator/in_progress_task_comparator.h
 create mode 100644 lib/compiler/include/compiler/task_graph_simulator/pcg_task.variant.toml
 create mode 100644 lib/compiler/include/compiler/task_graph_simulator/pcg_task_graph.h
 create mode 100644 lib/compiler/include/compiler/task_graph_simulator/pcg_task_graph.struct.toml
 create mode 100644 lib/compiler/include/compiler/task_graph_simulator/simulate_task_graph_execution.h
 create mode 100644 lib/compiler/include/compiler/task_graph_simulator/task_execution_constraint.struct.toml
 create mode 100644 lib/compiler/include/compiler/task_graph_simulator/task_graph_execution_state.struct.toml
 create mode 100644 lib/compiler/include/compiler/task_graph_simulator/task_graph_execution_trace.h
 create mode 100644 lib/compiler/include/compiler/task_graph_simulator/task_graph_execution_trace.struct.toml
 create mode 100644 lib/compiler/include/compiler/task_graph_simulator/task_profile.struct.toml
 create mode 100644 lib/compiler/include/compiler/task_graph_simulator/task_simulator.h
 create mode 100644 lib/compiler/src/compiler/cost_estimator/op_cost_estimate_key.cc
 create mode 100644 lib/compiler/src/compiler/cost_estimator/tensor_set_movement.cc
 create mode 100644 lib/compiler/src/compiler/machine_mapping/unstructured_device_mapping.cc
 create mode 100644 lib/compiler/src/compiler/task_graph_simulator/in_progress_task_comparator.cc
 create mode 100644 lib/compiler/src/compiler/task_graph_simulator/pcg_task_graph.cc
 create mode 100644 lib/compiler/src/compiler/task_graph_simulator/simulate_task_graph_execution.cc
 create mode 100644 lib/compiler/src/compiler/task_graph_simulator/task_graph_execution_trace.cc
 create mode 100644 lib/compiler/src/compiler/task_graph_simulator/task_simulator.cc
 rename lib/compiler/test/src/compiler/{machine_mapping => }/cost_estimator_for_test.cc (72%)
 rename lib/compiler/test/src/compiler/{machine_mapping => }/cost_estimator_for_test.h (77%)
 create mode 100644 lib/compiler/test/src/compiler/task_graph_simulator/simulate_task_graph_execution.cc
 create mode 100644 lib/compiler/test/src/compiler/task_graph_simulator/task_simulator.cc
 delete mode 100644 lib/runtime/src/parallel_compuation_graph.cc
 create mode 100644 lib/utils/include/utils/containers/lookup_in_map.h
 create mode 100644 lib/utils/include/utils/containers/minimum.h
 create mode 100644 lib/utils/include/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.h
 create mode 100644 lib/utils/src/utils/containers/lookup_in_map.cc
 create mode 100644 lib/utils/src/utils/containers/minimum.cc
 create mode 100644 lib/utils/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc
 create mode 100644 lib/utils/test/src/utils/containers/lookup_in_map.cc
 create mode 100644 lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_incoming_edges.cc
 create mode 100644 lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc
diff --git a/lib/compiler/include/compiler/cost_estimator/op_cost_estimate_key.h b/lib/compiler/include/compiler/cost_estimator/op_cost_estimate_key.h
new file mode 100644
index 0000000000..93a1143cde
--- /dev/null
+++ b/lib/compiler/include/compiler/cost_estimator/op_cost_estimate_key.h
@@ -0,0 +1,19 @@
+#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_OP_COST_ESTIMATE_KEY_H
+#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_OP_COST_ESTIMATE_KEY_H
+
+#include "compiler/cost_estimator/op_cost_estimate_key.dtg.h"
+#include "pcg/device_id_t.dtg.h"
+#include "pcg/machine_specification.dtg.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h"
+#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h"
+
+namespace FlexFlow {
+
+OpCostEstimateKey get_mapped_op_cost_estimate_key_for_layer(
+    ParallelComputationGraph const &pcg,
+    parallel_layer_guid_t const &layer,
+    MachineView const &machine_view);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/compiler/include/compiler/cost_estimator/op_cost_metrics.struct.toml b/lib/compiler/include/compiler/cost_estimator/op_cost_metrics.struct.toml
index d2ff3f42e7..5e81d6c10e 100644
--- a/lib/compiler/include/compiler/cost_estimator/op_cost_metrics.struct.toml
+++ b/lib/compiler/include/compiler/cost_estimator/op_cost_metrics.struct.toml
@@ -11,7 +11,11 @@ includes = [
 ]
 
 [[fields]]
-name = "runtime"
+name = "forward_runtime"
+type = "float"
+
+[[fields]]
+name = "backward_runtime"
 type = "float"
 
 [[fields]]
diff --git a/lib/compiler/include/compiler/cost_estimator/tensor_set_movement.h b/lib/compiler/include/compiler/cost_estimator/tensor_set_movement.h
new file mode 100644
index 0000000000..34188ff97c
--- /dev/null
+++ b/lib/compiler/include/compiler/cost_estimator/tensor_set_movement.h
@@ -0,0 +1,19 @@
+#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_TENSOR_SET_MOVEMENT_H
+#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_TENSOR_SET_MOVEMENT_H
+
+#include "compiler/cost_estimator/tensor_set_movement.dtg.h"
+#include "pcg/machine_view.dtg.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.dtg.h"
+
+namespace FlexFlow {
+
+TensorSetMovement get_tensor_set_movement_from_pcg_edge(
+    ParallelComputationGraphEdge const &edge,
+    ParallelComputationGraph const &pcg,
+    MachineView const &src_mv,
+    MachineView const &dst_mv);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/compiler/include/compiler/machine_mapping/machine_mapping.h b/lib/compiler/include/compiler/machine_mapping/machine_mapping.h
index 06cbbf942d..7375cde985 100644
--- a/lib/compiler/include/compiler/machine_mapping/machine_mapping.h
+++ b/lib/compiler/include/compiler/machine_mapping/machine_mapping.h
@@ -2,6 +2,10 @@
 #define _FLEXFLOW_COMPILER_MACHINE_MAPPING_H
 
 #include "compiler/machine_mapping/machine_mapping.dtg.h"
+#include "pcg/device_id_t.dtg.h"
+#include "pcg/machine_specification.dtg.h"
+#include "pcg/operator_task_space.dtg.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h"
 
 namespace FlexFlow {
 
diff --git a/lib/compiler/include/compiler/machine_mapping/unstructured_device_mapping.h b/lib/compiler/include/compiler/machine_mapping/unstructured_device_mapping.h
new file mode 100644
index 0000000000..0fb31210fd
--- /dev/null
+++ b/lib/compiler/include/compiler/machine_mapping/unstructured_device_mapping.h
@@ -0,0 +1,18 @@
+#ifndef _FLEXFLOW_COMPILER_MACHINE_MAPPING_UNSTRUCTURED_DEVICE_MAPPING_H
+#define _FLEXFLOW_COMPILER_MACHINE_MAPPING_UNSTRUCTURED_DEVICE_MAPPING_H
+
+#include "compiler/machine_mapping/machine_mapping.dtg.h"
+#include "compiler/machine_mapping/unstructured_device_mapping.dtg.h"
+#include "pcg/machine_specification.dtg.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h"
+
+namespace FlexFlow {
+
+UnstructuredDeviceMapping
+    get_unstructured_device_mapping(MachineMapping const &machine_mapping,
+                                    MachineSpecification const &machine_spec,
+                                    ParallelComputationGraph const &pcg);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/compiler/include/compiler/machine_mapping/unstructured_device_mapping.struct.toml b/lib/compiler/include/compiler/machine_mapping/unstructured_device_mapping.struct.toml
new file mode 100644
index 0000000000..ae38a37292
--- /dev/null
+++ b/lib/compiler/include/compiler/machine_mapping/unstructured_device_mapping.struct.toml
@@ -0,0 +1,26 @@
+namespace = "FlexFlow"
+name = "UnstructuredDeviceMapping"
+features = [
+  "eq",
+  # "ord",
+  "hash",
+  # "json",
+  # "rapidcheck",
+  "fmt",
+]
+
+includes = [
+  "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h",
+  "pcg/device_id_t.dtg.h"
+]
+
+src_includes = [
+  "utils/hash/unordered_map.h",
+  "utils/fmt/unordered_map.h",
+  "utils/hash/unordered_set.h",
+  "utils/fmt/unordered_set.h"
+]
+
+[[fields]]
+name = "raw_device_map"
+type = "std::unordered_map<::FlexFlow::parallel_layer_guid_t, std::unordered_set<::FlexFlow::device_id_t>>"
diff --git a/lib/compiler/include/compiler/task_graph_simulator/in_progress_task.struct.toml b/lib/compiler/include/compiler/task_graph_simulator/in_progress_task.struct.toml
new file mode 100644
index 0000000000..71e0e17f5e
--- /dev/null
+++ b/lib/compiler/include/compiler/task_graph_simulator/in_progress_task.struct.toml
@@ -0,0 +1,26 @@
+namespace = "FlexFlow"
+name = "InProgressTask"
+
+features = [
+  "eq",
+  "hash",
+  "fmt",
+  "ord"
+]
+
+includes = [
+  "utils/graph/node/node.dtg.h"
+]
+
+
+[[fields]]
+name = "start_time"
+type = "float"
+
+[[fields]]
+name = "end_time"
+type = "float"
+
+[[fields]]
+name = "node"
+type = "::FlexFlow::Node"
diff --git a/lib/compiler/include/compiler/task_graph_simulator/in_progress_task_comparator.h b/lib/compiler/include/compiler/task_graph_simulator/in_progress_task_comparator.h
new file mode 100644
index 0000000000..ed509cb7be
--- /dev/null
+++ b/lib/compiler/include/compiler/task_graph_simulator/in_progress_task_comparator.h
@@ -0,0 +1,13 @@
+#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_IN_PROGRESS_TASK_COMPARATOR_H
+#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_IN_PROGRESS_TASK_COMPARATOR_H
+
+#include "compiler/task_graph_simulator/in_progress_task.dtg.h"
+#include
+
+namespace FlexFlow {
+struct InProgressTaskComparator {
+  bool operator()(InProgressTask const &lhs, InProgressTask const &rhs) const;
+};
+} // namespace FlexFlow
+
+#endif // _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_IN_PROGRESS_TASK_COMPARATOR_H
diff --git a/lib/compiler/include/compiler/task_graph_simulator/pcg_task.variant.toml b/lib/compiler/include/compiler/task_graph_simulator/pcg_task.variant.toml
new file mode 100644
index 0000000000..13f2f17652
--- /dev/null
+++ b/lib/compiler/include/compiler/task_graph_simulator/pcg_task.variant.toml
@@ -0,0 +1,20 @@
+namespace = "FlexFlow"
+name = "PCGTask"
+features = [
+  "eq",
+  "hash",
+  "fmt",
+]
+
+includes = [
"compiler/cost_estimator/op_cost_estimate_key.dtg.h", + "compiler/cost_estimator/tensor_set_movement.dtg.h", +] + +[[values]] +type = "::FlexFlow::OpCostEstimateKey" +key = "operator" + +[[values]] +type = "::FlexFlow::TensorSetMovement" +key = "tensor_movement" diff --git a/lib/compiler/include/compiler/task_graph_simulator/pcg_task_graph.h b/lib/compiler/include/compiler/task_graph_simulator/pcg_task_graph.h new file mode 100644 index 0000000000..2c6d6514e8 --- /dev/null +++ b/lib/compiler/include/compiler/task_graph_simulator/pcg_task_graph.h @@ -0,0 +1,17 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_PCG_TASK_GRAPH_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_PCG_TASK_GRAPH_H + +#include "compiler/machine_mapping/machine_mapping.dtg.h" +#include "compiler/task_graph_simulator/pcg_task_graph.dtg.h" +#include "pcg/machine_specification.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" + +namespace FlexFlow { + +PCGTaskGraph get_pcg_task_graph(ParallelComputationGraph const &pcg, + MachineMapping const &machine_mapping, + MachineSpecification const &machine_spec); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/task_graph_simulator/pcg_task_graph.struct.toml b/lib/compiler/include/compiler/task_graph_simulator/pcg_task_graph.struct.toml new file mode 100644 index 0000000000..099f44c564 --- /dev/null +++ b/lib/compiler/include/compiler/task_graph_simulator/pcg_task_graph.struct.toml @@ -0,0 +1,34 @@ +namespace = "FlexFlow" +name = "PCGTaskGraph" + +features = [ +] + +includes = [ + "utils/graph/digraph/digraph_view.h", + "utils/bidict/bidict.h", + "compiler/task_graph_simulator/pcg_task.dtg.h", + "pcg/device_id_t.dtg.h", + "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h", + "", + "" +] + +src_includes = [ + "utils/fmt/unordered_set.h", + "utils/hash/unordered_set.h", + "utils/fmt/unordered_map.h", + "utils/hash/unordered_map.h" +] + +[[fields]] +name = "graph" +type = "::FlexFlow::DiGraphView" + +[[fields]] +name = "node_to_task" +type = "::FlexFlow::bidict<::FlexFlow::Node, ::FlexFlow::PCGTask>" + +[[fields]] +name = "node_to_devices" +type = "std::unordered_map<::FlexFlow::Node, std::unordered_set<::FlexFlow::device_id_t>>" diff --git a/lib/compiler/include/compiler/task_graph_simulator/simulate_task_graph_execution.h b/lib/compiler/include/compiler/task_graph_simulator/simulate_task_graph_execution.h new file mode 100644 index 0000000000..424e65f9df --- /dev/null +++ b/lib/compiler/include/compiler/task_graph_simulator/simulate_task_graph_execution.h @@ -0,0 +1,17 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_SIMULATE_TASK_GRAPH_EXECUTION_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_SIMULATE_TASK_GRAPH_EXECUTION_H + +#include "compiler/task_graph_simulator/task_execution_constraint.dtg.h" +#include "compiler/task_graph_simulator/task_graph_execution_trace.dtg.h" +#include "utils/graph/digraph/digraph_view.h" +#include +namespace FlexFlow { + +TaskGraphExecutionTrace simulate_task_graph_execution( + DiGraphView const &task_graph, + std::function cost_function, + TaskExecutionConstraint const &constraint); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/task_graph_simulator/task_execution_constraint.struct.toml b/lib/compiler/include/compiler/task_graph_simulator/task_execution_constraint.struct.toml new file mode 100644 index 0000000000..004655b5ec --- /dev/null +++ 
b/lib/compiler/include/compiler/task_graph_simulator/task_execution_constraint.struct.toml @@ -0,0 +1,15 @@ +namespace = "FlexFlow" +name = "TaskExecutionConstraint" +features = [ +] + +includes = [ + "utils/graph/node/node.dtg.h", + "", + "" +] + + +[[fields]] +name = "is_satisfied" +type = "std::function const &, std::unordered_set const &)>" diff --git a/lib/compiler/include/compiler/task_graph_simulator/task_graph_execution_state.struct.toml b/lib/compiler/include/compiler/task_graph_simulator/task_graph_execution_state.struct.toml new file mode 100644 index 0000000000..b96d7264b9 --- /dev/null +++ b/lib/compiler/include/compiler/task_graph_simulator/task_graph_execution_state.struct.toml @@ -0,0 +1,40 @@ +namespace = "FlexFlow" +name = "TaskGraphExecutionState" + +features = [ +] + +includes = [ + "utils/deduplicated_priority_queue.h", + "utils/graph/node/node.dtg.h", + "compiler/task_graph_simulator/in_progress_task.dtg.h", + "compiler/task_graph_simulator/in_progress_task_comparator.h", + "", + "", + "" +] + +src_includes = [ + "utils/hash/unordered_set.h", + "utils/fmt/unordered_set.h", + "utils/hash/set.h", + "utils/fmt/set.h", + "utils/fmt/vector.h", + "utils/hash/vector.h" +] + +[[fields]] +name = "ready_tasks" +type = "std::set<::FlexFlow::Node>" + +[[fields]] +name = "in_progress_tasks" +type = "::FlexFlow::DeduplicatedPriorityQueue<::FlexFlow::InProgressTask, std::vector<::FlexFlow::InProgressTask>, ::FlexFlow::InProgressTaskComparator>" + +[[fields]] +name = "finished_tasks" +type = "std::unordered_set<::FlexFlow::Node>" + +[[fields]] +name = "current_time" +type = "float" diff --git a/lib/compiler/include/compiler/task_graph_simulator/task_graph_execution_trace.h b/lib/compiler/include/compiler/task_graph_simulator/task_graph_execution_trace.h new file mode 100644 index 0000000000..0ad5b4824b --- /dev/null +++ b/lib/compiler/include/compiler/task_graph_simulator/task_graph_execution_trace.h @@ -0,0 +1,12 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_TASK_GRAPH_SIMULATOR_TASK_GRAPH_EXECUTION_TRACE_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_TASK_GRAPH_SIMULATOR_TASK_GRAPH_EXECUTION_TRACE_H + +#include "compiler/task_graph_simulator/task_graph_execution_trace.dtg.h" + +namespace FlexFlow { + +float get_total_execution_time(TaskGraphExecutionTrace const &trace); + +} // namespace FlexFlow + +#endif // _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_TASK_GRAPH_SIMULATOR_TASK_GRAPH_EXECUTION_TRACE_H diff --git a/lib/compiler/include/compiler/task_graph_simulator/task_graph_execution_trace.struct.toml b/lib/compiler/include/compiler/task_graph_simulator/task_graph_execution_trace.struct.toml new file mode 100644 index 0000000000..3003e5a157 --- /dev/null +++ b/lib/compiler/include/compiler/task_graph_simulator/task_graph_execution_trace.struct.toml @@ -0,0 +1,23 @@ +namespace = "FlexFlow" +name = "TaskGraphExecutionTrace" + +features = [ + "hash", + "fmt", + "eq" +] + +includes = [ + "compiler/task_graph_simulator/task_profile.dtg.h", + "" +] + +src_includes = [ + "utils/fmt/unordered_set.h", + "utils/hash/unordered_set.h" +] + + +[[fields]] +name = "task_profiles" +type = "std::unordered_set<::FlexFlow::TaskProfile>" diff --git a/lib/compiler/include/compiler/task_graph_simulator/task_profile.struct.toml b/lib/compiler/include/compiler/task_graph_simulator/task_profile.struct.toml new file mode 100644 index 0000000000..1a47acfa0e --- /dev/null +++ b/lib/compiler/include/compiler/task_graph_simulator/task_profile.struct.toml @@ -0,0 +1,26 @@ +namespace = "FlexFlow" +name = 
"TaskProfile" + +features = [ + "eq", + "hash", + "fmt", + "ord" +] + +includes = [ + "utils/graph/node/node.dtg.h" +] + + +[[fields]] +name = "node" +type = "::FlexFlow::Node" + +[[fields]] +name = "start_time" +type = "float" + +[[fields]] +name = "end_time" +type = "float" diff --git a/lib/compiler/include/compiler/task_graph_simulator/task_simulator.h b/lib/compiler/include/compiler/task_graph_simulator/task_simulator.h new file mode 100644 index 0000000000..b35733e419 --- /dev/null +++ b/lib/compiler/include/compiler/task_graph_simulator/task_simulator.h @@ -0,0 +1,18 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_TASK_SIMULATOR_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_TASK_SIMULATOR_H + +#include "compiler/cost_estimator/cost_estimator.h" +#include "compiler/machine_mapping/machine_mapping.dtg.h" +#include "pcg/machine_specification.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" + +namespace FlexFlow { +float task_simulator_estimate_forward_pass_time( + ParallelComputationGraph const &pcg, + CostEstimator const &estimator, + MachineMapping const &machine_mapping, + MachineSpecification const &machine_spec); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc index 1c226f79b0..db7477b460 100644 --- a/lib/compiler/src/compiler/allowed_machine_views.cc +++ b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -24,6 +24,10 @@ namespace FlexFlow { bool is_valid_machine_view(MachineView const &mv, OperatorTaskSpace const &task, MachineSpecification const &ms) { + if (num_dims(mv) != num_dims(task)) { + return false; + } + std::optional maximum_device_coord = get_machine_space_coordinate( task, mv, get_task_space_maximum_coordinate(task), ms); diff --git a/lib/compiler/src/compiler/cost_estimator/op_cost_estimate_key.cc b/lib/compiler/src/compiler/cost_estimator/op_cost_estimate_key.cc new file mode 100644 index 0000000000..ef5775851f --- /dev/null +++ b/lib/compiler/src/compiler/cost_estimator/op_cost_estimate_key.cc @@ -0,0 +1,23 @@ +#include "compiler/cost_estimator/op_cost_estimate_key.h" +#include "compiler/cost_estimator/op_cost_estimate_key.dtg.h" +#include "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_op_cost_estimate_key.h" +#include "op-attrs/parallel_tensor_shape.dtg.h" +#include "pcg/device_id_t.dtg.h" +#include "pcg/machine_specification.dtg.h" +#include "pcg/machine_view.dtg.h" +#include "pcg/machine_view.h" +#include "pcg/operator_task_space.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" +#include + +namespace FlexFlow { + +OpCostEstimateKey get_mapped_op_cost_estimate_key_for_layer( + ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &layer, + MachineView const &machine_view) { + return map_unmapped_op_cost_estimate_key( + get_unmapped_op_cost_estimate_key_for_layer(pcg, layer), machine_view); +} + +} // namespace FlexFlow diff --git a/lib/compiler/src/compiler/cost_estimator/tensor_set_movement.cc b/lib/compiler/src/compiler/cost_estimator/tensor_set_movement.cc new file mode 100644 index 0000000000..8f2ab84b84 --- /dev/null +++ b/lib/compiler/src/compiler/cost_estimator/tensor_set_movement.cc @@ -0,0 +1,16 @@ +#include "compiler/cost_estimator/tensor_set_movement.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" +namespace FlexFlow { + +TensorSetMovement get_tensor_set_movement_from_pcg_edge( + 
+    ParallelComputationGraphEdge const &edge,
+    ParallelComputationGraph const &pcg,
+    MachineView const &src_mv,
+    MachineView const &dst_mv) {
+  ParallelTensorShape tensor_shape =
+      get_parallel_tensor_shape(pcg, parallel_tensor_guid_t{edge.raw_edge.src});
+  return TensorSetMovement{
+      {SingleTensorMovement{tensor_shape, {src_mv}, {dst_mv}}}};
+}
+
+} // namespace FlexFlow
diff --git a/lib/compiler/src/compiler/machine_mapping/get_optimal_machine_mapping.cc b/lib/compiler/src/compiler/machine_mapping/get_optimal_machine_mapping.cc
index 5bdd8645a5..49d528e4ab 100644
--- a/lib/compiler/src/compiler/machine_mapping/get_optimal_machine_mapping.cc
+++ b/lib/compiler/src/compiler/machine_mapping/get_optimal_machine_mapping.cc
@@ -1,4 +1,5 @@
 #include "compiler/machine_mapping/get_optimal_machine_mapping.h"
+#include "compiler/cost_estimator/op_cost_metrics.dtg.h"
 #include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h"
 #include "compiler/machine_mapping/get_machine_resource_splits.h"
 #include "compiler/machine_mapping/machine_mapping_cache.h"
@@ -240,8 +241,8 @@ MachineMappingResult
   auto get_mapping_result = [&](MachineView const &machine_view) {
     OpCostEstimateKey mapped =
         map_unmapped_op_cost_estimate_key(leaf, machine_view);
-    float cost = context.cost_estimator.estimate_cost(mapped).runtime;
-
+    OpCostMetrics metrics = context.cost_estimator.estimate_cost(mapped);
+    float cost = metrics.forward_runtime + metrics.backward_runtime;
     return make_singleton_machine_mapping_result(cost, machine_view);
   };
 
diff --git a/lib/compiler/src/compiler/machine_mapping/machine_mapping.cc b/lib/compiler/src/compiler/machine_mapping/machine_mapping.cc
index 57e82684e9..fc3a58995c 100644
--- a/lib/compiler/src/compiler/machine_mapping/machine_mapping.cc
+++ b/lib/compiler/src/compiler/machine_mapping/machine_mapping.cc
@@ -1,13 +1,20 @@
 #include "compiler/machine_mapping/machine_mapping.h"
+#include "pcg/machine_specification.h"
+#include "pcg/machine_view.h"
+#include "pcg/operator_task_space.dtg.h"
+#include "pcg/operator_task_space.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
 #include "utils/containers/are_disjoint.h"
+#include "utils/containers/get_one_of.h"
 #include "utils/containers/keys.h"
+#include "utils/containers/map_values.h"
 #include "utils/containers/merge_maps.h"
 
 namespace FlexFlow {
 
-MachineMapping combine_disjoint_mappings(MachineMapping const &s1,
-                                         MachineMapping const &s2) {
-  return MachineMapping{merge_maps(s1.machine_views, s2.machine_views)};
+MachineMapping combine_disjoint_mappings(MachineMapping const &m1,
+                                         MachineMapping const &m2) {
+  return MachineMapping{merge_maps(m1.machine_views, m2.machine_views)};
 }
 
 bool nodes_are_disjoint(MachineMapping const &m1, MachineMapping const &m2) {
diff --git a/lib/compiler/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.cc b/lib/compiler/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.cc
index a6c2d1ed04..9b4a1fd6fe 100644
--- a/lib/compiler/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.cc
+++ b/lib/compiler/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.cc
@@ -30,7 +30,9 @@ MachineMappingWithMemoryResult remove_non_pareto_optimal_machine_mapping_result(
     bool is_pareto_optimal = true;
     for (MachineMappingForSingleLayer const &other_mapping :
          result.machine_mappings) {
-      if (mapping.cost.runtime >= other_mapping.cost.runtime &&
+      if (mapping.cost.forward_runtime >= other_mapping.cost.forward_runtime &&
+          mapping.cost.backward_runtime >=
+              other_mapping.cost.backward_runtime &&
           mapping.cost.memory >= other_mapping.cost.memory &&
           mapping != other_mapping) {
         is_pareto_optimal = false;
@@ -54,7 +56,10 @@ MachineMappingWithMemoryResult
       [&](MachineMappingForSingleLayer const &pre_mm,
           MachineMappingForSingleLayer const &post_mm) {
         OpCostMetrics cost = OpCostMetrics{
-            pre_mm.cost.runtime + comm_cost + post_mm.cost.runtime,
+            pre_mm.cost.forward_runtime + comm_cost +
+                post_mm.cost.forward_runtime,
+            pre_mm.cost.backward_runtime + comm_cost +
+                post_mm.cost.backward_runtime,
             pre_mm.cost.memory + post_mm.cost.memory,
         };
 
@@ -93,7 +98,9 @@ MachineMappingWithMemoryResult
       [&](MachineMappingForSingleLayer const &lhs_mm,
           MachineMappingForSingleLayer const &rhs_mm) {
         OpCostMetrics cost = OpCostMetrics{
-            std::max(lhs_mm.cost.runtime, rhs_mm.cost.runtime),
+            std::max(lhs_mm.cost.forward_runtime, rhs_mm.cost.forward_runtime),
+            std::max(lhs_mm.cost.backward_runtime,
+                     rhs_mm.cost.backward_runtime), //(@wmdi) is this correct?
             std::max(lhs_mm.cost.memory, rhs_mm.cost.memory),
         };
 
diff --git a/lib/compiler/src/compiler/machine_mapping/unstructured_device_mapping.cc b/lib/compiler/src/compiler/machine_mapping/unstructured_device_mapping.cc
new file mode 100644
index 0000000000..63e359d9ac
--- /dev/null
+++ b/lib/compiler/src/compiler/machine_mapping/unstructured_device_mapping.cc
@@ -0,0 +1,28 @@
+
+#include "compiler/machine_mapping/unstructured_device_mapping.h"
+#include "compiler/machine_mapping/unstructured_device_mapping.dtg.h"
+#include "pcg/machine_specification.h"
+#include "pcg/machine_view.h"
+#include "pcg/operator_task_space.dtg.h"
+#include "pcg/operator_task_space.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
+#include "utils/containers/keys.h"
+#include "utils/containers/map_values.h"
+
+namespace FlexFlow {
+
+UnstructuredDeviceMapping
+    get_unstructured_device_mapping(MachineMapping const &machine_mapping,
+                                    MachineSpecification const &machine_spec,
+                                    ParallelComputationGraph const &pcg) {
+  std::unordered_map<parallel_layer_guid_t, std::unordered_set<device_id_t>>
+      device_mapping;
+  for (auto const &[layer, machine_view] : machine_mapping.machine_views) {
+    OperatorTaskSpace op = get_operator_task_space(pcg, layer);
+    device_mapping.insert(
+        {layer, get_device_ids(op, machine_view, machine_spec)});
+  }
+  return UnstructuredDeviceMapping{device_mapping};
+}
+
+} // namespace FlexFlow
diff --git a/lib/compiler/src/compiler/task_graph_simulator/in_progress_task_comparator.cc b/lib/compiler/src/compiler/task_graph_simulator/in_progress_task_comparator.cc
new file mode 100644
index 0000000000..2064c56a52
--- /dev/null
+++ b/lib/compiler/src/compiler/task_graph_simulator/in_progress_task_comparator.cc
@@ -0,0 +1,11 @@
+#include "compiler/task_graph_simulator/in_progress_task_comparator.h"
+#include <tuple>
+
+namespace FlexFlow {
+
+bool InProgressTaskComparator::operator()(InProgressTask const &lhs,
+                                          InProgressTask const &rhs) const {
+  return std::tie(lhs.end_time, lhs.node) > std::tie(rhs.end_time, rhs.node);
+}
+
+} // namespace FlexFlow
diff --git a/lib/compiler/src/compiler/task_graph_simulator/pcg_task_graph.cc b/lib/compiler/src/compiler/task_graph_simulator/pcg_task_graph.cc
new file mode 100644
index 0000000000..539c44a963
--- /dev/null
+++ b/lib/compiler/src/compiler/task_graph_simulator/pcg_task_graph.cc
@@ -0,0 +1,59 @@
+#include "compiler/task_graph_simulator/pcg_task_graph.h"
+#include "compiler/cost_estimator/op_cost_estimate_key.h"
+#include "compiler/cost_estimator/tensor_set_movement.h"
+#include "compiler/machine_mapping/machine_mapping.dtg.h"
+#include "pcg/device_id_t.dtg.h"
+#include "pcg/machine_specification.dtg.h"
+#include "pcg/machine_view.dtg.h"
+#include "pcg/machine_view.h"
+#include "pcg/operator_task_space.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.dtg.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.h"
+#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h"
+#include "utils/bidict/bidict.h"
+#include "utils/graph/instances/adjacency_digraph.h"
+#include <unordered_map>
+#include <unordered_set>
+
+namespace FlexFlow {
+
+PCGTaskGraph get_pcg_task_graph(ParallelComputationGraph const &pcg,
+                                MachineMapping const &machine_mapping,
+                                MachineSpecification const &machine_spec) {
+  DiGraph digraph = DiGraph::create<AdjacencyDiGraph>();
+  bidict<Node, PCGTask> node_to_task;
+  bidict<Node, parallel_layer_guid_t> node_to_layer;
+  std::unordered_map<Node, std::unordered_set<device_id_t>> node_to_devices;
+
+  for (parallel_layer_guid_t const &layer : get_parallel_layers(pcg)) {
+    MachineView mv = machine_mapping.machine_views.at(layer);
+    OpCostEstimateKey op_key =
+        get_mapped_op_cost_estimate_key_for_layer(pcg, layer, mv);
+    Node node = digraph.add_node();
+    node_to_task.equate(node, PCGTask{op_key});
+    node_to_layer.equate(node, layer);
+    node_to_devices[node] =
+        get_device_ids(get_operator_task_space(pcg, layer),
+                       machine_mapping.machine_views.at(layer),
+                       machine_spec);
+  }
+
+  for (ParallelComputationGraphEdge const &edge : get_edges(pcg)) {
+    MachineView src_mv = machine_mapping.machine_views.at(get_src_layer(edge));
+    MachineView dst_mv = machine_mapping.machine_views.at(get_dst_layer(edge));
+    TensorSetMovement movement =
+        get_tensor_set_movement_from_pcg_edge(edge, pcg, src_mv, dst_mv);
+    Node node = digraph.add_node();
+    node_to_task.equate(node, PCGTask{movement});
+    node_to_devices[node] = {};
+    Node src_node = node_to_layer.at_r(get_src_layer(edge));
+    Node dst_node = node_to_layer.at_r(get_dst_layer(edge));
+
+    digraph.add_edge(DirectedEdge{src_node, node});
+    digraph.add_edge(DirectedEdge{node, dst_node});
+  }
+
+  return PCGTaskGraph{digraph, node_to_task, node_to_devices};
+}
+} // namespace FlexFlow
diff --git a/lib/compiler/src/compiler/task_graph_simulator/simulate_task_graph_execution.cc b/lib/compiler/src/compiler/task_graph_simulator/simulate_task_graph_execution.cc
new file mode 100644
index 0000000000..974a70ddf5
--- /dev/null
+++ b/lib/compiler/src/compiler/task_graph_simulator/simulate_task_graph_execution.cc
@@ -0,0 +1,107 @@
+#include "compiler/task_graph_simulator/simulate_task_graph_execution.h"
+#include "compiler/task_graph_simulator/in_progress_task.dtg.h"
+#include "compiler/task_graph_simulator/task_graph_execution_state.dtg.h"
+#include "compiler/task_graph_simulator/task_graph_execution_trace.dtg.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
+#include "utils/containers/filtrans.h"
+#include "utils/containers/is_subseteq_of.h"
+#include "utils/containers/set_of.h"
+#include "utils/containers/sorted.h"
+#include "utils/exception.h"
+#include "utils/graph/digraph/algorithms.h"
+#include "utils/graph/digraph/algorithms/get_predecessors.h"
+#include "utils/graph/digraph/algorithms/get_successors.h"
+#include "utils/graph/digraph/algorithms/is_acyclic.h"
+#include "utils/graph/digraph/digraph_view.h"
+#include "utils/graph/node/algorithms.h"
+#include "utils/overload.h"
+#include <functional>
+#include <unordered_set>
+
+namespace FlexFlow {
+
+TaskGraphExecutionTrace simulate_task_graph_execution(
+    DiGraphView const &task_graph,
+    std::function<float(Node const &)> cost_function,
+    TaskExecutionConstraint const &constraint) {
+  if (!is_acyclic(task_graph)) {
+    throw mk_runtime_error(
+        "simulate_task_graph_execution cannot simulate cyclic directed graphs");
+  }
+
+  TaskGraphExecutionState execution_state =
+      TaskGraphExecutionState{/*ready_tasks=*/set_of(get_sources(task_graph)),
+                              /*in_progress_tasks=*/{},
+                              /*finished_tasks=*/{},
+                              /*current_time=*/0.0};
+
+  std::unordered_set<TaskProfile> task_profiles;
+
+  auto start_task_processing = [&](Node const &task) {
+    float cost = cost_function(task);
+    execution_state.in_progress_tasks.push(
+        InProgressTask{execution_state.current_time,
+                       execution_state.current_time + cost,
+                       task});
+    execution_state.ready_tasks.erase(task);
+  };
+
+  auto dependencies_are_satisfied = [&](Node const &task) {
+    std::unordered_set<Node> incoming_dependencies =
+        get_predecessors(task_graph, task);
+    return is_subseteq_of(incoming_dependencies,
+                          execution_state.finished_tasks);
+  };
+
+  auto finish_task_processing = [&](InProgressTask const &in_progress_task) {
+    execution_state.finished_tasks.insert(in_progress_task.node);
+    for (Node const &task :
+         get_successors(task_graph, in_progress_task.node)) {
+      if (dependencies_are_satisfied(task)) {
+        execution_state.ready_tasks.insert(task);
+      }
+    }
+    task_profiles.insert(TaskProfile{in_progress_task.node,
+                                     in_progress_task.start_time,
+                                     in_progress_task.end_time});
+    execution_state.current_time = in_progress_task.end_time;
+  };
+
+  auto is_processing_done = [&]() {
+    return execution_state.ready_tasks.empty() &&
+           execution_state.in_progress_tasks.empty();
+  };
+
+  auto get_next_task_to_finish = [&]() {
+    InProgressTask task = execution_state.in_progress_tasks.top();
+    execution_state.in_progress_tasks.pop();
+    return task;
+  };
+
+  while (!is_processing_done()) {
+    auto ready_tasks_copy = execution_state.ready_tasks;
+    for (Node const &task : ready_tasks_copy) {
+      std::unordered_set<Node> raw_in_progress_tasks = transform(
+          unordered_set_of(execution_state.in_progress_tasks.contents()),
+          [](InProgressTask const &t) { return t.node; });
+
+      if (constraint.is_satisfied(
+              task, raw_in_progress_tasks, execution_state.finished_tasks)) {
+        start_task_processing(task);
+      }
+    }
+
+    if (!execution_state.in_progress_tasks.empty()) {
+      InProgressTask next_task = get_next_task_to_finish();
+      finish_task_processing(next_task);
+    } else {
+      throw mk_runtime_error("Constraints cannot be satisfied");
+    }
+  }
+  if (execution_state.finished_tasks.size() != num_nodes(task_graph)) {
+    throw mk_runtime_error("Failed to execute all tasks in given graph");
+  }
+
+  return TaskGraphExecutionTrace{task_profiles};
+}
+
+} // namespace FlexFlow
diff --git a/lib/compiler/src/compiler/task_graph_simulator/task_graph_execution_trace.cc b/lib/compiler/src/compiler/task_graph_simulator/task_graph_execution_trace.cc
new file mode 100644
index 0000000000..716a7afe15
--- /dev/null
+++ b/lib/compiler/src/compiler/task_graph_simulator/task_graph_execution_trace.cc
@@ -0,0 +1,27 @@
+#include "compiler/task_graph_simulator/task_graph_execution_trace.h"
+#include "utils/containers/maximum.h"
+#include "utils/containers/minimum.h"
+#include "utils/containers/transform.h"
+#include "utils/exception.h"
+#include "utils/fmt/unordered_set.h"
+
+namespace FlexFlow {
+
+float get_total_execution_time(TaskGraphExecutionTrace const &trace) {
+  if (trace.task_profiles.empty()) {
+    throw mk_runtime_error(
+        fmt::format("TaskGraphExecutionTrace {} is empty", trace));
+  }
+  float end_time =
+      maximum(transform(trace.task_profiles, [](TaskProfile const &profile) {
+        return profile.end_time;
+      }));
+  float start_time =
+      minimum(transform(trace.task_profiles, [](TaskProfile const &profile) {
+        return profile.start_time;
+      }));
+
+  return end_time - start_time;
+}
+
+} // namespace FlexFlow
diff --git a/lib/compiler/src/compiler/task_graph_simulator/task_simulator.cc b/lib/compiler/src/compiler/task_graph_simulator/task_simulator.cc
new file mode 100644
index 0000000000..ab204e7d71
--- /dev/null
+++ b/lib/compiler/src/compiler/task_graph_simulator/task_simulator.cc
@@ -0,0 +1,71 @@
+#include "compiler/task_graph_simulator/task_simulator.h"
+#include "compiler/cost_estimator/cost_estimator.h"
+#include "compiler/cost_estimator/op_cost_estimate_key.h"
+#include "compiler/machine_mapping/unstructured_device_mapping.dtg.h"
+#include "compiler/machine_mapping/unstructured_device_mapping.h"
+#include "compiler/task_graph_simulator/pcg_task.dtg.h"
+#include "compiler/task_graph_simulator/pcg_task_graph.h"
+#include "compiler/task_graph_simulator/simulate_task_graph_execution.h"
+#include "compiler/task_graph_simulator/task_execution_constraint.dtg.h"
+#include "compiler/task_graph_simulator/task_graph_execution_trace.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
+#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h"
+#include "utils/containers/filtrans.h"
+#include "utils/containers/set_union.h"
+#include "utils/containers/transform.h"
+#include "utils/graph/digraph/digraph.h"
+#include "utils/hash/unordered_set.h"
+#include <unordered_set>
+
+namespace FlexFlow {
+
+float task_simulator_estimate_forward_pass_time(
+    ParallelComputationGraph const &pcg,
+    CostEstimator const &estimator,
+    MachineMapping const &machine_mapping,
+    MachineSpecification const &machine_spec) {
+
+  PCGTaskGraph task_graph =
+      get_pcg_task_graph(pcg, machine_mapping, machine_spec);
+
+  auto cost_function = [&](Node const &node) -> float {
+    PCGTask task = task_graph.node_to_task.at_l(node);
+    if (task.is_operator()) {
+      return estimator.estimate_cost(task.require_operator()).forward_runtime;
+    } else {
+      return estimator.estimate_cost(task.require_tensor_movement());
+    }
+  };
+
+  auto is_allowed_to_run =
+      [&](Node const &task,
+          std::unordered_set<Node> const &in_progress_tasks,
+          std::unordered_set<Node> const &finished_tasks) -> bool {
+    PCGTask current_task = task_graph.node_to_task.at_l(task);
+
+    UnstructuredDeviceMapping device_map =
+        get_unstructured_device_mapping(machine_mapping, machine_spec, pcg);
+
+    if (current_task.is_tensor_movement()) {
+      return true;
+    }
+    assert(current_task.is_operator());
+
+    auto get_devices = [&](Node const &n) {
+      return task_graph.node_to_devices.at(n);
+    };
+
+    std::unordered_set<device_id_t> devices_occupied =
+        set_union(transform(in_progress_tasks, get_devices));
+    std::unordered_set<device_id_t> required_devices = get_devices(task);
+    return intersection(devices_occupied, required_devices).empty();
+  };
+
+  TaskExecutionConstraint constraint =
+      TaskExecutionConstraint{is_allowed_to_run};
+
+  return get_total_execution_time(simulate_task_graph_execution(
+      task_graph.graph, cost_function, constraint));
+}
+
+} // namespace FlexFlow
diff --git a/lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.cc b/lib/compiler/test/src/compiler/cost_estimator_for_test.cc
similarity index 72%
rename from lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.cc
rename to lib/compiler/test/src/compiler/cost_estimator_for_test.cc
index 0431104878..48e6f5e561 100644
--- a/lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.cc
+++ b/lib/compiler/test/src/compiler/cost_estimator_for_test.cc
@@ -1,6 +1,8 @@
 #include "./cost_estimator_for_test.h"
+#include "compiler/cost_estimator/op_cost_metrics.dtg.h"
 #include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h"
 #include "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_op_cost_estimate_key.h"
+#include "utils/nonnegative_int/nonnegative_int.h"
 
 namespace FlexFlow {
 
@@ -40,4 +42,15 @@ CostEstimator make_fake_cost_estimator(
       });
 }
 
+CostEstimator make_fake_constant_cost_estimator(float forward_op_cost,
+                                                float backward_op_cost,
+                                                float comm_cost,
+                                                nonnegative_int memory_cost) {
+  return make_fake_cost_estimator(
+      [=](OpCostEstimateKey const &op) {
+        return OpCostMetrics{forward_op_cost, backward_op_cost, memory_cost};
+      },
+      [=](TensorSetMovement const &op) { return comm_cost; });
+}
+
 } // namespace FlexFlow
diff --git a/lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.h b/lib/compiler/test/src/compiler/cost_estimator_for_test.h
similarity index 77%
rename from lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.h
rename to lib/compiler/test/src/compiler/cost_estimator_for_test.h
index 16ea3a85bc..1e8ce83caf 100644
--- a/lib/compiler/test/src/compiler/machine_mapping/cost_estimator_for_test.h
+++ b/lib/compiler/test/src/compiler/cost_estimator_for_test.h
@@ -1,5 +1,5 @@
-#ifndef _FLEXFLOW_TEST_COST_ESTIMATOR_H
-#define _FLEXFLOW_TEST_COST_ESTIMATOR_H
+#ifndef _FLEXFLOW_TEST_COST_ESTIMATOR_FOR_TEST_H
+#define _FLEXFLOW_TEST_COST_ESTIMATOR_FOR_TEST_H
 
 #include "compiler/cost_estimator/cost_estimator.h"
 #include "compiler/cost_estimator/op_cost_estimate_key.dtg.h"
@@ -7,6 +7,7 @@
 #include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.dtg.h"
 #include "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_op_cost_estimate_key.dtg.h"
 #include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.dtg.h"
+#include "utils/nonnegative_int/nonnegative_int.h"
 
 namespace FlexFlow {
 
@@ -34,6 +35,11 @@ CostEstimator make_fake_cost_estimator(
     std::unordered_map<OpCostEstimateKey, OpCostMetrics> const &op_cost_map,
     std::unordered_map<TensorSetMovement, float> const &comm_cost_map);
 
+CostEstimator make_fake_constant_cost_estimator(float forward_op_cost,
+                                                float backward_op_cost,
+                                                float comm_cost,
+                                                nonnegative_int memory_cost);
+
 } // namespace FlexFlow
 
 #endif
diff --git a/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc b/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc
index ac180cd079..542edd9fa9 100644
--- a/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc
+++ b/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc
@@ -1,5 +1,5 @@
 #include "compiler/machine_mapping/get_optimal_machine_mapping.h"
-#include "./cost_estimator_for_test.h"
+#include "../cost_estimator_for_test.h"
 #include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h"
 #include "compiler/machine_mapping/machine_mapping_cache.h"
 #include "compiler/machine_mapping/machine_mapping_constraints.h"
@@ -9,6 +9,7 @@
 #include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h"
 #include "utils/containers/get_only.h"
 #include "utils/full_binary_tree/binary_tree_path.h"
+#include "utils/nonnegative_int/nonnegative_int.h"
 #include <doctest/doctest.h>
 
 using namespace FlexFlow;
@@ -146,13 +147,21 @@ TEST_SUITE(FF_TEST_SUITE) {
 
     auto map1 = std::unordered_map<OpCostEstimateKey, OpCostMetrics>{{
         {map_unmapped_op_cost_estimate_key(k1, mv1),
-         OpCostMetrics{/*runtime=*/1.0, /*memory=*/nonnegative_int{0}}},
+         OpCostMetrics{/*forward_runtime=*/0.5,
+                       /*backward_runtime=*/0.5,
+                       /*memory=*/nonnegative_int{0}}},
         {map_unmapped_op_cost_estimate_key(k2, mv1),
-         OpCostMetrics{/*runtime=*/2.0, /*memory=*/nonnegative_int{0}}},
+         OpCostMetrics{/*forward_runtime=*/1.0,
+                       /*backward_runtime=*/1.0,
+                       /*memory=*/nonnegative_int{0}}},
        {map_unmapped_op_cost_estimate_key(k1, mv2),
-         OpCostMetrics{/*runtime=*/1.5, /*memory=*/nonnegative_int{0}}},
+         OpCostMetrics{/*forward_runtime=*/0.75,
+                       /*backward_runtime=*/0.75,
+                       /*memory=*/nonnegative_int{0}}},
        {map_unmapped_op_cost_estimate_key(k2, mv2),
-         OpCostMetrics{/*runtime=*/2.5, /*memory=*/nonnegative_int{0}}},
+         OpCostMetrics{/*forward_runtime=*/1.25,
+                       /*backward_runtime=*/1.25,
+                       /*memory=*/nonnegative_int{0}}},
     }};
 
     CostEstimator cost_estimator = make_fake_cost_estimator(
diff --git a/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc b/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc
index e22f715d82..52ad82595d 100644
--- a/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc
+++ b/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc
@@ -1,5 +1,5 @@
 #include "compiler/machine_mapping/get_tensor_set_movement_across_split.h"
-#include "./cost_estimator_for_test.h"
+#include "../cost_estimator_for_test.h"
 #include "compiler/machine_mapping/transitive_reduced_pcg.h"
 #include "pcg/machine_view.h"
 #include "pcg/parallel_computation_graph/parallel_computation_graph.h"
diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc
index 221cca3ae1..304034f9be 100644
--- a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc
+++ b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc
@@ -1,5 +1,4 @@
 #include "compiler/machine_mapping/machine_mapping.h"
-#include "cost_estimator_for_test.h"
 #include "doctest/doctest.h"
 #include "pcg/machine_view.h"
 
diff --git a/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc b/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc
index 9706f1c75f..8612017705 100644
--- a/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc
+++ b/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc
@@ -1,5 +1,5 @@
 #include "compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.h"
-#include "../cost_estimator_for_test.h"
+#include "../../cost_estimator_for_test.h"
 #include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h"
 #include "compiler/machine_mapping/machine_mapping_constraints.h"
 #include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.h"
@@ -9,6 +9,7 @@
 #include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h"
 #include "utils/containers/get_only.h"
 #include "utils/full_binary_tree/binary_tree_path.h"
+#include "utils/nonnegative_int/nonnegative_int.h"
 #include <doctest/doctest.h>
 
 using namespace FlexFlow;
@@ -147,24 +148,32 @@ TEST_SUITE(FF_TEST_SUITE) {
     CostEstimator cost_estimator = make_fake_cost_estimator(
         std::unordered_map<OpCostEstimateKey, OpCostMetrics>{{
            {map_unmapped_op_cost_estimate_key(k1, mv1),
-             OpCostMetrics{1.0, nonnegative_int{2}}},
+             OpCostMetrics{/*forward_runtime=*/1.0,
+                           /*backward_runtime=*/1.0,
+                           /*memory=*/nonnegative_int{2}}},
            {map_unmapped_op_cost_estimate_key(k2, mv1),
-             OpCostMetrics{2.0, nonnegative_int{3}}},
+             OpCostMetrics{/*forward_runtime=*/2.0,
+                           /*backward_runtime=*/2.0,
+                           /*memory=*/nonnegative_int{3}}},
            {map_unmapped_op_cost_estimate_key(k1, mv2),
-             OpCostMetrics{1.5, nonnegative_int{1}}},
+             OpCostMetrics{/*forward_runtime=*/1.5,
+                           /*backward_runtime=*/1.5,
+                           /*memory=*/nonnegative_int{1}}},
            {map_unmapped_op_cost_estimate_key(k2, mv2),
-             OpCostMetrics{2.5, nonnegative_int{2}}},
+             OpCostMetrics{/*forward_runtime=*/2.5,
+                           /*backward_runtime=*/2.5,
+                           /*memory=*/nonnegative_int{2}}},
         }},
         std::unordered_map<TensorSetMovement, float>{{
-            {TensorSetMovement{{}}, 0.0},
+            {TensorSetMovement{/*movements=*/{}}, /*cost=*/0.0},
             {concretize_abstracted_tensor_set_movement(movement1, mm1, mm1),
-             0.1},
+             /*cost=*/0.1},
             {concretize_abstracted_tensor_set_movement(movement1, mm2, mm2),
-             0.2},
+             /*cost=*/0.2},
             {concretize_abstracted_tensor_set_movement(movement1, mm1, mm2),
-             0.3},
+             /*cost=*/0.3},
             {concretize_abstracted_tensor_set_movement(movement1, mm2, mm1),
-             0.4},
+             /*cost=*/0.4},
         }});
 
     MachineMappingContext context = MachineMappingContext{
@@ -187,13 +196,17 @@ TEST_SUITE(FF_TEST_SUITE) {
           cache, context, problem_tree, full_machine_spec, constraints);
       MachineMappingWithMemoryResult correct = MachineMappingWithMemoryResult{{
           MachineMappingForSingleLayer{
-            OpCostMetrics{1.0, nonnegative_int{2}},
+            OpCostMetrics{/*forward_runtime=*/1.0,
+                          /*backward_runtime=*/1.0,
+                          /*memory=*/nonnegative_int{2}},
              ParallelLayerGuidObliviousMachineMapping{{
                  {binary_tree_root_path(), mv1},
              }},
          },
          MachineMappingForSingleLayer{
-            OpCostMetrics{1.5, nonnegative_int{1}},
+            OpCostMetrics{/*forward_runtime=*/1.5,
+                          /*backward_runtime=*/1.5,
+                          /*memory=*/nonnegative_int{1}},
              ParallelLayerGuidObliviousMachineMapping{{
                  {binary_tree_root_path(), mv2},
              }},
@@ -217,7 +230,8 @@ TEST_SUITE(FF_TEST_SUITE) {
       MachineMappingWithMemoryResult correct = MachineMappingWithMemoryResult{{
           MachineMappingForSingleLayer{
               OpCostMetrics{
-                  /*runtime=*/1.0 + 2.0 + 0.1,
+                  /*forward_runtime=*/1.0 + 2.0 + 0.1,
+                  /*backward_runtime=*/1.0 + 2.0 + 0.1,
                   /*memory=*/nonnegative_int{2 + 3},
               },
               ParallelLayerGuidObliviousMachineMapping{{
                   {
@@ -236,7 +250,9 @@ TEST_SUITE(FF_TEST_SUITE) {
              }},
          },
          MachineMappingForSingleLayer{
-            OpCostMetrics{1.5 + 2.5 + 0.1, nonnegative_int{1 + 2}},
+            OpCostMetrics{/*forward_runtime=*/1.5 + 2.5 + 0.1,
+                          /*backward_runtime=*/1.5 + 2.5 + 0.1,
+                          /*memory=*/nonnegative_int{1 + 2}},
              ParallelLayerGuidObliviousMachineMapping{{
                  {
                      BinaryTreePath{{
@@ -270,7 +286,9 @@ TEST_SUITE(FF_TEST_SUITE) {
           cache, context, problem_tree, full_machine_spec, constraints);
       MachineMappingWithMemoryResult correct =
           MachineMappingWithMemoryResult{{MachineMappingForSingleLayer{
-              OpCostMetrics{2.5, nonnegative_int{2}},
+              OpCostMetrics{/*forward_runtime=*/2.5,
+                            /*backward_runtime=*/2.5,
+                            /*memory=*/nonnegative_int{2}},
               ParallelLayerGuidObliviousMachineMapping{{
                   {
                       BinaryTreePath{{
diff --git a/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_result_with_memory.cc b/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_result_with_memory.cc
index ecfb7cfeb3..1f3b7545a8 100644
--- a/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_result_with_memory.cc
+++ b/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_result_with_memory.cc
@@ -1,5 +1,6 @@
 #include "compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.h"
 #include "pcg/machine_view.h"
+#include "utils/nonnegative_int/nonnegative_int.h"
 #include <doctest/doctest.h>
 
 using namespace FlexFlow;
@@ -52,15 +53,20 @@ TEST_SUITE(FF_TEST_SUITE) {
     };
 
     OpCostMetrics cost1 = OpCostMetrics{
-        /*runtime=*/2.0,
+        /*forward_runtime=*/2.0,
+        /*backward_runtime=*/2.0,
         /*memory=*/nonnegative_int{2},
     };
+
     OpCostMetrics cost2 = OpCostMetrics{
-        /*runtime=*/4.0,
+        /*forward_runtime=*/4.0,
+        /*backward_runtime=*/4.0,
         /*memory=*/nonnegative_int{1},
     };
+
     OpCostMetrics cost3 = OpCostMetrics{
-        /*runtime=*/2.0,
+        /*forward_runtime=*/2.0,
+        /*backward_runtime=*/2.0,
         /*memory=*/nonnegative_int{3},
     };
 
@@ -182,7 +188,8 @@ TEST_SUITE(FF_TEST_SUITE) {
     };
 
     OpCostMetrics pre_cost = OpCostMetrics{
-        /*runtime=*/2.0,
+        /*forward_runtime=*/2.0,
+        /*backward_runtime=*/2.0,
         /*memory=*/nonnegative_int{2},
     };
     MachineMappingWithMemoryResult pre = MachineMappingWithMemoryResult{{
@@ -208,7 +215,8 @@ TEST_SUITE(FF_TEST_SUITE) {
     }};
 
     OpCostMetrics post_cost = OpCostMetrics{
-        /*runtime=*/4.0,
+        /*forward_runtime=*/4.0,
+        /*backward_runtime=*/4.0,
         /*memory=*/nonnegative_int{1},
     };
 
@@ -253,8 +261,10 @@ TEST_SUITE(FF_TEST_SUITE) {
             {
                 MachineMappingForSingleLayer{
                     /*cost=*/OpCostMetrics{
-                        /*runtime=*/pre_cost.runtime + comm_cost +
-                            post_cost.runtime,
+                        /*forward_runtime=*/pre_cost.forward_runtime +
+                            comm_cost + post_cost.forward_runtime,
+                        /*backward_runtime=*/pre_cost.backward_runtime +
+                            comm_cost + post_cost.backward_runtime,
                         /*memory=*/pre_cost.memory + post_cost.memory,
                     },
                     /*machine_mapping=*/
@@ -307,8 +317,10 @@ TEST_SUITE(FF_TEST_SUITE) {
             {
                 MachineMappingForSingleLayer{
                     /*cost=*/OpCostMetrics{
-                        /*runtime=*/pre_cost.runtime + comm_cost +
-                            post_cost.runtime,
+                        /*forward_runtime=*/pre_cost.forward_runtime +
+                            comm_cost + post_cost.forward_runtime,
+                        /*backward_runtime=*/pre_cost.backward_runtime +
+                            comm_cost + post_cost.backward_runtime,
                         /*memory=*/pre_cost.memory + post_cost.memory,
                     },
                     /*machine_mapping=*/
@@ -377,7 +389,8 @@ TEST_SUITE(FF_TEST_SUITE) {
     };
 
     OpCostMetrics lhs_cost = OpCostMetrics{
-        /*runtime=*/2.0,
+        /*forward_runtime=*/2.0,
+        /*backward_runtime=*/2.0,
         /*memory=*/nonnegative_int{2},
     };
     MachineMappingWithMemoryResult lhs = MachineMappingWithMemoryResult{{
@@ -403,7 +416,8 @@ TEST_SUITE(FF_TEST_SUITE) {
     }};
 
     OpCostMetrics rhs_cost = OpCostMetrics{
-        /*runtime=*/4.0,
+        /*forward_runtime=*/4.0,
+        /*backward_runtime=*/4.0,
         /*memory=*/nonnegative_int{1},
     };
     MachineMappingWithMemoryResult rhs = MachineMappingWithMemoryResult{{
@@ -442,7 +456,11 @@ TEST_SUITE(FF_TEST_SUITE) {
     MachineMappingWithMemoryResult correct = MachineMappingWithMemoryResult{{
         MachineMappingForSingleLayer{
             /*cost=*/OpCostMetrics{
-                /*runtime=*/std::max(lhs_cost.runtime, rhs_cost.runtime),
+                /*forward_runtime=*/std::max(lhs_cost.forward_runtime,
+                                             rhs_cost.forward_runtime),
+                /*backward_runtime=*/
+                std::max(lhs_cost.backward_runtime,
+                         rhs_cost.backward_runtime),
                 /*memory=*/std::max(lhs_cost.memory, rhs_cost.memory),
             },
             /*machine_mapping=*/
@@ -518,15 +536,18 @@ TEST_SUITE(FF_TEST_SUITE) {
     };
 
     OpCostMetrics cost1 = OpCostMetrics{
-        /*runtime=*/2.0,
+        /*forward_runtime=*/2.0,
+        /*backward_runtime=*/2.0,
         /*memory=*/nonnegative_int{2},
     };
     OpCostMetrics cost2 = OpCostMetrics{
-        /*runtime=*/4.0,
/*forward_runtime=*/4.0, + /*backward_runtime=*/4.0, /*memory=*/nonnegative_int{1}, };
OpCostMetrics cost3 = OpCostMetrics{ - /*runtime=*/2.0, + /*forward_runtime=*/2.0, + /*backward_runtime=*/2.0, /*memory=*/nonnegative_int{3}, };
diff --git a/lib/compiler/test/src/compiler/task_graph_simulator/simulate_task_graph_execution.cc b/lib/compiler/test/src/compiler/task_graph_simulator/simulate_task_graph_execution.cc new file mode 100644 index 0000000000..e88f2b7840 --- /dev/null +++ b/lib/compiler/test/src/compiler/task_graph_simulator/simulate_task_graph_execution.cc @@ -0,0 +1,211 @@
+#include "compiler/task_graph_simulator/simulate_task_graph_execution.h" +#include "compiler/task_graph_simulator/task_graph_execution_state.dtg.h" +#include "compiler/task_graph_simulator/task_graph_execution_trace.dtg.h" +#include "utils/containers/lookup_in_map.h" +#include "utils/graph/algorithms.h" +#include "utils/graph/digraph/directed_edge.dtg.h" +#include "utils/graph/instances/adjacency_digraph.h" +#include <doctest/doctest.h> +#include
+
+namespace FlexFlow {
+
+TEST_SUITE(FF_TEST_SUITE) {
+  TEST_CASE("simulate_task_graph_execution") {
+    DiGraph g = DiGraph::create<AdjacencyDiGraph>();
+    SUBCASE("linear graph") {
+      std::vector<Node> n = add_nodes(g, 4);
+      add_edges(g, { DirectedEdge{n.at(0), n.at(1)}, DirectedEdge{n.at(1), n.at(2)}, DirectedEdge{n.at(2), n.at(3)}, });
+
+      auto cost_function = lookup_in_map<Node, float>( {{n.at(0), 1}, {n.at(1), 10}, {n.at(2), 100}, {n.at(3), 1000}});
+
+      auto is_allowed_to_run = [&](Node const &n, + std::unordered_set<Node> const &in_progress_tasks, + std::unordered_set<Node> const &finished_tasks) { return true; };
+
+      TaskExecutionConstraint constraint = TaskExecutionConstraint{is_allowed_to_run};
+
+      TaskGraphExecutionTrace result = simulate_task_graph_execution(g, cost_function, constraint);
+      TaskGraphExecutionTrace correct = TaskGraphExecutionTrace{{ + TaskProfile{n.at(0), 0, 1}, + TaskProfile{n.at(1), 1, 11}, + TaskProfile{n.at(2), 11, 111}, + TaskProfile{n.at(3), 111, 1111}, + }};
+      CHECK(correct == result);
+    }
+
+    SUBCASE("rhomboidal graph") {
+      std::vector<Node> n = add_nodes(g, 4);
+
+      add_edges(g, {DirectedEdge{n.at(0), n.at(1)}, + DirectedEdge{n.at(0), n.at(2)}, + DirectedEdge{n.at(1), n.at(3)}, + DirectedEdge{n.at(2), n.at(3)}});
+
+      auto cost_function = lookup_in_map<Node, float>( {{n.at(0), 10}, {n.at(1), 15}, {n.at(2), 20}, {n.at(3), 25}});
+
+      SUBCASE("no processing constraints") {
+        auto is_allowed_to_run = [&](Node const &n, + std::unordered_set<Node> const &in_progress_tasks, + std::unordered_set<Node> const &finished_tasks) { + return true; + };
+
+        TaskExecutionConstraint constraint = TaskExecutionConstraint{is_allowed_to_run};
+        TaskGraphExecutionTrace result = simulate_task_graph_execution(g, cost_function, constraint);
+        TaskGraphExecutionTrace correct = TaskGraphExecutionTrace{{ + TaskProfile{n.at(0), 0, 10}, + TaskProfile{n.at(1), 10, 25}, + TaskProfile{n.at(2), 10, 30}, + TaskProfile{n.at(3), 30, 55}, + }};
+        CHECK(correct == result);
+      }
+
+      SUBCASE("one node at a time") {
+        auto is_allowed_to_run = [&](Node const &n, + std::unordered_set<Node> const &in_progress_tasks, + std::unordered_set<Node> const &finished_tasks) { + return in_progress_tasks.size() == 0; + };
+
+        TaskExecutionConstraint constraint = TaskExecutionConstraint{is_allowed_to_run};
+        TaskGraphExecutionTrace result = simulate_task_graph_execution(g, cost_function, constraint);
+        TaskGraphExecutionTrace correct = TaskGraphExecutionTrace{{ + TaskProfile{n.at(0), 0, 10}, + TaskProfile{n.at(1), 10, 25}, + TaskProfile{n.at(2), 25, 45}, + TaskProfile{n.at(3), 45, 70}, + }};
+        CHECK(correct == result);
+      }
+    }
+
+    SUBCASE("diamond graph with crossing") {
+      std::vector<Node> n = add_nodes(g, 6);
+
+      add_edges(g, { + DirectedEdge{n.at(0), n.at(1)}, + DirectedEdge{n.at(0), n.at(2)}, + DirectedEdge{n.at(1), n.at(3)}, + DirectedEdge{n.at(2), n.at(3)}, + DirectedEdge{n.at(2), n.at(4)}, + DirectedEdge{n.at(3), n.at(5)}, + DirectedEdge{n.at(4), n.at(5)}, + });
+
+      auto cost_function = lookup_in_map<Node, float>({{n.at(0), 10}, + {n.at(1), 15}, + {n.at(2), 20}, + {n.at(3), 25}, + {n.at(4), 30}, + {n.at(5), 35}});
+
+      SUBCASE("no processing constraints") {
+        auto is_allowed_to_run = [&](Node const &n, + std::unordered_set<Node> const &in_progress_tasks, + std::unordered_set<Node> const &finished_tasks) { + return true; + };
+
+        TaskExecutionConstraint constraint = TaskExecutionConstraint{is_allowed_to_run};
+        TaskGraphExecutionTrace result = simulate_task_graph_execution(g, cost_function, constraint);
+        TaskGraphExecutionTrace correct = TaskGraphExecutionTrace{{ + TaskProfile{n.at(0), 0, 10}, + TaskProfile{n.at(1), 10, 25}, + TaskProfile{n.at(2), 10, 30}, + TaskProfile{n.at(3), 30, 55}, + TaskProfile{n.at(4), 30, 60}, + TaskProfile{n.at(5), 60, 95}, + }};
+        CHECK(correct == result);
+      }
+
+      SUBCASE("one node at a time") {
+        auto is_allowed_to_run = [&](Node const &n, + std::unordered_set<Node> const &in_progress_tasks, + std::unordered_set<Node> const &finished_tasks) { + return in_progress_tasks.size() == 0; + };
+
+        TaskExecutionConstraint constraint = TaskExecutionConstraint{is_allowed_to_run};
+        TaskGraphExecutionTrace result = simulate_task_graph_execution(g, cost_function, constraint);
+        TaskGraphExecutionTrace correct = TaskGraphExecutionTrace{{ + TaskProfile{n.at(0), 0, 10}, + TaskProfile{n.at(1), 10, 25}, + TaskProfile{n.at(2), 25, 45}, + TaskProfile{n.at(3), 45, 70}, + TaskProfile{n.at(4), 70, 100}, + TaskProfile{n.at(5), 100, 135}, + }};
+        CHECK(correct == result);
+      }
+    }
+
+    SUBCASE("all-to-all intermediate") {
+      std::vector<Node> n = add_nodes(g, 5);
+
+      add_edges(g, {DirectedEdge{n.at(0), n.at(1)}, + DirectedEdge{n.at(0), n.at(2)}, + DirectedEdge{n.at(0), n.at(3)}, + DirectedEdge{n.at(1), n.at(4)}, + DirectedEdge{n.at(2), n.at(4)}, + DirectedEdge{n.at(3), n.at(4)}});
+
+      auto cost_function = lookup_in_map<Node, float>({{n.at(0), 10}, + {n.at(1), 100}, + {n.at(2), 100}, + {n.at(3), 100}, + {n.at(4), 20}});
+
+      SUBCASE("at most two nodes at a time") {
+        auto is_allowed_to_run = [&](Node const &n, + std::unordered_set<Node> const &in_progress_tasks, + std::unordered_set<Node> const &finished_tasks) { + return in_progress_tasks.size() < 2; + };
+
+        TaskExecutionConstraint constraint = TaskExecutionConstraint{is_allowed_to_run};
+        TaskGraphExecutionTrace result = simulate_task_graph_execution(g, cost_function, constraint);
+        TaskGraphExecutionTrace correct = TaskGraphExecutionTrace{{ + TaskProfile{n.at(0), 0, 10}, + TaskProfile{n.at(1), 10, 110}, + TaskProfile{n.at(2), 10, 110}, + TaskProfile{n.at(3), 110, 210}, + TaskProfile{n.at(4), 210, 230}, + }};
+        CHECK(correct == result);
+      }
+    }
+  }
+}
+} // namespace FlexFlow
diff --git a/lib/compiler/test/src/compiler/task_graph_simulator/task_simulator.cc b/lib/compiler/test/src/compiler/task_graph_simulator/task_simulator.cc new file mode 100644 index 0000000000..e278338440 --- /dev/null +++ b/lib/compiler/test/src/compiler/task_graph_simulator/task_simulator.cc @@ -0,0 +1,265 @@
+#include "compiler/task_graph_simulator/task_simulator.h" +#include "../cost_estimator_for_test.h" +#include "compiler/cost_estimator/cost_estimator.h"
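+// The cases below exercise task_simulator_estimate_forward_pass_time with the
+// fake cost estimators from ../cost_estimator_for_test.h; the op and comm
+// costs are arbitrary constants chosen so that each expected forward-pass
+// time can be checked by hand against the device mapping.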
+#include "compiler/cost_estimator/op_cost_metrics.dtg.h" +#include "compiler/machine_mapping/machine_mapping.dtg.h" +#include "compiler/machine_mapping/machine_mapping.h" +#include "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_op_cost_estimate_key.h" +#include "op-attrs/ops/input_attrs.dtg.h" +#include "op-attrs/parallel_tensor_dims.dtg.h" +#include "op-attrs/parallel_tensor_shape.dtg.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "pcg/device_id.h" +#include "pcg/device_type.dtg.h" +#include "pcg/machine_space_coordinate.dtg.h" +#include "pcg/machine_specification.h" +#include "pcg/machine_specification_dimension.dtg.h" +#include "pcg/machine_view.dtg.h" +#include "pcg/machine_view.h" +#include "pcg/machine_view_dimension.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h" +#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.h" +#include "pcg/stride_t.dtg.h" +#include "substitutions/sub_parallel_computation_graph.dtg.h" +#include "substitutions/sub_parallel_computation_graph.h" +#include "utils/containers/get_only.h" +#include "utils/deduplicated_priority_queue.h" +#include "utils/graph/open_dataflow_graph/algorithms/get_source_nodes.h" +#include "utils/nonnegative_int/nonnegative_int.h" +#include +#include +#include +#include + +namespace FlexFlow { + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("task_simulator_estimate_forward_pass_time") { + MachineSpecification machine_spec = + MachineSpecification{/*num_nodes=*/3, + /*num_cpus_per_node=*/3, + /*num_gpus_per_node=*/3, + /*inter_node_bandwidth=*/1.0f, + /*intra_node_bandwidth=*/1.0f}; + + SUBCASE("linear graph") { + ParallelComputationGraphBuilder b; + ParallelTensorShape input_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{}, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + parallel_tensor_guid_t tensor0 = b.create_input_tensor(input_shape); + parallel_tensor_guid_t tensor1 = b.relu(tensor0); + + parallel_layer_guid_t layer0 = get_source_layer(tensor0); + parallel_layer_guid_t layer1 = get_source_layer(tensor1); + + std::vector dims = { + MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTER_NODE}, + MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTER_NODE}, + }; + ParallelComputationGraph pcg = b.pcg; + MachineView mv1 = + MachineView{MachineSpaceCoordinate{0, 0, DeviceType::GPU}, dims}; + MachineView mv2 = + MachineView{MachineSpaceCoordinate{0, 1, DeviceType::GPU}, dims}; + + MachineMapping device_mapping = MachineMapping{{ + {layer0, mv1}, + {layer1, mv2}, + }}; + + SUBCASE("constant op, comm cost") { + CostEstimator estimator = make_fake_constant_cost_estimator( + /*forward_op_cost=*/10.0f, + /*backward_op_cost=*/10.0f, + /*comm_cost=*/1.0f, + /*memory_cost=*/nonnegative_int{0}); + + float result = task_simulator_estimate_forward_pass_time( + pcg, estimator, device_mapping, machine_spec); + + float correct = 10 + 1 + 10; + CHECK(result == correct); + } + + SUBCASE("variable op, comm cost") { + CostEstimator cost_estimator = make_fake_cost_estimator( + [](OpCostEstimateKey const &op) { + if (op.op_attrs.has()) { + return OpCostMetrics{/*forward_runtime=*/10.0f, + /*backward_runtime=*/10.0f, + /*memory=*/nonnegative_int{0}}; // layer0 + } + if (op.op_attrs.has()) { + return OpCostMetrics{/*forward_runtime=*/1.0f, + 
/*backward_runtime=*/1.0f, + /*memory=*/nonnegative_int{0}}; // layer1 + } + return OpCostMetrics{/*forward_runtime=*/0.0f, + /*backward_runtime=*/0.0f, + /*memory=*/nonnegative_int{0}}; + }, + [](TensorSetMovement const &comm) { return 5.0f; }); + + float result = task_simulator_estimate_forward_pass_time( + pcg, cost_estimator, device_mapping, machine_spec); + float correct = 10 + 5 + 1; + CHECK(result == correct); + } + } + + SUBCASE("rhomboidal graph") { + ParallelComputationGraphBuilder b; + + ParallelTensorShape input_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ShardParallelDim{10, 1}}, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + parallel_tensor_guid_t tensor0 = b.create_input_tensor(input_shape); + parallel_tensor_guid_t tensor1 = b.relu(tensor0); + parallel_tensor_guid_t tensor2 = b.relu(tensor0); + parallel_tensor_guid_t tensor3 = b.add(tensor1, tensor2); + + parallel_layer_guid_t layer0 = get_source_layer(tensor0); + parallel_layer_guid_t layer1 = get_source_layer(tensor1); + parallel_layer_guid_t layer2 = get_source_layer(tensor2); + parallel_layer_guid_t layer3 = get_source_layer(tensor3); + + ParallelComputationGraph pcg = b.pcg; + std::vector dims = { + MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTER_NODE}, + MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTER_NODE}, + MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTER_NODE}, + }; + + SUBCASE("all different devices") { + MachineView mv0 = + MachineView{MachineSpaceCoordinate{0, 0, DeviceType::GPU}, dims}; + MachineView mv1 = + MachineView{MachineSpaceCoordinate{0, 1, DeviceType::GPU}, dims}; + MachineView mv2 = + MachineView{MachineSpaceCoordinate{1, 0, DeviceType::GPU}, dims}; + MachineView mv3 = + MachineView{MachineSpaceCoordinate{1, 1, DeviceType::GPU}, dims}; + + MachineMapping device_mapping = MachineMapping{{ + {layer0, mv0}, + {layer1, mv1}, + {layer2, mv2}, + {layer3, mv3}, + }}; + SUBCASE("constant op, comm cost") { + CostEstimator estimator = make_fake_constant_cost_estimator( + /*forward_op_cost=*/10.0f, + /*backward_op_cost=*/10.0f, + /*comm_cost=*/1.0f, + /*memory_cost=*/nonnegative_int{0}); + + float result = task_simulator_estimate_forward_pass_time( + pcg, estimator, device_mapping, machine_spec); + float correct = 10 + 1 + 10 + 1 + 10; + CHECK(result == correct); + } + SUBCASE("variable op, comm cost") { + CostEstimator cost_estimator = make_fake_cost_estimator( + [](OpCostEstimateKey const &op) { + if (op.op_attrs.has()) { + return OpCostMetrics{/*forward_runtime=*/10.0f, + /*backward_runtime=*/10.0f, + /*memory=*/nonnegative_int{0}}; // layer0 + } + if (op.op_attrs.has()) { + return OpCostMetrics{ + /*forward_runtime=*/1.0f, + /*backward_runtime=*/1.0f, + /*memory=*/nonnegative_int{0}}; // layers 1, 2 + } + if (op.op_attrs.has()) { + return OpCostMetrics{/*forward_runtime=*/2.0f, + /*backward_runtime=*/2.0f, + /*memory=*/nonnegative_int{0}}; // layer3 + } + return OpCostMetrics{/*forward_runtime=*/0.0f, + /*backward_runtime=*/0.0f, + /*memory=*/nonnegative_int{0}}; + }, + [](TensorSetMovement const &comm) { return 5.0f; }); + } + } + + SUBCASE("all the same device") { + MachineView mv = + MachineView{MachineSpaceCoordinate{0, 0, DeviceType::GPU}, dims}; + MachineMapping device_mapping = MachineMapping{{ + {layer0, mv}, + {layer1, mv}, + {layer2, mv}, + {layer3, mv}, + }}; + SUBCASE("constant op, cost cost") { + CostEstimator cost_estimator = 
make_fake_constant_cost_estimator( + /*forward_op_cost=*/10.0f, + /*backward_op_cost=*/10.0f, + /*comm_cost=*/1.0f, + /*memory_cost=*/nonnegative_int{0}); + + float result = task_simulator_estimate_forward_pass_time( + pcg, cost_estimator, device_mapping, machine_spec); + float correct = 10 + 10 + 10 + 10 + 1 + 1; + CHECK(result == correct); + } + SUBCASE("variable op, cost cost") { + CostEstimator cost_estimator = make_fake_cost_estimator( + [](OpCostEstimateKey const &op) { + if (op.op_attrs.has()) { + return OpCostMetrics{/*forward_runtime=*/10.0f, + /*backward_runtime=*/10.0f, + /*memory=*/nonnegative_int{0}}; // layer0 + } + if (op.op_attrs.has()) { + return OpCostMetrics{ + /*forward_runtime=*/1.0f, + /*backward_runtime=*/1.0f, + /*memory=*/nonnegative_int{0}}; // layers 1, 2 + } + if (op.op_attrs.has()) { + return OpCostMetrics{/*forward_runtime=*/2.0f, + /*backward_runtime=*/2.0f, + /*memory=*/nonnegative_int{0}}; // layer3 + } + return OpCostMetrics{/*forward_runtime=*/0.0f, + /*backward_runtime=*/0.0f, + /*memory=*/nonnegative_int{0}}; + }, + [](TensorSetMovement const &comm) { return 5.0f; }); + float result = task_simulator_estimate_forward_pass_time( + pcg, cost_estimator, device_mapping, machine_spec); + float correct = 10 + 5 + (1 + 1) + 5 + 2; + CHECK(result == correct); + } + } + } + } +} +} // namespace FlexFlow diff --git a/lib/pcg/include/pcg/machine_specification.h b/lib/pcg/include/pcg/machine_specification.h index 6ffa9900c2..39591e8a70 100644 --- a/lib/pcg/include/pcg/machine_specification.h +++ b/lib/pcg/include/pcg/machine_specification.h @@ -20,6 +20,7 @@ bool is_valid_machine_space_coordinate(MachineSpecification const &ms, device_id_t get_device_id(MachineSpecification const &ms, MachineSpaceCoordinate const &coord); + } // namespace FlexFlow #endif diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 293227b7a1..f72b2359dc 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -37,6 +37,14 @@ std::unordered_set MachineView const &mv, MachineSpecification const &ms); +std::unordered_set get_device_ids(OperatorTaskSpace const &task, + MachineView const &mv, + MachineSpecification const &ms); + +MachineView make_1d_machine_view(MachineSpaceCoordinate const &start, + MachineSpecificationDimension const &dim, + stride_t stride); + } // namespace FlexFlow #endif diff --git a/lib/pcg/include/pcg/operator_task_space.h b/lib/pcg/include/pcg/operator_task_space.h index 61cab4eff1..1a19397c72 100644 --- a/lib/pcg/include/pcg/operator_task_space.h +++ b/lib/pcg/include/pcg/operator_task_space.h @@ -2,6 +2,8 @@ #define _FLEXFLOW_PCG_INCLUDE_OPERATOR_TASK_SPACE_H #include "pcg/operator_task_space.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" +#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" #include "pcg/task_space_coordinate.dtg.h" #include #include @@ -17,6 +19,9 @@ TaskSpaceCoordinate size_t num_dims(OperatorTaskSpace const &task); size_t num_tasks(OperatorTaskSpace const &task); +OperatorTaskSpace get_operator_task_space(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &layer); + } // namespace FlexFlow #endif diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h index c740e1ffd2..f7567b5025 100644 --- a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h +++ 
b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h @@ -6,6 +6,7 @@ #include "pcg/parallel_computation_graph/parallel_layer_added_result.dtg.h" #include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" #include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h" +#include <unordered_set>
namespace FlexFlow { @@ -31,6 +32,20 @@ std::unordered_set<ParallelComputationGraphEdge> parallel_layer_guid_t const &, parallel_layer_guid_t const &);
+std::unordered_set<ParallelComputationGraphEdge> + get_edges(ParallelComputationGraph const &);
+
+std::unordered_set<ParallelComputationGraphEdge> + get_outgoing_edges(ParallelComputationGraph const &, + parallel_layer_guid_t const &);
+
+std::unordered_set<ParallelComputationGraphEdge> + get_incoming_edges(ParallelComputationGraph const &, + parallel_layer_guid_t const &);
+
+std::unordered_set<parallel_layer_guid_t> + get_initial_layers(ParallelComputationGraph const &);
+
std::vector<parallel_tensor_guid_t> get_incoming_tensors(ParallelComputationGraph const &, parallel_layer_guid_t const &); @@ -45,6 +60,9 @@ std::vector<parallel_tensor_guid_t> get_incoming_weights(ParallelComputationGraph const &, parallel_layer_guid_t const &);
+parallel_layer_guid_t get_source_layer(ParallelComputationGraph const &g, + parallel_tensor_guid_t const &t);
+
ParallelLayerAttrs get_parallel_layer_attrs(ParallelComputationGraph const &, parallel_layer_guid_t const &); PCGOperatorAttrs pcg_get_op_attrs(ParallelComputationGraph const &,
diff --git a/lib/pcg/src/pcg/machine_specification.cc b/lib/pcg/src/pcg/machine_specification.cc index ca5b8ba047..19ff50b4b7 100644 --- a/lib/pcg/src/pcg/machine_specification.cc +++ b/lib/pcg/src/pcg/machine_specification.cc @@ -1,5 +1,6 @@ #include "pcg/machine_specification.h" #include "pcg/device_id.h" +#include "utils/containers/transform.h" #include "utils/exception.h" namespace FlexFlow {
diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index 18f6cacb7e..cc42ad83b2 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,14 +1,21 @@ #include "pcg/machine_view.h" +#include "pcg/machine_space_coordinate.dtg.h" +#include "pcg/machine_specification.dtg.h" #include "pcg/machine_specification.h" +#include "pcg/machine_specification_dimension.dtg.h" +#include "pcg/machine_view_dimension.dtg.h" +#include "pcg/operator_task_space.dtg.h" #include "pcg/operator_task_space.h" +#include "pcg/stride_t.dtg.h" #include "utils/containers/contains.h" #include "utils/containers/count.h" #include "utils/containers/filter.h" +#include "utils/containers/get_only.h" #include "utils/containers/scanl.h" #include "utils/containers/sum.h" #include "utils/containers/transform.h" #include "utils/containers/zip.h" - +#include "utils/exception.h"
namespace FlexFlow {
size_t num_dims(MachineView const &mv) { @@ -35,6 +42,13 @@ MachineView machine_view_from_strides_and_machine_spec_dimensions( MachineSpaceCoordinate const &start, std::vector<stride_t> const &strides, std::vector<MachineSpecificationDimension> const &dims) {
+  if (strides.size() != dims.size()) { + throw mk_runtime_error(fmt::format( + "Length of strides ({}) and dims ({}) must match when calling " + "machine_view_from_strides_and_machine_spec_dimensions", + strides.size(), + dims.size())); + }
  std::vector<MachineViewDimension> dimensions = transform(zip(strides, dims), [&](auto const &p) { return MachineViewDimension{p.first, p.second}; @@ -48,6 +62,14 @@ std::optional<MachineSpaceCoordinate> get_machine_space_coordinate( TaskSpaceCoordinate const &coord, MachineSpecification const &machine_specification) {
+  if (num_dims(machine_view) != task.degrees.size()) { + throw mk_runtime_error( + fmt::format("Dimension of machine_view ({}) must match dimension of " + "task ({}) when computing machine space coordinate", + machine_view, + task.degrees)); + }
+
  auto get_dimension_indices_for_dimension = + [&](MachineSpecificationDimension dimension) { + std::vector<MachineViewDimension> mv_dimensions =
@@ -106,10 +128,37 @@ std::unordered_set<MachineSpaceCoordinate> get_machine_space_coordinates( MachineSpecification const &machine_specification) { return transform( get_task_space_coordinates(task), [&](TaskSpaceCoordinate const &coord) { - return get_machine_space_coordinate( - task, machine_view, coord, machine_specification) - .value(); + std::optional<MachineSpaceCoordinate> maybe_coordinate = + get_machine_space_coordinate( + task, machine_view, coord, machine_specification); + if (!maybe_coordinate.has_value()) { + throw mk_runtime_error( + fmt::format("In get_machine_space_coordinates, the given " + "OperatorTaskSpace {} and MachineView {} are not " + "compatible with the given MachineSpecification {}", + task, + machine_view, + machine_specification)); + } + return maybe_coordinate.value(); }); }
+std::unordered_set<device_id_t> get_device_ids(OperatorTaskSpace const &task, + MachineView const &mv, + MachineSpecification const &ms) { + return transform(get_machine_space_coordinates(task, mv, ms), + [&](MachineSpaceCoordinate const &coord) { + return get_device_id(ms, coord); + }); +}
+
+MachineView make_1d_machine_view(MachineSpaceCoordinate const &start, + MachineSpecificationDimension const &dim, + stride_t stride) {
+
+  return machine_view_from_strides_and_machine_spec_dimensions( + start, {stride}, {dim});
+}
+
} // namespace FlexFlow
diff --git a/lib/pcg/src/pcg/operator_task_space.cc b/lib/pcg/src/pcg/operator_task_space.cc index 2538cb4ea0..7157b75082 100644 --- a/lib/pcg/src/pcg/operator_task_space.cc +++ b/lib/pcg/src/pcg/operator_task_space.cc @@ -1,12 +1,19 @@ #include "pcg/operator_task_space.h" +#include "op-attrs/parallel_tensor_shape.dtg.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "pcg/operator_task_space.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h" #include "utils/containers/cartesian_product.h" +#include "utils/containers/extend.h" #include "utils/containers/maximum.h" #include "utils/containers/product.h" #include "utils/containers/range.h" #include "utils/containers/transform.h" #include "utils/containers/unordered_set_of.h" +#include "utils/containers/vector_of.h" #include "utils/fmt/unordered_set.h" -
namespace FlexFlow {
std::unordered_set<TaskSpaceCoordinate> @@ -36,4 +43,16 @@ size_t num_tasks(OperatorTaskSpace const &task) { return product(task.degrees); }
+OperatorTaskSpace get_operator_task_space(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &layer) { + parallel_tensor_guid_t out_tensor = get_layer_outputs(pcg, layer).at(0); + ParallelTensorShape shape = get_parallel_tensor_shape(pcg, out_tensor);
+
+  std::vector<int> degrees; + extend(degrees, vector_of(ff_ordered_shard_degrees(shape))); + degrees.push_back(get_sum_degree(shape)); + degrees.push_back(get_discard_copy_degree(shape)); + return OperatorTaskSpace{degrees}; +}
+
} // namespace FlexFlow
diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc index 781c44640c..4cc0500fa2 100644 --- a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc @@ -1,15 +1,25 @@ #include
"pcg/parallel_computation_graph/parallel_computation_graph.h" #include "op-attrs/get_incoming_tensor_roles.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.dtg.h" +#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" #include "utils/containers/filtrans.h" #include "utils/containers/get_only.h" #include "utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" #include "utils/graph/dataflow_graph/algorithms.h" #include "utils/graph/dataflow_graph/algorithms/get_dataflow_edges_from_node_to_node.h" +#include "utils/graph/dataflow_graph/algorithms/get_incoming_edges.h" +#include "utils/graph/dataflow_graph/algorithms/get_outgoing_edges.h" +#include "utils/graph/dataflow_graph/dataflow_edge.dtg.h" +#include "utils/graph/digraph/algorithms.h" #include "utils/graph/digraph/algorithms/get_topological_ordering.h" #include "utils/graph/instances/unordered_set_labelled_open_dataflow_graph.h" #include "utils/graph/labelled_dataflow_graph/algorithms/find_isomorphism.h" #include "utils/graph/labelled_dataflow_graph/algorithms/rewrite_node_labels.h" #include "utils/graph/node/algorithms.h" +#include "utils/graph/node/node.dtg.h" +#include namespace FlexFlow { @@ -66,6 +76,13 @@ ParallelLayerAddedResult /*output_labels=*/{tensor_attrs}); } +std::unordered_set + get_edges(ParallelComputationGraph const &pcg) { + return transform(get_edges(pcg.raw_graph), [](DataflowEdge const &e) { + return ParallelComputationGraphEdge{e}; + }); +} + std::unordered_set get_pcg_edges_from_layer_to_layer(ParallelComputationGraph const &pcg, parallel_layer_guid_t const &src, @@ -78,6 +95,33 @@ std::unordered_set }); } +std::unordered_set + get_outgoing_edges(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &l) { + std::unordered_set raw_edges = + get_outgoing_edges(pcg.raw_graph, l.raw_graph_node); + return transform(raw_edges, [](DataflowEdge const &e) { + return ParallelComputationGraphEdge{e}; + }); +} + +std::unordered_set + get_incoming_edges(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &l) { + std::unordered_set raw_edges = + unordered_set_of(get_incoming_edges(pcg.raw_graph, l.raw_graph_node)); + return transform(raw_edges, [](DataflowEdge const &e) { + return ParallelComputationGraphEdge{e}; + }); +} + +std::unordered_set + get_initial_layers(ParallelComputationGraph const &pcg) { + std::unordered_set raw_sources = get_sources(pcg.raw_graph); + return transform(raw_sources, + [](Node const &n) { return parallel_layer_guid_t{n}; }); +} + std::vector get_incoming_tensors(ParallelComputationGraph const &pcg, parallel_layer_guid_t const &l) { diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_edge.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_edge.cc index dca8154eb4..d30739486e 100644 --- a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_edge.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_edge.cc @@ -1,4 +1,5 @@ #include "pcg/parallel_computation_graph/parallel_computation_graph_edge.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.dtg.h" namespace FlexFlow { diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index dcf22d6c00..3e9d48fac3 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -1,4 +1,5 @@ #include 
"pcg/machine_view.h" +#include "pcg/gpu_id_t.dtg.h" #include "test/utils/doctest/fmt/optional.h" #include "utils/containers/transform.h" #include "utils/fmt/unordered_set.h" @@ -298,4 +299,94 @@ TEST_SUITE(FF_TEST_SUITE) { } } } + + TEST_CASE("get_device_ids") { + + SUBCASE("1D machine view") { + + // This operator has shape (3,), and thus 3 tasks. + // The (only) dimension is projected onto the INTRA (device) dimension + // with a stride of 2. The start of the projection defined by MachineView + // is at MachineSpaceCoordinate (0, 1), and the machine space has 1 node + // and 6 devices per node. + + /** + * The tasks will thus be distributed like this: + * +-------+-------+-------+-------+-------+-------+ + * | 0 | ((1)) | 2 | ((3)) | 4 | ((5)) | + * +-------+-------+-------+-------+-------+-------+ + * Where the integers are the device ids and ((x)) are the devices we + * select + */ + MachineSpecification ms = + MachineSpecification{/*num_nodes=*/1, + /*num_cpus_per_node=*/6, + /*num_gpus_per_node=*/6, + /*inter_node_bandwidth=*/0, + /*intra_node_bandwidth=*/0}; + + OperatorTaskSpace task = OperatorTaskSpace{{3}}; + MachineView mv = MachineView{ + MachineSpaceCoordinate{ + /*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}, + {MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTRA_NODE}}}; + + std::unordered_set correct = { + device_id_t{gpu_id_t{1}}, + device_id_t{gpu_id_t{3}}, + device_id_t{gpu_id_t{5}}, + }; + std::unordered_set result = get_device_ids(task, mv, ms); + CHECK(result == correct); + } + + SUBCASE("2D machine view") { + // This operator has shape (2, 2), and thus 2 * 2 = 4 tasks. + // - The first dimension is projected onto the INTER (node) dimension with + // stride 1, + // - The second dimension is projected onto the INTRA (device) dimension + // with stride 2. The start of the projection defined by MachineView is at + // MachineSpaceCoordinate (1, 2), and the machine space has 3 nodes and 5 + // devices per node. 
+
+      /**
+       * The tasks will thus be distributed like this:
+       * +-------+-------+-------+-------+-------+
+       * |   0   |   1   |   2   |   3   |   4   |
+       * +-------+-------+-------+-------+-------+
+       * |   5   |   6   | ((7)) |   8   | ((9)) |
+       * +-------+-------+-------+-------+-------+
+       * |  10   |  11   | ((12))|  13   | ((14))|
+       * +-------+-------+-------+-------+-------+
+       * Where the integers are the device ids and ((x)) are the devices we
+       * select
+       */
+
+      MachineSpecification ms = MachineSpecification{/*num_nodes=*/3, + /*num_cpus_per_node=*/5, + /*num_gpus_per_node=*/5, + /*inter_node_bandwidth=*/0, + /*intra_node_bandwidth=*/0};
+
+      OperatorTaskSpace task = OperatorTaskSpace{{2, 2}};
+      MachineView mv = MachineView{ + MachineSpaceCoordinate{ + /*node_idx=*/1, /*device_idx=*/2, DeviceType::GPU}, + {MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTER_NODE}, + MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTRA_NODE}}};
+
+      std::unordered_set<device_id_t> correct = { + device_id_t{gpu_id_t{7}}, + device_id_t{gpu_id_t{9}}, + device_id_t{gpu_id_t{12}}, + device_id_t{gpu_id_t{14}}, + };
+      std::unordered_set<device_id_t> result = get_device_ids(task, mv, ms);
+      CHECK(result == correct);
+    }
+  }
}
diff --git a/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc index fc07edf5b3..dd8308561f 100644 --- a/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc +++ b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc @@ -36,8 +36,8 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_tensor_guid_t tensor3 = get_only(layer3_added.outputs);
std::vector<parallel_layer_guid_t> result = topological_ordering(pcg); - // std::vector<parallel_layer_guid_t> correct = {layer1, layer2, layer3}; - // CHECK(result == correct); + std::vector<parallel_layer_guid_t> correct = {layer1, layer2, layer3}; + CHECK(result == correct); } TEST_CASE(
@@ -105,6 +105,82 @@ TEST_SUITE(FF_TEST_SUITE) { } }
+  TEST_CASE( + "get_source_layer(ParallelComputationGraph, parallel_tensor_guid_t)") {
+    ParallelTensorShape tensor_shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered<ShardParallelDim>{ + ShardParallelDim{10, 2}, + ShardParallelDim{12, 1}, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + };
+
+    ParallelComputationGraph pcg = empty_parallel_computation_graph();
+
+    ParallelLayerAttrs layer_label = some<ParallelLayerAttrs>();
+    ParallelTensorAttrs tensor_label = some<ParallelTensorAttrs>();
+
+    SUBCASE("single layer") {
+      ParallelLayerAddedResult layer1_added = + add_parallel_layer(pcg, layer_label, {}, {tensor_label});
+      parallel_layer_guid_t layer1 = layer1_added.parallel_layer;
+      parallel_tensor_guid_t tensor1 = get_only(layer1_added.outputs);
+
+      parallel_layer_guid_t result = get_source_layer(pcg, tensor1);
+      parallel_layer_guid_t correct = layer1;
+      CHECK(result == correct);
+    }
+
+    SUBCASE("two connected layers") {
+      ParallelLayerAddedResult layer1_added = + add_parallel_layer(pcg, layer_label, {}, {tensor_label});
+      parallel_layer_guid_t layer1 = layer1_added.parallel_layer;
+      parallel_tensor_guid_t tensor1 = get_only(layer1_added.outputs);
+
+      ParallelLayerAddedResult layer2_added = + add_parallel_layer(pcg, layer_label, {tensor1}, {tensor_label});
+      parallel_layer_guid_t layer2 = layer2_added.parallel_layer;
+
+      parallel_layer_guid_t result = get_source_layer(pcg, tensor1);
+      parallel_layer_guid_t correct = layer1;
+      CHECK(result == correct);
+    }
+
+    SUBCASE("three layers in series") {
+      ParallelLayerAddedResult layer1_added = + add_parallel_layer(pcg,
layer_label, {}, {tensor_label});
+      parallel_layer_guid_t layer1 = layer1_added.parallel_layer;
+      parallel_tensor_guid_t tensor1 = get_only(layer1_added.outputs);
+
+      ParallelLayerAddedResult layer2_added = + add_parallel_layer(pcg, layer_label, {tensor1}, {tensor_label});
+      parallel_layer_guid_t layer2 = layer2_added.parallel_layer;
+      parallel_tensor_guid_t tensor2 = get_only(layer2_added.outputs);
+
+      ParallelLayerAddedResult layer3_added = + add_parallel_layer(pcg, layer_label, {tensor2}, {tensor_label});
+      parallel_layer_guid_t layer3 = layer3_added.parallel_layer;
+
+      SUBCASE("tensor 1") {
+        parallel_layer_guid_t result = get_source_layer(pcg, tensor1);
+        parallel_layer_guid_t correct = layer1;
+        CHECK(result == correct);
+      }
+
+      SUBCASE("tensor 2") {
+        parallel_layer_guid_t result = get_source_layer(pcg, tensor2);
+        parallel_layer_guid_t correct = layer2;
+        CHECK(result == correct);
+      }
+    }
+  }
+
  TEST_CASE( "get_incoming_weights(ParallelComputationGraph, parallel_layer_guid_t)") { ParallelTensorShape input_shape = ParallelTensorShape{
diff --git a/lib/runtime/src/parallel_compuation_graph.cc b/lib/runtime/src/parallel_compuation_graph.cc deleted file mode 100644 index ebc5ac1e8e..0000000000 --- a/lib/runtime/src/parallel_compuation_graph.cc +++ /dev/null @@ -1,7 +0,0 @@ -#include "parallel_computation_graph.h" - -namespace FlexFlow { - -ParallelTensor ParallelComputationGraph::{} - -} // namespace FlexFlow
diff --git a/lib/utils/include/utils/archetypes/value_type.h b/lib/utils/include/utils/archetypes/value_type.h index 1635747612..e45b8fda7e 100644 --- a/lib/utils/include/utils/archetypes/value_type.h +++ b/lib/utils/include/utils/archetypes/value_type.h @@ -2,7 +2,10 @@ #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ARCHETYPES_VALUE_TYPE_H #include +#include #include +#include +#include
namespace FlexFlow { @@ -32,6 +35,16 @@ struct value_type { } };
+template <int TAG> +std::string format_as(value_type<TAG> const &) { + assert(false); +}
+
+template <int TAG> +std::ostream &operator<<(std::ostream &s, value_type<TAG> const &x) { + assert(false); +}
+
} // namespace FlexFlow
namespace std {
diff --git a/lib/utils/include/utils/containers/lookup_in_map.h b/lib/utils/include/utils/containers/lookup_in_map.h new file mode 100644 index 0000000000..946fc589db --- /dev/null +++ b/lib/utils/include/utils/containers/lookup_in_map.h @@ -0,0 +1,27 @@
+#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_LOOKUP_IN_MAP_H
+#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_LOOKUP_IN_MAP_H
+
+#include "utils/containers/contains.h" +#include "utils/containers/keys.h" +#include "utils/exception.h" +#include "utils/fmt/unordered_map.h" +#include <functional> +#include <unordered_map> +#include
+
+namespace FlexFlow {
+
+template <typename K, typename V>
+std::function<V(K)> lookup_in_map(std::unordered_map<K, V> const &map) {
+  return [map](K const &key) -> V {
+    if (!contains(keys(map), key)) {
+      throw mk_runtime_error(fmt::format( + "Key {} is not present in the underlying map {}", key, map));
+    }
+    return map.at(key);
+  };
+}
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/utils/include/utils/containers/minimum.h b/lib/utils/include/utils/containers/minimum.h new file mode 100644 index 0000000000..8bdd6ea985 --- /dev/null +++ b/lib/utils/include/utils/containers/minimum.h @@ -0,0 +1,21 @@
+#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MINIMUM_H
+#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MINIMUM_H
+
+#include "utils/exception.h" +#include <algorithm>
+
+namespace FlexFlow {
+
+template <typename C>
+typename C::value_type minimum(C const &c) {
+  if (c.empty()) {
+    throw mk_runtime_error(
fmt::format("minimum expected non-empty container but received {}", c)); + } + + return *std::min_element(c.begin(), c.end()); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/deduplicated_priority_queue.h b/lib/utils/include/utils/deduplicated_priority_queue.h index 66f6e524d4..afad3f5889 100644 --- a/lib/utils/include/utils/deduplicated_priority_queue.h +++ b/lib/utils/include/utils/deduplicated_priority_queue.h @@ -3,6 +3,7 @@ #include "utils/containers/contains.h" #include +#include #include #include @@ -38,6 +39,16 @@ class DeduplicatedPriorityQueue { impl.pop(); } + std::set contents() const { + auto temp = impl; + std::set result; + while (!temp.empty()) { + result.insert(temp.top()); + temp.pop(); + } + return result; + } + private: std::priority_queue impl; std::unordered_set hashmap; diff --git a/lib/utils/include/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.h b/lib/utils/include/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.h new file mode 100644 index 0000000000..a8b5efe66e --- /dev/null +++ b/lib/utils/include/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.h @@ -0,0 +1,16 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_DATAFLOW_GRAPH_ALGORITHMS_GET_OUTGOING_EDGES_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_DATAFLOW_GRAPH_ALGORITHMS_GET_OUTGOING_EDGES_H + +#include "utils/graph/dataflow_graph/dataflow_graph_view.h" + +namespace FlexFlow { + +std::unordered_set get_outgoing_edges(DataflowGraphView const &, + Node const &); +std::unordered_set + get_outgoing_edges(DataflowGraphView const &, + std::unordered_set const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/src/utils/containers/lookup_in_map.cc b/lib/utils/src/utils/containers/lookup_in_map.cc new file mode 100644 index 0000000000..a0d7db8e82 --- /dev/null +++ b/lib/utils/src/utils/containers/lookup_in_map.cc @@ -0,0 +1,12 @@ +#include "utils/containers/lookup_in_map.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using K = value_type<0>; +using V = value_type<1>; + +template std::function + lookup_in_map(std::unordered_map const &map); + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/containers/minimum.cc b/lib/utils/src/utils/containers/minimum.cc new file mode 100644 index 0000000000..c9bbc7706f --- /dev/null +++ b/lib/utils/src/utils/containers/minimum.cc @@ -0,0 +1 @@ +#include "utils/containers/minimum.h" diff --git a/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc b/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc new file mode 100644 index 0000000000..2376e4897f --- /dev/null +++ b/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc @@ -0,0 +1,28 @@ +#include "utils/graph/dataflow_graph/algorithms/get_outgoing_edges.h" +#include "utils/containers/sorted_by.h" + +namespace FlexFlow { + +std::unordered_set get_outgoing_edges(DataflowGraphView const &g, + Node const &n) { + return g.query_edges(DataflowEdgeQuery{ + {n}, + query_set::matchall(), + query_set::matchall(), + query_set::matchall(), + }); +} + +std::unordered_set + get_outgoing_edges(DataflowGraphView const &g, + std::unordered_set const &ns) { + DataflowEdgeQuery query = DataflowEdgeQuery{ + query_set{ns}, + query_set::matchall(), + query_set::matchall(), + query_set::matchall(), + }; + return g.query_edges(query); +} + +} // namespace FlexFlow diff --git a/lib/utils/test/src/utils/containers/lookup_in_map.cc b/lib/utils/test/src/utils/containers/lookup_in_map.cc 
new file mode 100644 index 0000000000..9ca356ee4b --- /dev/null +++ b/lib/utils/test/src/utils/containers/lookup_in_map.cc @@ -0,0 +1,31 @@
+#include "utils/containers/lookup_in_map.h" +#include <doctest/doctest.h> +#include <string> +#include <unordered_map>
+
+using namespace FlexFlow;
+
+TEST_SUITE(FF_TEST_SUITE) {
+
+  TEST_CASE("lookup_in_map") {
+
+    std::unordered_map<std::string, int> map = {{"a", 1}, {"b", 2}};
+
+    SUBCASE("existing keys") {
+      std::function<int(std::string)> func = lookup_in_map(map);
+      CHECK(func("a") == 1);
+      CHECK(func("b") == 2);
+    }
+
+    SUBCASE("missing key") {
+      std::function<int(std::string)> func = lookup_in_map(map);
+      CHECK_THROWS(func("c"));
+    }
+
+    SUBCASE("empty map") {
+      std::unordered_map<std::string, int> map = {};
+      std::function<int(std::string)> func = lookup_in_map(map);
+      CHECK_THROWS(func("a"));
+    }
+  }
+}
diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_incoming_edges.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_incoming_edges.cc new file mode 100644 index 0000000000..86e4802cdb --- /dev/null +++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_incoming_edges.cc @@ -0,0 +1,51 @@
+#include "utils/graph/dataflow_graph/algorithms/get_incoming_edges.h" +#include "utils/containers/get_only.h" +#include "utils/graph/dataflow_graph/dataflow_graph.h" +#include "utils/graph/instances/unordered_set_dataflow_graph.h" +#include <doctest/doctest.h>
+
+using namespace ::FlexFlow;
+
+TEST_SUITE(FF_TEST_SUITE) {
+  TEST_CASE("get_incoming_edges(DataflowGraphView, Node)") {
+    DataflowGraph g = DataflowGraph::create<UnorderedSetDataflowGraph>();
+
+    NodeAddedResult n1_added = g.add_node({}, 1);
+    Node n1 = n1_added.node;
+    DataflowOutput o1 = get_only(n1_added.outputs);
+
+    NodeAddedResult n2_added = g.add_node({}, 1);
+    Node n2 = n2_added.node;
+    DataflowOutput o2 = get_only(n2_added.outputs);
+
+    NodeAddedResult n3_added = g.add_node({o2}, 1);
+    Node n3 = n3_added.node;
+    DataflowOutput o3 = get_only(n3_added.outputs);
+
+    NodeAddedResult n4_added = g.add_node({o2, o3}, 1);
+    Node n4 = n4_added.node;
+    DataflowOutput o4 = get_only(n4_added.outputs);
+
+    SUBCASE("n4 - multiple incoming edges") {
+      std::vector<DataflowEdge> result = get_incoming_edges(g, n4);
+      std::vector<DataflowEdge> correct = { + DataflowEdge{o2, DataflowInput{n4, 0}}, + DataflowEdge{o3, DataflowInput{n4, 1}}};
+      CHECK(result == correct);
+    }
+
+    SUBCASE("n3 - single incoming edge") {
+      std::vector<DataflowEdge> result = get_incoming_edges(g, n3);
+      std::vector<DataflowEdge> correct = { + DataflowEdge{o2, DataflowInput{n3, 0}}, + };
+      CHECK(result == correct);
+    }
+
+    SUBCASE("n1 - no incoming edges") {
+      std::vector<DataflowEdge> result = get_incoming_edges(g, n1);
+      std::vector<DataflowEdge> correct = {};
+      CHECK(result == correct);
+    }
+  }
+}
diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc new file mode 100644 index 0000000000..be874b7e29 --- /dev/null +++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc @@ -0,0 +1,90 @@
+#include "utils/graph/dataflow_graph/algorithms/get_outgoing_edges.h" +#include "utils/containers/get_only.h" +#include "utils/graph/dataflow_graph/dataflow_graph.h" +#include "utils/graph/instances/unordered_set_dataflow_graph.h" +#include <doctest/doctest.h>
+
+using namespace ::FlexFlow;
+
+TEST_SUITE(FF_TEST_SUITE) {
+  TEST_CASE("get_outgoing_edges(DataflowGraphView, Node)") {
+    DataflowGraph g = DataflowGraph::create<UnorderedSetDataflowGraph>();
+
+    NodeAddedResult n1_added = g.add_node({}, 1);
+    Node n1 = n1_added.node;
+    DataflowOutput o1 = get_only(n1_added.outputs);
+
+    NodeAddedResult n2_added = g.add_node({o1}, 1);
+    Node n2 = n2_added.node;
DataflowOutput o2 = get_only(n2_added.outputs);
+
+    NodeAddedResult n3_added = g.add_node({o1}, 1);
+    Node n3 = n3_added.node;
+    DataflowOutput o3 = get_only(n3_added.outputs);
+
+    NodeAddedResult n4_added = g.add_node({o2}, 1);
+    Node n4 = n4_added.node;
+    DataflowOutput o4 = get_only(n4_added.outputs);
+
+    SUBCASE("n2 - single outgoing edge") {
+      std::unordered_set<DataflowEdge> result = get_outgoing_edges(g, n2);
+      std::unordered_set<DataflowEdge> correct = { + DataflowEdge{o2, DataflowInput{n4, 0}}, + };
+      CHECK(result == correct);
+    }
+
+    SUBCASE("n1 - multiple outgoing edges") {
+      std::unordered_set<DataflowEdge> result = get_outgoing_edges(g, n1);
+      std::unordered_set<DataflowEdge> correct = { + DataflowEdge{o1, DataflowInput{n2, 0}}, + DataflowEdge{o1, DataflowInput{n3, 0}}, + };
+      CHECK(result == correct);
+    }
+
+    SUBCASE("n4 - no outgoing edges") {
+      std::unordered_set<DataflowEdge> result = get_outgoing_edges(g, n4);
+      std::unordered_set<DataflowEdge> correct = {};
+      CHECK(result == correct);
+    }
+  }
+
+  TEST_CASE("get_outgoing_edges(DataflowGraphView, std::unordered_set<Node>)") {
+    DataflowGraph g = DataflowGraph::create<UnorderedSetDataflowGraph>();
+
+    NodeAddedResult n1_added = g.add_node({}, 1);
+    Node n1 = n1_added.node;
+    DataflowOutput o1 = get_only(n1_added.outputs);
+
+    NodeAddedResult n2_added = g.add_node({o1}, 1);
+    Node n2 = n2_added.node;
+    DataflowOutput o2 = get_only(n2_added.outputs);
+
+    NodeAddedResult n3_added = g.add_node({o1}, 1);
+    Node n3 = n3_added.node;
+    DataflowOutput o3 = get_only(n3_added.outputs);
+
+    NodeAddedResult n4_added = g.add_node({o2}, 1);
+    Node n4 = n4_added.node;
+    DataflowOutput o4 = get_only(n4_added.outputs);
+
+    SUBCASE("multiple nodes - combined outgoing edges") {
+      std::unordered_set<Node> nodes = {n1, n2};
+      std::unordered_set<DataflowEdge> result = get_outgoing_edges(g, nodes);
+      std::unordered_set<DataflowEdge> correct = { + DataflowEdge{o1, DataflowInput{n2, 0}}, + DataflowEdge{o1, DataflowInput{n3, 0}}, + DataflowEdge{o2, DataflowInput{n4, 0}}, + };
+      CHECK(result == correct);
+    }
+
+    SUBCASE("multiple nodes - no outgoing edges") {
+      std::unordered_set<Node> nodes = {n3, n4};
+      std::unordered_set<DataflowEdge> result = get_outgoing_edges(g, nodes);
+      std::unordered_set<DataflowEdge> correct = {};
+      CHECK(result == correct);
+    }
+  }
+}
From c116872ed69ae58ac0711405d7d222fe60b80b25 Mon Sep 17 00:00:00 2001 From: Colin Unger <unger@stanford.edu> Date: Wed, 29 Jan 2025 19:39:32 -0800 Subject: [PATCH 2/5] Add AWS linux AMI to runs-on for testing (#1589) --- .github/runs-on.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/.github/runs-on.yml b/.github/runs-on.yml index 14f75549dd..b558b5131a 100644 --- a/.github/runs-on.yml +++ b/.github/runs-on.yml @@ -1,4 +1,10 @@ images: + amazon-linux-gpu-x64: + platform: "linux" + arch: "x64" + owner: "898082745236" # AWS + name: "Amazon Linux 2 AMI with NVIDIA TESLA GPU Driver*"
+
dlami-x64: platform: "linux" arch: "x64" @@ -8,4 +14,4 @@ runners: gpu-nvidia: family: ["g4dn.xlarge"] - image: dlami-x64 + image: amazon-linux-gpu-x64
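Context for how this configuration is consumed: a workflow job opts into one of
these runners through runs-on labels. A minimal sketch of a job targeting the
gpu-nvidia runner defined above, assuming the label syntax used by tests.yml
later in this series (the job name and steps are illustrative only):

    jobs:
      gpu-tests:
        runs-on:
          - runs-on
          - runner=gpu-nvidia
        steps:
          - uses: actions/checkout@v4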
+ name: "runs-on-v2.2-ubuntu22-gpu-x64-20250123194414" - dlami-x64: + runs-on-cpu-pinned: platform: "linux" arch: "x64" - owner: "898082745236" # AWS - name: "Deep Learning Base OSS Nvidia Driver GPU AMI (Ubuntu 22.04)*" - -runners: - gpu-nvidia: - family: ["g4dn.xlarge"] - image: amazon-linux-gpu-x64 + owner: "135269210855" # runs-on + name: "runs-on-v2.2-ubuntu22-full-x64-20250101080516" From 4d3294ab3f6858590d3d1f6d6d0cbaa09afc692a Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Fri, 31 Jan 2025 00:20:51 -0800 Subject: [PATCH 4/5] GPU CI Fix (Pin runs-on GPU image) (#1588) * Debug * Change to base DL AMI * Print disk usage * Run nvidia-smi * Remove excess cuda installs in base ami * Re-enable freeing space in GPU CI * Try updating nix-develop version * Check what happens if you just enter the non-nixGL environment * Try switching AMIs * Try to remove the module stuff * Move to lockshaw/develop-action * Try pointing at a fixed commit * Update nix-develop action * Update nix-develop action to use BASH_FUNC filtering * Remove all the /usr/local/cuda entries * Switch back to gpu-ci env * Update the cuda arch * Try out the new runs-on gpu image * Move over to pinned runs-on image * Remove a bunch more unnecessary stuff in image to get back disk space * Try using an emphemeral store * Try mounting * Fix bug * Try sudo * Move nix into _work * Rollback all unnecessary changes * Re-enable waiting on cpu-ci --- .github/workflows/helpers/free_space_on_runner_gpu.sh | 8 -------- .github/workflows/tests.yml | 9 +++++---- 2 files changed, 5 insertions(+), 12 deletions(-) delete mode 100755 .github/workflows/helpers/free_space_on_runner_gpu.sh diff --git a/.github/workflows/helpers/free_space_on_runner_gpu.sh b/.github/workflows/helpers/free_space_on_runner_gpu.sh deleted file mode 100755 index a382ee58f6..0000000000 --- a/.github/workflows/helpers/free_space_on_runner_gpu.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -set -euo pipefail -set -x - -sudo rm -rf /usr/share/dotnet -sudo rm -rf /usr/local/lib/android -sudo rm -rf /opt/ghc -sudo rm -rf "/usr/local/share/boost" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7e2dabd784..e2fc0b6df6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -16,7 +16,7 @@ jobs: submodules: recursive - name: Free additional space on runner - run: ./.github/workflows/helpers/free_space_on_runner_gpu.sh + run: ./.github/workflows/helpers/free_space_on_runner.sh - name: Install nix uses: cachix/install-nix-action@v25 @@ -67,7 +67,7 @@ jobs: runs-on: - runs-on - family=g4dn.xlarge - - image=ubuntu22-full-x64 + - image=runs-on-gpu-pinned strategy: max-parallel: 1 @@ -79,8 +79,9 @@ jobs: with: submodules: recursive - - name: free additional space on runner - run: ./.github/workflows/helpers/free_space_on_runner_gpu.sh + - name: mount ephemeral drive to nix + run: | + sudo mkdir $HOME/_work/nix && sudo mkdir /nix && sudo mount --bind $HOME/_work/nix /nix - name: install nix uses: cachix/install-nix-action@v25 From 2b71235b66f15bbc5eaa9ad3b24fc3d470d335c3 Mon Sep 17 00:00:00 2001 From: Victor Li <32348970+victorli2002@users.noreply.github.com> Date: Sat, 1 Feb 2025 12:54:42 -0800 Subject: [PATCH 5/5] Merge substitution-builder (#1575) * Start on pcg builder * Add tests and some implementation for pcg builder * Add pcg tests, make dtgen constructors explicit to fix bug * Add remainder of PCG tests * Fix build issues in local-execution * Format * Address Reyna comments, add topological_order function for PCG * Pre 
multidigraph refactor * Removing visitable from sp code * Add open dataflow graph, start to replace pcg dataflow graph * Start refactoring substitutions * Add utility functions to support pattern matching * Pre-refactor inputs * Fix proj url * Get back to substitutions, now with unordered graph inputs * Get substitutions building * substitutions-tests now builds * Fix bug in filter, pass some initial substitution tests * Add tests for fmt::to_string, fix some substitutions bugs * Pass initial unit tests for find_pattern_matches * Start on unit tests for pcg pattern * Pass initial test for find_pattern_matches * Fix small build issue in tests * Format * Sync tests in CI with tests in proj * Fix minor build errors in kernels and local-execution * Format * Remove outdated code * More outdated code removal * More cleanup, add test for sp decomposition * Pull apart containers.h * More sp testing and fixes * Break up graph algorithms.h * Pre- full SP algo commit * Add initial implementation and tests for cbc decomposition and inverse line graph * Pass test for get_inverse_line_graph * Add new multidigraph * Fix get_inverse_line_graph to return a MultiDiGraph instead of a DiGraph * Add tests for parallel and series reduction finding * Add really rough implementation of valdez sp decomposition * Fix local-execution build * Add implementations and tests for applying series/parallel reductions * Format * Clean up sp decomposition interface and tests * Format * Add comments for top-level substitutions functions, add proj doxygen support * Start sketching out substitutions code * Fix build errors * Add ability to permute node ids * Cleanup and start to test new substitutions code * Add test case for evaluate_substitution_output * Add naive isomorphism detection code * Add graph inputs to open dataflow graph isomorphism * Add input permutation to evaluate_substitution_output * Fix permute_node_ids * Add test for permute_input_ids * Migrate over to mutable implementation of apply_substitution * Add fast isomorphism checking and an initial implementation of full substitution logic * Pass initial full substitutions test * Cleanup old isomorphism checking code * Fix post-merge bugs * Fix broken pcg builder test * Format * Reorganize code and remove some outdated code pre-code-review * Format * Restarting work on this after working on export-model-arch * Adding in some a simple function to get the currently available substritutions * nonnegative_int additions, code cleanup, etc. 
* A bunch more moving over to nonnegative_int * Even more nonnegative_int updating * Fix build * Fix failing tests * Format * Format --------- Co-authored-by: Colin Unger Co-authored-by: Victor Li --- .../src/export_model_arch.cc | 17 +- cmake/flexflow-utils.cmake | 14 +- flake.nix | 14 +- ...omputation_graph_binary_sp_decomposition.h | 5 +- .../src/compiler/allowed_machine_views.cc | 41 +- .../get_machine_resource_splits.cc | 10 +- .../machine_mapping/machine_mapping.cc | 10 +- ...el_layer_guid_oblivious_machine_mapping.cc | 4 +- ...mputation_graph_binary_sp_decomposition.cc | 2 +- .../test/src/allowed_machine_views.cc | 60 +- ...racted_tensor_set_movement_across_split.cc | 8 +- .../get_machine_resource_splits.cc | 193 ++--- .../get_optimal_machine_mapping.cc | 28 +- .../get_tensor_set_movement_across_split.cc | 32 +- .../machine_mapping/machine_mapping.cc | 24 +- .../get_machine_mapping_problem_tree.cc | 6 +- .../machine_mapping/machine_mapping_result.cc | 36 +- ...get_optimal_machine_mapping_with_memory.cc | 28 +- .../machine_mapping_result_with_memory.cc | 80 +-- ...ion_graph_series_parallel_decomposition.cc | 64 +- .../task_graph_simulator/task_simulator.cc | 88 ++- lib/compiler/test/src/graph_optimize_state.cc | 59 +- lib/kernels/include/kernels/array_shape.h | 36 +- .../include/kernels/batch_norm_kernels.h | 41 +- .../batch_norm_per_device_state.struct.toml | 68 ++ lib/kernels/include/kernels/legion_dim.h | 10 +- .../include/kernels/legion_dim_t.struct.toml | 7 +- .../include/kernels/transpose_kernels.h | 17 +- lib/kernels/src/allocation.cc | 3 +- lib/kernels/src/array_shape.cc | 53 +- lib/kernels/src/cuda/cuda_helper.cu | 8 +- .../src/cuda/ops/batch_norm_kernels.cu | 32 +- lib/kernels/src/cuda/ops/cast_kernels.cu | 4 +- lib/kernels/src/cuda/ops/combine_kernels.cu | 5 +- lib/kernels/src/cuda/ops/concat_kernels.cu | 7 +- lib/kernels/src/cuda/ops/conv_2d_kernels.cu | 16 +- .../src/cuda/ops/element_unary_kernels.cu | 8 +- lib/kernels/src/cuda/ops/flat_kernels.cu | 12 +- lib/kernels/src/cuda/ops/gather_kernels.cu | 54 +- lib/kernels/src/cuda/ops/partition_kernels.cu | 16 +- lib/kernels/src/cuda/ops/reduction_kernels.cu | 8 +- lib/kernels/src/cuda/ops/replicate_kernels.cu | 8 +- lib/kernels/src/cuda/ops/reshape_kernels.cu | 7 +- lib/kernels/src/cuda/ops/transpose_kernels.cu | 99 +-- lib/kernels/src/legion_dim.cc | 9 +- lib/kernels/test/src/test_attention_kernel.cc | 50 +- .../test/src/test_batch_matmul_kernel.cc | 30 +- .../test/src/test_batch_norm_kernel.cc | 58 +- lib/kernels/test/src/test_cast_kernel.cc | 4 +- lib/kernels/test/src/test_combine_kernel.cc | 2 +- lib/kernels/test/src/test_concat_kernel.cc | 15 +- lib/kernels/test/src/test_dropout.cc | 8 +- lib/kernels/test/src/test_flat_kernel.cc | 6 +- lib/kernels/test/src/test_gather_kernels.cc | 7 +- .../test/src/test_layer_norm_kernels.cc | 8 +- lib/kernels/test/src/test_partition_kernel.cc | 6 +- lib/kernels/test/src/test_pool_2d_kernels.cc | 52 +- lib/kernels/test/src/test_reduction_kernel.cc | 9 +- lib/kernels/test/src/test_replicate_kernel.cc | 8 +- lib/kernels/test/src/test_reshape_kernel.cc | 6 +- lib/kernels/test/src/test_reverse_kernels.cc | 31 +- lib/kernels/test/src/test_softmax_kernel.cc | 23 +- lib/kernels/test/src/test_split_kernel.cc | 16 +- lib/kernels/test/src/test_transpose_kernel.cc | 19 +- lib/kernels/test/src/test_utils.cc | 15 +- lib/kernels/test/src/test_utils.h | 16 +- ...device_specific_device_states.variant.toml | 6 +- .../local-execution/legion_tensor_shape.h | 40 -- .../per_device_op_state.variant.toml | 5 
- .../local-execution/task_id_t.enum.toml | 3 - .../src/legion_tensor_shape.cc | 15 - lib/local-execution/src/ops/attention.cc | 55 +- lib/local-execution/src/ops/batch_matmul.cc | 73 +- lib/local-execution/src/ops/batch_matmul.h | 2 +- lib/local-execution/src/ops/batch_norm.cc | 27 +- lib/local-execution/src/ops/conv_2d.cc | 26 +- lib/local-execution/src/ops/gather.cc | 9 +- lib/local-execution/src/ops/layer_norm.cc | 24 +- lib/local-execution/src/ops/linear.cc | 49 +- lib/local-execution/src/ops/pool_2d.cc | 91 ++- lib/local-execution/src/ops/reduce.cc | 9 +- lib/local-execution/src/ops/reduction.cc | 4 +- lib/local-execution/src/ops/replicate.cc | 4 +- lib/local-execution/src/ops/reverse.cc | 46 +- lib/local-execution/src/ops/softmax.cc | 20 +- lib/local-execution/src/ops/split.cc | 51 +- lib/local-execution/src/ops/topk.cc | 20 +- lib/local-execution/src/ops/transpose.cc | 55 +- lib/local-execution/src/ops/transpose.h | 3 - .../src/task_signature_impl.cc | 4 - .../test/src/test_local_slots_backing.cc | 13 +- .../test/src/test_local_task_arg_accessor.cc | 13 +- .../test/src/test_task_registry.cc | 8 +- .../models/bert/bert_config.struct.toml | 15 +- .../candle_uno/candle_uno_config.struct.toml | 9 +- .../inception_v3_config.struct.toml | 8 +- .../include/models/split_test/split_test.h | 2 +- .../transformer_config.struct.toml | 21 +- lib/models/src/models/bert/bert.cc | 41 +- .../src/models/candle_uno/candle_uno.cc | 57 +- .../src/models/inception_v3/inception_v3.cc | 675 +++++++++--------- .../src/models/split_test/split_test.cc | 16 +- .../src/models/transformer/transformer.cc | 89 +-- .../computation_graph_op_attrs.variant.toml | 2 +- lib/op-attrs/include/op-attrs/datatype.h | 3 +- .../op-attrs/dim_ordered/dim_ordered.h | 27 +- .../include/op-attrs/dim_ordered/slice.h | 4 +- lib/op-attrs/include/op-attrs/get_op_type.h | 2 +- lib/op-attrs/include/op-attrs/ops/attention.h | 36 +- .../multihead_attention_inputs.struct.toml | 12 +- .../op-attrs/ops/attention_attrs.struct.toml | 12 +- .../include/op-attrs/ops/batch_matmul.h | 2 +- .../op-attrs/ops/batch_matmul.struct.toml | 19 - .../ops/batch_matmul_attrs.struct.toml | 30 + .../op-attrs/ops/combine_attrs.struct.toml | 3 +- .../conv_2d/conv_2d_input_shape.struct.toml | 9 +- .../conv_2d_parallel_input_shape.struct.toml | 5 +- .../op-attrs/ops/conv_2d_attrs.struct.toml | 17 +- .../op-attrs/ops/embedding_attrs.struct.toml | 7 +- .../op-attrs/ops/linear_attrs.struct.toml | 3 +- lib/op-attrs/include/op-attrs/ops/pool_2d.h | 4 +- .../op-attrs/ops/pool_2d_attrs.struct.toml | 13 +- .../op-attrs/ops/reduction_attrs.struct.toml | 6 +- .../ops/repartition_attrs.struct.toml | 3 +- .../op-attrs/ops/replicate_attrs.struct.toml | 6 +- .../op-attrs/ops/split_attrs.struct.toml | 3 +- .../op-attrs/ops/topk_attrs.struct.toml | 6 +- .../parallel_tensor_dim_degrees.struct.toml | 3 +- .../include/op-attrs/parallel_tensor_dims.h | 20 +- .../include/op-attrs/parallel_tensor_shape.h | 25 +- .../discard_copy_degree.struct.toml | 6 +- .../sum_degree.struct.toml | 6 +- .../op-attrs/pcg_operator_attrs.variant.toml | 2 +- .../include/op-attrs/relative_ff_dim_t.h | 2 +- .../op-attrs/replica_parallel_dim.struct.toml | 3 +- .../op-attrs/replica_parallel_dim_set.h | 3 +- .../op-attrs/shard_parallel_dim.struct.toml | 8 +- lib/op-attrs/include/op-attrs/tensor_dims.h | 8 +- .../include/op-attrs/tensor_dims.struct.toml | 4 +- lib/op-attrs/include/op-attrs/tensor_shape.h | 10 +- lib/op-attrs/src/op-attrs/datatype.cc | 15 +- lib/op-attrs/src/op-attrs/ff_dim_t.cc | 2 +- 
lib/op-attrs/src/op-attrs/ops/attention.cc | 474 ++---------- .../attention/multihead_attention_inputs.cc | 18 +- .../multihead_attention_parallel_inputs.cc | 6 +- lib/op-attrs/src/op-attrs/ops/batch_matmul.cc | 21 +- lib/op-attrs/src/op-attrs/ops/batch_norm.cc | 27 +- lib/op-attrs/src/op-attrs/ops/concat.cc | 15 +- lib/op-attrs/src/op-attrs/ops/conv_2d.cc | 65 +- .../ops/conv_2d/conv_2d_input_shape.cc | 8 +- lib/op-attrs/src/op-attrs/ops/embedding.cc | 24 +- lib/op-attrs/src/op-attrs/ops/flat.cc | 18 +- lib/op-attrs/src/op-attrs/ops/layer_norm.cc | 6 +- lib/op-attrs/src/op-attrs/ops/linear.cc | 20 +- lib/op-attrs/src/op-attrs/ops/pool_2d.cc | 73 +- .../src/op-attrs/parallel_tensor_dims.cc | 39 +- .../src/op-attrs/parallel_tensor_shape.cc | 64 +- .../src/op-attrs/relative_ff_dim_t.cc | 4 +- .../src/op-attrs/replica_parallel_dim_set.cc | 6 +- lib/op-attrs/src/op-attrs/tensor_dims.cc | 15 +- lib/op-attrs/src/op-attrs/tensor_shape.cc | 13 +- .../test/src/op-attrs/ops/attention.cc | 178 +++-- .../test/src/op-attrs/ops/batch_matmul.cc | 144 ++-- .../test/src/op-attrs/ops/batch_norm.cc | 84 +-- lib/op-attrs/test/src/op-attrs/ops/cast.cc | 34 +- lib/op-attrs/test/src/op-attrs/ops/combine.cc | 20 +- lib/op-attrs/test/src/op-attrs/ops/concat.cc | 176 ++--- lib/op-attrs/test/src/op-attrs/ops/conv_2d.cc | 168 ++--- lib/op-attrs/test/src/op-attrs/ops/dropout.cc | 62 +- .../test/src/op-attrs/ops/element_binary.cc | 70 +- .../test/src/op-attrs/ops/element_unary.cc | 38 +- .../test/src/op-attrs/ops/embedding.cc | 68 +- lib/op-attrs/test/src/op-attrs/ops/flat.cc | 110 +-- .../test/src/op-attrs/ops/layer_norm.cc | 93 +-- lib/op-attrs/test/src/op-attrs/ops/linear.cc | 142 ++-- lib/op-attrs/test/src/op-attrs/ops/pool_2d.cc | 226 +++--- .../test/src/op-attrs/ops/reduction.cc | 16 +- .../test/src/op-attrs/ops/repartition.cc | 16 +- .../test/src/op-attrs/ops/replicate.cc | 17 +- lib/op-attrs/test/src/op-attrs/ops/softmax.cc | 78 +- .../test/src/op-attrs/pcg_operator_attrs.cc | 4 +- .../test/src/op-attrs/relative_ff_dim_t.cc | 10 +- lib/op-attrs/test/src/op-attrs/tensor_dims.cc | 31 +- .../include/pcg/computation_graph_builder.h | 91 +-- lib/pcg/include/pcg/cpu_id_t.struct.toml | 6 +- lib/pcg/include/pcg/device_id.h | 4 +- .../file_format/v1/graphs/v1_dataflow_graph.h | 2 +- .../v1/graphs/v1_dataflow_graph.struct.toml | 3 +- .../v1/graphs/v1_graph_edge.struct.toml | 12 +- .../v1/graphs/v1_labelled_dataflow_graph.h | 9 +- .../v1_labelled_dataflow_graph.struct.toml | 5 +- .../v1_binary_sp_decomposition.variant.toml | 3 +- .../pcg/file_format/v1/v1_computation_graph.h | 2 +- lib/pcg/include/pcg/gpu_id_t.struct.toml | 6 +- .../pcg/machine_space_coordinate.struct.toml | 5 +- lib/pcg/include/pcg/machine_specification.h | 12 +- .../pcg/machine_specification.struct.toml | 10 +- lib/pcg/include/pcg/machine_view.h | 2 +- lib/pcg/include/pcg/operator_task_space.h | 4 +- .../pcg/operator_task_space.struct.toml | 3 +- .../parallel_computation_graph_builder.h | 38 +- .../parallel_computation_graph_edge.h | 2 +- .../pcg/start_invariant_machine_view.h | 2 +- lib/pcg/include/pcg/stride_t.struct.toml | 6 +- .../pcg/task_space_coordinate.struct.toml | 3 +- lib/pcg/src/pcg/computation_graph_builder.cc | 137 ++-- lib/pcg/src/pcg/device_id.cc | 4 +- .../v1/graphs/v1_dataflow_graph.cc | 6 +- .../v1/graphs/v1_labelled_dataflow_graph.cc | 16 + .../v1/v1_binary_sp_decomposition/json.cc | 4 +- .../file_format/v1/v1_computation_graph.cc | 7 +- lib/pcg/src/pcg/machine_space_offset.cc | 6 +- lib/pcg/src/pcg/machine_specification.cc | 17 +- 
lib/pcg/src/pcg/machine_view.cc | 79 +- lib/pcg/src/pcg/operator_task_space.cc | 22 +- .../generate_weight_transform.cc | 4 +- .../parallel_computation_graph_builder.cc | 98 +-- .../parallel_computation_graph_edge.cc | 2 +- .../src/pcg/start_invariant_machine_view.cc | 7 +- lib/pcg/test/src/pcg/computation_graph.cc | 40 +- .../test/src/pcg/computation_graph_builder.cc | 18 +- .../v1/v1_binary_sp_decomposition/json.cc | 18 +- .../file_format/v1/v1_computation_graph.cc | 8 +- .../v1/v1_parallel_computation_graph.cc | 10 +- lib/pcg/test/src/pcg/machine_specification.cc | 17 +- lib/pcg/test/src/pcg/machine_view.cc | 160 ++--- lib/pcg/test/src/pcg/operator_task_space.cc | 28 +- .../parallel_computation_graph.cc | 38 +- .../parallel_computation_graph_builder.cc | 155 ++-- .../src/pcg/start_invariant_machine_view.cc | 56 +- .../apply_substitution/apply_substitution.h | 31 + .../evaluate_substitution_output.h | 6 +- .../output_expr_to_result_sub_pcg_mapping.h | 6 +- ...expr_to_result_sub_pcg_mapping.struct.toml | 0 .../perform_shape_inference.h | 4 +- .../substitutions/constraint_type.enum.toml | 3 + .../operator_pattern/get_attribute_map.h | 15 + .../operator_attribute_constraint.h | 2 + .../operator_attribute_key.enum.toml | 1 + .../operator_pattern/operator_attribute_key.h | 12 + ...operator_attribute_list_access.struct.toml | 5 +- .../operator_attribute_value.variant.toml | 14 +- .../output_graph/output_graph_expr.h | 5 + .../output_graph/output_graph_expr_value.h | 16 + .../output_graph_expr_value.variant.toml | 19 + .../output_operator_attrs_assignment.h | 3 + ...tput_operator_attrs_assignment.struct.toml | 7 +- .../include/substitutions/pcg_pattern.h | 2 + .../include/substitutions/pcg_pattern_match.h | 4 +- .../sub_parallel_computation_graph_edge.h | 2 +- .../include/substitutions/substitution.h | 25 +- .../substitutions/substitution_builder.h | 49 ++ .../tensor_attribute_list_access.struct.toml | 5 +- .../tensor_pattern/tensor_attribute_pattern.h | 3 + .../tensor_attribute_value.variant.toml | 5 +- .../substitutions/unity_substitution_set.h | 47 ++ .../unlabelled/input_pattern_edge.h | 2 +- .../unlabelled/pattern_matching.h | 10 +- .../unlabelled/pattern_node_output.h | 2 +- .../unlabelled/standard_pattern_edge.h | 4 +- .../apply_substitution/apply_substitution.cc | 165 +++++ .../evaluate_substitution_output.cc | 4 +- .../output_expr_to_result_sub_pcg_mapping.cc | 2 +- .../perform_shape_inference.cc | 2 +- .../operator_pattern/eval_list_access.cc | 21 +- .../operator_pattern/eval_list_size.cc | 5 +- .../operator_pattern/get_attribute.cc | 156 ++-- .../operator_pattern/get_attribute_map.cc | 25 + .../operator_attribute_constraint.cc | 10 + .../operator_attribute_key.cc | 68 ++ .../materialize_operator_from_attrs_map.cc | 27 +- .../output_graph/output_graph_expr.cc | 18 + .../output_graph/output_graph_expr_value.cc | 30 + .../output_operator_attrs_assignment.cc | 41 +- .../src/substitutions/pcg_pattern.cc | 18 + .../sub_parallel_computation_graph.cc | 55 +- .../sub_parallel_computation_graph_edge.cc | 2 +- .../src/substitutions/substitution.cc | 301 ++++---- .../src/substitutions/substitution_builder.cc | 162 +++++ .../tensor_pattern/eval_list_access.cc | 5 +- .../tensor_pattern/eval_list_size.cc | 5 +- .../tensor_pattern/get_attribute.cc | 10 +- .../tensor_attribute_pattern.cc | 16 + .../substitutions/unity_substitution_set.cc | 235 ++++++ .../unlabelled/input_pattern_edge.cc | 2 +- .../unlabelled/pattern_node_output.cc | 2 +- .../unlabelled/standard_pattern_edge.cc | 4 +- 
.../apply_substitution/apply_substitution.cc | 174 +++++ .../evaluate_substitution_output.cc | 63 +- .../perform_shape_inference.cc | 63 +- .../operator_pattern/get_attribute.cc | 2 +- .../test/src/substitutions/pcg_pattern.cc | 14 +- .../test/src/substitutions/substitution.cc | 345 ++++----- .../src/substitutions/substitution_builder.cc | 145 ++++ .../substitutions/unity_substitution_set.cc | 20 + .../unlabelled/find_pattern_matches.cc} | 29 +- .../unlabelled/pattern_matching.cc | 210 ++++++ .../substitutions/unlabelled/pattern_split.cc | 8 +- .../unlabelled/unlabelled_graph_pattern.cc | 4 +- .../test/src/test_substitution.cc | 148 ---- .../algorithms/bidict_from_enumerating.h | 14 +- .../utils/cli/cli_flag_key.struct.toml | 6 +- .../cli_positional_argument_key.struct.toml | 6 +- lib/utils/include/utils/containers/at_idx.h | 5 +- .../include/utils/containers/enumerate.h | 16 +- .../utils/containers/enumerate_vector.h | 11 +- lib/utils/include/utils/containers/flatmap.h | 15 +- .../get_all_permutations_with_repetition.h | 10 +- lib/utils/include/utils/containers/make.h | 13 + .../include/utils/containers/merge_maps.h | 60 +- .../utils/containers/merge_method.enum.toml | 17 + lib/utils/include/utils/containers/product.h | 2 +- lib/utils/include/utils/containers/repeat.h | 5 +- .../include/utils/containers/repeat_element.h | 22 + .../include/utils/containers/replicate.h | 15 - lib/utils/include/utils/containers/sum.h | 2 +- .../algorithms/view_as_open_dataflow_graph.h | 34 + .../dataflow_edge_query.struct.toml | 5 +- .../graph/dataflow_graph/dataflow_graph.h | 3 +- .../dataflow_graph/dataflow_input.struct.toml | 3 +- .../dataflow_output.struct.toml | 3 +- .../dataflow_output_query.struct.toml | 6 +- .../graph/dataflow_graph/i_dataflow_graph.h | 2 +- .../instances/unordered_set_dataflow_graph.h | 4 +- ...ordered_set_labelled_open_dataflow_graph.h | 7 +- .../algorithms/get_graph_data.h | 1 + .../algorithms/permute_node_ids.h | 1 + .../graph/multidigraph/algorithms/add_nodes.h | 3 +- .../algorithms/are_isomorphic.h | 13 + .../open_dataflow_graph_isomorphism.h | 21 + .../dataflow_input_edge_query.struct.toml | 3 +- .../i_open_dataflow_graph.h | 2 +- .../open_dataflow_graph/open_dataflow_edge.h | 2 +- .../open_dataflow_graph/open_dataflow_graph.h | 2 +- .../unordered_set_open_dataflow_graph.h | 2 +- lib/utils/include/utils/graph/render_dot.h | 19 + .../include/utils/nonnegative_int/ceildiv.h | 11 + .../utils/nonnegative_int/nonnegative_int.h | 28 +- .../utils/nonnegative_int/nonnegative_range.h | 14 + .../utils/nonnegative_int/num_elements.h | 17 + lib/utils/include/utils/variant.h | 1 + .../algorithms/bidict_from_enumerating.cc | 13 + lib/utils/src/utils/cli/cli_parse.cc | 6 +- lib/utils/src/utils/cli/cli_spec.cc | 15 +- lib/utils/src/utils/containers/at_idx.cc | 9 + lib/utils/src/utils/containers/enumerate.cc | 11 + .../src/utils/containers/enumerate_vector.cc | 9 + lib/utils/src/utils/containers/make.cc | 8 + lib/utils/src/utils/containers/range.cc | 1 + lib/utils/src/utils/containers/repeat.cc | 10 + .../src/utils/containers/repeat_element.cc | 10 + lib/utils/src/utils/containers/replicate.cc | 1 - .../utils/graph/dataflow_graph/algorithms.cc | 2 +- .../graph/dataflow_graph/algorithms/as_dot.cc | 41 +- .../get_dataflow_edges_from_node_to_node.cc | 4 +- .../algorithms/get_incoming_edges.cc | 8 +- .../algorithms/get_outgoing_edges.cc | 8 +- .../algorithms/get_subgraph_incoming_edges.cc | 4 +- .../algorithms/get_subgraph_outgoing_edges.cc | 4 +- .../algorithms/view_as_open_dataflow_graph.cc | 22 
+- .../algorithms/view_as_open_dataflow_graph.h | 32 - .../dataflow_graph/dataflow_edge_query.cc | 20 +- .../graph/dataflow_graph/dataflow_graph.cc | 2 +- .../dataflow_graph/dataflow_output_query.cc | 6 +- .../dataflow_graph/i_dataflow_graph_view.cc | 4 +- .../digraph/algorithms/transitive_closure.cc | 5 +- .../algorithms/transitive_reduction.cc | 4 +- .../instances/unordered_set_dataflow_graph.cc | 10 +- .../multidigraph/algorithms/add_nodes.cc | 2 +- .../algorithms/are_isomorphic.cc | 11 + .../open_dataflow_graph/algorithms/as_dot.cc | 11 +- .../algorithms/get_incoming_edges.cc | 6 +- .../algorithms/get_subgraph_incoming_edges.cc | 6 +- .../open_dataflow_graph_isomorphism.cc | 54 ++ .../dataflow_input_edge_query.cc | 10 +- .../open_dataflow_graph/open_dataflow_edge.cc | 2 +- .../open_dataflow_graph.cc | 2 +- .../unordered_set_open_dataflow_graph.cc | 2 +- lib/utils/src/utils/graph/render_dot.cc | 90 +++ .../src/utils/nonnegative_int/ceildiv.cc | 20 + .../utils/nonnegative_int/nonnegative_int.cc | 79 +- .../nonnegative_int/nonnegative_range.cc | 19 + .../src/utils/nonnegative_int/num_elements.cc | 10 + lib/utils/test/src/main.cc | 2 - .../algorithms/bidict_from_enumerating.cc | 19 +- lib/utils/test/src/utils/cli/cli_parse.cc | 34 +- lib/utils/test/src/utils/containers/at_idx.cc | 29 + .../test/src/utils/containers/enumerate.cc | 33 +- .../src/utils/containers/enumerate_vector.cc | 33 + .../test/src/utils/containers/flatmap.cc | 32 + .../get_all_permutations_with_repetition.cc | 8 +- lib/utils/test/src/utils/containers/make.cc | 15 + .../test/src/utils/containers/merge_maps.cc | 78 +- .../test/src/utils/containers/product.cc | 20 + lib/utils/test/src/utils/containers/repeat.cc | 2 +- .../{replicate.cc => repeat_element.cc} | 9 +- .../utils/graph/dataflow_graph/algorithms.cc | 14 +- .../dataflow_graphs_are_isomorphic.cc | 24 +- .../algorithms/find_isomorphism.cc | 24 +- .../get_dataflow_edges_from_node_to_node.cc | 26 +- .../algorithms/get_incoming_edges.cc | 14 +- .../algorithms/get_outgoing_edges.cc | 28 +- .../algorithms/get_subgraph_incoming_edges.cc | 14 +- .../algorithms/get_subgraph_outgoing_edges.cc | 12 +- ...sitive_reduced_boundary_nodes_for_split.cc | 8 +- ...t_transitive_reduced_edges_across_split.cc | 34 +- ...transitive_reduced_outputs_across_split.cc | 8 +- .../unordered_open_dataflow_graph.cc | 8 +- .../multidigraph/algorithms/add_edges.cc | 2 +- .../multidigraph/algorithms/add_nodes.cc | 2 +- .../multidigraph/algorithms/get_edges.cc | 2 +- .../algorithms/find_isomorphism.cc | 23 +- .../get_open_dataflow_graph_inputs.cc | 2 +- .../get_open_dataflow_value_uses.cc | 20 +- .../get_unused_open_dataflow_graph_inputs.cc | 4 +- .../open_dataflow_graphs_are_isomorphic.cc | 23 +- .../algorithms/permute_input_ids.cc | 14 +- .../algorithms/permute_node_ids.cc | 28 +- .../series_parallel/parallel_reduction.cc | 14 +- .../graph/series_parallel/series_reduction.cc | 18 +- .../test/src/utils/nonnegative_int/ceildiv.cc | 52 ++ .../utils/nonnegative_int/nonnegative_int.cc | 90 ++- .../nonnegative_int/nonnegative_range.cc | 42 ++ .../src/utils/nonnegative_int/num_elements.cc | 15 + lib/utils/test/src/utils/random_utils.cc | 6 +- 425 files changed, 7351 insertions(+), 5065 deletions(-) create mode 100644 lib/kernels/include/kernels/batch_norm_per_device_state.struct.toml delete mode 100644 lib/local-execution/include/local-execution/legion_tensor_shape.h delete mode 100644 lib/local-execution/src/legion_tensor_shape.cc delete mode 100644 
lib/op-attrs/include/op-attrs/ops/batch_matmul.struct.toml create mode 100644 lib/op-attrs/include/op-attrs/ops/batch_matmul_attrs.struct.toml create mode 100644 lib/substitutions/include/substitutions/apply_substitution/apply_substitution.h rename lib/substitutions/include/substitutions/{substitution_internal => apply_substitution}/evaluate_substitution_output.h (76%) rename lib/substitutions/include/substitutions/{substitution_internal => apply_substitution}/output_expr_to_result_sub_pcg_mapping.h (62%) rename lib/substitutions/include/substitutions/{substitution_internal => apply_substitution}/output_expr_to_result_sub_pcg_mapping.struct.toml (100%) rename lib/substitutions/include/substitutions/{substitution_internal => apply_substitution}/perform_shape_inference.h (85%) create mode 100644 lib/substitutions/include/substitutions/operator_pattern/get_attribute_map.h create mode 100644 lib/substitutions/include/substitutions/operator_pattern/operator_attribute_key.h create mode 100644 lib/substitutions/include/substitutions/output_graph/output_graph_expr_value.h create mode 100644 lib/substitutions/include/substitutions/output_graph/output_graph_expr_value.variant.toml create mode 100644 lib/substitutions/include/substitutions/substitution_builder.h create mode 100644 lib/substitutions/include/substitutions/unity_substitution_set.h create mode 100644 lib/substitutions/src/substitutions/apply_substitution/apply_substitution.cc rename lib/substitutions/src/substitutions/{substitution_internal => apply_substitution}/evaluate_substitution_output.cc (96%) rename lib/substitutions/src/substitutions/{substitution_internal => apply_substitution}/output_expr_to_result_sub_pcg_mapping.cc (93%) rename lib/substitutions/src/substitutions/{substitution_internal => apply_substitution}/perform_shape_inference.cc (95%) create mode 100644 lib/substitutions/src/substitutions/operator_pattern/get_attribute_map.cc create mode 100644 lib/substitutions/src/substitutions/operator_pattern/operator_attribute_key.cc create mode 100644 lib/substitutions/src/substitutions/output_graph/output_graph_expr_value.cc create mode 100644 lib/substitutions/src/substitutions/substitution_builder.cc create mode 100644 lib/substitutions/src/substitutions/unity_substitution_set.cc create mode 100644 lib/substitutions/test/src/substitutions/apply_substitution/apply_substitution.cc rename lib/substitutions/test/src/substitutions/{substitution_internal => apply_substitution}/evaluate_substitution_output.cc (86%) rename lib/substitutions/test/src/substitutions/{substitution_internal => apply_substitution}/perform_shape_inference.cc (78%) create mode 100644 lib/substitutions/test/src/substitutions/substitution_builder.cc create mode 100644 lib/substitutions/test/src/substitutions/unity_substitution_set.cc rename lib/substitutions/test/src/{test_pattern_matches.cc => substitutions/unlabelled/find_pattern_matches.cc} (94%) create mode 100644 lib/substitutions/test/src/substitutions/unlabelled/pattern_matching.cc delete mode 100644 lib/substitutions/test/src/test_substitution.cc create mode 100644 lib/utils/include/utils/containers/make.h create mode 100644 lib/utils/include/utils/containers/merge_method.enum.toml create mode 100644 lib/utils/include/utils/containers/repeat_element.h delete mode 100644 lib/utils/include/utils/containers/replicate.h create mode 100644 lib/utils/include/utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.h create mode 100644 
lib/utils/include/utils/graph/open_dataflow_graph/algorithms/are_isomorphic.h create mode 100644 lib/utils/include/utils/graph/open_dataflow_graph/algorithms/open_dataflow_graph_isomorphism.h create mode 100644 lib/utils/include/utils/graph/render_dot.h create mode 100644 lib/utils/include/utils/nonnegative_int/ceildiv.h create mode 100644 lib/utils/include/utils/nonnegative_int/nonnegative_range.h create mode 100644 lib/utils/include/utils/nonnegative_int/num_elements.h create mode 100644 lib/utils/src/utils/containers/make.cc create mode 100644 lib/utils/src/utils/containers/repeat_element.cc delete mode 100644 lib/utils/src/utils/containers/replicate.cc delete mode 100644 lib/utils/src/utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.h create mode 100644 lib/utils/src/utils/graph/open_dataflow_graph/algorithms/are_isomorphic.cc create mode 100644 lib/utils/src/utils/graph/open_dataflow_graph/algorithms/open_dataflow_graph_isomorphism.cc create mode 100644 lib/utils/src/utils/graph/render_dot.cc create mode 100644 lib/utils/src/utils/nonnegative_int/ceildiv.cc create mode 100644 lib/utils/src/utils/nonnegative_int/nonnegative_range.cc create mode 100644 lib/utils/src/utils/nonnegative_int/num_elements.cc delete mode 100644 lib/utils/test/src/main.cc create mode 100644 lib/utils/test/src/utils/containers/at_idx.cc create mode 100644 lib/utils/test/src/utils/containers/enumerate_vector.cc create mode 100644 lib/utils/test/src/utils/containers/make.cc rename lib/utils/test/src/utils/containers/{replicate.cc => repeat_element.cc} (69%) create mode 100644 lib/utils/test/src/utils/nonnegative_int/ceildiv.cc create mode 100644 lib/utils/test/src/utils/nonnegative_int/nonnegative_range.cc create mode 100644 lib/utils/test/src/utils/nonnegative_int/num_elements.cc diff --git a/bin/export-model-arch/src/export_model_arch.cc b/bin/export-model-arch/src/export_model_arch.cc index 64419acce4..a9f6c65b86 100644 --- a/bin/export-model-arch/src/export_model_arch.cc +++ b/bin/export-model-arch/src/export_model_arch.cc @@ -13,6 +13,7 @@ #include "utils/cli/cli_parse.h" #include "utils/cli/cli_parse_result.h" #include "utils/cli/cli_spec.h" +#include "utils/graph/open_dataflow_graph/algorithms/as_dot.h" #include "utils/graph/series_parallel/binary_sp_decomposition_tree/right_associative_binary_sp_tree_from_nary.h" #include "utils/graph/series_parallel/get_series_parallel_decomposition.h" @@ -21,11 +22,11 @@ using namespace ::FlexFlow; ComputationGraph get_single_operator_computation_graph() { ComputationGraphBuilder b; - size_t batch_size = 8; - size_t in_channels = 16; - size_t out_channels = 12; + nonnegative_int batch_size = 8_n; + nonnegative_int in_channels = 16_n; + nonnegative_int out_channels = 12_n; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ batch_size, in_channels, out_channels, @@ -69,7 +70,7 @@ tl::expected } else if (model_name == "bert") { return get_bert_computation_graph(get_default_bert_config()); } else if (model_name == "split_test") { - int batch_size = 8; + nonnegative_int batch_size = 8_n; return get_split_test_computation_graph(batch_size); } else if (model_name == "single_operator") { return get_single_operator_computation_graph(); @@ -100,10 +101,10 @@ tl::expected result.value(); }); - std::pair> v1_result = - to_v1_including_node_numbering(computation_graph); + std::pair> + v1_result = to_v1_including_node_numbering(computation_graph); V1ComputationGraph v1_cg = v1_result.first; - bidict layer_numbering = 
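// ---- [editor's sketch, not part of the patch] ----
// The `8_n` / `16_n` spellings in this hunk come from a user-defined
// literal for nonnegative_int. The real definition lives in
// lib/utils/include/utils/nonnegative_int/nonnegative_int.h; the
// constructor and its checking behavior below are assumptions, not
// confirmed by this patch (only unwrap_nonnegative() is visible in
// later hunks):
//
//   #include <cassert>
//
//   class nonnegative_int {
//   public:
//     explicit nonnegative_int(int v) : value_(v) {
//       assert(v >= 0); // reject negative values at construction
//     }
//     int unwrap_nonnegative() const { return value_; }
//   private:
//     int value_;
//   };
//
//   nonnegative_int operator""_n(unsigned long long v) {
//     return nonnegative_int{static_cast<int>(v)};
//   }
//
// With this in scope, `nonnegative_int batch_size = 8_n;` reads like
// the plain-int code it replaces while making the non-negativity
// invariant explicit in the type.
// ---- [end sketch] ----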
v1_result.second; + bidict layer_numbering = v1_result.second; V1BinarySPDecomposition v1_sp_decomposition = to_v1(sp_decomposition, layer_numbering); diff --git a/cmake/flexflow-utils.cmake b/cmake/flexflow-utils.cmake index 7ba39e92c9..515a249521 100644 --- a/cmake/flexflow-utils.cmake +++ b/cmake/flexflow-utils.cmake @@ -20,6 +20,7 @@ function(define_ff_vars target) MAX_TENSOR_DIM=${FF_MAX_DIM} MAX_NUM_TASK_REGIONS=${FF_MAX_NUM_TASK_REGIONS} MAX_NUM_TASK_ARGUMENTS=${FF_MAX_NUM_TASK_ARGUMENTS} + # _FORTIFY_SOURCE=0 ) if (FF_GPU_BACKEND STREQUAL "cuda") @@ -39,7 +40,18 @@ function(ff_set_cxx_properties target) CXX_EXTENSIONS NO ) target_compile_options(${target} - PRIVATE $<$:> "-ffile-prefix-map=${CMAKE_SOURCE_DIR}=." # add C++ compile flags here + PUBLIC + $<$:> + "-ffile-prefix-map=${CMAKE_SOURCE_DIR}=." + "-fsanitize=undefined" + "-fno-sanitize-recover=all" + # add C++ compile flags here + ) + target_link_options(${target} + PUBLIC + $<$:> + "-fsanitize=undefined" + "-fno-sanitize-recover=all" ) endfunction() diff --git a/flake.nix b/flake.nix index 91651bd0c1..e4644ef727 100644 --- a/flake.nix +++ b/flake.nix @@ -38,9 +38,15 @@ }; lib = pkgs.lib; - mkShell = pkgs.mkShell.override { + mkShell = attrs: pkgs.mkShell.override { stdenv = pkgs.cudaPackages.backendStdenv; - }; + } (attrs // { + hardeningDisable = ["all"]; # disable nixpkgs default compiler arguments, otherwise ubsan doesn't catch + # signed overflows due to the signedoverflow hardening setting. + # for more details, see the following (long-running) nixpkgs github issues: + # - https://github.com/NixOS/nixpkgs/issues/18995 + # - https://github.com/NixOS/nixpkgs/issues/60919 + }); proj = proj-repo.packages.${system}.proj; in @@ -121,6 +127,8 @@ gpu-ci = mkShell { inputsFrom = [ ci ]; + hardeningDisable = [ "all" ]; + buildInputs = builtins.concatLists [ (with nixGL.packages.${system}; [ nixGLDefault @@ -135,6 +143,8 @@ "${proj-repo.packages.${system}.proj-nvim}" ]; + hardeningDisable = [ "all" ]; + buildInputs = builtins.concatLists [ (with pkgs; [ clang-tools diff --git a/lib/compiler/include/compiler/series_parallel/computation_graph/computation_graph_binary_sp_decomposition.h b/lib/compiler/include/compiler/series_parallel/computation_graph/computation_graph_binary_sp_decomposition.h index fdc80a1e37..8a7c467303 100644 --- a/lib/compiler/include/compiler/series_parallel/computation_graph/computation_graph_binary_sp_decomposition.h +++ b/lib/compiler/include/compiler/series_parallel/computation_graph/computation_graph_binary_sp_decomposition.h @@ -36,8 +36,9 @@ bool is_right_associative(ComputationGraphBinarySPDecomposition const &); std::unordered_multiset get_layers(ComputationGraphBinarySPDecomposition const &); -V1BinarySPDecomposition to_v1(ComputationGraphBinarySPDecomposition const &, - bidict const &layer_numbering); +V1BinarySPDecomposition + to_v1(ComputationGraphBinarySPDecomposition const &, + bidict const &layer_numbering); } // namespace FlexFlow diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc index db7477b460..6f86d1d82a 100644 --- a/lib/compiler/src/compiler/allowed_machine_views.cc +++ b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -11,12 +11,15 @@ #include "utils/containers/map_from_keys_and_values.h" #include "utils/containers/product.h" #include "utils/containers/range.h" -#include "utils/containers/replicate.h" +#include "utils/containers/repeat_element.h" #include "utils/containers/sorted.h" #include 
"utils/containers/transform.h" #include "utils/containers/unordered_multiset_of.h" #include "utils/containers/unordered_set_of.h" #include "utils/containers/zip.h" +#include "utils/nonnegative_int/ceildiv.h" +#include "utils/nonnegative_int/nonnegative_range.h" +#include "utils/nonnegative_int/num_elements.h" #include "utils/overload.h" namespace FlexFlow { @@ -47,24 +50,29 @@ static std::unordered_set OperatorTaskSpace const &task, DeviceType const &device_type) { - auto get_max_stride_upper_bound = [](std::vector const &tensor_dims, - int total_devices) -> int { - int min_num_devices_with_full_stride_volume = product(transform( - tensor_dims, [](int const &num_devices) { return num_devices - 1; })); - return std::ceil(total_devices / min_num_devices_with_full_stride_volume); + auto get_max_stride_upper_bound = + [](std::vector const &tensor_dims, + nonnegative_int total_devices) -> nonnegative_int { + nonnegative_int min_num_devices_with_full_stride_volume = + product(transform(tensor_dims, [](nonnegative_int num_devices) { + return nonnegative_int{num_devices.unwrap_nonnegative() - 1}; + })); + return ceildiv(total_devices, min_num_devices_with_full_stride_volume); }; - auto candidate_strides = [&](std::vector const &tensor_dims, - int total_devices) + auto candidate_strides = [&](std::vector const &tensor_dims, + nonnegative_int total_devices) -> std::unordered_multiset { - int max_stride_upper_bound = + nonnegative_int max_stride_upper_bound = get_max_stride_upper_bound(tensor_dims, total_devices); std::vector single_stride_range = - transform(range(1, max_stride_upper_bound + 1), - [](int stride) { return stride_t{stride}; }); + transform(nonnegative_range(1_n, max_stride_upper_bound + 1_n), + [](nonnegative_int stride) { return stride_t{stride}; }); std::unordered_multiset> raw_stride_vectors = - cartesian_product(replicate(tensor_dims.size(), single_stride_range)); + cartesian_product( + repeat_element(/*num_times=*/num_elements(tensor_dims), + /*element=*/single_stride_range)); std::unordered_multiset strides = transform(raw_stride_vectors, [](auto const &stride_vec) { return MultiDimensionalStride{stride_vec}; @@ -75,8 +83,9 @@ static std::unordered_set auto candidate_starts = [](MachineSpecification const &ms, DeviceType const &device_type) { std::unordered_set result; - for (int node_idx : range(ms.num_nodes)) { - for (int device_idx : range(get_num_devices_per_node(ms, device_type))) { + for (nonnegative_int node_idx : nonnegative_range(ms.num_nodes)) { + for (nonnegative_int device_idx : + nonnegative_range(get_num_devices_per_node(ms, device_type))) { result.insert( MachineSpaceCoordinate{node_idx, device_idx, device_type}); } @@ -91,8 +100,8 @@ static std::unordered_set return get_all_permutations_with_repetition(options, num_dims(task)); }; - std::vector tensor_dims = task.degrees; - int total_devices = get_num_devices(machine_spec, device_type); + std::vector tensor_dims = task.degrees; + nonnegative_int total_devices = get_num_devices(machine_spec, device_type); std::unordered_set machine_views; diff --git a/lib/compiler/src/compiler/machine_mapping/get_machine_resource_splits.cc b/lib/compiler/src/compiler/machine_mapping/get_machine_resource_splits.cc index 5126d9687e..bb9d54f1e9 100644 --- a/lib/compiler/src/compiler/machine_mapping/get_machine_resource_splits.cc +++ b/lib/compiler/src/compiler/machine_mapping/get_machine_resource_splits.cc @@ -11,8 +11,9 @@ std::unordered_set> for (int i = 1; i < resource.num_nodes; i *= 2) { MachineSpecification sub_resource1 = 
resource; MachineSpecification sub_resource2 = resource; - sub_resource1.num_nodes = i; - sub_resource2.num_nodes = resource.num_nodes - i; + sub_resource1.num_nodes = nonnegative_int{i}; + sub_resource2.num_nodes = + nonnegative_int{resource.num_nodes.unwrap_nonnegative() - i}; result.insert(std::make_pair(sub_resource1, sub_resource2)); result.insert(std::make_pair(sub_resource2, sub_resource1)); } @@ -20,8 +21,9 @@ std::unordered_set> for (int i = 1; i < resource.num_gpus_per_node; i *= 2) { MachineSpecification sub_resource1 = resource; MachineSpecification sub_resource2 = resource; - sub_resource1.num_gpus_per_node = i; - sub_resource2.num_gpus_per_node = resource.num_gpus_per_node - i; + sub_resource1.num_gpus_per_node = nonnegative_int{i}; + sub_resource2.num_gpus_per_node = + nonnegative_int{resource.num_gpus_per_node.unwrap_nonnegative() - i}; result.insert(std::make_pair(sub_resource1, sub_resource2)); result.insert(std::make_pair(sub_resource2, sub_resource1)); } diff --git a/lib/compiler/src/compiler/machine_mapping/machine_mapping.cc b/lib/compiler/src/compiler/machine_mapping/machine_mapping.cc index fc3a58995c..82c8274808 100644 --- a/lib/compiler/src/compiler/machine_mapping/machine_mapping.cc +++ b/lib/compiler/src/compiler/machine_mapping/machine_mapping.cc @@ -1,20 +1,14 @@ #include "compiler/machine_mapping/machine_mapping.h" -#include "pcg/machine_specification.h" -#include "pcg/machine_view.h" -#include "pcg/operator_task_space.dtg.h" -#include "pcg/operator_task_space.h" -#include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "utils/containers/are_disjoint.h" -#include "utils/containers/get_one_of.h" #include "utils/containers/keys.h" -#include "utils/containers/map_values.h" #include "utils/containers/merge_maps.h" namespace FlexFlow { MachineMapping combine_disjoint_mappings(MachineMapping const &m1, MachineMapping const &m2) { - return MachineMapping{merge_maps(m1.machine_views, m2.machine_views)}; + return MachineMapping{ + merge_disjoint_maps(m1.machine_views, m2.machine_views)}; } bool nodes_are_disjoint(MachineMapping const &m1, MachineMapping const &m2) { diff --git a/lib/compiler/src/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.cc b/lib/compiler/src/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.cc index 715a4c2e3d..ed60004bf4 100644 --- a/lib/compiler/src/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.cc +++ b/lib/compiler/src/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.cc @@ -10,8 +10,8 @@ ParallelLayerGuidObliviousMachineMapping binary_combine_mappings( ParallelLayerGuidObliviousMachineMapping const &lhs, ParallelLayerGuidObliviousMachineMapping const &rhs) { return ParallelLayerGuidObliviousMachineMapping{ - merge_maps(map_keys(lhs.raw_mapping, nest_inside_left_child), - map_keys(rhs.raw_mapping, nest_inside_right_child)), + merge_disjoint_maps(map_keys(lhs.raw_mapping, nest_inside_left_child), + map_keys(rhs.raw_mapping, nest_inside_right_child)), }; } diff --git a/lib/compiler/src/compiler/series_parallel/computation_graph/computation_graph_binary_sp_decomposition.cc b/lib/compiler/src/compiler/series_parallel/computation_graph/computation_graph_binary_sp_decomposition.cc index 32fb53b58a..9886468386 100644 --- a/lib/compiler/src/compiler/series_parallel/computation_graph/computation_graph_binary_sp_decomposition.cc +++ b/lib/compiler/src/compiler/series_parallel/computation_graph/computation_graph_binary_sp_decomposition.cc @@ 
-164,7 +164,7 @@ std::unordered_multiset V1BinarySPDecomposition to_v1(ComputationGraphBinarySPDecomposition const &tree, - bidict const &layer_numbering) { + bidict const &layer_numbering) { return tree.visit( overload{[&](ComputationGraphBinarySeriesSplit const &series) { return V1BinarySPDecomposition{ diff --git a/lib/compiler/test/src/allowed_machine_views.cc b/lib/compiler/test/src/allowed_machine_views.cc index 936894ad2d..817cc80700 100644 --- a/lib/compiler/test/src/allowed_machine_views.cc +++ b/lib/compiler/test/src/allowed_machine_views.cc @@ -15,39 +15,39 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("1 degree of parallelism") { MachineSpecification ms = MachineSpecification{ - /*num_nodes=*/1, - /*num_cpus_per_node=*/5, - /*num_gpus_per_node=*/5, + /*num_nodes=*/1_n, + /*num_cpus_per_node=*/5_n, + /*num_gpus_per_node=*/5_n, /*inter_node_bandwidth=*/0, /*intra_node_bandwidth=*/0, }; - OperatorTaskSpace task = OperatorTaskSpace{{3}}; + OperatorTaskSpace task = OperatorTaskSpace{{3_n}}; std::unordered_set correct = { MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU}, - {MachineViewDimension{stride_t{1}, + /*node_idx=*/0_n, /*device_idx=*/0_n, DeviceType::GPU}, + {MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE}}, }, MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}, - {MachineViewDimension{stride_t{1}, + /*node_idx=*/0_n, /*device_idx=*/1_n, DeviceType::GPU}, + {MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE}}, }, MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/2, DeviceType::GPU}, - {MachineViewDimension{stride_t{1}, + /*node_idx=*/0_n, /*device_idx=*/2_n, DeviceType::GPU}, + {MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE}}, }, MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU}, - {MachineViewDimension{stride_t{2}, + /*node_idx=*/0_n, /*device_idx=*/0_n, DeviceType::GPU}, + {MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE}}, }, }; @@ -61,18 +61,18 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("2 degrees of parallelism") { MachineSpecification ms = MachineSpecification{ - /*num_nodes=*/3, - /*num_cpus_per_node=*/3, - /*num_gpus_per_node=*/3, + /*num_nodes=*/3_n, + /*num_cpus_per_node=*/3_n, + /*num_gpus_per_node=*/3_n, /*inter_node_bandwidth=*/0, /*intra_node_bandwidth=*/0, }; - OperatorTaskSpace task = OperatorTaskSpace{{2, 3}}; + OperatorTaskSpace task = OperatorTaskSpace{{2_n, 3_n}}; - auto make_2d_view = [&](int start_node_idx, - int start_device_idx, - int stride1, - int stride2, + auto make_2d_view = [&](nonnegative_int start_node_idx, + nonnegative_int start_device_idx, + nonnegative_int stride1, + nonnegative_int stride2, MachineSpecificationDimension m1, MachineSpecificationDimension m2) { return MachineView{ @@ -86,13 +86,19 @@ TEST_SUITE(FF_TEST_SUITE) { auto intra = MachineSpecificationDimension::INTRA_NODE; auto inter = MachineSpecificationDimension::INTER_NODE; std::unordered_set correct = { - make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, inter, intra), - make_2d_view(1, 0, /*stride1=*/1, /*stride2=*/1, inter, intra), - make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, inter, intra), - - make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, intra, inter), - make_2d_view(0, 1, /*stride1=*/1, /*stride2=*/1, intra, inter), - make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, intra, inter), + make_2d_view( + 0_n, 0_n, 
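// ---- [editor's sketch, not part of the patch] ----
// Reading the expected set in this test: a 1-D MachineView with start
// device index s and stride k places task i (0 <= i < degree) on device
// s + i*k, so for INTRA_NODE placement it fits iff
// s + (degree - 1)*k < num_gpus_per_node. For degree 3 on a 5-GPU node
// that admits exactly the four views listed: stride 1 with s in
// {0, 1, 2}, and stride 2 with s = 0 (devices 0, 2, 4). The placement
// rule here is inferred from the test data, not stated by the patch;
// the helper name below is illustrative:
//
//   bool view_fits_on_node(int start, int stride, int degree,
//                          int devices_per_node) {
//     // the last task's device index must stay on the node
//     return start + (degree - 1) * stride < devices_per_node;
//   }
// ---- [end sketch] ----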
/*stride1=*/1_n, /*stride2=*/1_n, inter, intra), + make_2d_view( + 1_n, 0_n, /*stride1=*/1_n, /*stride2=*/1_n, inter, intra), + make_2d_view( + 0_n, 0_n, /*stride1=*/2_n, /*stride2=*/1_n, inter, intra), + + make_2d_view( + 0_n, 0_n, /*stride1=*/1_n, /*stride2=*/1_n, intra, inter), + make_2d_view( + 0_n, 1_n, /*stride1=*/1_n, /*stride2=*/1_n, intra, inter), + make_2d_view( + 0_n, 0_n, /*stride1=*/2_n, /*stride2=*/1_n, intra, inter), }; std::unordered_set result = diff --git a/lib/compiler/test/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc b/lib/compiler/test/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc index 5c8ea1c0f1..b0d86124a1 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc @@ -28,12 +28,12 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{10, 2}, - ShardParallelDim{12, 1}, + ShardParallelDim{10_n, 2_n}, + ShardParallelDim{12_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, diff --git a/lib/compiler/test/src/compiler/machine_mapping/get_machine_resource_splits.cc b/lib/compiler/test/src/compiler/machine_mapping/get_machine_resource_splits.cc index 499b111f8f..5f4ba2bfdc 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/get_machine_resource_splits.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/get_machine_resource_splits.cc @@ -8,10 +8,11 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_machine_resource_splits") { - auto make_machine_spec = [](int num_nodes, int num_gpus_per_node) { + auto make_machine_spec = [](nonnegative_int num_nodes, + nonnegative_int num_gpus_per_node) { return MachineSpecification{ /*num_nodes=*/num_nodes, - /*num_cpus_per_node=*/1, + /*num_cpus_per_node=*/1_n, /*num_gpus_per_node=*/num_gpus_per_node, /*inter_node_bandwidth=*/1.0, /*intra_node_bandwidth=*/1.0, @@ -19,8 +20,8 @@ TEST_SUITE(FF_TEST_SUITE) { }; SUBCASE("returns no splits if no splits are possible") { - MachineSpecification input = make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/1); + MachineSpecification input = make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/1_n); std::unordered_set> result = get_machine_resource_splits(input); @@ -32,8 +33,8 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE( "returns splits in gpu and node dimensions, but not at the same time") { - MachineSpecification input = make_machine_spec(/*num_nodes=*/2, - /*num_gpus_per_node=*/2); + MachineSpecification input = make_machine_spec(/*num_nodes=*/2_n, + /*num_gpus_per_node=*/2_n); std::unordered_set> result = get_machine_resource_splits(input); @@ -41,16 +42,16 @@ TEST_SUITE(FF_TEST_SUITE) { std::unordered_set> correct = { { - make_machine_spec(/*num_nodes=*/2, - /*num_gpus_per_node=*/1), - make_machine_spec(/*num_nodes=*/2, - /*num_gpus_per_node=*/1), + make_machine_spec(/*num_nodes=*/2_n, + /*num_gpus_per_node=*/1_n), + make_machine_spec(/*num_nodes=*/2_n, + /*num_gpus_per_node=*/1_n), }, { - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/2), - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/2), + 
make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/2_n), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/2_n), }, }; @@ -60,8 +61,9 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("returns splits in node dimension in powers of two") { SUBCASE("num_nodes is a power of 2") { - MachineSpecification input = make_machine_spec(/*num_nodes=*/8, - /*num_gpus_per_node=*/1); + MachineSpecification input = + make_machine_spec(/*num_nodes=*/8_n, + /*num_gpus_per_node=*/1_n); std::unordered_set< std::pair> @@ -71,34 +73,34 @@ TEST_SUITE(FF_TEST_SUITE) { std::pair> correct = { { - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/1), - make_machine_spec(/*num_nodes=*/7, - /*num_gpus_per_node=*/1), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/1_n), + make_machine_spec(/*num_nodes=*/7_n, + /*num_gpus_per_node=*/1_n), }, { - make_machine_spec(/*num_nodes=*/2, - /*num_gpus_per_node=*/1), - make_machine_spec(/*num_nodes=*/6, - /*num_gpus_per_node=*/1), + make_machine_spec(/*num_nodes=*/2_n, + /*num_gpus_per_node=*/1_n), + make_machine_spec(/*num_nodes=*/6_n, + /*num_gpus_per_node=*/1_n), }, { - make_machine_spec(/*num_nodes=*/4, - /*num_gpus_per_node=*/1), - make_machine_spec(/*num_nodes=*/4, - /*num_gpus_per_node=*/1), + make_machine_spec(/*num_nodes=*/4_n, + /*num_gpus_per_node=*/1_n), + make_machine_spec(/*num_nodes=*/4_n, + /*num_gpus_per_node=*/1_n), }, { - make_machine_spec(/*num_nodes=*/6, - /*num_gpus_per_node=*/1), - make_machine_spec(/*num_nodes=*/2, - /*num_gpus_per_node=*/1), + make_machine_spec(/*num_nodes=*/6_n, + /*num_gpus_per_node=*/1_n), + make_machine_spec(/*num_nodes=*/2_n, + /*num_gpus_per_node=*/1_n), }, { - make_machine_spec(/*num_nodes=*/7, - /*num_gpus_per_node=*/1), - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/1), + make_machine_spec(/*num_nodes=*/7_n, + /*num_gpus_per_node=*/1_n), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/1_n), }, }; @@ -106,8 +108,9 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("num_nodes is not a power of 2") { - MachineSpecification input = make_machine_spec(/*num_nodes=*/6, - /*num_gpus_per_node=*/1); + MachineSpecification input = + make_machine_spec(/*num_nodes=*/6_n, + /*num_gpus_per_node=*/1_n); std::unordered_set< std::pair> @@ -117,28 +120,28 @@ TEST_SUITE(FF_TEST_SUITE) { std::pair> correct = { { - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/1), - make_machine_spec(/*num_nodes=*/5, - /*num_gpus_per_node=*/1), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/1_n), + make_machine_spec(/*num_nodes=*/5_n, + /*num_gpus_per_node=*/1_n), }, { - make_machine_spec(/*num_nodes=*/2, - /*num_gpus_per_node=*/1), - make_machine_spec(/*num_nodes=*/4, - /*num_gpus_per_node=*/1), + make_machine_spec(/*num_nodes=*/2_n, + /*num_gpus_per_node=*/1_n), + make_machine_spec(/*num_nodes=*/4_n, + /*num_gpus_per_node=*/1_n), }, { - make_machine_spec(/*num_nodes=*/4, - /*num_gpus_per_node=*/1), - make_machine_spec(/*num_nodes=*/2, - /*num_gpus_per_node=*/1), + make_machine_spec(/*num_nodes=*/4_n, + /*num_gpus_per_node=*/1_n), + make_machine_spec(/*num_nodes=*/2_n, + /*num_gpus_per_node=*/1_n), }, { - make_machine_spec(/*num_nodes=*/5, - /*num_gpus_per_node=*/1), - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/1), + make_machine_spec(/*num_nodes=*/5_n, + /*num_gpus_per_node=*/1_n), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/1_n), }, }; @@ -148,8 +151,9 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("returns splits in gpu dimension in powers of two") { 
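// ---- [editor's sketch, not part of the patch] ----
// The split enumeration behind these expectations (from the
// get_machine_resource_splits.cc hunk earlier in this patch): for each
// dimension it walks i = 1, 2, 4, ... while i < n and emits both
// (i, n - i) and (n - i, i). Worked example for num_nodes = 6:
//
//   for (int i = 1; i < 6; i *= 2) { emit(i, 6 - i); emit(6 - i, i); }
//   // i = 1 -> (1, 5) and (5, 1)
//   // i = 2 -> (2, 4) and (4, 2)
//   // i = 4 -> (4, 2) and (2, 4)   (duplicates collapse in the set)
//
// which is exactly the four node-dimension pairs the subcase checks.
// Note the resulting splits themselves need not be powers of two; only
// the first component of each generating pair is.
// ---- [end sketch] ----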
SUBCASE("num_gpus_per_node is a power of 2") { - MachineSpecification input = make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/8); + MachineSpecification input = + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/8_n); std::unordered_set< std::pair> @@ -159,34 +163,34 @@ TEST_SUITE(FF_TEST_SUITE) { std::pair> correct = { { - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/1), - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/7), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/1_n), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/7_n), }, { - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/2), - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/6), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/2_n), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/6_n), }, { - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/4), - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/4), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/4_n), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/4_n), }, { - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/6), - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/2), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/6_n), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/2_n), }, { - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/7), - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/1), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/7_n), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/1_n), }, }; @@ -194,8 +198,9 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("num_gpus_per_node is not a power of 2") { - MachineSpecification input = make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/6); + MachineSpecification input = + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/6_n); std::unordered_set< std::pair> @@ -205,28 +210,28 @@ TEST_SUITE(FF_TEST_SUITE) { std::pair> correct = { { - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/1), - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/5), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/1_n), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/5_n), }, { - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/2), - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/4), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/2_n), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/4_n), }, { - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/4), - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/2), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/4_n), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/2_n), }, { - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/5), - make_machine_spec(/*num_nodes=*/1, - /*num_gpus_per_node=*/1), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/5_n), + make_machine_spec(/*num_nodes=*/1_n, + /*num_gpus_per_node=*/1_n), }, }; } diff --git a/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc b/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc index 542edd9fa9..c5b891781d 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc +++ 
b/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc @@ -45,14 +45,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView mv1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{1}, + stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -60,31 +60,31 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView mv2 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2}, + stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE, }, }, }; MachineSpecification full_machine_spec = MachineSpecification{ - /*num_nodes=*/2, - /*num_cpus_per_node=*/1, - /*num_gpus_per_node=*/1, + /*num_nodes=*/2_n, + /*num_cpus_per_node=*/1_n, + /*num_gpus_per_node=*/1_n, /*inter_node_bandwidth=*/1, /*intra_node_bandwidth=*/1, }; MachineSpecification split_machine_spec = MachineSpecification{ - /*num_nodes=*/1, - /*num_cpus_per_node=*/1, - /*num_gpus_per_node=*/1, + /*num_nodes=*/1_n, + /*num_cpus_per_node=*/1_n, + /*num_gpus_per_node=*/1_n, /*inter_node_bandwidth=*/1, /*intra_node_bandwidth=*/1, }; @@ -121,8 +121,8 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorDims{ FFOrdered{}, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, diff --git a/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc b/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc index 52ad82595d..642fdf7ae1 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc @@ -30,12 +30,12 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{10, 2}, - ShardParallelDim{12, 1}, + ShardParallelDim{10_n, 2_n}, + ShardParallelDim{12_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -66,14 +66,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView pre_mv1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{1}, + stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -81,14 +81,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView pre_mv2 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2}, + stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -96,14 +96,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView post_mv1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{3}, + stride_t{3_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -111,14 +111,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView post_mv2 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - 
/*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{4}, + stride_t{4_n}, MachineSpecificationDimension::INTRA_NODE, }, }, diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc index 304034f9be..e88b714bd4 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc @@ -9,14 +9,14 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("combine_disjoint_mappings(MachineMapping, MachineMappping)") { MachineView machine_view_0 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{1}, + stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -24,14 +24,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView machine_view_1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2}, + stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -55,14 +55,14 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("nodes_are_disjoint(MachineMapping, MachineMappping)") { MachineView machine_view_0 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{1}, + stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -70,14 +70,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView machine_view_1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2}, + stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE, }, }, diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_problem_tree/get_machine_mapping_problem_tree.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_problem_tree/get_machine_mapping_problem_tree.cc index 06ab1e5b8c..a8ec24de63 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_problem_tree/get_machine_mapping_problem_tree.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_problem_tree/get_machine_mapping_problem_tree.cc @@ -65,11 +65,11 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{10, 1}, + ShardParallelDim{10_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc index 73b921fc98..4a261bcdae 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc @@ -8,14 +8,14 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("series_combine") { MachineView machine_view_0 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, 
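// ---- [editor's sketch, not part of the patch] ----
// combine_disjoint_mappings, exercised by this test, now delegates to
// merge_disjoint_maps per the machine_mapping.cc hunk earlier in this
// patch. A minimal sketch of the assumed semantics -- the union of two
// maps whose key sets must not overlap (the error behavior on overlap
// is an assumption; the real helper lives in
// lib/utils/include/utils/containers/merge_maps.h):
//
//   #include <cassert>
//   #include <unordered_map>
//
//   template <typename K, typename V>
//   std::unordered_map<K, V>
//       merge_disjoint_maps(std::unordered_map<K, V> const &lhs,
//                           std::unordered_map<K, V> const &rhs) {
//     std::unordered_map<K, V> result = lhs;
//     for (auto const &[k, v] : rhs) {
//       bool inserted = result.insert({k, v}).second;
//       assert(inserted && "maps passed to merge_disjoint_maps overlap");
//     }
//     return result;
//   }
// ---- [end sketch] ----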
/*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{1}, + stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -23,14 +23,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView machine_view_1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2}, + stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -189,14 +189,14 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("parallel_combine") { MachineView machine_view_0 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{1}, + stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -204,14 +204,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView machine_view_1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2}, + stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -312,14 +312,14 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("minimize_runtime") { MachineView machine_view_0 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{1}, + stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -327,14 +327,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView machine_view_1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2}, + stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE, }, }, diff --git a/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc b/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc index 8612017705..313f24c384 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc @@ -45,14 +45,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView mv1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{1}, + stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -60,31 +60,31 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView mv2 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2}, + stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE, }, }, }; MachineSpecification full_machine_spec = MachineSpecification{ - /*num_nodes=*/2, - /*num_cpus_per_node=*/1, - /*num_gpus_per_node=*/1, + /*num_nodes=*/2_n, + /*num_cpus_per_node=*/1_n, + /*num_gpus_per_node=*/1_n, /*inter_node_bandwidth=*/1, /*intra_node_bandwidth=*/1, }; MachineSpecification split_machine_spec 
= MachineSpecification{ - /*num_nodes=*/1, - /*num_cpus_per_node=*/1, - /*num_gpus_per_node=*/1, + /*num_nodes=*/1_n, + /*num_cpus_per_node=*/1_n, + /*num_gpus_per_node=*/1_n, /*inter_node_bandwidth=*/1, /*intra_node_bandwidth=*/1, }; @@ -121,8 +121,8 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorDims{ FFOrdered{}, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, diff --git a/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_result_with_memory.cc b/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_result_with_memory.cc index 1f3b7545a8..04149cae8f 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_result_with_memory.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_result_with_memory.cc @@ -9,14 +9,14 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("remove_non_pareto_optimal_machine_mapping_result") { MachineView machine_view_0 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{1}, + stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -24,14 +24,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView machine_view_1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2}, + stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -39,14 +39,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView machine_view_2 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{4}, + stride_t{4_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -55,19 +55,19 @@ TEST_SUITE(FF_TEST_SUITE) { OpCostMetrics cost1 = OpCostMetrics{ /*forward_runtime=*/2.0, /*backward_runtime=*/2.0, - /*memory=*/nonnegative_int{2}, + /*memory=*/2_n, }; OpCostMetrics cost2 = OpCostMetrics{ /*forward_runtime=*/4.0, /*backward_runtime=*/4.0, - /*memory=*/nonnegative_int{1}, + /*memory=*/1_n, }; OpCostMetrics cost3 = OpCostMetrics{ /*forward_runtime=*/2.0, /*backward_runtime=*/2.0, - /*memory=*/nonnegative_int{3}, + /*memory=*/3_n, }; MachineMappingForSingleLayer mm1 = MachineMappingForSingleLayer{ @@ -159,14 +159,14 @@ TEST_SUITE(FF_TEST_SUITE) { "std::optional const&)") { MachineView machine_view_0 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{1}, + stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -174,14 +174,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView machine_view_1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2}, + stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -190,7 +190,7 @@ TEST_SUITE(FF_TEST_SUITE) { OpCostMetrics pre_cost = OpCostMetrics{ /*forward_runtime=*/2.0, /*backward_runtime=*/2.0, - /*memory=*/nonnegative_int{2}, + 
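// ---- [editor's sketch, not part of the patch] ----
// The three costs in this test are chosen so that cost3 (runtime 2.0,
// memory 3) is dominated by cost1 (runtime 2.0, memory 2): no worse in
// any dimension and strictly better in one, so the Pareto filter should
// drop it while keeping the runtime/memory trade-off between cost1 and
// cost2. A sketch of the dominance test being exercised (field names
// follow OpCostMetrics from this patch; the exact comparison, and
// ordered comparison on nonnegative_int, are assumptions):
//
//   bool dominates(OpCostMetrics const &a, OpCostMetrics const &b) {
//     // a dominates b if a is <= b everywhere and < somewhere
//     bool no_worse = a.forward_runtime <= b.forward_runtime &&
//                     a.backward_runtime <= b.backward_runtime &&
//                     a.memory <= b.memory;
//     bool strictly_better = a.forward_runtime < b.forward_runtime ||
//                            a.backward_runtime < b.backward_runtime ||
//                            a.memory < b.memory;
//     return no_worse && strictly_better;
//   }
// ---- [end sketch] ----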
/*memory=*/2_n, }; MachineMappingWithMemoryResult pre = MachineMappingWithMemoryResult{{ MachineMappingForSingleLayer{ @@ -217,7 +217,7 @@ TEST_SUITE(FF_TEST_SUITE) { OpCostMetrics post_cost = OpCostMetrics{ /*forward_runtime=*/4.0, /*backward_runtime=*/4.0, - /*memory=*/nonnegative_int{1}, + /*memory=*/1_n, }; MachineMappingWithMemoryResult post = MachineMappingWithMemoryResult{{ @@ -360,14 +360,14 @@ TEST_SUITE(FF_TEST_SUITE) { "std::optional const&)") { MachineView machine_view_0 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{1}, + stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -375,14 +375,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView machine_view_1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2}, + stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -391,7 +391,7 @@ TEST_SUITE(FF_TEST_SUITE) { OpCostMetrics lhs_cost = OpCostMetrics{ /*forward_runtime=*/2.0, /*backward_runtime=*/2.0, - /*memory=*/nonnegative_int{2}, + /*memory=*/2_n, }; MachineMappingWithMemoryResult lhs = MachineMappingWithMemoryResult{{ MachineMappingForSingleLayer{ @@ -418,7 +418,7 @@ TEST_SUITE(FF_TEST_SUITE) { OpCostMetrics rhs_cost = OpCostMetrics{ /*forward_runtime=*/4.0, /*backward_runtime=*/4.0, - /*memory=*/nonnegative_int{1}, + /*memory=*/1_n, }; MachineMappingWithMemoryResult rhs = MachineMappingWithMemoryResult{{ MachineMappingForSingleLayer{ @@ -492,14 +492,14 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("minimize_runtime(memory)") { MachineView machine_view_0 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{1}, + stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -507,14 +507,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView machine_view_1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2}, + stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -522,14 +522,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView machine_view_2 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, + /*node_idx=*/0_n, + /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{4}, + stride_t{4_n}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -538,17 +538,17 @@ TEST_SUITE(FF_TEST_SUITE) { OpCostMetrics cost1 = OpCostMetrics{ /*forward_runtime=*/2.0, /*backward_runtime=*/2.0, - /*memory=*/nonnegative_int{2}, + /*memory=*/2_n, }; OpCostMetrics cost2 = OpCostMetrics{ /*forward_runtime=*/4.0, /*backward_runtime=*/4.0, - /*memory=*/nonnegative_int{1}, + /*memory=*/1_n, }; OpCostMetrics cost3 = OpCostMetrics{ /*forward_runtime=*/2.0, /*backward_runtime=*/2.0, - /*memory=*/nonnegative_int{3}, + /*memory=*/3_n, }; MachineMappingForSingleLayer mm1 = MachineMappingForSingleLayer{ diff --git a/lib/compiler/test/src/compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc 
b/lib/compiler/test/src/compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc index 2b59669aad..d0f289043c 100644 --- a/lib/compiler/test/src/compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc +++ b/lib/compiler/test/src/compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc @@ -29,11 +29,12 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraph cg = [&] { ComputationGraphBuilder b; - TensorShape input_shape = TensorShape{TensorDims{FFOrdered{ - 10, - 12, - }}, - DataType::FLOAT}; + TensorShape input_shape = + TensorShape{TensorDims{FFOrdered{ + 10_n, + 12_n, + }}, + DataType::FLOAT}; b.create_input(input_shape, CreateGrad::YES, input_layer_name); return b.computation_graph; @@ -57,16 +58,17 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraph cg = [&] { ComputationGraphBuilder b; - TensorShape input_shape = TensorShape{TensorDims{FFOrdered{ - 10, - 12, - }}, - DataType::FLOAT}; + TensorShape input_shape = + TensorShape{TensorDims{FFOrdered{ + 10_n, + 12_n, + }}, + DataType::FLOAT}; tensor_guid_t input = b.create_input(input_shape, CreateGrad::YES, input_layer_name); b.dense(input, - /*outDim=*/14, + /*outDim=*/14_n, /*activation=*/std::nullopt, /*use_bias=*/true, /*data_type=*/DataType::FLOAT, @@ -119,9 +121,9 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraphBuilder b; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - 10, - 12, + TensorDims{FFOrdered{ + 10_n, + 12_n, }}, DataType::FLOAT, }; @@ -129,7 +131,7 @@ TEST_SUITE(FF_TEST_SUITE) { b.create_input(input_shape, CreateGrad::YES, input_name); b.dense(input, - /*outDim=*/14, + /*outDim=*/14_n, /*activation=*/std::nullopt, /*use_bias=*/false, /*data_type=*/DataType::FLOAT, @@ -138,7 +140,7 @@ TEST_SUITE(FF_TEST_SUITE) { /*name=*/op1_name, /*projection_name=*/w1_name); b.dense(input, - /*outDim=*/14, + /*outDim=*/14_n, /*activation=*/std::nullopt, /*use_bias=*/false, /*data_type=*/DataType::FLOAT, @@ -189,9 +191,9 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraphBuilder b; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - 10, - 12, + TensorDims{FFOrdered{ + 10_n, + 12_n, }}, DataType::FLOAT, }; @@ -246,9 +248,9 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraphBuilder b; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - 10, - 12, + TensorDims{FFOrdered{ + 10_n, + 12_n, }}, DataType::FLOAT, }; @@ -277,7 +279,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("real models") { SUBCASE("split_test") { ComputationGraph cg = - get_split_test_computation_graph(/*batch_size=*/8); + get_split_test_computation_graph(/*batch_size=*/8_n); std::optional sp_decomposition = get_computation_graph_series_parallel_decomposition(cg); @@ -339,14 +341,15 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraph cg = [&] { ComputationGraphBuilder b; - TensorShape input_shape = TensorShape{TensorDims{FFOrdered{ - 10, - 12, - }}, - DataType::FLOAT}; + TensorShape input_shape = + TensorShape{TensorDims{FFOrdered{ + 10_n, + 12_n, + }}, + DataType::FLOAT}; tensor_guid_t input = b.create_input(input_shape, CreateGrad::YES); - b.dense(input, /*outDim=*/14); + b.dense(input, /*outDim=*/14_n); return b.computation_graph; }(); @@ -356,7 +359,8 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("split_test") { - ComputationGraph cg = get_split_test_computation_graph(/*batch_size=*/8); + ComputationGraph cg = + get_split_test_computation_graph(/*batch_size=*/8_n); std::string result = 
render_preprocessed_computation_graph_for_sp_decomposition(cg); diff --git a/lib/compiler/test/src/compiler/task_graph_simulator/task_simulator.cc b/lib/compiler/test/src/compiler/task_graph_simulator/task_simulator.cc index e278338440..d262539dc1 100644 --- a/lib/compiler/test/src/compiler/task_graph_simulator/task_simulator.cc +++ b/lib/compiler/test/src/compiler/task_graph_simulator/task_simulator.cc @@ -38,9 +38,9 @@ namespace FlexFlow { TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("task_simulator_estimate_forward_pass_time") { MachineSpecification machine_spec = - MachineSpecification{/*num_nodes=*/3, - /*num_cpus_per_node=*/3, - /*num_gpus_per_node=*/3, + MachineSpecification{/*num_nodes=*/3_n, + /*num_cpus_per_node=*/3_n, + /*num_gpus_per_node=*/3_n, /*inter_node_bandwidth=*/1.0f, /*intra_node_bandwidth=*/1.0f}; @@ -50,8 +50,8 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorDims{ FFOrdered{}, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -63,16 +63,16 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_layer_guid_t layer1 = get_source_layer(tensor1); std::vector dims = { - MachineViewDimension{stride_t{1}, + MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{1}, + MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTER_NODE}, }; ParallelComputationGraph pcg = b.pcg; MachineView mv1 = - MachineView{MachineSpaceCoordinate{0, 0, DeviceType::GPU}, dims}; + MachineView{MachineSpaceCoordinate{0_n, 0_n, DeviceType::GPU}, dims}; MachineView mv2 = - MachineView{MachineSpaceCoordinate{0, 1, DeviceType::GPU}, dims}; + MachineView{MachineSpaceCoordinate{0_n, 1_n, DeviceType::GPU}, dims}; MachineMapping device_mapping = MachineMapping{{ {layer0, mv1}, @@ -84,7 +84,7 @@ TEST_SUITE(FF_TEST_SUITE) { /*forward_op_cost=*/10.0f, /*backward_op_cost=*/10.0f, /*comm_cost=*/1.0f, - /*memory_cost=*/nonnegative_int{0}); + /*memory_cost=*/0_n); float result = task_simulator_estimate_forward_pass_time( pcg, estimator, device_mapping, machine_spec); @@ -99,16 +99,16 @@ TEST_SUITE(FF_TEST_SUITE) { if (op.op_attrs.has()) { return OpCostMetrics{/*forward_runtime=*/10.0f, /*backward_runtime=*/10.0f, - /*memory=*/nonnegative_int{0}}; // layer0 + /*memory=*/0_n}; // layer0 } if (op.op_attrs.has()) { return OpCostMetrics{/*forward_runtime=*/1.0f, /*backward_runtime=*/1.0f, - /*memory=*/nonnegative_int{0}}; // layer1 + /*memory=*/0_n}; // layer1 } return OpCostMetrics{/*forward_runtime=*/0.0f, /*backward_runtime=*/0.0f, - /*memory=*/nonnegative_int{0}}; + /*memory=*/0_n}; }, [](TensorSetMovement const &comm) { return 5.0f; }); @@ -124,10 +124,10 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ - FFOrdered{ShardParallelDim{10, 1}}, + FFOrdered{ShardParallelDim{10_n, 1_n}}, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -145,23 +145,23 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelComputationGraph pcg = b.pcg; std::vector dims = { - MachineViewDimension{stride_t{1}, + MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{1}, + MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{1}, + MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTER_NODE}, }; SUBCASE("all different devices") { - MachineView mv0 = - 
MachineView{MachineSpaceCoordinate{0, 0, DeviceType::GPU}, dims}; - MachineView mv1 = - MachineView{MachineSpaceCoordinate{0, 1, DeviceType::GPU}, dims}; - MachineView mv2 = - MachineView{MachineSpaceCoordinate{1, 0, DeviceType::GPU}, dims}; - MachineView mv3 = - MachineView{MachineSpaceCoordinate{1, 1, DeviceType::GPU}, dims}; + MachineView mv0 = MachineView{ + MachineSpaceCoordinate{0_n, 0_n, DeviceType::GPU}, dims}; + MachineView mv1 = MachineView{ + MachineSpaceCoordinate{0_n, 1_n, DeviceType::GPU}, dims}; + MachineView mv2 = MachineView{ + MachineSpaceCoordinate{1_n, 0_n, DeviceType::GPU}, dims}; + MachineView mv3 = MachineView{ + MachineSpaceCoordinate{1_n, 1_n, DeviceType::GPU}, dims}; MachineMapping device_mapping = MachineMapping{{ {layer0, mv0}, @@ -174,7 +174,7 @@ TEST_SUITE(FF_TEST_SUITE) { /*forward_op_cost=*/10.0f, /*backward_op_cost=*/10.0f, /*comm_cost=*/1.0f, - /*memory_cost=*/nonnegative_int{0}); + /*memory_cost=*/0_n); float result = task_simulator_estimate_forward_pass_time( pcg, estimator, device_mapping, machine_spec); @@ -187,30 +187,29 @@ TEST_SUITE(FF_TEST_SUITE) { if (op.op_attrs.has()) { return OpCostMetrics{/*forward_runtime=*/10.0f, /*backward_runtime=*/10.0f, - /*memory=*/nonnegative_int{0}}; // layer0 + /*memory=*/0_n}; // layer0 } if (op.op_attrs.has()) { - return OpCostMetrics{ - /*forward_runtime=*/1.0f, - /*backward_runtime=*/1.0f, - /*memory=*/nonnegative_int{0}}; // layers 1, 2 + return OpCostMetrics{/*forward_runtime=*/1.0f, + /*backward_runtime=*/1.0f, + /*memory=*/0_n}; // layers 1, 2 } if (op.op_attrs.has()) { return OpCostMetrics{/*forward_runtime=*/2.0f, /*backward_runtime=*/2.0f, - /*memory=*/nonnegative_int{0}}; // layer3 + /*memory=*/0_n}; // layer3 } return OpCostMetrics{/*forward_runtime=*/0.0f, /*backward_runtime=*/0.0f, - /*memory=*/nonnegative_int{0}}; + /*memory=*/0_n}; }, [](TensorSetMovement const &comm) { return 5.0f; }); } } SUBCASE("all the same device") { - MachineView mv = - MachineView{MachineSpaceCoordinate{0, 0, DeviceType::GPU}, dims}; + MachineView mv = MachineView{ + MachineSpaceCoordinate{0_n, 0_n, DeviceType::GPU}, dims}; MachineMapping device_mapping = MachineMapping{{ {layer0, mv}, {layer1, mv}, @@ -222,7 +221,7 @@ TEST_SUITE(FF_TEST_SUITE) { /*forward_op_cost=*/10.0f, /*backward_op_cost=*/10.0f, /*comm_cost=*/1.0f, - /*memory_cost=*/nonnegative_int{0}); + /*memory_cost=*/0_n); float result = task_simulator_estimate_forward_pass_time( pcg, cost_estimator, device_mapping, machine_spec); @@ -235,22 +234,21 @@ TEST_SUITE(FF_TEST_SUITE) { if (op.op_attrs.has()) { return OpCostMetrics{/*forward_runtime=*/10.0f, /*backward_runtime=*/10.0f, - /*memory=*/nonnegative_int{0}}; // layer0 + /*memory=*/0_n}; // layer0 } if (op.op_attrs.has()) { - return OpCostMetrics{ - /*forward_runtime=*/1.0f, - /*backward_runtime=*/1.0f, - /*memory=*/nonnegative_int{0}}; // layers 1, 2 + return OpCostMetrics{/*forward_runtime=*/1.0f, + /*backward_runtime=*/1.0f, + /*memory=*/0_n}; // layers 1, 2 } if (op.op_attrs.has()) { return OpCostMetrics{/*forward_runtime=*/2.0f, /*backward_runtime=*/2.0f, - /*memory=*/nonnegative_int{0}}; // layer3 + /*memory=*/0_n}; // layer3 } return OpCostMetrics{/*forward_runtime=*/0.0f, /*backward_runtime=*/0.0f, - /*memory=*/nonnegative_int{0}}; + /*memory=*/0_n}; }, [](TensorSetMovement const &comm) { return 5.0f; }); float result = task_simulator_estimate_forward_pass_time( diff --git a/lib/compiler/test/src/graph_optimize_state.cc b/lib/compiler/test/src/graph_optimize_state.cc index 46177ad420..0fd9e245a6 100644 --- 
a/lib/compiler/test/src/graph_optimize_state.cc +++ b/lib/compiler/test/src/graph_optimize_state.cc @@ -11,35 +11,37 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input_shape = ParallelTensorShape{ParallelTensorDims{ FFOrdered{ - ShardParallelDim{32, 2}, - ShardParallelDim{16, 1}, + ShardParallelDim{32_n, 2_n}, + ShardParallelDim{16_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT}; parallel_tensor_guid_t input0 = builder.create_input_tensor(input_shape, CreateGrad::YES, "input0"); - parallel_tensor_guid_t dense0 = builder.dense(input0, - 8, - Activation::RELU, - true, - DataType::FLOAT, - std::nullopt, - std::nullopt, - "dense0"); + parallel_tensor_guid_t dense0 = + builder.dense(/*input=*/input0, + /*outDim=*/8_n, + /*activation=*/Activation::RELU, + /*use_bias=*/true, + /*data_type=*/DataType::FLOAT, + /*projection_initializer=*/std::nullopt, + /*bias_initializer=*/std::nullopt, + /*name=*/"dense0"); - parallel_tensor_guid_t dense1 = builder.dense(dense0, - 4, - Activation::RELU, - true, - DataType::FLOAT, - std::nullopt, - std::nullopt, - "dense1"); + parallel_tensor_guid_t dense1 = + builder.dense(/*input=*/dense0, + /*outDim=*/4_n, + /*activation=*/Activation::RELU, + /*use_bias=*/true, + /*data_type=*/DataType::FLOAT, + /*projection_initializer=*/std::nullopt, + /*bias_initializer=*/std::nullopt, + /*name=*/"dense1"); ParallelComputationGraph pcg = builder.pcg; @@ -59,14 +61,15 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_tensor_guid_t input0_ = builder.create_input_tensor(input_shape, CreateGrad::YES, "input0"); - parallel_tensor_guid_t dense0_ = builder.dense(input0, - 8, - Activation::RELU, - true, - DataType::FLOAT, - std::nullopt, - std::nullopt, - "dense0"); + parallel_tensor_guid_t dense0_ = + builder.dense(/*input=*/input0, + /*outDim=*/8_n, + /*activation=*/Activation::RELU, + /*use_bias=*/true, + /*data_type=*/DataType::FLOAT, + /*projection_initializer=*/std::nullopt, + /*bias_initializer=*/std::nullopt, + /*name=*/"dense0"); ParallelComputationGraph pcg_ = builder.pcg; diff --git a/lib/kernels/include/kernels/array_shape.h b/lib/kernels/include/kernels/array_shape.h index 326c6922f9..57498ee466 100644 --- a/lib/kernels/include/kernels/array_shape.h +++ b/lib/kernels/include/kernels/array_shape.h @@ -1,8 +1,9 @@ #ifndef _FLEXFLOW_KERNELS_ARRAY_SHAPE_H #define _FLEXFLOW_KERNELS_ARRAY_SHAPE_H -#include "legion_dim.h" +#include "kernels/legion_dim.h" #include "op-attrs/tensor_shape.dtg.h" +#include "utils/nonnegative_int/nonnegative_int.h" #include "utils/stack_vector/stack_vector.h" #include "utils/visitable.h" #include @@ -14,44 +15,49 @@ namespace FlexFlow { struct ArrayShape { public: ArrayShape() = delete; - ArrayShape(size_t *dims, size_t num_dims); + ArrayShape(nonnegative_int *dims, nonnegative_int num_dims); ArrayShape(TensorShape const &shape); - ArrayShape(std::vector const &); + ArrayShape(std::vector const &); /** * @brief Alias of ArrayShape::num_elements for compatibility with * Legion::Domain */ - std::size_t get_volume() const; + nonnegative_int get_volume() const; /** * @brief Alias of ArrayShape::num_dims for compatibility with Legion::Domain */ - std::size_t get_dim() const; + nonnegative_int get_dim() const; - std::size_t num_elements() const; - std::size_t num_dims() const; + nonnegative_int num_elements() const; + nonnegative_int num_dims() const; - std::size_t operator[](legion_dim_t) const; - std::size_t at(legion_dim_t) const; - std::size_t at(ff_dim_t) 
const; + nonnegative_int operator[](legion_dim_t) const; + nonnegative_int at(legion_dim_t) const; + nonnegative_int at(ff_dim_t) const; + + bool operator==(ArrayShape const &) const; + bool operator!=(ArrayShape const &) const; legion_dim_t last_idx() const; legion_dim_t neg_idx(int) const; - std::optional<std::size_t> at_maybe(legion_dim_t) const; - std::optional<std::size_t> at_maybe(ff_dim_t) const; + std::optional<nonnegative_int> at_maybe(legion_dim_t) const; + std::optional<nonnegative_int> at_maybe(ff_dim_t) const; ArrayShape sub_shape(std::optional<std::variant<ff_dim_t, legion_dim_t>> start, std::optional<std::variant<ff_dim_t, legion_dim_t>> end) const; public: - LegionTensorDims dims; + LegionOrdered<nonnegative_int> dims; + +private: + std::tuple<LegionOrdered<nonnegative_int> const &> tie() const; }; -FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(ArrayShape, dims); -size_t get_volume(ArrayShape const &); +nonnegative_int get_volume(ArrayShape const &); TensorShape get_tensor_shape(ArrayShape const &, DataType); diff --git a/lib/kernels/include/kernels/batch_norm_kernels.h b/lib/kernels/include/kernels/batch_norm_kernels.h index 7d533d672c..f2ca17f429 100644 --- a/lib/kernels/include/kernels/batch_norm_kernels.h +++ b/lib/kernels/include/kernels/batch_norm_kernels.h @@ -3,46 +3,11 @@ #include "device.h" #include "kernels/allocation.h" +#include "kernels/batch_norm_per_device_state.dtg.h" #include "kernels/ff_handle.h" #include namespace FlexFlow { - -struct BatchNormPerDeviceState { - PerDeviceFFHandle handle; - ffTensorDescriptor_t inputTensor; - ffTensorDescriptor_t outputTensor; - ffTensorDescriptor_t biasTensor; - ffActivationDescriptor_t actiDesc; - ffBatchNormMode_t mode; - float *runningMean; - float *runningVar; - float *saveMean; - float *saveVar; - int output_n; - int output_c; - int output_h; - int output_w; - req<bool> relu; -}; - -FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(BatchNormPerDeviceState, - handle, - inputTensor, - outputTensor, - biasTensor, - actiDesc, - mode, - runningMean, - runningVar, - saveMean, - saveVar, - output_n, - output_c, - output_h, - output_w, - relu); - namespace Kernels { namespace BatchNorm { @@ -56,14 +21,14 @@ BatchNormPerDeviceState init_kernel(PerDeviceFFHandle handle, bool relu); void forward_kernel(ffStream_t stream, - BatchNormPerDeviceState const &m, + BatchNormPerDeviceState const &per_device_state, float const *input_ptr, float *output_ptr, float const *scale_ptr, float const *bias_ptr); void backward_kernel(ffStream_t stream, - BatchNormPerDeviceState const &m, + BatchNormPerDeviceState const &per_device_state, float const *input_ptr, float *output_grad_ptr, float const *output_ptr, diff --git a/lib/kernels/include/kernels/batch_norm_per_device_state.struct.toml b/lib/kernels/include/kernels/batch_norm_per_device_state.struct.toml new file mode 100644 index 0000000000..6d2f04f60c --- /dev/null +++ b/lib/kernels/include/kernels/batch_norm_per_device_state.struct.toml @@ -0,0 +1,68 @@ +namespace = "FlexFlow" +name = "BatchNormPerDeviceState" +features = [] + +includes = [ + "kernels/device.h", + "kernels/ff_handle.h", +] + +[[fields]] +name = "handle" +type = "::FlexFlow::PerDeviceFFHandle" + +[[fields]] +name = "inputTensor" +type = "ffTensorDescriptor_t" + +[[fields]] +name = "outputTensor" +type = "ffTensorDescriptor_t" + +[[fields]] +name = "biasTensor" +type = "ffTensorDescriptor_t" + +[[fields]] +name = "actiDesc" +type = "ffActivationDescriptor_t" + +[[fields]] +name = "mode" +type = "ffBatchNormMode_t" + +[[fields]] +name = "runningMean" +type = "float *" + +[[fields]] +name = "runningVar" +type = "float *" + +[[fields]] +name = "saveMean" +type = "float *" + +[[fields]] +name = "saveVar" +type = "float *" +
+[[fields]] +name = "output_n" +type = "int" + +[[fields]] +name = "output_c" +type = "int" + +[[fields]] +name = "output_h" +type = "int" + +[[fields]] +name = "output_w" +type = "int" + +[[fields]] +name = "relu" +type = "bool" diff --git a/lib/kernels/include/kernels/legion_dim.h b/lib/kernels/include/kernels/legion_dim.h index e4dd9723b8..7b9b9c455c 100644 --- a/lib/kernels/include/kernels/legion_dim.h +++ b/lib/kernels/include/kernels/legion_dim.h @@ -8,19 +8,23 @@ namespace FlexFlow { legion_dim_t add_to_legion_dim(legion_dim_t legion_dim, int value); -legion_dim_t legion_dim_from_ff_dim(ff_dim_t, int num_dimensions); +legion_dim_t legion_dim_from_ff_dim(ff_dim_t, nonnegative_int num_dimensions); template using LegionOrdered = DimOrdered; -using LegionTensorDims = LegionOrdered; - template FFOrdered ff_ordered_from_legion_ordered(LegionOrdered const &legion_ordered) { return FFOrdered(legion_ordered.rbegin(), legion_ordered.rend()); } +template +LegionOrdered + legion_ordered_from_ff_ordered(FFOrdered const &ff_ordered) { + return LegionOrdered(ff_ordered.rbegin(), ff_ordered.rend()); +} + template std::string format_as(LegionOrdered const &v) { std::vector as_vec(v.cbegin(), v.cend()); diff --git a/lib/kernels/include/kernels/legion_dim_t.struct.toml b/lib/kernels/include/kernels/legion_dim_t.struct.toml index d2afb0d73f..6c047f096b 100644 --- a/lib/kernels/include/kernels/legion_dim_t.struct.toml +++ b/lib/kernels/include/kernels/legion_dim_t.struct.toml @@ -1,6 +1,5 @@ namespace = "FlexFlow" name = "legion_dim_t" - features = [ "eq", "ord", @@ -9,6 +8,10 @@ features = [ "fmt", ] +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] + [[fields]] name = "value" -type = "int" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/kernels/include/kernels/transpose_kernels.h b/lib/kernels/include/kernels/transpose_kernels.h index 56da81ba2b..0f1cc2ae61 100644 --- a/lib/kernels/include/kernels/transpose_kernels.h +++ b/lib/kernels/include/kernels/transpose_kernels.h @@ -3,32 +3,21 @@ #include "device.h" #include "kernels/accessor.h" +#include "op-attrs/ops/transpose_attrs.dtg.h" #include namespace FlexFlow { -struct TransposePerDeviceState { - int num_dim; - req> perm; -}; - -FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(TransposePerDeviceState, - num_dim, - perm); - namespace Kernels { namespace Transpose { -TransposePerDeviceState init_kernel(int num_dim, - std::vector const &perm); - void forward_kernel(cudaStream_t stream, - TransposePerDeviceState const &m, + TransposeAttrs const &attrs, GenericTensorAccessorR const &input, GenericTensorAccessorW const &output); void backward_kernel(cudaStream_t stream, - TransposePerDeviceState const &m, + TransposeAttrs const &attrs, GenericTensorAccessorW const &in_grad, GenericTensorAccessorR const &out_grad); diff --git a/lib/kernels/src/allocation.cc b/lib/kernels/src/allocation.cc index ccd88580db..d666592e77 100644 --- a/lib/kernels/src/allocation.cc +++ b/lib/kernels/src/allocation.cc @@ -13,7 +13,8 @@ void Allocator::deallocate(void *ptr) { GenericTensorAccessorW Allocator::allocate_tensor(TensorShape const &tensor_shape) { - void *ptr = this->allocate(get_size_in_bytes(tensor_shape)); + void *ptr = + this->allocate(get_size_in_bytes(tensor_shape).unwrap_nonnegative()); return {tensor_shape.data_type, tensor_shape, ptr}; } diff --git a/lib/kernels/src/array_shape.cc b/lib/kernels/src/array_shape.cc index d5e2f1167d..243185ada4 100644 --- a/lib/kernels/src/array_shape.cc +++ b/lib/kernels/src/array_shape.cc @@ -1,62 +1,71 @@ 
#include "kernels/array_shape.h" #include "utils/containers/product.h" +#include "utils/containers/reversed.h" +#include "utils/containers/vector_of.h" +#include "utils/nonnegative_int/num_elements.h" namespace FlexFlow { -static LegionTensorDims - legion_dims_from_ff_dims(FFOrdered const &ff_ordered) { - std::vector sizes(ff_ordered.size()); - std::reverse_copy(ff_ordered.begin(), ff_ordered.end(), sizes.begin()); - return LegionTensorDims(sizes.begin(), sizes.end()); +static LegionOrdered + legion_dims_from_ff_dims(FFOrdered const &ff_ordered) { + return LegionOrdered{reversed(vector_of(ff_ordered))}; } -ArrayShape::ArrayShape(size_t *_dims, size_t num_dims) - : dims(_dims, _dims + num_dims) {} +ArrayShape::ArrayShape(nonnegative_int *_dims, nonnegative_int num_dims) + : dims(_dims, _dims + num_dims.unwrap_nonnegative()) {} ArrayShape::ArrayShape(TensorShape const &shape) : dims(legion_dims_from_ff_dims(shape.dims.ff_ordered)) {} -ArrayShape::ArrayShape(std::vector const &input_dims) +ArrayShape::ArrayShape(std::vector const &input_dims) : dims(input_dims) {} -std::size_t ArrayShape::get_volume() const { +nonnegative_int ArrayShape::get_volume() const { return this->num_elements(); } -std::size_t ArrayShape::num_dims() const { - return this->dims.size(); +nonnegative_int ArrayShape::num_dims() const { + return ::FlexFlow::num_elements(this->dims); } -std::size_t ArrayShape::get_dim() const { +nonnegative_int ArrayShape::get_dim() const { return this->num_dims(); } -std::size_t ArrayShape::num_elements() const { +nonnegative_int ArrayShape::num_elements() const { if (dims.size() == 0) { - return 0; + return 0_n; } return product(this->dims); } -std::size_t ArrayShape::operator[](legion_dim_t idx) const { +nonnegative_int ArrayShape::operator[](legion_dim_t idx) const { return dims.at(idx); } -std::size_t ArrayShape::at(legion_dim_t idx) const { +nonnegative_int ArrayShape::at(legion_dim_t idx) const { return dims.at(idx); } -std::size_t ArrayShape::at(ff_dim_t idx) const { +nonnegative_int ArrayShape::at(ff_dim_t idx) const { return dims.at(legion_dim_from_ff_dim(idx, this->num_dims())); } +bool ArrayShape::operator==(ArrayShape const &other) const { + return this->tie() == other.tie(); +} + +bool ArrayShape::operator!=(ArrayShape const &other) const { + return this->tie() != other.tie(); +} + ArrayShape ArrayShape::sub_shape( std::optional> start, std::optional> end) const { NOT_IMPLEMENTED(); } -std::optional ArrayShape::at_maybe(legion_dim_t index) const { +std::optional ArrayShape::at_maybe(legion_dim_t index) const { if (index.value < dims.size()) { return dims.at(index); } else { @@ -64,11 +73,15 @@ std::optional ArrayShape::at_maybe(legion_dim_t index) const { } } -std::optional ArrayShape::at_maybe(ff_dim_t index) const { +std::optional ArrayShape::at_maybe(ff_dim_t index) const { return this->at_maybe(legion_dim_from_ff_dim(index, this->num_dims())); } -size_t get_volume(ArrayShape const &shape) { +std::tuple const &> ArrayShape::tie() const { + return std::tie(this->dims); +} + +nonnegative_int get_volume(ArrayShape const &shape) { return shape.get_volume(); } diff --git a/lib/kernels/src/cuda/cuda_helper.cu b/lib/kernels/src/cuda/cuda_helper.cu index 2ff02038f4..66388c0ec8 100644 --- a/lib/kernels/src/cuda/cuda_helper.cu +++ b/lib/kernels/src/cuda/cuda_helper.cu @@ -224,10 +224,10 @@ ffStatus_t tensor, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, - shape.at_maybe(legion_dim_t{0}).value_or(1), - shape.at_maybe(legion_dim_t{1}).value_or(1), - shape.at_maybe(legion_dim_t{2}).value_or(1), 
- shape.at_maybe(legion_dim_t{3}).value_or(1)); + shape.at_maybe(legion_dim_t{0_n}).value_or(1_n).unwrap_nonnegative(), + shape.at_maybe(legion_dim_t{1_n}).value_or(1_n).unwrap_nonnegative(), + shape.at_maybe(legion_dim_t{2_n}).value_or(1_n).unwrap_nonnegative(), + shape.at_maybe(legion_dim_t{3_n}).value_or(1_n).unwrap_nonnegative()); } cudnnDataType_t ff_to_cudnn_datatype(DataType type) { diff --git a/lib/kernels/src/cuda/ops/batch_norm_kernels.cu b/lib/kernels/src/cuda/ops/batch_norm_kernels.cu index 6c6e17a181..4e153a028e 100644 --- a/lib/kernels/src/cuda/ops/batch_norm_kernels.cu +++ b/lib/kernels/src/cuda/ops/batch_norm_kernels.cu @@ -145,21 +145,23 @@ BatchNormPerDeviceState init_kernel(PerDeviceFFHandle handle, actiDesc, CUDNN_ACTIVATION_RELU, CUDNN_PROPAGATE_NAN, 0.0)); } - BatchNormPerDeviceState per_device_state = {handle, - inputTensor, - outputTensor, - biasTensor, - actiDesc, - mode, - runningMean, - runningVar, - saveMean, - saveVar, - output_n, - output_c, - output_h, - output_w, - relu}; + BatchNormPerDeviceState per_device_state = BatchNormPerDeviceState{ + handle, + inputTensor, + outputTensor, + biasTensor, + actiDesc, + mode, + runningMean, + runningVar, + saveMean, + saveVar, + output_n, + output_c, + output_h, + output_w, + relu, + }; checkCUDA(cudaStreamDestroy(stream)); return per_device_state; diff --git a/lib/kernels/src/cuda/ops/cast_kernels.cu b/lib/kernels/src/cuda/ops/cast_kernels.cu index b895ffb68f..fe7aec68b9 100644 --- a/lib/kernels/src/cuda/ops/cast_kernels.cu +++ b/lib/kernels/src/cuda/ops/cast_kernels.cu @@ -41,7 +41,7 @@ struct ForwardKernel { void operator()(ffStream_t stream, GenericTensorAccessorR const &input, GenericTensorAccessorW const &output) { - size_t volume = input.shape.get_volume(); + size_t volume = input.shape.get_volume().unwrap_nonnegative(); cast_forward<<>>( input.get(), output.get(), volume); } @@ -52,7 +52,7 @@ struct BackwardKernel { void operator()(ffStream_t stream, GenericTensorAccessorR const &input, GenericTensorAccessorW const &output) { - size_t volume = input.shape.get_volume(); + size_t volume = input.shape.get_volume().unwrap_nonnegative(); cast_backward<<>>( input.get(), output.get(), volume, cast_to(1.0f)); } diff --git a/lib/kernels/src/cuda/ops/combine_kernels.cu b/lib/kernels/src/cuda/ops/combine_kernels.cu index 98c01d1f7c..7cc67ceed8 100644 --- a/lib/kernels/src/cuda/ops/combine_kernels.cu +++ b/lib/kernels/src/cuda/ops/combine_kernels.cu @@ -29,7 +29,8 @@ struct ForwardKernel { GenericTensorAccessorW const &output) { checkCUDA(cudaMemcpyAsync(output.get
(), input.get
(), - input.shape.get_volume() * size_of_datatype(DT), + input.shape.get_volume().unwrap_nonnegative() * + size_of_datatype(DT).unwrap_nonnegative(), cudaMemcpyDeviceToDevice, stream)); } @@ -40,7 +41,7 @@ struct BackwardKernel { void operator()(ffStream_t stream, GenericTensorAccessorR const &output_grad, GenericTensorAccessorW const &input_grad) { - size_t num_elements = output_grad.shape.get_volume(); + size_t num_elements = output_grad.shape.get_volume().unwrap_nonnegative(); add_kernel> <<>>( input_grad.get
(), output_grad.get
(), num_elements); diff --git a/lib/kernels/src/cuda/ops/concat_kernels.cu b/lib/kernels/src/cuda/ops/concat_kernels.cu index 68004738d2..2715ff16e9 100644 --- a/lib/kernels/src/cuda/ops/concat_kernels.cu +++ b/lib/kernels/src/cuda/ops/concat_kernels.cu @@ -25,8 +25,11 @@ void calc_blk_size(size_t &num_blocks, size_t &blk_size, ArrayShape const &shape, ff_dim_t axis) { - blk_size = shape.sub_shape(legion_dim_t{0}, axis).num_elements(); - num_blocks = shape.sub_shape(axis, std::nullopt).num_elements(); + blk_size = shape.sub_shape(legion_dim_t{0_n}, axis) + .num_elements() + .unwrap_nonnegative(); + num_blocks = + shape.sub_shape(axis, std::nullopt).num_elements().unwrap_nonnegative(); } void forward_kernel(cudaStream_t stream, diff --git a/lib/kernels/src/cuda/ops/conv_2d_kernels.cu b/lib/kernels/src/cuda/ops/conv_2d_kernels.cu index e3a4c97a31..dac55539d2 100644 --- a/lib/kernels/src/cuda/ops/conv_2d_kernels.cu +++ b/lib/kernels/src/cuda/ops/conv_2d_kernels.cu @@ -137,15 +137,15 @@ Conv2DPerDeviceState init_kernel(PerDeviceFFHandle handle, ffConvolutionBwdFilterAlgo_t bwdFilterAlgo; ffConvolutionBwdDataAlgo_t bwdDataAlgo; - int input_w = input.shape[legion_dim_t(0)]; - int input_h = input.shape[legion_dim_t(1)]; - int input_c = input.shape[legion_dim_t(2)]; - int input_n = input.shape[legion_dim_t(3)]; + int input_w = input.shape.at(legion_dim_t(0_n)).unwrap_nonnegative(); + int input_h = input.shape.at(legion_dim_t(1_n)).unwrap_nonnegative(); + int input_c = input.shape.at(legion_dim_t(2_n)).unwrap_nonnegative(); + int input_n = input.shape.at(legion_dim_t(3_n)).unwrap_nonnegative(); - int output_w = output.shape[legion_dim_t(0)]; - int output_h = output.shape[legion_dim_t(1)]; - int output_c = output.shape[legion_dim_t(2)]; - int output_n = output.shape[legion_dim_t(3)]; + int output_w = output.shape.at(legion_dim_t(0_n)).unwrap_nonnegative(); + int output_h = output.shape.at(legion_dim_t(1_n)).unwrap_nonnegative(); + int output_c = output.shape.at(legion_dim_t(2_n)).unwrap_nonnegative(); + int output_n = output.shape.at(legion_dim_t(3_n)).unwrap_nonnegative(); checkCUDNN(cudnnCreateTensorDescriptor(&inputTensor)); checkCUDNN(cudnnCreateTensorDescriptor(&biasTensor)); diff --git a/lib/kernels/src/cuda/ops/element_unary_kernels.cu b/lib/kernels/src/cuda/ops/element_unary_kernels.cu index a35d28fa8c..056c80ecf6 100644 --- a/lib/kernels/src/cuda/ops/element_unary_kernels.cu +++ b/lib/kernels/src/cuda/ops/element_unary_kernels.cu @@ -266,7 +266,7 @@ struct ForwardKernel { output.get())); } else if (use_scalar(op_type)) { assert(scalar.has_value()); - size_t num_elements = input.shape.num_elements(); + size_t num_elements = input.shape.num_elements().unwrap_nonnegative(); elewise_scalar_unary_forward_kernel> <<>>( num_elements, @@ -275,7 +275,7 @@ struct ForwardKernel { input.get(), output.get()); } else { - size_t num_elements = input.shape.num_elements(); + size_t num_elements = input.shape.num_elements().unwrap_nonnegative(); elewise_unary_forward_kernel> <<>>( num_elements, op_type, input.get(), output.get()); @@ -312,7 +312,7 @@ struct BackwardKernel { input_grad.get())); } else if (use_scalar(op_type)) { assert(scalar.has_value()); - size_t num_elements = input.shape.num_elements(); + size_t num_elements = input.shape.num_elements().unwrap_nonnegative(); elewise_scalar_unary_backward_kernel> <<>>( num_elements, @@ -323,7 +323,7 @@ struct BackwardKernel { input.get(), input_grad.get()); } else { - size_t num_elements = input.shape.num_elements(); + size_t num_elements = 
input.shape.num_elements().unwrap_nonnegative(); elewise_unary_backward_kernel> <<>>( num_elements, diff --git a/lib/kernels/src/cuda/ops/flat_kernels.cu b/lib/kernels/src/cuda/ops/flat_kernels.cu index 941db108a0..973d05f596 100644 --- a/lib/kernels/src/cuda/ops/flat_kernels.cu +++ b/lib/kernels/src/cuda/ops/flat_kernels.cu @@ -27,7 +27,8 @@ void forward_kernel(cudaStream_t stream, checkCUDA(cudaMemcpyAsync(output_ptr, input.get_float_ptr(), - (input.shape.num_elements()) * sizeof(float), + input.shape.num_elements().unwrap_nonnegative() * + sizeof(float), cudaMemcpyDeviceToDevice, stream)); } @@ -39,8 +40,13 @@ void backward_kernel(cudaStream_t stream, float alpha = 1.0f; apply_add_with_scale - <<>>( - input_grad_ptr, output_grad_ptr, input.shape.num_elements(), alpha); + <<>>(input_grad_ptr, + output_grad_ptr, + input.shape.num_elements().unwrap_nonnegative(), + alpha); } } // namespace Flat diff --git a/lib/kernels/src/cuda/ops/gather_kernels.cu b/lib/kernels/src/cuda/ops/gather_kernels.cu index 11c0a1a5e7..31c1bac217 100644 --- a/lib/kernels/src/cuda/ops/gather_kernels.cu +++ b/lib/kernels/src/cuda/ops/gather_kernels.cu @@ -128,22 +128,24 @@ void forward_kernel(ffStream_t stream, coord_t stride = output.shape.sub_shape(std::nullopt, add_to_legion_dim(m.legion_dim, 1)) - .num_elements(); - coord_t output_dim_size = output.shape[m.legion_dim]; - coord_t input_dim_size = input.shape[m.legion_dim]; + .num_elements() + .unwrap_nonnegative(); + coord_t output_dim_size = output.shape.at(m.legion_dim).unwrap_nonnegative(); + coord_t input_dim_size = input.shape.at(m.legion_dim).unwrap_nonnegative(); assert(index.data_type == DataType::INT32 || index.data_type == DataType::INT64); - DataTypeDispatch1{}(index.data_type, - stream, - input, - index, - output, - output.shape.get_volume(), - stride, - input_dim_size, - output_dim_size); + DataTypeDispatch1{}( + index.data_type, + stream, + input, + index, + output, + output.shape.get_volume().unwrap_nonnegative(), + stride, + input_dim_size, + output_dim_size); } void backward_kernel(ffStream_t stream, @@ -156,22 +158,26 @@ void backward_kernel(ffStream_t stream, coord_t stride = output_grad.shape .sub_shape(std::nullopt, add_to_legion_dim(m.legion_dim, 1)) - .get_volume(); - coord_t output_dim_size = output_grad.shape[m.legion_dim]; - coord_t input_dim_size = input_grad.shape[m.legion_dim]; + .get_volume() + .unwrap_nonnegative(); + coord_t output_dim_size = + output_grad.shape.at(m.legion_dim).unwrap_nonnegative(); + coord_t input_dim_size = + input_grad.shape.at(m.legion_dim).unwrap_nonnegative(); assert(index.data_type == DataType::INT32 || index.data_type == DataType::INT64); - DataTypeDispatch1{}(index.data_type, - stream, - output_grad, - index, - input_grad, - output_grad.shape.get_volume(), - stride, - input_dim_size, - output_dim_size); + DataTypeDispatch1{}( + index.data_type, + stream, + output_grad, + index, + input_grad, + output_grad.shape.get_volume().unwrap_nonnegative(), + stride, + input_dim_size, + output_dim_size); } } // namespace Gather diff --git a/lib/kernels/src/cuda/ops/partition_kernels.cu b/lib/kernels/src/cuda/ops/partition_kernels.cu index 1d07efb5fa..2831562f58 100644 --- a/lib/kernels/src/cuda/ops/partition_kernels.cu +++ b/lib/kernels/src/cuda/ops/partition_kernels.cu @@ -29,7 +29,8 @@ struct ForwardKernel { GenericTensorAccessorW const &output) { checkCUDA(cudaMemcpyAsync(output.get(), input.get(), - input.shape.num_elements() * size_of_datatype(T), + input.shape.num_elements().unwrap_nonnegative() * + 
size_of_datatype(T).unwrap_nonnegative(), cudaMemcpyDeviceToDevice, stream)); } @@ -41,12 +42,13 @@ struct BackwardKernel { RepartitionPerDeviceState const &m, GenericTensorAccessorW const &input_grad, GenericTensorAccessorR const &output_grad) { - add_kernel><<>>(input_grad.get(), - output_grad.get(), - input_grad.shape.num_elements()); + add_kernel> + <<>>(input_grad.get(), + output_grad.get(), + input_grad.shape.num_elements().unwrap_nonnegative()); } }; diff --git a/lib/kernels/src/cuda/ops/reduction_kernels.cu b/lib/kernels/src/cuda/ops/reduction_kernels.cu index 0c6ba7d8e3..5d95a3766a 100644 --- a/lib/kernels/src/cuda/ops/reduction_kernels.cu +++ b/lib/kernels/src/cuda/ops/reduction_kernels.cu @@ -41,12 +41,13 @@ struct ForwardKernel { GenericTensorAccessorW const &output, size_t num_replicas) { - size_t total_elements = input.shape.num_elements() * num_replicas; + size_t total_elements = + input.shape.num_elements().unwrap_nonnegative() * num_replicas; reduction_forward_kernel> <<>>( input.get(), output.get(), - input.shape.num_elements(), + input.shape.num_elements().unwrap_nonnegative(), num_replicas); } }; @@ -58,7 +59,8 @@ struct BackwardKernel { GenericTensorAccessorR const &output) { checkCUDA(cudaMemcpyAsync(input.get(), output.get(), - input.shape.num_elements() * size_of_datatype(T), + input.shape.num_elements().unwrap_nonnegative() * + size_of_datatype(T).unwrap_nonnegative(), cudaMemcpyDeviceToDevice, stream)); } diff --git a/lib/kernels/src/cuda/ops/replicate_kernels.cu b/lib/kernels/src/cuda/ops/replicate_kernels.cu index 76bfbe2658..4706f38fd4 100644 --- a/lib/kernels/src/cuda/ops/replicate_kernels.cu +++ b/lib/kernels/src/cuda/ops/replicate_kernels.cu @@ -41,7 +41,8 @@ struct ForwardKernel { checkCUDA(cudaMemcpyAsync((void *)output.get(), (void *)input.get(), - input.shape.num_elements() * size_of_datatype(T), + input.shape.num_elements().unwrap_nonnegative() * + size_of_datatype(T).unwrap_nonnegative(), cudaMemcpyDeviceToDevice, stream)); } @@ -53,12 +54,13 @@ struct BackwardKernel { GenericTensorAccessorW const &input, GenericTensorAccessorR const &output, size_t num_replicas) { - size_t total_elements = input.shape.num_elements() * num_replicas; + size_t total_elements = + input.shape.num_elements().unwrap_nonnegative() * num_replicas; replicate_backward_kernel> <<>>( input.get(), output.get(), - input.shape.num_elements(), + input.shape.num_elements().unwrap_nonnegative(), num_replicas); } }; diff --git a/lib/kernels/src/cuda/ops/reshape_kernels.cu b/lib/kernels/src/cuda/ops/reshape_kernels.cu index 5b7843a3a5..c5a289ce6b 100644 --- a/lib/kernels/src/cuda/ops/reshape_kernels.cu +++ b/lib/kernels/src/cuda/ops/reshape_kernels.cu @@ -33,7 +33,8 @@ struct ForwardKernel { GenericTensorAccessorW const &output) { checkCUDA(cudaMemcpyAsync(output.get(), input.get(), - input.shape.num_elements() * size_of_datatype(T), + input.shape.num_elements().unwrap_nonnegative() * + size_of_datatype(T).unwrap_nonnegative(), cudaMemcpyDeviceToDevice, stream)); } @@ -46,12 +47,12 @@ struct BackwardKernel { GenericTensorAccessorR const &output) { float alpha = 1.0f; apply_add_with_scale> - <<>>(input.get(), output.get(), - input.shape.num_elements(), + input.shape.num_elements().unwrap_nonnegative(), static_cast>(alpha)); } }; diff --git a/lib/kernels/src/cuda/ops/transpose_kernels.cu b/lib/kernels/src/cuda/ops/transpose_kernels.cu index 3b3f80944d..60d2f7f342 100644 --- a/lib/kernels/src/cuda/ops/transpose_kernels.cu +++ b/lib/kernels/src/cuda/ops/transpose_kernels.cu @@ -16,7 +16,9 @@ 
#include "device.h" #include "kernels/accessor.h" #include "kernels/transpose_kernels.h" +#include "op-attrs/dim_ordered/transform.h" #include "utils/exception.h" +#include "utils/nonnegative_int/num_elements.h" namespace FlexFlow { @@ -29,19 +31,6 @@ struct TransposeStrides { namespace Kernels { namespace Transpose { -TransposePerDeviceState init_kernel(int num_dim, - std::vector const &perm) { - int const length = perm.size(); - - std::vector perm_vector; - assert(length <= MAX_TENSOR_DIM); - for (int i = 0; i < length; ++i) { - perm_vector.push_back(legion_dim_from_ff_dim(perm[i], num_dim)); - } - - return {num_dim, perm_vector}; -} - __global__ void transpose_simple_kernel(std::size_t volume, float const *in_ptr, float *out_ptr, @@ -59,64 +48,92 @@ __global__ void transpose_simple_kernel(std::size_t volume, } } +static LegionOrdered + legion_ordered_perm_from_ff_ordered(FFOrdered const &perm) { + nonnegative_int perm_size = num_elements(perm); + LegionOrdered legion_ordered_perm = + transform(legion_ordered_from_ff_ordered(perm), [&](ff_dim_t d) { + return legion_dim_from_ff_dim(d, perm_size); + }); + + return legion_ordered_perm; +} + void forward_kernel(cudaStream_t stream, - TransposePerDeviceState const &m, + TransposeAttrs const &m, GenericTensorAccessorR const &input, GenericTensorAccessorW const &output) { TransposeStrides info; - info.num_dim = input.shape.num_dims(); - assert(info.num_dim == m.num_dim); + info.num_dim = input.shape.num_dims().unwrap_nonnegative(); + assert(info.num_dim == m.perm.size()); + + LegionOrdered legion_ordered_perm = + legion_ordered_perm_from_ff_ordered(m.perm); + for (int i = 0; i < info.num_dim; i++) { if (i == 0) { info.in_strides[i] = 1; info.out_strides[i] = 1; } else { - int in_dim_size = input.shape[legion_dim_t(i)] + 1; - int out_dim_size = output.shape[legion_dim_t(i)] + 1; + int in_dim_size = + input.shape.at(legion_dim_t{nonnegative_int{i}}).unwrap_nonnegative(); + int out_dim_size = output.shape.at(legion_dim_t{nonnegative_int{i}}) + .unwrap_nonnegative(); info.in_strides[i] = info.in_strides[i - 1] * in_dim_size; info.out_strides[i] = info.out_strides[i - 1] * out_dim_size; } - info.perm[i] = m.perm[i].value; + + info.perm[i] = legion_ordered_perm.at(legion_dim_t{nonnegative_int{i}}) + .value.unwrap_nonnegative(); } - transpose_simple_kernel<<>>(output.shape.get_volume(), - input.get_float_ptr(), - output.get_float_ptr(), - info, - 0.0f /*beta*/); + transpose_simple_kernel<<< + GET_BLOCKS(output.shape.get_volume().unwrap_nonnegative()), + CUDA_NUM_THREADS, + 0, + stream>>>(output.shape.get_volume().unwrap_nonnegative(), + input.get_float_ptr(), + output.get_float_ptr(), + info, + 0.0f /*beta*/); } void backward_kernel(cudaStream_t stream, - TransposePerDeviceState const &m, + TransposeAttrs const &m, GenericTensorAccessorW const &in_grad, GenericTensorAccessorR const &out_grad) { TransposeStrides info; - info.num_dim = in_grad.shape.num_dims(); - assert(info.num_dim == m.num_dim); + info.num_dim = in_grad.shape.num_dims().unwrap_nonnegative(); + assert(info.num_dim == m.perm.size()); + + LegionOrdered legion_ordered_perm = + legion_ordered_perm_from_ff_ordered(m.perm); + for (int i = 0; i < info.num_dim; i++) { if (i == 0) { info.in_strides[i] = 1; info.out_strides[i] = 1; } else { - int in_dim_size = out_grad.shape[legion_dim_t(i)] + 1; - int out_dim_size = in_grad.shape[legion_dim_t(i)] + 1; + int in_dim_size = out_grad.shape.at(legion_dim_t{nonnegative_int{i}}) + .unwrap_nonnegative(); + int out_dim_size = 
in_grad.shape.at(legion_dim_t{nonnegative_int{i}}) + .unwrap_nonnegative(); info.in_strides[i] = info.in_strides[i - 1] * in_dim_size; info.out_strides[i] = info.out_strides[i - 1] * out_dim_size; } - info.perm[m.perm[i].value] = i; + info.perm[legion_ordered_perm.at(legion_dim_t{nonnegative_int{i}}) + .value.unwrap_nonnegative()] = i; } - transpose_simple_kernel<<>>(in_grad.shape.get_volume(), - out_grad.get_float_ptr(), - in_grad.get_float_ptr(), - info, - 1.0f /*beta*/); + transpose_simple_kernel<<< + GET_BLOCKS(in_grad.shape.get_volume().unwrap_nonnegative()), + CUDA_NUM_THREADS, + 0, + stream>>>(in_grad.shape.get_volume().unwrap_nonnegative(), + out_grad.get_float_ptr(), + in_grad.get_float_ptr(), + info, + 1.0f /*beta*/); } } // namespace Transpose diff --git a/lib/kernels/src/legion_dim.cc b/lib/kernels/src/legion_dim.cc index 142dcbcb2c..bbb15c5636 100644 --- a/lib/kernels/src/legion_dim.cc +++ b/lib/kernels/src/legion_dim.cc @@ -3,11 +3,14 @@ namespace FlexFlow { legion_dim_t add_to_legion_dim(legion_dim_t legion_dim, int value) { - return legion_dim_t(legion_dim.value + value); + return legion_dim_t{ + nonnegative_int{legion_dim.value.unwrap_nonnegative() + value}}; } -legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, int num_dimensions) { - return legion_dim_t(num_dimensions - ff_dim.value.get_value() - 1); +legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, + nonnegative_int num_dimensions) { + return legion_dim_t{nonnegative_int{num_dimensions.unwrap_nonnegative() - + ff_dim.value.unwrap_nonnegative() - 1}}; } } // namespace FlexFlow diff --git a/lib/kernels/test/src/test_attention_kernel.cc b/lib/kernels/test/src/test_attention_kernel.cc index d44129ece1..64264f6c39 100644 --- a/lib/kernels/test/src/test_attention_kernel.cc +++ b/lib/kernels/test/src/test_attention_kernel.cc @@ -6,32 +6,38 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Test multi-head attention kernel") { - size_t num_samples = 10; - size_t num_heads = 4; - size_t qSize = 64, kSize = 64, vSize = 64; - size_t qProjSize = 64, kProjSize = 64, vProjSize = 64, oProjSize = 64; - size_t qoSeqLength = 20, kvSeqLength = 20; + nonnegative_int num_samples = 10_n; + nonnegative_int num_heads = 4_n; + nonnegative_int qSize = 64_n; + nonnegative_int kSize = 64_n; + nonnegative_int vSize = 64_n; + nonnegative_int qProjSize = 64_n; + nonnegative_int kProjSize = 64_n; + nonnegative_int vProjSize = 64_n; + nonnegative_int oProjSize = 64_n; + nonnegative_int qoSeqLength = 20_n; + nonnegative_int kvSeqLength = 20_n; ManagedFFStream managed_stream{}; ManagedPerDeviceFFHandle managed_handle{}; Allocator allocator = create_local_cuda_memory_allocator(); - MHAPerDeviceState state = - Kernels::MultiHeadAttention::init_kernel(managed_handle.raw_handle(), - allocator, - num_samples, - num_heads, - qSize, - kSize, - vSize, - qProjSize, - kProjSize, - vProjSize, - oProjSize, - qoSeqLength, - kvSeqLength, - false); + MHAPerDeviceState state = Kernels::MultiHeadAttention::init_kernel( + managed_handle.raw_handle(), + allocator, + /*num_samples=*/num_samples.unwrap_nonnegative(), + /*num_heads=*/num_heads.unwrap_nonnegative(), + /*qSize=*/qSize.unwrap_nonnegative(), + /*kSize=*/kSize.unwrap_nonnegative(), + /*vSize=*/vSize.unwrap_nonnegative(), + /*qProjSize=*/qProjSize.unwrap_nonnegative(), + /*kProjSize=*/kProjSize.unwrap_nonnegative(), + /*vProjSize=*/vProjSize.unwrap_nonnegative(), + /*oProjSize=*/oProjSize.unwrap_nonnegative(), + /*qoSeqLength=*/qoSeqLength.unwrap_nonnegative(), + 
/*kvSeqLength=*/kvSeqLength.unwrap_nonnegative(), + /*add_bias_kv=*/false); TensorShape query_shape = make_float_tensor_shape_from_legion_dims( {qoSeqLength, num_samples, qSize}); @@ -41,8 +47,8 @@ TEST_SUITE(FF_TEST_SUITE) { {kvSeqLength, num_samples, vSize}); TensorShape output_shape = make_float_tensor_shape_from_legion_dims( {qoSeqLength, num_samples, oProjSize}); - TensorShape weight_shape = - make_float_tensor_shape_from_legion_dims({state.weightSize}); + TensorShape weight_shape = make_float_tensor_shape_from_legion_dims( + {nonnegative_int{state.weightSize}}); GenericTensorAccessorW query_accessor = create_random_filled_accessor_w(query_shape, allocator); diff --git a/lib/kernels/test/src/test_batch_matmul_kernel.cc b/lib/kernels/test/src/test_batch_matmul_kernel.cc index 18e6977148..cacd5b60fb 100644 --- a/lib/kernels/test/src/test_batch_matmul_kernel.cc +++ b/lib/kernels/test/src/test_batch_matmul_kernel.cc @@ -6,13 +6,13 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Test BatchMatmul Kernel") { - size_t m = 10; - size_t n = 10; - size_t k = 10; - size_t batch = 5; - size_t a_seq_length_dim = -1; - size_t b_seq_length_dim = -1; - size_t seq_length = -1; + nonnegative_int m = 10_n; + nonnegative_int n = 10_n; + nonnegative_int k = 10_n; + nonnegative_int batch = 5_n; + int a_seq_length_dim = -1; + int b_seq_length_dim = -1; + int seq_length = -1; ManagedFFStream managed_stream{}; ManagedPerDeviceFFHandle managed_handle{}; @@ -39,10 +39,10 @@ TEST_SUITE(FF_TEST_SUITE) { output_accessor.get_float_ptr(), a_accessor.get_float_ptr(), b_accessor.get_float_ptr(), - m, - n, - k, - batch, + m.unwrap_nonnegative(), + n.unwrap_nonnegative(), + k.unwrap_nonnegative(), + batch.unwrap_nonnegative(), a_seq_length_dim, b_seq_length_dim, seq_length); @@ -64,10 +64,10 @@ TEST_SUITE(FF_TEST_SUITE) { a_grad_accessor.get_float_ptr(), b_accessor.get_float_ptr(), b_grad_accessor.get_float_ptr(), - m, - n, - k, - batch); + m.unwrap_nonnegative(), + n.unwrap_nonnegative(), + k.unwrap_nonnegative(), + batch.unwrap_nonnegative()); } } } diff --git a/lib/kernels/test/src/test_batch_norm_kernel.cc b/lib/kernels/test/src/test_batch_norm_kernel.cc index 8487bbda6a..b4c43cf1d8 100644 --- a/lib/kernels/test/src/test_batch_norm_kernel.cc +++ b/lib/kernels/test/src/test_batch_norm_kernel.cc @@ -6,22 +6,25 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Test BatchNorm Kernel") { - size_t output_n = 1, output_c = 10, output_h = 10, output_w = 10; + nonnegative_int output_n = 1_n; + nonnegative_int output_c = 10_n; + nonnegative_int output_h = 10_n; + nonnegative_int output_w = 10_n; ManagedFFStream managed_stream{}; ManagedPerDeviceFFHandle managed_handle{}; Allocator allocator = create_local_cuda_memory_allocator(); - BatchNormPerDeviceState state = - Kernels::BatchNorm::init_kernel(managed_handle.raw_handle(), - allocator, - nullptr, - output_n, - output_c, - output_h, - output_w, - true); + BatchNormPerDeviceState state = Kernels::BatchNorm::init_kernel( + /*handle=*/managed_handle.raw_handle(), + /*allocator=*/allocator, + /*runningMean=*/nullptr, + /*output_n=*/output_n.unwrap_nonnegative(), + /*output_c=*/output_c.unwrap_nonnegative(), + /*output_h=*/output_h.unwrap_nonnegative(), + /*output_w=*/output_w.unwrap_nonnegative(), + /*relu=*/true); TensorShape input_shape = make_float_tensor_shape_from_legion_dims( {output_n, output_c, output_h, output_w}); @@ -43,12 +46,13 @@ TEST_SUITE(FF_TEST_SUITE) { GenericTensorAccessorW bias_accessor = 
create_filled_accessor_w(bias_shape, allocator, 0.0f); - Kernels::BatchNorm::forward_kernel(managed_stream.raw_stream(), - state, - input_accessor.get_float_ptr(), - output_accessor.get_float_ptr(), - scale_accessor.get_float_ptr(), - bias_accessor.get_float_ptr()); + Kernels::BatchNorm::forward_kernel( + /*stream=*/managed_stream.raw_stream(), + /*per_device_state=*/state, + /*input_ptr=*/input_accessor.get_float_ptr(), + /*output_ptr=*/output_accessor.get_float_ptr(), + /*scale_ptr=*/scale_accessor.get_float_ptr(), + /*bias_ptr=*/bias_accessor.get_float_ptr()); std::vector host_output_data = load_data_to_host_from_device( @@ -66,16 +70,18 @@ TEST_SUITE(FF_TEST_SUITE) { GenericTensorAccessorW bias_grad_accessor = create_random_filled_accessor_w(bias_shape, allocator); - Kernels::BatchNorm::backward_kernel(managed_stream.raw_stream(), - state, - input_accessor.get_float_ptr(), - output_grad_accessor.get_float_ptr(), - output_accessor.get_float_ptr(), - input_grad_accessor.get_float_ptr(), - scale_accessor.get_float_ptr(), - scale_grad_accessor.get_float_ptr(), - bias_grad_accessor.get_float_ptr(), - input_accessor.shape.num_elements()); + Kernels::BatchNorm::backward_kernel( + /*stream=*/managed_stream.raw_stream(), + /*per_device_state=*/state, + /*input_ptr=*/input_accessor.get_float_ptr(), + /*output_grad_ptr=*/output_grad_accessor.get_float_ptr(), + /*output_ptr=*/output_accessor.get_float_ptr(), + /*input_grad_ptr=*/input_grad_accessor.get_float_ptr(), + /*scale_ptr=*/scale_accessor.get_float_ptr(), + /*scale_grad_ptr=*/scale_grad_accessor.get_float_ptr(), + /*bias_grad_ptr=*/bias_grad_accessor.get_float_ptr(), + /*numElements=*/ + input_accessor.shape.num_elements().unwrap_nonnegative()); std::vector host_input_grad_data = load_data_to_host_from_device( diff --git a/lib/kernels/test/src/test_cast_kernel.cc b/lib/kernels/test/src/test_cast_kernel.cc index b110208bce..0e0769014d 100644 --- a/lib/kernels/test/src/test_cast_kernel.cc +++ b/lib/kernels/test/src/test_cast_kernel.cc @@ -11,9 +11,9 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); TensorShape input_shape = - make_float_tensor_shape_from_legion_dims({100, 100}); + make_float_tensor_shape_from_legion_dims({100_n, 100_n}); TensorShape output_shape = - make_double_tensor_shape_from_legion_dims({100, 100}); + make_double_tensor_shape_from_legion_dims({100_n, 100_n}); GenericTensorAccessorW output_accessor = create_random_filled_accessor_w(output_shape, allocator); diff --git a/lib/kernels/test/src/test_combine_kernel.cc b/lib/kernels/test/src/test_combine_kernel.cc index 2e1000cb95..2b6b9bf589 100644 --- a/lib/kernels/test/src/test_combine_kernel.cc +++ b/lib/kernels/test/src/test_combine_kernel.cc @@ -11,7 +11,7 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); TensorShape input_shape = - make_float_tensor_shape_from_legion_dims({100, 100}); + make_float_tensor_shape_from_legion_dims({100_n, 100_n}); TensorShape output_shape = input_shape; SUBCASE("forward_kernel") { diff --git a/lib/kernels/test/src/test_concat_kernel.cc b/lib/kernels/test/src/test_concat_kernel.cc index 2212e384fa..215e599716 100644 --- a/lib/kernels/test/src/test_concat_kernel.cc +++ b/lib/kernels/test/src/test_concat_kernel.cc @@ -1,13 +1,14 @@ #include "doctest/doctest.h" #include "kernels/concat_kernels.h" #include "test_utils.h" +#include "utils/containers/repeat.h" using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Test concat kernel forward and backward") { 
- size_t num_inputs = 3; - size_t size_per_input = 100; - ff_dim_t concat_axis = ff_dim_t{nonnegative_int{0}}; + nonnegative_int num_inputs = 3_n; + nonnegative_int size_per_input = 100_n; + ff_dim_t concat_axis = ff_dim_t{0_n}; ManagedPerDeviceFFHandle managed_handle{}; ManagedFFStream managed_stream{}; @@ -21,7 +22,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("forward_kernel") { std::vector input_accessors = - repeat(num_inputs, [&]() { + repeat(num_inputs, [&]() { return read_only_accessor_from_write_accessor( create_random_filled_accessor_w(input_shape, allocator)); }); @@ -44,10 +45,8 @@ TEST_SUITE(FF_TEST_SUITE) { GenericTensorAccessorR output_grad_accessor = read_only_accessor_from_write_accessor( create_random_filled_accessor_w(output_shape, allocator)); - std::vector input_grad_accessors = - repeat(num_inputs, [&]() { - return allocator.allocate_tensor(input_shape); - }); + std::vector input_grad_accessors = repeat( + num_inputs, [&]() { return allocator.allocate_tensor(input_shape); }); Kernels::Concat::backward_kernel(managed_stream.raw_stream(), output_grad_accessor, input_grad_accessors, diff --git a/lib/kernels/test/src/test_dropout.cc b/lib/kernels/test/src/test_dropout.cc index e29143e251..86f8f2102b 100644 --- a/lib/kernels/test/src/test_dropout.cc +++ b/lib/kernels/test/src/test_dropout.cc @@ -1,6 +1,7 @@ #include "doctest/doctest.h" #include "kernels/dropout_kernels.h" #include "test_utils.h" +#include "utils/containers/count.h" using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { @@ -9,11 +10,11 @@ TEST_SUITE(FF_TEST_SUITE) { float dropout_rate = 0.1; ArrayShape shape = ArrayShape{ - std::vector{10, 10}, + std::vector{10_n, 10_n}, }; TensorShape input_shape = - make_float_tensor_shape_from_legion_dims({10, 10}); + make_float_tensor_shape_from_legion_dims({10_n, 10_n}); TensorShape output_shape = input_shape; ManagedFFStream managed_stream{}; @@ -25,8 +26,7 @@ TEST_SUITE(FF_TEST_SUITE) { managed_handle.raw_handle(), dropout_rate, seed, shape, allocator); auto get_zero_count = [](std::vector const &data) { - return std::count_if( - data.begin(), data.end(), [](float x) { return x == 0.0f; }); + return count(data, [](float x) { return x == 0.0f; }); }; SUBCASE("forward_kernel") { diff --git a/lib/kernels/test/src/test_flat_kernel.cc b/lib/kernels/test/src/test_flat_kernel.cc index 70894858e3..83f7f0445e 100644 --- a/lib/kernels/test/src/test_flat_kernel.cc +++ b/lib/kernels/test/src/test_flat_kernel.cc @@ -10,7 +10,7 @@ TEST_SUITE(FF_TEST_SUITE) { ManagedPerDeviceFFHandle managed_handle{}; ManagedFFStream managed_stream{}; - TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100}); + TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100_n}); TensorShape output_shape = input_shape; GenericTensorAccessorR input_accessor = @@ -30,7 +30,7 @@ TEST_SUITE(FF_TEST_SUITE) { read_only_accessor_from_write_accessor(output_accessor)); std::vector expected_output_data( - input_accessor.shape.num_elements(), 2.0f); + input_accessor.shape.num_elements().unwrap_nonnegative(), 2.0f); CHECK(check_output_data == expected_output_data); } @@ -50,7 +50,7 @@ TEST_SUITE(FF_TEST_SUITE) { read_only_accessor_from_write_accessor(input_grad_accessor)); std::vector expected_output_data( - input_accessor.shape.num_elements(), 1.0f); + input_accessor.shape.num_elements().unwrap_nonnegative(), 1.0f); CHECK(backward_output_data == expected_output_data); } } diff --git a/lib/kernels/test/src/test_gather_kernels.cc b/lib/kernels/test/src/test_gather_kernels.cc index 
88ac2f6889..1a8cf5f82a 100644 --- a/lib/kernels/test/src/test_gather_kernels.cc +++ b/lib/kernels/test/src/test_gather_kernels.cc @@ -10,10 +10,11 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); - GatherPerDeviceState state = {managed_handle.raw_handle(), legion_dim_t(2)}; + GatherPerDeviceState state = {managed_handle.raw_handle(), + legion_dim_t{2_n}}; - TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100}); - TensorShape output_shape = make_float_tensor_shape_from_legion_dims({50}); + TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100_n}); + TensorShape output_shape = make_float_tensor_shape_from_legion_dims({50_n}); GenericTensorAccessorR index_accessor = read_only_accessor_from_write_accessor( diff --git a/lib/kernels/test/src/test_layer_norm_kernels.cc b/lib/kernels/test/src/test_layer_norm_kernels.cc index 03b2f56bb9..5386c1d943 100644 --- a/lib/kernels/test/src/test_layer_norm_kernels.cc +++ b/lib/kernels/test/src/test_layer_norm_kernels.cc @@ -6,8 +6,8 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Test LayerNorm Forward and Backward Kernel") { - size_t batch_size = 10; - size_t feature_size = 10; + nonnegative_int batch_size = 10_n; + nonnegative_int feature_size = 10_n; float epsilon = 1e-5f; bool elementwise_affine = true; @@ -26,8 +26,8 @@ TEST_SUITE(FF_TEST_SUITE) { Kernels::LayerNorm::init_kernel(managed_handle.raw_handle(), allocator, elementwise_affine, - batch_size, - feature_size, + batch_size.unwrap_nonnegative(), + feature_size.unwrap_nonnegative(), epsilon); GenericTensorAccessorR input_accessor = diff --git a/lib/kernels/test/src/test_partition_kernel.cc b/lib/kernels/test/src/test_partition_kernel.cc index 437b37e954..4fd1b53210 100644 --- a/lib/kernels/test/src/test_partition_kernel.cc +++ b/lib/kernels/test/src/test_partition_kernel.cc @@ -15,7 +15,7 @@ TEST_SUITE(FF_TEST_SUITE) { managed_handle.raw_handle(), DataType::FLOAT); TensorShape input_shape = - make_float_tensor_shape_from_legion_dims({10, 10}); + make_float_tensor_shape_from_legion_dims({10_n, 10_n}); TensorShape output_shape = input_shape; SUBCASE("forward_kernel") { @@ -33,7 +33,7 @@ TEST_SUITE(FF_TEST_SUITE) { read_only_accessor_from_write_accessor(output_accessor)); std::vector expected_output_data( - input_accessor.shape.num_elements(), 1.0f); + input_accessor.shape.num_elements().unwrap_nonnegative(), 1.0f); CHECK(check_output_data == expected_output_data); } @@ -54,7 +54,7 @@ TEST_SUITE(FF_TEST_SUITE) { read_only_accessor_from_write_accessor(input_grad_accessor)); std::vector expected_grad_input_data( - input_grad_accessor.shape.num_elements(), 3.0f); + input_grad_accessor.shape.num_elements().unwrap_nonnegative(), 3.0f); CHECK(host_grad_input_data == expected_grad_input_data); } } diff --git a/lib/kernels/test/src/test_pool_2d_kernels.cc b/lib/kernels/test/src/test_pool_2d_kernels.cc index ebb92d39db..62b61707c6 100644 --- a/lib/kernels/test/src/test_pool_2d_kernels.cc +++ b/lib/kernels/test/src/test_pool_2d_kernels.cc @@ -5,10 +5,20 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Test Pool2D Forward and Backward Kernel") { - size_t input_w = 10, input_h = 10, input_c = 3, input_n = 1; - size_t output_w = 5, output_h = 5, output_c = 3, output_n = 1; - size_t pad_h = 0, pad_w = 0, kernel_h = 2, kernel_w = 2, stride_h = 2, - stride_w = 2; + nonnegative_int input_w = 10_n; + nonnegative_int input_h = 10_n; + nonnegative_int input_c = 3_n; + nonnegative_int input_n = 1_n; 
+ nonnegative_int output_w = 5_n; + nonnegative_int output_h = 5_n; + nonnegative_int output_c = 3_n; + nonnegative_int output_n = 1_n; + nonnegative_int pad_h = 0_n; + nonnegative_int pad_w = 0_n; + nonnegative_int kernel_h = 2_n; + nonnegative_int kernel_w = 2_n; + nonnegative_int stride_h = 2_n; + nonnegative_int stride_w = 2_n; PoolOp pool_type = PoolOp::MAX; @@ -18,23 +28,23 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); Pool2DPerDeviceState state = - Kernels::Pool2D::init_kernel(managed_handle.raw_handle(), - std::nullopt, - input_w, - input_h, - input_c, - input_n, - output_w, - output_h, - output_c, - output_n, - pad_h, - pad_w, - kernel_h, - kernel_w, - stride_h, - stride_w, - pool_type); + Kernels::Pool2D::init_kernel(/*handle=*/managed_handle.raw_handle(), + /*activation=*/std::nullopt, + /*input_w=*/input_w.unwrap_nonnegative(), + /*input_h=*/input_h.unwrap_nonnegative(), + /*input_c=*/input_c.unwrap_nonnegative(), + /*input_n=*/input_n.unwrap_nonnegative(), + /*output_w=*/output_w.unwrap_nonnegative(), + /*output_h=*/output_h.unwrap_nonnegative(), + /*output_c=*/output_c.unwrap_nonnegative(), + /*output_n=*/output_n.unwrap_nonnegative(), + /*pad_h=*/pad_h.unwrap_nonnegative(), + /*pad_w=*/pad_w.unwrap_nonnegative(), + /*kernel_h=*/kernel_h.unwrap_nonnegative(), + /*kernel_w=*/kernel_w.unwrap_nonnegative(), + /*stride_h=*/stride_h.unwrap_nonnegative(), + /*stride_w=*/stride_w.unwrap_nonnegative(), + /*pool_type=*/pool_type); TensorShape input_shape = make_float_tensor_shape_from_legion_dims( {input_w, input_h, input_c, input_n}); diff --git a/lib/kernels/test/src/test_reduction_kernel.cc b/lib/kernels/test/src/test_reduction_kernel.cc index 1ea740f336..04a3817b84 100644 --- a/lib/kernels/test/src/test_reduction_kernel.cc +++ b/lib/kernels/test/src/test_reduction_kernel.cc @@ -7,8 +7,8 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Test Reduction Forward and Backward Kernel") { std::size_t num_replicas = 5; - TensorShape input_shape = - make_float_tensor_shape_from_legion_dims({10, 10, 10, 10, 10}); + TensorShape input_shape = make_float_tensor_shape_from_legion_dims( + {10_n, 10_n, 10_n, 10_n, 10_n}); ManagedPerDeviceFFHandle managed_handle{}; ManagedFFStream managed_stream{}; @@ -16,7 +16,8 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); SUBCASE("forward_kernel") { - TensorShape output_shape = make_float_tensor_shape_from_legion_dims({10}); + TensorShape output_shape = + make_float_tensor_shape_from_legion_dims({10_n}); GenericTensorAccessorR input_accessor = read_only_accessor_from_write_accessor( @@ -49,7 +50,7 @@ TEST_SUITE(FF_TEST_SUITE) { output_grad_accessor); std::vector expected_grad_input_data( - input_grad_accessor.shape.num_elements(), 1.0f); + input_grad_accessor.shape.num_elements().unwrap_nonnegative(), 1.0f); std::vector host_grad_data = load_data_to_host_from_device( read_only_accessor_from_write_accessor(input_grad_accessor)); CHECK(host_grad_data == expected_grad_input_data); diff --git a/lib/kernels/test/src/test_replicate_kernel.cc b/lib/kernels/test/src/test_replicate_kernel.cc index 86d790f03c..fa726898f2 100644 --- a/lib/kernels/test/src/test_replicate_kernel.cc +++ b/lib/kernels/test/src/test_replicate_kernel.cc @@ -5,9 +5,9 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Test Replicate Kernel") { - std::size_t num_replicas = 10; + nonnegative_int num_replicas = 10_n; - TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100}); + 
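Stepping back to the Pool2D test above: its constants are mutually consistent under the usual pooling arithmetic, and the inverse relation is what the new calculate_padding helper in lib/local-execution/src/ops/pool_2d.cc (further down in this patch) computes. A standalone check, with plain ints standing in for nonnegative_int:

    #include <cassert>

    // Standard pooling relation: output = (input + 2*pad - kernel) / stride + 1.
    int pooled_extent(int input, int pad, int kernel, int stride) {
      return (input + 2 * pad - kernel) / stride + 1;
    }

    // Inverse, as in calculate_padding:
    // pad = ((output - 1) * stride + kernel - input + 1) / 2.
    int same_padding(int output, int stride, int kernel, int input) {
      return ((output - 1) * stride + kernel - input + 1) / 2;
    }

    int main() {
      // The test's values: input 10, kernel 2, stride 2, padding 0 -> output 5.
      assert(pooled_extent(10, 0, 2, 2) == 5);
      // ((5-1)*2 + 2 - 10 + 1) / 2 = 1/2 = 0, so no padding is needed.
      assert(same_padding(5, 2, 2, 10) == 0);
    }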
TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100_n}); TensorShape output_shape = input_shape; ManagedPerDeviceFFHandle managed_handle{}; @@ -30,7 +30,7 @@ TEST_SUITE(FF_TEST_SUITE) { read_only_accessor_from_write_accessor(output_accessor)); std::vector expected_output_data( - input_accessor.shape.num_elements(), 1.0f); + input_accessor.shape.num_elements().unwrap_nonnegative(), 1.0f); CHECK(check_output_data == expected_output_data); } @@ -44,7 +44,7 @@ TEST_SUITE(FF_TEST_SUITE) { Kernels::Replicate::backward_kernel(managed_stream.raw_stream(), input_grad_accessor, output_grad_accessor, - num_replicas); + num_replicas.unwrap_nonnegative()); std::vector check_aggregated_data = load_data_to_host_from_device( diff --git a/lib/kernels/test/src/test_reshape_kernel.cc b/lib/kernels/test/src/test_reshape_kernel.cc index f56bfacc2b..d329a347b3 100644 --- a/lib/kernels/test/src/test_reshape_kernel.cc +++ b/lib/kernels/test/src/test_reshape_kernel.cc @@ -10,7 +10,7 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); - TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100}); + TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100_n}); TensorShape output_shape = input_shape; ReshapePerDeviceState state = @@ -31,7 +31,7 @@ TEST_SUITE(FF_TEST_SUITE) { read_only_accessor_from_write_accessor(output_accessor)); std::vector expected_output_data( - input_accessor.shape.num_elements(), 1.0f); + input_accessor.shape.num_elements().unwrap_nonnegative(), 1.0f); CHECK(check_output_data == expected_output_data); } @@ -52,7 +52,7 @@ TEST_SUITE(FF_TEST_SUITE) { read_only_accessor_from_write_accessor(input_grad_accessor)); std::vector expected_grad_input_data( - input_grad_accessor.shape.num_elements(), 3.0f); + input_grad_accessor.shape.num_elements().unwrap_nonnegative(), 3.0f); CHECK(host_grad_input_data == expected_grad_input_data); } } diff --git a/lib/kernels/test/src/test_reverse_kernels.cc b/lib/kernels/test/src/test_reverse_kernels.cc index cdaf65a305..9c8475f6d6 100644 --- a/lib/kernels/test/src/test_reverse_kernels.cc +++ b/lib/kernels/test/src/test_reverse_kernels.cc @@ -5,11 +5,11 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Call Reverse Forward and Backward Kernels") { - std::size_t reverse_dim_size = 10; - std::size_t in_blk_size = 10; - std::size_t num_out_blks = 1; + nonnegative_int reverse_dim_size = 10_n; + nonnegative_int in_blk_size = 10_n; + nonnegative_int num_out_blks = 1_n; - TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100}); + TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100_n}); TensorShape output_shape = input_shape; ManagedPerDeviceFFHandle managed_handle{}; @@ -24,13 +24,14 @@ TEST_SUITE(FF_TEST_SUITE) { GenericTensorAccessorW output_accessor = allocator.allocate_tensor(output_shape); - Kernels::Reverse::forward_kernel(managed_stream.raw_stream(), - input_accessor.get_float_ptr(), - output_accessor.get_float_ptr(), - num_out_blks, - reverse_dim_size, - in_blk_size, - input_accessor.shape.num_elements()); + Kernels::Reverse::forward_kernel( + managed_stream.raw_stream(), + input_accessor.get_float_ptr(), + output_accessor.get_float_ptr(), + num_out_blks.unwrap_nonnegative(), + reverse_dim_size.unwrap_nonnegative(), + in_blk_size.unwrap_nonnegative(), + input_accessor.shape.num_elements().unwrap_nonnegative()); std::vector check_output_data = load_data_to_host_from_device( @@ -48,10 +49,10 @@ TEST_SUITE(FF_TEST_SUITE) { 
managed_stream.raw_stream(), output_grad_accessor.get_float_ptr(), input_grad_accessor.get_float_ptr(), - num_out_blks, - reverse_dim_size, - in_blk_size, - input_grad_accessor.shape.num_elements()); + num_out_blks.unwrap_nonnegative(), + reverse_dim_size.unwrap_nonnegative(), + in_blk_size.unwrap_nonnegative(), + input_grad_accessor.shape.num_elements().unwrap_nonnegative()); std::vector host_grad_input_data = load_data_to_host_from_device( diff --git a/lib/kernels/test/src/test_softmax_kernel.cc b/lib/kernels/test/src/test_softmax_kernel.cc index f49c1ebbcc..c9eaa76b86 100644 --- a/lib/kernels/test/src/test_softmax_kernel.cc +++ b/lib/kernels/test/src/test_softmax_kernel.cc @@ -6,18 +6,27 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Test Softmax Kernel Operations") { - int input_n = 1, input_c = 1, input_h = 1, input_w = 100, channels = 100; + nonnegative_int input_n = 1_n; + nonnegative_int input_c = 1_n; + nonnegative_int input_h = 1_n; + nonnegative_int input_w = 100_n; + nonnegative_int channels = 100_n; ManagedPerDeviceFFHandle managed_handle{}; ManagedFFStream managed_stream{}; Allocator allocator = create_local_cuda_memory_allocator(); - TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100}); + TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100_n}); TensorShape output_shape = input_shape; - SoftmaxPerDeviceState state = Kernels::Softmax::init_kernel( - managed_handle.raw_handle(), 0, input_n, channels, input_h, input_w); + SoftmaxPerDeviceState state = + Kernels::Softmax::init_kernel(managed_handle.raw_handle(), + 0, + input_n.unwrap_nonnegative(), + channels.unwrap_nonnegative(), + input_h.unwrap_nonnegative(), + input_w.unwrap_nonnegative()); GenericTensorAccessorW output_accessor = create_random_filled_accessor_w(output_shape, allocator); @@ -47,10 +56,10 @@ TEST_SUITE(FF_TEST_SUITE) { managed_stream.raw_stream(), input_grad_accessor.get_float_ptr(), output_grad_accessor.get_float_ptr(), - output_grad_accessor.shape.num_elements()); + output_grad_accessor.shape.num_elements().unwrap_nonnegative()); - std::vector expected_input_grad_data = - std::vector(input_grad_accessor.shape.num_elements(), 1.0f); + std::vector expected_input_grad_data = std::vector( + input_grad_accessor.shape.num_elements().unwrap_nonnegative(), 1.0f); std::vector host_input_grad_data = load_data_to_host_from_device( read_only_accessor_from_write_accessor(input_grad_accessor)); diff --git a/lib/kernels/test/src/test_split_kernel.cc b/lib/kernels/test/src/test_split_kernel.cc index f2346c9244..ea0d280f68 100644 --- a/lib/kernels/test/src/test_split_kernel.cc +++ b/lib/kernels/test/src/test_split_kernel.cc @@ -1,12 +1,13 @@ #include "doctest/doctest.h" #include "kernels/split_kernels.h" #include "test_utils.h" +#include "utils/containers/repeat.h" using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Test Split Forward and Backward Kernel") { - size_t num_outputs = 2; + nonnegative_int num_outputs = 2_n; coord_t out_blk_sizes[] = {50, 50}; coord_t in_blk_size = 100; coord_t num_blks = 1; @@ -16,15 +17,14 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); - TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100}); - TensorShape output_shape = make_float_tensor_shape_from_legion_dims({50}); + TensorShape input_shape = make_float_tensor_shape_from_legion_dims({100_n}); + TensorShape output_shape = make_float_tensor_shape_from_legion_dims({50_n}); SUBCASE("forward_kernel") { 
GenericTensorAccessorW input_accessor = create_random_filled_accessor_w(input_shape, allocator); - std::vector output_ptrs(num_outputs); - generate_n(output_ptrs.begin(), num_outputs, [&]() { + std::vector output_ptrs = repeat(num_outputs, [&]() { GenericTensorAccessorW output_accessor = allocator.allocate_tensor(output_shape); return output_accessor.get_float_ptr(); @@ -36,11 +36,11 @@ TEST_SUITE(FF_TEST_SUITE) { out_blk_sizes, in_blk_size, num_blks, - num_outputs); + num_outputs.unwrap_nonnegative()); } SUBCASE("backward_kernel") { - std::vector output_grad_ptrs(num_outputs); + std::vector output_grad_ptrs(num_outputs.unwrap_nonnegative()); for (int i = 0; i < num_outputs; i++) { GenericTensorAccessorW output_grad_accessor = create_random_filled_accessor_w(output_shape, allocator); @@ -56,7 +56,7 @@ TEST_SUITE(FF_TEST_SUITE) { out_blk_sizes, in_blk_size, num_blks, - num_outputs); + num_outputs.unwrap_nonnegative()); } } } diff --git a/lib/kernels/test/src/test_transpose_kernel.cc b/lib/kernels/test/src/test_transpose_kernel.cc index 2904fa01ae..02d99c86a1 100644 --- a/lib/kernels/test/src/test_transpose_kernel.cc +++ b/lib/kernels/test/src/test_transpose_kernel.cc @@ -5,21 +5,20 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Test Transpose Kernel Operations") { - std::size_t num_dims = 2; - - std::vector perm = {ff_dim_t{nonnegative_int{0}}, - ff_dim_t{nonnegative_int{1}}}; + TransposeAttrs attrs = TransposeAttrs{ + FFOrdered{ + ff_dim_t{0_n}, + ff_dim_t{1_n}, + }, + }; ManagedPerDeviceFFHandle managed_handle{}; ManagedFFStream managed_stream{}; Allocator allocator = create_local_cuda_memory_allocator(); - TransposePerDeviceState state = - Kernels::Transpose::init_kernel(num_dims, perm); - TensorShape input_shape = - make_float_tensor_shape_from_legion_dims({10, 10}); + make_float_tensor_shape_from_legion_dims({10_n, 10_n}); TensorShape output_shape = input_shape; SUBCASE("forward_kernel") { @@ -30,7 +29,7 @@ TEST_SUITE(FF_TEST_SUITE) { allocator.allocate_tensor(output_shape); Kernels::Transpose::forward_kernel( - managed_stream.raw_stream(), state, input_accessor, output_accessor); + managed_stream.raw_stream(), attrs, input_accessor, output_accessor); std::vector host_output_data = load_data_to_host_from_device( @@ -46,7 +45,7 @@ TEST_SUITE(FF_TEST_SUITE) { create_random_filled_accessor_w(input_shape, allocator); Kernels::Transpose::backward_kernel(managed_stream.raw_stream(), - state, + attrs, input_grad_accessor, output_grad_accessor); diff --git a/lib/kernels/test/src/test_utils.cc b/lib/kernels/test/src/test_utils.cc index b591642570..903b666fa9 100644 --- a/lib/kernels/test/src/test_utils.cc +++ b/lib/kernels/test/src/test_utils.cc @@ -4,7 +4,7 @@ GenericTensorAccessorW create_random_filled_accessor_w(TensorShape const &shape, Allocator &allocator, bool cpu_fill) { GenericTensorAccessorW accessor = allocator.allocate_tensor(shape); - size_t volume = accessor.shape.num_elements(); + size_t volume = accessor.shape.num_elements().unwrap_nonnegative(); std::vector host_data(volume); std::random_device rd; std::mt19937 gen(rd()); @@ -31,7 +31,7 @@ GenericTensorAccessorW create_filled_accessor_w(TensorShape const &shape, float val, bool cpu_fill) { GenericTensorAccessorW accessor = allocator.allocate_tensor(shape); - size_t volume = accessor.shape.num_elements(); + size_t volume = accessor.shape.num_elements().unwrap_nonnegative(); std::vector host_data(volume, val); if (cpu_fill) { @@ -50,7 +50,7 @@ GenericTensorAccessorW create_iota_filled_accessor_w(TensorShape 
const &shape, Allocator &allocator, bool cpu_fill) { GenericTensorAccessorW accessor = allocator.allocate_tensor(shape); - size_t volume = accessor.shape.num_elements(); + size_t volume = accessor.shape.num_elements().unwrap_nonnegative(); std::vector host_data(volume); for (size_t i = 0; i < volume; i++) { @@ -72,8 +72,7 @@ GenericTensorAccessorW create_iota_filled_accessor_w(TensorShape const &shape, void fill_tensor_accessor_w(GenericTensorAccessorW accessor, float val, bool cpu_fill) { - LegionTensorDims dims = accessor.shape.dims; - size_t volume = accessor.shape.num_elements(); + size_t volume = accessor.shape.num_elements().unwrap_nonnegative(); std::vector host_data(volume, val); if (cpu_fill) { @@ -86,7 +85,8 @@ void fill_tensor_accessor_w(GenericTensorAccessorW accessor, } } -TensorShape make_float_tensor_shape_from_legion_dims(FFOrdered dims) { +TensorShape + make_float_tensor_shape_from_legion_dims(FFOrdered dims) { return TensorShape{ TensorDims{ dims, @@ -95,7 +95,8 @@ TensorShape make_float_tensor_shape_from_legion_dims(FFOrdered dims) { }; } -TensorShape make_double_tensor_shape_from_legion_dims(FFOrdered dims) { +TensorShape + make_double_tensor_shape_from_legion_dims(FFOrdered dims) { return TensorShape{ TensorDims{ dims, diff --git a/lib/kernels/test/src/test_utils.h b/lib/kernels/test/src/test_utils.h index 21d4923881..08f0f382fb 100644 --- a/lib/kernels/test/src/test_utils.h +++ b/lib/kernels/test/src/test_utils.h @@ -30,9 +30,11 @@ void fill_tensor_accessor_w(GenericTensorAccessorW accessor, float val, bool cpu_fill = false); -TensorShape make_float_tensor_shape_from_legion_dims(FFOrdered dims); +TensorShape + make_float_tensor_shape_from_legion_dims(FFOrdered dims); -TensorShape make_double_tensor_shape_from_legion_dims(FFOrdered dims); +TensorShape + make_double_tensor_shape_from_legion_dims(FFOrdered dims); template std::vector load_data_to_host_from_device(GenericTensorAccessorR accessor) { @@ -52,16 +54,6 @@ bool contains_non_zero(std::vector &data) { data.begin(), data.end(), [](T const &val) { return val == 0; }); } -template -std::vector repeat(std::size_t n, Func &&func) { - std::vector result; - // result.reserve(n); // Sometimes we don't have default constructor for T - for (std::size_t i = 0; i < n; ++i) { - result.push_back(func()); - } - return result; -} - // Specialize doctest's StringMaker for std::vector template <> struct doctest::StringMaker> { diff --git a/lib/local-execution/include/local-execution/device_specific_device_states.variant.toml b/lib/local-execution/include/local-execution/device_specific_device_states.variant.toml index 5f73bbbb8e..db476e771d 100644 --- a/lib/local-execution/include/local-execution/device_specific_device_states.variant.toml +++ b/lib/local-execution/include/local-execution/device_specific_device_states.variant.toml @@ -6,7 +6,7 @@ features = [ includes = [ "kernels/attention_kernels.h", - "kernels/batch_norm_kernels.h", + "kernels/batch_norm_per_device_state.dtg.h", "kernels/conv_2d_kernels.h", "kernels/dropout_kernels.h", "kernels/element_binary_kernels.h", @@ -84,7 +84,3 @@ key = "device_specific_softmax_per_device_state" [[values]] type = "::FlexFlow::DeviceSpecific<::FlexFlow::TopKPerDeviceState>" key = "device_specific_topk_per_device_state" - -[[values]] -type = "::FlexFlow::DeviceSpecific<::FlexFlow::TransposePerDeviceState>" -key = "device_specific_transpose_per_device_state" diff --git a/lib/local-execution/include/local-execution/legion_tensor_shape.h 
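The repeat template deleted from test_utils.h above is the helper the kernel tests now pull in from utils/containers/repeat.h. A sketch of what the relocated version presumably looks like, based on the deleted body and the new call sites (which pass a nonnegative_int count; plain int is used here to keep the sketch self-contained):

    #include <cassert>
    #include <vector>

    // Build a vector by invoking func n times. The element type is deduced
    // from the callable, so it never needs to be default-constructible.
    template <typename F>
    auto repeat(int n, F &&func) -> std::vector<decltype(func())> {
      std::vector<decltype(func())> result;
      for (int i = 0; i < n; ++i) {
        result.push_back(func());
      }
      return result;
    }

    int main() {
      // Usage mirroring the concat/split tests: build n values from a lambda.
      std::vector<int> xs = repeat(3, [] { return 42; });
      assert(xs.size() == 3u && xs.front() == 42);
    }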
b/lib/local-execution/include/local-execution/legion_tensor_shape.h deleted file mode 100644 index 3786383865..0000000000 --- a/lib/local-execution/include/local-execution/legion_tensor_shape.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef _FLEXFLOW_RUNTIME_SRC_TENSOR_SHAPE_H -#define _FLEXFLOW_RUNTIME_SRC_TENSOR_SHAPE_H - -#include "kernels/legion_dim.h" -#include "op-attrs/datatype.h" -#include "op-attrs/ff_dim_t.h" -#include "op-attrs/tensor_shape.dtg.h" -#include "utils/stack_vector/stack_vector.h" -#include "utils/visitable.h" -#include - -namespace FlexFlow { - -// TODO FIXME @lockshaw remove inheritance from legion tensor dims -struct LegionTensorShape : public use_visitable_cmp, - public LegionTensorDims { - LegionTensorShape() = delete; - LegionTensorShape(std::vector const &dims, DataType data_type); - LegionTensorShape(TensorShape const &); - - template - LegionTensorShape(stack_vector const &dims, - DataType data_type) - : LegionTensorDims(dims.start(), dims.end()), data_type(data_type) {} - - operator TensorShape() const; - -public: - DataType data_type; -}; - -ff_dim_t to_ff(legion_dim_t, size_t num_dims); -legion_dim_t legion_dim_from_ff_dim(ff_dim_t, size_t num_dims); - -ff_dim_t to_ff(legion_dim_t, TensorShape const &); -legion_dim_t legion_dim_from_ff_dim(ff_dim_t, TensorShape const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/local-execution/include/local-execution/per_device_op_state.variant.toml b/lib/local-execution/include/local-execution/per_device_op_state.variant.toml index f99ff10bb9..0171e3e497 100644 --- a/lib/local-execution/include/local-execution/per_device_op_state.variant.toml +++ b/lib/local-execution/include/local-execution/per_device_op_state.variant.toml @@ -19,7 +19,6 @@ includes = [ "kernels/reshape_kernels.h", "kernels/softmax_kernels.h", "kernels/topk_kernels.h", - "kernels/transpose_kernels.h", ] [[values]] @@ -81,7 +80,3 @@ key = "softmax_per_device_state" [[values]] type = "::FlexFlow::TopKPerDeviceState" key = "topk_per_device_state" - -[[values]] -type = "::FlexFlow::TransposePerDeviceState" -key = "transpose_per_device_state" diff --git a/lib/local-execution/include/local-execution/task_id_t.enum.toml b/lib/local-execution/include/local-execution/task_id_t.enum.toml index 9cbe64c268..b0c82b5d26 100644 --- a/lib/local-execution/include/local-execution/task_id_t.enum.toml +++ b/lib/local-execution/include/local-execution/task_id_t.enum.toml @@ -205,9 +205,6 @@ name = "TOPK_FWD_TASK_ID" [[values]] name = "TOPK_BWD_TASK_ID" -[[values]] -name = "TRANSPOSE_INIT_TASK_ID" - [[values]] name = "TRANSPOSE_FWD_TASK_ID" diff --git a/lib/local-execution/src/legion_tensor_shape.cc b/lib/local-execution/src/legion_tensor_shape.cc deleted file mode 100644 index b227accc2e..0000000000 --- a/lib/local-execution/src/legion_tensor_shape.cc +++ /dev/null @@ -1,15 +0,0 @@ -#include "local-execution/legion_tensor_shape.h" -#include "kernels/legion_dim.h" -#include "op-attrs/tensor_shape.h" - -namespace FlexFlow { - -legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, size_t num_dims) { - return legion_dim_t(num_dims - ff_dim.value.get_value() - 1); -} - -legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, TensorShape const &shape) { - return legion_dim_from_ff_dim(ff_dim, num_dims(shape)); -} - -} // namespace FlexFlow diff --git a/lib/local-execution/src/ops/attention.cc b/lib/local-execution/src/ops/attention.cc index eebef9039d..e652b666a8 100644 --- a/lib/local-execution/src/ops/attention.cc +++ b/lib/local-execution/src/ops/attention.cc @@ -85,10 
+85,10 @@ static DeviceSpecificDeviceStates init_task_impl(TaskArgumentAccessor const &acc) { auto const &attrs = acc.get_argument(ATTRS); Allocator allocator = acc.get_allocator(); - size_t qProjSize = acc.get_argument(QPROJSIZE); - size_t kProjSize = acc.get_argument(KPROJSIZE); - size_t vProjSize = acc.get_argument(VPROJSIZE); - size_t oProjSize = acc.get_argument(OPROJSIZE); + nonnegative_int qProjSize = acc.get_argument(QPROJSIZE); + nonnegative_int kProjSize = acc.get_argument(KPROJSIZE); + nonnegative_int vProjSize = acc.get_argument(VPROJSIZE); + nonnegative_int oProjSize = acc.get_argument(OPROJSIZE); PerDeviceFFHandle handle = acc.get_argument(HANDLE); ParallelTensorShape query_parallel_tensor_shape = @@ -108,29 +108,30 @@ static DeviceSpecificDeviceStates key_parallel_tensor_shape, value_parallel_tensor_shape)); - int kvSeqLength = get_kvSeqLength(parsed); - int qSize = get_qSize(parsed); - int kSize = get_kSize(parsed); - int vSize = get_vSize(parsed); - - int qoSeqLength = get_qoSeqLength(parsed); - int num_samples = get_num_samples(parsed); - int num_heads = attrs.num_heads; - - MHAPerDeviceState per_device_state = init_kernel(handle, - allocator, - num_samples, - num_heads, - qSize, - kSize, - vSize, - qProjSize, - kProjSize, - vProjSize, - oProjSize, - qoSeqLength, - kvSeqLength, - attrs.add_bias_kv); + nonnegative_int kvSeqLength = get_kvSeqLength(parsed); + nonnegative_int qSize = get_qSize(parsed); + nonnegative_int kSize = get_kSize(parsed); + nonnegative_int vSize = get_vSize(parsed); + + nonnegative_int qoSeqLength = get_qoSeqLength(parsed); + nonnegative_int num_samples = get_num_samples(parsed); + nonnegative_int num_heads = attrs.num_heads; + + MHAPerDeviceState per_device_state = + init_kernel(handle, + allocator, + num_samples.unwrap_nonnegative(), + num_heads.unwrap_nonnegative(), + qSize.unwrap_nonnegative(), + kSize.unwrap_nonnegative(), + vSize.unwrap_nonnegative(), + qProjSize.unwrap_nonnegative(), + kProjSize.unwrap_nonnegative(), + vProjSize.unwrap_nonnegative(), + oProjSize.unwrap_nonnegative(), + qoSeqLength.unwrap_nonnegative(), + kvSeqLength.unwrap_nonnegative(), + attrs.add_bias_kv); return DeviceSpecificDeviceStates{ DeviceSpecific::create(per_device_state)}; } diff --git a/lib/local-execution/src/ops/batch_matmul.cc b/lib/local-execution/src/ops/batch_matmul.cc index 1eae409ae2..ad331156b5 100644 --- a/lib/local-execution/src/ops/batch_matmul.cc +++ b/lib/local-execution/src/ops/batch_matmul.cc @@ -18,6 +18,8 @@ #include "local-execution/op_task_signature.h" #include "op-attrs/get_output_shapes.h" #include "op-attrs/ops/batch_matmul.h" +#include "utils/containers/transform.h" +#include "utils/nonnegative_int/nonnegative_range.h" namespace FlexFlow { @@ -65,24 +67,30 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { FFIterationConfig iter_config = acc.get_argument(ITERATION_CONFIG); - int m = b_input.shape[legion_dim_t(0)]; - assert(m == output.shape[legion_dim_t(0)]); - int n = a_input.shape[legion_dim_t(1)]; - assert(n == output.shape[legion_dim_t(1)]); - int k = a_input.shape[legion_dim_t(0)]; - assert(k == b_input.shape[legion_dim_t(1)]); + nonnegative_int m = b_input.shape.at(legion_dim_t{0_n}); + assert(m == output.shape.at(legion_dim_t{0_n})); + nonnegative_int n = a_input.shape.at(legion_dim_t{1_n}); + assert(n == output.shape.at(legion_dim_t{1_n})); + nonnegative_int k = a_input.shape.at(legion_dim_t{0_n}); + assert(k == b_input.shape.at(legion_dim_t{1_n})); assert(a_input.shape.get_volume() == 
b_input.shape.get_volume()); assert(a_input.shape.get_volume() == output.shape.get_volume()); - int batch = 1; - for (int i = 2; i < a_input.shape.get_dim(); i++) { - int dim_size = a_input.shape[legion_dim_t(i)]; - assert(dim_size == b_input.shape[legion_dim_t(i)]); - assert(dim_size == output.shape[legion_dim_t(i)]); + nonnegative_int batch = 1_n; + for (nonnegative_int i : nonnegative_range(2_n, a_input.shape.get_dim())) { + nonnegative_int dim_size = a_input.shape.at(legion_dim_t{i}); + assert(dim_size == b_input.shape.at(legion_dim_t{i})); + assert(dim_size == output.shape.at(legion_dim_t{i})); batch *= dim_size; } + auto get_raw_seq_len = [](std::optional seq_len) -> int { + return transform(seq_len, + [](nonnegative_int x) { return x.unwrap_nonnegative(); }) + .value_or(-1); + }; + return profile(forward_kernel, profiling, "[BatchMatmul] forward_time = {:.2lf}ms\n", @@ -90,12 +98,12 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { output.get_float_ptr(), a_input.get_float_ptr(), b_input.get_float_ptr(), - m, - n, - k, - batch, - attrs.a_seq_length_dim, - attrs.b_seq_length_dim, + m.unwrap_nonnegative(), + n.unwrap_nonnegative(), + k.unwrap_nonnegative(), + batch.unwrap_nonnegative(), + get_raw_seq_len(attrs.a_seq_length_dim), + get_raw_seq_len(attrs.b_seq_length_dim), iter_config.seq_length); } @@ -120,19 +128,20 @@ static std::optional assert(b_input.shape == b_input_grad.shape); // check dins - int m = b_input.shape[legion_dim_t(0)]; - assert(m == output.shape[legion_dim_t(0)]); - int n = a_input.shape[legion_dim_t(1)]; - assert(n == output.shape[legion_dim_t(1)]); - int k = a_input.shape[legion_dim_t(0)]; - assert(k == b_input.shape[legion_dim_t(1)]); + nonnegative_int m = b_input.shape.at(legion_dim_t{0_n}); + assert(m == output.shape.at(legion_dim_t{0_n})); + nonnegative_int n = a_input.shape.at(legion_dim_t{1_n}); + assert(n == output.shape.at(legion_dim_t{1_n})); + nonnegative_int k = a_input.shape.at(legion_dim_t{0_n}); + assert(k == b_input.shape.at(legion_dim_t{1_n})); assert(a_input.shape.get_volume() == b_input.shape.get_volume()); assert(a_input.shape.get_volume() == output.shape.get_volume()); - int batch = 1; - for (int i = 2; i < a_input.shape.dims.num_dims(); i++) { - int dim_size = a_input.shape[legion_dim_t(i)]; - assert(dim_size == b_input.shape[legion_dim_t(i)]); - assert(dim_size == output.shape[legion_dim_t(i)]); + + nonnegative_int batch = 1_n; + for (nonnegative_int i : nonnegative_range(2_n, a_input.shape.get_dim())) { + nonnegative_int dim_size = a_input.shape.at(legion_dim_t{i}); + assert(dim_size == b_input.shape.at(legion_dim_t{i})); + assert(dim_size == output.shape.at(legion_dim_t{i})); batch *= dim_size; } @@ -146,10 +155,10 @@ static std::optional a_input_grad.get_float_ptr(), b_input.get_float_ptr(), b_input_grad.get_float_ptr(), - m, - n, - k, - batch); + m.unwrap_nonnegative(), + n.unwrap_nonnegative(), + k.unwrap_nonnegative(), + batch.unwrap_nonnegative()); } TaskImplFunction get_batch_matmul_fwd_task_impl() { diff --git a/lib/local-execution/src/ops/batch_matmul.h b/lib/local-execution/src/ops/batch_matmul.h index a7e29b1931..23389d5083 100644 --- a/lib/local-execution/src/ops/batch_matmul.h +++ b/lib/local-execution/src/ops/batch_matmul.h @@ -4,7 +4,7 @@ #include "local-execution/op_task_invocation.h" #include "local-execution/op_task_signature.h" #include "local-execution/sim_environment.h" -#include "op-attrs/ops/batch_matmul.dtg.h" +#include "op-attrs/ops/batch_matmul_attrs.dtg.h" namespace FlexFlow { diff --git 
a/lib/local-execution/src/ops/batch_norm.cc b/lib/local-execution/src/ops/batch_norm.cc index 851566fc02..1df6da8d8e 100644 --- a/lib/local-execution/src/ops/batch_norm.cc +++ b/lib/local-execution/src/ops/batch_norm.cc @@ -75,21 +75,22 @@ static DeviceSpecificDeviceStates auto output = acc.get_tensor(OUTPUT); auto const &attrs = acc.get_argument(ATTRS); - int output_w = output.shape[legion_dim_t(0)]; - int output_h = output.shape[legion_dim_t(1)]; - int output_c = output.shape[legion_dim_t(2)]; - int output_n = output.shape[legion_dim_t(3)]; + nonnegative_int output_w = output.shape.at(legion_dim_t{0_n}); + nonnegative_int output_h = output.shape.at(legion_dim_t{1_n}); + nonnegative_int output_c = output.shape.at(legion_dim_t{2_n}); + nonnegative_int output_n = output.shape.at(legion_dim_t{3_n}); float *runningMean; - BatchNormPerDeviceState per_device_state = init_kernel(handle, - allocator, - runningMean, - output_n, - output_c, - output_h, - output_w, - attrs.relu); + BatchNormPerDeviceState per_device_state = + init_kernel(handle, + allocator, + runningMean, + output_n.unwrap_nonnegative(), + output_c.unwrap_nonnegative(), + output_h.unwrap_nonnegative(), + output_w.unwrap_nonnegative(), + attrs.relu); return DeviceSpecificDeviceStates{ DeviceSpecific::create(per_device_state)}; @@ -140,7 +141,7 @@ static std::optional scale.get_float_ptr(), scale_grad.get_float_ptr(), bias_grad.get_float_ptr(), - output.shape.get_volume()); + output.shape.get_volume().unwrap_nonnegative()); } TaskImplFunction get_batch_norm_init_task_impl() { diff --git a/lib/local-execution/src/ops/conv_2d.cc b/lib/local-execution/src/ops/conv_2d.cc index d5c6e7f851..ac59143f00 100644 --- a/lib/local-execution/src/ops/conv_2d.cc +++ b/lib/local-execution/src/ops/conv_2d.cc @@ -62,19 +62,19 @@ static DeviceSpecificDeviceStates auto filter_grad = acc.get_tensor_grad(FILTER); Conv2DPerDeviceState per_device_state = - init_kernel(handle, - attrs.activation, - attrs.kernel_h, - attrs.kernel_w, - attrs.groups, - attrs.padding_h, - attrs.padding_w, - attrs.stride_h, - attrs.stride_w, - input, - output, - filter.get_float_ptr(), - filter_grad.get_float_ptr()); + init_kernel(/*handle=*/handle, + /*activation=*/attrs.activation, + /*kernel_h=*/attrs.kernel_h.unwrap_nonnegative(), + /*kernel_w=*/attrs.kernel_w.unwrap_nonnegative(), + /*groups=*/attrs.groups.unwrap_nonnegative(), + /*padding_h=*/attrs.padding_h.unwrap_nonnegative(), + /*padding_w=*/attrs.padding_w.unwrap_nonnegative(), + /*stride_h=*/attrs.stride_h.unwrap_nonnegative(), + /*stride_w=*/attrs.stride_w.unwrap_nonnegative(), + /*input=*/input, + /*output=*/output, + /*filter_ptr=*/filter.get_float_ptr(), + /*filter_grad_ptr=*/filter_grad.get_float_ptr()); return DeviceSpecificDeviceStates{ DeviceSpecific::create(per_device_state)}; } diff --git a/lib/local-execution/src/ops/gather.cc b/lib/local-execution/src/ops/gather.cc index a015c64f4d..a43c0f757f 100644 --- a/lib/local-execution/src/ops/gather.cc +++ b/lib/local-execution/src/ops/gather.cc @@ -15,8 +15,8 @@ #include "gather.h" #include "kernels/gather_kernels.h" -#include "local-execution/legion_tensor_shape.h" #include "op-attrs/get_output_shapes.h" +#include "utils/nonnegative_int/nonnegative_range.h" #include namespace FlexFlow { @@ -72,10 +72,11 @@ static DeviceSpecificDeviceStates assert(input.shape.get_dim() == index.shape.get_dim()); assert(output.shape.get_dim() == index.shape.get_dim()); - for (int i = 0; i < input.shape.get_dim(); i++) { - assert(index.shape[legion_dim_t(i)] == 
output.shape[legion_dim_t(i)]); + for (nonnegative_int i : nonnegative_range(input.shape.get_dim())) { + assert(index.shape.at(legion_dim_t{i}) == output.shape.at(legion_dim_t{i})); if (i != legion_dim.value) { - assert(input.shape[legion_dim_t(i)] == index.shape[legion_dim_t(i)]); + assert(input.shape.at(legion_dim_t{i}) == + index.shape.at(legion_dim_t{i})); } } diff --git a/lib/local-execution/src/ops/layer_norm.cc b/lib/local-execution/src/ops/layer_norm.cc index e99d27319c..c01475d4a4 100644 --- a/lib/local-execution/src/ops/layer_norm.cc +++ b/lib/local-execution/src/ops/layer_norm.cc @@ -15,12 +15,12 @@ #include "layer_norm.h" #include "kernels/layer_norm_kernels.h" -#include "local-execution/legion_tensor_shape.h" #include "op-attrs/get_output_shapes.h" #include "op-attrs/ops/layer_norm.h" #include "op-attrs/parallel_tensor_shape.h" #include "utils/exception.h" #include "utils/hash-utils.h" +#include "utils/nonnegative_int/nonnegative_range.h" #include namespace FlexFlow { @@ -119,27 +119,25 @@ static DeviceSpecificDeviceStates auto input = acc.get_tensor(INPUT); auto handle = acc.get_argument(HANDLE); - // question: how to get batch_size and effective_num_elements - int64_t effective_batch_size, effective_num_elements; - int M = 1; + nonnegative_int M = 1_n; for (int i = 0; i < attrs.axes.size(); i++) { - legion_dim_t legion_dim = legion_dim_from_ff_dim( - attrs.axes[i], get_tensor_shape(input.shape, input.data_type)); + legion_dim_t legion_dim = + legion_dim_from_ff_dim(attrs.axes[i], input.shape.num_dims()); M *= input.shape.at(legion_dim); } - int num_replicas = 1; - for (int i = 0; i < input.shape.num_dims(); i++) { - num_replicas *= input.shape.at(legion_dim_t(i)); - effective_num_elements = M; - effective_batch_size = input.shape.get_volume() / M; + nonnegative_int num_replicas = 1_n; + for (nonnegative_int i : nonnegative_range(input.shape.num_dims())) { + num_replicas *= input.shape.at(legion_dim_t{i}); } + nonnegative_int effective_num_elements = M; + nonnegative_int effective_batch_size = input.shape.get_volume() / M; LayerNormPerDeviceState per_device_state = init_kernel(handle, allocator, attrs.elementwise_affine, - effective_batch_size, - effective_num_elements, + effective_batch_size.unwrap_nonnegative(), + effective_num_elements.unwrap_nonnegative(), attrs.eps); return DeviceSpecificDeviceStates{ DeviceSpecific::create(per_device_state)}; diff --git a/lib/local-execution/src/ops/linear.cc b/lib/local-execution/src/ops/linear.cc index 3e0b4672ab..e10f1a8e9c 100644 --- a/lib/local-execution/src/ops/linear.cc +++ b/lib/local-execution/src/ops/linear.cc @@ -66,21 +66,22 @@ static DeviceSpecificDeviceStates auto input = acc.get_tensor(INPUT); auto weight = acc.get_tensor(WEIGHT); auto output = acc.get_tensor(OUTPUT); - int out_dim = output.shape.at(ff_dim_t{nonnegative_int{0}}); - int batch_size = output.shape.at(ff_dim_t{nonnegative_int{1}}); + nonnegative_int out_dim = output.shape.at(ff_dim_t{0_n}); + nonnegative_int batch_size = output.shape.at(ff_dim_t{1_n}); float *one_ptr; - LinearPerDeviceState per_device_state = init_kernel(handle, - one_ptr, - attrs.activation, - attrs.regularizer, - attrs.use_bias, - input.data_type, - weight.data_type, - output.data_type, - batch_size, - attrs.out_channels); + LinearPerDeviceState per_device_state = + init_kernel(handle, + one_ptr, + attrs.activation, + attrs.regularizer, + attrs.use_bias, + input.data_type, + weight.data_type, + output.data_type, + batch_size.unwrap_nonnegative(), + attrs.out_channels.unwrap_nonnegative()); 
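The layer_norm change above now calls legion_dim_from_ff_dim(axis, num_dims) directly; the lib/local-execution copy of that helper is deleted earlier in this patch, so an overload evidently survives in a lower-level header. Per the deleted definition, the mapping simply mirrors an index across the dimension list:

    #include <cassert>

    // From the deleted legion_tensor_shape.cc:
    // legion_dim = num_dims - ff_dim - 1, i.e. FF order and Legion order
    // count dimensions from opposite ends of the shape.
    int legion_dim_from_ff_dim(int ff_dim, int num_dims) {
      return num_dims - ff_dim - 1;
    }

    int main() {
      // In a 4-d shape, ff dim 0 and legion dim 3 name the same axis.
      assert(legion_dim_from_ff_dim(0, 4) == 3);
      assert(legion_dim_from_ff_dim(3, 4) == 0);
      // The mapping is its own inverse: applying it twice returns the index.
      assert(legion_dim_from_ff_dim(legion_dim_from_ff_dim(1, 4), 4) == 1);
    }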
return DeviceSpecificDeviceStates{ DeviceSpecific::create(per_device_state)}; } @@ -96,9 +97,9 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { ProfilingSettings profiling = acc.get_argument(PROFILING); auto attrs = acc.get_argument(ATTRS); - int in_dim = input.shape.at(ff_dim_t{nonnegative_int{0}}) + 1; - int out_dim = output.shape.at(ff_dim_t{nonnegative_int{0}}) + 1; - int batch_size = output.shape.get_volume() / out_dim; + nonnegative_int in_dim = input.shape.at(ff_dim_t{0_n}); + nonnegative_int out_dim = output.shape.at(ff_dim_t{0_n}); + nonnegative_int batch_size = output.shape.get_volume() / out_dim; float const *bias_ptr = NULL; if (attrs.use_bias) { @@ -113,9 +114,9 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { output.get_float_ptr(), weight.get_float_ptr(), bias_ptr, - in_dim, - out_dim, - batch_size); + in_dim.unwrap_nonnegative(), + out_dim.unwrap_nonnegative(), + batch_size.unwrap_nonnegative()); } ; @@ -140,9 +141,9 @@ static std::optional bias_ptr = bias.get_float_ptr(); } - int in_dim = input.shape.at(ff_dim_t{nonnegative_int{0}}) + 1; - int out_dim = output.shape.at(ff_dim_t{nonnegative_int{0}}) + 1; - int batch_size = output.shape.get_volume() / out_dim; + nonnegative_int in_dim = input.shape.at(ff_dim_t{0_n}); + nonnegative_int out_dim = output.shape.at(ff_dim_t{0_n}); + nonnegative_int batch_size = output.shape.get_volume() / out_dim; return profile(backward_kernel, profiling, @@ -155,9 +156,9 @@ (void *)weight.get_float_ptr(), (void *)weight_grad.get_float_ptr(), (void *)bias_ptr, - in_dim, - out_dim, - batch_size); + in_dim.unwrap_nonnegative(), + out_dim.unwrap_nonnegative(), + batch_size.unwrap_nonnegative()); } TaskImplFunction get_linear_init_task_impl() { diff --git a/lib/local-execution/src/ops/pool_2d.cc b/lib/local-execution/src/ops/pool_2d.cc index 3ab33a2ad6..897b545569 100644 --- a/lib/local-execution/src/ops/pool_2d.cc +++ b/lib/local-execution/src/ops/pool_2d.cc @@ -22,6 +22,20 @@ OpTaskInvocation init(Pool2DAttrs const &attrs) { return {task_id_t::POOL2D_INIT_TASK_ID, binding}; } +static nonnegative_int calculate_padding(nonnegative_int output_size, + nonnegative_int stride, + nonnegative_int kernel_size, + nonnegative_int input_size) { + int o = output_size.unwrap_nonnegative(); + int s = stride.unwrap_nonnegative(); + int k = kernel_size.unwrap_nonnegative(); + int i = input_size.unwrap_nonnegative(); + + return nonnegative_int{ + ((o - 1) * s + k - i + 1) / 2, + }; +} + static DeviceSpecificDeviceStates init_task_impl(TaskArgumentAccessor const &acc) { auto const &attrs = acc.get_argument(ATTRS); @@ -30,56 +44,33 @@ auto input = acc.get_tensor(INPUT); auto output = acc.get_tensor(OUTPUT); - int input_w = input.shape.at(ff_dim_t{nonnegative_int{0}}) + 1; - int input_h = input.shape.at(ff_dim_t{nonnegative_int{1}}) + 1; - int input_c = input.shape.at(ff_dim_t{nonnegative_int{2}}) + 1; - int input_n = input.shape.at(ff_dim_t{nonnegative_int{3}}) + 1; - int output_w = output.shape.at(ff_dim_t{nonnegative_int{0}}) + 1; - int output_h = output.shape.at(ff_dim_t{nonnegative_int{1}}) + 1; - int output_c = output.shape.at(ff_dim_t{nonnegative_int{2}}) + 1; - int output_n = output.shape.at(ff_dim_t{nonnegative_int{3}}) + 1; - - printf("init pool (input): n(%d) c(%d) h(%d) " "w(%d)\n", - input_n, - input_c, - input_h, - input_w); - printf("init pool (output): n(%d) c(%d) h(%d) w(%d)\n", - output_n, - output_c, - output_h, - output_w); - - int
pad_h = - ((output_h - 1) * attrs.stride_h + attrs.kernel_h - input_h + 1) / 2; - int pad_w = - ((output_w - 1) * attrs.stride_w + attrs.kernel_w - input_w + 1) / 2; - if (pad_h != attrs.padding_h) { - printf("Warning: changing pool_padding_h to satisfy output_h size\n"); - } - - if (pad_w != attrs.padding_w) { - printf("Warning: changing pool_padding_w to satisfy output_w size\n"); - } - - Pool2DPerDeviceState per_device_state = init_kernel(handle, - attrs.activation, - input_w, - input_h, - input_c, - input_n, - output_w, - output_h, - output_c, - output_n, - pad_h, - pad_w, - attrs.kernel_h, - attrs.kernel_w, - attrs.stride_h, - attrs.stride_w, - attrs.pool_type); + nonnegative_int input_w = input.shape.at(ff_dim_t{0_n}); + nonnegative_int input_h = input.shape.at(ff_dim_t{1_n}); + nonnegative_int input_c = input.shape.at(ff_dim_t{2_n}); + nonnegative_int input_n = input.shape.at(ff_dim_t{3_n}); + nonnegative_int output_w = output.shape.at(ff_dim_t{0_n}); + nonnegative_int output_h = output.shape.at(ff_dim_t{1_n}); + nonnegative_int output_c = output.shape.at(ff_dim_t{2_n}); + nonnegative_int output_n = output.shape.at(ff_dim_t{3_n}); + + Pool2DPerDeviceState per_device_state = + init_kernel(handle, + attrs.activation, + input_w.unwrap_nonnegative(), + input_h.unwrap_nonnegative(), + input_c.unwrap_nonnegative(), + input_n.unwrap_nonnegative(), + output_w.unwrap_nonnegative(), + output_h.unwrap_nonnegative(), + output_c.unwrap_nonnegative(), + output_n.unwrap_nonnegative(), + attrs.padding_h.unwrap_nonnegative(), + attrs.padding_w.unwrap_nonnegative(), + attrs.kernel_h.unwrap_nonnegative(), + attrs.kernel_w.unwrap_nonnegative(), + attrs.stride_h.unwrap_nonnegative(), + attrs.stride_w.unwrap_nonnegative(), + attrs.pool_type); return DeviceSpecificDeviceStates{ DeviceSpecific::create(per_device_state)}; diff --git a/lib/local-execution/src/ops/reduce.cc b/lib/local-execution/src/ops/reduce.cc index a043d9f847..3f92d7fd77 100644 --- a/lib/local-execution/src/ops/reduce.cc +++ b/lib/local-execution/src/ops/reduce.cc @@ -41,9 +41,14 @@ static DeviceSpecificDeviceStates OperatorType op_type = attrs.op_type; - size_t reduction_size = input.shape.get_volume() / output.shape.get_volume(); + nonnegative_int reduction_size = + input.shape.get_volume() / output.shape.get_volume(); ReducePerDeviceState per_device_state = - init_kernel(handle, op_type, reduction_size, input.shape, output.shape); + init_kernel(handle, + op_type, + reduction_size.unwrap_nonnegative(), + input.shape, + output.shape); return DeviceSpecificDeviceStates{ DeviceSpecific::create(per_device_state)}; } diff --git a/lib/local-execution/src/ops/reduction.cc b/lib/local-execution/src/ops/reduction.cc index a58d79a4f8..0892bcde82 100644 --- a/lib/local-execution/src/ops/reduction.cc +++ b/lib/local-execution/src/ops/reduction.cc @@ -50,14 +50,14 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { auto output = acc.get_tensor(OUTPUT); auto attrs = acc.get_argument(ATTRS); - size_t num_replicas = attrs.reduction_degree; + nonnegative_int num_replicas = attrs.reduction_degree; return profile(forward_kernel, profiling_settings, "[Reduction] forward_time = {:.2lf}ms\n", input, output, - num_replicas); + num_replicas.unwrap_nonnegative()); } static std::optional diff --git a/lib/local-execution/src/ops/replicate.cc b/lib/local-execution/src/ops/replicate.cc index 135475a711..d7b06d6bfe 100644 --- a/lib/local-execution/src/ops/replicate.cc +++ b/lib/local-execution/src/ops/replicate.cc @@ -62,14 +62,14 @@ static 
std::optional auto input_grad = acc.get_tensor_grad(INPUT); auto output_grad = acc.get_tensor_grad(OUTPUT); - auto const &attrs = acc.get_argument(ATTRS); + auto attrs = acc.get_argument(ATTRS); return profile(backward_kernel, profiling, "[replicate] backward_time = {:.2lf}ms\n", input_grad, output_grad, - attrs.replicate_degree); + attrs.replicate_degree.unwrap_nonnegative()); } TaskImplFunction get_replicate_fwd_task_impl() { diff --git a/lib/local-execution/src/ops/reverse.cc b/lib/local-execution/src/ops/reverse.cc index 8ac4c045c7..94dfc90f7a 100644 --- a/lib/local-execution/src/ops/reverse.cc +++ b/lib/local-execution/src/ops/reverse.cc @@ -17,6 +17,7 @@ #include "kernels/accessor.h" #include "kernels/reverse_kernels.h" #include "op-attrs/get_output_shapes.h" +#include "utils/nonnegative_int/nonnegative_range.h" namespace FlexFlow { @@ -48,16 +49,18 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { auto output = acc.get_tensor(OUTPUT); auto attrs = acc.get_argument(ATTRS); - int output_size = output.shape.get_volume(); + nonnegative_int output_size = output.shape.get_volume(); auto axis = attrs.axis; - coord_t in_blk_size = 1, reverse_dim_size = 1, num_out_blks = 1; - for (int i = 0; i < output.shape.get_dim(); i++) { + nonnegative_int in_blk_size = 1_n; + nonnegative_int reverse_dim_size = 1_n; + nonnegative_int num_out_blks = 1_n; + for (nonnegative_int i : nonnegative_range(output.shape.get_dim())) { if (i < axis.value) { - in_blk_size *= output.shape.at(ff_dim_t{nonnegative_int{i}}); + in_blk_size *= output.shape.at(ff_dim_t{i}); } else if (i == axis.value) { - reverse_dim_size = output.shape.at(ff_dim_t{nonnegative_int{i}}); + reverse_dim_size = output.shape.at(ff_dim_t{i}); } else { - num_out_blks *= output.shape.at(ff_dim_t{nonnegative_int{i}}); + num_out_blks *= output.shape.at(ff_dim_t{i}); } } @@ -66,10 +69,10 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { "[reverse] forward_time = {:.2lf}ms\n", input.get_float_ptr(), output.get_float_ptr(), - num_out_blks, - reverse_dim_size, - in_blk_size, - output_size); + num_out_blks.unwrap_nonnegative(), + reverse_dim_size.unwrap_nonnegative(), + in_blk_size.unwrap_nonnegative(), + output_size.unwrap_nonnegative()); } static std::optional @@ -79,15 +82,18 @@ static std::optional auto output_grad = acc.get_tensor_grad(OUTPUT); auto attrs = acc.get_argument(ATTRS); - int axis = input_grad.shape.get_dim() - attrs.axis.value.get_value() - 1; - coord_t in_blk_size = 1, reverse_dim_size = 1, num_out_blks = 1; - for (int i = 0; i < input_grad.shape.get_dim(); i++) { + int axis = input_grad.shape.num_dims().unwrap_nonnegative() - + attrs.axis.value.unwrap_nonnegative() - 1; + nonnegative_int in_blk_size = 1_n; + nonnegative_int reverse_dim_size = 1_n; + nonnegative_int num_out_blks = 1_n; + for (nonnegative_int i : nonnegative_range(input_grad.shape.get_dim())) { if (i < axis) { - in_blk_size *= input_grad.shape.at(ff_dim_t{nonnegative_int{i}}); + in_blk_size *= input_grad.shape.at(ff_dim_t{i}); } else if (i == axis) { - reverse_dim_size = input_grad.shape.at(ff_dim_t{nonnegative_int{i}}); + reverse_dim_size = input_grad.shape.at(ff_dim_t{i}); } else { - num_out_blks *= input_grad.shape.at(ff_dim_t{nonnegative_int{i}}); + num_out_blks *= input_grad.shape.at(ff_dim_t{i}); } } @@ -96,10 +102,10 @@ static std::optional "[reverse] backward_time = {:.2lf}ms\n", output_grad.get_float_ptr(), input_grad.get_float_ptr(), - num_out_blks, - reverse_dim_size, - in_blk_size, - 
input_grad.shape.get_volume()); + num_out_blks.unwrap_nonnegative(), + reverse_dim_size.unwrap_nonnegative(), + in_blk_size.unwrap_nonnegative(), + input_grad.shape.get_volume().unwrap_nonnegative()); } TaskImplFunction get_reverse_fwd_task_impl() { diff --git a/lib/local-execution/src/ops/softmax.cc b/lib/local-execution/src/ops/softmax.cc index 8d412c739b..ca5450f4f0 100644 --- a/lib/local-execution/src/ops/softmax.cc +++ b/lib/local-execution/src/ops/softmax.cc @@ -59,18 +59,18 @@ static DeviceSpecificDeviceStates auto output = acc.get_tensor(OUTPUT); auto const &attrs = acc.get_argument(ATTRS); - int output_w = output.shape.at(legion_dim_t(0)); - int output_h = output.shape.at(legion_dim_t(1)); - int output_c = output.shape.at(legion_dim_t(2)); - int output_n = output.shape.at(legion_dim_t(3)); + nonnegative_int output_w = output.shape.at(legion_dim_t{0_n}); + nonnegative_int output_h = output.shape.at(legion_dim_t{1_n}); + nonnegative_int output_c = output.shape.at(legion_dim_t{2_n}); + nonnegative_int output_n = output.shape.at(legion_dim_t{3_n}); SoftmaxPerDeviceState per_device_state = init_kernel(handle, - attrs.dim.value.get_value(), - output_n, - output_c, - output_h, - output_w); + attrs.dim.value.unwrap_nonnegative(), + output_n.unwrap_nonnegative(), + output_c.unwrap_nonnegative(), + output_h.unwrap_nonnegative(), + output_w.unwrap_nonnegative()); return DeviceSpecificDeviceStates{ DeviceSpecific::create(per_device_state)}; @@ -109,7 +109,7 @@ static std::optional "[SoftMax] backward_time = {:.2lf}ms\n", input_grad.get_float_ptr(), output_grad.get_float_ptr(), - output_grad.shape.get_volume()); + output_grad.shape.get_volume().unwrap_nonnegative()); } TaskImplFunction get_softmax_init_task_impl() { diff --git a/lib/local-execution/src/ops/split.cc b/lib/local-execution/src/ops/split.cc index c289bca205..f119ae235b 100644 --- a/lib/local-execution/src/ops/split.cc +++ b/lib/local-execution/src/ops/split.cc @@ -19,6 +19,7 @@ #include "op-attrs/get_output_shapes.h" #include "utils/exception.h" #include "utils/hash-utils.h" +#include "utils/nonnegative_int/nonnegative_range.h" namespace FlexFlow { @@ -44,19 +45,18 @@ OpTaskInvocation backward(SplitAttrs const &attrs) { return {task_id_t::SPLIT_BWD_TASK_ID, binding}; } -void calc_block_size(coord_t &num_blocks, - coord_t &block_size, - ArrayShape const &array_shape, - ff_dim_t axis) { - num_blocks = 1; - block_size = 1; - for (int d = 0; d < array_shape.num_elements(); d++) { - if (d <= axis.value.get_value()) { - block_size *= array_shape.at(legion_dim_t(d)); +static std::pair + calc_block_size(ArrayShape const &array_shape, ff_dim_t axis) { + nonnegative_int num_blocks = 1_n; + nonnegative_int block_size = 1_n; + for (nonnegative_int d : nonnegative_range(array_shape.num_elements())) { + if (d <= axis.value) { + block_size *= array_shape.at(legion_dim_t{d}); } else { - num_blocks *= array_shape.at(legion_dim_t(d)); + num_blocks *= array_shape.at(legion_dim_t{d}); } } + return {num_blocks, block_size}; } static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { @@ -65,13 +65,12 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { auto output = acc.get_tensor(OUTPUT); auto attrs = acc.get_argument(ATTRS); - coord_t num_blocks, in_block_size, out_block_size[MAX_NUM_OUTPUTS]; - calc_block_size(num_blocks, in_block_size, input.shape, attrs.axis); + coord_t out_block_sizes[MAX_NUM_OUTPUTS]; + auto [num_blocks, in_block_size] = calc_block_size(input.shape, attrs.axis); for (int i = 0; i < 
attrs.splits.size(); i++) { - coord_t out_num_blocks; - calc_block_size( - out_num_blocks, out_block_size[i], output.shape, attrs.axis); + auto [_, out_block_size] = calc_block_size(output.shape, attrs.axis); + out_block_sizes[i] = out_block_size.unwrap_nonnegative(); } float *output_float_ptr = output.get_float_ptr(); return profile(forward_kernel, @@ -79,9 +78,9 @@ "Split forward_time = {:.2lf}ms\n", &output_float_ptr, input.get_float_ptr(), - out_block_size, - in_block_size, - num_blocks, + out_block_sizes, + in_block_size.unwrap_nonnegative(), + num_blocks.unwrap_nonnegative(), attrs.splits.size()); } @@ -93,12 +92,14 @@ static std::optional auto output_grad = acc.get_tensor_grad(OUTPUT); auto attrs = acc.get_argument(ATTRS); - coord_t num_blocks, in_block_size, out_block_size[MAX_NUM_OUTPUTS]; - calc_block_size(num_blocks, in_block_size, input_grad.shape, attrs.axis); + coord_t out_block_sizes[MAX_NUM_OUTPUTS]; + auto [num_blocks, in_block_size] = + calc_block_size(input_grad.shape, attrs.axis); + for (int i = 0; i < attrs.splits.size(); i++) { - coord_t out_num_blocks; - calc_block_size( - out_num_blocks, out_block_size[i], output_grad.shape, attrs.axis); + auto [_, out_block_size] = calc_block_size(output_grad.shape, attrs.axis); + out_block_sizes[i] = out_block_size.unwrap_nonnegative(); } float const *output_grad_ptr = output_grad.get_float_ptr(); return profile(backward_kernel, @@ -106,9 +107,9 @@ "Split backward_time = {:.2lf}ms\n", input_grad.get_float_ptr(), &output_grad_ptr, - out_block_size, - in_block_size, - num_blocks, + out_block_sizes, + in_block_size.unwrap_nonnegative(), + num_blocks.unwrap_nonnegative(), attrs.splits.size()); } diff --git a/lib/local-execution/src/ops/topk.cc b/lib/local-execution/src/ops/topk.cc index 7f3519529a..e9d202a38f 100644 --- a/lib/local-execution/src/ops/topk.cc +++ b/lib/local-execution/src/ops/topk.cc @@ -75,8 +75,8 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { auto input = acc.get_tensor(INPUT); auto output = acc.get_tensor(OUTPUT); - int length = input.shape.at(legion_dim_t(0)) + 1; - size_t batch_size = input.shape.get_volume() / length; + nonnegative_int length = input.shape.at(legion_dim_t{0_n}); + nonnegative_int batch_size = input.shape.get_volume() / length; auto indices = acc.get_tensor(INDICES); return profile(forward_kernel, @@ -86,9 +86,9 @@ forward_task_impl(TaskArgumentAccessor const &acc) { input.get_float_ptr(), output.get_float_ptr(), indices.get_int32_ptr(), - batch_size, - length, - attrs.k, + batch_size.unwrap_nonnegative(), + length.unwrap_nonnegative(), + attrs.k.unwrap_nonnegative(), attrs.sorted); } @@ -104,8 +104,8 @@ static std::optional auto indices = acc.get_tensor(INDICES); - int length = input_grad.shape.at(legion_dim_t(0)) + 1; - size_t batch_size = input_grad.shape.get_volume() / length; + nonnegative_int length = input_grad.shape.at(legion_dim_t{0_n}); + nonnegative_int batch_size = input_grad.shape.get_volume() / length; return profile(backward_kernel, profiling, @@ -114,9 +114,9 @@ output_grad.get_float_ptr(), indices.get_int32_ptr(), input_grad.get_float_ptr(), - batch_size, - length, - attrs.k); + batch_size.unwrap_nonnegative(), + length.unwrap_nonnegative(), + attrs.k.unwrap_nonnegative()); } TaskImplFunction get_topk_init_task_impl() { diff --git a/lib/local-execution/src/ops/transpose.cc b/lib/local-execution/src/ops/transpose.cc
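One pattern worth spelling out from the split.cc hunk above: calc_block_size used to write its two results through reference out-parameters; it now returns them as a pair consumed with structured bindings. A self-contained sketch of the refactored shape (plain ints for nonnegative_int, and a plain vector for ArrayShape):

    #include <cassert>
    #include <utility>
    #include <vector>

    // Returns {num_blocks, block_size}: dims up to and including axis fold
    // into block_size, the rest into num_blocks (mirroring split.cc's helper).
    std::pair<int, int> calc_block_size(std::vector<int> const &dims, int axis) {
      int num_blocks = 1;
      int block_size = 1;
      for (int d = 0; d < static_cast<int>(dims.size()); d++) {
        if (d <= axis) {
          block_size *= dims[d];
        } else {
          num_blocks *= dims[d];
        }
      }
      return {num_blocks, block_size};
    }

    int main() {
      // Callers destructure the result instead of pre-declaring out-params:
      auto [num_blocks, block_size] = calc_block_size({100}, /*axis=*/0);
      assert(num_blocks == 1 && block_size == 100);
    }

Returning the pair also removes the uninitialized out-parameter declarations the old signature forced on every caller.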
index 53cf1f20ed..5f183305ab 100644 --- a/lib/local-execution/src/ops/transpose.cc +++ b/lib/local-execution/src/ops/transpose.cc @@ -28,39 +28,11 @@ enum Slots { OUTPUT, // tensor ATTRS, PROFILING, - PER_DEVICE_STATE, }; -OpTaskInvocation init(TransposeAttrs const &attrs) { - OpTaskBinding binding; - binding.bind_arg(ATTRS, attrs); - return {task_id_t::TRANSPOSE_INIT_TASK_ID, binding}; -} - -static DeviceSpecificDeviceStates - init_task_impl(TaskArgumentAccessor const &acc) { - auto const &attrs = acc.get_argument(ATTRS); - int size = int_from_size_t(attrs.perm.size()); - - std::vector perm = [&] { - std::vector result; - for (int i : range(size)) { - result.push_back(ff_dim_t{nonnegative_int{size - i - 1}}); - } - return result; - }(); - - TransposePerDeviceState per_device_state = init_kernel(size, perm); - - return DeviceSpecificDeviceStates{ - DeviceSpecific::create(per_device_state)}; -} - OpTaskInvocation forward(TransposeAttrs const &attrs) { OpTaskBinding binding; - binding.bind_arg(PER_DEVICE_STATE, - per_device_op_state()); binding.bind_arg(PROFILING, profiling_settings()); binding.bind(INPUT, input_tensor(0)); @@ -71,8 +43,7 @@ OpTaskInvocation forward(TransposeAttrs const &attrs) { static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { ProfilingSettings profiling = acc.get_argument(PROFILING); - auto per_device_state = - acc.get_argument(PER_DEVICE_STATE); + auto attrs = acc.get_argument(ATTRS); auto input = acc.get_tensor(INPUT); auto output = acc.get_tensor(OUTPUT); @@ -80,7 +51,7 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { return profile(forward_kernel, profiling, "[Transpose] Forward_time = {:.2lf} [ms]", - per_device_state, + attrs, input, output); } @@ -88,8 +59,7 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { static std::optional backward_task_impl(TaskArgumentAccessor const &acc) { ProfilingSettings profiling = acc.get_argument(PROFILING); - auto per_device_state = - acc.get_argument(PER_DEVICE_STATE); + auto attrs = acc.get_argument(ATTRS); auto input_grad = acc.get_tensor_grad(INPUT); auto output_grad = acc.get_tensor_grad(OUTPUT); @@ -97,7 +67,7 @@ static std::optional return profile(backward_kernel, profiling, "[Transpose] Backward_time = {:.2lf} [ms]", - per_device_state, + attrs, input_grad, output_grad); } @@ -108,42 +78,31 @@ OpTaskInvocation backward(TransposeAttrs const &attrs) { return {task_id_t::TRANSPOSE_BWD_TASK_ID, binding}; } -TaskImplFunction get_transpose_init_task_impl() { - return TaskImplFunction{InitTaskImplFunction{init_task_impl}}; -} TaskImplFunction get_transpose_fwd_task_impl() { return TaskImplFunction{FwdBwdTaskImplFunction{forward_task_impl}}; } + TaskImplFunction get_transpose_bwd_task_impl() { return TaskImplFunction{FwdBwdTaskImplFunction{backward_task_impl}}; } -OpTaskSignature get_transpose_init_signature() { - OpTaskSignature init(OpTaskType::INIT); - - init.add_arg_slot(ATTRS); - init.add_return_value(); - return init; -} OpTaskSignature get_transpose_fwd_signature() { OpTaskSignature fwd(OpTaskType::FWD); fwd.add_arg_slot(PROFILING); - fwd.add_unchecked_arg_slot(PER_DEVICE_STATE); fwd.add_input_slot(INPUT); fwd.add_output_slot(OUTPUT); return fwd; } + OpTaskSignature get_transpose_bwd_signature() { OpTaskSignature bwd = infer_bwd_signature(get_transpose_fwd_signature()); return bwd; } std::vector get_task_ids(TransposeAttrs const &) { - return {task_id_t::TRANSPOSE_INIT_TASK_ID, - task_id_t::TRANSPOSE_FWD_TASK_ID, - task_id_t::TRANSPOSE_BWD_TASK_ID}; 
+ return {task_id_t::TRANSPOSE_FWD_TASK_ID, task_id_t::TRANSPOSE_BWD_TASK_ID}; } } // namespace FlexFlow diff --git a/lib/local-execution/src/ops/transpose.h b/lib/local-execution/src/ops/transpose.h index 0f3a2e80a0..f2ce014aa7 100644 --- a/lib/local-execution/src/ops/transpose.h +++ b/lib/local-execution/src/ops/transpose.h @@ -9,15 +9,12 @@ namespace FlexFlow { std::vector<task_id_t> get_task_ids(TransposeAttrs const &); -TaskImplFunction get_transpose_init_task_impl(); TaskImplFunction get_transpose_fwd_task_impl(); TaskImplFunction get_transpose_bwd_task_impl(); -OpTaskSignature get_transpose_init_signature(); OpTaskSignature get_transpose_fwd_signature(); OpTaskSignature get_transpose_bwd_signature(); -OpTaskInvocation init(TransposeAttrs const &); OpTaskInvocation forward(TransposeAttrs const &); OpTaskInvocation backward(TransposeAttrs const &); diff --git a/lib/local-execution/src/task_signature_impl.cc b/lib/local-execution/src/task_signature_impl.cc index ca428aad25..60928d42d7 100644 --- a/lib/local-execution/src/task_signature_impl.cc +++ b/lib/local-execution/src/task_signature_impl.cc @@ -193,9 +193,6 @@ TaskSignatureAndImpl get_task_sig_impl(task_id_t const &task_id) { case task_id_t::TOPK_BWD_TASK_ID: return TaskSignatureAndImpl{get_topk_bwd_task_impl(), get_topk_bwd_signature()}; - case task_id_t::TRANSPOSE_INIT_TASK_ID: - return TaskSignatureAndImpl{get_transpose_init_task_impl(), - get_transpose_init_signature()}; case task_id_t::TRANSPOSE_FWD_TASK_ID: return TaskSignatureAndImpl{get_transpose_fwd_task_impl(), get_transpose_fwd_signature()}; @@ -296,7 +293,6 @@ OpTaskInvocation init(ComputationGraphOpAttrs const &op) { [](ReshapeAttrs const &attrs) { return init(attrs); }, [](SoftmaxAttrs const &attrs) { return init(attrs); }, [](TopKAttrs const &attrs) { return init(attrs); }, - [](TransposeAttrs const &attrs) { return init(attrs); }, [](auto const &attrs) -> OpTaskInvocation { throw mk_runtime_error(fmt::format("Unhandled attr type {}", attrs)); }, diff --git a/lib/local-execution/test/src/test_local_slots_backing.cc b/lib/local-execution/test/src/test_local_slots_backing.cc index 1ec441fbca..46827e3981 100644 --- a/lib/local-execution/test/src/test_local_slots_backing.cc +++ b/lib/local-execution/test/src/test_local_slots_backing.cc @@ -19,16 +19,17 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("LocalSlotsBacking -- Attention Op") { // allocate input memory Allocator allocator = create_local_cpu_memory_allocator(); - int embed_dim = 32; - int num_heads = 10; + nonnegative_int embed_dim = 32_n; + nonnegative_int num_heads = 10_n; - size_t batch_size = 40; - size_t seq_len = 48; - size_t feature_size = 36; + nonnegative_int batch_size = 40_n; + nonnegative_int seq_len = 48_n; + nonnegative_int feature_size = 36_n; DataType dtype = DataType::FLOAT; TensorShape input_tensor_shape = TensorShape{ - TensorDims{FFOrdered<size_t>{batch_size, seq_len, feature_size}}, + TensorDims{ + FFOrdered<nonnegative_int>{batch_size, seq_len, feature_size}}, DataType::FLOAT, }; TensorShape query_shape = input_tensor_shape; diff --git a/lib/local-execution/test/src/test_local_task_arg_accessor.cc b/lib/local-execution/test/src/test_local_task_arg_accessor.cc index f52fccb1ed..0fab0f6a60 100644 --- a/lib/local-execution/test/src/test_local_task_arg_accessor.cc +++ b/lib/local-execution/test/src/test_local_task_arg_accessor.cc @@ -9,16 +9,17 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("LocalTaskArgumentAccessor") { Allocator allocator = create_local_cpu_memory_allocator(); - int embed_dim = 32; - int
num_heads = 10; + nonnegative_int embed_dim = 32_n; + nonnegative_int num_heads = 10_n; - size_t batch_size = 40; - size_t seq_len = 48; - size_t feature_size = 36; + nonnegative_int batch_size = 40_n; + nonnegative_int seq_len = 48_n; + nonnegative_int feature_size = 36_n; DataType dtype = DataType::FLOAT; TensorShape input_tensor_shape = TensorShape{ - TensorDims{FFOrdered<size_t>{batch_size, seq_len, feature_size}}, + TensorDims{ + FFOrdered<nonnegative_int>{batch_size, seq_len, feature_size}}, DataType::FLOAT, }; diff --git a/lib/local-execution/test/src/test_task_registry.cc b/lib/local-execution/test/src/test_task_registry.cc index e18b7ea2de..58d6d9be6c 100644 --- a/lib/local-execution/test/src/test_task_registry.cc +++ b/lib/local-execution/test/src/test_task_registry.cc @@ -14,8 +14,8 @@ TEST_SUITE(FF_TEST_SUITE) { TaskRegistry task_registry = empty_task_registry(); layer_guid_t layer_guid = layer_guid_t{Node{0}}; - int embed_dim = 32; - int num_heads = 10; + nonnegative_int embed_dim = 32_n; + nonnegative_int num_heads = 10_n; ComputationGraphOpAttrs attrs = ComputationGraphOpAttrs{MultiHeadAttentionAttrs{ /*embed_dim=*/embed_dim, @@ -76,7 +76,7 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(correct_task_mapping == task_registry.task_mapping); } SUBCASE("different attrs, still same task fn mapping") { - int embed_dim = 100; + nonnegative_int embed_dim = 100_n; layer_guid_t layer_3 = layer_guid_t{Node{3}}; ComputationGraphOpAttrs other_attrs = ComputationGraphOpAttrs{MultiHeadAttentionAttrs{ /*embed_dim=*/embed_dim, @@ -98,7 +98,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("equality") { TaskRegistry other_task_registry = empty_task_registry(); SUBCASE("different attrs is still equal") { - int embed_dim = 100; + nonnegative_int embed_dim = 100_n; ComputationGraphOpAttrs other_attrs = ComputationGraphOpAttrs{MultiHeadAttentionAttrs{ /*embed_dim=*/embed_dim, diff --git a/lib/models/include/models/bert/bert_config.struct.toml b/lib/models/include/models/bert/bert_config.struct.toml index 398210cf48..cc2a8eb0a7 100644 --- a/lib/models/include/models/bert/bert_config.struct.toml +++ b/lib/models/include/models/bert/bert_config.struct.toml @@ -12,27 +12,28 @@ features = [ includes = [ "op-attrs/activation.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] name = "vocab_size" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "hidden_size" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "num_encoder_layers" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "num_heads" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "dim_feedforward" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "hidden_act" @@ -64,8 +65,8 @@ type = "float" [[fields]] name = "sequence_length" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "batch_size" -type = "size_t" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/models/include/models/candle_uno/candle_uno_config.struct.toml b/lib/models/include/models/candle_uno/candle_uno_config.struct.toml index 667a6531c3..e7d83efd07 100644 --- a/lib/models/include/models/candle_uno/candle_uno_config.struct.toml +++ b/lib/models/include/models/candle_uno/candle_uno_config.struct.toml @@ -14,6 +14,7 @@ includes = [ "", "", "", + "utils/nonnegative_int/nonnegative_int.h", ] src_includes = [ @@ -25,19 +26,19 @@ src_includes = [ [[fields]] name = "batch_size" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "dense_layers" -type = "std::vector<int>" +type = 
"std::vector<::FlexFlow::nonnegative_int>" [[fields]] name = "dense_feature_layers" -type = "std::vector" +type = "std::vector<::FlexFlow::nonnegative_int>" [[fields]] name = "feature_shapes" -type = "std::map" +type = "std::map" [[fields]] name = "input_features" diff --git a/lib/models/include/models/inception_v3/inception_v3_config.struct.toml b/lib/models/include/models/inception_v3/inception_v3_config.struct.toml index a2a75c83bb..1290420e16 100644 --- a/lib/models/include/models/inception_v3/inception_v3_config.struct.toml +++ b/lib/models/include/models/inception_v3/inception_v3_config.struct.toml @@ -10,13 +10,17 @@ features = [ "fmt", ] +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] + [[fields]] name = "num_classes" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "batch_size" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "aux_logits" diff --git a/lib/models/include/models/split_test/split_test.h b/lib/models/include/models/split_test/split_test.h index b03e45b2d2..dd7089c4f6 100644 --- a/lib/models/include/models/split_test/split_test.h +++ b/lib/models/include/models/split_test/split_test.h @@ -12,7 +12,7 @@ namespace FlexFlow { * @note This is a tiny model developed for testing the original Unity * implementation. It is not a "real" model and has never been trained. */ -ComputationGraph get_split_test_computation_graph(int batch_size); +ComputationGraph get_split_test_computation_graph(nonnegative_int batch_size); } // namespace FlexFlow diff --git a/lib/models/include/models/transformer/transformer_config.struct.toml b/lib/models/include/models/transformer/transformer_config.struct.toml index 23b0478dde..2a0b39feb9 100644 --- a/lib/models/include/models/transformer/transformer_config.struct.toml +++ b/lib/models/include/models/transformer/transformer_config.struct.toml @@ -1,6 +1,5 @@ namespace = "FlexFlow" name = "TransformerConfig" - features = [ "eq", "ord", @@ -10,33 +9,37 @@ features = [ "fmt", ] +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] + [[fields]] name = "num_features" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "sequence_length" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "batch_size" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "dim_feedforward" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "num_heads" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "num_encoder_layers" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "num_decoder_layers" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "dropout" @@ -48,4 +51,4 @@ type = "float" [[fields]] name = "vocab_size" -type = "size_t" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/models/src/models/bert/bert.cc b/lib/models/src/models/bert/bert.cc index cf48f2399b..a5d63e8fdc 100644 --- a/lib/models/src/models/bert/bert.cc +++ b/lib/models/src/models/bert/bert.cc @@ -6,20 +6,22 @@ namespace FlexFlow { BertConfig get_default_bert_config() { - return BertConfig{/*vocab_size=*/30522, - /*hidden_size=*/768, - /*num_encoder_layers=*/12, - /*num_heads=*/12, - /*dim_feedforward=*/3072, - /*hidden_act=*/Activation::GELU, - /*hidden_dropout_prob=*/0.1, - /*attention_probs_dropout_prob=*/0.1, - /*initializer_range=*/0.02, - /*layer_norm_eps=*/1e-12, - /*position_embedding_type=*/"absolute", - /*classifier_dropout=*/0.1, - /*sequence_length=*/512, - 
/*batch_size=*/64}; + return BertConfig{ + /*vocab_size=*/30522_n, + /*hidden_size=*/768_n, + /*num_encoder_layers=*/12_n, + /*num_heads=*/12_n, + /*dim_feedforward=*/3072_n, + /*hidden_act=*/Activation::GELU, + /*hidden_dropout_prob=*/0.1, + /*attention_probs_dropout_prob=*/0.1, + /*initializer_range=*/0.02, + /*layer_norm_eps=*/1e-12, + /*position_embedding_type=*/"absolute", + /*classifier_dropout=*/0.1, + /*sequence_length=*/512_n, + /*batch_size=*/64_n, + }; } tensor_guid_t @@ -56,9 +58,10 @@ tensor_guid_t InitializerAttrs const &bias_initializer, InitializerAttrs const &projection_initializer) { assert(num_dims(cgb.get_shape(input)) == 3); - std::vector<int> layer_norm_axis = {2}; // Apply layernorm across the last dim - int kdim = config.dim_feedforward / config.num_heads; - int vdim = config.dim_feedforward / config.num_heads; + std::vector<relative_ff_dim_t> layer_norm_axis = { + relative_ff_dim_t{-1}}; // Apply layernorm across the last dim + nonnegative_int kdim = config.dim_feedforward / config.num_heads; + nonnegative_int vdim = config.dim_feedforward / config.num_heads; tensor_guid_t self_attention = cgb.multihead_attention(input, input, @@ -127,7 +130,7 @@ ComputationGraph get_bert_computation_graph(BertConfig const &config) { InitializerAttrs bias_initializer = InitializerAttrs{ZeroInitializerAttrs{}}; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered<size_t>{ + TensorDims{FFOrdered<nonnegative_int>{ config.batch_size, config.sequence_length, config.hidden_size}}, DataType::FLOAT, }; @@ -149,7 +152,7 @@ ComputationGraph get_bert_computation_graph(BertConfig const &config) { assert( (cgb.get_shape(out_prob) == TensorShape{ - TensorDims{FFOrdered<size_t>{ + TensorDims{FFOrdered<nonnegative_int>{ config.batch_size, config.sequence_length, config.vocab_size}}, DataType::FLOAT, })); diff --git a/lib/models/src/models/candle_uno/candle_uno.cc b/lib/models/src/models/candle_uno/candle_uno.cc index 4d52d515fb..60422359a5 100644 --- a/lib/models/src/models/candle_uno/candle_uno.cc +++ b/lib/models/src/models/candle_uno/candle_uno.cc @@ -1,32 +1,34 @@ #include "models/candle_uno/candle_uno.h" #include "pcg/initializers/glorot_normal_attrs.dtg.h" +#include "utils/containers/repeat_element.h" namespace FlexFlow { CandleUnoConfig get_default_candle_uno_config() { - CandleUnoConfig config{ - /*batch_size=*/64, - /*dense_layers=*/std::vector<int>(4, 4192), - /*dense_feature_layers=*/std::vector<int>(8, 4192), - /*feature_shapes=*/std::map<std::string, size_t>{}, - /*input_features=*/std::map<std::string, std::string>{}, + return CandleUnoConfig{ + /*batch_size=*/64_n, + /*dense_layers=*/repeat_element(/*num_times=*/4_n, /*element=*/4192_n), + /*dense_feature_layers=*/ + repeat_element(/*num_times=*/8_n, /*element=*/4192_n), + /*feature_shapes=*/ + { + {"dose", 1_n}, + {"cell.rnaseq", 942_n}, + {"drug.descriptors", 5270_n}, + {"drug.fingerprints", 2048_n}, + }, + /*input_features=*/ + { + {"dose1", "dose"}, + {"dose2", "dose"}, + {"cell.rnaseq", "cell.rnaseq"}, + {"drug1.descriptors", "drug.descriptors"}, + {"drug1.fingerprints", "drug.fingerprints"}, + {"drug2.descriptors", "drug.descriptors"}, + {"drug2.fingerprints", "drug.fingerprints"}, + }, /*dropout=*/0.1, /*residual=*/false}; - - config.feature_shapes["dose"] = 1; - config.feature_shapes["cell.rnaseq"] = 942; - config.feature_shapes["drug.descriptors"] = 5270; - config.feature_shapes["drug.fingerprints"] = 2048; - - config.input_features["dose1"] = "dose"; - config.input_features["dose2"] = "dose"; - config.input_features["cell.rnaseq"] = "cell.rnaseq"; - config.input_features["drug1.descriptors"] = "drug.descriptors"; - 
config.input_features["drug1.fingerprints"] = "drug.fingerprints"; - config.input_features["drug2.descriptors"] = "drug.descriptors"; - config.input_features["drug2.fingerprints"] = "drug.fingerprints"; - - return config; } tensor_guid_t create_candle_uno_feature_model( @@ -35,7 +37,7 @@ tensor_guid_t create_candle_uno_feature_model( tensor_guid_t const &input, InitializerAttrs const &kernel_initializer) { tensor_guid_t t = input; - for (int const dense_dim : config.dense_feature_layers) { + for (nonnegative_int dense_dim : config.dense_feature_layers) { t = cgb.dense(t, dense_dim, Activation::RELU, @@ -56,7 +58,7 @@ ComputationGraph InitializerAttrs{GlorotNormalAttrs{/*seed=*/0}}; auto create_input_tensor = - [&](FFOrdered const &dims) -> tensor_guid_t { + [&](FFOrdered const &dims) -> tensor_guid_t { TensorShape input_shape = TensorShape{ TensorDims{dims}, DataType::FLOAT, @@ -82,7 +84,7 @@ ComputationGraph for (auto const &input_feature : config.input_features) { std::string const &feature_name = input_feature.second; - size_t shape = config.feature_shapes.at(feature_name); + nonnegative_int shape = config.feature_shapes.at(feature_name); tensor_guid_t input = create_input_tensor({config.batch_size, shape}); all_inputs.push_back(input); @@ -94,8 +96,9 @@ ComputationGraph } } - tensor_guid_t output = cgb.concat(encoded_inputs, /*axis=*/1); - for (int const &dense_layer_dim : config.dense_layers) { + tensor_guid_t output = + cgb.concat(encoded_inputs, /*axis=*/relative_ff_dim_t{1}); + for (nonnegative_int dense_layer_dim : config.dense_layers) { tensor_guid_t residual_input = output; output = cgb.dense(output, dense_layer_dim, @@ -111,7 +114,7 @@ ComputationGraph } } output = cgb.dense(output, - /*outDim=*/1, + /*outDim=*/1_n, /*activation=*/std::nullopt, /*use_bias=*/false, /*data_type=*/DataType::FLOAT, diff --git a/lib/models/src/models/inception_v3/inception_v3.cc b/lib/models/src/models/inception_v3/inception_v3.cc index f540eae629..3a829f3754 100644 --- a/lib/models/src/models/inception_v3/inception_v3.cc +++ b/lib/models/src/models/inception_v3/inception_v3.cc @@ -15,14 +15,17 @@ struct CheckShape { ComputationGraphBuilder const &cgb; InceptionV3Config const &config; - void operator()(tensor_guid_t t, int c, int h, int w) const { + void operator()(tensor_guid_t t, + nonnegative_int c, + nonnegative_int h, + nonnegative_int w) const { TensorShape current_shape = cgb.get_shape(t); TensorShape expected_shape = TensorShape{ - TensorDims{FFOrdered{ - size_t_from_int(config.batch_size), - size_t_from_int(c), - size_t_from_int(h), - size_t_from_int(w), + TensorDims{FFOrdered{ + config.batch_size, + c, + h, + w, }}, DataType::FLOAT, }; @@ -35,12 +38,12 @@ struct CheckShape { } } - void operator()(tensor_guid_t t, int c) const { + void operator()(tensor_guid_t t, nonnegative_int c) const { TensorShape current_shape = cgb.get_shape(t); TensorShape expected_shape = TensorShape{ - TensorDims{FFOrdered{ - size_t_from_int(config.batch_size), - size_t_from_int(c), + TensorDims{FFOrdered{ + config.batch_size, + c, }}, DataType::FLOAT, }; @@ -56,11 +59,11 @@ struct CheckShape { InceptionV3Config get_default_inception_v3_training_config() { return InceptionV3Config{ - /*num_classes=*/1000, + /*num_classes=*/1000_n, // see section 8 of https://arxiv.org/abs/1512.00567 for the source of the // batch size - /*batch_size=*/32, + /*batch_size=*/32_n, // see section 4 of https://arxiv.org/abs/1512.00567 for a discussion of // auxiliary logits. 
they are used by default in training @@ -70,13 +73,13 @@ InceptionV3Config get_default_inception_v3_training_config() { static tensor_guid_t create_conv_block(ComputationGraphBuilder &cgb, tensor_guid_t const &input, - int filters, - int kernel_size_h, - int kernel_size_w, - int stride_h = 1, - int stride_w = 1, - int padding_h = 0, - int padding_w = 0, + nonnegative_int filters, + nonnegative_int kernel_size_h, + nonnegative_int kernel_size_w, + nonnegative_int stride_h = 1_n, + nonnegative_int stride_w = 1_n, + nonnegative_int padding_h = 0_n, + nonnegative_int padding_w = 0_n, bool use_bias = false) { tensor_guid_t conv = cgb.conv2d(input, /*outChannels=*/filters, @@ -87,7 +90,7 @@ static tensor_guid_t create_conv_block(ComputationGraphBuilder &cgb, /*paddingH=*/padding_h, /*paddingW=*/padding_w, /*activation=*/std::nullopt, - /*groups=*/1, + /*groups=*/1_n, /*use_bias=*/use_bias); return cgb.batch_norm(conv, /*affine=*/true, @@ -98,29 +101,29 @@ static tensor_guid_t create_conv_block(ComputationGraphBuilder &cgb, static tensor_guid_t create_inception_module_a(ComputationGraphBuilder &cgb, tensor_guid_t const &input, - int pool_features) { + nonnegative_int pool_features) { tensor_guid_t branch1x1 = create_conv_block(cgb, input, - /*filters=*/64, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); + /*filters=*/64_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); tensor_guid_t branch5x5 = [&] { tensor_guid_t t = input; t = create_conv_block(cgb, t, - /*filters=*/48, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); + /*filters=*/48_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); t = create_conv_block(cgb, t, - /*filters=*/64, - /*kernel_size_h=*/5, - /*kernel_size_w=*/5, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/2, - /*padding_w=*/2); + /*filters=*/64_n, + /*kernel_size_h=*/5_n, + /*kernel_size_w=*/5_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/2_n, + /*padding_w=*/2_n); return t; }(); @@ -128,208 +131,209 @@ static tensor_guid_t create_inception_module_a(ComputationGraphBuilder &cgb, tensor_guid_t t = input; t = create_conv_block(cgb, t, - /*filters=*/64, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); + /*filters=*/64_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); t = create_conv_block(cgb, t, - /*filters=*/96, - /*kernel_size_h=*/3, - /*kernel_size_w=*/3, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/1, - /*padding_w=*/1); + /*filters=*/96_n, + /*kernel_size_h=*/3_n, + /*kernel_size_w=*/3_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/1_n, + /*padding_w=*/1_n); t = create_conv_block(cgb, t, - /*filters=*/96, - /*kernel_size_h=*/3, - /*kernel_size_w=*/3, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/1, - /*padding_w=*/1); + /*filters=*/96_n, + /*kernel_size_h=*/3_n, + /*kernel_size_w=*/3_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/1_n, + /*padding_w=*/1_n); return t; }(); tensor_guid_t branch_pool = [&] { tensor_guid_t t = input; t = cgb.pool2d(t, - /*kernelH=*/3, - /*kernelW=*/3, - /*strideH=*/1, - /*strideW=*/1, - /*paddingH=*/1, - /*paddingW=*/1, + /*kernelH=*/3_n, + /*kernelW=*/3_n, + /*strideH=*/1_n, + /*strideW=*/1_n, + /*paddingH=*/1_n, + /*paddingW=*/1_n, /*type=*/PoolOp::AVG); t = create_conv_block(cgb, t, /*filters=*/pool_features, - /*kernel_stride_h=*/1, - /*kernel_stride_w=*/1); + /*kernel_stride_h=*/1_n, + /*kernel_stride_w=*/1_n); return t; }(); return cgb.concat({branch1x1, branch5x5, branch3x3dbl, branch_pool}, - /*axis=*/1); + /*axis=*/relative_ff_dim_t{1}); } static tensor_guid_t 
create_inception_module_b(ComputationGraphBuilder &cgb, tensor_guid_t const &input) { tensor_guid_t branch3x3 = create_conv_block(cgb, input, - /*filters=*/384, - /*kernel_size_h=*/3, - /*kernel_size_w=*/3, - /*stride_h=*/2, - /*stride_w=*/2); + /*filters=*/384_n, + /*kernel_size_h=*/3_n, + /*kernel_size_w=*/3_n, + /*stride_h=*/2_n, + /*stride_w=*/2_n); tensor_guid_t branch3x3dbl = [&] { tensor_guid_t t = input; t = create_conv_block(cgb, t, - /*filters=*/64, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); + /*filters=*/64_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); t = create_conv_block(cgb, t, - /*filters=*/96, - /*kernel_size_h=*/3, - /*kernel_size_w=*/3, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/1, - /*padding_w=*/1); + /*filters=*/96_n, + /*kernel_size_h=*/3_n, + /*kernel_size_w=*/3_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/1_n, + /*padding_w=*/1_n); t = create_conv_block(cgb, t, - /*filters=*/96, - /*kernel_stride_h=*/3, - /*kernel_stride_w=*/3, - /*stride_h=*/2, - /*stride_w=*/2); + /*filters=*/96_n, + /*kernel_stride_h=*/3_n, + /*kernel_stride_w=*/3_n, + /*stride_h=*/2_n, + /*stride_w=*/2_n); return t; }(); tensor_guid_t branch_pool = cgb.pool2d(input, - /*kernelH=*/3, - /*kernelW=*/3, - /*strideH=*/2, - /*strideW=*/2, - /*paddingH=*/0, - /*paddingW=*/0, + /*kernelH=*/3_n, + /*kernelW=*/3_n, + /*strideH=*/2_n, + /*strideW=*/2_n, + /*paddingH=*/0_n, + /*paddingW=*/0_n, /*type=*/PoolOp::MAX); - return cgb.concat({branch3x3, branch3x3dbl, branch_pool}, /*axis=*/1); + return cgb.concat({branch3x3, branch3x3dbl, branch_pool}, + /*axis=*/relative_ff_dim_t{1}); } static tensor_guid_t create_inception_module_c(ComputationGraphBuilder &cgb, CheckShape const &check_shape, tensor_guid_t const &input, - int channels_7x7) { + nonnegative_int channels_7x7) { tensor_guid_t branch1x1 = create_conv_block(cgb, input, - /*filters=*/192, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); - check_shape(branch1x1, 192, 17, 17); + /*filters=*/192_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); + check_shape(branch1x1, 192_n, 17_n, 17_n); tensor_guid_t branch7x7 = [&] { tensor_guid_t t = input; t = create_conv_block(cgb, t, /*filters=*/channels_7x7, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); t = create_conv_block(cgb, t, /*filters=*/channels_7x7, - /*kernel_size_h=*/1, - /*kernel_size_w=*/7, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/0, - /*padding_w=*/3); + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/7_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/0_n, + /*padding_w=*/3_n); t = create_conv_block(cgb, t, - /*filters=*/192, - /*kernel_size_h=*/7, - /*kernel_size_w=*/1, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/3, - /*padding_w=*/0); + /*filters=*/192_n, + /*kernel_size_h=*/7_n, + /*kernel_size_w=*/1_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/3_n, + /*padding_w=*/0_n); return t; }(); - check_shape(branch7x7, 192, 17, 17); + check_shape(branch7x7, 192_n, 17_n, 17_n); tensor_guid_t branch7x7dbl = [&] { tensor_guid_t t = input; t = create_conv_block(cgb, t, /*filters=*/channels_7x7, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); t = create_conv_block(cgb, t, /*filters=*/channels_7x7, - /*kernel_size_h=*/7, - /*kernel_size_w=*/1, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/3, - /*padding_w=*/0); + /*kernel_size_h=*/7_n, + /*kernel_size_w=*/1_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/3_n, + 
/*padding_w=*/0_n); t = create_conv_block(cgb, t, /*filters=*/channels_7x7, - /*kernel_size_h=*/1, - /*kernel_size_w=*/7, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/0, - /*padding_w=*/3); + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/7_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/0_n, + /*padding_w=*/3_n); t = create_conv_block(cgb, t, /*filters=*/channels_7x7, - /*kernel_size_h=*/7, - /*kernel_size_w=*/1, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/3, - /*padding_w=*/0); + /*kernel_size_h=*/7_n, + /*kernel_size_w=*/1_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/3_n, + /*padding_w=*/0_n); t = create_conv_block(cgb, t, - /*filters=*/192, - /*kernel_size_h=*/1, - /*kernel_size_w=*/7, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/0, - /*padding_w=*/3); + /*filters=*/192_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/7_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/0_n, + /*padding_w=*/3_n); return t; }(); - check_shape(branch7x7dbl, 192, 17, 17); + check_shape(branch7x7dbl, 192_n, 17_n, 17_n); tensor_guid_t branch_pool = [&] { tensor_guid_t t = input; t = cgb.pool2d(t, - /*kernelH=*/3, - /*kernelW=*/3, - /*strideH=*/1, - /*strideW=*/1, - /*paddingH=*/1, - /*paddingW=*/1, + /*kernelH=*/3_n, + /*kernelW=*/3_n, + /*strideH=*/1_n, + /*strideW=*/1_n, + /*paddingH=*/1_n, + /*paddingW=*/1_n, /*type=*/PoolOp::AVG); t = create_conv_block(cgb, t, - /*filters=*/192, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); + /*filters=*/192_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); return t; }(); - check_shape(branch_pool, 192, 17, 17); + check_shape(branch_pool, 192_n, 17_n, 17_n); return cgb.concat({branch1x1, branch7x7, branch7x7dbl, branch_pool}, - /*axis=*/1); + /*axis=*/relative_ff_dim_t{1}); } static tensor_guid_t create_inception_module_d(ComputationGraphBuilder &cgb, @@ -338,10 +342,10 @@ static tensor_guid_t create_inception_module_d(ComputationGraphBuilder &cgb, tensor_guid_t t = input; t = create_conv_block(cgb, t, - /*filters=*/192, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); - t = create_conv_block(cgb, t, 320, 3, 3, 2, 2); + /*filters=*/192_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); + t = create_conv_block(cgb, t, 320_n, 3_n, 3_n, 2_n, 2_n); return t; }(); @@ -349,83 +353,84 @@ static tensor_guid_t create_inception_module_d(ComputationGraphBuilder &cgb, tensor_guid_t t = input; t = create_conv_block(cgb, t, - /*filters=*/192, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); + /*filters=*/192_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); t = create_conv_block(cgb, t, - /*filters=*/192, - /*kernel_size_h=*/1, - /*kernel_size_w=*/7, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/0, - /*padding_w=*/3); + /*filters=*/192_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/7_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/0_n, + /*padding_w=*/3_n); t = create_conv_block(cgb, t, - /*filters=*/192, - /*kernel_size_h=*/7, - /*kernel_size_w=*/1, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/3, - /*padding_w=*/0); + /*filters=*/192_n, + /*kernel_size_h=*/7_n, + /*kernel_size_w=*/1_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/3_n, + /*padding_w=*/0_n); t = create_conv_block(cgb, t, - /*filters=*/192, - /*kernel_size_h=*/3, - /*kernel_size_w=*/3, - /*stride_h=*/2, - /*stride_w=*/2); + /*filters=*/192_n, + /*kernel_size_h=*/3_n, + /*kernel_size_w=*/3_n, + /*stride_h=*/2_n, + /*stride_w=*/2_n); return t; }(); tensor_guid_t branch_pool = cgb.pool2d(input, - /*kernelH=*/3, - /*kernelW=*/3, - 
/*strideH=*/2, - /*strideW=*/2, - /*paddingH=*/0, - /*paddingW=*/0, + /*kernelH=*/3_n, + /*kernelW=*/3_n, + /*strideH=*/2_n, + /*strideW=*/2_n, + /*paddingH=*/0_n, + /*paddingW=*/0_n, /*type=*/PoolOp::MAX); - return cgb.concat({branch3x3, branch7x7x3, branch_pool}, /*axis=*/1); + return cgb.concat({branch3x3, branch7x7x3, branch_pool}, + /*axis=*/relative_ff_dim_t{1}); } static tensor_guid_t create_inception_module_e(ComputationGraphBuilder &cgb, tensor_guid_t const &input) { tensor_guid_t branch1x1 = create_conv_block(cgb, input, - /*filters=*/320, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); + /*filters=*/320_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); tensor_guid_t branch3x3 = [&] { tensor_guid_t t = input; t = create_conv_block(cgb, t, - /*filters=*/384, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); + /*filters=*/384_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); tensor_guid_t t_1 = create_conv_block(cgb, t, - /*filters=*/384, - /*kernel_size_h=*/1, - /*kernel_size_w=*/3, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/0, - /*padding_w=*/1); + /*filters=*/384_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/3_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/0_n, + /*padding_w=*/1_n); tensor_guid_t t_2 = create_conv_block(cgb, t, - /*filters=*/384, - /*kernel_size_h=*/3, - /*kernel_size_w=*/1, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/1, - /*padding_w=*/0); - t = cgb.concat({t_1, t_2}, /*axis=*/1); + /*filters=*/384_n, + /*kernel_size_h=*/3_n, + /*kernel_size_w=*/1_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/1_n, + /*padding_w=*/0_n); + t = cgb.concat({t_1, t_2}, /*axis=*/relative_ff_dim_t{1}); return t; }(); @@ -433,60 +438,60 @@ static tensor_guid_t create_inception_module_e(ComputationGraphBuilder &cgb, tensor_guid_t t = input; t = create_conv_block(cgb, t, - /*filters=*/448, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); + /*filters=*/448_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); t = create_conv_block(cgb, t, - /*filters=*/384, - /*kernel_size_h=*/3, - /*kernel_size_w=*/3, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/1, - /*padding_w=*/1); + /*filters=*/384_n, + /*kernel_size_h=*/3_n, + /*kernel_size_w=*/3_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/1_n, + /*padding_w=*/1_n); tensor_guid_t t_1 = create_conv_block(cgb, t, - /*filters=*/384, - /*kernel_size_h=*/1, - /*kernel_size_w=*/3, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/0, - /*padding_w=*/1); + /*filters=*/384_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/3_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/0_n, + /*padding_w=*/1_n); tensor_guid_t t_2 = create_conv_block(cgb, t, - /*filters=*/384, - /*kernel_size_h=*/3, - /*kernel_size_w=*/1, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/1, - /*padding_w=*/0); - t = cgb.concat({t_1, t_2}, /*axis=*/1); + /*filters=*/384_n, + /*kernel_size_h=*/3_n, + /*kernel_size_w=*/1_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/1_n, + /*padding_w=*/0_n); + t = cgb.concat({t_1, t_2}, /*axis=*/relative_ff_dim_t{1}); return t; }(); tensor_guid_t branch_pool = [&] { tensor_guid_t t = input; t = cgb.pool2d(t, - /*kernelH=*/3, - /*kernelW=*/3, - /*strideH=*/1, - /*strideW=*/1, - /*paddingH=*/1, - /*paddingW=*/1, + /*kernelH=*/3_n, + /*kernelW=*/3_n, + /*strideH=*/1_n, + /*strideW=*/1_n, + /*paddingH=*/1_n, + /*paddingW=*/1_n, /*type=*/PoolOp::AVG); t = create_conv_block(cgb, t, - /*filters=*/192, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); + /*filters=*/192_n, + 
/*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); return t; }(); return cgb.concat({branch1x1, branch3x3, branch3x3dbl, branch_pool}, - /*axis=*/1); + /*axis=*/relative_ff_dim_t{1}); } static tensor_guid_t create_initial_layers(ComputationGraphBuilder &cgb, @@ -494,75 +499,75 @@ static tensor_guid_t create_initial_layers(ComputationGraphBuilder &cgb, tensor_guid_t const &input) { tensor_guid_t t = input; - check_shape(t, 3, 299, 299); + check_shape(t, 3_n, 299_n, 299_n); // Conv2d_1a_3x3 t = create_conv_block(cgb, t, - /*filters=*/32, - /*kernel_size_h=*/3, - /*kernel_size_w=*/3, - /*stride_h=*/2, - /*stride_w=*/2); - check_shape(t, 32, 149, 149); + /*filters=*/32_n, + /*kernel_size_h=*/3_n, + /*kernel_size_w=*/3_n, + /*stride_h=*/2_n, + /*stride_w=*/2_n); + check_shape(t, 32_n, 149_n, 149_n); // Conv2d_2a_3x3 t = create_conv_block(cgb, t, - /*filters=*/32, - /*kernel_size_h=*/3, - /*kernel_size_w=*/3); - check_shape(t, 32, 147, 147); + /*filters=*/32_n, + /*kernel_size_h=*/3_n, + /*kernel_size_w=*/3_n); + check_shape(t, 32_n, 147_n, 147_n); // Conv2d_2b_3x3 t = create_conv_block(cgb, t, - /*filters=*/64, - /*kernel_size_h=*/3, - /*kernel_size_w=*/3, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/1, - /*padding_w=*/1); - check_shape(t, 64, 147, 147); + /*filters=*/64_n, + /*kernel_size_h=*/3_n, + /*kernel_size_w=*/3_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/1_n, + /*padding_w=*/1_n); + check_shape(t, 64_n, 147_n, 147_n); // maxpool1 t = cgb.pool2d(t, - /*kernelH=*/3, - /*kernelW=*/3, - /*strideH=*/2, - /*strideW=*/2, - /*paddingH=*/0, - /*paddingW=*/0, + /*kernelH=*/3_n, + /*kernelW=*/3_n, + /*strideH=*/2_n, + /*strideW=*/2_n, + /*paddingH=*/0_n, + /*paddingW=*/0_n, /*type=*/PoolOp::MAX); - check_shape(t, 64, 73, 73); + check_shape(t, 64_n, 73_n, 73_n); // Conv2d_3b_1x1 t = create_conv_block(cgb, t, - /*filters=*/80, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); - check_shape(t, 80, 73, 73); + /*filters=*/80_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); + check_shape(t, 80_n, 73_n, 73_n); // Conv2d_4a_3x3 t = create_conv_block(cgb, t, - /*filters=*/192, - /*kernel_size_h=*/3, - /*kernel_size_w=*/3); - check_shape(t, 192, 71, 71); + /*filters=*/192_n, + /*kernel_size_h=*/3_n, + /*kernel_size_w=*/3_n); + check_shape(t, 192_n, 71_n, 71_n); // maxpool2 t = cgb.pool2d(t, - /*kernelH=*/3, - /*kernelW=*/3, - /*strideH=*/2, - /*strideW=*/2, - /*paddingH=*/0, - /*paddingW=*/0, + /*kernelH=*/3_n, + /*kernelW=*/3_n, + /*strideH=*/2_n, + /*strideW=*/2_n, + /*paddingH=*/0_n, + /*paddingW=*/0_n, /*type=*/PoolOp::MAX); - check_shape(t, 192, 35, 35); + check_shape(t, 192_n, 35_n, 35_n); return t; } @@ -570,26 +575,26 @@ static tensor_guid_t create_initial_layers(ComputationGraphBuilder &cgb, static tensor_guid_t create_final_layers(ComputationGraphBuilder &cgb, CheckShape const &check_shape, tensor_guid_t const &input, - size_t num_classes) { + nonnegative_int num_classes) { // avgpool tensor_guid_t x = cgb.pool2d(input, - /*kernelH=*/8, - /*kernelW=*/8, - /*strideH=*/1, - /*strideW=*/1, - /*paddingH=*/0, - /*paddingW=*/0, + /*kernelH=*/8_n, + /*kernelW=*/8_n, + /*strideH=*/1_n, + /*strideW=*/1_n, + /*paddingH=*/0_n, + /*paddingW=*/0_n, /*type=*/PoolOp::AVG); - check_shape(x, 2048, 1, 1); + check_shape(x, 2048_n, 1_n, 1_n); // dropout x = cgb.dropout(x, /*rate=*/0.5); - check_shape(x, 2048, 1, 1); + check_shape(x, 2048_n, 1_n, 1_n); x = cgb.flat(x, - /*start_dim=*/1); - check_shape(x, 2048); + /*start_dim=*/relative_ff_dim_t{1}); + check_shape(x, 2048_n); // fc x = 
cgb.dense(x, @@ -597,7 +602,7 @@ static tensor_guid_t create_final_layers(ComputationGraphBuilder &cgb, check_shape(x, num_classes); // softmax (not in pytorch model, but shown in Table 1 on p6 of - // https://arxiv.org/abs/1512.00567) + // https://arxiv.org/abs/1512.00567) x = cgb.softmax(x); check_shape(x, num_classes); @@ -607,44 +612,44 @@ static tensor_guid_t create_inception_aux(ComputationGraphBuilder &cgb, CheckShape const &check_shape, tensor_guid_t const &input, - size_t num_classes) { + nonnegative_int num_classes) { tensor_guid_t x = input; - check_shape(x, 768, 17, 17); + check_shape(x, 768_n, 17_n, 17_n); x = cgb.pool2d(x, - /*kernelH=*/5, - /*kernelW=*/5, - /*strideH=*/3, - /*strideW=*/3, - /*paddingH=*/0, - /*paddingW=*/0, + /*kernelH=*/5_n, + /*kernelW=*/5_n, + /*strideH=*/3_n, + /*strideW=*/3_n, + /*paddingH=*/0_n, + /*paddingW=*/0_n, /*type=*/PoolOp::AVG); - check_shape(x, 768, 5, 5); + check_shape(x, 768_n, 5_n, 5_n); // conv0 x = create_conv_block(cgb, x, - /*filters=*/128, - /*kernel_size_h=*/1, - /*kernel_size_w=*/1); - check_shape(x, 128, 5, 5); + /*filters=*/128_n, + /*kernel_size_h=*/1_n, + /*kernel_size_w=*/1_n); + check_shape(x, 128_n, 5_n, 5_n); // conv1 x = create_conv_block(cgb, x, - /*filters=*/768, - /*kernel_size_h=*/5, - /*kernel_size_w=*/5); - check_shape(x, 768, 1, 1); + /*filters=*/768_n, + /*kernel_size_h=*/5_n, + /*kernel_size_w=*/5_n); + check_shape(x, 768_n, 1_n, 1_n); x = cgb.adaptive_pool2d(x, - /*output_h=*/1, - /*output_w=*/1); - check_shape(x, 768, 1, 1); + /*output_h=*/1_n, + /*output_w=*/1_n); + check_shape(x, 768_n, 1_n, 1_n); x = cgb.flat(x, - /*start_dim=*/1); - check_shape(x, 768); + /*start_dim=*/relative_ff_dim_t{1}); + check_shape(x, 768_n); // fc x = cgb.dense(x, @@ -666,39 +671,39 @@ static InceptionV3Output create_inception_v3(ComputationGraphBuilder &cgb, }; tensor_guid_t x = create_initial_layers(cgb, check_shape, input); - check_shape(x, 192, 35, 35); + check_shape(x, 192_n, 35_n, 35_n); // Mixed_5b - x = create_inception_module_a(cgb, x, 32); - check_shape(x, 256, 35, 35); + x = create_inception_module_a(cgb, x, 32_n); + check_shape(x, 256_n, 35_n, 35_n); // Mixed_5c - x = create_inception_module_a(cgb, x, 64); - check_shape(x, 288, 35, 35); + x = create_inception_module_a(cgb, x, 64_n); + check_shape(x, 288_n, 35_n, 35_n); // Mixed_5d - x = create_inception_module_a(cgb, x, 64); - check_shape(x, 288, 35, 35); + x = create_inception_module_a(cgb, x, 64_n); + check_shape(x, 288_n, 35_n, 35_n); // Mixed_6a x = create_inception_module_b(cgb, x); - check_shape(x, 768, 17, 17); + check_shape(x, 768_n, 17_n, 17_n); // Mixed_6b - x = create_inception_module_c(cgb, check_shape, x, 128); - check_shape(x, 768, 17, 17); + x = create_inception_module_c(cgb, check_shape, x, 128_n); + check_shape(x, 768_n, 17_n, 17_n); // Mixed_6c - x = create_inception_module_c(cgb, check_shape, x, 160); - check_shape(x, 768, 17, 17); + x = create_inception_module_c(cgb, check_shape, x, 160_n); + check_shape(x, 768_n, 17_n, 17_n); // Mixed_6d - x = create_inception_module_c(cgb, check_shape, x, 160); - check_shape(x, 768, 17, 17); + x = create_inception_module_c(cgb, check_shape, x, 160_n); + check_shape(x, 768_n, 17_n, 17_n); // Mixed_6e - x = create_inception_module_c(cgb, check_shape, x, 192); - check_shape(x, 768, 17, 17); + x = create_inception_module_c(cgb, check_shape, x, 192_n); + check_shape(x, 768_n, 17_n, 17_n); std::optional<tensor_guid_t> aux; if (config.aux_logits) { @@ -708,15 +713,15 
@@ static InceptionV3Output create_inception_v3(ComputationGraphBuilder &cgb, // Mixed_7a x = create_inception_module_d(cgb, x); - check_shape(x, 1280, 8, 8); + check_shape(x, 1280_n, 8_n, 8_n); // Mixed_7b x = create_inception_module_e(cgb, x); - check_shape(x, 2048, 8, 8); + check_shape(x, 2048_n, 8_n, 8_n); // Mixed_7c x = create_inception_module_e(cgb, x); - check_shape(x, 2048, 8, 8); + check_shape(x, 2048_n, 8_n, 8_n); x = create_final_layers(cgb, check_shape, x, config.num_classes); check_shape(x, config.num_classes); @@ -732,11 +737,11 @@ ComputationGraph ComputationGraphBuilder cgb; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - size_t_from_int(config.batch_size), - 3, - 299, - 299, + TensorDims{FFOrdered{ + config.batch_size, + 3_n, + 299_n, + 299_n, }}, DataType::FLOAT, }; diff --git a/lib/models/src/models/split_test/split_test.cc b/lib/models/src/models/split_test/split_test.cc index 118f94ec06..d3876d8bfc 100644 --- a/lib/models/src/models/split_test/split_test.cc +++ b/lib/models/src/models/split_test/split_test.cc @@ -4,18 +4,18 @@ namespace FlexFlow { -ComputationGraph get_split_test_computation_graph(int batch_size) { +ComputationGraph get_split_test_computation_graph(nonnegative_int batch_size) { ComputationGraphBuilder cgb; - int layer_dim1 = 256; - int layer_dim2 = 128; - int layer_dim3 = 64; - int layer_dim4 = 32; + nonnegative_int layer_dim1 = 256_n; + nonnegative_int layer_dim2 = 128_n; + nonnegative_int layer_dim3 = 64_n; + nonnegative_int layer_dim4 = 32_n; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - size_t_from_int(batch_size), - size_t_from_int(layer_dim1), + TensorDims{FFOrdered{ + batch_size, + layer_dim1, }}, DataType::FLOAT, }; diff --git a/lib/models/src/models/transformer/transformer.cc b/lib/models/src/models/transformer/transformer.cc index 173a1b291c..f71763313a 100644 --- a/lib/models/src/models/transformer/transformer.cc +++ b/lib/models/src/models/transformer/transformer.cc @@ -4,16 +4,16 @@ namespace FlexFlow { TransformerConfig get_default_transformer_config() { - return TransformerConfig{/*num_features=*/512, - /*sequence_length=*/512, - /*batch_size=*/64, - /*dim_feedforward=*/2048, - /*num_heads=*/8, - /*num_encoder_layers=*/6, - /*num_decoder_layers=*/6, + return TransformerConfig{/*num_features=*/512_n, + /*sequence_length=*/512_n, + /*batch_size=*/64_n, + /*dim_feedforward=*/2048_n, + /*num_heads=*/8_n, + /*num_encoder_layers=*/6_n, + /*num_decoder_layers=*/6_n, /*dropout=*/0.1, /*layer_norm_eps=*/1e-05, - /*vocab_size=*/64}; + /*vocab_size=*/64_n}; } tensor_guid_t create_feedforward_network(ComputationGraphBuilder &cgb, @@ -32,18 +32,20 @@ tensor_guid_t create_feedforward_network(ComputationGraphBuilder &cgb, tensor_guid_t create_transformer_encoder_layer(ComputationGraphBuilder &cgb, TransformerConfig const &config, tensor_guid_t const &input) { - std::vector layer_norm_axis{2}; // Normalize the last dim - int kdim = config.dim_feedforward / config.num_heads; - int vdim = config.dim_feedforward / config.num_heads; - tensor_guid_t self_attention = cgb.multihead_attention(input, - input, - input, - config.num_features, - config.num_heads, - kdim, - vdim, - config.dropout, - /*bias=*/false); + std::vector layer_norm_axis = { + relative_ff_dim_t{-1}}; // Normalize the last dim + nonnegative_int kdim = config.dim_feedforward / config.num_heads; + nonnegative_int vdim = config.dim_feedforward / config.num_heads; + tensor_guid_t self_attention = + cgb.multihead_attention(/*query=*/input, + /*key=*/input, + 
/*value=*/input, + /*embed_dim=*/config.num_features, + /*num_heads=*/config.num_heads, + /*kdim=*/kdim, + /*vdim=*/vdim, + /*dropout=*/config.dropout, + /*bias=*/false); assert(are_tensor_guid_shapes_equivalent( cgb.computation_graph, input, self_attention)); @@ -79,18 +81,20 @@ tensor_guid_t TransformerConfig const &config, tensor_guid_t const &input, tensor_guid_t const &encoder_output) { - std::vector layer_norm_axis{2}; // Normalize the last dim - int kdim = config.dim_feedforward / config.num_heads; - int vdim = config.dim_feedforward / config.num_heads; - tensor_guid_t self_attention = cgb.multihead_attention(input, - input, - input, - config.num_features, - config.num_heads, - kdim, - vdim, - config.dropout, - /*bias=*/false); + std::vector layer_norm_axis = { + relative_ff_dim_t{-1}}; // Normalize the last dim + nonnegative_int kdim = config.dim_feedforward / config.num_heads; + nonnegative_int vdim = config.dim_feedforward / config.num_heads; + tensor_guid_t self_attention = + cgb.multihead_attention(/*query=*/input, + /*key=*/input, + /*value=*/input, + /*embed_dim=*/config.num_features, + /*num_heads=*/config.num_heads, + /*kdim=*/kdim, + /*vdim=*/vdim, + /*dropout=*/config.dropout, + /*bias=*/false); assert(are_tensor_guid_shapes_equivalent( cgb.computation_graph, input, self_attention)); @@ -102,15 +106,16 @@ tensor_guid_t assert(are_tensor_guid_shapes_equivalent( cgb.computation_graph, input, self_attention_normalized)); - tensor_guid_t mha = cgb.multihead_attention(self_attention_normalized, - encoder_output, - encoder_output, - config.num_features, - config.num_heads, - kdim, - vdim, - config.dropout, - /*bias=*/false); + tensor_guid_t mha = + cgb.multihead_attention(/*query=*/self_attention_normalized, + /*key=*/encoder_output, + /*value=*/encoder_output, + /*embed_dim=*/config.num_features, + /*num_heads=*/config.num_heads, + /*kdim=*/kdim, + /*vdim=*/vdim, + /*dropout=*/config.dropout, + /*bias=*/false); assert(are_tensor_guid_shapes_equivalent(cgb.computation_graph, input, mha)); tensor_guid_t mha_normalized = @@ -148,7 +153,7 @@ ComputationGraph ComputationGraphBuilder cgb; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ config.batch_size, config.sequence_length, config.num_features}}, DataType::FLOAT, }; diff --git a/lib/op-attrs/include/op-attrs/computation_graph_op_attrs.variant.toml b/lib/op-attrs/include/op-attrs/computation_graph_op_attrs.variant.toml index 014526a601..f1c5fe6b23 100644 --- a/lib/op-attrs/include/op-attrs/computation_graph_op_attrs.variant.toml +++ b/lib/op-attrs/include/op-attrs/computation_graph_op_attrs.variant.toml @@ -11,7 +11,7 @@ features = [ includes = [ "op-attrs/ops/attention_attrs.dtg.h", - "op-attrs/ops/batch_matmul.dtg.h", + "op-attrs/ops/batch_matmul_attrs.dtg.h", "op-attrs/ops/batch_norm_attrs.dtg.h", "op-attrs/ops/broadcast_attrs.dtg.h", "op-attrs/ops/cast_attrs.dtg.h", diff --git a/lib/op-attrs/include/op-attrs/datatype.h b/lib/op-attrs/include/op-attrs/datatype.h index 5af00fb510..3a817af38c 100644 --- a/lib/op-attrs/include/op-attrs/datatype.h +++ b/lib/op-attrs/include/op-attrs/datatype.h @@ -4,6 +4,7 @@ #include "op-attrs/datatype.dtg.h" #include "utils/fmt.h" #include "utils/fp16.h" +#include "utils/nonnegative_int/nonnegative_int.h" #include namespace FlexFlow { @@ -49,7 +50,7 @@ typename data_type_enum_to_class
<DT>::type cast_to(T t) { template <DataType DT> using real_type_t = typename data_type_enum_to_class
<DT>::type; -size_t size_of_datatype(DataType); +nonnegative_int size_of_datatype(DataType); bool can_strictly_promote_datatype_from_to(DataType, DataType); diff --git a/lib/op-attrs/include/op-attrs/dim_ordered/dim_ordered.h b/lib/op-attrs/include/op-attrs/dim_ordered/dim_ordered.h index 3977f4e0fd..f2355289dc 100644 --- a/lib/op-attrs/include/op-attrs/dim_ordered/dim_ordered.h +++ b/lib/op-attrs/include/op-attrs/dim_ordered/dim_ordered.h @@ -32,19 +32,13 @@ struct DimOrdered { : contents(contents.begin(), contents.end()) {} T const &at(Idx idx) const { - int raw = idx.value; - if (raw < 0) { - raw = this->contents.size() + raw; - } - return this->contents.at(raw); + nonnegative_int raw = idx.value; + return this->contents.at(raw.unwrap_nonnegative()); } T &at(Idx idx) { - int raw = idx.value; - if (raw < 0) { - raw = this->contents.size() + raw; - } - return this->contents.at(raw); + nonnegative_int raw = idx.value; + return this->contents.at(raw.unwrap_nonnegative()); } T const &operator[](Idx idx) const { @@ -56,11 +50,8 @@ struct DimOrdered { } bool idx_is_valid(Idx const &idx) const { - int raw = idx.value; - if (raw < 0) { - raw = this->contents.size() + raw; - } - return (raw >= 0 && raw < this->contents.size()); + nonnegative_int raw = idx.value; + return (raw < this->contents.size()); } bool operator==(DimOrdered const &other) const { @@ -172,7 +163,7 @@ struct DimOrdered { : contents(contents.begin(), contents.end()) {} T const &at(ff_dim_t idx) const { - int raw = idx.value.get_value(); + int raw = idx.value.unwrap_nonnegative(); return this->contents.at(raw); } @@ -185,7 +176,7 @@ struct DimOrdered { } T &at(ff_dim_t idx) { - int raw = idx.value.get_value(); + int raw = idx.value.unwrap_nonnegative(); return this->contents.at(raw); } @@ -214,7 +205,7 @@ struct DimOrdered { } bool idx_is_valid(ff_dim_t const &idx) const { - int raw = idx.value.get_value(); + int raw = idx.value.unwrap_nonnegative(); return raw < this->contents.size(); } diff --git a/lib/op-attrs/include/op-attrs/dim_ordered/slice.h b/lib/op-attrs/include/op-attrs/dim_ordered/slice.h index c9e6db4d17..166916dd44 100644 --- a/lib/op-attrs/include/op-attrs/dim_ordered/slice.h +++ b/lib/op-attrs/include/op-attrs/dim_ordered/slice.h @@ -27,8 +27,8 @@ FFOrdered<T> ff_dim_t_nonoverloaded_slice(FFOrdered<T> const &d, std::optional<ff_dim_t> const &end) { auto to_raw_idx = [](std::optional<ff_dim_t> const &idx) -> std::optional<int> { - return transform(idx, - [](ff_dim_t const &i) { return i.value.get_value(); }); + return transform( + idx, [](ff_dim_t const &i) { return i.value.unwrap_nonnegative(); }); }; return FFOrdered<T>{subvec(vector_of(d), to_raw_idx(start), to_raw_idx(end))}; diff --git a/lib/op-attrs/include/op-attrs/get_op_type.h b/lib/op-attrs/include/op-attrs/get_op_type.h index b60880a98b..7799900709 100644 --- a/lib/op-attrs/include/op-attrs/get_op_type.h +++ b/lib/op-attrs/include/op-attrs/get_op_type.h @@ -2,7 +2,7 @@ #define _FLEXFLOW_OP_ATTRS_GET_OP_TYPE_H #include "op-attrs/ops/attention_attrs.dtg.h" -#include "op-attrs/ops/batch_matmul.dtg.h" +#include "op-attrs/ops/batch_matmul_attrs.dtg.h" #include "op-attrs/ops/batch_norm_attrs.dtg.h" #include "op-attrs/ops/broadcast_attrs.dtg.h" #include "op-attrs/ops/cast_attrs.dtg.h" diff --git a/lib/op-attrs/include/op-attrs/ops/attention.h b/lib/op-attrs/include/op-attrs/ops/attention.h index e06d795c04..5f1b11c1bb 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention.h +++ b/lib/op-attrs/include/op-attrs/ops/attention.h @@ -12,31 +12,31 @@ namespace FlexFlow { -int 
get_qProjSize(MultiHeadAttentionAttrs const &); -int get_vProjSize(MultiHeadAttentionAttrs const &); -int get_kProjSize(MultiHeadAttentionAttrs const &); -int get_oProjSize(MultiHeadAttentionAttrs const &); +nonnegative_int get_qProjSize(MultiHeadAttentionAttrs const &); +nonnegative_int get_vProjSize(MultiHeadAttentionAttrs const &); +nonnegative_int get_kProjSize(MultiHeadAttentionAttrs const &); +nonnegative_int get_oProjSize(MultiHeadAttentionAttrs const &); -int get_qSize(MultiHeadAttentionParallelInputs const &); -int get_qSize(MultiHeadAttentionInputs const &); +nonnegative_int get_qSize(MultiHeadAttentionParallelInputs const &); +nonnegative_int get_qSize(MultiHeadAttentionInputs const &); -int get_kSize(MultiHeadAttentionParallelInputs const &); -int get_kSize(MultiHeadAttentionInputs const &); +nonnegative_int get_kSize(MultiHeadAttentionParallelInputs const &); +nonnegative_int get_kSize(MultiHeadAttentionInputs const &); -int get_vSize(MultiHeadAttentionParallelInputs const &); -int get_vSize(MultiHeadAttentionInputs const &); +nonnegative_int get_vSize(MultiHeadAttentionParallelInputs const &); +nonnegative_int get_vSize(MultiHeadAttentionInputs const &); -int get_oSize(ParallelTensorShape const &); -int get_oSize(TensorShape const &); +nonnegative_int get_oSize(ParallelTensorShape const &); +nonnegative_int get_oSize(TensorShape const &); -int get_qoSeqLength(MultiHeadAttentionParallelInputs const &); -int get_qoSeqLength(MultiHeadAttentionInputs const &); +nonnegative_int get_qoSeqLength(MultiHeadAttentionParallelInputs const &); +nonnegative_int get_qoSeqLength(MultiHeadAttentionInputs const &); -int get_kvSeqLength(MultiHeadAttentionParallelInputs const &); -int get_kvSeqLength(MultiHeadAttentionInputs const &); +nonnegative_int get_kvSeqLength(MultiHeadAttentionParallelInputs const &); +nonnegative_int get_kvSeqLength(MultiHeadAttentionInputs const &); -int get_num_samples(MultiHeadAttentionParallelInputs const &); -int get_num_samples(MultiHeadAttentionInputs const &); +nonnegative_int get_num_samples(MultiHeadAttentionParallelInputs const &); +nonnegative_int get_num_samples(MultiHeadAttentionInputs const &); std::vector get_attention_incoming_tensor_roles(MultiHeadAttentionAttrs const &); diff --git a/lib/op-attrs/include/op-attrs/ops/attention/multihead_attention_inputs.struct.toml b/lib/op-attrs/include/op-attrs/ops/attention/multihead_attention_inputs.struct.toml index b82b285451..f85b7268af 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention/multihead_attention_inputs.struct.toml +++ b/lib/op-attrs/include/op-attrs/ops/attention/multihead_attention_inputs.struct.toml @@ -10,29 +10,29 @@ features = [ ] includes = [ - "", "op-attrs/datatype.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] name = "batch_size" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "sequence_length" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "query_size" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "key_size" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "value_size" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "datatype" diff --git a/lib/op-attrs/include/op-attrs/ops/attention_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/attention_attrs.struct.toml index d96d8af69c..019131b07c 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention_attrs.struct.toml +++ b/lib/op-attrs/include/op-attrs/ops/attention_attrs.struct.toml @@ -10,21 
+10,25 @@ features = [ "fmt", ] +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] + [[fields]] name = "embed_dim" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "num_heads" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "kdim" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "vdim" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "dropout" diff --git a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h index 574b4ef579..333da4fa29 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h @@ -1,7 +1,7 @@ #ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_BATCH_MATMUL_H #define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_BATCH_MATMUL_H -#include "op-attrs/ops/batch_matmul.dtg.h" +#include "op-attrs/ops/batch_matmul_attrs.dtg.h" #include "op-attrs/ops/core.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" diff --git a/lib/op-attrs/include/op-attrs/ops/batch_matmul.struct.toml b/lib/op-attrs/include/op-attrs/ops/batch_matmul.struct.toml deleted file mode 100644 index 3b1dd3f687..0000000000 --- a/lib/op-attrs/include/op-attrs/ops/batch_matmul.struct.toml +++ /dev/null @@ -1,19 +0,0 @@ -namespace = "FlexFlow" -name = "BatchMatmulAttrs" - -features = [ - "eq", - "ord", - "hash", - "json", - "rapidcheck", - "fmt", -] - -[[fields]] -name = "a_seq_length_dim" -type = "int" - -[[fields]] -name = "b_seq_length_dim" -type = "int" diff --git a/lib/op-attrs/include/op-attrs/ops/batch_matmul_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/batch_matmul_attrs.struct.toml new file mode 100644 index 0000000000..394dfb5fcc --- /dev/null +++ b/lib/op-attrs/include/op-attrs/ops/batch_matmul_attrs.struct.toml @@ -0,0 +1,30 @@ +namespace = "FlexFlow" +name = "BatchMatmulAttrs" + +features = [ + "eq", + "ord", + "hash", + "json", + "rapidcheck", + "fmt", +] + +includes = [ + "utils/nonnegative_int/nonnegative_int.h", + "", +] + +src_includes = [ + "utils/fmt/optional.h", + "utils/json/optional.h", + "utils/rapidcheck/optional.h", +] + +[[fields]] +name = "a_seq_length_dim" +type = "std::optional<::FlexFlow::nonnegative_int>" + +[[fields]] +name = "b_seq_length_dim" +type = "std::optional<::FlexFlow::nonnegative_int>" diff --git a/lib/op-attrs/include/op-attrs/ops/combine_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/combine_attrs.struct.toml index e7eeedec06..b3c574264c 100644 --- a/lib/op-attrs/include/op-attrs/ops/combine_attrs.struct.toml +++ b/lib/op-attrs/include/op-attrs/ops/combine_attrs.struct.toml @@ -12,6 +12,7 @@ features = [ includes = [ "op-attrs/ff_dim_t.h", "op-attrs/ff_dim_t.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] @@ -20,4 +21,4 @@ type = "::FlexFlow::ff_dim_t" [[fields]] name = "combine_degree" -type = "int" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_input_shape.struct.toml b/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_input_shape.struct.toml index 77e8c51244..c4fb74ebd8 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_input_shape.struct.toml +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_input_shape.struct.toml @@ -12,23 +12,24 @@ features = [ includes = [ "", "op-attrs/datatype.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] name = "num_samples" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] 
name = "num_channels" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "height" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "width" -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "datatype" diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.struct.toml b/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.struct.toml index 68cbd878d1..fdf0eaca78 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.struct.toml +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.struct.toml @@ -12,6 +12,7 @@ features = [ includes = [ "op-attrs/shard_parallel_dim.dtg.h", "op-attrs/datatype.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] @@ -32,11 +33,11 @@ type = "::FlexFlow::ShardParallelDim" [[fields]] name = "sum_reduction_degree" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "discard_copy_reduction_degree" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "datatype" diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/conv_2d_attrs.struct.toml index 5bef144cd9..8b86d42e04 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d_attrs.struct.toml +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d_attrs.struct.toml @@ -12,6 +12,7 @@ features = [ includes = [ "", "op-attrs/activation.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] src_includes = [ @@ -21,14 +22,14 @@ src_includes = [ ] fields = [ - { name = "out_channels", type = "int" }, - { name = "kernel_h", type = "int" }, - { name = "kernel_w", type = "int" }, - { name = "stride_h", type = "int" }, - { name = "stride_w", type = "int" }, - { name = "padding_h", type = "int" }, - { name = "padding_w", type = "int" }, - { name = "groups", type = "int" }, + { name = "out_channels", type = "::FlexFlow::nonnegative_int" }, + { name = "kernel_h", type = "::FlexFlow::nonnegative_int" }, + { name = "kernel_w", type = "::FlexFlow::nonnegative_int" }, + { name = "stride_h", type = "::FlexFlow::nonnegative_int" }, + { name = "stride_w", type = "::FlexFlow::nonnegative_int" }, + { name = "padding_h", type = "::FlexFlow::nonnegative_int" }, + { name = "padding_w", type = "::FlexFlow::nonnegative_int" }, + { name = "groups", type = "::FlexFlow::nonnegative_int" }, { name = "activation", type = "std::optional<::FlexFlow::Activation>" }, { name = "use_bias", type = "bool" }, ] diff --git a/lib/op-attrs/include/op-attrs/ops/embedding_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/embedding_attrs.struct.toml index b8d15284e9..5a857efb3e 100644 --- a/lib/op-attrs/include/op-attrs/ops/embedding_attrs.struct.toml +++ b/lib/op-attrs/include/op-attrs/ops/embedding_attrs.struct.toml @@ -10,9 +10,10 @@ features = [ ] includes = [ - "utils/stack_vector/stack_vector.h", "op-attrs/aggregate_op.dtg.h", "op-attrs/datatype.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", + "", ] src_includes = [ @@ -23,11 +24,11 @@ src_includes = [ [[fields]] name = "num_entries" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "out_channels" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "aggr" diff --git a/lib/op-attrs/include/op-attrs/ops/linear_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/linear_attrs.struct.toml index 0a35a6c5ec..ffbe93c975 100644 --- a/lib/op-attrs/include/op-attrs/ops/linear_attrs.struct.toml +++ 
b/lib/op-attrs/include/op-attrs/ops/linear_attrs.struct.toml
@@ -14,6 +14,7 @@ includes = [
   "op-attrs/activation.dtg.h",
   "op-attrs/regularizer_attrs.dtg.h",
   "<optional>",
+  "utils/nonnegative_int/nonnegative_int.h",
 ]
 
 src_includes = [
@@ -24,7 +25,7 @@ src_includes = [
 
 [[fields]]
 name = "out_channels"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
 
 [[fields]]
 name = "use_bias"
diff --git a/lib/op-attrs/include/op-attrs/ops/pool_2d.h b/lib/op-attrs/include/op-attrs/ops/pool_2d.h
index 1af22ad022..af11d61f07 100644
--- a/lib/op-attrs/include/op-attrs/ops/pool_2d.h
+++ b/lib/op-attrs/include/op-attrs/ops/pool_2d.h
@@ -13,8 +13,8 @@ CHECK_VALID_OP_ATTR(Pool2DAttrs);
 
 tl::expected<Pool2DAttrs, std::string>
     make_adaptive_pool2d_attrs(TensorDims const &input_dims,
-                               int output_h,
-                               int output_w,
+                               nonnegative_int output_h,
+                               nonnegative_int output_w,
                                PoolOp pool_type,
                                std::optional<Activation> const &activation);
 
diff --git a/lib/op-attrs/include/op-attrs/ops/pool_2d_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/pool_2d_attrs.struct.toml
index 20ca7deabc..fea318d46d 100644
--- a/lib/op-attrs/include/op-attrs/ops/pool_2d_attrs.struct.toml
+++ b/lib/op-attrs/include/op-attrs/ops/pool_2d_attrs.struct.toml
@@ -13,6 +13,7 @@ includes = [
   "op-attrs/pool_op.dtg.h",
   "op-attrs/activation.dtg.h",
   "<optional>",
+  "utils/nonnegative_int/nonnegative_int.h",
 ]
 
 src_includes = [
@@ -23,27 +24,27 @@ src_includes = [
 
 [[fields]]
 name = "kernel_h"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
 
 [[fields]]
 name = "kernel_w"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
 
 [[fields]]
 name = "stride_h"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
 
 [[fields]]
 name = "stride_w"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
 
 [[fields]]
 name = "padding_h"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
 
 [[fields]]
 name = "padding_w"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
 
 [[fields]]
 name = "pool_type"
diff --git a/lib/op-attrs/include/op-attrs/ops/reduction_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/reduction_attrs.struct.toml
index ee0ae54132..2798a85caf 100644
--- a/lib/op-attrs/include/op-attrs/ops/reduction_attrs.struct.toml
+++ b/lib/op-attrs/include/op-attrs/ops/reduction_attrs.struct.toml
@@ -9,6 +9,10 @@ features = [
   "fmt",
 ]
 
+includes = [
+  "utils/nonnegative_int/nonnegative_int.h",
+]
+
 [[fields]]
 name = "reduction_degree"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
diff --git a/lib/op-attrs/include/op-attrs/ops/repartition_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/repartition_attrs.struct.toml
index 69c4b7580f..965c40c05a 100644
--- a/lib/op-attrs/include/op-attrs/ops/repartition_attrs.struct.toml
+++ b/lib/op-attrs/include/op-attrs/ops/repartition_attrs.struct.toml
@@ -12,6 +12,7 @@ features = [
 
 includes = [
   "op-attrs/ff_dim_t.h",
   "op-attrs/ff_dim_t.dtg.h",
+  "utils/nonnegative_int/nonnegative_int.h",
 ]
 
 [[fields]]
@@ -20,4 +21,4 @@ type = "::FlexFlow::ff_dim_t"
 
 [[fields]]
 name = "repartition_degree"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
diff --git a/lib/op-attrs/include/op-attrs/ops/replicate_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/replicate_attrs.struct.toml
index 4e43ea747a..58e365c0f2 100644
--- a/lib/op-attrs/include/op-attrs/ops/replicate_attrs.struct.toml
+++ b/lib/op-attrs/include/op-attrs/ops/replicate_attrs.struct.toml
@@ -9,8 +9,10 @@ features = [
   "fmt",
 ]
 
-includes = [ ]
+includes = [
+  "utils/nonnegative_int/nonnegative_int.h",
+]
 
 [[fields]]
 name = "replicate_degree"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
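The recurring conversion above replaces plain `int` attribute fields with `::FlexFlow::nonnegative_int`. As a rough sketch of the invariant that type provides (illustration only, not the FlexFlow implementation, which lives in utils/nonnegative_int/nonnegative_int.h and also supplies hashing, formatting, arithmetic, and serialization), a checked non-negative integer validates once at construction:

    #include <stdexcept>

    // Hypothetical, simplified sketch of a checked non-negative integer
    // wrapper; the real type has a much richer interface.
    class nonnegative_int {
    public:
      explicit nonnegative_int(int value) : value_(value) {
        if (value < 0) {
          throw std::invalid_argument("nonnegative_int requires value >= 0");
        }
      }

      // Mirrors the unwrap_nonnegative() accessor used elsewhere in this patch.
      int unwrap_nonnegative() const {
        return value_;
      }

    private:
      int value_;
    };

    // Literal suffix mirroring the 0_n / 1_n spellings used in the diff.
    nonnegative_int operator""_n(unsigned long long value) {
      return nonnegative_int{static_cast<int>(value)};
    }

    int main() {
      nonnegative_int degree = 4_n;
      // nonnegative_int bad{-1};  // would throw at construction
      return degree.unwrap_nonnegative() == 4 ? 0 : 1;
    }

Centralizing the check at construction is what lets call sites drop ad-hoc guards such as the assert(attrs.out_channels > 0) removed from conv_2d.cc later in this patch.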
diff --git a/lib/op-attrs/include/op-attrs/ops/split_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/split_attrs.struct.toml
index fce827f5c2..7ce1ad7e34 100644
--- a/lib/op-attrs/include/op-attrs/ops/split_attrs.struct.toml
+++ b/lib/op-attrs/include/op-attrs/ops/split_attrs.struct.toml
@@ -13,11 +13,12 @@ includes = [
   "utils/stack_vector/stack_vector.h",
   "op-attrs/ff_dim_t.h",
   "op-attrs/ff_dim_t.dtg.h",
+  "utils/nonnegative_int/nonnegative_int.h",
 ]
 
 [[fields]]
 name = "splits"
-type = "::FlexFlow::stack_vector<int, MAX_NUM_OUTPUTS>"
+type = "::FlexFlow::stack_vector<::FlexFlow::nonnegative_int, MAX_NUM_OUTPUTS>"
 
 [[fields]]
 name = "axis"
diff --git a/lib/op-attrs/include/op-attrs/ops/topk_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/topk_attrs.struct.toml
index 9ecbf1d725..1c5bfc8e10 100644
--- a/lib/op-attrs/include/op-attrs/ops/topk_attrs.struct.toml
+++ b/lib/op-attrs/include/op-attrs/ops/topk_attrs.struct.toml
@@ -9,9 +9,13 @@ features = [
   "fmt",
 ]
 
+includes = [
+  "utils/nonnegative_int/nonnegative_int.h",
+]
+
 [[fields]]
 name = "k"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
 
 [[fields]]
 name = "sorted"
diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_degrees.struct.toml b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_degrees.struct.toml
index 974b27d2a7..be3a95eec8 100644
--- a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_degrees.struct.toml
+++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_degrees.struct.toml
@@ -13,6 +13,7 @@ includes = [
   "op-attrs/parallel_tensor_shape/sum_degree.dtg.h",
   "op-attrs/parallel_tensor_shape/discard_copy_degree.dtg.h",
   "op-attrs/dim_ordered/dim_ordered.h",
+  "utils/nonnegative_int/nonnegative_int.h",
 ]
 
 [[fields]]
@@ -25,4 +26,4 @@ type = "::FlexFlow::DiscardCopyDegree"
 
 [[fields]]
 name = "shard_degrees"
-type = "::FlexFlow::FFOrdered<int>"
+type = "::FlexFlow::FFOrdered<::FlexFlow::nonnegative_int>"
diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dims.h b/lib/op-attrs/include/op-attrs/parallel_tensor_dims.h
index 6b88a7bda1..67864e637b 100644
--- a/lib/op-attrs/include/op-attrs/parallel_tensor_dims.h
+++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dims.h
@@ -9,27 +9,27 @@
 
 namespace FlexFlow {
 
 FFOrdered<ShardParallelDim> ff_ordered_shard_dims(ParallelTensorDims const &);
 
-FFOrdered<int> ff_ordered_shard_degrees(ParallelTensorDims const &);
+FFOrdered<nonnegative_int> ff_ordered_shard_degrees(ParallelTensorDims const &);
 
 std::unordered_set<ReplicaParallelDim> replica_dims(ParallelTensorDims const &);
 
 /* size_t get_volume(ParallelTensorDims const &); */
-size_t num_shard_dims(ParallelTensorDims const &);
+nonnegative_int num_shard_dims(ParallelTensorDims const &);
 
 ParallelTensorDimDegrees get_parallel_degrees(ParallelTensorDims const &);
 
 ParallelTensorDims lift_to_parallel(TensorDims const &);
-ParallelTensorDims
-    lift_to_parallel_with_degrees(TensorDims const &,
-                                  SumDegree const &,
-                                  DiscardCopyDegree const &,
-                                  FFOrdered<int> const &shard_degrees);
+ParallelTensorDims lift_to_parallel_with_degrees(
+    TensorDims const &,
+    SumDegree const &,
+    DiscardCopyDegree const &,
+    FFOrdered<nonnegative_int> const &shard_degrees);
 ParallelTensorDims
     lift_to_parallel_with_degrees(TensorDims const &,
                                   ParallelTensorDimDegrees const &);
 
-int total_replica_degree(ParallelTensorDims const &);
-int total_shard_degree(ParallelTensorDims const &);
-int total_parallel_degree(ParallelTensorDims const &);
+nonnegative_int total_replica_degree(ParallelTensorDims const &);
+nonnegative_int total_shard_degree(ParallelTensorDims const &);
+nonnegative_int total_parallel_degree(ParallelTensorDims const &);
 
 ShardParallelDim
     shard_dim_at_idx(ParallelTensorDims const &, relative_ff_dim_t);
diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h
index 0339b9b8a6..d461ffc9e4 100644
--- a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h
+++ b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h
@@ -12,12 +12,13 @@
 
 namespace FlexFlow {
 
-int num_shard_dims(ParallelTensorShape const &);
+nonnegative_int num_shard_dims(ParallelTensorShape const &);
 
 ShardParallelDim shard_dim_at_idx(ParallelTensorShape const &,
                                   relative_ff_dim_t);
 ShardParallelDim &shard_dim_at_idx(ParallelTensorShape &, relative_ff_dim_t);
 
-FFOrdered<int> ff_ordered_shard_degrees(ParallelTensorShape const &);
+FFOrdered<nonnegative_int>
+    ff_ordered_shard_degrees(ParallelTensorShape const &);
 
 std::optional<ShardParallelDim>
     try_get_shard_dim_at_idx(ParallelTensorShape const &, relative_ff_dim_t);
@@ -25,11 +26,11 @@ std::optional<ShardParallelDim>
 ParallelTensorDimDegrees get_parallel_degrees(ParallelTensorShape const &);
 
 ParallelTensorShape lift_to_parallel(TensorShape const &);
-ParallelTensorShape
-    lift_to_parallel_with_degrees(TensorShape const &,
-                                  SumDegree const &,
-                                  DiscardCopyDegree const &,
-                                  FFOrdered<int> const &shard_degrees);
+ParallelTensorShape lift_to_parallel_with_degrees(
+    TensorShape const &,
+    SumDegree const &,
+    DiscardCopyDegree const &,
+    FFOrdered<nonnegative_int> const &shard_degrees);
 ParallelTensorShape
     lift_to_parallel_with_degrees(TensorShape const &,
                                   ParallelTensorDimDegrees const &);
@@ -37,13 +38,13 @@ ParallelTensorShape
 std::unordered_set<ReplicaParallelDim>
     replica_dims(ParallelTensorShape const &);
 TensorShape get_piece_shape(ParallelTensorShape const &);
-int get_num_replica_dims(ParallelTensorShape const &);
-int get_num_replicas(ParallelTensorShape const &);
+nonnegative_int get_num_replica_dims(ParallelTensorShape const &);
+nonnegative_int get_num_replicas(ParallelTensorShape const &);
 
-int get_sum_degree(ParallelTensorShape const &);
-int get_discard_copy_degree(ParallelTensorShape const &);
+nonnegative_int get_sum_degree(ParallelTensorShape const &);
+nonnegative_int get_discard_copy_degree(ParallelTensorShape const &);
 
-int get_total_parallel_degree(ParallelTensorShape const &);
+nonnegative_int get_total_parallel_degree(ParallelTensorShape const &);
 
 bool is_valid(ParallelTensorShape const &);
 
diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_shape/discard_copy_degree.struct.toml b/lib/op-attrs/include/op-attrs/parallel_tensor_shape/discard_copy_degree.struct.toml
index b4905fb0ce..76b52bcdef 100644
--- a/lib/op-attrs/include/op-attrs/parallel_tensor_shape/discard_copy_degree.struct.toml
+++ b/lib/op-attrs/include/op-attrs/parallel_tensor_shape/discard_copy_degree.struct.toml
@@ -9,6 +9,10 @@ features = [
   "fmt",
 ]
 
+includes = [
+  "utils/nonnegative_int/nonnegative_int.h",
+]
+
 [[fields]]
 name = "value"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_shape/sum_degree.struct.toml b/lib/op-attrs/include/op-attrs/parallel_tensor_shape/sum_degree.struct.toml
index d86917211e..550a384ba9 100644
--- a/lib/op-attrs/include/op-attrs/parallel_tensor_shape/sum_degree.struct.toml
+++ b/lib/op-attrs/include/op-attrs/parallel_tensor_shape/sum_degree.struct.toml
@@ -9,6 +9,10 @@ features = [
   "fmt",
 ]
 
+includes = [
+  "utils/nonnegative_int/nonnegative_int.h",
+]
+
 [[fields]]
 name = "value"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
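The SumDegree and DiscardCopyDegree wrappers above hold the two replica degrees of a parallel tensor; together with the per-dimension shard degrees they determine how many device slots the tensor occupies. A minimal sketch of that relationship, with plain ints standing in for nonnegative_int (the real accessors are the total_*_degree functions declared in parallel_tensor_dims.h):

    #include <cassert>
    #include <functional>
    #include <numeric>
    #include <vector>

    // Illustration only: the total parallel degree is the product of the
    // sum degree, the discard-copy degree, and every shard degree, matching
    // total_parallel_degree = total_replica_degree * total_shard_degree
    // in parallel_tensor_dims.cc.
    int total_parallel_degree(int sum_degree,
                              int discard_copy_degree,
                              std::vector<int> const &shard_degrees) {
      int total_shard_degree = std::accumulate(
          shard_degrees.begin(), shard_degrees.end(), 1, std::multiplies<int>());
      return sum_degree * discard_copy_degree * total_shard_degree;
    }

    int main() {
      // A tensor with sum degree 2, discard-copy degree 2, and one dimension
      // sharded 4 ways occupies 2 * 2 * 4 = 16 device slots.
      assert(total_parallel_degree(2, 2, {4, 1}) == 16);
    }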
diff --git a/lib/op-attrs/include/op-attrs/pcg_operator_attrs.variant.toml b/lib/op-attrs/include/op-attrs/pcg_operator_attrs.variant.toml
index a44d712dbf..fdd11ac11f 100644
--- a/lib/op-attrs/include/op-attrs/pcg_operator_attrs.variant.toml
+++ b/lib/op-attrs/include/op-attrs/pcg_operator_attrs.variant.toml
@@ -11,7 +11,7 @@ features = [
 
 includes = [
   "op-attrs/ops/attention_attrs.dtg.h",
-  "op-attrs/ops/batch_matmul.dtg.h",
+  "op-attrs/ops/batch_matmul_attrs.dtg.h",
   "op-attrs/ops/batch_norm_attrs.dtg.h",
   "op-attrs/ops/broadcast_attrs.dtg.h",
   "op-attrs/ops/cast_attrs.dtg.h",
diff --git a/lib/op-attrs/include/op-attrs/relative_ff_dim_t.h b/lib/op-attrs/include/op-attrs/relative_ff_dim_t.h
index af51cc69be..5205b1ead8 100644
--- a/lib/op-attrs/include/op-attrs/relative_ff_dim_t.h
+++ b/lib/op-attrs/include/op-attrs/relative_ff_dim_t.h
@@ -7,7 +7,7 @@ namespace FlexFlow {
 
 ff_dim_t ff_dim_t_from_relative_ff_dim_t(relative_ff_dim_t ff_dim,
-                                         int input_dim);
+                                         nonnegative_int input_dim);
 
 } // namespace FlexFlow
 
 namespace rc {
diff --git a/lib/op-attrs/include/op-attrs/replica_parallel_dim.struct.toml b/lib/op-attrs/include/op-attrs/replica_parallel_dim.struct.toml
index 2ad442aa22..5ca486181e 100644
--- a/lib/op-attrs/include/op-attrs/replica_parallel_dim.struct.toml
+++ b/lib/op-attrs/include/op-attrs/replica_parallel_dim.struct.toml
@@ -11,11 +11,12 @@ features = [
 
 includes = [
   "op-attrs/replica_type.dtg.h",
+  "utils/nonnegative_int/nonnegative_int.h",
 ]
 
 [[fields]]
 name = "degree"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
 
 [[fields]]
 name = "replica_type"
diff --git a/lib/op-attrs/include/op-attrs/replica_parallel_dim_set.h b/lib/op-attrs/include/op-attrs/replica_parallel_dim_set.h
index 74a8df339b..92d2b0abb2 100644
--- a/lib/op-attrs/include/op-attrs/replica_parallel_dim_set.h
+++ b/lib/op-attrs/include/op-attrs/replica_parallel_dim_set.h
@@ -8,7 +8,8 @@ namespace FlexFlow {
 
 ReplicaParallelDimSet empty_replica_parallel_dim_set();
 
-int get_degree_of_replica_type(ReplicaParallelDimSet const &, ReplicaType);
+nonnegative_int get_degree_of_replica_type(ReplicaParallelDimSet const &,
+                                           ReplicaType);
 
 std::unordered_set<ReplicaParallelDim>
     get_replica_dims(ReplicaParallelDimSet const &);
 
 bool is_valid(ReplicaParallelDimSet const &);
diff --git a/lib/op-attrs/include/op-attrs/shard_parallel_dim.struct.toml b/lib/op-attrs/include/op-attrs/shard_parallel_dim.struct.toml
index 21c81396d1..5c5d2dc5b2 100644
--- a/lib/op-attrs/include/op-attrs/shard_parallel_dim.struct.toml
+++ b/lib/op-attrs/include/op-attrs/shard_parallel_dim.struct.toml
@@ -9,10 +9,14 @@ features = [
   "fmt",
 ]
 
+includes = [
+  "utils/nonnegative_int/nonnegative_int.h",
+]
+
 [[fields]]
 name = "size"
-type = "size_t"
+type = "::FlexFlow::nonnegative_int"
 
 [[fields]]
 name = "degree"
-type = "int"
+type = "::FlexFlow::nonnegative_int"
diff --git a/lib/op-attrs/include/op-attrs/tensor_dims.h b/lib/op-attrs/include/op-attrs/tensor_dims.h
index 5e1503360b..bf11f36e51 100644
--- a/lib/op-attrs/include/op-attrs/tensor_dims.h
+++ b/lib/op-attrs/include/op-attrs/tensor_dims.h
@@ -6,11 +6,11 @@
 
 namespace FlexFlow {
 
-FFOrdered<size_t> const &ff_ordered(TensorDims const &);
+FFOrdered<nonnegative_int> const &ff_ordered(TensorDims const &);
 
-size_t num_dims(TensorDims const &);
-size_t dim_at_idx(TensorDims const &, relative_ff_dim_t);
-size_t &dim_at_idx(TensorDims &, relative_ff_dim_t);
+nonnegative_int num_dims(TensorDims const &);
+nonnegative_int dim_at_idx(TensorDims const &, relative_ff_dim_t);
+nonnegative_int &dim_at_idx(TensorDims &, relative_ff_dim_t);
 
 bool tensor_dims_is_broadcastable_to(TensorDims const &curr,
                                      TensorDims const &goal);
diff --git
a/lib/op-attrs/include/op-attrs/tensor_dims.struct.toml b/lib/op-attrs/include/op-attrs/tensor_dims.struct.toml index b262dd32b6..e86b866fd6 100644 --- a/lib/op-attrs/include/op-attrs/tensor_dims.struct.toml +++ b/lib/op-attrs/include/op-attrs/tensor_dims.struct.toml @@ -8,10 +8,12 @@ features = [ "rapidcheck", "fmt", ] + includes = [ "op-attrs/dim_ordered/dim_ordered.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] name = "ff_ordered" -type = "::FlexFlow::FFOrdered" +type = "::FlexFlow::FFOrdered<::FlexFlow::nonnegative_int>" diff --git a/lib/op-attrs/include/op-attrs/tensor_shape.h b/lib/op-attrs/include/op-attrs/tensor_shape.h index b8733cddbe..15958a1daf 100644 --- a/lib/op-attrs/include/op-attrs/tensor_shape.h +++ b/lib/op-attrs/include/op-attrs/tensor_shape.h @@ -5,11 +5,11 @@ namespace FlexFlow { -size_t num_dims(TensorShape const &); -size_t dim_at_idx(TensorShape const &, relative_ff_dim_t); -size_t &dim_at_idx(TensorShape &, relative_ff_dim_t); -size_t get_num_elements(TensorShape const &); -size_t get_size_in_bytes(TensorShape const &); +nonnegative_int num_dims(TensorShape const &); +nonnegative_int dim_at_idx(TensorShape const &, relative_ff_dim_t); +nonnegative_int &dim_at_idx(TensorShape &, relative_ff_dim_t); +nonnegative_int get_num_elements(TensorShape const &); +nonnegative_int get_size_in_bytes(TensorShape const &); } // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/datatype.cc b/lib/op-attrs/src/op-attrs/datatype.cc index 3bee05c253..9bb3b34390 100644 --- a/lib/op-attrs/src/op-attrs/datatype.cc +++ b/lib/op-attrs/src/op-attrs/datatype.cc @@ -1,23 +1,24 @@ #include "op-attrs/datatype.h" #include "utils/containers/contains.h" #include "utils/exception.h" +#include "utils/nonnegative_int/nonnegative_int.h" namespace FlexFlow { -size_t size_of_datatype(DataType data_type) { +nonnegative_int size_of_datatype(DataType data_type) { switch (data_type) { case DataType::BOOL: - return sizeof(bool); + return nonnegative_int{sizeof(bool)}; case DataType::INT32: - return sizeof(int32_t); + return nonnegative_int{sizeof(int32_t)}; case DataType::INT64: - return sizeof(int64_t); + return nonnegative_int{sizeof(int64_t)}; case DataType::HALF: - return sizeof(float) / 2; + return nonnegative_int{sizeof(float)} / 2_n; case DataType::FLOAT: - return sizeof(float); + return nonnegative_int{sizeof(float)}; case DataType::DOUBLE: - return sizeof(double); + return nonnegative_int{sizeof(double)}; default: throw mk_runtime_error(fmt::format("Unknown DataType {}", data_type)); } diff --git a/lib/op-attrs/src/op-attrs/ff_dim_t.cc b/lib/op-attrs/src/op-attrs/ff_dim_t.cc index 0a99e39a91..44672fc391 100644 --- a/lib/op-attrs/src/op-attrs/ff_dim_t.cc +++ b/lib/op-attrs/src/op-attrs/ff_dim_t.cc @@ -2,7 +2,7 @@ namespace FlexFlow { relative_ff_dim_t relative_ff_dim_t_from_ff_dim_t(ff_dim_t ff_dim) { - return relative_ff_dim_t{ff_dim.value.get_value()}; + return relative_ff_dim_t{ff_dim.value.unwrap_nonnegative()}; } } // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/ops/attention.cc b/lib/op-attrs/src/op-attrs/ops/attention.cc index 57c7105534..10fbf412f7 100644 --- a/lib/op-attrs/src/op-attrs/ops/attention.cc +++ b/lib/op-attrs/src/op-attrs/ops/attention.cc @@ -16,79 +16,82 @@ namespace FlexFlow { /* return is_valid; */ /* } */ -int get_qProjSize(MultiHeadAttentionAttrs const &attrs) { +nonnegative_int get_qProjSize(MultiHeadAttentionAttrs const &attrs) { return attrs.kdim; } -int get_vProjSize(MultiHeadAttentionAttrs const &attrs) { +nonnegative_int 
get_vProjSize(MultiHeadAttentionAttrs const &attrs) { return attrs.vdim; } -int get_kProjSize(MultiHeadAttentionAttrs const &attrs) { +nonnegative_int get_kProjSize(MultiHeadAttentionAttrs const &attrs) { return attrs.kdim; } -int get_oProjSize(MultiHeadAttentionAttrs const &attrs) { +nonnegative_int get_oProjSize(MultiHeadAttentionAttrs const &attrs) { return attrs.embed_dim; } -int get_qSize(TensorShape const &query_shape) { +nonnegative_int get_qSize(TensorShape const &query_shape) { return dim_at_idx(query_shape, relative_ff_dim_t{0}); } -int get_kSize(TensorShape const &key_shape) { +nonnegative_int get_kSize(TensorShape const &key_shape) { return dim_at_idx(key_shape, relative_ff_dim_t{0}); } -int get_vSize(TensorShape const &value_shape) { +nonnegative_int get_vSize(TensorShape const &value_shape) { return dim_at_idx(value_shape, relative_ff_dim_t{0}); } -int get_qSize(MultiHeadAttentionParallelInputs const &inputs) { +nonnegative_int get_qSize(MultiHeadAttentionParallelInputs const &inputs) { return inputs.query_dim.size; } -int get_qSize(MultiHeadAttentionInputs const &inputs) { +nonnegative_int get_qSize(MultiHeadAttentionInputs const &inputs) { return inputs.query_size; } -int get_kSize(MultiHeadAttentionParallelInputs const &inputs) { +nonnegative_int get_kSize(MultiHeadAttentionParallelInputs const &inputs) { return inputs.key_dim.size; } -int get_kSize(MultiHeadAttentionInputs const &inputs) { +nonnegative_int get_kSize(MultiHeadAttentionInputs const &inputs) { return inputs.key_size; } -int get_vSize(MultiHeadAttentionParallelInputs const &inputs) { +nonnegative_int get_vSize(MultiHeadAttentionParallelInputs const &inputs) { return inputs.value_dim.size; } -int get_vSize(MultiHeadAttentionInputs const &inputs) { +nonnegative_int get_vSize(MultiHeadAttentionInputs const &inputs) { return inputs.value_size; } -int get_kvSeqLength(MultiHeadAttentionParallelInputs const &inputs) { +nonnegative_int + get_kvSeqLength(MultiHeadAttentionParallelInputs const &inputs) { return inputs.sequence_dim.size; } -int get_kvSeqLength(MultiHeadAttentionInputs const &inputs) { +nonnegative_int get_kvSeqLength(MultiHeadAttentionInputs const &inputs) { return inputs.sequence_length; } -int get_qoSeqLength(MultiHeadAttentionParallelInputs const &inputs) { +nonnegative_int + get_qoSeqLength(MultiHeadAttentionParallelInputs const &inputs) { return inputs.sequence_dim.size; // FIXME -- assumes only prefill } -int get_qoSeqLength(MultiHeadAttentionInputs const &inputs) { +nonnegative_int get_qoSeqLength(MultiHeadAttentionInputs const &inputs) { return inputs.sequence_length; // FIXME -- assumes only prefil } -int get_num_samples(MultiHeadAttentionParallelInputs const &inputs) { +nonnegative_int + get_num_samples(MultiHeadAttentionParallelInputs const &inputs) { return inputs.batch_dim.size; } -int get_num_samples(MultiHeadAttentionInputs const &inputs) { +nonnegative_int get_num_samples(MultiHeadAttentionInputs const &inputs) { return inputs.batch_size; } @@ -124,10 +127,10 @@ tl::expected MultiHeadAttentionInputs parsed = parse_result.value(); return TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ parsed.batch_size, parsed.sequence_length, - size_t_from_int(attrs.embed_dim), + attrs.embed_dim, }}, parsed.datatype, }; @@ -147,23 +150,23 @@ tl::expected MultiHeadAttentionInputs parsed = parse_result.value(); // W^Q_i in "Attention Is All You Need" top of page 5 - size_t qProjectWeightSize = parsed.query_size * attrs.kdim; + nonnegative_int qProjectWeightSize = parsed.query_size * 
attrs.kdim; // W^K_i in "Attention Is All You Need" top of page 5 (all i's put together) - size_t kProjectWeightSize = parsed.key_size * attrs.kdim; + nonnegative_int kProjectWeightSize = parsed.key_size * attrs.kdim; // W^V_i in "Attention Is All You Need" top of page 5 (all i's put together) - size_t vProjectWeightSize = parsed.value_size * attrs.vdim; + nonnegative_int vProjectWeightSize = parsed.value_size * attrs.vdim; // W^O in "Attention Is All You Need" top of page 5, with num_heads factored // out - size_t outWeightSize = attrs.vdim * attrs.embed_dim; + nonnegative_int outWeightSize = attrs.vdim * attrs.embed_dim; return TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ (qProjectWeightSize + kProjectWeightSize + vProjectWeightSize + outWeightSize), - size_t_from_int(attrs.num_heads), + attrs.num_heads, }}, parsed.datatype, }; @@ -184,8 +187,8 @@ tl::expected }); return TensorShape{ - TensorDims{FFOrdered{ - size_t_from_int(attrs.kdim + attrs.kdim + attrs.vdim), + TensorDims{FFOrdered{ + attrs.kdim + attrs.kdim + attrs.vdim, }}, parsed.datatype, }; @@ -206,8 +209,8 @@ tl::expected }); return TensorShape{ - TensorDims{FFOrdered{ - size_t_from_int(attrs.embed_dim), + TensorDims{FFOrdered{ + attrs.embed_dim, }}, parsed.datatype, }; @@ -235,14 +238,14 @@ tl::expected } TensorShape unpar_shape = result_unpar_get_shape.value(); - int joined_dim_degree = 1; - int head_dim_degree = parsed.discard_copy_degree.value; + nonnegative_int joined_dim_degree = 1_n; + nonnegative_int head_dim_degree = parsed.discard_copy_degree.value; return lift_to_parallel_with_degrees( unpar_shape, - SumDegree{1}, + SumDegree{1_n}, DiscardCopyDegree{parsed.batch_dim.degree}, - FFOrdered{joined_dim_degree, head_dim_degree}); + FFOrdered{joined_dim_degree, head_dim_degree}); } tl::expected @@ -273,10 +276,10 @@ tl::expected result_unpar.value(); }); - SumDegree sum_degree = SumDegree{1}; + SumDegree sum_degree = SumDegree{1_n}; DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{ parsed.batch_dim.degree * parsed.discard_copy_degree.value}; - FFOrdered shard_degrees = FFOrdered{1}; + FFOrdered shard_degrees = FFOrdered{1_n}; return lift_to_parallel_with_degrees( unpar_shape, sum_degree, discard_copy_degree, shard_degrees); } @@ -309,10 +312,10 @@ tl::expected result_unpar.value(); }); - SumDegree sum_degree = SumDegree{1}; + SumDegree sum_degree = SumDegree{1_n}; DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{ parsed.batch_dim.degree * parsed.discard_copy_degree.value}; - FFOrdered shard_degrees = FFOrdered{1}; + FFOrdered shard_degrees = FFOrdered{1_n}; return lift_to_parallel_with_degrees( unpar_shape, sum_degree, discard_copy_degree, shard_degrees); } @@ -339,402 +342,25 @@ tl::expected } TensorShape unpar_shape = result_unpar_get_shape.value(); - int sum_degree = parsed.discard_copy_degree.value; - int discard_copy_degree = 1; - int batch_degree = parsed.batch_dim.degree; - int seq_len_degree = 1; - int out_dim_degree = 1; + nonnegative_int sum_degree = parsed.discard_copy_degree.value; + nonnegative_int discard_copy_degree = 1_n; + nonnegative_int batch_degree = parsed.batch_dim.degree; + nonnegative_int seq_len_degree = 1_n; + nonnegative_int out_dim_degree = 1_n; return lift_to_parallel_with_degrees( unpar_shape, SumDegree{sum_degree}, DiscardCopyDegree{discard_copy_degree}, - FFOrdered{batch_degree, seq_len_degree, out_dim_degree}); + FFOrdered{batch_degree, seq_len_degree, out_dim_degree}); } -int get_oSize(ParallelTensorShape const &) { +nonnegative_int 
get_oSize(ParallelTensorShape const &) { NOT_IMPLEMENTED(); } -int get_oSize(TensorShape const &) { +nonnegative_int get_oSize(TensorShape const &) { NOT_IMPLEMENTED(); } } // namespace FlexFlow - -// Tensor FFModel::multihead_attention(const Tensor query, -// const Tensor key, -// const Tensor value, -// int embed_dim, -// int num_heads, -// int kdim, -// int vdim, -// float dropout, -// bool bias, -// bool add_bias_kv, -// bool add_zero_attn, -// Initializer *kernel_initializer, -// char const *name) { -// Layer *li = new Layer(this, -// OP_MULTIHEAD_ATTENTION, -// DT_FLOAT, -// name, -// 3 /*inputs*/, -// 1 /*weights*/, -// 1 /*outputs*/, -// query, -// key, -// value); -// { -// int numdims = query->num_dims; -// int dims[MAX_TENSOR_DIM]; -// for (int i = 0; i < numdims; i++) { -// dims[i] = query->dims[i]; -// } -// dims[0] = embed_dim; -// li->outputs[0] = create_tensor_legion_ordering( -// numdims, dims, DT_FLOAT, li, 0, true /*create_grad*/); -// } -// { -// // Compute weight size -// int qProjSize = kdim, kProjSize = kdim, vProjSize = kdim, -// oProjSize = embed_dim; -// int qSize = query->dims[0], kSize = key->dims[0], vSize = value->dims[0]; -// int qParas = qProjSize * qSize; -// int kParas = kProjSize * kSize; -// int vParas = vProjSize * vSize; -// int oParas = oProjSize * (vProjSize > 0 ? vProjSize : vSize); -// int dims[2] = {qParas + kParas + vParas + oParas, num_heads}; -// li->weights[0] = create_weight_legion_ordering(2, -// dims, -// DT_FLOAT, -// li, -// true /*create_grad*/, -// kernel_initializer, -// CHOSEN_SYNC_TYPE); -// } -// li->data_type = DT_FLOAT; -// li->add_int_property("embed_dim", embed_dim); -// li->add_int_property("num_heads", num_heads); -// li->add_int_property("kdim", kdim); -// li->add_int_property("vdim", vdim); -// li->add_int_property("bias", bias); -// li->add_int_property("add_bias_kv", add_bias_kv); -// li->add_int_property("add_zero_attn", add_zero_attn); -// li->add_float_property("dropout", dropout); -// layers.push_back(li); -// return li->outputs[0]; -// } - -// MultiHeadAttention::MultiHeadAttention(FFModel &model, -// LayerID const &_layer_guid, -// const ParallelTensor _query, -// const ParallelTensor _key, -// const ParallelTensor _value, -// int _embed_dim, -// int _num_heads, -// int _kdim, -// int _vdim, -// float _dropout, -// bool _bias, -// bool _add_bias_kv, -// bool _add_zero_attn, -// bool allocate_weights, -// char const *name) -// // Initializer* _bias_initializer) -// : Op(model, -// OP_MULTIHEAD_ATTENTION, -// DT_FLOAT, -// name, -// 3 /*inputs*/, -// 1 /*weights*/, -// 1 /*outputs*/, -// _query, -// _key, -// _value), -// attrs(_embed_dim, -// _num_heads, -// _kdim, -// _vdim, -// _dropout, -// _bias, -// _add_bias_kv, -// _add_zero_attn), -// qSize(_query->dims[0].size), kSize(_key->dims[0].size), -// vSize(_value->dims[0].size), qProjSize(_kdim), -// qoSeqLength(_query->dims[1].size), kvSeqLength(_key->dims[1].size) { -// // overwrite layer_guid -// layer_guid = _layer_guid; - -// // assert key and value have the same sequence length -// assert(_key->dims[1] == _value->dims[1]); -// numOutputs = 1; -// int numdim = _query->num_dims; -// ParallelDim dims[MAX_TENSOR_DIM]; -// for (int i = 0; i < numdim; i++) { -// dims[i] = _query->dims[i]; -// } -// dims[0].size = _embed_dim; -// // Currently require no parallelism along this dim -// assert(dims[0].degree == 1); -// if (allocate_weights) { -// // Create weight tensor -// int num_dims = inputs[0]->num_dims; -// // Compute weight size -// int qParas = this->qProjSize * 
this->qSize; -// int kParas = kProjSize(attrs) * this->kSize; -// int vParas = vProjSize(attrs) * this->vSize; -// int oParas = oProjSize(attrs) * -// (vProjSize(attrs) > 0 ? vProjSize(attrs) : this->vSize); -// ParallelDim dims[3]; -// dims[0] = inputs[0]->dims[num_dims - 2]; -// dims[0].size = dims[0].degree; -// dims[1] = inputs[0]->dims[num_dims - 1]; -// dims[1].size = this->attrs.num_heads; -// dims[2].size = qParas + kParas + vParas + oParas; -// dims[2].degree = 1; -// dims[2].parallel_idx = -1; -// int seed = std::rand(); -// Initializer *initializer = new GlorotUniform(seed); -// #ifdef USE_NCCL -// ParameterSyncType comm_type = ParameterSyncType::NCCL; -// #else -// ParameterSyncType comm_type = ParameterSyncType::PS; -// #endif -// weights[0] = model.create_parallel_weight<3>(dims, -// DT_FLOAT, -// NULL /*owner_op*/, -// true /*create_grad*/, -// initializer, -// comm_type); -// } - -// outputs[0] = model.create_parallel_tensor_legion_ordering( -// _query->num_dims, dims, DT_FLOAT, this); -// /* for (int i = 0; i < numdim; i++) { */ -// /* register_output_input_parallel_dims(outputs[0], i, inputs[0], i); */ -// /* } */ -// /* // Check correctness */ -// /* assert(check_output_input_weight_parallel_dims()); */ -// } - -// MultiHeadAttention::MultiHeadAttention(FFModel &model, -// const ParallelTensor _query, -// const ParallelTensor _key, -// const ParallelTensor _value, -// const ParallelTensor _weight, -// int _embed_dim, -// int _num_heads, -// int _kdim, -// int _vdim, -// float _dropout, -// bool _bias, -// bool _add_bias_kv, -// bool _add_zero_attn, -// bool allocate_weights, -// char const *name) -// // Initializer* _bias_initializer) -// : Op(model, -// OP_MULTIHEAD_ATTENTION, -// DT_FLOAT, -// name, -// 3 /*inputs*/, -// 1 /*weights*/, -// 1 /*outputs*/, -// _query, -// _key, -// _value, -// _weight), -// attrs(_embed_dim, -// _num_heads, -// _kdim, -// _vdim, -// _dropout, -// _bias, -// _add_bias_kv, -// _add_zero_attn), -// qSize(_query->dims[0].size), kSize(_key->dims[0].size), -// vSize(_value->dims[0].size), qProjSize(_kdim), -// qoSeqLength(_query->dims[1].size), kvSeqLength(_key->dims[1].size) -// // bias_initializer(_bias_initializer) -// { -// // assert key and value have the same sequence length -// assert(_key->dims[1] == _value->dims[1]); -// numOutputs = 1; -// int numdim = _query->num_dims; -// ParallelDim dims[MAX_TENSOR_DIM]; -// for (int i = 0; i < numdim; i++) { -// dims[i] = _query->dims[i]; -// } -// // assert key and value have the same sequence length -// assert(_key->dims[1] == _value->dims[1]); -// dims[0].size = _embed_dim; -// // Currently require no parallelism along this dim -// assert(dims[0].degree == 1); -// if (allocate_weights) { -// // Create weight tensor -// int num_dims = inputs[0]->num_dims; -// // Compute weight size -// int qParas = this->qProjSize * this->qSize; -// int kParas = kProjSize(attrs) * this->kSize; -// int vParas = vProjSize(attrs) * this->vSize; -// int oParas = oProjSize(attrs) * -// (vProjSize(attrs) > 0 ? 
vProjSize(attrs) : this->vSize); -// ParallelDim dims[3]; -// dims[0] = inputs[0]->dims[num_dims - 2]; -// dims[0].size = dims[0].degree; -// dims[1] = inputs[0]->dims[num_dims - 1]; -// dims[1].size = this->attrs.num_heads; -// dims[2].size = qParas + kParas + vParas + oParas; -// int seed = std::rand(); -// Initializer *initializer = new GlorotUniform(seed); -// #ifdef USE_NCCL -// ParameterSyncType comm_type = ParameterSyncType::NCCL; -// #else -// ParameterSyncType comm_type = ParameterSyncType::PS; -// #endif -// weights[0] = model.create_parallel_weight<3>(dims, -// DT_FLOAT, -// NULL /*owner_op*/, -// true /*create_grad*/, -// initializer, -// comm_type); -// } -// outputs[0] = model.create_parallel_tensor_legion_ordering( -// _query->num_dims, dims, DT_FLOAT, this); - -// /* for (int i = 0; i < numdim; i++) { */ -// /* register_output_input_parallel_dims(outputs[0], i, inputs[0], i); */ -// /* } */ -// /* register_output_weight_parallel_dims(outputs[0], numdim-1, _weight, 1); -// */ -// /* register_output_weight_parallel_dims(outputs[0], numdim-2, _weight, 2); -// */ -// // Check correctness -// /* assert(check_output_input_weight_parallel_dims()); */ -// } - -// void MultiHeadAttention::forward(FFModel const &ff) { -// ArgumentMap argmap; -// Context ctx = ff.config.lg_ctx; -// Runtime *runtime = ff.config.lg_hlr; -// set_argumentmap_for_forward(ff, argmap); -// int idx = 0; -// IndexLauncher launcher(ATTENTION_FWD_TASK_ID, -// parallel_is, -// TaskArgument(NULL, 0), -// argmap, -// Predicate::TRUE_PRED, -// false /*must*/, -// 0 /*mapper_id*/, -// outputs[0]->machine_view.hash()); -// launcher.add_region_requirement(RegionRequirement(inputs[0]->part, -// 0 /*projection id*/, -// READ_ONLY, -// EXCLUSIVE, -// inputs[0]->region)); -// launcher.add_field(idx++, FID_DATA); -// launcher.add_region_requirement(RegionRequirement(inputs[1]->part, -// 0 /*projection id*/, -// READ_ONLY, -// EXCLUSIVE, -// inputs[1]->region)); -// launcher.add_field(idx++, FID_DATA); -// launcher.add_region_requirement(RegionRequirement(inputs[2]->part, -// 0 /*projection id*/, -// READ_ONLY, -// EXCLUSIVE, -// inputs[2]->region)); -// launcher.add_field(idx++, FID_DATA); -// launcher.add_region_requirement(RegionRequirement(weights[0]->part, -// 0 /*projection id*/, -// READ_ONLY, -// EXCLUSIVE, -// weights[0]->region)); -// launcher.add_field(idx++, FID_DATA); -// launcher.add_region_requirement(RegionRequirement(outputs[0]->part, -// 0 /*projection id*/, -// WRITE_ONLY, -// EXCLUSIVE, -// outputs[0]->region)); -// launcher.add_field(4, FID_DATA); -// runtime->execute_index_space(ctx, launcher); -// } - -// void MultiHeadAttention::backward(FFModel const &ff) { -// ArgumentMap argmap; -// Context ctx = ff.config.lg_ctx; -// Runtime *runtime = ff.config.lg_hlr; -// set_argumentmap_for_backward(ff, argmap); -// IndexLauncher launcher(ATTENTION_BWD_TASK_ID, -// parallel_is, -// TaskArgument(NULL, 0), -// argmap, -// Predicate::TRUE_PRED, -// false /*must*/, -// 0 /*mapper_id*/, -// outputs[0]->machine_view.hash()); -// launcher.add_region_requirement(RegionRequirement(inputs[0]->part, -// 0 /*projection id*/, -// READ_ONLY, -// EXCLUSIVE, -// inputs[0]->region)); -// launcher.add_field(0, FID_DATA); -// launcher.add_region_requirement(RegionRequirement(inputs[1]->part, -// 0 /*projection id*/, -// READ_ONLY, -// EXCLUSIVE, -// inputs[1]->region)); -// launcher.add_field(1, FID_DATA); -// launcher.add_region_requirement(RegionRequirement(inputs[2]->part, -// 0 /*projection id*/, -// READ_ONLY, -// EXCLUSIVE, 
-// inputs[2]->region)); -// launcher.add_field(2, FID_DATA); -// launcher.add_region_requirement(RegionRequirement(weights[0]->part, -// 0 /*projection id*/, -// READ_ONLY, -// EXCLUSIVE, -// weights[0]->region)); -// launcher.add_field(3, FID_DATA); -// launcher.add_region_requirement(RegionRequirement(outputs[0]->part_grad, -// 0 /*projection id*/, -// READ_ONLY, -// EXCLUSIVE, -// outputs[0]->region_grad)); -// launcher.add_field(4, FID_DATA); -// launcher.add_region_requirement(RegionRequirement(weights[0]->part_grad, -// 0 /*projection id*/, -// READ_WRITE, -// EXCLUSIVE, -// weights[0]->region_grad)); -// launcher.add_field(5, FID_DATA); -// launcher.add_region_requirement(RegionRequirement(inputs[0]->part_grad, -// 0 /*projection id*/, -// READ_WRITE, -// EXCLUSIVE, -// inputs[0]->region_grad)); -// launcher.add_field(6, FID_DATA); -// int num_regions = 7; -// if (inputs[1]->region != inputs[0]->region) { -// // when key != query -// launcher.add_region_requirement(RegionRequirement(inputs[1]->part_grad, -// 0 /*projection id*/, -// READ_WRITE, -// EXCLUSIVE, -// inputs[1]->region_grad)); -// launcher.add_field(num_regions++, FID_DATA); -// } -// if ((inputs[2]->region != inputs[0]->region) && -// (inputs[2]->region != inputs[1]->region)) { -// // when value != key and value != query -// launcher.add_region_requirement(RegionRequirement(inputs[2]->part_grad, -// 0 /*projection id*/, -// READ_WRITE, -// EXCLUSIVE, -// inputs[2]->region_grad)); -// launcher.add_field(num_regions++, FID_DATA); -// } -// runtime->execute_index_space(ctx, launcher); -// } diff --git a/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_inputs.cc b/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_inputs.cc index 97544d1750..b9049bf461 100644 --- a/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_inputs.cc +++ b/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_inputs.cc @@ -31,9 +31,9 @@ tl::expected 3)); } - size_t seq_len_q = dim_at_idx(input_q, relative_ff_dim_t{-2}); - size_t seq_len_k = dim_at_idx(input_k, relative_ff_dim_t{-2}); - size_t seq_len_v = dim_at_idx(input_v, relative_ff_dim_t{-2}); + nonnegative_int seq_len_q = dim_at_idx(input_q, relative_ff_dim_t{-2}); + nonnegative_int seq_len_k = dim_at_idx(input_k, relative_ff_dim_t{-2}); + nonnegative_int seq_len_v = dim_at_idx(input_v, relative_ff_dim_t{-2}); if (!all_same(seq_len_q, seq_len_k, seq_len_v)) { return tl::unexpected(fmt::format( @@ -43,9 +43,9 @@ tl::expected seq_len_v)); } - size_t batch_size_q = dim_at_idx(input_q, relative_ff_dim_t{-3}); - size_t batch_size_k = dim_at_idx(input_k, relative_ff_dim_t{-3}); - size_t batch_size_v = dim_at_idx(input_v, relative_ff_dim_t{-3}); + nonnegative_int batch_size_q = dim_at_idx(input_q, relative_ff_dim_t{-3}); + nonnegative_int batch_size_k = dim_at_idx(input_k, relative_ff_dim_t{-3}); + nonnegative_int batch_size_v = dim_at_idx(input_v, relative_ff_dim_t{-3}); if (!all_same(batch_size_q, batch_size_k, batch_size_v)) { return tl::unexpected(fmt::format( @@ -63,9 +63,9 @@ tl::expected input_v.data_type)); } - size_t q_size = dim_at_idx(input_q, relative_ff_dim_t{-1}); - size_t k_size = dim_at_idx(input_k, relative_ff_dim_t{-1}); - size_t v_size = dim_at_idx(input_v, relative_ff_dim_t{-1}); + nonnegative_int q_size = dim_at_idx(input_q, relative_ff_dim_t{-1}); + nonnegative_int k_size = dim_at_idx(input_k, relative_ff_dim_t{-1}); + nonnegative_int v_size = dim_at_idx(input_v, relative_ff_dim_t{-1}); return MultiHeadAttentionInputs{ batch_size_q, diff --git 
a/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.cc b/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.cc index 3bd0825555..d69b62b759 100644 --- a/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.cc +++ b/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.cc @@ -107,9 +107,9 @@ tl::expected value_dim.degree)); } - int discard_copy_q = get_discard_copy_degree(input_q); - int discard_copy_k = get_discard_copy_degree(input_k); - int discard_copy_v = get_discard_copy_degree(input_v); + nonnegative_int discard_copy_q = get_discard_copy_degree(input_q); + nonnegative_int discard_copy_k = get_discard_copy_degree(input_k); + nonnegative_int discard_copy_v = get_discard_copy_degree(input_v); if (!all_same(discard_copy_q, discard_copy_k, discard_copy_v)) { return tl::unexpected(fmt::format("Q, K, V disagree on the discard-copy " diff --git a/lib/op-attrs/src/op-attrs/ops/batch_matmul.cc b/lib/op-attrs/src/op-attrs/ops/batch_matmul.cc index 71118db7a6..d32ae33d14 100644 --- a/lib/op-attrs/src/op-attrs/ops/batch_matmul.cc +++ b/lib/op-attrs/src/op-attrs/ops/batch_matmul.cc @@ -57,13 +57,13 @@ tl::expected input_rhs.data_type)); } - size_t lhs_b = dim_at_idx(input_lhs, relative_ff_dim_t{0}); - size_t n = dim_at_idx(input_lhs, relative_ff_dim_t{1}); - size_t lhs_m = dim_at_idx(input_lhs, relative_ff_dim_t{2}); + nonnegative_int lhs_b = dim_at_idx(input_lhs, relative_ff_dim_t{0}); + nonnegative_int n = dim_at_idx(input_lhs, relative_ff_dim_t{1}); + nonnegative_int lhs_m = dim_at_idx(input_lhs, relative_ff_dim_t{2}); - size_t rhs_b = dim_at_idx(input_rhs, relative_ff_dim_t{0}); - size_t rhs_m = dim_at_idx(input_rhs, relative_ff_dim_t{1}); - size_t p = dim_at_idx(input_rhs, relative_ff_dim_t{2}); + nonnegative_int rhs_b = dim_at_idx(input_rhs, relative_ff_dim_t{0}); + nonnegative_int rhs_m = dim_at_idx(input_rhs, relative_ff_dim_t{1}); + nonnegative_int p = dim_at_idx(input_rhs, relative_ff_dim_t{2}); if (lhs_b != rhs_b) { return tl::unexpected( @@ -76,7 +76,7 @@ tl::expected return TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ lhs_b, n, p, @@ -151,9 +151,10 @@ tl::expected ShardParallelDim output_n = n; ShardParallelDim output_p = p; - int output_discard_copy_degree = 1; - int output_sum_degree = get_total_parallel_degree(input_lhs) / - (output_b.degree * output_n.degree * output_p.degree); + nonnegative_int output_discard_copy_degree = 1_n; + nonnegative_int output_sum_degree = + get_total_parallel_degree(input_lhs) / + (output_b.degree * output_n.degree * output_p.degree); ParallelTensorShape result = ParallelTensorShape{ ParallelTensorDims{ diff --git a/lib/op-attrs/src/op-attrs/ops/batch_norm.cc b/lib/op-attrs/src/op-attrs/ops/batch_norm.cc index 472e5f1a25..ed58fe5189 100644 --- a/lib/op-attrs/src/op-attrs/ops/batch_norm.cc +++ b/lib/op-attrs/src/op-attrs/ops/batch_norm.cc @@ -67,10 +67,10 @@ tl::expected return tl::unexpected("No gamma weights exist for attrs.affine = false"); } - size_t num_channels = dim_at_idx(input_shape, relative_ff_dim_t{1}); + nonnegative_int num_channels = dim_at_idx(input_shape, relative_ff_dim_t{1}); return TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ num_channels, }}, DataType::FLOAT, @@ -97,26 +97,23 @@ static std::optional input_degrees); } - if (input_degrees.sum_degree != SumDegree{1}) { + if (input_degrees.sum_degree != SumDegree{1_n}) { return fmt::format("Expected sum degree 1, but receieved sum degree {}", input_degrees.sum_degree); } - 
if (input_degrees.discard_copy_degree != DiscardCopyDegree{1}) { + if (input_degrees.discard_copy_degree != DiscardCopyDegree{1_n}) { return fmt::format( "Expected discard copy degree 1, but receieved discard copy degree {}", input_degrees.discard_copy_degree); } - FFOrdered non_channel_degrees = - concat(slice(input_degrees.shard_degrees, - ff_dim_t{nonnegative_int{0}}, - ff_dim_t{nonnegative_int{1}}), - slice(input_degrees.shard_degrees, - ff_dim_t{nonnegative_int{2}}, - std::nullopt)); + FFOrdered non_channel_degrees = + concat(slice(input_degrees.shard_degrees, ff_dim_t{0_n}, ff_dim_t{1_n}), + slice(input_degrees.shard_degrees, ff_dim_t{2_n}, std::nullopt)); - if (any_of(non_channel_degrees, [](int degree) { return degree != 1; })) { + if (any_of(non_channel_degrees, + [](nonnegative_int degree) { return degree != 1_n; })) { return fmt::format("Expected parallel degree of all non-channel dimensions " "to be 1, but received input with degrees {}", input_degrees); @@ -159,9 +156,9 @@ tl::expected relative_ff_dim_t channel_dim = relative_ff_dim_t{1}; return ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{1}, - FFOrdered{input_degrees.shard_degrees.at(channel_dim)}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + FFOrdered{input_degrees.shard_degrees.at(channel_dim)}, }; } diff --git a/lib/op-attrs/src/op-attrs/ops/concat.cc b/lib/op-attrs/src/op-attrs/ops/concat.cc index 3019151236..fc42241ef2 100644 --- a/lib/op-attrs/src/op-attrs/ops/concat.cc +++ b/lib/op-attrs/src/op-attrs/ops/concat.cc @@ -17,7 +17,8 @@ tl::expected get_output_shape(ConcatAttrs const &attrs, std::vector const &inputs) { auto get_non_axis_dims = [&](TensorShape const &s) { - std::map dim_sizes = enumerate(ff_ordered(s.dims)); + std::map dim_sizes = + enumerate(ff_ordered(s.dims)); dim_sizes.erase(attrs.axis); return dim_sizes; }; @@ -40,8 +41,8 @@ tl::expected inputs)); } - std::map non_axis_dims = ({ - tl::expected, std::string> returned = + std::map non_axis_dims = ({ + tl::expected, std::string> returned = require_all_same1(transform(inputs, get_non_axis_dims)); if (!returned.has_value()) { return tl::unexpected(returned.error()); @@ -49,12 +50,12 @@ tl::expected returned.value(); }); - std::vector axis_dim_sizes = + std::vector axis_dim_sizes = transform(inputs, [&](TensorShape const &s) { return dim_at_idx(s, relative_ff_dim_t_from_ff_dim_t(attrs.axis)); }); - size_t output_axis_dim_size = sum(axis_dim_sizes); + nonnegative_int output_axis_dim_size = sum(axis_dim_sizes); non_axis_dims.insert({attrs.axis, output_axis_dim_size}); @@ -88,7 +89,7 @@ tl::expected }); SumDegree sum_degree = ({ - tl::expected returned = + tl::expected returned = require_all_same1(transform(inputs, get_sum_degree)); if (!returned.has_value()) { return tl::unexpected(returned.error()); @@ -97,7 +98,7 @@ tl::expected }); DiscardCopyDegree discard_copy_degree = ({ - tl::expected returned = + tl::expected returned = require_all_same1(transform(inputs, get_discard_copy_degree)); if (!returned.has_value()) { return tl::unexpected(returned.error()); diff --git a/lib/op-attrs/src/op-attrs/ops/conv_2d.cc b/lib/op-attrs/src/op-attrs/ops/conv_2d.cc index eac756cc15..d1ba536d24 100644 --- a/lib/op-attrs/src/op-attrs/ops/conv_2d.cc +++ b/lib/op-attrs/src/op-attrs/ops/conv_2d.cc @@ -25,11 +25,11 @@ TensorShape get_kernel_shape(Conv2DAttrs const &attrs, Conv2DInputShape input = parse_input_shape(raw_input_shape); return TensorShape{ - TensorDims{FFOrdered{ - size_t_from_int(attrs.out_channels), + TensorDims{FFOrdered{ + attrs.out_channels, 
input.num_channels, - size_t_from_int(attrs.kernel_h), - size_t_from_int(attrs.kernel_w), + attrs.kernel_h, + attrs.kernel_w, }}, input.datatype, }; @@ -42,29 +42,44 @@ TensorShape get_bias_shape(Conv2DAttrs const &attrs, return TensorShape{ TensorDims{ - FFOrdered{size_t_from_int(attrs.out_channels)}, + FFOrdered{attrs.out_channels}, }, input.datatype, }; } +static nonnegative_int calculate_output_size(nonnegative_int input_size, + nonnegative_int padding_size, + nonnegative_int kernel_size, + nonnegative_int stride) { + int input_size_raw = input_size.unwrap_nonnegative(); + int padding_raw = padding_size.unwrap_nonnegative(); + int kernel_size_raw = kernel_size.unwrap_nonnegative(); + int stride_raw = stride.unwrap_nonnegative(); + + return nonnegative_int{ + (input_size_raw + (2 * padding_raw) - kernel_size_raw) / stride_raw + 1}; +} + TensorShape get_output_shape(Conv2DAttrs const &attrs, TensorShape const &raw_input_shape) { assert(attrs.groups == 1); // TODO(@lockshaw): currently not supported Conv2DInputShape input = parse_input_shape(raw_input_shape); - size_t out_height = - (input.height + (2 * attrs.padding_h) - attrs.kernel_h) / attrs.stride_h + - 1; - size_t out_width = - (input.width + (2 * attrs.padding_w) - attrs.kernel_w) / attrs.stride_w + - 1; - - assert(attrs.out_channels > 0); - - return TensorShape{TensorDims{FFOrdered{ + nonnegative_int out_height = + calculate_output_size(/*input_size=*/input.height, + /*padding_size=*/attrs.padding_h, + /*kernel_size=*/attrs.kernel_h, + /*stride_size=*/attrs.stride_h); + nonnegative_int out_width = + calculate_output_size(/*input_size=*/input.width, + /*padding_size=*/attrs.padding_w, + /*kernel_size=*/attrs.kernel_w, + /*stride_size=*/attrs.stride_w); + + return TensorShape{TensorDims{FFOrdered{ input.num_samples, - size_t_from_int(attrs.out_channels), + attrs.out_channels, out_height, out_width, }}, @@ -82,14 +97,14 @@ ParallelTensorShape get_kernel_shape(Conv2DAttrs const &attrs, assert(parsed.height_dim.degree == 1); assert(parsed.width_dim.degree == 1); - SumDegree sum_degree = SumDegree{1}; + SumDegree sum_degree = SumDegree{1_n}; DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{parsed.sample_dim.degree * parsed.sum_reduction_degree}; - FFOrdered shard_degrees = { + FFOrdered shard_degrees = { parsed.discard_copy_reduction_degree, parsed.channel_dim.degree, - 1, - 1, + 1_n, + 1_n, }; return lift_to_parallel_with_degrees( @@ -109,7 +124,7 @@ ParallelTensorShape get_bias_shape(Conv2DAttrs const &attrs, DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{parsed.height_dim.degree * parsed.width_dim.degree * parsed.sample_dim.degree}; - FFOrdered shard_degrees = { + FFOrdered shard_degrees = { parsed.discard_copy_reduction_degree, }; @@ -130,12 +145,12 @@ ParallelTensorShape get_output_shape(Conv2DAttrs const &attrs, SumDegree sum_degree = SumDegree{parsed.sum_reduction_degree * parsed.channel_dim.degree}; - DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{1}; - FFOrdered shard_degrees = { + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{1_n}; + FFOrdered shard_degrees = { parsed.sample_dim.degree, parsed.discard_copy_reduction_degree, - 1, - 1, + 1_n, + 1_n, }; return lift_to_parallel_with_degrees( diff --git a/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_input_shape.cc b/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_input_shape.cc index aad067feb2..1491410491 100644 --- a/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_input_shape.cc +++ 
b/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_input_shape.cc @@ -6,10 +6,10 @@ namespace FlexFlow { Conv2DInputShape parse_input_shape(TensorShape const &input) { assert(num_dims(input) == 4); - size_t num_samples = dim_at_idx(input, relative_ff_dim_t{0}); - size_t in_channels = dim_at_idx(input, relative_ff_dim_t{1}); - size_t in_height = dim_at_idx(input, relative_ff_dim_t{2}); - size_t in_width = dim_at_idx(input, relative_ff_dim_t{3}); + nonnegative_int num_samples = dim_at_idx(input, relative_ff_dim_t{0}); + nonnegative_int in_channels = dim_at_idx(input, relative_ff_dim_t{1}); + nonnegative_int in_height = dim_at_idx(input, relative_ff_dim_t{2}); + nonnegative_int in_width = dim_at_idx(input, relative_ff_dim_t{3}); return Conv2DInputShape{ num_samples, diff --git a/lib/op-attrs/src/op-attrs/ops/embedding.cc b/lib/op-attrs/src/op-attrs/ops/embedding.cc index fe557695da..29bd70be2f 100644 --- a/lib/op-attrs/src/op-attrs/ops/embedding.cc +++ b/lib/op-attrs/src/op-attrs/ops/embedding.cc @@ -50,9 +50,9 @@ tl::expected return TensorShape{ TensorDims{ - FFOrdered{ - size_t_from_int(attrs.num_entries), - size_t_from_int(attrs.out_channels), + FFOrdered{ + attrs.num_entries, + attrs.out_channels, }, }, attrs.data_type, @@ -74,8 +74,8 @@ tl::expected SumDegree sum_degree = SumDegree{shard_dim_at_idx(input, relative_ff_dim_t{-1}).degree}; - DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{1}; - FFOrdered shard_degrees = + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{1_n}; + FFOrdered shard_degrees = transform(input.dims.shard_dims, [](ShardParallelDim const &d) { return d.degree; }); shard_degrees.at(relative_ff_dim_t{-1}) = get_discard_copy_degree(input); @@ -96,13 +96,13 @@ tl::expected result_unpar.value(); }); - SumDegree sum_degree = SumDegree{1}; - DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{product( - transform(ff_ordered_shard_dims(input.dims), - [](ShardParallelDim const &d) -> int { return d.degree; }))}; - int entry_dim_degree = 1; - int out_channel_degree = get_discard_copy_degree(input); - FFOrdered shard_degrees = { + SumDegree sum_degree = SumDegree{1_n}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{product(transform( + ff_ordered_shard_dims(input.dims), + [](ShardParallelDim const &d) -> nonnegative_int { return d.degree; }))}; + nonnegative_int entry_dim_degree = 1_n; + nonnegative_int out_channel_degree = get_discard_copy_degree(input); + FFOrdered shard_degrees = { entry_dim_degree, out_channel_degree, }; diff --git a/lib/op-attrs/src/op-attrs/ops/flat.cc b/lib/op-attrs/src/op-attrs/ops/flat.cc index bc86102566..8ed12167b3 100644 --- a/lib/op-attrs/src/op-attrs/ops/flat.cc +++ b/lib/op-attrs/src/op-attrs/ops/flat.cc @@ -11,12 +11,11 @@ namespace FlexFlow { TensorShape get_output_shape(FlatAttrs const &attrs, TensorShape const &input_shape) { - FFOrdered leading_dims = slice(ff_ordered(input_shape.dims), - ff_dim_t{nonnegative_int{0}}, - attrs.start_dim); - FFOrdered flattened_dims = + FFOrdered leading_dims = + slice(ff_ordered(input_shape.dims), ff_dim_t{0_n}, attrs.start_dim); + FFOrdered flattened_dims = slice(ff_ordered(input_shape.dims), attrs.start_dim, attrs.end_dim); - FFOrdered trailing_dims = + FFOrdered trailing_dims = slice(ff_ordered(input_shape.dims), attrs.end_dim, std::nullopt); if (flattened_dims.empty()) { @@ -38,14 +37,15 @@ TensorShape get_output_shape(FlatAttrs const &attrs, tl::expected get_output_parallel_dim_degrees( FlatAttrs const &attrs, ParallelTensorDimDegrees const &input_degrees) { - FFOrdered 
flattened_dim_degrees = + FFOrdered flattened_dim_degrees = slice(input_degrees.shard_degrees, attrs.start_dim, attrs.end_dim); if (flattened_dim_degrees.empty()) { return input_degrees; } - if (any_of(flattened_dim_degrees, [](int degree) { return degree != 1; })) { + if (any_of(flattened_dim_degrees, + [](nonnegative_int degree) { return degree != 1; })) { return tl::unexpected( fmt::format("get_output_parallel_dim_degrees for {} expected all shard " "degrees of flattened dimensions to be 1, but received {}", @@ -58,9 +58,7 @@ tl::expected /*discard_copy_degree=*/input_degrees.discard_copy_degree, /*shard_degrees=*/ concat(std::vector{ - slice(input_degrees.shard_degrees, - ff_dim_t{nonnegative_int{0}}, - attrs.start_dim), + slice(input_degrees.shard_degrees, ff_dim_t{0_n}, attrs.start_dim), {product(flattened_dim_degrees)}, slice(input_degrees.shard_degrees, attrs.end_dim, std::nullopt), }), diff --git a/lib/op-attrs/src/op-attrs/ops/layer_norm.cc b/lib/op-attrs/src/op-attrs/ops/layer_norm.cc index 86426dd18f..2394579e53 100644 --- a/lib/op-attrs/src/op-attrs/ops/layer_norm.cc +++ b/lib/op-attrs/src/op-attrs/ops/layer_norm.cc @@ -71,7 +71,7 @@ tl::expected std::vector non_layer_norm_dim_idxs = filter( get_idxs(input_shape.dims.ff_ordered), [&](ff_dim_t const &dim_idx) { return !contains(attrs.axes, dim_idx); }); - std::vector raw_weight_dims = + std::vector raw_weight_dims = transform(non_layer_norm_dim_idxs, [&](ff_dim_t const &dim_idx) { return dim_at_idx(input_shape, relative_ff_dim_t_from_ff_dim_t(dim_idx)); @@ -174,8 +174,8 @@ tl::expected ParallelTensorDims{ ff_ordered_of(raw_weight_shard_dims), ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, diff --git a/lib/op-attrs/src/op-attrs/ops/linear.cc b/lib/op-attrs/src/op-attrs/ops/linear.cc index e00a47d490..0387c143d7 100644 --- a/lib/op-attrs/src/op-attrs/ops/linear.cc +++ b/lib/op-attrs/src/op-attrs/ops/linear.cc @@ -41,11 +41,11 @@ RecordFormatter as_dot(LinearAttrs const &attrs) { tl::expected get_projection_shape(LinearAttrs const &attrs, TensorShape const &input_shape) { - size_t in_channels = dim_at_idx(input_shape, relative_ff_dim_t{-1}); + nonnegative_int in_channels = dim_at_idx(input_shape, relative_ff_dim_t{-1}); return TensorShape{ TensorDims{ - FFOrdered{in_channels, size_t_from_int(attrs.out_channels)}, + FFOrdered{in_channels, attrs.out_channels}, }, input_shape.data_type, }; @@ -55,7 +55,7 @@ tl::expected get_bias_shape(LinearAttrs const &attrs, TensorShape const &input_shape) { return TensorShape{ TensorDims{ - FFOrdered{size_t_from_int(attrs.out_channels)}, + FFOrdered{attrs.out_channels}, }, input_shape.data_type, }; @@ -64,8 +64,7 @@ tl::expected tl::expected get_output_shape(LinearAttrs const &attrs, TensorShape const &input_shape) { TensorShape output_shape = input_shape; - output_shape.dims.ff_ordered.at(relative_ff_dim_t{-1}) = - size_t_from_int(attrs.out_channels); + output_shape.dims.ff_ordered.at(relative_ff_dim_t{-1}) = attrs.out_channels; return output_shape; } @@ -82,12 +81,12 @@ tl::expected result_unpar.value(); }); - SumDegree sum_degree = SumDegree{1}; + SumDegree sum_degree = SumDegree{1_n}; DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{ get_sum_degree(input) * product(slice(ff_ordered_shard_degrees(input), std::nullopt, relative_ff_dim_t{-1}))}; - FFOrdered shard_degrees = FFOrdered{ + FFOrdered shard_degrees = FFOrdered{ shard_dim_at_idx(input, relative_ff_dim_t{-1}).degree, 
get_discard_copy_degree(input), }; @@ -112,7 +111,8 @@ tl::expected shard_dim_at_idx(input, relative_ff_dim_t{-1}).degree}; DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{product(slice( ff_ordered_shard_degrees(input), std::nullopt, relative_ff_dim_t{-1}))}; - FFOrdered shard_degrees = FFOrdered{get_discard_copy_degree(input)}; + FFOrdered shard_degrees = + FFOrdered{get_discard_copy_degree(input)}; return lift_to_parallel_with_degrees( unpar, sum_degree, discard_copy_degree, shard_degrees); @@ -133,8 +133,8 @@ tl::expected SumDegree sum_degree = SumDegree{get_sum_degree(input) * shard_dim_at_idx(input, relative_ff_dim_t{-1}).degree}; - DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{1}; - FFOrdered shard_degrees = ff_ordered_shard_degrees(input); + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{1_n}; + FFOrdered shard_degrees = ff_ordered_shard_degrees(input); shard_degrees.at(relative_ff_dim_t{-1}) = get_discard_copy_degree(input); return lift_to_parallel_with_degrees( diff --git a/lib/op-attrs/src/op-attrs/ops/pool_2d.cc b/lib/op-attrs/src/op-attrs/ops/pool_2d.cc index 86d287ebc8..f9630e16b1 100644 --- a/lib/op-attrs/src/op-attrs/ops/pool_2d.cc +++ b/lib/op-attrs/src/op-attrs/ops/pool_2d.cc @@ -8,8 +8,8 @@ namespace FlexFlow { tl::expected make_adaptive_pool2d_attrs(TensorDims const &input_dims, - int output_h, - int output_w, + nonnegative_int output_h, + nonnegative_int output_w, PoolOp pool_type, std::optional const &activation) { // AdaptivePool2D semantics pulled from @@ -22,10 +22,10 @@ tl::expected input_dims)); } - size_t num_samples = dim_at_idx(input_dims, relative_ff_dim_t{0}); - size_t num_channels = dim_at_idx(input_dims, relative_ff_dim_t{1}); - size_t input_h = dim_at_idx(input_dims, relative_ff_dim_t{2}); - size_t input_w = dim_at_idx(input_dims, relative_ff_dim_t{3}); + nonnegative_int num_samples = dim_at_idx(input_dims, relative_ff_dim_t{0}); + nonnegative_int num_channels = dim_at_idx(input_dims, relative_ff_dim_t{1}); + nonnegative_int input_h = dim_at_idx(input_dims, relative_ff_dim_t{2}); + nonnegative_int input_w = dim_at_idx(input_dims, relative_ff_dim_t{3}); if (input_h % output_h != 0) { return tl::unexpected(fmt::format( @@ -55,29 +55,29 @@ tl::expected // = `ind / outd` // = `stride` - int kernel_h = input_h / output_h; - int kernel_w = input_w / output_w; + nonnegative_int kernel_h = input_h / output_h; + nonnegative_int kernel_w = input_w / output_w; - int stride_h = kernel_h; - int stride_w = kernel_w; + nonnegative_int stride_h = kernel_h; + nonnegative_int stride_w = kernel_w; Pool2DAttrs attrs = Pool2DAttrs{ /*kernel_h=*/kernel_h, /*kernel_w=*/kernel_w, /*stride_h=*/stride_h, /*stride_w=*/stride_w, - /*padding_h=*/0, - /*padding_w=*/0, + /*padding_h=*/0_n, + /*padding_w=*/0_n, /*pool_type=*/pool_type, /*activation=*/activation, }; TensorShape expected_ouput_shape = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ num_samples, num_channels, - size_t_from_int(output_h), - size_t_from_int(output_w), + output_h, + output_w, }}, DataType::FLOAT, }; @@ -104,6 +104,19 @@ tl::expected return attrs; } +static nonnegative_int calculate_output_size(nonnegative_int input_size, + nonnegative_int padding_size, + nonnegative_int kernel_size, + nonnegative_int stride) { + int input_size_raw = input_size.unwrap_nonnegative(); + int padding_raw = padding_size.unwrap_nonnegative(); + int kernel_size_raw = kernel_size.unwrap_nonnegative(); + int stride_raw = stride.unwrap_nonnegative(); + + return nonnegative_int{ + 
(input_size_raw + (2 * padding_raw) - kernel_size_raw) / stride_raw + 1}; +} + tl::expected get_output_shape(Pool2DAttrs const &attrs, TensorShape const &input_shape) { if (num_dims(input_shape) != 4) { @@ -113,19 +126,23 @@ tl::expected input_shape)); } - size_t num_samples = dim_at_idx(input_shape, relative_ff_dim_t{0}); - size_t num_channels = dim_at_idx(input_shape, relative_ff_dim_t{1}); - size_t input_height = dim_at_idx(input_shape, relative_ff_dim_t{2}); - size_t input_width = dim_at_idx(input_shape, relative_ff_dim_t{3}); - - size_t output_height = - (input_height + 2 * attrs.padding_h - attrs.kernel_h) / attrs.stride_h + - 1; - - size_t output_width = - (input_width + 2 * attrs.padding_w - attrs.kernel_w) / attrs.stride_w + 1; - - return TensorShape{TensorDims{FFOrdered{ + nonnegative_int num_samples = dim_at_idx(input_shape, relative_ff_dim_t{0}); + nonnegative_int num_channels = dim_at_idx(input_shape, relative_ff_dim_t{1}); + nonnegative_int input_height = dim_at_idx(input_shape, relative_ff_dim_t{2}); + nonnegative_int input_width = dim_at_idx(input_shape, relative_ff_dim_t{3}); + + nonnegative_int output_height = + calculate_output_size(/*input_size=*/input_height, + /*padding_size=*/attrs.padding_h, + /*kernel_size=*/attrs.kernel_h, + /*stride_size=*/attrs.stride_h); + nonnegative_int output_width = + calculate_output_size(/*input_size=*/input_width, + /*padding_size=*/attrs.padding_w, + /*kernel_size=*/attrs.kernel_w, + /*stride_size=*/attrs.stride_w); + + return TensorShape{TensorDims{FFOrdered{ num_samples, num_channels, output_height, diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_dims.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_dims.cc index 0bb940924a..7a8f91e498 100644 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_dims.cc +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_dims.cc @@ -7,9 +7,11 @@ #include "op-attrs/tensor_dims.h" #include "utils/containers/all_of.h" #include "utils/containers/product.h" +#include "utils/containers/repeat_element.h" #include "utils/containers/transform.h" #include "utils/containers/vector_of.h" #include "utils/integer_conversions.h" +#include "utils/nonnegative_int/num_elements.h" namespace FlexFlow { @@ -17,7 +19,8 @@ FFOrdered ff_ordered_shard_dims(ParallelTensorDims const &d) { return d.shard_dims; } -FFOrdered ff_ordered_shard_degrees(ParallelTensorDims const &d) { +FFOrdered + ff_ordered_shard_degrees(ParallelTensorDims const &d) { return transform(d.shard_dims, [](ShardParallelDim const &d) { return d.degree; }); } @@ -27,8 +30,8 @@ std::unordered_set return get_replica_dims(d.replica_dims); } -size_t num_shard_dims(ParallelTensorDims const &dims) { - return dims.shard_dims.size(); +nonnegative_int num_shard_dims(ParallelTensorDims const &dims) { + return num_elements(dims.shard_dims); } ParallelTensorDimDegrees get_parallel_degrees(ParallelTensorDims const &d) { @@ -40,22 +43,22 @@ ParallelTensorDimDegrees get_parallel_degrees(ParallelTensorDims const &d) { } ParallelTensorDims lift_to_parallel(TensorDims const &dims) { - std::vector shard_degrees(num_dims(dims), - 1); // 1 repeated num_dims(dims) times + std::vector shard_degrees = + repeat_element(/*num_times=*/num_dims(dims), /*element=*/1_n); return lift_to_parallel_with_degrees( - dims, SumDegree{1}, DiscardCopyDegree{1}, shard_degrees); + dims, SumDegree{1_n}, DiscardCopyDegree{1_n}, shard_degrees); } -ParallelTensorDims - lift_to_parallel_with_degrees(TensorDims const &unpar, - SumDegree const &sum_degree, - DiscardCopyDegree const &discard_copy_degree, - 
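// ---------------------------------------------------------------------------
// [editor's aside -- illustration only, not part of the patch]
// Sanity check of the adaptive-pool derivation above: with zero padding and
// kernel == stride == input/output, the generic output-size formula (the same
// one calculate_output_size implements) recovers the requested output size
// whenever output divides input. Plain ints stand in for nonnegative_int.
#include <cassert>
static int pool_output_size(int in, int pad, int kernel, int stride) {
  return (in + 2 * pad - kernel) / stride + 1; // floor division, as above
}
int main() {
  int in = 12, out = 4;
  int kernel = in / out; // kernel == stride == 3
  assert(pool_output_size(in, /*pad=*/0, kernel, /*stride=*/kernel) == out);
}
// ---------------------------------------------------------------------------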
FFOrdered const &shard_degrees) { +ParallelTensorDims lift_to_parallel_with_degrees( + TensorDims const &unpar, + SumDegree const &sum_degree, + DiscardCopyDegree const &discard_copy_degree, + FFOrdered const &shard_degrees) { std::vector lifted = transform(zip(vector_of(unpar.ff_ordered), vector_of(shard_degrees)), - [](std::pair const &p) { - size_t size = p.first; - int degree = p.second; + [](std::pair const &p) { + nonnegative_int size = p.first; + nonnegative_int degree = p.second; return ShardParallelDim{size, degree}; }); @@ -75,17 +78,17 @@ ParallelTensorDims degrees.shard_degrees); } -int total_replica_degree(ParallelTensorDims const &dims) { +nonnegative_int total_replica_degree(ParallelTensorDims const &dims) { return dims.replica_dims.discard_copy_degree.value * dims.replica_dims.sum_degree.value; } -int total_shard_degree(ParallelTensorDims const &dims) { +nonnegative_int total_shard_degree(ParallelTensorDims const &dims) { return product(transform(vector_of(dims.shard_dims), [](ShardParallelDim const &d) { return d.degree; })); } -int total_parallel_degree(ParallelTensorDims const &dims) { +nonnegative_int total_parallel_degree(ParallelTensorDims const &dims) { return total_replica_degree(dims) * total_shard_degree(dims); } @@ -115,7 +118,7 @@ TensorDims get_tensor_dims_unsafe(ParallelTensorDims const &) { } TensorDims get_reduced_dims(ParallelTensorDims const &dims) { - FFOrdered dim_sizes = transform( + FFOrdered dim_sizes = transform( dims.shard_dims, [](ShardParallelDim const &d) { return d.size; }); return TensorDims{dim_sizes}; } diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc index bbad13b46b..260ec7c3cd 100644 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc @@ -6,11 +6,12 @@ #include "utils/containers/range.h" #include "utils/containers/transform.h" #include "utils/hash-utils.h" +#include "utils/nonnegative_int/nonnegative_range.h" #include "utils/overload.h" namespace FlexFlow { -int num_shard_dims(ParallelTensorShape const &s) { +nonnegative_int num_shard_dims(ParallelTensorShape const &s) { return num_shard_dims(s.dims); } @@ -19,21 +20,21 @@ std::unordered_set return replica_dims(s.dims); } -int get_num_replicas(ParallelTensorShape const &shape) { - return product( - transform(replica_dims(shape), - [](ReplicaParallelDim const &d) -> int { return d.degree; })); +nonnegative_int get_num_replicas(ParallelTensorShape const &shape) { + return product(transform( + replica_dims(shape), + [](ReplicaParallelDim const &d) -> nonnegative_int { return d.degree; })); } -int get_sum_degree(ParallelTensorShape const &shape) { +nonnegative_int get_sum_degree(ParallelTensorShape const &shape) { return shape.dims.replica_dims.sum_degree.value; } -int get_discard_copy_degree(ParallelTensorShape const &shape) { +nonnegative_int get_discard_copy_degree(ParallelTensorShape const &shape) { return shape.dims.replica_dims.discard_copy_degree.value; } -int get_total_parallel_degree(ParallelTensorShape const &s) { +nonnegative_int get_total_parallel_degree(ParallelTensorShape const &s) { return total_parallel_degree(s.dims); } @@ -51,7 +52,8 @@ ShardParallelDim &shard_dim_at_idx(ParallelTensorShape &s, return shard_dim_at_idx(s.dims, d); } -FFOrdered ff_ordered_shard_degrees(ParallelTensorShape const &s) { +FFOrdered + ff_ordered_shard_degrees(ParallelTensorShape const &s) { return ff_ordered_shard_degrees(s.dims); } @@ -73,11 +75,11 @@ ParallelTensorShape 
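// ---------------------------------------------------------------------------
// [editor's aside -- illustration only, not part of the patch]
// The total_* helpers above compose as: total = sum_degree *
// discard_copy_degree * product(shard_degrees). A self-contained check of
// that bookkeeping, with plain ints standing in for nonnegative_int:
#include <cassert>
#include <vector>
static int total_parallel_degree_of(int sum_degree,
                                    int discard_copy_degree,
                                    std::vector<int> const &shard_degrees) {
  int total = sum_degree * discard_copy_degree;
  for (int d : shard_degrees) {
    total *= d;
  }
  return total;
}
int main() {
  assert(total_parallel_degree_of(1, 2, {4, 1, 3}) == 24);
}
// ---------------------------------------------------------------------------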
lift_to_parallel(TensorShape const &s) { return ParallelTensorShape{lift_to_parallel(s.dims), s.data_type}; } -ParallelTensorShape - lift_to_parallel_with_degrees(TensorShape const &unpar, - SumDegree const &sum_degree, - DiscardCopyDegree const &discard_copy_degree, - FFOrdered const &shard_degrees) { +ParallelTensorShape lift_to_parallel_with_degrees( + TensorShape const &unpar, + SumDegree const &sum_degree, + DiscardCopyDegree const &discard_copy_degree, + FFOrdered const &shard_degrees) { return ParallelTensorShape{ lift_to_parallel_with_degrees( unpar.dims, sum_degree, discard_copy_degree, shard_degrees), @@ -95,8 +97,8 @@ ParallelTensorShape } TensorShape require_not_parallel(ParallelTensorShape const &s) { - int total_degree = get_total_parallel_degree(s); - if (total_degree != 1) { + nonnegative_int total_degree = get_total_parallel_degree(s); + if (total_degree != 1_n) { throw mk_runtime_error( fmt::format("Error: require_not_parallel received a parallel tensor " "shape with parallel degree {}: {}", @@ -124,25 +126,27 @@ TensorShape get_reduced_shape(ParallelTensorShape const &s) { ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, parallel_tensor_dim_idx_t idx) { - return idx.visit( - overload{[&](ff_dim_t shard_dim) { - return ParallelDim{shape.dims.shard_dims.at(shard_dim)}; - }, - [&](ReplicaType replica_type) { - ReplicaParallelDimSet replicas = shape.dims.replica_dims; - int degree = (ReplicaType::SUM == replica_type - ? replicas.sum_degree.value - : replicas.discard_copy_degree.value); - return ParallelDim{ReplicaParallelDim{degree, replica_type}}; - }}); + return idx.visit(overload{ + [&](ff_dim_t shard_dim) { + return ParallelDim{shape.dims.shard_dims.at(shard_dim)}; + }, + [&](ReplicaType replica_type) { + ReplicaParallelDimSet replicas = shape.dims.replica_dims; + nonnegative_int degree = (ReplicaType::SUM == replica_type + ? 
replicas.sum_degree.value + : replicas.discard_copy_degree.value); + return ParallelDim{ReplicaParallelDim{degree, replica_type}}; + }}); } std::unordered_set get_parallel_tensor_dim_indices(ParallelTensorShape const &shape) { std::unordered_set indices; - extend(indices, transform(range(num_shard_dims(shape.dims)), [](int idx) { - return parallel_tensor_dim_idx_t{ff_dim_t{nonnegative_int{idx}}}; - })); + extend(indices, + transform(nonnegative_range(num_shard_dims(shape.dims)), + [](nonnegative_int idx) { + return parallel_tensor_dim_idx_t{ff_dim_t{idx}}; + })); indices.insert(parallel_tensor_dim_idx_t{ReplicaType::SUM}); indices.insert(parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}); return indices; diff --git a/lib/op-attrs/src/op-attrs/relative_ff_dim_t.cc b/lib/op-attrs/src/op-attrs/relative_ff_dim_t.cc index 0671bb05f2..a987841b18 100644 --- a/lib/op-attrs/src/op-attrs/relative_ff_dim_t.cc +++ b/lib/op-attrs/src/op-attrs/relative_ff_dim_t.cc @@ -3,10 +3,10 @@ namespace FlexFlow { ff_dim_t ff_dim_t_from_relative_ff_dim_t(relative_ff_dim_t ff_dim, - int input_dim) { + nonnegative_int input_dim) { int raw = ff_dim.value; if (raw < 0) { - raw = input_dim + raw; + raw = input_dim.unwrap_nonnegative() + raw; } return ff_dim_t{nonnegative_int{raw}}; } diff --git a/lib/op-attrs/src/op-attrs/replica_parallel_dim_set.cc b/lib/op-attrs/src/op-attrs/replica_parallel_dim_set.cc index 20c88c77dc..fc712be10b 100644 --- a/lib/op-attrs/src/op-attrs/replica_parallel_dim_set.cc +++ b/lib/op-attrs/src/op-attrs/replica_parallel_dim_set.cc @@ -4,11 +4,11 @@ namespace FlexFlow { ReplicaParallelDimSet empty_replica_parallel_dim_set() { - return ReplicaParallelDimSet{SumDegree{1}, DiscardCopyDegree{1}}; + return ReplicaParallelDimSet{SumDegree{1_n}, DiscardCopyDegree{1_n}}; } -int get_order_of_replica_type(ReplicaParallelDimSet const &s, - ReplicaType replica_type) { +nonnegative_int get_degree_of_replica_type(ReplicaParallelDimSet const &s, + ReplicaType replica_type) { switch (replica_type) { case ReplicaType::SUM: return s.sum_degree.value; diff --git a/lib/op-attrs/src/op-attrs/tensor_dims.cc b/lib/op-attrs/src/op-attrs/tensor_dims.cc index f0ac88d8e4..f9198bbe28 100644 --- a/lib/op-attrs/src/op-attrs/tensor_dims.cc +++ b/lib/op-attrs/src/op-attrs/tensor_dims.cc @@ -8,22 +8,23 @@ #include "utils/containers/vector_of.h" #include "utils/containers/zip.h" #include "utils/integer_conversions.h" +#include "utils/nonnegative_int/num_elements.h" namespace FlexFlow { -FFOrdered const &ff_ordered(TensorDims const &dims) { +FFOrdered const &ff_ordered(TensorDims const &dims) { return dims.ff_ordered; } -size_t num_dims(TensorDims const &dims) { - return dims.ff_ordered.size(); +nonnegative_int num_dims(TensorDims const &dims) { + return num_elements(dims.ff_ordered); } -size_t dim_at_idx(TensorDims const &dims, relative_ff_dim_t idx) { +nonnegative_int dim_at_idx(TensorDims const &dims, relative_ff_dim_t idx) { return dims.ff_ordered.at(idx); } -size_t &dim_at_idx(TensorDims &dims, relative_ff_dim_t idx) { +nonnegative_int &dim_at_idx(TensorDims &dims, relative_ff_dim_t idx) { return dims.ff_ordered.at(idx); } @@ -33,8 +34,8 @@ bool tensor_dims_is_broadcastable_to(TensorDims const &curr, return false; } - std::vector curr_dims = vector_of(curr.ff_ordered); - std::vector goal_dims = vector_of(goal.ff_ordered); + std::vector curr_dims = vector_of(curr.ff_ordered); + std::vector goal_dims = vector_of(goal.ff_ordered); for (auto const &[curr_dim, goal_dim] : zip(reversed(curr_dims), reversed(goal_dims))) { diff 
--git a/lib/op-attrs/src/op-attrs/tensor_shape.cc b/lib/op-attrs/src/op-attrs/tensor_shape.cc index 70ed58aac6..690a07d26a 100644 --- a/lib/op-attrs/src/op-attrs/tensor_shape.cc +++ b/lib/op-attrs/src/op-attrs/tensor_shape.cc @@ -4,26 +4,27 @@ #include "utils/containers/get_only.h" #include "utils/containers/product.h" #include "utils/containers/transform.h" +#include "utils/nonnegative_int/num_elements.h" namespace FlexFlow { -size_t num_dims(TensorShape const &s) { - return s.dims.ff_ordered.size(); +nonnegative_int num_dims(TensorShape const &s) { + return num_elements(s.dims.ff_ordered); } -size_t dim_at_idx(TensorShape const &s, relative_ff_dim_t idx) { +nonnegative_int dim_at_idx(TensorShape const &s, relative_ff_dim_t idx) { return dim_at_idx(s.dims, idx); } -size_t &dim_at_idx(TensorShape &s, relative_ff_dim_t idx) { +nonnegative_int &dim_at_idx(TensorShape &s, relative_ff_dim_t idx) { return dim_at_idx(s.dims, idx); } -size_t get_num_elements(TensorShape const &s) { +nonnegative_int get_num_elements(TensorShape const &s) { return product(s.dims.ff_ordered); } -size_t get_size_in_bytes(TensorShape const &s) { +nonnegative_int get_size_in_bytes(TensorShape const &s) { return get_num_elements(s) * size_of_datatype(s.data_type); } diff --git a/lib/op-attrs/test/src/op-attrs/ops/attention.cc b/lib/op-attrs/test/src/op-attrs/ops/attention.cc index eca8559b21..b317c5c69c 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/attention.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/attention.cc @@ -10,10 +10,10 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_attention_incoming_tensor_roles(MultiHeadAttentionAttrs)") { auto make_attrs = [](bool bias) { return MultiHeadAttentionAttrs{ - /*embed_dim=*/32, - /*num_heads=*/10, - /*kdim=*/32, - /*vdim=*/32, + /*embed_dim=*/32_n, + /*num_heads=*/10_n, + /*kdim=*/32_n, + /*vdim=*/32_n, /*dropout=*/0.0, /*bias=*/bias, /*add_bias_kv=*/false, @@ -58,8 +58,8 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_output_shape(MultiHeadAttentionAttrs, TensorShape, " "TensorShape, TensorShape)") { - int embed_dim = 32; - int num_heads = 10; + nonnegative_int embed_dim = 32_n; + nonnegative_int num_heads = 10_n; /* Parameter meanings match those at * https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html @@ -75,13 +75,13 @@ TEST_SUITE(FF_TEST_SUITE) { /*add_zero_attn=*/false, }; - size_t batch_size = 40; - size_t seq_len = 48; - size_t feature_size = 36; + nonnegative_int batch_size = 40_n; + nonnegative_int seq_len = 48_n; + nonnegative_int feature_size = 36_n; TensorShape input_q = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ batch_size, seq_len, feature_size, @@ -92,7 +92,7 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape input_k = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ batch_size, seq_len, feature_size, @@ -103,7 +103,7 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape input_v = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ batch_size, seq_len, feature_size, @@ -114,10 +114,10 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape output = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ batch_size, seq_len, - size_t_from_int(attrs.embed_dim), + attrs.embed_dim, }, }, DataType::FLOAT, @@ -125,9 +125,9 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape weights = TensorShape{ TensorDims{ - FFOrdered{ - (feature_size * embed_dim) * 3 + (embed_dim * embed_dim), - size_t_from_int(num_heads), + FFOrdered{ + (feature_size * embed_dim) * 3_n + (embed_dim * embed_dim), + num_heads, }, }, DataType::FLOAT, @@ -135,8 +135,8 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape 
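// ---------------------------------------------------------------------------
// [editor's aside -- illustration only, not part of the patch]
// Stand-in for get_num_elements/get_size_in_bytes above: the element count is
// the product of the dims, and the byte size scales by the datatype width
// (4 bytes for FLOAT is an assumption of this sketch).
#include <cassert>
#include <cstddef>
#include <vector>
static std::size_t size_in_bytes(std::vector<std::size_t> const &dims,
                                 std::size_t dtype_bytes) {
  std::size_t n = 1;
  for (std::size_t d : dims) {
    n *= d;
  }
  return n * dtype_bytes;
}
int main() {
  assert(size_in_bytes({12, 16}, /*FLOAT=*/4) == 768);
}
// ---------------------------------------------------------------------------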
input_bias = TensorShape{ TensorDims{ - FFOrdered{ - size_t_from_int(embed_dim * 3), + FFOrdered{ + embed_dim * 3_n, }, }, DataType::FLOAT, @@ -144,8 +144,8 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape output_bias = TensorShape{ TensorDims{ - FFOrdered{ - size_t_from_int(embed_dim), + FFOrdered{ + embed_dim, }, }, DataType::FLOAT, @@ -184,72 +184,94 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("parallel shape inference") { auto make_q = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_batch, - int o_seq_len, - int o_q) { + nonnegative_int o_batch, + nonnegative_int o_seq_len, + nonnegative_int o_q) { return lift_to_parallel_with_degrees( - input_q, o_sum, o_eq, FFOrdered{o_batch, o_seq_len, o_q}); + input_q, + o_sum, + o_eq, + FFOrdered{o_batch, o_seq_len, o_q}); }; auto make_k = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_batch, - int o_seq_len, - int o_k) { + nonnegative_int o_batch, + nonnegative_int o_seq_len, + nonnegative_int o_k) { return lift_to_parallel_with_degrees( - input_k, o_sum, o_eq, FFOrdered{o_batch, o_seq_len, o_k}); + input_k, + o_sum, + o_eq, + FFOrdered{o_batch, o_seq_len, o_k}); }; auto make_v = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_batch, - int o_seq_len, - int o_v) { + nonnegative_int o_batch, + nonnegative_int o_seq_len, + nonnegative_int o_v) { return lift_to_parallel_with_degrees( - input_v, o_sum, o_eq, FFOrdered{o_batch, o_seq_len, o_v}); + input_v, + o_sum, + o_eq, + FFOrdered{o_batch, o_seq_len, o_v}); }; auto make_o = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_batch, - int o_seq_len, - int o_o) { + nonnegative_int o_batch, + nonnegative_int o_seq_len, + nonnegative_int o_o) { return lift_to_parallel_with_degrees( - output, o_sum, o_eq, FFOrdered{o_batch, o_seq_len, o_o}); + output, + o_sum, + o_eq, + FFOrdered{o_batch, o_seq_len, o_o}); }; - auto make_w = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o_e, int o_h) { - return lift_to_parallel_with_degrees( - weights, o_sum, o_eq, FFOrdered{o_e, o_h}); - }; + auto make_w = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o_e, + nonnegative_int o_h) { + return lift_to_parallel_with_degrees( + weights, o_sum, o_eq, FFOrdered{o_e, o_h}); + }; - auto make_input_bias = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o_in_proj_channel) { - return lift_to_parallel_with_degrees( - input_bias, o_sum, o_eq, FFOrdered{o_in_proj_channel}); - }; + auto make_input_bias = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o_in_proj_channel) { + return lift_to_parallel_with_degrees( + input_bias, + o_sum, + o_eq, + FFOrdered{o_in_proj_channel}); + }; - auto make_output_bias = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o_out_proj_channel) { - return lift_to_parallel_with_degrees( - output_bias, o_sum, o_eq, FFOrdered{o_out_proj_channel}); - }; + auto make_output_bias = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o_out_proj_channel) { + return lift_to_parallel_with_degrees( + output_bias, + o_sum, + o_eq, + FFOrdered{o_out_proj_channel}); + }; SUBCASE("data parallelism") { - int o_b = 4; + nonnegative_int o_b = 4_n; ParallelTensorShape q = - make_q(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1); + make_q(SumDegree{1_n}, DiscardCopyDegree{1_n}, o_b, 1_n, 1_n); ParallelTensorShape k = - make_k(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1); + make_k(SumDegree{1_n}, DiscardCopyDegree{1_n}, o_b, 1_n, 1_n); ParallelTensorShape v = - make_v(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1); + make_v(SumDegree{1_n}, DiscardCopyDegree{1_n}, 
o_b, 1_n, 1_n); SUBCASE("get_output_shape") { tl::expected result = get_output_shape(attrs, q, k, v); tl::expected correct = - make_o(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1); + make_o(SumDegree{1_n}, DiscardCopyDegree{1_n}, o_b, 1_n, 1_n); CHECK(result == correct); } @@ -257,7 +279,7 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_weights_shape(attrs, q, k, v); tl::expected correct = - make_w(SumDegree{1}, DiscardCopyDegree{o_b}, 1, 1); + make_w(SumDegree{1_n}, DiscardCopyDegree{o_b}, 1_n, 1_n); CHECK(result == correct); } @@ -265,7 +287,7 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_input_bias_shape(attrs, q, k, v); tl::expected correct = - make_input_bias(SumDegree{1}, DiscardCopyDegree{o_b}, 1); + make_input_bias(SumDegree{1_n}, DiscardCopyDegree{o_b}, 1_n); CHECK(result == correct); } @@ -273,25 +295,25 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_output_bias_shape(attrs, q, k, v); tl::expected correct = - make_output_bias(SumDegree{1}, DiscardCopyDegree{o_b}, 1); + make_output_bias(SumDegree{1_n}, DiscardCopyDegree{o_b}, 1_n); CHECK(result == correct); } } SUBCASE("attention head parallelism") { - int o_h = 2; + nonnegative_int o_h = 2_n; ParallelTensorShape q = - make_q(SumDegree{1}, DiscardCopyDegree{o_h}, 1, 1, 1); + make_q(SumDegree{1_n}, DiscardCopyDegree{o_h}, 1_n, 1_n, 1_n); ParallelTensorShape k = - make_k(SumDegree{1}, DiscardCopyDegree{o_h}, 1, 1, 1); + make_k(SumDegree{1_n}, DiscardCopyDegree{o_h}, 1_n, 1_n, 1_n); ParallelTensorShape v = - make_v(SumDegree{1}, DiscardCopyDegree{o_h}, 1, 1, 1); + make_v(SumDegree{1_n}, DiscardCopyDegree{o_h}, 1_n, 1_n, 1_n); SUBCASE("get_output_shape") { tl::expected result = get_output_shape(attrs, q, k, v); tl::expected correct = - make_o(SumDegree{o_h}, DiscardCopyDegree{1}, 1, 1, 1); + make_o(SumDegree{o_h}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n); CHECK(result == correct); } @@ -299,7 +321,7 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_weights_shape(attrs, q, k, v); tl::expected correct = - make_w(SumDegree{1}, DiscardCopyDegree{1}, 1, o_h); + make_w(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, o_h); CHECK(result == correct); } @@ -307,7 +329,7 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_input_bias_shape(attrs, q, k, v); tl::expected correct = - make_input_bias(SumDegree{1}, DiscardCopyDegree{o_h}, 1); + make_input_bias(SumDegree{1_n}, DiscardCopyDegree{o_h}, 1_n); CHECK(result == correct); } @@ -315,26 +337,26 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_output_bias_shape(attrs, q, k, v); tl::expected correct = - make_output_bias(SumDegree{1}, DiscardCopyDegree{o_h}, 1); + make_output_bias(SumDegree{1_n}, DiscardCopyDegree{o_h}, 1_n); CHECK(result == correct); } } SUBCASE("combined data & attention head parallelism") { - int o_b = 4; - int o_h = 2; + nonnegative_int o_b = 4_n; + nonnegative_int o_h = 2_n; ParallelTensorShape q = - make_q(SumDegree{1}, DiscardCopyDegree{o_h}, o_b, 1, 1); + make_q(SumDegree{1_n}, DiscardCopyDegree{o_h}, o_b, 1_n, 1_n); ParallelTensorShape k = - make_k(SumDegree{1}, DiscardCopyDegree{o_h}, o_b, 1, 1); + make_k(SumDegree{1_n}, DiscardCopyDegree{o_h}, o_b, 1_n, 1_n); ParallelTensorShape v = - make_v(SumDegree{1}, DiscardCopyDegree{o_h}, o_b, 1, 1); + make_v(SumDegree{1_n}, DiscardCopyDegree{o_h}, o_b, 1_n, 1_n); SUBCASE("get_output_shape") { tl::expected result = get_output_shape(attrs, q, k, v); tl::expected correct = - make_o(SumDegree{o_h}, DiscardCopyDegree{1}, o_b, 1, 1); + make_o(SumDegree{o_h}, DiscardCopyDegree{1_n}, o_b, 
1_n, 1_n); CHECK(result == correct); } @@ -342,7 +364,7 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_weights_shape(attrs, q, k, v); tl::expected correct = - make_w(SumDegree{1}, DiscardCopyDegree{o_b}, 1, o_h); + make_w(SumDegree{1_n}, DiscardCopyDegree{o_b}, 1_n, o_h); CHECK(result == correct); } @@ -350,7 +372,8 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_input_bias_shape(attrs, q, k, v); tl::expected correct = - make_input_bias(SumDegree{1}, DiscardCopyDegree{o_b * o_h}, 1); + make_input_bias( + SumDegree{1_n}, DiscardCopyDegree{o_b * o_h}, 1_n); CHECK(result == correct); } @@ -358,7 +381,8 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_output_bias_shape(attrs, q, k, v); tl::expected correct = - make_output_bias(SumDegree{1}, DiscardCopyDegree{o_b * o_h}, 1); + make_output_bias( + SumDegree{1_n}, DiscardCopyDegree{o_b * o_h}, 1_n); CHECK(result == correct); } } diff --git a/lib/op-attrs/test/src/op-attrs/ops/batch_matmul.cc b/lib/op-attrs/test/src/op-attrs/ops/batch_matmul.cc index 56a2e3fa52..27c59ee497 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/batch_matmul.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/batch_matmul.cc @@ -6,20 +6,20 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_output_shape(BatchMatmulAttrs, TensorShape)") { - size_t b = 4; - size_t m = 6; - size_t n = 8; - size_t p = 10; + nonnegative_int b = 4_n; + nonnegative_int m = 6_n; + nonnegative_int n = 8_n; + nonnegative_int p = 10_n; BatchMatmulAttrs attrs = BatchMatmulAttrs{ - /*a_seq_length_dim=*/0, // TODO figure out if these arguments are still - // relevant - /*b_seq_length_dim=*/0, + /*a_seq_length_dim=*/0_n, // TODO figure out if these arguments are + // still relevant + /*b_seq_length_dim=*/0_n, }; TensorShape input_lhs_shape = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ b, n, m, @@ -31,7 +31,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("valid") { TensorShape input_rhs_shape = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ b, m, p, @@ -45,7 +45,7 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected correct_output_shape = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ b, n, p, @@ -60,8 +60,8 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("mismatched b") { TensorShape input_rhs_shape = TensorShape{ TensorDims{ - FFOrdered{ - b + 1, + FFOrdered{ + b + 1_n, m, p, }, @@ -78,9 +78,9 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("mismatched m") { TensorShape input_rhs_shape = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ b, - m + 1, + m + 1_n, p, }, }, @@ -95,27 +95,27 @@ TEST_SUITE(FF_TEST_SUITE) { } TEST_CASE("get_output_shape(BatchMatmulAttrs, ParallelTensorShape)") { - size_t b = 2 * 2; - int o_b = 2; - size_t m = 3 * 3; - int o_m = 3; - size_t n = 5 * 5; - int o_n = 5; - size_t p = 7 * 7; - int o_p = 7; - int o_sum = 11; + nonnegative_int b = 2_n * 2_n; + nonnegative_int o_b = 2_n; + nonnegative_int m = 3_n * 3_n; + nonnegative_int o_m = 3_n; + nonnegative_int n = 5_n * 5_n; + nonnegative_int o_n = 5_n; + nonnegative_int p = 7_n * 7_n; + nonnegative_int o_p = 7_n; + nonnegative_int o_sum = 11_n; BatchMatmulAttrs attrs = BatchMatmulAttrs{ - /*a_seq_length_dim=*/0, // TODO figure out if these arguments are still - // relevant - /*b_seq_length_dim=*/0, + /*a_seq_length_dim=*/0_n, // TODO figure out if these arguments are + // still relevant + /*b_seq_length_dim=*/0_n, }; auto make_lhs = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_b, - int o_n, - int o_m) { + nonnegative_int o_b, + nonnegative_int o_n, + nonnegative_int o_m) { return ParallelTensorShape{ 
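// ---------------------------------------------------------------------------
// [editor's aside -- illustration only, not part of the patch]
// Paraphrase of the degree propagation these attention subcases assert:
// replicating q/k/v across o_h heads (discard-copy) surfaces as an o_h-way
// sum in the output, while the batch degree o_b stays a shard degree.
#include <cassert>
struct ReplicaDegrees {
  int sum;
  int discard_copy;
};
static ReplicaDegrees attention_output_replica_degrees(int o_h) {
  return ReplicaDegrees{/*sum=*/o_h, /*discard_copy=*/1};
}
int main() {
  ReplicaDegrees out = attention_output_replica_degrees(/*o_h=*/2);
  assert(out.sum == 2 && out.discard_copy == 1);
}
// ---------------------------------------------------------------------------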
ParallelTensorDims{ FFOrdered{ @@ -134,9 +134,9 @@ TEST_SUITE(FF_TEST_SUITE) { auto make_rhs = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_b, - int o_m, - int o_p) { + nonnegative_int o_b, + nonnegative_int o_m, + nonnegative_int o_p) { return ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ @@ -155,9 +155,9 @@ TEST_SUITE(FF_TEST_SUITE) { auto make_output = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_b, - int o_n, - int o_p) { + nonnegative_int o_b, + nonnegative_int o_n, + nonnegative_int o_p) { return ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ @@ -177,10 +177,10 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("data parallel") { tl::expected result = get_output_shape( attrs, - make_lhs(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1), - make_rhs(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1)); + make_lhs(SumDegree{1_n}, DiscardCopyDegree{1_n}, o_b, 1_n, 1_n), + make_rhs(SumDegree{1_n}, DiscardCopyDegree{1_n}, o_b, 1_n, 1_n)); tl::expected correct = - make_output(SumDegree{1}, DiscardCopyDegree{1}, o_b, 1, 1); + make_output(SumDegree{1_n}, DiscardCopyDegree{1_n}, o_b, 1_n, 1_n); CHECK(result == correct); } @@ -188,10 +188,10 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("n parallel") { tl::expected result = get_output_shape( attrs, - make_lhs(SumDegree{1}, DiscardCopyDegree{1}, 1, o_n, 1), - make_rhs(SumDegree{1}, DiscardCopyDegree{o_n}, 1, 1, 1)); + make_lhs(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, o_n, 1_n), + make_rhs(SumDegree{1_n}, DiscardCopyDegree{o_n}, 1_n, 1_n, 1_n)); tl::expected correct = - make_output(SumDegree{1}, DiscardCopyDegree{1}, 1, o_n, 1); + make_output(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, o_n, 1_n); CHECK(result == correct); } @@ -199,10 +199,10 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("p parallel") { tl::expected result = get_output_shape( attrs, - make_lhs(SumDegree{1}, DiscardCopyDegree{o_p}, 1, 1, 1), - make_rhs(SumDegree{1}, DiscardCopyDegree{1}, 1, 1, o_p)); + make_lhs(SumDegree{1_n}, DiscardCopyDegree{o_p}, 1_n, 1_n, 1_n), + make_rhs(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, 1_n, o_p)); tl::expected correct = - make_output(SumDegree{1}, DiscardCopyDegree{1}, 1, 1, o_p); + make_output(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, 1_n, o_p); CHECK(result == correct); } @@ -210,10 +210,10 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("reduction parallel") { tl::expected result = get_output_shape( attrs, - make_lhs(SumDegree{1}, DiscardCopyDegree{1}, 1, 1, o_m), - make_rhs(SumDegree{1}, DiscardCopyDegree{1}, 1, o_m, 1)); + make_lhs(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, 1_n, o_m), + make_rhs(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, o_m, 1_n)); tl::expected correct = - make_output(SumDegree{o_m}, DiscardCopyDegree{1}, 1, 1, 1); + make_output(SumDegree{o_m}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n); CHECK(result == correct); } @@ -221,10 +221,10 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("propagate reduction lhs") { tl::expected result = get_output_shape( attrs, - make_lhs(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, 1, 1), - make_rhs(SumDegree{1}, DiscardCopyDegree{o_sum}, 1, 1, 1)); + make_lhs(SumDegree{o_sum}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n), + make_rhs(SumDegree{1_n}, DiscardCopyDegree{o_sum}, 1_n, 1_n, 1_n)); tl::expected correct = - make_output(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, 1, 1); + make_output(SumDegree{o_sum}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n); CHECK(result == correct); } @@ -232,10 +232,10 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("propagate reduction rhs") { tl::expected result = get_output_shape( attrs, - 
make_lhs(SumDegree{1}, DiscardCopyDegree{o_sum}, 1, 1, 1), - make_rhs(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, 1, 1)); + make_lhs(SumDegree{1_n}, DiscardCopyDegree{o_sum}, 1_n, 1_n, 1_n), + make_rhs(SumDegree{o_sum}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n)); tl::expected correct = - make_output(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, 1, 1); + make_output(SumDegree{o_sum}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n); CHECK(result == correct); } @@ -243,10 +243,10 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("reduction lhs & reduction rhs") { tl::expected result = get_output_shape( attrs, - make_lhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum}, 1, 1, 1), - make_rhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum}, 1, 1, 1)); - tl::expected correct = - make_output(SumDegree{o_sum * o_sum}, DiscardCopyDegree{1}, 1, 1, 1); + make_lhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum}, 1_n, 1_n, 1_n), + make_rhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum}, 1_n, 1_n, 1_n)); + tl::expected correct = make_output( + SumDegree{o_sum * o_sum}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n); CHECK(result == correct); } @@ -254,8 +254,8 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("reduction lhs & rhs (invalid)") { tl::expected result = get_output_shape( attrs, - make_lhs(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, 1, 1), - make_rhs(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, 1, 1)); + make_lhs(SumDegree{o_sum}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n), + make_rhs(SumDegree{o_sum}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n)); CHECK_MESSAGE( !result.has_value(), "Unexpected successful value: ", result); @@ -264,10 +264,11 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("reduction lhs & n") { tl::expected result = get_output_shape( attrs, - make_lhs(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, o_n, 1), - make_rhs(SumDegree{1}, DiscardCopyDegree{o_sum * o_n}, 1, 1, 1)); + make_lhs(SumDegree{o_sum}, DiscardCopyDegree{1_n}, 1_n, o_n, 1_n), + make_rhs( + SumDegree{1_n}, DiscardCopyDegree{o_sum * o_n}, 1_n, 1_n, 1_n)); tl::expected correct = - make_output(SumDegree{o_sum}, DiscardCopyDegree{1}, 1, o_n, 1); + make_output(SumDegree{o_sum}, DiscardCopyDegree{1_n}, 1_n, o_n, 1_n); CHECK(result == correct); } @@ -275,10 +276,11 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("reduction lhs & reduction rhs & n") { tl::expected result = get_output_shape( attrs, - make_lhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum}, 1, o_n, 1), - make_rhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum * o_n}, 1, 1, 1)); + make_lhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum}, 1_n, o_n, 1_n), + make_rhs( + SumDegree{o_sum}, DiscardCopyDegree{o_sum * o_n}, 1_n, 1_n, 1_n)); tl::expected correct = make_output( - SumDegree{o_sum * o_sum}, DiscardCopyDegree{1}, 1, o_n, 1); + SumDegree{o_sum * o_sum}, DiscardCopyDegree{1_n}, 1_n, o_n, 1_n); CHECK(result == correct); } @@ -286,11 +288,15 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("reduction lhs & reduction rhs & n & m") { tl::expected result = get_output_shape( attrs, - make_lhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum}, 1, o_n, o_m), + make_lhs(SumDegree{o_sum}, DiscardCopyDegree{o_sum}, 1_n, o_n, o_m), make_rhs( - SumDegree{o_sum}, DiscardCopyDegree{o_sum * o_n}, 1, o_m, 1)); - tl::expected correct = make_output( - SumDegree{o_sum * o_sum * o_m}, DiscardCopyDegree{1}, 1, o_n, 1); + SumDegree{o_sum}, DiscardCopyDegree{o_sum * o_n}, 1_n, o_m, 1_n)); + tl::expected correct = + make_output(SumDegree{o_sum * o_sum * o_m}, + DiscardCopyDegree{1_n}, + 1_n, + o_n, + 1_n); CHECK(result == correct); } diff --git a/lib/op-attrs/test/src/op-attrs/ops/batch_norm.cc 
b/lib/op-attrs/test/src/op-attrs/ops/batch_norm.cc index 4196394d00..cd9796945c 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/batch_norm.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/batch_norm.cc @@ -60,11 +60,11 @@ TEST_SUITE(FF_TEST_SUITE) { }(); TensorShape input = TensorShape{ - TensorDims{FFOrdered{ - 12, - 14, - 16, - 18, + TensorDims{FFOrdered{ + 12_n, + 14_n, + 16_n, + 18_n, }}, DataType::FLOAT, }; @@ -72,8 +72,8 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape output = input; TensorShape gamma = TensorShape{ - TensorDims{FFOrdered{ - 14, + TensorDims{FFOrdered{ + 14_n, }}, DataType::FLOAT, }; @@ -140,16 +140,16 @@ TEST_SUITE(FF_TEST_SUITE) { }(); SUBCASE("partition parallelism (in channel dim)") { - int degree = 2; + nonnegative_int degree = 2_n; ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{1}, - FFOrdered{ - 1, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + FFOrdered{ + 1_n, degree, - 1, - 1, + 1_n, + 1_n, }, }; @@ -169,9 +169,9 @@ TEST_SUITE(FF_TEST_SUITE) { get_gamma_weights_parallel_dim_degrees(attrs_affine_true, input); tl::expected correct = ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{1}, - FFOrdered{degree}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + FFOrdered{degree}, }; CHECK(result == correct); @@ -194,9 +194,9 @@ TEST_SUITE(FF_TEST_SUITE) { get_beta_weights_parallel_dim_degrees(attrs_affine_true, input); tl::expected correct = ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{1}, - FFOrdered{degree}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + FFOrdered{degree}, }; CHECK(result == correct); @@ -214,12 +214,12 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("partition parallelism (not in channel dim)") { - int degree = 2; + nonnegative_int degree = 2_n; ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{1}, - FFOrdered{1, 1, degree, 1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + FFOrdered{1_n, 1_n, degree, 1_n}, }; SUBCASE("get_output_parallel_dim_degrees(BatchNormAttrs, " @@ -251,12 +251,12 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("sum parallelism") { - SumDegree sum_degree = SumDegree{2}; + SumDegree sum_degree = SumDegree{2_n}; ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ sum_degree, - DiscardCopyDegree{1}, - FFOrdered{1, 1, 1, 1}, + DiscardCopyDegree{1_n}, + FFOrdered{1_n, 1_n, 1_n, 1_n}, }; SUBCASE("get_output_parallel_dim_degrees(BatchNormAttrs, " @@ -288,12 +288,12 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("discard copy parallelism") { - DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{2}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{2_n}; ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{1}, + SumDegree{1_n}, discard_copy_degree, - FFOrdered{1, 1, 1, 1}, + FFOrdered{1_n, 1_n, 1_n, 1_n}, }; SUBCASE("get_output_parallel_dim_degrees(BatchNormAttrs, " @@ -340,14 +340,14 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{12, 1}, - ShardParallelDim{14, 2}, - ShardParallelDim{16, 1}, - ShardParallelDim{18, 1}, + ShardParallelDim{12_n, 1_n}, + ShardParallelDim{14_n, 2_n}, + ShardParallelDim{16_n, 1_n}, + ShardParallelDim{18_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -368,11 +368,11 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{14, 2}, + ShardParallelDim{14_n, 2_n}, 
}, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -388,11 +388,11 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{14, 2}, + ShardParallelDim{14_n, 2_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, diff --git a/lib/op-attrs/test/src/op-attrs/ops/cast.cc b/lib/op-attrs/test/src/op-attrs/ops/cast.cc index c7395316ad..e9ec890b4b 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/cast.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/cast.cc @@ -12,15 +12,15 @@ TEST_SUITE(FF_TEST_SUITE) { CastAttrs attrs = CastAttrs{output_datatype}; - size_t d1 = 12; - size_t d2 = 16; + nonnegative_int d1 = 12_n; + nonnegative_int d2 = 16_n; TensorShape input = TensorShape{ - TensorDims{FFOrdered{d1, d2}}, + TensorDims{FFOrdered{d1, d2}}, input_datatype, }; TensorShape output = TensorShape{ - TensorDims{FFOrdered{d1, d2}}, + TensorDims{FFOrdered{d1, d2}}, output_datatype, }; @@ -34,24 +34,30 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("get_output_shape(CastAttrs, ParallelTensorShape)") { auto make_input = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_batch, - int o_features) { + nonnegative_int o_batch, + nonnegative_int o_features) { return lift_to_parallel_with_degrees( - input, o_sum, o_eq, FFOrdered{o_batch, o_features}); + input, + o_sum, + o_eq, + FFOrdered{o_batch, o_features}); }; auto make_output = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_batch, - int o_outchannels) { + nonnegative_int o_batch, + nonnegative_int o_outchannels) { return lift_to_parallel_with_degrees( - output, o_sum, o_eq, FFOrdered{o_batch, o_outchannels}); + output, + o_sum, + o_eq, + FFOrdered{o_batch, o_outchannels}); }; - SumDegree sum_degree = SumDegree{2}; - DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{3}; - int batch_degree = 4; - int feature_degree = 8; + SumDegree sum_degree = SumDegree{2_n}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{3_n}; + nonnegative_int batch_degree = 4_n; + nonnegative_int feature_degree = 8_n; ParallelTensorShape par_input = make_input( sum_degree, discard_copy_degree, batch_degree, feature_degree); diff --git a/lib/op-attrs/test/src/op-attrs/ops/combine.cc b/lib/op-attrs/test/src/op-attrs/ops/combine.cc index 577961b7b1..14fbca5b3a 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/combine.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/combine.cc @@ -10,22 +10,22 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{12, 2}, - ShardParallelDim{14, 1}, - ShardParallelDim{16, 3}, - ShardParallelDim{18, 2}, + ShardParallelDim{12_n, 2_n}, + ShardParallelDim{14_n, 1_n}, + ShardParallelDim{16_n, 3_n}, + ShardParallelDim{18_n, 2_n}, }, ReplicaParallelDimSet{ - SumDegree{3}, - DiscardCopyDegree{2}, + SumDegree{3_n}, + DiscardCopyDegree{2_n}, }, }, DataType::FLOAT, }; SUBCASE("valid") { - ff_dim_t dim = ff_dim_t{nonnegative_int{2}}; - int degree = 3; + ff_dim_t dim = ff_dim_t{2_n}; + nonnegative_int degree = 3_n; CombineAttrs attrs = CombineAttrs{ /*repartition_dim=*/dim, /*repartition_degree=*/degree, @@ -44,8 +44,8 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("invalid") { - ff_dim_t dim = ff_dim_t{nonnegative_int{2}}; - int degree = 4; + ff_dim_t dim = ff_dim_t{2_n}; + nonnegative_int degree = 4_n; CombineAttrs attrs = CombineAttrs{ /*repartition_dim=*/dim, 
/*repartition_degree=*/degree, diff --git a/lib/op-attrs/test/src/op-attrs/ops/concat.cc b/lib/op-attrs/test/src/op-attrs/ops/concat.cc index 2d9842b1dd..b84cf38753 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/concat.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/concat.cc @@ -23,12 +23,12 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(result == correct); } - size_t dim0_size = 12; - size_t dim2_size = 20; + nonnegative_int dim0_size = 12_n; + nonnegative_int dim2_size = 20_n; TensorShape input_shape1 = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ dim0_size, - 14, + 14_n, dim2_size, }}, DataType::FLOAT, @@ -45,26 +45,26 @@ TEST_SUITE(FF_TEST_SUITE) { } TensorShape input_shape2 = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ dim0_size, - 16, + 16_n, dim2_size, }}, DataType::FLOAT, }; TensorShape input_shape3 = TensorShape{ - TensorDims{FFOrdered{dim0_size, 18, dim2_size}}, + TensorDims{FFOrdered{dim0_size, 18_n, dim2_size}}, DataType::FLOAT, }; SUBCASE("input shapes do not shared the same num_dims") { TensorShape mismatched_num_dims = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ dim0_size, - 20, + 20_n, dim2_size, - 1, + 1_n, }}, DataType::FLOAT, }; @@ -101,9 +101,9 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_output_shape(attrs, input_shapes); tl::expected correct = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ dim0_size, - 14 + 16 + 18, + 14_n + 16_n + 18_n, dim2_size, }}, DataType::FLOAT, @@ -118,84 +118,97 @@ TEST_SUITE(FF_TEST_SUITE) { ff_dim_t{nonnegative_int{1}}, }; - size_t dim0_size = 12; - size_t dim2_size = 20; + nonnegative_int dim0_size = 12_n; + nonnegative_int dim2_size = 20_n; TensorShape input_shape1 = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ dim0_size, - 14, + 14_n, dim2_size, }}, DataType::FLOAT, }; TensorShape input_shape2 = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ dim0_size, - 16, + 16_n, dim2_size, }}, DataType::FLOAT, }; TensorShape input_shape3 = TensorShape{ - TensorDims{FFOrdered{dim0_size, 18, dim2_size}}, + TensorDims{FFOrdered{dim0_size, 18_n, dim2_size}}, DataType::FLOAT, }; TensorShape output_shape = TensorShape{ - TensorDims{FFOrdered{dim0_size, 14 + 16 + 18, dim2_size}}, + TensorDims{FFOrdered{ + dim0_size, 14_n + 16_n + 18_n, dim2_size}}, DataType::FLOAT, }; - auto lift_input1 = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o0, int o1, int o2) { - return lift_to_parallel_with_degrees( - input_shape1, o_sum, o_eq, FFOrdered{o0, o1, o2}); - }; + auto lift_input1 = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o0, + nonnegative_int o1, + nonnegative_int o2) { + return lift_to_parallel_with_degrees( + input_shape1, o_sum, o_eq, FFOrdered{o0, o1, o2}); + }; - auto lift_input2 = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o0, int o1, int o2) { - return lift_to_parallel_with_degrees( - input_shape2, o_sum, o_eq, FFOrdered{o0, o1, o2}); - }; + auto lift_input2 = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o0, + nonnegative_int o1, + nonnegative_int o2) { + return lift_to_parallel_with_degrees( + input_shape2, o_sum, o_eq, FFOrdered{o0, o1, o2}); + }; - auto lift_input3 = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o0, int o1, int o2) { - return lift_to_parallel_with_degrees( - input_shape3, o_sum, o_eq, FFOrdered{o0, o1, o2}); - }; + auto lift_input3 = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o0, + nonnegative_int o1, + nonnegative_int o2) { + return lift_to_parallel_with_degrees( 
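// ---------------------------------------------------------------------------
// [editor's aside -- illustration only, not part of the patch]
// The concat rule these tests exercise: sizes along the concat axis add up,
// and every other dim must match. Checking the 14 + 16 + 18 case above:
#include <cassert>
#include <vector>
static int concat_axis_size(std::vector<int> const &axis_sizes) {
  int total = 0;
  for (int s : axis_sizes) {
    total += s;
  }
  return total;
}
int main() {
  assert(concat_axis_size({14, 16, 18}) == 48); // matches 14_n + 16_n + 18_n
}
// ---------------------------------------------------------------------------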
+ input_shape3, o_sum, o_eq, FFOrdered{o0, o1, o2}); + }; - auto lift_output = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o0, int o1, int o2) { - return lift_to_parallel_with_degrees( - output_shape, o_sum, o_eq, FFOrdered{o0, o1, o2}); - }; + auto lift_output = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o0, + nonnegative_int o1, + nonnegative_int o2) { + return lift_to_parallel_with_degrees( + output_shape, o_sum, o_eq, FFOrdered{o0, o1, o2}); + }; SUBCASE("sum reduction parallelism") { SUBCASE("matching") { - SumDegree sum_degree = SumDegree{2}; + SumDegree sum_degree = SumDegree{2_n}; std::vector inputs = { - lift_input1(sum_degree, DiscardCopyDegree{1}, 1, 1, 1), - lift_input2(sum_degree, DiscardCopyDegree{1}, 1, 1, 1), - lift_input3(sum_degree, DiscardCopyDegree{1}, 1, 1, 1), + lift_input1(sum_degree, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n), + lift_input2(sum_degree, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n), + lift_input3(sum_degree, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n), }; tl::expected result = get_output_shape(attrs, inputs); tl::expected correct = - lift_output(sum_degree, DiscardCopyDegree{1}, 1, 1, 1); + lift_output(sum_degree, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n); CHECK(result == correct); } SUBCASE("not matching") { std::vector inputs = { - lift_input1(SumDegree{2}, DiscardCopyDegree{1}, 1, 1, 1), - lift_input2(SumDegree{4}, DiscardCopyDegree{1}, 1, 1, 1), - lift_input3(SumDegree{4}, DiscardCopyDegree{1}, 1, 1, 1), + lift_input1(SumDegree{2_n}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n), + lift_input2(SumDegree{4_n}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n), + lift_input3(SumDegree{4_n}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n), }; std::optional result = @@ -208,27 +221,27 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("discard copy reduction parallelism") { SUBCASE("matching") { - DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{2}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{2_n}; std::vector inputs = { - lift_input1(SumDegree{1}, discard_copy_degree, 1, 1, 1), - lift_input2(SumDegree{1}, discard_copy_degree, 1, 1, 1), - lift_input3(SumDegree{1}, discard_copy_degree, 1, 1, 1), + lift_input1(SumDegree{1_n}, discard_copy_degree, 1_n, 1_n, 1_n), + lift_input2(SumDegree{1_n}, discard_copy_degree, 1_n, 1_n, 1_n), + lift_input3(SumDegree{1_n}, discard_copy_degree, 1_n, 1_n, 1_n), }; tl::expected result = get_output_shape(attrs, inputs); tl::expected correct = - lift_output(SumDegree{1}, discard_copy_degree, 1, 1, 1); + lift_output(SumDegree{1_n}, discard_copy_degree, 1_n, 1_n, 1_n); CHECK(result == correct); } SUBCASE("not matching") { std::vector inputs = { - lift_input1(SumDegree{1}, DiscardCopyDegree{2}, 1, 1, 1), - lift_input2(SumDegree{1}, DiscardCopyDegree{2}, 1, 1, 1), - lift_input3(SumDegree{1}, DiscardCopyDegree{4}, 1, 1, 1), + lift_input1(SumDegree{1_n}, DiscardCopyDegree{2_n}, 1_n, 1_n, 1_n), + lift_input2(SumDegree{1_n}, DiscardCopyDegree{2_n}, 1_n, 1_n, 1_n), + lift_input3(SumDegree{1_n}, DiscardCopyDegree{4_n}, 1_n, 1_n, 1_n), }; std::optional result = @@ -241,12 +254,15 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("parallelism in axis dim") { SUBCASE("matching") { - int degree = 2; + nonnegative_int degree = 2_n; std::vector inputs = { - lift_input1(SumDegree{1}, DiscardCopyDegree{1}, 1, degree, 1), - lift_input2(SumDegree{1}, DiscardCopyDegree{1}, 1, degree, 1), - lift_input3(SumDegree{1}, DiscardCopyDegree{1}, 1, degree, 1), + lift_input1( + SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, degree, 1_n), + lift_input2( + SumDegree{1_n}, 
DiscardCopyDegree{1_n}, 1_n, degree, 1_n), + lift_input3( + SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, degree, 1_n), }; std::optional result = @@ -258,9 +274,9 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("not matching") { std::vector inputs = { - lift_input1(SumDegree{1}, DiscardCopyDegree{1}, 1, 1, 1), - lift_input2(SumDegree{1}, DiscardCopyDegree{1}, 1, 1, 1), - lift_input3(SumDegree{1}, DiscardCopyDegree{1}, 1, 2, 1), + lift_input1(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n), + lift_input2(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n), + lift_input3(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, 2_n, 1_n), }; std::optional result = @@ -273,31 +289,31 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("parallelism in non-axis shard dims") { SUBCASE("matching") { - int degree0 = 2; - int degree2 = 4; + nonnegative_int degree0 = 2_n; + nonnegative_int degree2 = 4_n; std::vector inputs = { lift_input1( - SumDegree{1}, DiscardCopyDegree{1}, degree0, 1, degree2), + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree0, 1_n, degree2), lift_input2( - SumDegree{1}, DiscardCopyDegree{1}, degree0, 1, degree2), + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree0, 1_n, degree2), lift_input3( - SumDegree{1}, DiscardCopyDegree{1}, degree0, 1, degree2), + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree0, 1_n, degree2), }; tl::expected result = get_output_shape(attrs, inputs); tl::expected correct = lift_output( - SumDegree{1}, DiscardCopyDegree{1}, degree0, 1, degree2); + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree0, 1_n, degree2); CHECK(result == correct); } SUBCASE("not matching") { std::vector inputs = { - lift_input1(SumDegree{1}, DiscardCopyDegree{1}, 2, 1, 4), - lift_input2(SumDegree{1}, DiscardCopyDegree{1}, 4, 1, 2), - lift_input3(SumDegree{1}, DiscardCopyDegree{1}, 4, 1, 2), + lift_input1(SumDegree{1_n}, DiscardCopyDegree{1_n}, 2_n, 1_n, 4_n), + lift_input2(SumDegree{1_n}, DiscardCopyDegree{1_n}, 4_n, 1_n, 2_n), + lift_input3(SumDegree{1_n}, DiscardCopyDegree{1_n}, 4_n, 1_n, 2_n), }; std::optional result = @@ -309,21 +325,21 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("parallelism degrees are not mutually exclusive") { - SumDegree sum_degree = SumDegree{3}; - DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{5}; - int degree0 = 2; - int degree2 = 4; + SumDegree sum_degree = SumDegree{3_n}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{5_n}; + nonnegative_int degree0 = 2_n; + nonnegative_int degree2 = 4_n; std::vector inputs = { - lift_input1(sum_degree, discard_copy_degree, degree0, 1, degree2), - lift_input2(sum_degree, discard_copy_degree, degree0, 1, degree2), - lift_input3(sum_degree, discard_copy_degree, degree0, 1, degree2), + lift_input1(sum_degree, discard_copy_degree, degree0, 1_n, degree2), + lift_input2(sum_degree, discard_copy_degree, degree0, 1_n, degree2), + lift_input3(sum_degree, discard_copy_degree, degree0, 1_n, degree2), }; tl::expected result = get_output_shape(attrs, inputs); tl::expected correct = - lift_output(sum_degree, discard_copy_degree, degree0, 1, degree2); + lift_output(sum_degree, discard_copy_degree, degree0, 1_n, degree2); CHECK(result == correct); } diff --git a/lib/op-attrs/test/src/op-attrs/ops/conv_2d.cc b/lib/op-attrs/test/src/op-attrs/ops/conv_2d.cc index 7abb98f3e3..f5006d4352 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/conv_2d.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/conv_2d.cc @@ -7,14 +7,14 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_conv2d_incoming_tensor_roles(Conv2DAttrs") { auto make_attrs = 
[](bool use_bias) { - return Conv2DAttrs{/*out_channels=*/4, - /*kernel_h=*/3, - /*kernel_w=*/2, - /*stride_h=*/2, - /*stride_w=*/2, - /*padding_h=*/1, - /*padding_w=*/1, - /*groups=*/1, + return Conv2DAttrs{/*out_channels=*/4_n, + /*kernel_h=*/3_n, + /*kernel_w=*/2_n, + /*stride_h=*/2_n, + /*stride_w=*/2_n, + /*padding_h=*/1_n, + /*padding_w=*/1_n, + /*groups=*/1_n, /*activation=*/std::nullopt, /*use_bias=*/use_bias}; }; @@ -48,14 +48,14 @@ TEST_SUITE(FF_TEST_SUITE) { } TEST_CASE("Conv2D shape inference") { - int out_channels = 4; - int kernel_h = 3; - int kernel_w = 2; - int stride_h = 2; - int stride_w = 2; - int padding_h = 1; - int padding_w = 1; - int groups = 1; + nonnegative_int out_channels = 4_n; + nonnegative_int kernel_h = 3_n; + nonnegative_int kernel_w = 2_n; + nonnegative_int stride_h = 2_n; + nonnegative_int stride_w = 2_n; + nonnegative_int padding_h = 1_n; + nonnegative_int padding_w = 1_n; + nonnegative_int groups = 1_n; std::optional activation = std::nullopt; bool use_bias = true; @@ -72,13 +72,13 @@ TEST_SUITE(FF_TEST_SUITE) { /*use_bias=*/true, }; - size_t num_samples = 7; - size_t input_channels = 4; - size_t input_height = 11; - size_t input_width = 15; + nonnegative_int num_samples = 7_n; + nonnegative_int input_channels = 4_n; + nonnegative_int input_height = 11_n; + nonnegative_int input_width = 15_n; TensorShape input = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ num_samples, input_channels, input_height, @@ -87,13 +87,13 @@ TEST_SUITE(FF_TEST_SUITE) { DataType::FLOAT, }; - size_t output_height = 6; - size_t output_width = 8; + nonnegative_int output_height = 6_n; + nonnegative_int output_width = 8_n; TensorShape output = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ num_samples, - size_t_from_int(out_channels), + out_channels, output_height, output_width, }}, @@ -101,18 +101,18 @@ TEST_SUITE(FF_TEST_SUITE) { }; TensorShape kernel = TensorShape{ - TensorDims{FFOrdered{ - size_t_from_int(out_channels), + TensorDims{FFOrdered{ + out_channels, input_channels, - size_t_from_int(kernel_h), - size_t_from_int(kernel_w), + kernel_h, + kernel_w, }}, DataType::FLOAT, }; TensorShape bias = TensorShape{ - TensorDims{FFOrdered{ - size_t_from_int(out_channels), + TensorDims{FFOrdered{ + out_channels, }}, DataType::FLOAT, }; @@ -137,147 +137,149 @@ TEST_SUITE(FF_TEST_SUITE) { auto make_input = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_n, - int o_c, - int o_h, - int o_w) { + nonnegative_int o_n, + nonnegative_int o_c, + nonnegative_int o_h, + nonnegative_int o_w) { return lift_to_parallel_with_degrees( - input, o_sum, o_eq, FFOrdered{o_n, o_c, o_h, o_w}); + input, o_sum, o_eq, FFOrdered{o_n, o_c, o_h, o_w}); }; auto make_output = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_n, - int o_c, - int o_h, - int o_w) { + nonnegative_int o_n, + nonnegative_int o_c, + nonnegative_int o_h, + nonnegative_int o_w) { return lift_to_parallel_with_degrees( - output, o_sum, o_eq, FFOrdered{o_n, o_c, o_h, o_w}); + output, o_sum, o_eq, FFOrdered{o_n, o_c, o_h, o_w}); }; auto make_kernel = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_outchannels, - int o_inchannels, - int o_kernel_h, - int o_kernel_w) { + nonnegative_int o_outchannels, + nonnegative_int o_inchannels, + nonnegative_int o_kernel_h, + nonnegative_int o_kernel_w) { return lift_to_parallel_with_degrees( kernel, o_sum, o_eq, - FFOrdered{o_outchannels, o_inchannels, o_kernel_h, o_kernel_w}); + FFOrdered{ + o_outchannels, o_inchannels, o_kernel_h, o_kernel_w}); }; - auto 
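// ---------------------------------------------------------------------------
// [editor's aside -- illustration only, not part of the patch]
// Worked check of the conv2d numbers in this test, using the usual
// output-size rule (the same formula as calculate_output_size in pool_2d.cc):
#include <cassert>
static int conv_output_size(int in, int pad, int kernel, int stride) {
  return (in + 2 * pad - kernel) / stride + 1;
}
int main() {
  assert(conv_output_size(11, /*pad=*/1, /*kernel=*/3, /*stride=*/2) == 6); // output_height
  assert(conv_output_size(15, /*pad=*/1, /*kernel=*/2, /*stride=*/2) == 8); // output_width
}
// ---------------------------------------------------------------------------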
make_bias = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o_outchannels) { - return lift_to_parallel_with_degrees( - bias, o_sum, o_eq, FFOrdered{o_outchannels}); - }; + auto make_bias = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o_outchannels) { + return lift_to_parallel_with_degrees( + bias, o_sum, o_eq, FFOrdered{o_outchannels}); + }; SUBCASE("data parallelism") { - int degree = 2; - ParallelTensorShape par_input = - make_input(SumDegree{1}, DiscardCopyDegree{1}, degree, 1, 1, 1); + nonnegative_int degree = 2_n; + ParallelTensorShape par_input = make_input( + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree, 1_n, 1_n, 1_n); SUBCASE("get_output_shape") { ParallelTensorShape result = get_output_shape(attrs, par_input); - ParallelTensorShape correct = - make_output(SumDegree{1}, DiscardCopyDegree{1}, degree, 1, 1, 1); + ParallelTensorShape correct = make_output( + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree, 1_n, 1_n, 1_n); CHECK(result == correct); } SUBCASE("get_kernel_shape") { ParallelTensorShape result = get_kernel_shape(attrs, par_input); - ParallelTensorShape correct = - make_kernel(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1, 1, 1); + ParallelTensorShape correct = make_kernel( + SumDegree{1_n}, DiscardCopyDegree{degree}, 1_n, 1_n, 1_n, 1_n); CHECK(result == correct); } SUBCASE("get_bias_shape") { ParallelTensorShape result = get_bias_shape(attrs, par_input); ParallelTensorShape correct = - make_bias(SumDegree{1}, DiscardCopyDegree{degree}, 1); + make_bias(SumDegree{1_n}, DiscardCopyDegree{degree}, 1_n); CHECK(result == correct); } } SUBCASE("input channel parallelism") { - int degree = 2; - ParallelTensorShape par_input = - make_input(SumDegree{1}, DiscardCopyDegree{1}, 1, degree, 1, 1); + nonnegative_int degree = 2_n; + ParallelTensorShape par_input = make_input( + SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, degree, 1_n, 1_n); SUBCASE("get_output_shape") { ParallelTensorShape result = get_output_shape(attrs, par_input); - ParallelTensorShape correct = - make_output(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1, 1); + ParallelTensorShape correct = make_output( + SumDegree{degree}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n, 1_n); CHECK(result == correct); } SUBCASE("get_kernel_shape") { ParallelTensorShape result = get_kernel_shape(attrs, par_input); - ParallelTensorShape correct = - make_kernel(SumDegree{1}, DiscardCopyDegree{1}, 1, degree, 1, 1); + ParallelTensorShape correct = make_kernel( + SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, degree, 1_n, 1_n); CHECK(result == correct); } SUBCASE("get_bias_shape") { ParallelTensorShape result = get_bias_shape(attrs, par_input); ParallelTensorShape correct = - make_bias(SumDegree{degree}, DiscardCopyDegree{1}, 1); + make_bias(SumDegree{degree}, DiscardCopyDegree{1_n}, 1_n); CHECK(result == correct); } } SUBCASE("output channel parallelism") { - int degree = 2; - ParallelTensorShape par_input = - make_input(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1, 1, 1); + nonnegative_int degree = 2_n; + ParallelTensorShape par_input = make_input( + SumDegree{1_n}, DiscardCopyDegree{degree}, 1_n, 1_n, 1_n, 1_n); SUBCASE("get_output_shape") { ParallelTensorShape result = get_output_shape(attrs, par_input); - ParallelTensorShape correct = - make_output(SumDegree{1}, DiscardCopyDegree{1}, 1, degree, 1, 1); + ParallelTensorShape correct = make_output( + SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, degree, 1_n, 1_n); CHECK(result == correct); } SUBCASE("get_kernel_shape") { ParallelTensorShape result = 
get_kernel_shape(attrs, par_input); - ParallelTensorShape correct = - make_kernel(SumDegree{1}, DiscardCopyDegree{1}, degree, 1, 1, 1); + ParallelTensorShape correct = make_kernel( + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree, 1_n, 1_n, 1_n); CHECK(result == correct); } SUBCASE("get_bias_shape") { ParallelTensorShape result = get_bias_shape(attrs, par_input); ParallelTensorShape correct = - make_bias(SumDegree{1}, DiscardCopyDegree{1}, degree); + make_bias(SumDegree{1_n}, DiscardCopyDegree{1_n}, degree); CHECK(result == correct); } } SUBCASE("propagating sum degree") { - int degree = 2; - ParallelTensorShape par_input = - make_input(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1, 1); + nonnegative_int degree = 2_n; + ParallelTensorShape par_input = make_input( + SumDegree{degree}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n, 1_n); SUBCASE("get_output_shape") { ParallelTensorShape result = get_output_shape(attrs, par_input); - ParallelTensorShape correct = - make_output(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1, 1); + ParallelTensorShape correct = make_output( + SumDegree{degree}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n, 1_n); CHECK(result == correct); } SUBCASE("get_kernel_shape") { ParallelTensorShape result = get_kernel_shape(attrs, par_input); - ParallelTensorShape correct = - make_kernel(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1, 1, 1); + ParallelTensorShape correct = make_kernel( + SumDegree{1_n}, DiscardCopyDegree{degree}, 1_n, 1_n, 1_n, 1_n); CHECK(result == correct); } SUBCASE("get_bias_shape") { ParallelTensorShape result = get_bias_shape(attrs, par_input); ParallelTensorShape correct = - make_bias(SumDegree{degree}, DiscardCopyDegree{1}, 1); + make_bias(SumDegree{degree}, DiscardCopyDegree{1_n}, 1_n); CHECK(result == correct); } } diff --git a/lib/op-attrs/test/src/op-attrs/ops/dropout.cc b/lib/op-attrs/test/src/op-attrs/ops/dropout.cc index 7580de24e5..e1a03a7613 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/dropout.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/dropout.cc @@ -15,10 +15,10 @@ TEST_SUITE(FF_TEST_SUITE) { }; TensorShape input = TensorShape{ - TensorDims{FFOrdered{ - 12, - 14, - 16, + TensorDims{FFOrdered{ + 12_n, + 14_n, + 16_n, }}, DataType::FLOAT, }; @@ -36,48 +36,54 @@ TEST_SUITE(FF_TEST_SUITE) { }; TensorShape input = TensorShape{ - TensorDims{FFOrdered{ - 12, - 14, - 16, + TensorDims{FFOrdered{ + 12_n, + 14_n, + 16_n, }}, DataType::FLOAT, }; TensorShape output = input; - auto make_input = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o0, int o1, int o2) { - return lift_to_parallel_with_degrees( - input, o_sum, o_eq, FFOrdered{o0, o1, o2}); - }; + auto make_input = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o0, + nonnegative_int o1, + nonnegative_int o2) { + return lift_to_parallel_with_degrees( + input, o_sum, o_eq, FFOrdered{o0, o1, o2}); + }; - auto make_output = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o0, int o1, int o2) { - return lift_to_parallel_with_degrees( - output, o_sum, o_eq, FFOrdered{o0, o1, o2}); - }; + auto make_output = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o0, + nonnegative_int o1, + nonnegative_int o2) { + return lift_to_parallel_with_degrees( + output, o_sum, o_eq, FFOrdered{o0, o1, o2}); + }; SUBCASE("partition parallelism (allowed)") { - int degree0 = 2; - int degree2 = 4; + nonnegative_int degree0 = 2_n; + nonnegative_int degree2 = 4_n; - ParallelTensorShape par_input = - make_input(SumDegree{1}, DiscardCopyDegree{1}, degree0, 1, degree2); + ParallelTensorShape 
par_input = make_input( + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree0, 1_n, degree2); tl::expected result = get_output_shape(attrs, par_input); - tl::expected correct = - make_output(SumDegree{1}, DiscardCopyDegree{1}, degree0, 1, degree2); + tl::expected correct = make_output( + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree0, 1_n, degree2); CHECK(result == correct); } SUBCASE("sum parallelism (not allowed)") { - SumDegree sum_degree = SumDegree{2}; + SumDegree sum_degree = SumDegree{2_n}; ParallelTensorShape par_input = - make_input(sum_degree, DiscardCopyDegree{1}, 1, 1, 1); + make_input(sum_degree, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n); std::optional result = optional_from_expected(get_output_shape(attrs, par_input)); @@ -87,10 +93,10 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("discard copy parallelism (not allowed)") { - DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{2}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{2_n}; ParallelTensorShape par_input = - make_input(SumDegree{1}, discard_copy_degree, 1, 1, 1); + make_input(SumDegree{1_n}, discard_copy_degree, 1_n, 1_n, 1_n); std::optional result = optional_from_expected(get_output_shape(attrs, par_input)); diff --git a/lib/op-attrs/test/src/op-attrs/ops/element_binary.cc b/lib/op-attrs/test/src/op-attrs/ops/element_binary.cc index d5aab55cb2..d6a92036f0 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/element_binary.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/element_binary.cc @@ -7,9 +7,9 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("EWAdd shape inference") { - size_t d1 = 16; - size_t d2 = 32; - size_t d3 = 24; + nonnegative_int d1 = 16_n; + nonnegative_int d2 = 32_n; + nonnegative_int d3 = 24_n; ElementBinaryAttrs attrs = ElementBinaryAttrs{ OperatorType::EW_ADD, @@ -20,7 +20,7 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape input_lhs = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ d1, d2, d3, @@ -41,7 +41,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("mismatched dim size") { TensorShape incorrect_rhs = input_lhs; - dim_at_idx(incorrect_rhs, relative_ff_dim_t{0}) += 1; + dim_at_idx(incorrect_rhs, relative_ff_dim_t{0}) += 1_n; tl::expected result = get_output_shape(attrs, input_lhs, incorrect_rhs); @@ -53,9 +53,9 @@ TEST_SUITE(FF_TEST_SUITE) { } TEST_CASE("EWAdd parallel shape inference") { - size_t d1 = 16; - size_t d2 = 32; - size_t d3 = 24; + nonnegative_int d1 = 16_n; + nonnegative_int d2 = 32_n; + nonnegative_int d3 = 24_n; ElementBinaryAttrs attrs = ElementBinaryAttrs{ OperatorType::EW_ADD, @@ -66,7 +66,7 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape unpar_lhs = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ d1, d2, d3, @@ -83,68 +83,68 @@ TEST_SUITE(FF_TEST_SUITE) { auto make_lhs = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_1, - int o_2, - int o_3) { + nonnegative_int o_1, + nonnegative_int o_2, + nonnegative_int o_3) { return lift_to_parallel_with_degrees( - unpar_lhs, o_sum, o_eq, FFOrdered{o_1, o_2, o_3}); + unpar_lhs, o_sum, o_eq, FFOrdered{o_1, o_2, o_3}); }; auto make_rhs = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_1, - int o_2, - int o_3) { + nonnegative_int o_1, + nonnegative_int o_2, + nonnegative_int o_3) { return lift_to_parallel_with_degrees( - unpar_rhs, o_sum, o_eq, FFOrdered{o_1, o_2, o_3}); + unpar_rhs, o_sum, o_eq, FFOrdered{o_1, o_2, o_3}); }; auto make_output = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_1, - int o_2, - int o_3) { + nonnegative_int o_1, + nonnegative_int o_2, + nonnegative_int o_3) { return 
lift_to_parallel_with_degrees( - unpar_output, o_sum, o_eq, FFOrdered{o_1, o_2, o_3}); + unpar_output, o_sum, o_eq, FFOrdered{o_1, o_2, o_3}); }; SUBCASE("data parallelism") { - int degree = 4; + nonnegative_int degree = 4_n; ParallelTensorShape input_lhs = - make_lhs(SumDegree{1}, DiscardCopyDegree{1}, degree, 1, 1); + make_lhs(SumDegree{1_n}, DiscardCopyDegree{1_n}, degree, 1_n, 1_n); ParallelTensorShape input_rhs = - make_rhs(SumDegree{1}, DiscardCopyDegree{1}, degree, 1, 1); + make_rhs(SumDegree{1_n}, DiscardCopyDegree{1_n}, degree, 1_n, 1_n); tl::expected result = get_output_shape(attrs, input_lhs, input_rhs); tl::expected correct = - make_output(SumDegree{1}, DiscardCopyDegree{1}, degree, 1, 1); + make_output(SumDegree{1_n}, DiscardCopyDegree{1_n}, degree, 1_n, 1_n); CHECK(result == correct); } SUBCASE("reduction parallelism") { - int degree = 4; + nonnegative_int degree = 4_n; ParallelTensorShape input_lhs = - make_lhs(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1); + make_lhs(SumDegree{degree}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n); ParallelTensorShape input_rhs = - make_rhs(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1); + make_rhs(SumDegree{degree}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n); tl::expected result = get_output_shape(attrs, input_lhs, input_rhs); tl::expected correct = - make_output(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1); + make_output(SumDegree{degree}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n); CHECK(result == correct); } SUBCASE("invalid discard copy parallelism") { - int degree = 4; + nonnegative_int degree = 4_n; ParallelTensorShape input_lhs = - make_lhs(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1, 1); + make_lhs(SumDegree{1_n}, DiscardCopyDegree{degree}, 1_n, 1_n, 1_n); ParallelTensorShape input_rhs = - make_rhs(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1, 1); + make_rhs(SumDegree{1_n}, DiscardCopyDegree{degree}, 1_n, 1_n, 1_n); tl::expected result = get_output_shape(attrs, input_lhs, input_rhs); @@ -154,12 +154,12 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("invalid mismatched parallelism degrees") { - int degree = 4; + nonnegative_int degree = 4_n; ParallelTensorShape input_lhs = - make_lhs(SumDegree{1}, DiscardCopyDegree{1}, 1, degree, 1); + make_lhs(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, degree, 1_n); ParallelTensorShape input_rhs = - make_rhs(SumDegree{1}, DiscardCopyDegree{1}, 1, 1, degree); + make_rhs(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, 1_n, degree); tl::expected result = get_output_shape(attrs, input_lhs, input_rhs); diff --git a/lib/op-attrs/test/src/op-attrs/ops/element_unary.cc b/lib/op-attrs/test/src/op-attrs/ops/element_unary.cc index 94c382356e..bac6efba3f 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/element_unary.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/element_unary.cc @@ -7,16 +7,16 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ReLU shape inference") { - size_t d1 = 16; - size_t d2 = 32; - size_t d3 = 24; + nonnegative_int d1 = 16_n; + nonnegative_int d2 = 32_n; + nonnegative_int d3 = 24_n; ElementUnaryAttrs attrs = ElementUnaryAttrs{OperatorType::RELU, std::nullopt}; TensorShape input = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ d1, d2, d3, @@ -31,20 +31,20 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(result == correct); - auto make_i = [&](SumDegree o_sum, - DiscardCopyDegree o_eq, - int o_1, - int o_2, - int o_3) { + auto make_input = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o_1, + nonnegative_int o_2, + nonnegative_int o_3) { return 
lift_to_parallel_with_degrees( - input, o_sum, o_eq, FFOrdered{o_1, o_2, o_3}); + input, o_sum, o_eq, FFOrdered{o_1, o_2, o_3}); }; SUBCASE("partition i.e., sharding parallelism") { - int degree1 = 4; - int degree2 = 8; - ParallelTensorShape par_input = - make_i(SumDegree{1}, DiscardCopyDegree{1}, degree1, 1, degree2); + nonnegative_int degree1 = 4_n; + nonnegative_int degree2 = 8_n; + ParallelTensorShape par_input = make_input( + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree1, 1_n, degree2); tl::expected result = get_output_shape(attrs, par_input); @@ -54,10 +54,11 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("sum degree > 1") { - int degree = 2; + nonnegative_int degree = 2_n; tl::expected result = get_output_shape( - attrs, make_i(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1, 1)); + attrs, + make_input(SumDegree{degree}, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n)); CHECK_MESSAGE(!result.has_value(), "Unexpected successful result: ", @@ -65,10 +66,11 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("discard copy degree > 1") { - int degree = 2; + nonnegative_int degree = 2_n; tl::expected result = get_output_shape( - attrs, make_i(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1, 1)); + attrs, + make_input(SumDegree{1_n}, DiscardCopyDegree{degree}, 1_n, 1_n, 1_n)); CHECK_MESSAGE(!result.has_value(), "Unexpected successful result: ", diff --git a/lib/op-attrs/test/src/op-attrs/ops/embedding.cc b/lib/op-attrs/test/src/op-attrs/ops/embedding.cc index 134737f6c0..8fe50a4217 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/embedding.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/embedding.cc @@ -8,8 +8,8 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Sum embedding shape inference") { - int out_channels = 128; - int num_entries = 1024; + nonnegative_int out_channels = 128_n; + nonnegative_int num_entries = 1024_n; EmbeddingAttrs attrs = EmbeddingAttrs{ /*num_entries=*/num_entries, /*out_channels=*/out_channels, @@ -17,11 +17,11 @@ TEST_SUITE(FF_TEST_SUITE) { /*data_type=*/DataType::FLOAT, }; - size_t batch_size = 48; - size_t features_dim = 56; + nonnegative_int batch_size = 48_n; + nonnegative_int features_dim = 56_n; TensorShape input = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ batch_size, features_dim, }}, @@ -30,9 +30,9 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape output = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ batch_size, - size_t_from_int(out_channels), + out_channels, }, }, DataType::FLOAT, @@ -40,9 +40,9 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape weights = TensorShape{ TensorDims{ - FFOrdered{ - size_t_from_int(num_entries), - size_t_from_int(out_channels), + FFOrdered{ + num_entries, + out_channels, }, }, DataType::FLOAT, @@ -66,38 +66,44 @@ TEST_SUITE(FF_TEST_SUITE) { auto make_input = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_batch, - int o_features) { + nonnegative_int o_batch, + nonnegative_int o_features) { return lift_to_parallel_with_degrees( - input, o_sum, o_eq, FFOrdered{o_batch, o_features}); + input, o_sum, o_eq, FFOrdered{o_batch, o_features}); }; auto make_output = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_batch, - int o_outchannels) { + nonnegative_int o_batch, + nonnegative_int o_outchannels) { return lift_to_parallel_with_degrees( - output, o_sum, o_eq, FFOrdered{o_batch, o_outchannels}); + output, + o_sum, + o_eq, + FFOrdered{o_batch, o_outchannels}); }; auto make_weights = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_entries, - int o_outchannels) { + nonnegative_int o_entries, + nonnegative_int 
o_outchannels) { return lift_to_parallel_with_degrees( - weights, o_sum, o_eq, FFOrdered{o_entries, o_outchannels}); + weights, + o_sum, + o_eq, + FFOrdered{o_entries, o_outchannels}); }; SUBCASE("data parallelism") { - int degree = 4; + nonnegative_int degree = 4_n; ParallelTensorShape par_input = - make_input(SumDegree{1}, DiscardCopyDegree{1}, degree, 1); + make_input(SumDegree{1_n}, DiscardCopyDegree{1_n}, degree, 1_n); { tl::expected result = get_output_shape(attrs, par_input); tl::expected correct = - make_output(SumDegree{1}, DiscardCopyDegree{1}, degree, 1); + make_output(SumDegree{1_n}, DiscardCopyDegree{1_n}, degree, 1_n); CHECK(result == correct); } @@ -105,21 +111,21 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_weights_shape(attrs, par_input); tl::expected correct = - make_weights(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1); + make_weights(SumDegree{1_n}, DiscardCopyDegree{degree}, 1_n, 1_n); CHECK(result == correct); } } SUBCASE("input features parallelism") { - int degree = 4; + nonnegative_int degree = 4_n; ParallelTensorShape input = - make_input(SumDegree{1}, DiscardCopyDegree{1}, 1, degree); + make_input(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, degree); { tl::expected result = get_output_shape(attrs, input); tl::expected correct = - make_output(SumDegree{degree}, DiscardCopyDegree{1}, 1, 1); + make_output(SumDegree{degree}, DiscardCopyDegree{1_n}, 1_n, 1_n); CHECK(result == correct); } @@ -127,7 +133,7 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_weights_shape(attrs, input); tl::expected correct = - make_weights(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1); + make_weights(SumDegree{1_n}, DiscardCopyDegree{degree}, 1_n, 1_n); CHECK(result == correct); } } @@ -139,15 +145,15 @@ TEST_SUITE(FF_TEST_SUITE) { // dimension. 
For now we choose to represent parallelism in the channel // dimension, but partitioning in the entry dimension is also potentially // useful as it produces sum parallelism in the output - int degree = 4; + nonnegative_int degree = 4_n; ParallelTensorShape input = - make_input(SumDegree{1}, DiscardCopyDegree{degree}, 1, 1); + make_input(SumDegree{1_n}, DiscardCopyDegree{degree}, 1_n, 1_n); { tl::expected result = get_output_shape(attrs, input); tl::expected correct = - make_output(SumDegree{1}, DiscardCopyDegree{1}, 1, degree); + make_output(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, degree); CHECK(result == correct); } @@ -155,7 +161,7 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_weights_shape(attrs, input); tl::expected correct = - make_weights(SumDegree{1}, DiscardCopyDegree{1}, 1, degree); + make_weights(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, degree); CHECK(result == correct); } } diff --git a/lib/op-attrs/test/src/op-attrs/ops/flat.cc b/lib/op-attrs/test/src/op-attrs/ops/flat.cc index 8998dfaffd..ebd869b3e5 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/flat.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/flat.cc @@ -9,25 +9,25 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_output_shape(FlatAttrs, TensorShape)") { TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - 2, - 4, - 2, - 3, + TensorDims{FFOrdered{ + 2_n, + 4_n, + 2_n, + 3_n, }}, DataType::FLOAT, }; SUBCASE("flatten all dims") { FlatAttrs attrs = FlatAttrs{ - /*start_dim=*/ff_dim_t{nonnegative_int{0}}, - /*end_dim=*/ff_dim_t{nonnegative_int{4}}, + /*start_dim=*/ff_dim_t{0_n}, + /*end_dim=*/ff_dim_t{4_n}, }; TensorShape result = get_output_shape(attrs, input_shape); TensorShape correct = TensorShape{ - TensorDims{FFOrdered{ - 2 * 4 * 2 * 3, + TensorDims{FFOrdered{ + 2_n * 4_n * 2_n * 3_n, }}, DataType::FLOAT, }; @@ -43,10 +43,10 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape result = get_output_shape(attrs, input_shape); TensorShape correct = TensorShape{ - TensorDims{FFOrdered{ - 2, - 4, - 2 * 3, + TensorDims{FFOrdered{ + 2_n, + 4_n, + 2_n * 3_n, }}, DataType::FLOAT, }; @@ -62,10 +62,10 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape result = get_output_shape(attrs, input_shape); TensorShape correct = TensorShape{ - TensorDims{FFOrdered{ - 2 * 4, - 2, - 3, + TensorDims{FFOrdered{ + 2_n * 4_n, + 2_n, + 3_n, }}, DataType::FLOAT, }; @@ -81,10 +81,10 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape result = get_output_shape(attrs, input_shape); TensorShape correct = TensorShape{ - TensorDims{FFOrdered{ - 2, - 4 * 2, - 3, + TensorDims{FFOrdered{ + 2_n, + 4_n * 2_n, + 3_n, }}, DataType::FLOAT, }; @@ -124,18 +124,18 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("allows shard parallelism in non-flattened dims") { ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{1}, - FFOrdered{2, 1, 1, 3}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + FFOrdered{2_n, 1_n, 1_n, 3_n}, }; tl::expected result = get_output_parallel_dim_degrees(attrs, input); tl::expected correct = ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{1}, - FFOrdered{2, 1, 3}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + FFOrdered{2_n, 1_n, 3_n}, }; CHECK(result == correct); @@ -143,9 +143,9 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("does not allow shard parallelism in flattened dims") { ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{1}, - FFOrdered{1, 1, 2, 1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + FFOrdered{1_n, 1_n, 2_n, 1_n}, }; 
std::optional result = @@ -157,18 +157,18 @@ SUBCASE("allows sum parallelism") { ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{2}, - DiscardCopyDegree{1}, - FFOrdered{1, 1, 1, 1}, + SumDegree{2_n}, + DiscardCopyDegree{1_n}, + FFOrdered{1_n, 1_n, 1_n, 1_n}, }; std::optional result = optional_from_expected(get_output_parallel_dim_degrees(attrs, input)); std::optional correct = ParallelTensorDimDegrees{ - SumDegree{2}, - DiscardCopyDegree{1}, - FFOrdered{1, 1, 1}, + SumDegree{2_n}, + DiscardCopyDegree{1_n}, + FFOrdered{1_n, 1_n, 1_n}, }; CHECK(result == correct); @@ -176,18 +176,18 @@ SUBCASE("allows discard copy parallelism") { ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{2}, - FFOrdered{1, 1, 1, 1}, + SumDegree{1_n}, + DiscardCopyDegree{2_n}, + FFOrdered{1_n, 1_n, 1_n, 1_n}, }; std::optional result = optional_from_expected(get_output_parallel_dim_degrees(attrs, input)); std::optional correct = ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{2}, - FFOrdered{1, 1, 1}, + SumDegree{1_n}, + DiscardCopyDegree{2_n}, + FFOrdered{1_n, 1_n, 1_n}, }; CHECK(result == correct); @@ -203,22 +203,22 @@ ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{4, 2}, - ShardParallelDim{8, 1}, - ShardParallelDim{6, 1}, - ShardParallelDim{9, 3}, + ShardParallelDim{4_n, 2_n}, + ShardParallelDim{8_n, 1_n}, + ShardParallelDim{6_n, 1_n}, + ShardParallelDim{9_n, 3_n}, }, ReplicaParallelDimSet{ - SumDegree{7}, - DiscardCopyDegree{5}, + SumDegree{7_n}, + DiscardCopyDegree{5_n}, }, }, DataType::FLOAT, }; FlatAttrs attrs = FlatAttrs{ - /*start_dim=*/ff_dim_t{nonnegative_int{1}}, - /*end_dim=*/ff_dim_t{nonnegative_int{3}}, + /*start_dim=*/ff_dim_t{1_n}, + /*end_dim=*/ff_dim_t{3_n}, }; tl::expected result = @@ -227,13 +227,13 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{4, 2}, - ShardParallelDim{8 * 6, 1}, - ShardParallelDim{9, 3}, + ShardParallelDim{4_n, 2_n}, + ShardParallelDim{8_n * 6_n, 1_n}, + ShardParallelDim{9_n, 3_n}, }, ReplicaParallelDimSet{ - SumDegree{7}, - DiscardCopyDegree{5}, + SumDegree{7_n}, + DiscardCopyDegree{5_n}, }, }, DataType::FLOAT, diff --git a/lib/op-attrs/test/src/op-attrs/ops/layer_norm.cc b/lib/op-attrs/test/src/op-attrs/ops/layer_norm.cc index b9426a89a2..b9aa3c0677 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/layer_norm.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/layer_norm.cc @@ -58,11 +58,11 @@ TEST_SUITE(FF_TEST_SUITE) { }(); TensorShape input = TensorShape{ - TensorDims{FFOrdered{ - 12, - 14, - 16, - 18, + TensorDims{FFOrdered{ + 12_n, + 14_n, + 16_n, + 18_n, }}, DataType::FLOAT, }; @@ -70,9 +70,9 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape output = input; TensorShape gamma = TensorShape{ - TensorDims{FFOrdered{ - 12, - 16, + TensorDims{FFOrdered{ + 12_n, + 16_n, }}, DataType::FLOAT, }; @@ -125,49 +125,58 @@ TEST_SUITE(FF_TEST_SUITE) { auto make_input = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o0, - int o1, - int o2, - int o3) { + nonnegative_int o0, + nonnegative_int o1, + nonnegative_int o2, + nonnegative_int o3) { return lift_to_parallel_with_degrees( - input, o_sum, o_eq, FFOrdered{o0, o1, o2, o3}); + input, o_sum, o_eq, FFOrdered{o0, o1, o2, o3}); }; auto make_output = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o0, - int o1, - int o2, - int o3) { +
nonnegative_int o0, + nonnegative_int o1, + nonnegative_int o2, + nonnegative_int o3) { return lift_to_parallel_with_degrees( - output, o_sum, o_eq, FFOrdered{o0, o1, o2, o3}); + output, o_sum, o_eq, FFOrdered{o0, o1, o2, o3}); }; - auto make_gamma_weights = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o0, int o2) { - return lift_to_parallel_with_degrees( - gamma, o_sum, o_eq, FFOrdered{o0, o2}); - }; + auto make_gamma_weights = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o0, + nonnegative_int o2) { + return lift_to_parallel_with_degrees( + gamma, o_sum, o_eq, FFOrdered{o0, o2}); + }; - auto make_beta_weights = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o0, int o2) { - return lift_to_parallel_with_degrees( - beta, o_sum, o_eq, FFOrdered{o0, o2}); - }; + auto make_beta_weights = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o0, + nonnegative_int o2) { + return lift_to_parallel_with_degrees( + beta, o_sum, o_eq, FFOrdered{o0, o2}); + }; SUBCASE("parallel shape inference (LayerNorm)") { SUBCASE("partition parallelism (not in axes)") { - int degree0 = 2; - int degree2 = 3; + nonnegative_int degree0 = 2_n; + nonnegative_int degree2 = 3_n; ParallelTensorShape par_input = make_input( - SumDegree{1}, DiscardCopyDegree{1}, degree0, 1, degree2, 1); + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree0, 1_n, degree2, 1_n); SUBCASE("get_output_shape(LayerNormAttrs, ParallelTensorShape)") { tl::expected result = get_output_shape(attrs_affine_true, par_input); - tl::expected correct = make_output( - SumDegree{1}, DiscardCopyDegree{1}, degree0, 1, degree2, 1); + tl::expected correct = + make_output(SumDegree{1_n}, + DiscardCopyDegree{1_n}, + degree0, + 1_n, + degree2, + 1_n); CHECK(result == correct); } @@ -179,7 +188,7 @@ TEST_SUITE(FF_TEST_SUITE) { get_gamma_weights_shape(attrs_affine_true, par_input); tl::expected correct = make_gamma_weights( - SumDegree{1}, DiscardCopyDegree{1}, degree0, degree2); + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree0, degree2); CHECK(result == correct); } @@ -199,7 +208,7 @@ TEST_SUITE(FF_TEST_SUITE) { get_beta_weights_shape(attrs_affine_true, par_input); tl::expected correct = make_beta_weights( - SumDegree{1}, DiscardCopyDegree{1}, degree0, degree2); + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree0, degree2); CHECK(result == correct); } @@ -215,11 +224,11 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("partition parallelism (in axes)") { - int degree1 = 2; - int degree2 = 4; + nonnegative_int degree1 = 2_n; + nonnegative_int degree2 = 4_n; ParallelTensorShape par_input = make_input( - SumDegree{1}, DiscardCopyDegree{1}, 1, degree1, degree2, 1); + SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, degree1, degree2, 1_n); SUBCASE("get_output_shape(LayerNormAttrs, ParallelTensorShape)") { std::optional result = optional_from_expected( @@ -248,10 +257,10 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("sum parallelism") { - SumDegree sum_degree = SumDegree{2}; + SumDegree sum_degree = SumDegree{2_n}; ParallelTensorShape par_input = - make_input(sum_degree, DiscardCopyDegree{1}, 1, 1, 1, 1); + make_input(sum_degree, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n, 1_n); SUBCASE("get_output_shape(LayerNormAttrs, ParallelTensorShape)") { std::optional result = optional_from_expected( @@ -280,10 +289,10 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("discard copy parallelism") { - DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{2}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{2_n}; ParallelTensorShape par_input = - 
make_input(SumDegree{1}, discard_copy_degree, 1, 1, 1, 1); + make_input(SumDegree{1_n}, discard_copy_degree, 1_n, 1_n, 1_n, 1_n); SUBCASE("get_output_shape(LayerNormAttrs, ParallelTensorShape)") { std::optional result = optional_from_expected( diff --git a/lib/op-attrs/test/src/op-attrs/ops/linear.cc b/lib/op-attrs/test/src/op-attrs/ops/linear.cc index 191515b062..eaa99ef099 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/linear.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/linear.cc @@ -10,7 +10,7 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_linear_incoming_tensor_roles(LinearAttrs)") { auto make_attrs = [](bool use_bias) { return LinearAttrs{ - /*out_channels=*/16, + /*out_channels=*/16_n, /*use_bias=*/use_bias, /*data_type=*/DataType::FLOAT, /*activation=*/Activation::RELU, @@ -47,7 +47,7 @@ TEST_SUITE(FF_TEST_SUITE) { } TEST_CASE("Linear shape inference") { - int out_channels = 16; + nonnegative_int out_channels = 16_n; LinearAttrs attrs = LinearAttrs{ /*out_channels=*/out_channels, /*use_bias=*/true, @@ -56,13 +56,13 @@ TEST_SUITE(FF_TEST_SUITE) { /*regularizer=*/std::nullopt, }; - size_t batch_size = 12; - size_t extra_dim = 16; - size_t in_channels = 8; + nonnegative_int batch_size = 12_n; + nonnegative_int extra_dim = 16_n; + nonnegative_int in_channels = 8_n; TensorShape input = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ batch_size, extra_dim, in_channels, @@ -73,10 +73,10 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape output = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ batch_size, extra_dim, - size_t_from_int(out_channels), + out_channels, }, }, DataType::FLOAT, @@ -84,9 +84,9 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape projection = TensorShape{ TensorDims{ - FFOrdered{ + FFOrdered{ in_channels, - size_t_from_int(out_channels), + out_channels, }, }, DataType::FLOAT, @@ -94,8 +94,8 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape bias = TensorShape{ TensorDims{ - FFOrdered{ - size_t_from_int(out_channels), + FFOrdered{ + out_channels, }, }, DataType::FLOAT, @@ -127,56 +127,66 @@ TEST_SUITE(FF_TEST_SUITE) { auto make_input = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_batch, - int o_extra_dim, - int o_channel) { + nonnegative_int o_batch, + nonnegative_int o_extra_dim, + nonnegative_int o_channel) { return lift_to_parallel_with_degrees( - input, o_sum, o_eq, FFOrdered{o_batch, o_extra_dim, o_channel}); + input, + o_sum, + o_eq, + FFOrdered{o_batch, o_extra_dim, o_channel}); }; auto make_output = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_batch, - int o_extra_dim, - int o_channel) { + nonnegative_int o_batch, + nonnegative_int o_extra_dim, + nonnegative_int o_channel) { return lift_to_parallel_with_degrees( - output, o_sum, o_eq, FFOrdered{o_batch, o_extra_dim, o_channel}); + output, + o_sum, + o_eq, + FFOrdered{o_batch, o_extra_dim, o_channel}); }; auto make_projection = [&](SumDegree o_sum, DiscardCopyDegree o_eq, - int o_inchannel, - int o_outchannel) { + nonnegative_int o_inchannel, + nonnegative_int o_outchannel) { return lift_to_parallel_with_degrees( - projection, o_sum, o_eq, FFOrdered{o_inchannel, o_outchannel}); + projection, + o_sum, + o_eq, + FFOrdered{o_inchannel, o_outchannel}); }; - auto make_bias = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o_outchannel) { - return lift_to_parallel_with_degrees( - bias, o_sum, o_eq, FFOrdered{o_outchannel}); - }; + auto make_bias = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o_outchannel) { + return lift_to_parallel_with_degrees( + bias, o_sum, o_eq, FFOrdered{o_outchannel}); + }; 
SUBCASE("data parallelism") { - int input_sum_degree = 2; - int extra_dim_degree = 8; - int degree = 4; + nonnegative_int input_sum_degree = 2_n; + nonnegative_int extra_dim_degree = 8_n; + nonnegative_int degree = 4_n; ParallelTensorShape par_input = make_input(SumDegree{input_sum_degree}, - DiscardCopyDegree{1}, + DiscardCopyDegree{1_n}, degree, extra_dim_degree, - 1); + 1_n); { tl::expected result = get_output_shape(attrs, par_input); tl::expected correct = make_output(SumDegree{input_sum_degree}, - DiscardCopyDegree{1}, + DiscardCopyDegree{1_n}, degree, extra_dim_degree, - 1); + 1_n); CHECK(result == correct); } @@ -185,10 +195,10 @@ TEST_SUITE(FF_TEST_SUITE) { get_projection_shape(attrs, par_input); tl::expected correct = make_projection( - SumDegree{1}, + SumDegree{1_n}, DiscardCopyDegree{input_sum_degree * degree * extra_dim_degree}, - 1, - 1); + 1_n, + 1_n); CHECK(result == correct); } @@ -198,27 +208,30 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected correct = make_bias(SumDegree{input_sum_degree}, DiscardCopyDegree{degree * extra_dim_degree}, - 1); + 1_n); CHECK(result == correct); } } SUBCASE("reduction parallelism") { - int input_sum_degree = 2; - int degree = 4; + nonnegative_int input_sum_degree = 2_n; + nonnegative_int degree = 4_n; - ParallelTensorShape par_input = make_input( - SumDegree{input_sum_degree}, DiscardCopyDegree{1}, 1, 1, degree); + ParallelTensorShape par_input = make_input(SumDegree{input_sum_degree}, + DiscardCopyDegree{1_n}, + 1_n, + 1_n, + degree); { tl::expected result = get_output_shape(attrs, par_input); tl::expected correct = make_output(SumDegree{input_sum_degree * degree}, - DiscardCopyDegree{1}, - 1, - 1, - 1); + DiscardCopyDegree{1_n}, + 1_n, + 1_n, + 1_n); CHECK(result == correct); } @@ -226,8 +239,10 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_projection_shape(attrs, par_input); tl::expected correct = - make_projection( - SumDegree{1}, DiscardCopyDegree{input_sum_degree}, degree, 1); + make_projection(SumDegree{1_n}, + DiscardCopyDegree{input_sum_degree}, + degree, + 1_n); CHECK(result == correct); } @@ -235,23 +250,30 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_bias_shape(attrs, par_input); tl::expected correct = make_bias( - SumDegree{input_sum_degree * degree}, DiscardCopyDegree{1}, 1); + SumDegree{input_sum_degree * degree}, DiscardCopyDegree{1_n}, 1_n); CHECK(result == correct); } } SUBCASE("output channel parallelism") { - int input_sum_degree = 2; - int degree = 4; + nonnegative_int input_sum_degree = 2_n; + nonnegative_int degree = 4_n; - ParallelTensorShape par_input = make_input( - SumDegree{input_sum_degree}, DiscardCopyDegree{degree}, 1, 1, 1); + ParallelTensorShape par_input = make_input(SumDegree{input_sum_degree}, + DiscardCopyDegree{degree}, + 1_n, + 1_n, + 1_n); { tl::expected result = get_output_shape(attrs, par_input); - tl::expected correct = make_output( - SumDegree{input_sum_degree}, DiscardCopyDegree{1}, 1, 1, degree); + tl::expected correct = + make_output(SumDegree{input_sum_degree}, + DiscardCopyDegree{1_n}, + 1_n, + 1_n, + degree); CHECK(result == correct); } @@ -259,8 +281,10 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_projection_shape(attrs, par_input); tl::expected correct = - make_projection( - SumDegree{1}, DiscardCopyDegree{input_sum_degree}, 1, degree); + make_projection(SumDegree{1_n}, + DiscardCopyDegree{input_sum_degree}, + 1_n, + degree); CHECK(result == correct); } @@ -268,7 +292,7 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_bias_shape(attrs, par_input); 
tl::expected correct = make_bias( - SumDegree{input_sum_degree}, DiscardCopyDegree{1}, degree); + SumDegree{input_sum_degree}, DiscardCopyDegree{1_n}, degree); CHECK(result == correct); } } diff --git a/lib/op-attrs/test/src/op-attrs/ops/pool_2d.cc b/lib/op-attrs/test/src/op-attrs/ops/pool_2d.cc index 0c14c0fc2a..6c14a226a2 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/pool_2d.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/pool_2d.cc @@ -9,27 +9,27 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("make_adaptive_pool2d") { - size_t input_n = 10; - size_t input_c = 11; - size_t input_h = 15; - size_t input_w = 20; + nonnegative_int input_n = 10_n; + nonnegative_int input_c = 11_n; + nonnegative_int input_h = 15_n; + nonnegative_int input_w = 20_n; Activation activation = Activation::RELU; PoolOp op = PoolOp::AVG; - TensorDims input_dims = - TensorDims{FFOrdered{input_n, input_c, input_h, input_w}}; + TensorDims input_dims = TensorDims{ + FFOrdered{input_n, input_c, input_h, input_w}}; SUBCASE("input_h divisible by output_h && input_w divisible by output_w") { - int output_h = 5; - int output_w = 2; + nonnegative_int output_h = 5_n; + nonnegative_int output_w = 2_n; Pool2DAttrs correct_attrs = Pool2DAttrs{ - /*kernel_h=*/3, - /*kernel_w=*/10, - /*stride_h=*/3, - /*stride_w=*/10, - /*padding_h=*/0, - /*padding_w=*/0, + /*kernel_h=*/3_n, + /*kernel_w=*/10_n, + /*stride_h=*/3_n, + /*stride_w=*/10_n, + /*padding_h=*/0_n, + /*padding_w=*/0_n, /*pool_type=*/op, /*activation=*/activation, }; @@ -50,11 +50,11 @@ TEST_SUITE(FF_TEST_SUITE) { tl::expected result = get_output_shape(correct_attrs, input_shape); tl::expected correct = TensorShape{ - TensorDims{FFOrdered{ + TensorDims{FFOrdered{ input_n, input_c, - size_t_from_int(output_h), - size_t_from_int(output_w), + output_h, + output_w, }}, DataType::FLOAT, }; @@ -64,8 +64,8 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("input_h not divisible by output_h") { - int output_h = 6; - int output_w = 2; + nonnegative_int output_h = 6_n; + nonnegative_int output_w = 2_n; std::optional result = optional_from_expected(make_adaptive_pool2d_attrs( @@ -76,8 +76,8 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("input_w not divisible by output_w") { - int output_h = 5; - int output_w = 3; + nonnegative_int output_h = 5_n; + nonnegative_int output_w = 3_n; std::optional result = optional_from_expected(make_adaptive_pool2d_attrs( @@ -88,16 +88,16 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("input_h == output_h and input_w == output_w") { - int output_h = input_h; - int output_w = input_w; + nonnegative_int output_h = input_h; + nonnegative_int output_w = input_w; Pool2DAttrs correct_attrs = Pool2DAttrs{ - /*kernel_h=*/1, - /*kernel_w=*/1, - /*stride_h=*/1, - /*stride_w=*/1, - /*padding_h=*/0, - /*padding_w=*/0, + /*kernel_h=*/1_n, + /*kernel_w=*/1_n, + /*stride_h=*/1_n, + /*stride_w=*/1_n, + /*padding_h=*/0_n, + /*padding_w=*/0_n, /*pool_type=*/op, /*activation=*/activation, }; @@ -126,22 +126,22 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_output_shape(Pool2DAttrs, TensorShape)") { Pool2DAttrs attrs = Pool2DAttrs{ - /*kernel_h=*/3, - /*kernel_w=*/2, - /*stride_h=*/2, - /*stride_w=*/2, - /*padding_h=*/1, - /*padding_w=*/1, + /*kernel_h=*/3_n, + /*kernel_w=*/2_n, + /*stride_h=*/2_n, + /*stride_w=*/2_n, + /*padding_h=*/1_n, + /*padding_w=*/1_n, /*pool_type=*/PoolOp::MAX, /*activation=*/std::nullopt, }; SUBCASE("fails on non-4d inputs") { TensorShape input = TensorShape{ - TensorDims{FFOrdered{ - 10, - 12, - 14, + TensorDims{FFOrdered{ + 10_n, + 12_n, + 14_n, }}, 
DataType::FLOAT, }; @@ -155,14 +155,14 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("4d input") { TensorShape input = TensorShape{ - TensorDims{FFOrdered{11, 13, 12, 6}}, + TensorDims{FFOrdered{11_n, 13_n, 12_n, 6_n}}, DataType::FLOAT, }; tl::expected result = get_output_shape(attrs, input); tl::expected correct = TensorShape{ - TensorDims{FFOrdered{11, 13, 6, 4}}, + TensorDims{FFOrdered{11_n, 13_n, 6_n, 4_n}}, DataType::FLOAT, }; @@ -175,12 +175,12 @@ TEST_SUITE(FF_TEST_SUITE) { auto make_attrs = [](PoolOp pool_type, std::optional const &activation) { return Pool2DAttrs{ - /*kernel_h=*/3, - /*kernel_w=*/2, - /*stride_h=*/2, - /*stride_w=*/2, - /*padding_h=*/1, - /*padding_w=*/1, + /*kernel_h=*/3_n, + /*kernel_w=*/2_n, + /*stride_h=*/2_n, + /*stride_w=*/2_n, + /*padding_h=*/1_n, + /*padding_w=*/1_n, /*pool_type=*/pool_type, /*activation=*/activation, }; @@ -190,13 +190,13 @@ TEST_SUITE(FF_TEST_SUITE) { Pool2DAttrs attrs = make_attrs(PoolOp::MAX, /*activation=*/std::nullopt); ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{1}, - FFOrdered{ - 4, - 1, - 1, - 1, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + FFOrdered{ + 4_n, + 1_n, + 1_n, + 1_n, }, }; @@ -211,13 +211,13 @@ TEST_SUITE(FF_TEST_SUITE) { Pool2DAttrs attrs = make_attrs(PoolOp::MAX, /*activation=*/std::nullopt); ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{1}, - FFOrdered{ - 4, - 2, - 5, - 6, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + FFOrdered{ + 4_n, + 2_n, + 5_n, + 6_n, }, }; @@ -232,13 +232,13 @@ TEST_SUITE(FF_TEST_SUITE) { Pool2DAttrs attrs = make_attrs(PoolOp::MAX, /*activation=*/std::nullopt); ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{1}, - DiscardCopyDegree{3}, - FFOrdered{ - 1, - 1, - 1, - 1, + SumDegree{1_n}, + DiscardCopyDegree{3_n}, + FFOrdered{ + 1_n, + 1_n, + 1_n, + 1_n, }, }; @@ -256,13 +256,13 @@ TEST_SUITE(FF_TEST_SUITE) { make_attrs(PoolOp::MAX, /*activation=*/std::nullopt); ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{2}, - DiscardCopyDegree{1}, - FFOrdered{ - 1, - 1, - 1, - 1, + SumDegree{2_n}, + DiscardCopyDegree{1_n}, + FFOrdered{ + 1_n, + 1_n, + 1_n, + 1_n, }, }; @@ -279,13 +279,13 @@ TEST_SUITE(FF_TEST_SUITE) { make_attrs(PoolOp::AVG, /*activation=*/std::nullopt); ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{2}, - DiscardCopyDegree{1}, - FFOrdered{ - 1, - 1, - 1, - 1, + SumDegree{2_n}, + DiscardCopyDegree{1_n}, + FFOrdered{ + 1_n, + 1_n, + 1_n, + 1_n, }, }; @@ -302,13 +302,13 @@ TEST_SUITE(FF_TEST_SUITE) { make_attrs(PoolOp::AVG, /*activation=*/Activation::RELU); ParallelTensorDimDegrees input = ParallelTensorDimDegrees{ - SumDegree{2}, - DiscardCopyDegree{1}, - FFOrdered{ - 1, - 1, - 1, - 1, + SumDegree{2_n}, + DiscardCopyDegree{1_n}, + FFOrdered{ + 1_n, + 1_n, + 1_n, + 1_n, }, }; @@ -326,12 +326,12 @@ TEST_SUITE(FF_TEST_SUITE) { // just do a single test to make sure it works/exists Pool2DAttrs attrs = Pool2DAttrs{ - /*kernel_h=*/3, - /*kernel_w=*/2, - /*stride_h=*/2, - /*stride_w=*/2, - /*padding_h=*/1, - /*padding_w=*/1, + /*kernel_h=*/3_n, + /*kernel_w=*/2_n, + /*stride_h=*/2_n, + /*stride_w=*/2_n, + /*padding_h=*/1_n, + /*padding_w=*/1_n, /*pool_type=*/PoolOp::MAX, /*activation=*/std::nullopt, }; @@ -340,14 +340,14 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{14, 7}, - ShardParallelDim{16, 8}, - ShardParallelDim{12, 3}, - ShardParallelDim{6, 2}, + 
ShardParallelDim{14_n, 7_n}, + ShardParallelDim{16_n, 8_n}, + ShardParallelDim{12_n, 3_n}, + ShardParallelDim{6_n, 2_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{2}, + SumDegree{1_n}, + DiscardCopyDegree{2_n}, }, }, DataType::FLOAT, @@ -359,14 +359,14 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{14, 7}, - ShardParallelDim{16, 8}, - ShardParallelDim{6, 3}, - ShardParallelDim{4, 2}, + ShardParallelDim{14_n, 7_n}, + ShardParallelDim{16_n, 8_n}, + ShardParallelDim{6_n, 3_n}, + ShardParallelDim{4_n, 2_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{2}, + SumDegree{1_n}, + DiscardCopyDegree{2_n}, }, }, DataType::FLOAT, @@ -377,14 +377,14 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{14, 1}, - ShardParallelDim{16, 1}, - ShardParallelDim{12, 1}, - ShardParallelDim{6, 1}, + ShardParallelDim{14_n, 1_n}, + ShardParallelDim{16_n, 1_n}, + ShardParallelDim{12_n, 1_n}, + ShardParallelDim{6_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{2}, - DiscardCopyDegree{1}, + SumDegree{2_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, diff --git a/lib/op-attrs/test/src/op-attrs/ops/reduction.cc b/lib/op-attrs/test/src/op-attrs/ops/reduction.cc index 0d1c8bdf98..dc12eb12a8 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/reduction.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/reduction.cc @@ -10,21 +10,21 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{12, 2}, - ShardParallelDim{14, 1}, - ShardParallelDim{16, 3}, - ShardParallelDim{18, 2}, + ShardParallelDim{12_n, 2_n}, + ShardParallelDim{14_n, 1_n}, + ShardParallelDim{16_n, 3_n}, + ShardParallelDim{18_n, 2_n}, }, ReplicaParallelDimSet{ - SumDegree{3}, - DiscardCopyDegree{2}, + SumDegree{3_n}, + DiscardCopyDegree{2_n}, }, }, DataType::FLOAT, }; SUBCASE("valid") { - int degree = 3; + nonnegative_int degree = 3_n; ReductionAttrs attrs = ReductionAttrs{ /*repartition_degree=*/degree, }; @@ -42,7 +42,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("invalid") { - int degree = 4; + nonnegative_int degree = 4_n; ReductionAttrs attrs = ReductionAttrs{ /*repartition_degree=*/degree, }; diff --git a/lib/op-attrs/test/src/op-attrs/ops/repartition.cc b/lib/op-attrs/test/src/op-attrs/ops/repartition.cc index ba213f54f4..36a265ce9f 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/repartition.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/repartition.cc @@ -6,8 +6,8 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Repartition shape inference") { - ff_dim_t dim = ff_dim_t{nonnegative_int{2}}; - int degree = 4; + ff_dim_t dim = ff_dim_t{2_n}; + nonnegative_int degree = 4_n; RepartitionAttrs attrs = RepartitionAttrs{ /*repartition_dim=*/dim, /*repartition_degree=*/degree, @@ -16,14 +16,14 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{12, 2}, - ShardParallelDim{14, 1}, - ShardParallelDim{16, 3}, - ShardParallelDim{18, 2}, + ShardParallelDim{12_n, 2_n}, + ShardParallelDim{14_n, 1_n}, + ShardParallelDim{16_n, 3_n}, + ShardParallelDim{18_n, 2_n}, }, ReplicaParallelDimSet{ - SumDegree{3}, - DiscardCopyDegree{2}, + SumDegree{3_n}, + DiscardCopyDegree{2_n}, }, }, DataType::FLOAT, diff --git a/lib/op-attrs/test/src/op-attrs/ops/replicate.cc b/lib/op-attrs/test/src/op-attrs/ops/replicate.cc index 60a1018479..770ae20d38 100644 --- 
a/lib/op-attrs/test/src/op-attrs/ops/replicate.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/replicate.cc @@ -6,20 +6,20 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Replicate shape inference") { ReplicateAttrs attrs = ReplicateAttrs{ - /*replicate_degree=*/4, + /*replicate_degree=*/4_n, }; ParallelTensorShape input = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{10, 2}, - ShardParallelDim{12, 1}, - ShardParallelDim{14, 2}, - ShardParallelDim{16, 2}, + ShardParallelDim{10_n, 2_n}, + ShardParallelDim{12_n, 1_n}, + ShardParallelDim{14_n, 2_n}, + ShardParallelDim{16_n, 2_n}, }, ReplicaParallelDimSet{ - SumDegree{3}, - DiscardCopyDegree{2}, + SumDegree{3_n}, + DiscardCopyDegree{2_n}, }, }, DataType::FLOAT, @@ -28,7 +28,8 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape result = get_output_shape(attrs, input); ParallelTensorShape correct_output = input; - correct_output.dims.replica_dims.discard_copy_degree = DiscardCopyDegree{8}; + correct_output.dims.replica_dims.discard_copy_degree = + DiscardCopyDegree{8_n}; CHECK(result == correct_output); } diff --git a/lib/op-attrs/test/src/op-attrs/ops/softmax.cc b/lib/op-attrs/test/src/op-attrs/ops/softmax.cc index 5808e5ef42..8c80e348c0 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/softmax.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/softmax.cc @@ -10,16 +10,16 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_output_shape(SoftmaxAttrs, TensorShape)") { TensorShape input = TensorShape{ - TensorDims{FFOrdered{ - 12, - 14, - 16, + TensorDims{FFOrdered{ + 12_n, + 14_n, + 16_n, }}, DataType::FLOAT, }; SUBCASE("attrs.dim in bounds") { - SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{nonnegative_int{1}}}; + SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{1_n}}; tl::expected result = get_output_shape(attrs, input); @@ -29,7 +29,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("attrs.dims out of bounds") { - SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{nonnegative_int{4}}}; + SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{4_n}}; std::optional result = optional_from_expected(get_output_shape(attrs, input)); @@ -41,47 +41,53 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_output_shape(SoftmaxAttrs, ParallelTensorShape)") { TensorShape input = TensorShape{ - TensorDims{FFOrdered{ - 12, - 14, - 16, + TensorDims{FFOrdered{ + 12_n, + 14_n, + 16_n, }}, DataType::FLOAT, }; TensorShape output = input; - auto make_input = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o0, int o1, int o2) { - return lift_to_parallel_with_degrees( - input, o_sum, o_eq, FFOrdered{o0, o1, o2}); - }; + auto make_input = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o0, + nonnegative_int o1, + nonnegative_int o2) { + return lift_to_parallel_with_degrees( + input, o_sum, o_eq, FFOrdered{o0, o1, o2}); + }; - auto make_output = - [&](SumDegree o_sum, DiscardCopyDegree o_eq, int o0, int o1, int o2) { - return lift_to_parallel_with_degrees( - output, o_sum, o_eq, FFOrdered{o0, o1, o2}); - }; + auto make_output = [&](SumDegree o_sum, + DiscardCopyDegree o_eq, + nonnegative_int o0, + nonnegative_int o1, + nonnegative_int o2) { + return lift_to_parallel_with_degrees( + output, o_sum, o_eq, FFOrdered{o0, o1, o2}); + }; SUBCASE("partition parallelism in non-softmax-dim (valid)") { - int degree0 = 2; - int degree2 = 4; + nonnegative_int degree0 = 2_n; + nonnegative_int degree2 = 4_n; - ParallelTensorShape par_input = - make_input(SumDegree{1}, DiscardCopyDegree{1}, degree0, 1, degree2); + ParallelTensorShape par_input = 
make_input( + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree0, 1_n, degree2); SUBCASE("attrs.dim in bounds") { - SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{nonnegative_int{1}}}; + SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{1_n}}; tl::expected result = get_output_shape(attrs, par_input); tl::expected correct = make_output( - SumDegree{1}, DiscardCopyDegree{1}, degree0, 1, degree2); + SumDegree{1_n}, DiscardCopyDegree{1_n}, degree0, 1_n, degree2); CHECK(result == correct); } SUBCASE("attrs.dims out of bounds") { - SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{nonnegative_int{4}}}; + SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{4_n}}; std::optional result = optional_from_expected(get_output_shape(attrs, par_input)); @@ -92,12 +98,12 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("partition parallism in softmax dim (invalid)") { - int degree1 = 2; + nonnegative_int degree1 = 2_n; - SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{nonnegative_int{1}}}; + SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{1_n}}; ParallelTensorShape par_input = - make_input(SumDegree{1}, DiscardCopyDegree{1}, 1, degree1, 1); + make_input(SumDegree{1_n}, DiscardCopyDegree{1_n}, 1_n, degree1, 1_n); std::optional result = optional_from_expected(get_output_shape(attrs, par_input)); @@ -107,12 +113,12 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("sum parallelism (invalid)") { - SumDegree sum_degree = SumDegree{2}; + SumDegree sum_degree = SumDegree{2_n}; - SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{nonnegative_int{1}}}; + SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{1_n}}; ParallelTensorShape par_input = - make_input(sum_degree, DiscardCopyDegree{1}, 1, 1, 1); + make_input(sum_degree, DiscardCopyDegree{1_n}, 1_n, 1_n, 1_n); std::optional result = optional_from_expected(get_output_shape(attrs, par_input)); @@ -122,12 +128,12 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("discard copy parallelism (invalid)") { - DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{2}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{2_n}; - SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{nonnegative_int{1}}}; + SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t{1_n}}; ParallelTensorShape par_input = - make_input(SumDegree{1}, discard_copy_degree, 1, 1, 1); + make_input(SumDegree{1_n}, discard_copy_degree, 1_n, 1_n, 1_n); std::optional result = optional_from_expected(get_output_shape(attrs, par_input)); diff --git a/lib/op-attrs/test/src/op-attrs/pcg_operator_attrs.cc b/lib/op-attrs/test/src/op-attrs/pcg_operator_attrs.cc index 73f5f0674d..1187bfcfbf 100644 --- a/lib/op-attrs/test/src/op-attrs/pcg_operator_attrs.cc +++ b/lib/op-attrs/test/src/op-attrs/pcg_operator_attrs.cc @@ -6,8 +6,8 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("PCGOperatorAttrs to/from json") { PCGOperatorAttrs correct = PCGOperatorAttrs{RepartitionAttrs{ - /*repartition_dim=*/ff_dim_t{nonnegative_int{1}}, - /*repartition_degree=*/4, + /*repartition_dim=*/ff_dim_t{1_n}, + /*repartition_degree=*/4_n, }}; nlohmann::json j = correct; auto result = j.get(); diff --git a/lib/op-attrs/test/src/op-attrs/relative_ff_dim_t.cc b/lib/op-attrs/test/src/op-attrs/relative_ff_dim_t.cc index c09c1ec3df..e3f3f4534e 100644 --- a/lib/op-attrs/test/src/op-attrs/relative_ff_dim_t.cc +++ b/lib/op-attrs/test/src/op-attrs/relative_ff_dim_t.cc @@ -5,13 +5,13 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ff_dim_t_from_relative_ff_dim_t") { - int input_dim = 5; + nonnegative_int input_dim = 5_n; SUBCASE("relative index is zero") { relative_ff_dim_t relative_ff_dim = 
relative_ff_dim_t{0}; ff_dim_t ff_dim = ff_dim_t_from_relative_ff_dim_t(relative_ff_dim, input_dim); - CHECK(ff_dim == ff_dim_t{nonnegative_int{0}}); + CHECK(ff_dim == ff_dim_t{0_n}); } SUBCASE("relative index is positive") { @@ -20,14 +20,14 @@ TEST_SUITE(FF_TEST_SUITE) { relative_ff_dim_t relative_ff_dim = relative_ff_dim_t{1}; ff_dim_t ff_dim = ff_dim_t_from_relative_ff_dim_t(relative_ff_dim, input_dim); - CHECK(ff_dim == ff_dim_t{nonnegative_int{1}}); + CHECK(ff_dim == ff_dim_t{1_n}); } SUBCASE("relative index is out of range") { relative_ff_dim_t relative_ff_dim = relative_ff_dim_t{10}; ff_dim_t ff_dim = ff_dim_t_from_relative_ff_dim_t(relative_ff_dim, input_dim); - CHECK(ff_dim == ff_dim_t{nonnegative_int{10}}); + CHECK(ff_dim == ff_dim_t{10_n}); } } @@ -37,7 +37,7 @@ TEST_SUITE(FF_TEST_SUITE) { relative_ff_dim_t relative_ff_dim = relative_ff_dim_t{-1}; ff_dim_t ff_dim = ff_dim_t_from_relative_ff_dim_t(relative_ff_dim, input_dim); - CHECK(ff_dim == ff_dim_t{nonnegative_int{4}}); + CHECK(ff_dim == ff_dim_t{4_n}); } SUBCASE("relative index is out of range") { diff --git a/lib/op-attrs/test/src/op-attrs/tensor_dims.cc b/lib/op-attrs/test/src/op-attrs/tensor_dims.cc index 60d87300c1..7e072d82d9 100644 --- a/lib/op-attrs/test/src/op-attrs/tensor_dims.cc +++ b/lib/op-attrs/test/src/op-attrs/tensor_dims.cc @@ -7,7 +7,8 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("tensor_dims_is_broadcastable_to(TensorDims, TensorDims)") { - TensorDims goal = TensorDims{FFOrdered{1, 1, 4, 3}}; + TensorDims goal = + TensorDims{FFOrdered{1_n, 1_n, 4_n, 3_n}}; SUBCASE("dims match") { bool result = tensor_dims_is_broadcastable_to(goal, goal); @@ -17,7 +18,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("curr only needs num_dims promotion") { - TensorDims curr = TensorDims{FFOrdered{4, 3}}; + TensorDims curr = TensorDims{FFOrdered{4_n, 3_n}}; bool result = tensor_dims_is_broadcastable_to(curr, goal); bool correct = true; @@ -26,7 +27,8 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("curr only needs dim expansion") { - TensorDims curr = TensorDims{FFOrdered{1, 1, 1, 3}}; + TensorDims curr = + TensorDims{FFOrdered{1_n, 1_n, 1_n, 3_n}}; bool result = tensor_dims_is_broadcastable_to(curr, goal); bool correct = true; @@ -35,7 +37,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("curr needs both num_dims promotion and dim expansion") { - TensorDims curr = TensorDims{FFOrdered{1, 3}}; + TensorDims curr = TensorDims{FFOrdered{1_n, 3_n}}; bool result = tensor_dims_is_broadcastable_to(curr, goal); bool correct = true; @@ -44,7 +46,8 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("curr needs invalid dim promotion") { - TensorDims curr = TensorDims{FFOrdered{1, 1, 2, 3}}; + TensorDims curr = + TensorDims{FFOrdered{1_n, 1_n, 2_n, 3_n}}; bool result = tensor_dims_is_broadcastable_to(curr, goal); bool correct = false; @@ -53,7 +56,8 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("num_dims(goal) < num_dims(curr)") { - TensorDims curr = TensorDims{FFOrdered{1, 1, 10, 4, 3}}; + TensorDims curr = + TensorDims{FFOrdered{1_n, 1_n, 10_n, 4_n, 3_n}}; bool result = tensor_dims_is_broadcastable_to(curr, goal); bool correct = false; @@ -63,12 +67,13 @@ TEST_SUITE(FF_TEST_SUITE) { } TEST_CASE("get_broadcast_target_dims(std::unordered_set)") { - TensorDims d1 = TensorDims{FFOrdered{1, 10, 4, 3}}; + TensorDims d1 = TensorDims{FFOrdered{1_n, 10_n, 4_n, 3_n}}; - TensorDims d2 = TensorDims{FFOrdered{10, 4, 1}}; + TensorDims d2 = TensorDims{FFOrdered{10_n, 4_n, 1_n}}; SUBCASE("has target in inputs") { - TensorDims d3 = TensorDims{FFOrdered{1, 1, 4, 
3}}; + TensorDims d3 = + TensorDims{FFOrdered{1_n, 1_n, 4_n, 3_n}}; std::optional result = get_broadcast_target_dims({d1, d2, d3}); @@ -78,7 +83,8 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("has no possible target") { - TensorDims d3 = TensorDims{FFOrdered{1, 1, 1, 4}}; + TensorDims d3 = + TensorDims{FFOrdered{1_n, 1_n, 1_n, 4_n}}; std::optional result = get_broadcast_target_dims({d1, d2, d3}); @@ -88,10 +94,11 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("has possible target, but not in inputs") { - TensorDims d3 = TensorDims{FFOrdered{1, 1, 1, 4, 3}}; + TensorDims d3 = + TensorDims{FFOrdered{1_n, 1_n, 1_n, 4_n, 3_n}}; TensorDims possible_target = - TensorDims{FFOrdered{1, 1, 10, 4, 3}}; + TensorDims{FFOrdered{1_n, 1_n, 10_n, 4_n, 3_n}}; REQUIRE(tensor_dims_is_broadcastable_to(d1, possible_target)); REQUIRE(tensor_dims_is_broadcastable_to(d2, possible_target)); diff --git a/lib/pcg/include/pcg/computation_graph_builder.h b/lib/pcg/include/pcg/computation_graph_builder.h index df93f69f2e..290df8574e 100644 --- a/lib/pcg/include/pcg/computation_graph_builder.h +++ b/lib/pcg/include/pcg/computation_graph_builder.h @@ -85,15 +85,15 @@ struct ComputationGraphBuilder { // Add a 2D convolutional layer tensor_guid_t conv2d( tensor_guid_t const &input, - int outChannels, - int kernelH, - int kernelW, - int strideH, - int strideW, - int paddingH, - int paddingW, + nonnegative_int outChannels, + nonnegative_int kernelH, + nonnegative_int kernelW, + nonnegative_int strideH, + nonnegative_int strideW, + nonnegative_int paddingH, + nonnegative_int paddingW, std::optional const &activation = std::nullopt, - int groups = 1, + nonnegative_int groups = 1_n, bool use_bias = true, std::optional const &kernel_initializer = std::nullopt, std::optional const &bias_initializer = std::nullopt, @@ -107,8 +107,8 @@ struct ComputationGraphBuilder { // Add an embedding layer tensor_guid_t embedding( tensor_guid_t const &input, - int num_entries, - int outDim, + nonnegative_int num_entries, + nonnegative_int outDim, AggregateOp aggr, DataType dtype = DataType::FLOAT, std::optional const &kernel_initializer = std::nullopt, @@ -121,32 +121,32 @@ struct ComputationGraphBuilder { // Add a cache layer tensor_guid_t cache(tensor_guid_t const &input, - int num_batches, + nonnegative_int num_batches, std::function score_f = {}, std::optional const &name = std::nullopt); // Add a 2D pooling layer tensor_guid_t pool2d(tensor_guid_t const &input, - int kernelH, - int kernelW, - int strideH, - int strideW, - int paddingH, - int paddingW, + nonnegative_int kernelH, + nonnegative_int kernelW, + nonnegative_int strideH, + nonnegative_int strideW, + nonnegative_int paddingH, + nonnegative_int paddingW, PoolOp type = PoolOp::MAX, std::optional const &activation = std::nullopt, std::optional const &name = std::nullopt); tensor_guid_t adaptive_pool2d( tensor_guid_t const &input, - int output_h, - int output_w, + nonnegative_int output_h, + nonnegative_int output_w, PoolOp type = PoolOp::MAX, std::optional const &activation = std::nullopt, std::optional const &name = std::nullopt); tensor_guid_t layer_norm(tensor_guid_t const &input, - std::vector const &axes, + std::vector const &axes, bool elementwise_affine, float eps, std::optional const &name = std::nullopt); @@ -157,15 +157,15 @@ struct ComputationGraphBuilder { float eps, std::optional const &momentum, std::optional const &name = std::nullopt); - tensor_guid_t - batch_matmul(tensor_guid_t const &A, - tensor_guid_t const &B, - int a_seq_length_dim = -1, - int b_seq_length_dim = -1, - 
std::optional const &name = std::nullopt); + tensor_guid_t batch_matmul( + tensor_guid_t const &A, + tensor_guid_t const &B, + std::optional const &a_seq_length_dim = std::nullopt, + std::optional const &b_seq_length_dim = std::nullopt, + std::optional const &name = std::nullopt); tensor_guid_t dense( tensor_guid_t const &input, - int outDim, + nonnegative_int outDim, std::optional activation = std::nullopt, bool use_bias = true, DataType data_type = DataType::FLOAT, @@ -181,7 +181,7 @@ struct ComputationGraphBuilder { std::optional const &name = std::nullopt); // Add a concat layer tensor_guid_t concat(std::vector const &tensors, - int axis, + relative_ff_dim_t axis, std::optional const &name = std::nullopt); // Add a mean layer tensor_guid_t mean(tensor_guid_t const &input, @@ -191,47 +191,48 @@ struct ComputationGraphBuilder { // Add a split layer std::vector split(tensor_guid_t const &input, - std::vector const &split, - int axis, + std::vector const &split, + relative_ff_dim_t axis, std::optional const &name = std::nullopt); // Add a flat layer - tensor_guid_t flat(tensor_guid_t const &input, - int start_dim = 0, - std::optional const &end_dim = std::nullopt, - std::optional const &name = std::nullopt); + tensor_guid_t + flat(tensor_guid_t const &input, + relative_ff_dim_t start_dim = relative_ff_dim_t{0}, + std::optional const &end_dim = std::nullopt, + std::optional const &name = std::nullopt); // Add a softmax layer tensor_guid_t softmax(tensor_guid_t const &input, - std::optional dim = std::nullopt, + std::optional dim = std::nullopt, std::optional const &name = std::nullopt); // Create input tensors and constants tensor_guid_t transpose(tensor_guid_t const &input, - std::vector const &perm, + std::vector const &perm, std::optional const &name = std::nullopt); tensor_guid_t reduce_sum(tensor_guid_t const &input, - std::vector const &axes, + std::vector const &axes, bool keepdims = false, std::optional const &name = std::nullopt); tensor_guid_t reshape(tensor_guid_t const &input, - std::vector const &shape, + std::vector const &shape, std::optional const &name = std::nullopt); tensor_guid_t reverse(tensor_guid_t const &input, - int axis, + relative_ff_dim_t axis, std::optional const &name = std::nullopt); std::vector top_k(tensor_guid_t const &input, - int k, + nonnegative_int k, bool sorted, std::optional const &name = std::nullopt); tensor_guid_t multihead_attention( tensor_guid_t const &query, tensor_guid_t const &key, tensor_guid_t const &value, - int embed_dim, - int num_heads, - int kdim = 0, - int vdim = 0, + nonnegative_int embed_dim, + nonnegative_int num_heads, + nonnegative_int kdim = 0_n, + nonnegative_int vdim = 0_n, float dropout = 0.0f, bool bias = true, bool add_bias_kv = false, @@ -254,7 +255,7 @@ struct ComputationGraphBuilder { std::optional const &name = std::nullopt); std::vector get_outputs(LayerAttrs const &) const; - tensor_guid_t get_output(LayerAttrs const &, int idx) const; + tensor_guid_t get_output(LayerAttrs const &, nonnegative_int idx) const; std::vector add_layer(LayerAttrs const &layer, diff --git a/lib/pcg/include/pcg/cpu_id_t.struct.toml b/lib/pcg/include/pcg/cpu_id_t.struct.toml index 0492a937be..152debbded 100644 --- a/lib/pcg/include/pcg/cpu_id_t.struct.toml +++ b/lib/pcg/include/pcg/cpu_id_t.struct.toml @@ -9,6 +9,10 @@ features = [ "fmt", ] +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] + [[fields]] name = "cpu_index" -type = "int" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/pcg/include/pcg/device_id.h 
b/lib/pcg/include/pcg/device_id.h index 28cf30eaba..36ea9de6b3 100644 --- a/lib/pcg/include/pcg/device_id.h +++ b/lib/pcg/include/pcg/device_id.h @@ -13,9 +13,9 @@ device_id_t operator+(device_id_t, size_t); DeviceType get_device_type(device_id_t const &device_id); gpu_id_t unwrap_gpu(device_id_t); cpu_id_t unwrap_cpu(device_id_t); -int get_raw_id(device_id_t); +nonnegative_int get_raw_id(device_id_t); -device_id_t device_id_from_index(int, DeviceType); +device_id_t device_id_from_index(nonnegative_int, DeviceType); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/file_format/v1/graphs/v1_dataflow_graph.h b/lib/pcg/include/pcg/file_format/v1/graphs/v1_dataflow_graph.h index 05c486f0f7..9554995fa0 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs/v1_dataflow_graph.h +++ b/lib/pcg/include/pcg/file_format/v1/graphs/v1_dataflow_graph.h @@ -8,7 +8,7 @@ namespace FlexFlow { V1DataflowGraph to_v1(DataflowGraphView const &); V1DataflowGraph to_v1(DataflowGraphView const &, - std::unordered_map<Node, int> const &); + std::unordered_map<Node, nonnegative_int> const &); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/file_format/v1/graphs/v1_dataflow_graph.struct.toml b/lib/pcg/include/pcg/file_format/v1/graphs/v1_dataflow_graph.struct.toml index c332b6b41d..57b559a18e 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs/v1_dataflow_graph.struct.toml +++ b/lib/pcg/include/pcg/file_format/v1/graphs/v1_dataflow_graph.struct.toml @@ -13,6 +13,7 @@ includes = [ "", "", "pcg/file_format/v1/graphs/v1_graph_edge.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] src_includes = [ @@ -24,7 +25,7 @@ src_includes = [ [[fields]] name = "nodes" -type = "std::vector<int>" +type = "std::vector<::FlexFlow::nonnegative_int>" [[fields]] name = "edges" diff --git a/lib/pcg/include/pcg/file_format/v1/graphs/v1_graph_edge.struct.toml b/lib/pcg/include/pcg/file_format/v1/graphs/v1_graph_edge.struct.toml index 752706fe1d..9150c20056 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs/v1_graph_edge.struct.toml +++ b/lib/pcg/include/pcg/file_format/v1/graphs/v1_graph_edge.struct.toml @@ -9,18 +9,22 @@ features = [ "fmt", ] +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] + [[fields]] name = "srcNode" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "srcIdx" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "dstNode" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "dstIdx" -type = "int" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/pcg/include/pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.h b/lib/pcg/include/pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.h index fc9dfcef9a..426bad5a82 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.h +++ b/lib/pcg/include/pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.h @@ -13,18 +13,19 @@ namespace FlexFlow { template <typename NodeLabel, typename OutputLabel> -std::pair<V1LabelledDataflowGraph<NodeLabel, OutputLabel>, bidict<int, Node>> +std::pair<V1LabelledDataflowGraph<NodeLabel, OutputLabel>, + bidict<nonnegative_int, Node>> to_v1_including_node_numbering( LabelledDataflowGraphView<NodeLabel, OutputLabel> const &g) { - bidict<int, Node> nodes = bidict_from_enumerating(get_nodes(g)); + bidict<nonnegative_int, Node> nodes = bidict_from_enumerating(get_nodes(g)); V1DataflowGraph unlabelled = to_v1(g, nodes.reversed()); - std::unordered_map<int, NodeLabel> node_labels = map_values( + std::unordered_map<nonnegative_int, NodeLabel> node_labels = map_values( nodes.as_unordered_map(), [&](Node const &n) { return g.at(n); }); - std::unordered_map<int, std::vector<OutputLabel>> output_labels = + std::unordered_map<nonnegative_int, std::vector<OutputLabel>> output_labels = map_values(nodes.as_unordered_map(), [&](Node const &n) { return transform(get_outputs(g, n), [&](DataflowOutput const &o) { return g.at(o);
}); diff --git a/lib/pcg/include/pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.struct.toml b/lib/pcg/include/pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.struct.toml index b440d0f03d..1f69f5cd93 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.struct.toml +++ b/lib/pcg/include/pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.struct.toml @@ -18,6 +18,7 @@ includes = [ "", "pcg/file_format/v1/graphs/v1_dataflow_graph.dtg.h", "pcg/file_format/v1/graphs/v1_graph_output.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] src_includes = [ @@ -29,11 +30,11 @@ src_includes = [ [[fields]] name = "node_labels" -type = "std::unordered_map" +type = "std::unordered_map<::FlexFlow::nonnegative_int, NodeLabel>" [[fields]] name = "output_labels" -type = "std::unordered_map>" +type = "std::unordered_map<::FlexFlow::nonnegative_int, std::vector>" [[fields]] name = "graph" diff --git a/lib/pcg/include/pcg/file_format/v1/v1_binary_sp_decomposition/v1_binary_sp_decomposition.variant.toml b/lib/pcg/include/pcg/file_format/v1/v1_binary_sp_decomposition/v1_binary_sp_decomposition.variant.toml index 0fe0b1761f..bd60564465 100644 --- a/lib/pcg/include/pcg/file_format/v1/v1_binary_sp_decomposition/v1_binary_sp_decomposition.variant.toml +++ b/lib/pcg/include/pcg/file_format/v1/v1_binary_sp_decomposition/v1_binary_sp_decomposition.variant.toml @@ -9,6 +9,7 @@ features = [ includes = [ "pcg/file_format/v1/v1_binary_sp_decomposition/v1_binary_series_split.dtg.h", "pcg/file_format/v1/v1_binary_sp_decomposition/v1_binary_parallel_split.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[values]] @@ -20,5 +21,5 @@ type = "::FlexFlow::V1BinaryParallelSplit" key = "parallel" [[values]] -type = "int" +type = "::FlexFlow::nonnegative_int" key = "leaf" diff --git a/lib/pcg/include/pcg/file_format/v1/v1_computation_graph.h b/lib/pcg/include/pcg/file_format/v1/v1_computation_graph.h index 5590d6999b..c0e9966425 100644 --- a/lib/pcg/include/pcg/file_format/v1/v1_computation_graph.h +++ b/lib/pcg/include/pcg/file_format/v1/v1_computation_graph.h @@ -9,7 +9,7 @@ namespace FlexFlow { V1ComputationGraph to_v1(ComputationGraph const &); -std::pair> +std::pair> to_v1_including_node_numbering(ComputationGraph const &); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/gpu_id_t.struct.toml b/lib/pcg/include/pcg/gpu_id_t.struct.toml index 170dbb96fa..7a85b4c0a7 100644 --- a/lib/pcg/include/pcg/gpu_id_t.struct.toml +++ b/lib/pcg/include/pcg/gpu_id_t.struct.toml @@ -9,6 +9,10 @@ features = [ "fmt", ] +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] + [[fields]] name = "gpu_index" -type = "int" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/pcg/include/pcg/machine_space_coordinate.struct.toml b/lib/pcg/include/pcg/machine_space_coordinate.struct.toml index 9b197a74c9..2528eab849 100644 --- a/lib/pcg/include/pcg/machine_space_coordinate.struct.toml +++ b/lib/pcg/include/pcg/machine_space_coordinate.struct.toml @@ -11,15 +11,16 @@ features = [ includes = [ "pcg/device_type.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] name = "node_idx" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "device_idx" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "device_type" diff --git a/lib/pcg/include/pcg/machine_specification.h b/lib/pcg/include/pcg/machine_specification.h index 39591e8a70..11c5a81bba 100644 --- a/lib/pcg/include/pcg/machine_specification.h +++ b/lib/pcg/include/pcg/machine_specification.h @@ 
-8,12 +8,12 @@ namespace FlexFlow { -int get_num_gpus(MachineSpecification const &ms); -int get_num_cpus(MachineSpecification const &ms); -int get_num_devices(MachineSpecification const &ms, - DeviceType const &device_type); -int get_num_devices_per_node(MachineSpecification const &ms, - DeviceType const &device_type); +nonnegative_int get_num_gpus(MachineSpecification const &ms); +nonnegative_int get_num_cpus(MachineSpecification const &ms); +nonnegative_int get_num_devices(MachineSpecification const &ms, + DeviceType const &device_type); +nonnegative_int get_num_devices_per_node(MachineSpecification const &ms, + DeviceType const &device_type); bool is_valid_machine_space_coordinate(MachineSpecification const &ms, MachineSpaceCoordinate const &coord); diff --git a/lib/pcg/include/pcg/machine_specification.struct.toml b/lib/pcg/include/pcg/machine_specification.struct.toml index e75b5018cb..7c624c7240 100644 --- a/lib/pcg/include/pcg/machine_specification.struct.toml +++ b/lib/pcg/include/pcg/machine_specification.struct.toml @@ -9,17 +9,21 @@ features = [ "fmt", ] +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] + [[fields]] name = "num_nodes" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "num_cpus_per_node" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "num_gpus_per_node" -type = "int" +type = "::FlexFlow::nonnegative_int" [[fields]] name = "inter_node_bandwidth" diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index f72b2359dc..6ed9e7dd9c 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -5,7 +5,7 @@ #include "machine_view.dtg.h" #include "pcg/device_id_t.dtg.h" #include "pcg/operator_task_space.dtg.h" -#include "task_space_coordinate.dtg.h" +#include "pcg/task_space_coordinate.dtg.h" #include #include #include diff --git a/lib/pcg/include/pcg/operator_task_space.h b/lib/pcg/include/pcg/operator_task_space.h index 1a19397c72..b095fad088 100644 --- a/lib/pcg/include/pcg/operator_task_space.h +++ b/lib/pcg/include/pcg/operator_task_space.h @@ -16,8 +16,8 @@ std::unordered_set TaskSpaceCoordinate get_task_space_maximum_coordinate(OperatorTaskSpace const &task); -size_t num_dims(OperatorTaskSpace const &task); -size_t num_tasks(OperatorTaskSpace const &task); +nonnegative_int num_dims(OperatorTaskSpace const &task); +nonnegative_int num_tasks(OperatorTaskSpace const &task); OperatorTaskSpace get_operator_task_space(ParallelComputationGraph const &pcg, parallel_layer_guid_t const &layer); diff --git a/lib/pcg/include/pcg/operator_task_space.struct.toml b/lib/pcg/include/pcg/operator_task_space.struct.toml index 3ab8b83173..9cc4f6b93a 100644 --- a/lib/pcg/include/pcg/operator_task_space.struct.toml +++ b/lib/pcg/include/pcg/operator_task_space.struct.toml @@ -11,6 +11,7 @@ features = [ includes = [ "", + "utils/nonnegative_int/nonnegative_int.h", ] src_includes = [ @@ -20,4 +21,4 @@ src_includes = [ [[fields]] name = "degrees" -type = "std::vector" +type = "std::vector<::FlexFlow::nonnegative_int>" diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_builder.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_builder.h index 019b120936..faa9b73d95 100644 --- a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_builder.h +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_builder.h @@ -33,15 +33,15 @@ struct ParallelComputationGraphBuilder { 
parallel_tensor_guid_t conv2d( parallel_tensor_guid_t const &input, - int outChannels, - int kernelH, - int kernelW, - int strideH, - int strideW, - int paddingH, - int paddingW, + nonnegative_int outChannels, + nonnegative_int kernelH, + nonnegative_int kernelW, + nonnegative_int strideH, + nonnegative_int strideW, + nonnegative_int paddingH, + nonnegative_int paddingW, std::optional const &activation = std::nullopt, - int groups = 1, + nonnegative_int groups = 1_n, bool use_bias = true, std::optional const &kernel_initializer = std::nullopt, std::optional const &bias_initializer = std::nullopt, @@ -50,7 +50,7 @@ struct ParallelComputationGraphBuilder { parallel_tensor_guid_t dense( parallel_tensor_guid_t const &input, - int outDim, + nonnegative_int outDim, std::optional activation = std::nullopt, bool use_bias = true, DataType data_type = DataType::FLOAT, @@ -61,8 +61,8 @@ struct ParallelComputationGraphBuilder { parallel_tensor_guid_t embedding( parallel_tensor_guid_t const &input, - int num_entries, - int outDim, + nonnegative_int num_entries, + nonnegative_int outDim, AggregateOp aggr, DataType dtype = DataType::FLOAT, std::optional const &kernel_initializer = std::nullopt, @@ -72,10 +72,10 @@ struct ParallelComputationGraphBuilder { parallel_tensor_guid_t const &query, parallel_tensor_guid_t const &key, parallel_tensor_guid_t const &value, - int embed_dim, - int num_heads, - std::optional kdim = std::nullopt, - std::optional vdim = std::nullopt, + nonnegative_int embed_dim, + nonnegative_int num_heads, + std::optional kdim = std::nullopt, + std::optional vdim = std::nullopt, float dropout = 0.0f, bool bias = true, bool add_bias_kv = false, @@ -120,20 +120,20 @@ struct ParallelComputationGraphBuilder { parallel_tensor_guid_t parallel_partition(parallel_tensor_guid_t const &x, ff_dim_t dim, - int degree, + nonnegative_int degree, std::optional const &name = std::nullopt); parallel_tensor_guid_t parallel_combine(parallel_tensor_guid_t const &x, ff_dim_t dim, - int degree, + nonnegative_int degree, std::optional const &name = std::nullopt); parallel_tensor_guid_t parallel_replicate(parallel_tensor_guid_t const &x, - int degree, + nonnegative_int degree, std::optional const &name = std::nullopt); parallel_tensor_guid_t parallel_reduce(parallel_tensor_guid_t const &x, - int degree, + nonnegative_int degree, std::optional const &name = std::nullopt); private: diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_edge.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_edge.h index 7aac8558e4..5bce560020 100644 --- a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_edge.h +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_edge.h @@ -11,7 +11,7 @@ parallel_tensor_guid_t get_parallel_tensor(ParallelComputationGraphEdge const &); parallel_layer_guid_t get_src_layer(ParallelComputationGraphEdge const &); parallel_layer_guid_t get_dst_layer(ParallelComputationGraphEdge const &); -int get_dst_layer_input_idx(ParallelComputationGraphEdge const &); +nonnegative_int get_dst_layer_input_idx(ParallelComputationGraphEdge const &); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.h b/lib/pcg/include/pcg/start_invariant_machine_view.h index f5091c69d1..cdf17213f9 100644 --- a/lib/pcg/include/pcg/start_invariant_machine_view.h +++ b/lib/pcg/include/pcg/start_invariant_machine_view.h @@ -17,7 +17,7 @@ MachineView StartInvariantMachineView 
start_invariant_from_machine_view(MachineView const &mv); -size_t num_dims(StartInvariantMachineView const &mv); +nonnegative_int num_dims(StartInvariantMachineView const &mv); DeviceType get_device_type(StartInvariantMachineView const &mv); diff --git a/lib/pcg/include/pcg/stride_t.struct.toml b/lib/pcg/include/pcg/stride_t.struct.toml index a764497b8b..8d950c5f39 100644 --- a/lib/pcg/include/pcg/stride_t.struct.toml +++ b/lib/pcg/include/pcg/stride_t.struct.toml @@ -9,6 +9,10 @@ features = [ "fmt", ] +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] + [[fields]] name = "unwrapped" -type = "int" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/pcg/include/pcg/task_space_coordinate.struct.toml b/lib/pcg/include/pcg/task_space_coordinate.struct.toml index 65aea167cb..1057676b8e 100644 --- a/lib/pcg/include/pcg/task_space_coordinate.struct.toml +++ b/lib/pcg/include/pcg/task_space_coordinate.struct.toml @@ -11,6 +11,7 @@ features = [ includes = [ "", + "utils/nonnegative_int/nonnegative_int.h", ] src_includes = [ @@ -20,4 +21,4 @@ src_includes = [ [[fields]] name = "raw_coord" -type = "std::vector" +type = "std::vector<::FlexFlow::nonnegative_int>" diff --git a/lib/pcg/src/pcg/computation_graph_builder.cc b/lib/pcg/src/pcg/computation_graph_builder.cc index 2d523c78ac..d7e6ea3291 100644 --- a/lib/pcg/src/pcg/computation_graph_builder.cc +++ b/lib/pcg/src/pcg/computation_graph_builder.cc @@ -375,30 +375,32 @@ tensor_guid_t tensor_guid_t ComputationGraphBuilder::conv2d( tensor_guid_t const &x, - int outChannels, - int kernelH, - int kernelW, - int strideH, - int strideW, - int paddingH, - int paddingW, + nonnegative_int outChannels, + nonnegative_int kernelH, + nonnegative_int kernelW, + nonnegative_int strideH, + nonnegative_int strideW, + nonnegative_int paddingH, + nonnegative_int paddingW, std::optional const &activation, - int groups, + nonnegative_int groups, bool use_bias, std::optional const &kernel_initializer, std::optional const &bias_initializer, std::optional const &kernel_regularizer, std::optional const &maybe_name) { - Conv2DAttrs attrs = Conv2DAttrs{outChannels, - kernelH, - kernelW, - strideH, - strideW, - paddingH, - paddingW, - groups, - activation, - use_bias}; + Conv2DAttrs attrs = Conv2DAttrs{ + /*out_channels=*/outChannels, + /*kernel_h=*/kernelH, + /*kernel_w=*/kernelW, + /*stride_h=*/strideH, + /*stride_w=*/strideW, + /*padding_h=*/paddingH, + /*padding_w=*/paddingW, + /*groups=*/groups, + /*activation=*/activation, + /*use_bias=*/use_bias, + }; std::string name = maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs})); @@ -450,13 +452,18 @@ tensor_guid_t ComputationGraphBuilder::dropout( tensor_guid_t ComputationGraphBuilder::embedding( tensor_guid_t const &x, - int num_entries, - int outDim, + nonnegative_int num_entries, + nonnegative_int outDim, AggregateOp aggr, DataType dtype, std::optional const &kernel_initializer, std::optional const &maybe_name) { - EmbeddingAttrs attrs = EmbeddingAttrs{num_entries, outDim, aggr, dtype}; + EmbeddingAttrs attrs = EmbeddingAttrs{ + /*num_entries=*/num_entries, + /*out_channels=*/outDim, + /*aggr=*/aggr, + /*data_type=*/dtype, + }; std::string name = maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs})); @@ -508,12 +515,12 @@ tensor_guid_t ComputationGraphBuilder::gather( } tensor_guid_t ComputationGraphBuilder::pool2d( tensor_guid_t const &x, - int kernelH, - int kernelW, - int strideH, - int strideW, - int paddingH, - int paddingW, + nonnegative_int kernelH, + nonnegative_int 
kernelW, + nonnegative_int strideH, + nonnegative_int strideW, + nonnegative_int paddingH, + nonnegative_int paddingW, PoolOp type, std::optional const &activation, std::optional const &maybe_name) { @@ -546,8 +553,8 @@ tensor_guid_t ComputationGraphBuilder::pool2d( tensor_guid_t ComputationGraphBuilder::adaptive_pool2d( tensor_guid_t const &uncasted_input, - int output_h, - int output_w, + nonnegative_int output_h, + nonnegative_int output_w, PoolOp type, std::optional const &activation, std::optional const &maybe_name) { @@ -636,10 +643,10 @@ tensor_guid_t ComputationGraphBuilder::multihead_attention( tensor_guid_t const &query, tensor_guid_t const &key, tensor_guid_t const &value, - int embed_dim, - int num_heads, - int kdim, - int vdim, + nonnegative_int embed_dim, + nonnegative_int num_heads, + nonnegative_int kdim, + nonnegative_int vdim, float dropout, bool bias, bool add_bias_kv, @@ -661,14 +668,16 @@ tensor_guid_t ComputationGraphBuilder::multihead_attention( "If you need this functionality, please create an issue."); } - MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{embed_dim, - num_heads, - kdim, - vdim, - dropout, - bias, - add_bias_kv, - add_zero_attn}; + MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{ + /*embed_dim=*/embed_dim, + /*num_heads=*/num_heads, + /*kdim=*/kdim, + /*vdim=*/vdim, + /*dropout=*/dropout, + /*bias=*/bias, + /*add_bias_kv=*/add_bias_kv, + /*add_zero_attn=*/add_zero_attn, + }; std::string name = maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs})); @@ -742,7 +751,7 @@ TensorDims ComputationGraphBuilder::get_broadcast_target_dims( tensor_guid_t ComputationGraphBuilder::dense( tensor_guid_t const &input, - int outDim, + nonnegative_int outDim, std::optional activation, bool use_bias, DataType data_type, @@ -751,8 +760,13 @@ tensor_guid_t ComputationGraphBuilder::dense( std::optional const &maybe_name, std::optional const &projection_name, std::optional const &bias_name) { - LinearAttrs attrs = - LinearAttrs{outDim, use_bias, data_type, activation, std::nullopt}; + LinearAttrs attrs = LinearAttrs{ + /*out_channels=*/outDim, + /*use_bias=*/use_bias, + /*data_type=*/data_type, + /*activation=*/activation, + /*regularizer=*/std::nullopt, + }; std::string name = maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs})); @@ -793,12 +807,11 @@ tensor_guid_t ComputationGraphBuilder::dense( tensor_guid_t ComputationGraphBuilder::concat( std::vector const &inputs, - int axis, + relative_ff_dim_t axis, std::optional const &maybe_name) { - relative_ff_dim_t wrapped_axis = relative_ff_dim_t{axis}; ConcatAttrs attrs = ConcatAttrs{ff_dim_t_from_relative_ff_dim_t( - wrapped_axis, num_dims(this->get_shape(inputs[0])))}; + axis, num_dims(this->get_shape(inputs[0])))}; std::string name = maybe_name.value_or(get_default_name(ComputationGraphOpAttrs{attrs})); @@ -816,17 +829,17 @@ tensor_guid_t ComputationGraphBuilder::concat( tensor_guid_t ComputationGraphBuilder::flat( tensor_guid_t const &input, - int start_dim, - std::optional const &end_dim, + relative_ff_dim_t start_dim, + std::optional const &end_dim, std::optional const &maybe_name) { - int input_num_dims = num_dims(this->get_shape(input)); + nonnegative_int input_num_dims = num_dims(this->get_shape(input)); FlatAttrs attrs = FlatAttrs{ - /*start_dim=*/ff_dim_t_from_relative_ff_dim_t( - relative_ff_dim_t{start_dim}, input_num_dims), + /*start_dim=*/ff_dim_t_from_relative_ff_dim_t(start_dim, input_num_dims), /*end_dim=*/ - ff_dim_t_from_relative_ff_dim_t( - 
relative_ff_dim_t{end_dim.value_or(input_num_dims)}, input_num_dims), + ff_dim_t_from_relative_ff_dim_t(end_dim.value_or(relative_ff_dim_t{ + input_num_dims.unwrap_nonnegative()}), + input_num_dims), }; std::string name = @@ -842,16 +855,15 @@ tensor_guid_t ComputationGraphBuilder::flat( tensor_guid_t ComputationGraphBuilder::layer_norm( tensor_guid_t const &input, - std::vector const &relative_axes, + std::vector const &relative_axes, bool elementwise_affine, float eps, std::optional const &maybe_name) { TensorShape input_shape = this->get_shape(input); - auto resolve_dim_idx = [&](int dim_idx) { - return ff_dim_t_from_relative_ff_dim_t(relative_ff_dim_t{dim_idx}, - num_dims(input_shape)); + auto resolve_dim_idx = [&](relative_ff_dim_t dim_idx) { + return ff_dim_t_from_relative_ff_dim_t(dim_idx, num_dims(input_shape)); }; stack_vector axes = stack_vector_of( @@ -909,15 +921,16 @@ tensor_guid_t ComputationGraphBuilder::layer_norm( tensor_guid_t ComputationGraphBuilder::softmax( tensor_guid_t const &input, - std::optional maybe_dim, + std::optional maybe_dim, std::optional const &maybe_name) { TensorShape input_shape = this->get_shape(input); - int dim = maybe_dim.value_or(num_dims(input_shape) - 1); + relative_ff_dim_t dim = maybe_dim.value_or( + relative_ff_dim_t{num_dims(input_shape).unwrap_nonnegative() - 1}); - SoftmaxAttrs attrs = SoftmaxAttrs{ff_dim_t_from_relative_ff_dim_t( - relative_ff_dim_t{dim}, num_dims(input_shape))}; + SoftmaxAttrs attrs = + SoftmaxAttrs{ff_dim_t_from_relative_ff_dim_t(dim, num_dims(input_shape))}; if (attrs.dim.value >= num_dims(input_shape)) { throw mk_runtime_error( diff --git a/lib/pcg/src/pcg/device_id.cc b/lib/pcg/src/pcg/device_id.cc index a8cfe1f82f..1a4f7b7d22 100644 --- a/lib/pcg/src/pcg/device_id.cc +++ b/lib/pcg/src/pcg/device_id.cc @@ -25,7 +25,7 @@ cpu_id_t unwrap_cpu(device_id_t device_id) { return device_id.get(); } -int get_raw_id(device_id_t device_id) { +nonnegative_int get_raw_id(device_id_t device_id) { switch (get_device_type(device_id)) { case DeviceType::GPU: return unwrap_gpu(device_id).gpu_index; @@ -36,7 +36,7 @@ int get_raw_id(device_id_t device_id) { } } -device_id_t device_id_from_index(int idx, DeviceType device_type) { +device_id_t device_id_from_index(nonnegative_int idx, DeviceType device_type) { switch (device_type) { case DeviceType::GPU: return device_id_t{gpu_id_t{idx}}; diff --git a/lib/pcg/src/pcg/file_format/v1/graphs/v1_dataflow_graph.cc b/lib/pcg/src/pcg/file_format/v1/graphs/v1_dataflow_graph.cc index cf150a339f..064e2d81d3 100644 --- a/lib/pcg/src/pcg/file_format/v1/graphs/v1_dataflow_graph.cc +++ b/lib/pcg/src/pcg/file_format/v1/graphs/v1_dataflow_graph.cc @@ -10,15 +10,15 @@ namespace FlexFlow { V1DataflowGraph to_v1(DataflowGraphView const &g) { - bidict node_enumeration_bidict = + bidict node_enumeration_bidict = bidict_from_enumerating(get_nodes(g)); - std::unordered_map node_enumeration = + std::unordered_map node_enumeration = node_enumeration_bidict.reversed().as_unordered_map(); return to_v1(g, node_enumeration); } V1DataflowGraph to_v1(DataflowGraphView const &g, - std::unordered_map const &nodes) { + std::unordered_map const &nodes) { std::unordered_set edges; for (DataflowEdge const &e : get_edges(g)) { edges.insert(V1GraphEdge{ diff --git a/lib/pcg/src/pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.cc b/lib/pcg/src/pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.cc index d353ccdda3..ac819db342 100644 --- a/lib/pcg/src/pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.cc +++ 
b/lib/pcg/src/pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.cc @@ -1 +1,17 @@ #include "pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using NodeLabel = value_type<0>; +using OutputLabel = value_type<1>; + +template std::pair, + bidict> + to_v1_including_node_numbering( + LabelledDataflowGraphView const &); + +template V1LabelledDataflowGraph + to_v1(LabelledDataflowGraphView const &); + +} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/file_format/v1/v1_binary_sp_decomposition/json.cc b/lib/pcg/src/pcg/file_format/v1/v1_binary_sp_decomposition/json.cc index 5341e03c0a..d39652a7e2 100644 --- a/lib/pcg/src/pcg/file_format/v1/v1_binary_sp_decomposition/json.cc +++ b/lib/pcg/src/pcg/file_format/v1/v1_binary_sp_decomposition/json.cc @@ -21,7 +21,7 @@ V1BinarySPDecomposition }; } else if (type == "leaf") { return V1BinarySPDecomposition{ - j.at("value").get(), + j.at("value").get(), }; } else { throw mk_runtime_error(fmt::format( @@ -45,7 +45,7 @@ void adl_serializer::to_json( j["type"] = "parallel"; return std::monostate{}; }, - [&](int leaf) { + [&](nonnegative_int leaf) { j["value"] = leaf; j["type"] = "leaf"; return std::monostate{}; diff --git a/lib/pcg/src/pcg/file_format/v1/v1_computation_graph.cc b/lib/pcg/src/pcg/file_format/v1/v1_computation_graph.cc index 975e92dfb7..3511ccc269 100644 --- a/lib/pcg/src/pcg/file_format/v1/v1_computation_graph.cc +++ b/lib/pcg/src/pcg/file_format/v1/v1_computation_graph.cc @@ -9,13 +9,14 @@ V1ComputationGraph to_v1(ComputationGraph const &g) { }; } -std::pair> +std::pair> to_v1_including_node_numbering(ComputationGraph const &cg) { - std::pair, bidict> + std::pair, + bidict> raw = to_v1_including_node_numbering(cg.raw_graph); V1ComputationGraph v1_cg = V1ComputationGraph{raw.first}; - bidict v1_node_ids = + bidict v1_node_ids = map_values(raw.second, [](Node const &n) { return layer_guid_t{n}; }); return {v1_cg, v1_node_ids}; diff --git a/lib/pcg/src/pcg/machine_space_offset.cc b/lib/pcg/src/pcg/machine_space_offset.cc index 9990023f8c..4aa79b3d1b 100644 --- a/lib/pcg/src/pcg/machine_space_offset.cc +++ b/lib/pcg/src/pcg/machine_space_offset.cc @@ -17,8 +17,10 @@ MachineSpaceOffset get_machine_space_offset_from_coordinate( fmt::format("{} has different DeviceType from {}", start, coord)); } - return MachineSpaceOffset{coord.node_idx - start.node_idx, - coord.device_idx - start.device_idx, + return MachineSpaceOffset{coord.node_idx.unwrap_nonnegative() - + start.node_idx.unwrap_nonnegative(), + coord.device_idx.unwrap_nonnegative() - + start.device_idx.unwrap_nonnegative(), coord.device_type}; } diff --git a/lib/pcg/src/pcg/machine_specification.cc b/lib/pcg/src/pcg/machine_specification.cc index 19ff50b4b7..0fefeddd27 100644 --- a/lib/pcg/src/pcg/machine_specification.cc +++ b/lib/pcg/src/pcg/machine_specification.cc @@ -4,14 +4,16 @@ #include "utils/exception.h" namespace FlexFlow { -int get_num_gpus(MachineSpecification const &ms) { +nonnegative_int get_num_gpus(MachineSpecification const &ms) { return ms.num_nodes * ms.num_gpus_per_node; } -int get_num_cpus(MachineSpecification const &ms) { + +nonnegative_int get_num_cpus(MachineSpecification const &ms) { return ms.num_nodes * ms.num_cpus_per_node; } -int get_num_devices(MachineSpecification const &ms, - DeviceType const &device_type) { + +nonnegative_int get_num_devices(MachineSpecification const &ms, + DeviceType const &device_type) { switch (device_type) { case DeviceType::GPU: return get_num_gpus(ms); @@ 
-22,8 +24,8 @@ int get_num_devices(MachineSpecification const &ms, } } -int get_num_devices_per_node(MachineSpecification const &ms, - DeviceType const &device_type) { +nonnegative_int get_num_devices_per_node(MachineSpecification const &ms, + DeviceType const &device_type) { switch (device_type) { case DeviceType::GPU: return ms.num_gpus_per_node; @@ -33,6 +35,7 @@ int get_num_devices_per_node(MachineSpecification const &ms, throw mk_runtime_error(fmt::format("Unknown DeviceType {}", device_type)); } } + bool is_valid_machine_space_coordinate(MachineSpecification const &ms, MachineSpaceCoordinate const &coord) { return (coord.node_idx < ms.num_nodes) && @@ -45,7 +48,7 @@ device_id_t get_device_id(MachineSpecification const &ms, throw mk_runtime_error(fmt::format( "Invalid coordinate {} for machine specification {}", ms, coord)); } - int raw_idx = + nonnegative_int raw_idx = coord.node_idx * get_num_devices_per_node(ms, coord.device_type) + coord.device_idx; return device_id_from_index(raw_idx, coord.device_type); diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index cc42ad83b2..fe319dc63c 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -16,6 +16,9 @@ #include "utils/containers/transform.h" #include "utils/containers/zip.h" #include "utils/exception.h" +#include "utils/nonnegative_int/nonnegative_range.h" +#include "utils/nonnegative_int/num_elements.h" + namespace FlexFlow { size_t num_dims(MachineView const &mv) { @@ -71,47 +74,57 @@ std::optional get_machine_space_coordinate( } auto get_dimension_indices_for_dimension = - [&](MachineSpecificationDimension dimension) { - std::vector mv_dimensions = - get_dimensions(machine_view); - return filter(count(mv_dimensions.size()), [&](size_t idx) { - return mv_dimensions.at(idx) == dimension; - }); - }; - - auto compute_index = [&](int start_idx, - std::vector const &dimension_indices) { - std::vector mv_strides = get_strides(machine_view); - - std::vector sizes = transform(dimension_indices, [&](size_t i) { - return task.degrees.at(i) * mv_strides.at(i).unwrapped; - }); - std::vector coord_points = transform( - dimension_indices, [&](size_t i) { return coord.raw_coord.at(i); }); - std::vector strides = transform(dimension_indices, [&](size_t i) { - return mv_strides.at(i).unwrapped; - }); - - std::vector coeffs = scanl(sizes, 1, std::multiplies()); - - int index = start_idx; - for (auto [coeff, coord_point, stride] : - zip(coeffs, coord_points, strides)) { - index += coeff * coord_point * stride; - } - return index; + [&](MachineSpecificationDimension dimension) + -> std::vector { + std::vector mv_dimensions = + get_dimensions(machine_view); + return filter(nonnegative_range(num_elements(mv_dimensions)), + [&](nonnegative_int idx) { + return mv_dimensions.at(idx.unwrap_nonnegative()) == + dimension; + }); }; - std::vector inter_dimension_indices = + auto compute_index = + [&](nonnegative_int start_idx, + std::vector const &dimension_indices) { + std::vector mv_strides = get_strides(machine_view); + + std::vector sizes = + transform(dimension_indices, [&](nonnegative_int i) { + return task.degrees.at(i.unwrap_nonnegative()) * + mv_strides.at(i.unwrap_nonnegative()).unwrapped; + }); + std::vector coord_points = + transform(dimension_indices, [&](nonnegative_int i) { + return coord.raw_coord.at(i.unwrap_nonnegative()); + }); + std::vector strides = + transform(dimension_indices, [&](nonnegative_int i) { + return mv_strides.at(i.unwrap_nonnegative()).unwrapped; + }); + + 
std::vector<nonnegative_int> coeffs = scanl( + sizes, nonnegative_int{1}, std::multiplies<nonnegative_int>()); + + nonnegative_int index = start_idx; + for (auto [coeff, coord_point, stride] : + zip(coeffs, coord_points, strides)) { + index += coeff * coord_point * stride; + } + return index; + }; + + std::vector<nonnegative_int> inter_dimension_indices = get_dimension_indices_for_dimension( MachineSpecificationDimension::INTER_NODE); - std::vector<size_t> intra_dimension_indices = + std::vector<nonnegative_int> intra_dimension_indices = get_dimension_indices_for_dimension( MachineSpecificationDimension::INTRA_NODE); - int node_idx = + nonnegative_int node_idx = compute_index(machine_view.start.node_idx, inter_dimension_indices); - int device_idx = + nonnegative_int device_idx = compute_index(machine_view.start.device_idx, intra_dimension_indices); MachineSpaceCoordinate ms_coord = MachineSpaceCoordinate{ node_idx, device_idx, get_device_type(machine_view)}; diff --git a/lib/pcg/src/pcg/operator_task_space.cc b/lib/pcg/src/pcg/operator_task_space.cc index 7157b75082..57af6eedc7 100644 --- a/lib/pcg/src/pcg/operator_task_space.cc +++ b/lib/pcg/src/pcg/operator_task_space.cc @@ -14,18 +14,23 @@ #include "utils/containers/unordered_set_of.h" #include "utils/containers/vector_of.h" #include "utils/fmt/unordered_set.h" +#include "utils/nonnegative_int/nonnegative_range.h" +#include "utils/nonnegative_int/num_elements.h" + namespace FlexFlow { std::unordered_set<TaskSpaceCoordinate> get_task_space_coordinates(OperatorTaskSpace const &task) { - std::vector<std::vector<int>> coordinate_ranges = transform( - task.degrees, [&](int const &num_points) { return range(num_points); }); + std::vector<std::vector<nonnegative_int>> coordinate_ranges = + transform(task.degrees, [&](nonnegative_int num_points) { + return nonnegative_range(num_points); + }); - std::unordered_set<std::vector<int>> raw_coordinates = + std::unordered_set<std::vector<nonnegative_int>> raw_coordinates = unordered_set_of(cartesian_product(coordinate_ranges)); std::unordered_set<TaskSpaceCoordinate> task_space_coordinates = - transform(raw_coordinates, [](std::vector<int> const &point) { + transform(raw_coordinates, [](std::vector<nonnegative_int> const &point) { return TaskSpaceCoordinate{point}; }); return task_space_coordinates; @@ -36,10 +41,11 @@ TaskSpaceCoordinate return maximum(get_task_space_coordinates(task)); } -size_t num_dims(OperatorTaskSpace const &task) { - return task.degrees.size(); +nonnegative_int num_dims(OperatorTaskSpace const &task) { + return num_elements(task.degrees); } -size_t num_tasks(OperatorTaskSpace const &task) { + +nonnegative_int num_tasks(OperatorTaskSpace const &task) { return product(task.degrees); } @@ -48,7 +54,7 @@ OperatorTaskSpace get_operator_task_space(ParallelComputationGraph const &pcg, parallel_tensor_guid_t out_tensor = get_layer_outputs(pcg, layer).at(0); ParallelTensorShape shape = get_parallel_tensor_shape(pcg, out_tensor); - std::vector<int> degrees; + std::vector<nonnegative_int> degrees; extend(degrees, vector_of(ff_ordered_shard_degrees(shape))); degrees.push_back(get_sum_degree(shape)); degrees.push_back(get_discard_copy_degree(shape)); diff --git a/lib/pcg/src/pcg/parallel_computation_graph/generate_weight_transform.cc b/lib/pcg/src/pcg/parallel_computation_graph/generate_weight_transform.cc index dadad6277f..2cf149f78a 100644 --- a/lib/pcg/src/pcg/parallel_computation_graph/generate_weight_transform.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/generate_weight_transform.cc @@ -9,7 +9,7 @@ std::unordered_set<ParallelOpAttrs> ParallelTensorShape const &goal) { std::unordered_set<ParallelOpAttrs> result; - int sum_degree = get_sum_degree(goal); + nonnegative_int sum_degree = get_sum_degree(goal); if (sum_degree != 1) { throw mk_runtime_error(
fmt::format("generate_weight_transform currently only supports " @@ -17,7 +17,7 @@ std::unordered_set sum_degree)); } - int discard_copy_degree = get_discard_copy_degree(goal); + nonnegative_int discard_copy_degree = get_discard_copy_degree(goal); if (discard_copy_degree != 1) { result.insert(ParallelOpAttrs{ReplicateAttrs{discard_copy_degree}}); } diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc index f33b4dcd17..25f8dc0c5f 100644 --- a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc @@ -107,8 +107,8 @@ parallel_tensor_guid_t ParallelComputationGraphBuilder::batch_matmul( std::optional const &maybe_name) { BatchMatmulAttrs attrs = BatchMatmulAttrs{ - /*a_seq_length_dim=*/-1, - /*b_seq_length_dim=*/-1, + /*a_seq_length_dim=*/std::nullopt, + /*b_seq_length_dim=*/std::nullopt, }; std::string name = @@ -140,30 +140,32 @@ parallel_tensor_guid_t ParallelComputationGraphBuilder::cast( parallel_tensor_guid_t ParallelComputationGraphBuilder::conv2d( parallel_tensor_guid_t const &raw_input, - int outChannels, - int kernelH, - int kernelW, - int strideH, - int strideW, - int paddingH, - int paddingW, + nonnegative_int outChannels, + nonnegative_int kernelH, + nonnegative_int kernelW, + nonnegative_int strideH, + nonnegative_int strideW, + nonnegative_int paddingH, + nonnegative_int paddingW, std::optional const &activation, - int groups, + nonnegative_int groups, bool use_bias, std::optional const &kernel_initializer, std::optional const &bias_initializer, std::optional const &kernel_regularizer, std::optional const &maybe_name) { - Conv2DAttrs attrs = Conv2DAttrs{outChannels, - kernelH, - kernelW, - strideH, - strideW, - paddingH, - paddingW, - groups, - activation, - use_bias}; + Conv2DAttrs attrs = Conv2DAttrs{ + /*out_channels=*/outChannels, + /*kernel_h=*/kernelH, + /*kernel_w=*/kernelW, + /*stride_h=*/strideH, + /*stride_w=*/strideW, + /*padding_h=*/paddingH, + /*padding_w=*/paddingW, + /*groups=*/groups, + /*activation=*/activation, + /*use_bias=*/use_bias, + }; std::string name = maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); @@ -191,7 +193,7 @@ parallel_tensor_guid_t ParallelComputationGraphBuilder::conv2d( parallel_tensor_guid_t ParallelComputationGraphBuilder::dense( parallel_tensor_guid_t const &input, - int outDim, + nonnegative_int outDim, std::optional activation, bool use_bias, DataType data_type, @@ -199,11 +201,11 @@ parallel_tensor_guid_t ParallelComputationGraphBuilder::dense( std::optional const &bias_initializer, std::optional const &maybe_name) { LinearAttrs attrs = LinearAttrs{ - outDim, - use_bias, - data_type, - activation, - std::nullopt, + /*out_channels=*/outDim, + /*use_bias=*/use_bias, + /*data_type=*/data_type, + /*activation=*/activation, + /*regularizer=*/std::nullopt, }; std::string name = @@ -238,18 +240,18 @@ parallel_tensor_guid_t ParallelComputationGraphBuilder::dense( parallel_tensor_guid_t ParallelComputationGraphBuilder::embedding( parallel_tensor_guid_t const &input, - int num_entries, - int outDim, + nonnegative_int num_entries, + nonnegative_int outDim, AggregateOp aggr, DataType dtype, std::optional const &kernel_initializer, std::optional const &maybe_name) { EmbeddingAttrs attrs = EmbeddingAttrs{ - num_entries, - outDim, - aggr, - dtype, + /*num_entries=*/num_entries, + 
/*out_channels=*/outDim, + /*aggr=*/aggr, + /*data_type=*/dtype, }; std::string name = @@ -273,10 +275,10 @@ parallel_tensor_guid_t ParallelComputationGraphBuilder::multihead_attention( parallel_tensor_guid_t const &query, parallel_tensor_guid_t const &key, parallel_tensor_guid_t const &value, - int embed_dim, - int num_heads, - std::optional maybe_kdim, - std::optional maybe_vdim, + nonnegative_int embed_dim, + nonnegative_int num_heads, + std::optional maybe_kdim, + std::optional maybe_vdim, float dropout, bool bias, bool add_bias_kv, @@ -286,8 +288,8 @@ parallel_tensor_guid_t ParallelComputationGraphBuilder::multihead_attention( std::optional output_bias_initializer, std::optional const &maybe_name) { - int kdim = maybe_kdim.value_or(embed_dim); - int vdim = maybe_vdim.value_or(embed_dim); + nonnegative_int kdim = maybe_kdim.value_or(embed_dim); + nonnegative_int vdim = maybe_vdim.value_or(embed_dim); MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{ /*embed_dim=*/embed_dim, @@ -490,10 +492,13 @@ parallel_tensor_guid_t ParallelComputationGraphBuilder::elu( parallel_tensor_guid_t ParallelComputationGraphBuilder::parallel_partition( parallel_tensor_guid_t const &input, ff_dim_t dim, - int degree, + nonnegative_int degree, std::optional const &maybe_name) { - RepartitionAttrs attrs = RepartitionAttrs{dim, degree}; + RepartitionAttrs attrs = RepartitionAttrs{ + /*repartition_dim=*/dim, + /*repartition_degree=*/degree, + }; std::string name = maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); @@ -509,10 +514,13 @@ parallel_tensor_guid_t ParallelComputationGraphBuilder::parallel_partition( parallel_tensor_guid_t ParallelComputationGraphBuilder::parallel_combine( parallel_tensor_guid_t const &input, ff_dim_t dim, - int degree, + nonnegative_int degree, std::optional const &maybe_name) { - CombineAttrs attrs = CombineAttrs{dim, degree}; + CombineAttrs attrs = CombineAttrs{ + /*combine_dim=*/dim, + /*combine_degree=*/degree, + }; std::string name = maybe_name.value_or(get_default_name(PCGOperatorAttrs{attrs})); @@ -527,7 +535,7 @@ parallel_tensor_guid_t ParallelComputationGraphBuilder::parallel_combine( parallel_tensor_guid_t ParallelComputationGraphBuilder::parallel_replicate( parallel_tensor_guid_t const &input, - int degree, + nonnegative_int degree, std::optional const &maybe_name) { ReplicateAttrs attrs = ReplicateAttrs{degree}; @@ -545,7 +553,7 @@ parallel_tensor_guid_t ParallelComputationGraphBuilder::parallel_replicate( parallel_tensor_guid_t ParallelComputationGraphBuilder::parallel_reduce( parallel_tensor_guid_t const &input, - int degree, + nonnegative_int degree, std::optional const &maybe_name) { ReductionAttrs attrs = ReductionAttrs{degree}; @@ -661,7 +669,7 @@ std::vector ParallelComputationGraphBuilder::add_layer( std::vector raw_weight_tensors; for (auto const &kv : enumerate_vector(weights)) { - int weight_idx = kv.first; + nonnegative_int weight_idx = kv.first; ParallelTensorAttrs weight_tensor_attrs = kv.second; std::optional weight_name = diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_edge.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_edge.cc index d30739486e..f37d08dc8a 100644 --- a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_edge.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_edge.cc @@ -16,7 +16,7 @@ parallel_layer_guid_t get_dst_layer(ParallelComputationGraphEdge const &e) { return parallel_layer_guid_t{e.raw_edge.dst.node}; } 
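For reference, a minimal sketch of the nonnegative_int idiom that the hunks above and below apply throughout. It assumes only the operations visible in this patch (the _n literal, wrapped arithmetic, unwrap_nonnegative(), and nonnegative_range/num_elements), and that nonnegative_range returns an iterable of nonnegative_int, as its use with filter elsewhere in the patch suggests:

#include "utils/nonnegative_int/nonnegative_int.h"
#include "utils/nonnegative_int/nonnegative_range.h"
#include "utils/nonnegative_int/num_elements.h"
#include <vector>

using namespace ::FlexFlow;

void nonnegative_int_idiom_sketch() {
  // Construction: the user-defined literal and the explicit wrapper
  // constructor are interchangeable.
  nonnegative_int degree = 2_n;
  nonnegative_int raw_idx = nonnegative_int{2 * 16 + 12};

  // Arithmetic stays within the wrapped type, so negative intermediate
  // values are ruled out by construction.
  nonnegative_int flattened = degree * raw_idx + 1_n;

  // unwrap_nonnegative() recovers the underlying int at boundaries that
  // still traffic in plain integers (e.g. std::vector::at below).
  std::vector<nonnegative_int> degrees = {2_n, 3_n};
  for (nonnegative_int i : nonnegative_range(num_elements(degrees))) {
    flattened += degrees.at(i.unwrap_nonnegative());
  }
  (void)flattened;
}

The payoff of the type is that indices, degrees, and device counts can no longer silently go negative; conversions back to int are confined to explicit unwrap_nonnegative() call sites.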
-int get_dst_layer_input_idx(ParallelComputationGraphEdge const &e) { +nonnegative_int get_dst_layer_input_idx(ParallelComputationGraphEdge const &e) { return e.raw_edge.dst.idx; } diff --git a/lib/pcg/src/pcg/start_invariant_machine_view.cc b/lib/pcg/src/pcg/start_invariant_machine_view.cc index 1fcc3ea12f..e9f864d416 100644 --- a/lib/pcg/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/src/pcg/start_invariant_machine_view.cc @@ -7,6 +7,7 @@ #include "utils/containers/scanl.h" #include "utils/containers/transform.h" #include "utils/containers/zip.h" +#include "utils/nonnegative_int/num_elements.h" namespace FlexFlow { MachineView machine_view_from_start_invariant( @@ -20,8 +21,8 @@ StartInvariantMachineView return StartInvariantMachineView{mv.dimensions, get_device_type(mv)}; } -size_t num_dims(StartInvariantMachineView const &start_inv_mv) { - return start_inv_mv.dimensions.size(); +nonnegative_int num_dims(StartInvariantMachineView const &start_inv_mv) { + return num_elements(start_inv_mv.dimensions); } DeviceType get_device_type(StartInvariantMachineView const &start_inv_mv) { @@ -59,7 +60,7 @@ std::optional get_machine_space_offset( TaskSpaceCoordinate const &coord, MachineSpecification const &machine_specification) { MachineSpaceCoordinate dummy_start = - MachineSpaceCoordinate{0, 0, get_device_type(start_inv_machine_view)}; + MachineSpaceCoordinate{0_n, 0_n, get_device_type(start_inv_machine_view)}; MachineView mv = machine_view_from_start_invariant(start_inv_machine_view, dummy_start); std::optional ms_coord = diff --git a/lib/pcg/test/src/pcg/computation_graph.cc b/lib/pcg/test/src/pcg/computation_graph.cc index e2ed51b2f1..d92d65ad7b 100644 --- a/lib/pcg/test/src/pcg/computation_graph.cc +++ b/lib/pcg/test/src/pcg/computation_graph.cc @@ -13,9 +13,9 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraphBuilder b; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - 10, - 12, + TensorDims{FFOrdered{ + 10_n, + 12_n, }}, DataType::FLOAT, }; @@ -40,9 +40,9 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraphBuilder b; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - 10, - 12, + TensorDims{FFOrdered{ + 10_n, + 12_n, }}, DataType::FLOAT, }; @@ -66,16 +66,16 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraphBuilder b; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - 10, - 12, + TensorDims{FFOrdered{ + 10_n, + 12_n, }}, DataType::FLOAT, }; tensor_guid_t input = b.create_input(input_shape, CreateGrad::YES); b.dense(input, - /*outDim=*/14, + /*outDim=*/14_n, /*activation=*/Activation::RELU, /*use_bias=*/true, /*data_type=*/DataType::FLOAT, @@ -103,9 +103,9 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraphBuilder b; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - 10, - 12, + TensorDims{FFOrdered{ + 10_n, + 12_n, }}, DataType::FLOAT, }; @@ -131,9 +131,9 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraphBuilder b; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - 10, - 12, + TensorDims{FFOrdered{ + 10_n, + 12_n, }}, DataType::FLOAT, }; @@ -161,16 +161,16 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraphBuilder b; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - 10, - 12, + TensorDims{FFOrdered{ + 10_n, + 12_n, }}, DataType::FLOAT, }; tensor_guid_t input = b.create_input(input_shape, CreateGrad::YES); b.dense(input, - /*outDim=*/14, + /*outDim=*/14_n, /*activation=*/Activation::RELU, /*use_bias=*/true, /*data_type=*/DataType::FLOAT, diff --git a/lib/pcg/test/src/pcg/computation_graph_builder.cc 
b/lib/pcg/test/src/pcg/computation_graph_builder.cc index e7fa853be9..98a4e2a241 100644 --- a/lib/pcg/test/src/pcg/computation_graph_builder.cc +++ b/lib/pcg/test/src/pcg/computation_graph_builder.cc @@ -8,22 +8,22 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ComputationGraphBuilder") { ComputationGraphBuilder b; - size_t batch_size = 2; + nonnegative_int batch_size = 2_n; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{batch_size, 3, 10, 10}}, + TensorDims{FFOrdered{batch_size, 3_n, 10_n, 10_n}}, DataType::FLOAT, }; tensor_guid_t input = b.create_input(input_shape, CreateGrad::YES); tensor_guid_t output = b.conv2d(input, - /*outChannels=*/5, - /*kernelH=*/3, - /*kernelW=*/3, - /*strideH=*/1, - /*strideW=*/1, - /*paddingH=*/0, - /*paddingW=*/0); + /*outChannels=*/5_n, + /*kernelH=*/3_n, + /*kernelW=*/3_n, + /*strideH=*/1_n, + /*strideW=*/1_n, + /*paddingH=*/0_n, + /*paddingW=*/0_n); // ComputationGraph cg = b.computation_graph; // CHECK(get_layers(cg).size() == 1); } diff --git a/lib/pcg/test/src/pcg/file_format/v1/v1_binary_sp_decomposition/json.cc b/lib/pcg/test/src/pcg/file_format/v1/v1_binary_sp_decomposition/json.cc index 9068e14517..4102efd48e 100644 --- a/lib/pcg/test/src/pcg/file_format/v1/v1_binary_sp_decomposition/json.cc +++ b/lib/pcg/test/src/pcg/file_format/v1/v1_binary_sp_decomposition/json.cc @@ -9,11 +9,11 @@ TEST_SUITE(FF_TEST_SUITE) { V1BinarySeriesSplit{ V1BinarySPDecomposition{ V1BinaryParallelSplit{ - V1BinarySPDecomposition{2}, - V1BinarySPDecomposition{2}, + V1BinarySPDecomposition{2_n}, + V1BinarySPDecomposition{2_n}, }, }, - V1BinarySPDecomposition{3}, + V1BinarySPDecomposition{3_n}, }, }; @@ -68,11 +68,11 @@ TEST_SUITE(FF_TEST_SUITE) { V1BinarySeriesSplit example_split = V1BinarySeriesSplit{ V1BinarySPDecomposition{ V1BinaryParallelSplit{ - V1BinarySPDecomposition{2}, - V1BinarySPDecomposition{2}, + V1BinarySPDecomposition{2_n}, + V1BinarySPDecomposition{2_n}, }, }, - V1BinarySPDecomposition{3}, + V1BinarySPDecomposition{3_n}, }; nlohmann::json example_json = { @@ -124,11 +124,11 @@ TEST_SUITE(FF_TEST_SUITE) { V1BinaryParallelSplit example_split = V1BinaryParallelSplit{ V1BinarySPDecomposition{ V1BinaryParallelSplit{ - V1BinarySPDecomposition{2}, - V1BinarySPDecomposition{2}, + V1BinarySPDecomposition{2_n}, + V1BinarySPDecomposition{2_n}, }, }, - V1BinarySPDecomposition{3}, + V1BinarySPDecomposition{3_n}, }; nlohmann::json example_json = { diff --git a/lib/pcg/test/src/pcg/file_format/v1/v1_computation_graph.cc b/lib/pcg/test/src/pcg/file_format/v1/v1_computation_graph.cc index 8336d81bb4..59c606adb1 100644 --- a/lib/pcg/test/src/pcg/file_format/v1/v1_computation_graph.cc +++ b/lib/pcg/test/src/pcg/file_format/v1/v1_computation_graph.cc @@ -10,15 +10,15 @@ TEST_SUITE(FF_TEST_SUITE) { ComputationGraphBuilder b; TensorShape input_shape = TensorShape{ - TensorDims{FFOrdered{ - 12, - 16, + TensorDims{FFOrdered{ + 12_n, + 16_n, }}, DataType::FLOAT, }; tensor_guid_t input = b.create_input(input_shape, CreateGrad::YES); - tensor_guid_t mm_output = b.dense(input, 8); + tensor_guid_t mm_output = b.dense(input, 8_n); tensor_guid_t relu_output = b.relu(mm_output); return b.computation_graph; diff --git a/lib/pcg/test/src/pcg/file_format/v1/v1_parallel_computation_graph.cc b/lib/pcg/test/src/pcg/file_format/v1/v1_parallel_computation_graph.cc index 8ce25c4bc5..682cf2d798 100644 --- a/lib/pcg/test/src/pcg/file_format/v1/v1_parallel_computation_graph.cc +++ b/lib/pcg/test/src/pcg/file_format/v1/v1_parallel_computation_graph.cc @@ -12,19 +12,19 @@ 
TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{12, 2}, - ShardParallelDim{16, 1}, + ShardParallelDim{12_n, 2_n}, + ShardParallelDim{16_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, }; parallel_tensor_guid_t input = b.create_input_tensor(input_shape); - parallel_tensor_guid_t mm_output = b.dense(input, 8); + parallel_tensor_guid_t mm_output = b.dense(input, 8_n); parallel_tensor_guid_t relu_output = b.relu(mm_output); return b.pcg; diff --git a/lib/pcg/test/src/pcg/machine_specification.cc b/lib/pcg/test/src/pcg/machine_specification.cc index c183ae0d31..6d339350a0 100644 --- a/lib/pcg/test/src/pcg/machine_specification.cc +++ b/lib/pcg/test/src/pcg/machine_specification.cc @@ -7,11 +7,10 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("MachineSpecification") { - MachineSpecification ms = MachineSpecification{ - /*num_nodes=*/4, - /*num_cpus_per_node=*/16, - /*num_gpus_per_node=*/8, + /*num_nodes=*/4_n, + /*num_cpus_per_node=*/16_n, + /*num_gpus_per_node=*/8_n, /*inter_node_bandwidth=*/0, /*intra_node_bandwidth=*/0, }; @@ -32,19 +31,19 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("get_device_id") { SUBCASE("valid MachineSpaceCoordinate") { MachineSpaceCoordinate coord = MachineSpaceCoordinate{ - /*node_idx=*/2, - /*device_idx=*/12, + /*node_idx=*/2_n, + /*device_idx=*/12_n, DeviceType::CPU, }; device_id_t correct = - device_id_from_index(2 * 16 + 12, DeviceType::CPU); + device_id_from_index(nonnegative_int{2 * 16 + 12}, DeviceType::CPU); device_id_t result = get_device_id(ms, coord); CHECK(correct == result); } SUBCASE("MachineSpaceCoordinate out of bounds for given machine spec") { MachineSpaceCoordinate coord = MachineSpaceCoordinate{ - /*node_idx=*/2, - /*device_idx=*/18, + /*node_idx=*/2_n, + /*device_idx=*/18_n, DeviceType::CPU, }; CHECK_THROWS(get_device_id(ms, coord)); diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index 3e9d48fac3..e286f08bf2 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -12,10 +12,10 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("MachineView - utility functions") { MachineView mv = MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU}, - {MachineViewDimension{stride_t{2}, + /*node_idx=*/0_n, /*device_idx=*/0_n, DeviceType::GPU}, + {MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{2}, + MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTER_NODE}}}; SUBCASE("num_dims") { @@ -43,48 +43,48 @@ TEST_SUITE(FF_TEST_SUITE) { * Where the (x,) are the `TaskSpaceCoordinate`s, and the underlying grid * is the machine space. 
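* (Worked arithmetic for this case: with start device 1 and intra-node
* stride 2, task coordinate (k,) lands on device 1 + 2*k, i.e. devices
* 1, 3, and 5 for k = 0, 1, 2, matching the SUBCASEs below.)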
*/ - OperatorTaskSpace task = OperatorTaskSpace{{3}}; + OperatorTaskSpace task = OperatorTaskSpace{{3_n}}; MachineView mv = MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}, - {MachineViewDimension{stride_t{2}, + /*node_idx=*/0_n, /*device_idx=*/1_n, DeviceType::GPU}, + {MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE}}}; MachineSpecification ms = - MachineSpecification{/*num_nodes=*/1, - /*num_cpus_per_node=*/6, - /*num_gpus_per_node=*/6, + MachineSpecification{/*num_nodes=*/1_n, + /*num_cpus_per_node=*/6_n, + /*num_gpus_per_node=*/6_n, /*inter_node_bandwidth=*/0, /*intra_node_bandwidth=*/0}; SUBCASE("Task with TaskSpaceCoordinate = (0,)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}; + /*node_idx=*/0_n, /*device_idx=*/1_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (1,)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/3, DeviceType::GPU}; + /*node_idx=*/0_n, /*device_idx=*/3_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (2,)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{2}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{2_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/5, DeviceType::GPU}; + /*node_idx=*/0_n, /*device_idx=*/5_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } SUBCASE("TaskSpaceCoordinate is out of bounds") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{4}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{4_n}}; std::optional result = get_machine_space_coordinate(task, mv, coord, ms); std::optional correct = std::nullopt; @@ -112,52 +112,52 @@ TEST_SUITE(FF_TEST_SUITE) { * grid is the machine space. 
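* (Worked arithmetic for this case: the inter-node dimension advances the
* node index in stride-1 steps from start node 1, and the intra-node
* dimension advances the device index in stride-2 steps from start
* device 2, so (i,j) maps to node 1 + i, device 2 + 2*j.)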
*/ - OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; + OperatorTaskSpace task = OperatorTaskSpace{{2_n, 2_n}}; MachineView mv = MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/1, /*device_idx=*/2, DeviceType::GPU}, - {MachineViewDimension{stride_t{1}, + /*node_idx=*/1_n, /*device_idx=*/2_n, DeviceType::GPU}, + {MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{2}, + MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE}}}; MachineSpecification ms = - MachineSpecification{/*num_nodes=*/3, - /*num_cpus_per_node=*/5, - /*num_gpus_per_node=*/5, + MachineSpecification{/*num_nodes=*/3_n, + /*num_cpus_per_node=*/5_n, + /*num_gpus_per_node=*/5_n, /*inter_node_bandwidth=*/0, /*intra_node_bandwidth=*/0}; SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 0}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 0_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, /*device_idx=*/2, DeviceType::GPU}; + /*node_idx=*/1_n, /*device_idx=*/2_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 1_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, /*device_idx=*/4, DeviceType::GPU}; + /*node_idx=*/1_n, /*device_idx=*/4_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (1,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 0_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/2, /*device_idx=*/2, DeviceType::GPU}; + /*node_idx=*/2_n, /*device_idx=*/2_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 1}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 1_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/2, /*device_idx=*/4, DeviceType::GPU}; + /*node_idx=*/2_n, /*device_idx=*/4_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); @@ -179,52 +179,52 @@ TEST_SUITE(FF_TEST_SUITE) { * grid is the machine space. 
*/ - OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; + OperatorTaskSpace task = OperatorTaskSpace{{2_n, 2_n}}; MachineView mv = MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/1, /*device_idx=*/0, DeviceType::GPU}, - {MachineViewDimension{stride_t{1}, + /*node_idx=*/1_n, /*device_idx=*/0_n, DeviceType::GPU}, + {MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE}, - MachineViewDimension{stride_t{2}, + MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE}}}; MachineSpecification ms = - MachineSpecification{/*num_nodes=*/2, - /*num_cpus_per_node=*/6, - /*num_gpus_per_node=*/6, + MachineSpecification{/*num_nodes=*/2_n, + /*num_cpus_per_node=*/6_n, + /*num_gpus_per_node=*/6_n, /*inter_node_bandwidth=*/0, /*intra_node_bandwidth=*/0}; SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 0}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 0_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, /*device_idx=*/0, DeviceType::GPU}; + /*node_idx=*/1_n, /*device_idx=*/0_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 1_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, /*device_idx=*/4, DeviceType::GPU}; + /*node_idx=*/1_n, /*device_idx=*/4_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (1,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 0_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, /*device_idx=*/1, DeviceType::GPU}; + /*node_idx=*/1_n, /*device_idx=*/1_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 1}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 1_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, /*device_idx=*/5, DeviceType::GPU}; + /*node_idx=*/1_n, /*device_idx=*/5_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); @@ -253,45 +253,45 @@ TEST_SUITE(FF_TEST_SUITE) { * grid is the machine space. 
*/ - OperatorTaskSpace task = OperatorTaskSpace{{2, 2, 2}}; + OperatorTaskSpace task = OperatorTaskSpace{{2_n, 2_n, 2_n}}; MachineView mv = MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}, - {MachineViewDimension{stride_t{1}, + /*node_idx=*/0_n, /*device_idx=*/1_n, DeviceType::GPU}, + {MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{2}, + MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE}, - MachineViewDimension{stride_t{1}, + MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTRA_NODE}}}; MachineSpecification ms = - MachineSpecification{/*num_nodes=*/2, - /*num_cpus_per_node=*/8, - /*num_gpus_per_node=*/8, + MachineSpecification{/*num_nodes=*/2_n, + /*num_cpus_per_node=*/8_n, + /*num_gpus_per_node=*/8_n, /*inter_node_bandwidth=*/0, /*intra_node_bandwidth=*/0}; SUBCASE("Task with TaskSpaceCoordinate = (0,1,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1, 0}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 1_n, 0_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/3, DeviceType::GPU}; + /*node_idx=*/0_n, /*device_idx=*/3_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (1,0,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0, 1}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 0_n, 1_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, /*device_idx=*/5, DeviceType::GPU}; + /*node_idx=*/1_n, /*device_idx=*/5_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (1,1,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 1, 1}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 1_n, 1_n}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, /*device_idx=*/7, DeviceType::GPU}; + /*node_idx=*/1_n, /*device_idx=*/7_n, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); @@ -319,23 +319,23 @@ TEST_SUITE(FF_TEST_SUITE) { * select */ MachineSpecification ms = - MachineSpecification{/*num_nodes=*/1, - /*num_cpus_per_node=*/6, - /*num_gpus_per_node=*/6, + MachineSpecification{/*num_nodes=*/1_n, + /*num_cpus_per_node=*/6_n, + /*num_gpus_per_node=*/6_n, /*inter_node_bandwidth=*/0, /*intra_node_bandwidth=*/0}; - OperatorTaskSpace task = OperatorTaskSpace{{3}}; + OperatorTaskSpace task = OperatorTaskSpace{{3_n}}; MachineView mv = MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}, - {MachineViewDimension{stride_t{2}, + /*node_idx=*/0_n, /*device_idx=*/1_n, DeviceType::GPU}, + {MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE}}}; std::unordered_set<device_id_t> correct = { - device_id_t{gpu_id_t{1}}, - device_id_t{gpu_id_t{3}}, - device_id_t{gpu_id_t{5}}, + device_id_t{gpu_id_t{1_n}}, + device_id_t{gpu_id_t{3_n}}, + device_id_t{gpu_id_t{5_n}}, }; std::unordered_set<device_id_t> result = get_device_ids(task, mv, ms); CHECK(result == correct); @@ -364,26 +364,26 @@ TEST_SUITE(FF_TEST_SUITE) { */ MachineSpecification ms = - MachineSpecification{/*num_nodes=*/3, - /*num_cpus_per_node=*/5, - /*num_gpus_per_node=*/5, +
MachineSpecification{/*num_nodes=*/3_n, + /*num_cpus_per_node=*/5_n, + /*num_gpus_per_node=*/5_n, /*inter_node_bandwidth=*/0, /*intra_node_bandwidth=*/0}; - OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; + OperatorTaskSpace task = OperatorTaskSpace{{2_n, 2_n}}; MachineView mv = MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/1, /*device_idx=*/2, DeviceType::GPU}, - {MachineViewDimension{stride_t{1}, + /*node_idx=*/1_n, /*device_idx=*/2_n, DeviceType::GPU}, + {MachineViewDimension{stride_t{1_n}, MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{2}, + MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE}}}; std::unordered_set<device_id_t> correct = { - device_id_t{gpu_id_t{7}}, - device_id_t{gpu_id_t{9}}, - device_id_t{gpu_id_t{12}}, - device_id_t{gpu_id_t{14}}, + device_id_t{gpu_id_t{7_n}}, + device_id_t{gpu_id_t{9_n}}, + device_id_t{gpu_id_t{12_n}}, + device_id_t{gpu_id_t{14_n}}, }; std::unordered_set<device_id_t> result = get_device_ids(task, mv, ms); CHECK(result == correct); diff --git a/lib/pcg/test/src/pcg/operator_task_space.cc b/lib/pcg/test/src/pcg/operator_task_space.cc index 13198d9456..fa06af3635 100644 --- a/lib/pcg/test/src/pcg/operator_task_space.cc +++ b/lib/pcg/test/src/pcg/operator_task_space.cc @@ -18,13 +18,13 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("OperatorTaskSpace has 2 dimensions") { - OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; + OperatorTaskSpace task = OperatorTaskSpace{{2_n, 2_n}}; std::unordered_set<TaskSpaceCoordinate> correct = {{ - TaskSpaceCoordinate{{0, 0}}, - TaskSpaceCoordinate{{0, 1}}, - TaskSpaceCoordinate{{1, 0}}, - TaskSpaceCoordinate{{1, 1}}, + TaskSpaceCoordinate{{0_n, 0_n}}, + TaskSpaceCoordinate{{0_n, 1_n}}, + TaskSpaceCoordinate{{1_n, 0_n}}, + TaskSpaceCoordinate{{1_n, 1_n}}, }}; std::unordered_set<TaskSpaceCoordinate> result = get_task_space_coordinates(task); @@ -32,13 +32,13 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("OperatorTaskSpace has 3 dimensions") { - OperatorTaskSpace task = OperatorTaskSpace{{1, 2, 2}}; + OperatorTaskSpace task = OperatorTaskSpace{{1_n, 2_n, 2_n}}; std::unordered_set<TaskSpaceCoordinate> correct = {{ - TaskSpaceCoordinate{{0, 0, 0}}, - TaskSpaceCoordinate{{0, 0, 1}}, - TaskSpaceCoordinate{{0, 1, 0}}, - TaskSpaceCoordinate{{0, 1, 1}}, + TaskSpaceCoordinate{{0_n, 0_n, 0_n}}, + TaskSpaceCoordinate{{0_n, 0_n, 1_n}}, + TaskSpaceCoordinate{{0_n, 1_n, 0_n}}, + TaskSpaceCoordinate{{0_n, 1_n, 1_n}}, }}; std::unordered_set<TaskSpaceCoordinate> result = get_task_space_coordinates(task); @@ -48,17 +48,17 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_task_space_maximum_coordinate") { SUBCASE("OperatorTaskSpace has 2 dimensions") { - OperatorTaskSpace task = OperatorTaskSpace{{3, 2}}; + OperatorTaskSpace task = OperatorTaskSpace{{3_n, 2_n}}; - TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2, 1}}; + TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2_n, 1_n}}; TaskSpaceCoordinate result = get_task_space_maximum_coordinate(task); CHECK(correct == result); } SUBCASE("OperatorTaskSpace has 3 dimensions") { - OperatorTaskSpace task = OperatorTaskSpace{{3, 2, 4}}; + OperatorTaskSpace task = OperatorTaskSpace{{3_n, 2_n, 4_n}}; - TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2, 1, 3}}; + TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2_n, 1_n, 3_n}}; TaskSpaceCoordinate result = get_task_space_maximum_coordinate(task); CHECK(correct == result); } diff --git a/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc index dd8308561f..979a96d204 100644 ---
a/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc +++ b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc @@ -45,12 +45,12 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{10, 2}, - ShardParallelDim{12, 1}, + ShardParallelDim{10_n, 2_n}, + ShardParallelDim{12_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -84,7 +84,7 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_tensor_guid_t input = b.create_input_tensor(input_shape, CreateGrad::YES); b.dense(input, - /*outDim=*/14, + /*outDim=*/14_n, /*activation=*/Activation::RELU, /*use_bias=*/true, /*data_type=*/DataType::FLOAT, @@ -110,12 +110,12 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape tensor_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{10, 2}, - ShardParallelDim{12, 1}, + ShardParallelDim{10_n, 2_n}, + ShardParallelDim{12_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -186,12 +186,12 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{10, 2}, - ShardParallelDim{12, 1}, + ShardParallelDim{10_n, 2_n}, + ShardParallelDim{12_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -246,7 +246,7 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelComputationGraph pcg = empty_parallel_computation_graph(); LinearAttrs op_attrs = LinearAttrs{ - /*out_channels=*/14, + /*out_channels=*/14_n, /*use_bias=*/false, /*data_type=*/DataType::FLOAT, /*activation=*/Activation::RELU, @@ -293,7 +293,7 @@ TEST_SUITE(FF_TEST_SUITE) { {}, {raw_projection_tensor_attrs}); - ReplicateAttrs replicate_attrs = ReplicateAttrs{/*degree=*/2}; + ReplicateAttrs replicate_attrs = ReplicateAttrs{/*degree=*/2_n}; ParallelLayerAttrs replicate_layer_attrs = ParallelLayerAttrs{ PCGOperatorAttrs{replicate_attrs}, std::nullopt, @@ -346,12 +346,12 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape tensor_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{12, 2}, - ShardParallelDim{10, 1}, + ShardParallelDim{12_n, 2_n}, + ShardParallelDim{10_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{2}, - DiscardCopyDegree{2}, + SumDegree{2_n}, + DiscardCopyDegree{2_n}, }, }, DataType::FLOAT, diff --git a/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc index 3f66b33b6e..ef3173d744 100644 --- a/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc +++ b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc @@ -26,18 +26,18 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ParallelComputationGraphBuilder::add") { ParallelComputationGraphBuilder b; - ShardParallelDim d1 = ShardParallelDim{10, 2}; - ShardParallelDim d2 = ShardParallelDim{15, 3}; + ShardParallelDim d1 = ShardParallelDim{10_n, 2_n}; + ShardParallelDim d2 = ShardParallelDim{15_n, 3_n}; ParallelTensorShape lhs_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{10, 2}, - ShardParallelDim{15, 3}, + ShardParallelDim{10_n, 2_n}, + ShardParallelDim{15_n, 3_n}, }, ReplicaParallelDimSet{ - 
SumDegree{2}, - DiscardCopyDegree{1}, + SumDegree{2_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -76,18 +76,18 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ParallelComputationGraphBuilder::batch_matmul") { ParallelComputationGraphBuilder b; - ShardParallelDim batch_dim = ShardParallelDim{4, 2}; + ShardParallelDim batch_dim = ShardParallelDim{4_n, 2_n}; ParallelTensorShape a_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ batch_dim, - ShardParallelDim{10, 1}, - ShardParallelDim{15, 3}, + ShardParallelDim{10_n, 1_n}, + ShardParallelDim{15_n, 3_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -97,12 +97,12 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorDims{ FFOrdered{ batch_dim, - ShardParallelDim{15, 3}, - ShardParallelDim{12, 1}, + ShardParallelDim{15_n, 3_n}, + ShardParallelDim{12_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -130,7 +130,8 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("op attrs") { PCGOperatorAttrs result = get_parallel_layer_attrs(b.pcg, layer).op_attrs; - PCGOperatorAttrs correct = PCGOperatorAttrs{BatchMatmulAttrs{-1, -1}}; + PCGOperatorAttrs correct = + PCGOperatorAttrs{BatchMatmulAttrs{std::nullopt, std::nullopt}}; CHECK(result == correct); } } @@ -141,12 +142,12 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{10, 2}, - ShardParallelDim{12, 1}, + ShardParallelDim{10_n, 2_n}, + ShardParallelDim{12_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{3}, - DiscardCopyDegree{1}, + SumDegree{3_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -179,28 +180,28 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ParallelComputationGraphBuilder::conv2d") { ParallelComputationGraphBuilder b; - size_t batch_size = 2; + nonnegative_int batch_size = 2_n; TensorShape unpar_input_shape = TensorShape{ - TensorDims{FFOrdered{batch_size, 3, 10, 10}}, + TensorDims{FFOrdered{batch_size, 3_n, 10_n, 10_n}}, DataType::FLOAT, }; - ParallelTensorShape input_shape = - lift_to_parallel_with_degrees(unpar_input_shape, - SumDegree{1}, - DiscardCopyDegree{1}, - FFOrdered{2, 1, 1, 1}); + ParallelTensorShape input_shape = lift_to_parallel_with_degrees( + unpar_input_shape, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + FFOrdered{2_n, 1_n, 1_n, 1_n}); parallel_tensor_guid_t input = b.create_input_tensor(input_shape); - int outChannels = 6; - int kernelH = 5; - int kernelW = 4; - int strideH = 3; - int strideW = 2; - int paddingH = 1; - int paddingW = 0; + nonnegative_int outChannels = 6_n; + nonnegative_int kernelH = 5_n; + nonnegative_int kernelW = 4_n; + nonnegative_int strideH = 3_n; + nonnegative_int strideW = 2_n; + nonnegative_int paddingH = 1_n; + nonnegative_int paddingW = 0_n; parallel_tensor_guid_t output = b.conv2d(input, /*outChannels=*/outChannels, /*kernelH=*/kernelH, @@ -254,7 +255,7 @@ TEST_SUITE(FF_TEST_SUITE) { strideW, paddingH, paddingW, - /*groups=*/1, + /*groups=*/1_n, /*activation=*/std::nullopt, /*use_bias=*/true, }; @@ -301,18 +302,18 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{10, 2}, - ShardParallelDim{16, 1}, + ShardParallelDim{10_n, 2_n}, + ShardParallelDim{16_n, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, }; - 
int outDim = 14; + nonnegative_int outDim = 14_n; parallel_tensor_guid_t input = b.create_input_tensor(input_shape); parallel_tensor_guid_t output = b.dense(input, @@ -341,8 +342,8 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ParallelComputationGraphBuilder::embedding") { ParallelComputationGraphBuilder b; - ShardParallelDim batch_dim = ShardParallelDim{12, 2}; - ShardParallelDim feature_dim = ShardParallelDim{10, 1}; + ShardParallelDim batch_dim = ShardParallelDim{12_n, 2_n}; + ShardParallelDim feature_dim = ShardParallelDim{10_n, 1_n}; ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ @@ -350,8 +351,8 @@ TEST_SUITE(FF_TEST_SUITE) { feature_dim, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::INT32, @@ -359,8 +360,8 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_tensor_guid_t input = b.create_input_tensor(input_shape); parallel_tensor_guid_t output = b.embedding(input, - /*num_entries=*/32, - /*outDim=*/8, + /*num_entries=*/32_n, + /*outDim=*/8_n, AggregateOp::SUM, DataType::FLOAT); parallel_layer_guid_t layer = get_source_layer(output); @@ -384,9 +385,9 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ParallelComputationGraphBuilder::multihead_attention") { ParallelComputationGraphBuilder b; - ShardParallelDim batch_dim = ShardParallelDim{12, 2}; - ShardParallelDim sequence_dim = ShardParallelDim{16, 1}; - ShardParallelDim feature_dim = ShardParallelDim{10, 1}; + ShardParallelDim batch_dim = ShardParallelDim{12_n, 2_n}; + ShardParallelDim sequence_dim = ShardParallelDim{16_n, 1_n}; + ShardParallelDim feature_dim = ShardParallelDim{10_n, 1_n}; ParallelTensorShape query_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ @@ -395,8 +396,8 @@ TEST_SUITE(FF_TEST_SUITE) { feature_dim, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -405,8 +406,8 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape key_shape = query_shape; ParallelTensorShape value_shape = query_shape; - int embed_dim = 8; - int num_heads = 6; + nonnegative_int embed_dim = 8_n; + nonnegative_int num_heads = 6_n; parallel_tensor_guid_t query = b.create_input_tensor(query_shape); parallel_tensor_guid_t key = b.create_input_tensor(key_shape); @@ -435,8 +436,8 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ParallelComputationGraphBuilder::relu") { ParallelComputationGraphBuilder b; - ShardParallelDim batch_dim = ShardParallelDim{18, 3}; - ShardParallelDim feature_dim = ShardParallelDim{32, 1}; + ShardParallelDim batch_dim = ShardParallelDim{18_n, 3_n}; + ShardParallelDim feature_dim = ShardParallelDim{32_n, 1_n}; ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ @@ -445,8 +446,8 @@ TEST_SUITE(FF_TEST_SUITE) { feature_dim, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -474,8 +475,8 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ParallelComputationGraphBuilder::parallel_partition") { ParallelComputationGraphBuilder b; - ShardParallelDim batch_dim = ShardParallelDim{18, 2}; - ShardParallelDim feature_dim = ShardParallelDim{10, 1}; + ShardParallelDim batch_dim = ShardParallelDim{18_n, 2_n}; + ShardParallelDim feature_dim = ShardParallelDim{10_n, 1_n}; ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ @@ -484,8 +485,8 @@ TEST_SUITE(FF_TEST_SUITE) { feature_dim, }, ReplicaParallelDimSet{ - SumDegree{1}, - 
DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -493,7 +494,7 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_tensor_guid_t input = b.create_input_tensor(input_shape); parallel_tensor_guid_t output = - b.parallel_partition(input, ff_dim_t{nonnegative_int{0}}, 2); + b.parallel_partition(input, ff_dim_t{nonnegative_int{0}}, 2_n); parallel_layer_guid_t layer = get_source_layer(output); SUBCASE("incoming") { @@ -514,8 +515,8 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ParallelComputationGraphBuilder::parallel_combine") { ParallelComputationGraphBuilder b; - ShardParallelDim batch_dim = ShardParallelDim{18, 2}; - ShardParallelDim feature_dim = ShardParallelDim{10, 1}; + ShardParallelDim batch_dim = ShardParallelDim{18_n, 2_n}; + ShardParallelDim feature_dim = ShardParallelDim{10_n, 1_n}; ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ @@ -524,8 +525,8 @@ TEST_SUITE(FF_TEST_SUITE) { feature_dim, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -533,7 +534,7 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_tensor_guid_t input = b.create_input_tensor(input_shape); parallel_tensor_guid_t output = - b.parallel_combine(input, ff_dim_t{nonnegative_int{0}}, 2); + b.parallel_combine(input, ff_dim_t{nonnegative_int{0}}, 2_n); parallel_layer_guid_t layer = get_source_layer(output); SUBCASE("incoming") { @@ -554,8 +555,8 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ParallelComputationGraphBuilder::parallel_replicate") { ParallelComputationGraphBuilder b; - ShardParallelDim batch_dim = ShardParallelDim{18, 2}; - ShardParallelDim feature_dim = ShardParallelDim{10, 1}; + ShardParallelDim batch_dim = ShardParallelDim{18_n, 2_n}; + ShardParallelDim feature_dim = ShardParallelDim{10_n, 1_n}; ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ @@ -564,15 +565,15 @@ TEST_SUITE(FF_TEST_SUITE) { feature_dim, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, }; parallel_tensor_guid_t input = b.create_input_tensor(input_shape); - parallel_tensor_guid_t output = b.parallel_replicate(input, 2); + parallel_tensor_guid_t output = b.parallel_replicate(input, 2_n); parallel_layer_guid_t layer = get_source_layer(output); SUBCASE("incoming") { @@ -593,8 +594,8 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ParallelComputationGraphBuilder::parallel_reduce") { ParallelComputationGraphBuilder b; - ShardParallelDim batch_dim = ShardParallelDim{18, 2}; - ShardParallelDim feature_dim = ShardParallelDim{10, 1}; + ShardParallelDim batch_dim = ShardParallelDim{18_n, 2_n}; + ShardParallelDim feature_dim = ShardParallelDim{10_n, 1_n}; ParallelTensorShape input_shape = ParallelTensorShape{ ParallelTensorDims{ @@ -603,15 +604,15 @@ TEST_SUITE(FF_TEST_SUITE) { feature_dim, }, ReplicaParallelDimSet{ - SumDegree{4}, - DiscardCopyDegree{1}, + SumDegree{4_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, }; parallel_tensor_guid_t input = b.create_input_tensor(input_shape); - parallel_tensor_guid_t output = b.parallel_reduce(input, 2); + parallel_tensor_guid_t output = b.parallel_reduce(input, 2_n); parallel_layer_guid_t layer = get_source_layer(output); SUBCASE("incoming") { diff --git a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc index 8383754aa2..71c4d1b1d0 100644 --- a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc +++ 
b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc @@ -8,15 +8,15 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("StartInvariantMachineView - utility functions") { StartInvariantMachineView simv = StartInvariantMachineView{ - {MachineViewDimension{stride_t{2}, + {MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{2}, + MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTER_NODE}}, DeviceType::GPU}; SUBCASE("num_dims") { - int result = num_dims(simv); - int correct = 2; + nonnegative_int result = num_dims(simv); + nonnegative_int correct = 2_n; CHECK(result == correct); } @@ -28,7 +28,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("get_strides") { std::vector<stride_t> result = get_strides(simv); - std::vector<stride_t> correct = {stride_t{2}, stride_t{2}}; + std::vector<stride_t> correct = {stride_t{2_n}, stride_t{2_n}}; CHECK(result == correct); } @@ -43,11 +43,11 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("StartInvariantMachineView - conversions") { MachineSpaceCoordinate start = - MachineSpaceCoordinate{1, 2, DeviceType::GPU}; + MachineSpaceCoordinate{1_n, 2_n, DeviceType::GPU}; std::vector<MachineViewDimension> dimensions = { - MachineViewDimension{stride_t{2}, + MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{3}, + MachineViewDimension{stride_t{3_n}, MachineSpecificationDimension::INTRA_NODE}}; MachineView mv = MachineView{start, dimensions}; @@ -94,21 +94,21 @@ TEST_SUITE(FF_TEST_SUITE) { * | (0,) | | (1,) | | (2,) | | * +-------+-------+-------+-------+-------+-------+ */ - OperatorTaskSpace task = OperatorTaskSpace{{3}}; + OperatorTaskSpace task = OperatorTaskSpace{{3_n}}; StartInvariantMachineView simv = StartInvariantMachineView{ - {MachineViewDimension{stride_t{2}, + {MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE}}, DeviceType::GPU}; MachineSpecification ms = - MachineSpecification{/*num_nodes=*/1, - /*num_cpus_per_node=*/6, - /*num_gpus_per_node=*/6, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0}; + MachineSpecification{/*num_nodes=*/1_n, + /*num_cpus_per_node=*/6_n, + /*num_gpus_per_node=*/6_n, + /*inter_node_bandwidth=*/0.0, + /*intra_node_bandwidth=*/0.0}; SUBCASE("get_machine_space_offset") { SUBCASE("Task with TaskSpaceCoordinate = (0,)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n}}; MachineSpaceOffset correct = MachineSpaceOffset{0, 0, DeviceType::GPU}; MachineSpaceOffset result = @@ -117,7 +117,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("Task with TaskSpaceCoordinate = (1,)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n}}; MachineSpaceOffset correct = MachineSpaceOffset{0, 2, DeviceType::GPU}; MachineSpaceOffset result = @@ -126,7 +126,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("Task with TaskSpaceCoordinate = (2,)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{2}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{2_n}}; MachineSpaceOffset correct = MachineSpaceOffset{0, 4, DeviceType::GPU}; MachineSpaceOffset result = @@ -162,23 +162,23 @@ TEST_SUITE(FF_TEST_SUITE) { * +-------+-------+-------+-------+ */ - OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; + OperatorTaskSpace task = OperatorTaskSpace{{2_n, 2_n}}; StartInvariantMachineView simv = StartInvariantMachineView{ - {MachineViewDimension{stride_t{1},
MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{2}, + MachineViewDimension{stride_t{2_n}, MachineSpecificationDimension::INTRA_NODE}}, DeviceType::GPU}; MachineSpecification ms = - MachineSpecification{/*num_nodes=*/2, - /*num_cpus_per_node=*/4, - /*num_gpus_per_node=*/4, + MachineSpecification{/*num_nodes=*/2_n, + /*num_cpus_per_node=*/4_n, + /*num_gpus_per_node=*/4_n, /*inter_node_bandwidth=*/0, /*intra_node_bandwidth=*/0}; SUBCASE("get_machine_space_offset") { SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 0}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 0_n}}; MachineSpaceOffset correct = MachineSpaceOffset{0, 0, DeviceType::GPU}; MachineSpaceOffset result = @@ -187,7 +187,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 1_n}}; MachineSpaceOffset correct = MachineSpaceOffset{0, 2, DeviceType::GPU}; MachineSpaceOffset result = @@ -196,7 +196,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("Task with TaskSpaceCoordinate = (1,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 0_n}}; MachineSpaceOffset correct = MachineSpaceOffset{1, 0, DeviceType::GPU}; MachineSpaceOffset result = @@ -205,7 +205,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 1}}; + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 1_n}}; MachineSpaceOffset correct = MachineSpaceOffset{1, 2, DeviceType::GPU}; MachineSpaceOffset result = diff --git a/lib/substitutions/include/substitutions/apply_substitution/apply_substitution.h b/lib/substitutions/include/substitutions/apply_substitution/apply_substitution.h new file mode 100644 index 0000000000..92f7bb1c03 --- /dev/null +++ b/lib/substitutions/include/substitutions/apply_substitution/apply_substitution.h @@ -0,0 +1,31 @@ +#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_APPLY_SUBSTITUTION_APPLY_SUBSTITUTION_H +#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_APPLY_SUBSTITUTION_APPLY_SUBSTITUTION_H + +#include "substitutions/pcg_pattern_match.dtg.h" +#include "substitutions/sub_parallel_computation_graph.dtg.h" +#include "substitutions/substitution.dtg.h" + +namespace FlexFlow { + +/** + * @brief Applies \p substitution to \p sub_pcg at the location specified by \p + * match, returning the resulting SubParallelComputationGraph + * + * @param sub_pcg + * @param substitution + * @param match The location at which to apply substitution. This location in + * sub_pcg should match substitution's PCGPattern. Likely created by running + * FlexFlow::find_pattern_matches(PCGPattern const &, + * SubParallelComputationGraph const &). 
+ * @return SubParallelComputationGraph A sub-PCG similar to sub_pcg, but with + * the subgraph specified by match replaced with the result of the output + * expression of substitution + */ +SubParallelComputationGraph + apply_substitution(SubParallelComputationGraph const &sub_pcg, + Substitution const &substitution, + PCGPatternMatch const &match); + +} // namespace FlexFlow + +#endif diff --git a/lib/substitutions/include/substitutions/substitution_internal/evaluate_substitution_output.h b/lib/substitutions/include/substitutions/apply_substitution/evaluate_substitution_output.h similarity index 76% rename from lib/substitutions/include/substitutions/substitution_internal/evaluate_substitution_output.h rename to lib/substitutions/include/substitutions/apply_substitution/evaluate_substitution_output.h index a0461b075b..74089c5aab 100644 --- a/lib/substitutions/include/substitutions/substitution_internal/evaluate_substitution_output.h +++ b/lib/substitutions/include/substitutions/apply_substitution/evaluate_substitution_output.h @@ -1,10 +1,10 @@ -#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_SUBSTITUTION_INTERNAL_EVALUATE_SUBSTITUTION_OUTPUT_H -#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_SUBSTITUTION_INTERNAL_EVALUATE_SUBSTITUTION_OUTPUT_H +#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_APPLY_SUBSTITUTION_EVALUATE_SUBSTITUTION_OUTPUT_H +#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_APPLY_SUBSTITUTION_EVALUATE_SUBSTITUTION_OUTPUT_H +#include "substitutions/apply_substitution/output_expr_to_result_sub_pcg_mapping.dtg.h" #include "substitutions/pcg_pattern_match.dtg.h" #include "substitutions/sub_parallel_computation_graph.dtg.h" #include "substitutions/substitution.dtg.h" -#include "substitutions/substitution_internal/output_expr_to_result_sub_pcg_mapping.dtg.h" #include namespace FlexFlow { diff --git a/lib/substitutions/include/substitutions/substitution_internal/output_expr_to_result_sub_pcg_mapping.h b/lib/substitutions/include/substitutions/apply_substitution/output_expr_to_result_sub_pcg_mapping.h similarity index 62% rename from lib/substitutions/include/substitutions/substitution_internal/output_expr_to_result_sub_pcg_mapping.h rename to lib/substitutions/include/substitutions/apply_substitution/output_expr_to_result_sub_pcg_mapping.h index 603cb670bf..cd7e782909 100644 --- a/lib/substitutions/include/substitutions/substitution_internal/output_expr_to_result_sub_pcg_mapping.h +++ b/lib/substitutions/include/substitutions/apply_substitution/output_expr_to_result_sub_pcg_mapping.h @@ -1,11 +1,11 @@ -#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_SUBSTITUTION_INTERNAL_OUTPUT_EXPR_TO_RESULT_SUB_PCG_MAPPING_H -#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_SUBSTITUTION_INTERNAL_OUTPUT_EXPR_TO_RESULT_SUB_PCG_MAPPING_H +#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_APPLY_SUBSTITUTION_OUTPUT_EXPR_TO_RESULT_SUB_PCG_MAPPING_H +#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_APPLY_SUBSTITUTION_OUTPUT_EXPR_TO_RESULT_SUB_PCG_MAPPING_H #include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h" +#include "substitutions/apply_substitution/output_expr_to_result_sub_pcg_mapping.dtg.h" #include "substitutions/output_graph/output_graph_expr.dtg.h" #include "substitutions/output_graph/output_graph_expr_node_output.dtg.h" #include "substitutions/sub_parallel_computation_graph.dtg.h" -#include "substitutions/substitution_internal/output_expr_to_result_sub_pcg_mapping.dtg.h" namespace FlexFlow { diff 
--git a/lib/substitutions/include/substitutions/substitution_internal/output_expr_to_result_sub_pcg_mapping.struct.toml b/lib/substitutions/include/substitutions/apply_substitution/output_expr_to_result_sub_pcg_mapping.struct.toml similarity index 100% rename from lib/substitutions/include/substitutions/substitution_internal/output_expr_to_result_sub_pcg_mapping.struct.toml rename to lib/substitutions/include/substitutions/apply_substitution/output_expr_to_result_sub_pcg_mapping.struct.toml diff --git a/lib/substitutions/include/substitutions/substitution_internal/perform_shape_inference.h b/lib/substitutions/include/substitutions/apply_substitution/perform_shape_inference.h similarity index 85% rename from lib/substitutions/include/substitutions/substitution_internal/perform_shape_inference.h rename to lib/substitutions/include/substitutions/apply_substitution/perform_shape_inference.h index b7ce13db0e..c3f9eff349 100644 --- a/lib/substitutions/include/substitutions/substitution_internal/perform_shape_inference.h +++ b/lib/substitutions/include/substitutions/apply_substitution/perform_shape_inference.h @@ -1,5 +1,5 @@ -#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_SUBSTITUTION_INTERNAL_PERFORM_SHAPE_INFERENCE_H -#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_SUBSTITUTION_INTERNAL_PERFORM_SHAPE_INFERENCE_H +#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_APPLY_SUBSTITUTION_PERFORM_SHAPE_INFERENCE_H +#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_APPLY_SUBSTITUTION_PERFORM_SHAPE_INFERENCE_H #include "op-attrs/parallel_tensor_shape.dtg.h" #include "pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h" diff --git a/lib/substitutions/include/substitutions/constraint_type.enum.toml b/lib/substitutions/include/substitutions/constraint_type.enum.toml index 8646ba1c83..f366a17725 100644 --- a/lib/substitutions/include/substitutions/constraint_type.enum.toml +++ b/lib/substitutions/include/substitutions/constraint_type.enum.toml @@ -9,3 +9,6 @@ features = [ [[values]] name = "EQUAL" + +[[values]] +name = "DIVISIBLE_BY" diff --git a/lib/substitutions/include/substitutions/operator_pattern/get_attribute_map.h b/lib/substitutions/include/substitutions/operator_pattern/get_attribute_map.h new file mode 100644 index 0000000000..2b31dada04 --- /dev/null +++ b/lib/substitutions/include/substitutions/operator_pattern/get_attribute_map.h @@ -0,0 +1,15 @@ +#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_OPERATOR_PATTERN_GET_ATTRIBUTE_MAP_H +#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_OPERATOR_PATTERN_GET_ATTRIBUTE_MAP_H + +#include "op-attrs/pcg_operator_attrs.dtg.h" +#include "substitutions/operator_pattern/operator_attribute_key.dtg.h" +#include "substitutions/operator_pattern/operator_attribute_value.dtg.h" + +namespace FlexFlow { + +std::unordered_map<OperatorAttributeKey, OperatorAttributeValue> + get_attribute_map(PCGOperatorAttrs const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_constraint.h b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_constraint.h index 4affdd697f..c2c11fac51 100644 --- a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_constraint.h +++ b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_constraint.h @@ -9,6 +9,8 @@ OperatorAttributeConstraint op_type_equals_constraint(OperatorType); OperatorAttributeConstraint op_attr_key_equals(OperatorAttributeKey, OperatorAttributeValue const &);
+OperatorAttributeConstraint + op_attr_key_divisible_by(OperatorAttributeKey, nonnegative_int denominator); OperatorAttributeConstraint make_equals_constraint(OperatorAttributeExpr const &, OperatorAttributeValue const &); diff --git a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_key.enum.toml b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_key.enum.toml index eb758ea4fc..af3666d46f 100644 --- a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_key.enum.toml +++ b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_key.enum.toml @@ -56,6 +56,7 @@ values = [ { name = "SHOULD_BROADCAST_RHS" }, { name = "DIM" }, { name = "AFFINE" }, + { name = "ELEMENTWISE_AFFINE" }, { name = "MOMENTUM" }, { name = "REGULARIZER" }, { name = "SHAPE" }, diff --git a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_key.h b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_key.h new file mode 100644 index 0000000000..d46403a847 --- /dev/null +++ b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_key.h @@ -0,0 +1,12 @@ +#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_OPERATOR_PATTERN_OPERATOR_ATTRIBUTE_KEY_H +#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_OPERATOR_PATTERN_OPERATOR_ATTRIBUTE_KEY_H + +#include "substitutions/operator_pattern/operator_attribute_key.dtg.h" + +namespace FlexFlow { + +std::vector<OperatorAttributeKey> all_operator_attribute_keys(); + +} // namespace FlexFlow + +#endif diff --git a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_list_access.struct.toml b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_list_access.struct.toml index bceff393d2..4ed226907e 100644 --- a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_list_access.struct.toml +++ b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_list_access.struct.toml @@ -10,7 +10,8 @@ features = [ ] includes = [ - "substitutions/operator_pattern/operator_attribute_key.dtg.h" + "substitutions/operator_pattern/operator_attribute_key.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] name = "attribute_key" type = "::FlexFlow::OperatorAttributeKey" [[fields]] name = "index" -type = "int" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_value.variant.toml b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_value.variant.toml index 8fe4a9494d..3312b292a0 100644 --- a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_value.variant.toml +++ b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_value.variant.toml @@ -7,7 +7,6 @@ features = [ "fmt", "json", ] -explicit_constructors = false includes = [ "", "op-attrs/activation.dtg.h", "op-attrs/aggregate_op.dtg.h", "op-attrs/ff_dim_t.dtg.h", "op-attrs/ff_dim_t.h", "op-attrs/regularizer_attrs.dtg.h", "op-attrs/tensor_shape.dtg.h", "op-attrs/datatype.dtg.h", "", "utils/nonnegative_int/nonnegative_int.h", ] src_includes = [ "utils/fmt/optional.h", "utils/fmt/vector.h", "utils/hash/vector.h", "utils/hash/tuple.h", ] [[values]] -type = "int" +type = "::FlexFlow::nonnegative_int" [[values]] type = "bool" [[values]] type = "float" [[values]] -type = "std::vector" +type = "std::optional" + +[[values]] +type = "std::vector<::FlexFlow::nonnegative_int>" [[values]] type = "std::vector<::FlexFlow::ff_dim_t>" [[values]] type = "std::optional<::FlexFlow::Activation>" [[values]] type =
"::FlexFlow::ff_dim_t" [[values]] -type = "size_t" - -[[values]] -type = "::FlexFlow::AggregateOp" +type = "std::optional<::FlexFlow::AggregateOp>" [[values]] type = "std::optional<::FlexFlow::RegularizerAttrs>" diff --git a/lib/substitutions/include/substitutions/output_graph/output_graph_expr.h b/lib/substitutions/include/substitutions/output_graph/output_graph_expr.h index e550767292..8c047fc44d 100644 --- a/lib/substitutions/include/substitutions/output_graph/output_graph_expr.h +++ b/lib/substitutions/include/substitutions/output_graph/output_graph_expr.h @@ -2,14 +2,19 @@ #define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_OUTPUT_GRAPH_OUTPUT_GRAPH_EXPR_H #include "substitutions/output_graph/output_graph_expr.dtg.h" +#include "substitutions/output_graph/output_graph_expr_input.dtg.h" #include "substitutions/output_graph/output_graph_expr_node.dtg.h" #include "substitutions/output_graph/output_graph_expr_node_output.dtg.h" namespace FlexFlow { +std::unordered_set get_nodes(OutputGraphExpr const &); + std::vector get_node_outputs(OutputGraphExpr const &, OutputGraphExprNode const &); +std::unordered_set get_inputs(OutputGraphExpr const &); + } // namespace FlexFlow #endif diff --git a/lib/substitutions/include/substitutions/output_graph/output_graph_expr_value.h b/lib/substitutions/include/substitutions/output_graph/output_graph_expr_value.h new file mode 100644 index 0000000000..e172edb025 --- /dev/null +++ b/lib/substitutions/include/substitutions/output_graph/output_graph_expr_value.h @@ -0,0 +1,16 @@ +#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_OUTPUT_GRAPH_OUTPUT_GRAPH_EXPR_VALUE_H +#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_OUTPUT_GRAPH_OUTPUT_GRAPH_EXPR_VALUE_H + +#include "substitutions/output_graph/output_graph_expr_value.dtg.h" +#include "utils/graph/open_dataflow_graph/open_dataflow_value.dtg.h" + +namespace FlexFlow { + +OpenDataflowValue raw_open_dataflow_value_from_output_graph_expr_value( + OutputGraphExprValue const &); +OutputGraphExprValue output_graph_expr_value_from_raw_open_dataflow_value( + OpenDataflowValue const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/substitutions/include/substitutions/output_graph/output_graph_expr_value.variant.toml b/lib/substitutions/include/substitutions/output_graph/output_graph_expr_value.variant.toml new file mode 100644 index 0000000000..641250e1f0 --- /dev/null +++ b/lib/substitutions/include/substitutions/output_graph/output_graph_expr_value.variant.toml @@ -0,0 +1,19 @@ +namespace = "FlexFlow" +name = "OutputGraphExprValue" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "substitutions/output_graph/output_graph_expr_input.dtg.h", + "substitutions/output_graph/output_graph_expr_node_output.dtg.h", +] + +[[values]] +type = "::FlexFlow::OutputGraphExprNodeOutput" + +[[values]] +type = "::FlexFlow::OutputGraphExprInput" diff --git a/lib/substitutions/include/substitutions/output_graph/output_operator_attrs_assignment.h b/lib/substitutions/include/substitutions/output_graph/output_operator_attrs_assignment.h index 60540c0711..0921569d62 100644 --- a/lib/substitutions/include/substitutions/output_graph/output_operator_attrs_assignment.h +++ b/lib/substitutions/include/substitutions/output_graph/output_operator_attrs_assignment.h @@ -20,6 +20,9 @@ std::pair set_attr_to_constant(OperatorAttributeKey key, OperatorAttributeValue const &value); +std::pair + set_op_type_attr(OperatorType); + } // namespace FlexFlow #endif diff --git 
a/lib/substitutions/include/substitutions/output_graph/output_operator_attrs_assignment.struct.toml b/lib/substitutions/include/substitutions/output_graph/output_operator_attrs_assignment.struct.toml index d712ea96f7..483f27791a 100644 --- a/lib/substitutions/include/substitutions/output_graph/output_operator_attrs_assignment.struct.toml +++ b/lib/substitutions/include/substitutions/output_graph/output_operator_attrs_assignment.struct.toml @@ -18,11 +18,12 @@ includes = [ src_includes = [ "utils/hash/unordered_map.h", "utils/fmt/unordered_map.h", + "utils/fmt/optional.h", ] -# [[fields]] -# name = "clone_operator" -# type = "std::optional" +[[fields]] +name = "template_operator" +type = "std::optional<::FlexFlow::PatternNode>" # NOTE(@wmdi): Not sure if it aligns with other design. Or alternatively we can # define the assignment for each operator type. diff --git a/lib/substitutions/include/substitutions/pcg_pattern.h b/lib/substitutions/include/substitutions/pcg_pattern.h index 7342e8169f..f0962b15c2 100644 --- a/lib/substitutions/include/substitutions/pcg_pattern.h +++ b/lib/substitutions/include/substitutions/pcg_pattern.h @@ -10,6 +10,8 @@ namespace FlexFlow { +std::unordered_set<PatternNode> get_nodes(PCGPattern const &); + /** * @brief Find all locations in \p pcg that match \p pattern */ diff --git a/lib/substitutions/include/substitutions/pcg_pattern_match.h b/lib/substitutions/include/substitutions/pcg_pattern_match.h index 388377d70c..b946173422 100644 --- a/lib/substitutions/include/substitutions/pcg_pattern_match.h +++ b/lib/substitutions/include/substitutions/pcg_pattern_match.h @@ -6,7 +6,7 @@ #include "substitutions/pcg_pattern_match.dtg.h" #include "substitutions/sub_parallel_computation_graph.dtg.h" #include "substitutions/unlabelled/pattern_node_output.dtg.h" -#include "substitutions/unlabelled/unlabelled_dataflow_graph_pattern_match.dtg.h" +#include "substitutions/unlabelled/unlabelled_dataflow_graph_pattern_match.h" namespace FlexFlow { @@ -17,7 +17,7 @@ bidict SubParallelComputationGraph const &spcg); UnlabelledDataflowGraphPatternMatch - get_unlabelled_pattern_match(PCGPatternMatch const &); + get_unlabelled_pattern_match(PCGPatternMatch const &match); } // namespace FlexFlow diff --git a/lib/substitutions/include/substitutions/sub_parallel_computation_graph_edge.h b/lib/substitutions/include/substitutions/sub_parallel_computation_graph_edge.h index 15cbb6127c..c0544abe1b 100644 --- a/lib/substitutions/include/substitutions/sub_parallel_computation_graph_edge.h +++ b/lib/substitutions/include/substitutions/sub_parallel_computation_graph_edge.h @@ -12,7 +12,7 @@ namespace FlexFlow { SubParallelComputationGraphEdge subpcg_edge_from_tensor_and_dst(parallel_tensor_guid_t const &tensor, parallel_layer_guid_t const &layer, - int input_idx); + nonnegative_int input_idx); SubParallelComputationGraphEdge subpcg_edge_from_tensor_and_use(open_parallel_tensor_guid_t const &tensor, parallel_tensor_use_t const &use); diff --git a/lib/substitutions/include/substitutions/substitution.h b/lib/substitutions/include/substitutions/substitution.h index 7b4e5e6912..7dc4e714ab 100644 --- a/lib/substitutions/include/substitutions/substitution.h +++ b/lib/substitutions/include/substitutions/substitution.h @@ -1,12 +1,14 @@ #ifndef _FLEXFLOW_SUBSTITUTIONS_SUBSTITUTION_H #define _FLEXFLOW_SUBSTITUTIONS_SUBSTITUTION_H -#include "substitutions/pcg_pattern_match.dtg.h" -#include "substitutions/sub_parallel_computation_graph.dtg.h" #include "substitutions/substitution.dtg.h" namespace FlexFlow { +bool
is_isomorphic_to(Substitution const &, Substitution const &); + +std::string as_dot(Substitution const &); + /** * @brief Checks that all internal invariants of the given substitution hold * @@ -22,25 +24,6 @@ namespace FlexFlow { */ bool is_valid_substitution(Substitution const &); -/** - * @brief Applies \p substitution to \p sub_pcg at the location specified by \p - * match, returning the resulting SubParallelComputationGraph - * - * @param sub_pcg - * @param substitution - * @param match The location at which to apply substitution. This location in - * sub_pcg should match substitution's PCGPattern. Likely created by running - * FlexFlow::find_pattern_matches(PCGPattern const &, - * SubParallelComputationGraph const &). - * @return SubParallelComputationGraph A sub-PCG similar to sub_pcg, but with - * the subgraph specified by match replaced with the result of the output - * expression of substitution - */ -SubParallelComputationGraph - apply_substitution(SubParallelComputationGraph const &sub_pcg, - Substitution const &substitution, - PCGPatternMatch const &match); - } // namespace FlexFlow #endif diff --git a/lib/substitutions/include/substitutions/substitution_builder.h b/lib/substitutions/include/substitutions/substitution_builder.h new file mode 100644 index 0000000000..1548b2269b --- /dev/null +++ b/lib/substitutions/include/substitutions/substitution_builder.h @@ -0,0 +1,49 @@ +#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_SUBSTITUTION_BUILDER_H +#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_SUBSTITUTION_BUILDER_H + +#include "substitutions/output_graph/output_graph_expr_value.dtg.h" +#include "substitutions/substitution.dtg.h" +#include "substitutions/unlabelled/pattern_value.dtg.h" +#include + +namespace FlexFlow { + +struct SubstitutionBuilder { +public: + SubstitutionBuilder(); + + std::pair + add_input(TensorAttributePattern const &, + std::optional<std::string> const &name = std::nullopt); + void equate_outputs(PatternValue const &, OutputGraphExprValue const &); + + std::vector<PatternValue> add_pattern_node( + OperatorAttributePattern const &node_pattern, + std::vector<PatternValue> const &inputs, + std::vector<TensorAttributePattern> const &output_patterns, + std::optional<std::string> const &name = std::nullopt); + + std::vector<OutputGraphExprValue> + add_output_graph_node(OutputOperatorAttrsAssignment const &node_expr, + std::vector<OutputGraphExprValue> const &inputs, + nonnegative_int num_outputs); + + PatternNode pattern_node_named(std::string const &) const; + PatternInput pattern_input_named(std::string const &) const; + + Substitution get_substitution() const; + +private: + LabelledOpenDataflowGraph + pattern_g; + LabelledOpenDataflowGraph + output_g; + bidict input_mapping; + bidict pattern_node_names; + bidict pattern_input_names; + bidict output_mapping; +}; + +} // namespace FlexFlow + +#endif diff --git a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_list_access.struct.toml b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_list_access.struct.toml index a57dd25845..71e11a12d5 100644 --- a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_list_access.struct.toml +++ b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_list_access.struct.toml @@ -10,7 +10,8 @@ features = [ ] includes = [ - "substitutions/tensor_pattern/tensor_attribute_key.dtg.h" + "substitutions/tensor_pattern/tensor_attribute_key.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] name = "attribute_key" type = "::FlexFlow::TensorAttributeKey" [[fields]] name = "index" -type = "int"
+type = "::FlexFlow::nonnegative_int" diff --git a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_pattern.h b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_pattern.h index 5b7ebf4ef8..c1e28f8d8f 100644 --- a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_pattern.h +++ b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_pattern.h @@ -2,10 +2,13 @@ #define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_TENSOR_PATTERN_TENSOR_ATTRIBUTE_PATTERN_H #include "substitutions/tensor_pattern/tensor_attribute_pattern.dtg.h" +#include "utils/nonnegative_int/nonnegative_int.h" namespace FlexFlow { TensorAttributePattern tensor_attribute_pattern_match_all(); +TensorAttributePattern + tensor_attr_pattern_require_num_dims(nonnegative_int num_dims); } // namespace FlexFlow diff --git a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_value.variant.toml b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_value.variant.toml index 46b703a7fc..d2b931fb2d 100644 --- a/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_value.variant.toml +++ b/lib/substitutions/include/substitutions/tensor_pattern/tensor_attribute_value.variant.toml @@ -12,10 +12,11 @@ includes = [ "", "utils/hash/vector.h", "utils/fmt/vector.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[values]] -type = "size_t" +type = "::FlexFlow::nonnegative_int" [[values]] -type = "std::vector" +type = "std::vector<::FlexFlow::nonnegative_int>" diff --git a/lib/substitutions/include/substitutions/unity_substitution_set.h b/lib/substitutions/include/substitutions/unity_substitution_set.h new file mode 100644 index 0000000000..183f76ac8a --- /dev/null +++ b/lib/substitutions/include/substitutions/unity_substitution_set.h @@ -0,0 +1,47 @@ +#ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_UNITY_SUBSTITUTION_SET_H +#define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_UNITY_SUBSTITUTION_SET_H + +#include "pcg/machine_specification.dtg.h" +#include "substitutions/substitution.dtg.h" +#include "utils/fmt/vector.h" + +namespace FlexFlow { + +std::vector + get_substitution_set(MachineSpecification const &resources); + +Substitution create_combine_inception(nonnegative_int num_convs, + nonnegative_int num_dims, + nonnegative_int degree); +Substitution create_combine_concat(nonnegative_int num_inputs, + nonnegative_int num_dims, + nonnegative_int degree); +Substitution create_replicate_linear_combine(nonnegative_int num_dims, + nonnegative_int degree, + bool use_bias); +Substitution create_partition_linear_combine(nonnegative_int num_dims, + nonnegative_int degree, + Activation activation, + bool use_bias); +Substitution create_partition_conv2d_combine(nonnegative_int num_dims, + nonnegative_int degree); +Substitution create_partition_attention_combine(nonnegative_int num_heads, + nonnegative_int degree); +Substitution create_replicate_attention_reduce(nonnegative_int num_heads, + nonnegative_int degree); +Substitution create_partition_add_combine(ff_dim_t parallel_dim, + nonnegative_int degree); +Substitution create_partition_relu_combine(ff_dim_t parallel_dim, + nonnegative_int degree); +Substitution create_partition_concat_combine(nonnegative_int num_inputs, + ff_dim_t concat_dim, + ff_dim_t parallel_dim, + nonnegative_int degree); +Substitution create_partition_softmax_combine(ff_dim_t softmax_dim, + ff_dim_t partition_dim, + nonnegative_int degree); +Substitution 
+    create_fuse_linear_activation(Activation activation);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/substitutions/include/substitutions/unlabelled/input_pattern_edge.h b/lib/substitutions/include/substitutions/unlabelled/input_pattern_edge.h
index 7a7c9c3c28..8c58cb991c 100644
--- a/lib/substitutions/include/substitutions/unlabelled/input_pattern_edge.h
+++ b/lib/substitutions/include/substitutions/unlabelled/input_pattern_edge.h
@@ -9,7 +9,7 @@ namespace FlexFlow {
 
 PatternInput get_src_input(InputPatternEdge const &);
 PatternNode get_dst_node(InputPatternEdge const &);
-int get_dst_idx(InputPatternEdge const &);
+nonnegative_int get_dst_idx(InputPatternEdge const &);
 
 } // namespace FlexFlow
diff --git a/lib/substitutions/include/substitutions/unlabelled/pattern_matching.h b/lib/substitutions/include/substitutions/unlabelled/pattern_matching.h
index 14c0b9ddcc..ce30b18f55 100644
--- a/lib/substitutions/include/substitutions/unlabelled/pattern_matching.h
+++ b/lib/substitutions/include/substitutions/unlabelled/pattern_matching.h
@@ -9,13 +9,10 @@ namespace FlexFlow {
 
-// OpenDataflowGraphView apply_match(UnlabelledGraphPattern const &pattern,
-//                                   UnlabelledDataflowGraphPatternMatch const
-//                                   &match);
-
 OpenDataflowSubgraphResult
     subgraph_matched(OpenDataflowGraphView const &graph,
                      UnlabelledDataflowGraphPatternMatch const &match);
+
 bool pattern_matches_subgraph_under(
     UnlabelledGraphPattern const &pattern,
     OpenDataflowGraphView const &subgraph,
@@ -30,11 +27,6 @@ bool unlabelled_pattern_does_match(
     UnlabelledDataflowGraphPatternMatch const &match,
     MatchAdditionalCriterion const &additional_criterion);
 
-std::vector<UnlabelledDataflowGraphPatternMatch>
-    find_pattern_matches(UnlabelledGraphPattern const &pattern,
-                         OpenDataflowGraphView const &graph,
-                         MatchAdditionalCriterion const &additional_criterion);
-
 } // namespace FlexFlow
 
 #endif
diff --git a/lib/substitutions/include/substitutions/unlabelled/pattern_node_output.h b/lib/substitutions/include/substitutions/unlabelled/pattern_node_output.h
index 3dd5b262c9..67f513b8b1 100644
--- a/lib/substitutions/include/substitutions/unlabelled/pattern_node_output.h
+++ b/lib/substitutions/include/substitutions/unlabelled/pattern_node_output.h
@@ -6,7 +6,7 @@ namespace FlexFlow {
 
 PatternNode get_src_node(PatternNodeOutput const &);
-int get_idx(PatternNodeOutput const &);
+nonnegative_int get_idx(PatternNodeOutput const &);
 
 } // namespace FlexFlow
diff --git a/lib/substitutions/include/substitutions/unlabelled/standard_pattern_edge.h b/lib/substitutions/include/substitutions/unlabelled/standard_pattern_edge.h
index 7316098fb5..817e829709 100644
--- a/lib/substitutions/include/substitutions/unlabelled/standard_pattern_edge.h
+++ b/lib/substitutions/include/substitutions/unlabelled/standard_pattern_edge.h
@@ -8,8 +8,8 @@ namespace FlexFlow {
 
 PatternNode get_src_node(StandardPatternEdge const &);
 PatternNode get_dst_node(StandardPatternEdge const &);
-int get_src_idx(StandardPatternEdge const &);
-int get_dst_idx(StandardPatternEdge const &);
+nonnegative_int get_src_idx(StandardPatternEdge const &);
+nonnegative_int get_dst_idx(StandardPatternEdge const &);
 
 } // namespace FlexFlow
diff --git a/lib/substitutions/src/substitutions/apply_substitution/apply_substitution.cc b/lib/substitutions/src/substitutions/apply_substitution/apply_substitution.cc
new file mode 100644
index 0000000000..61bfe15d7b
--- /dev/null
+++ b/lib/substitutions/src/substitutions/apply_substitution/apply_substitution.cc
@@ -0,0 +1,165 @@
+#include "substitutions/apply_substitution/apply_substitution.h"
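+// Usage sketch (editorial illustration, not part of the original patch;
+// variable names are hypothetical): given a SubParallelComputationGraph
+// `spcg`, a Substitution `sub`, and a PCGPatternMatch `match` locating
+// `sub`'s pattern inside `spcg`, the rewritten graph is obtained as:
+//
+//   SubParallelComputationGraph rewritten =
+//       apply_substitution(spcg, sub, match);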
+#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.h" +#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.h" +#include "substitutions/apply_substitution/evaluate_substitution_output.h" +#include "substitutions/apply_substitution/output_expr_to_result_sub_pcg_mapping.h" +#include "substitutions/open_parallel_tensor_guid_t.h" +#include "substitutions/pcg_pattern_match.h" +#include "substitutions/sub_parallel_computation_graph.h" +#include "substitutions/sub_parallel_computation_graph_data.dtg.h" +#include "substitutions/sub_parallel_computation_graph_edge.h" +#include "utils/containers/keys.h" +#include "utils/containers/merge_maps.h" +#include "utils/containers/restrict_keys.h" +#include "utils/containers/set_minus.h" +#include "utils/containers/values.h" + +namespace FlexFlow { + +SubParallelComputationGraph + apply_substitution(SubParallelComputationGraph const &spcg, + Substitution const &sub, + PCGPatternMatch const &match) { + auto substitution_output_result = + evaluate_substitution_output(spcg, sub, match); + SubParallelComputationGraph substitution_output_graph = + substitution_output_result.first; + OutputExprToResultSubPCGMapping output_expr_to_result_sub_pcg_mapping = + substitution_output_result.second; + + SubParallelComputationGraphData output_graph_data = + get_sub_pcg_data(substitution_output_graph); + SubParallelComputationGraphData pre_data = get_sub_pcg_data(spcg); + + std::unordered_set pre_nodes = + keys(pre_data.node_data); + std::unordered_set matched_nodes = + unordered_set_of(values(match.node_assignment)); + std::unordered_set post_nodes_from_original_graph = + set_minus(pre_nodes, matched_nodes); + + std::unordered_map post_node_data = + [&] { + std::unordered_map + post_node_data_from_orig = restrict_keys( + pre_data.node_data, post_nodes_from_original_graph); + std::unordered_map + post_node_data_from_sub = output_graph_data.node_data; + + return merge_disjoint_maps(post_node_data_from_orig, + post_node_data_from_sub); + }(); + + std::unordered_set post_edges = [&] { + std::unordered_set post_edges_from_orig = + filter(pre_data.edges, [&](SubParallelComputationGraphEdge const &e) { + if (e.raw_edge.has()) { + return true; + } else { + DataflowEdge dfe = e.raw_edge.get(); + parallel_layer_guid_t src = parallel_layer_guid_t{dfe.src.node}; + parallel_layer_guid_t dst = parallel_layer_guid_t{dfe.dst.node}; + return !(contains(matched_nodes, src) || + contains(matched_nodes, dst)); + } + }); + + std::unordered_set post_edges_from_sub = + filter(output_graph_data.edges, + [&](SubParallelComputationGraphEdge const &e) { + return !e.raw_edge.has(); + }); + + bidict + output_orig_pattern_mapping = get_output_mapping_for_pcg_pattern_match( + match, sub.pcg_pattern, spcg); + bidict + output_post_outexpr_mapping = get_output_graph_expr_output_mapping( + output_expr_to_result_sub_pcg_mapping, + sub.output_graph_expr, + substitution_output_graph); + + std::unordered_set incoming_to_sub_edges; + for (auto const &[pattern_input, base_graph_tensor] : + match.input_assignment) { + OutputGraphExprInput output_expr_input = + sub.inputs_mapping.at_l(pattern_input); + input_parallel_tensor_guid_t output_graph_input = + output_expr_to_result_sub_pcg_mapping.input_mapping.at_r( + output_expr_input); + std::unordered_set uses = get_parallel_tensor_uses( + substitution_output_graph, + open_parallel_tensor_guid_from_input(output_graph_input)); + for (parallel_tensor_use_t const &use : uses) { + SubParallelComputationGraphEdge new_edge = + 
subpcg_edge_from_tensor_and_use(base_graph_tensor, use); + incoming_to_sub_edges.insert(new_edge); + } + } + + std::unordered_set outgoing_from_sub_edges; + for (ParallelComputationGraphEdge const &outgoing_edge : + get_subgraph_outgoing_edges(spcg, matched_nodes)) { + parallel_tensor_guid_t original_tensor = + get_parallel_tensor(outgoing_edge); + PatternNodeOutput pattern_tensor = + output_orig_pattern_mapping.at_r(original_tensor); + OutputGraphExprNodeOutput output_graph_tensor = + sub.outputs_mapping.at_l(pattern_tensor); + parallel_tensor_guid_t new_tensor = + output_post_outexpr_mapping.at_r(output_graph_tensor); + + SubParallelComputationGraphEdge new_edge = + subpcg_edge_from_tensor_and_dst( + new_tensor, + get_dst_layer(outgoing_edge), + get_dst_layer_input_idx(outgoing_edge)); + outgoing_from_sub_edges.insert(new_edge); + } + + return set_union(std::vector{ + post_edges_from_orig, + post_edges_from_sub, + incoming_to_sub_edges, + outgoing_from_sub_edges, + }); + }(); + + std::unordered_set post_inputs = + pre_data.inputs; + + std::unordered_map + post_value_data = [&] { + std::unordered_map + post_value_data_from_orig = filter_keys( + pre_data.value_data, [&](open_parallel_tensor_guid_t const &t) { + return visit_open_parallel_tensor_guid( + t, + overload{ + [&](parallel_tensor_guid_t const &t) { + return contains(post_nodes_from_original_graph, + get_source_layer(t)); + }, + [](input_parallel_tensor_guid_t const &) { + return true; + }, + }); + }); + + std::unordered_map + post_value_data_from_sub = output_graph_data.value_data; + return merge_disjoint_maps(post_value_data_from_orig, + post_value_data_from_sub); + }(); + + SubParallelComputationGraphData post_data = SubParallelComputationGraphData{ + post_node_data, + post_edges, + post_inputs, + post_value_data, + }; + + return sub_pcg_from_graph_data(post_data); +} + +} // namespace FlexFlow diff --git a/lib/substitutions/src/substitutions/substitution_internal/evaluate_substitution_output.cc b/lib/substitutions/src/substitutions/apply_substitution/evaluate_substitution_output.cc similarity index 96% rename from lib/substitutions/src/substitutions/substitution_internal/evaluate_substitution_output.cc rename to lib/substitutions/src/substitutions/apply_substitution/evaluate_substitution_output.cc index 186e2fc03a..a921201c3a 100644 --- a/lib/substitutions/src/substitutions/substitution_internal/evaluate_substitution_output.cc +++ b/lib/substitutions/src/substitutions/apply_substitution/evaluate_substitution_output.cc @@ -1,7 +1,7 @@ -#include "substitutions/substitution_internal/evaluate_substitution_output.h" +#include "substitutions/apply_substitution/evaluate_substitution_output.h" +#include "substitutions/apply_substitution/perform_shape_inference.h" #include "substitutions/output_graph/output_operator_attrs_assignment.h" #include "substitutions/sub_parallel_computation_graph.h" -#include "substitutions/substitution_internal/perform_shape_inference.h" #include "utils/containers/map_keys.h" #include "utils/containers/map_values.h" #include "utils/graph/labelled_open_dataflow_graph/algorithms/permute_input_ids.h" diff --git a/lib/substitutions/src/substitutions/substitution_internal/output_expr_to_result_sub_pcg_mapping.cc b/lib/substitutions/src/substitutions/apply_substitution/output_expr_to_result_sub_pcg_mapping.cc similarity index 93% rename from lib/substitutions/src/substitutions/substitution_internal/output_expr_to_result_sub_pcg_mapping.cc rename to 
index 22e6a9f333..a5fc9a2e06 100644
--- a/lib/substitutions/src/substitutions/substitution_internal/output_expr_to_result_sub_pcg_mapping.cc
+++ b/lib/substitutions/src/substitutions/apply_substitution/output_expr_to_result_sub_pcg_mapping.cc
@@ -1,4 +1,4 @@
-#include "substitutions/substitution_internal/output_expr_to_result_sub_pcg_mapping.h"
+#include "substitutions/apply_substitution/output_expr_to_result_sub_pcg_mapping.h"
 #include "substitutions/output_graph/output_graph_expr.h"
 #include "substitutions/sub_parallel_computation_graph.h"
 #include "utils/bidict/algorithms/bidict_from_keys_and_values.h"
diff --git a/lib/substitutions/src/substitutions/substitution_internal/perform_shape_inference.cc b/lib/substitutions/src/substitutions/apply_substitution/perform_shape_inference.cc
similarity index 95%
rename from lib/substitutions/src/substitutions/substitution_internal/perform_shape_inference.cc
rename to lib/substitutions/src/substitutions/apply_substitution/perform_shape_inference.cc
index 9fa91d75b7..f49c7e0a3e 100644
--- a/lib/substitutions/src/substitutions/substitution_internal/perform_shape_inference.cc
+++ b/lib/substitutions/src/substitutions/apply_substitution/perform_shape_inference.cc
@@ -1,4 +1,4 @@
-#include "substitutions/substitution_internal/perform_shape_inference.h"
+#include "substitutions/apply_substitution/perform_shape_inference.h"
 #include "op-attrs/get_output_shapes.h"
 #include "utils/containers/map_keys.h"
 #include "utils/containers/transform.h"
diff --git a/lib/substitutions/src/substitutions/operator_pattern/eval_list_access.cc b/lib/substitutions/src/substitutions/operator_pattern/eval_list_access.cc
index 53973dc1cb..6f41772a9e 100644
--- a/lib/substitutions/src/substitutions/operator_pattern/eval_list_access.cc
+++ b/lib/substitutions/src/substitutions/operator_pattern/eval_list_access.cc
@@ -1,5 +1,8 @@
 #include "substitutions/operator_pattern/eval_list_access.h"
 #include "substitutions/operator_pattern/get_attribute.h"
+#include "utils/containers/at_idx.h"
+#include "utils/containers/make.h"
+#include "utils/containers/transform.h"
 #include "utils/overload.h"
 
 namespace FlexFlow {
@@ -18,20 +21,12 @@ std::optional<OperatorAttributeValue>
       [&](auto const &v) -> std::optional<OperatorAttributeValue> {
         using T = std::decay_t<decltype(v)>;
-        if constexpr (std::is_same_v<T, std::vector<int>>) {
-          if (acc.index >= v.size()) {
-            return std::nullopt;
-          } else {
-            int value = v.at(acc.index);
-            return OperatorAttributeValue{value};
-          }
+        if constexpr (std::is_same_v<T, std::vector<nonnegative_int>>) {
+          return transform(at_idx(v, acc.index),
+                           make<OperatorAttributeValue>());
         } else if constexpr (std::is_same_v<T, std::vector<ff_dim_t>>) {
-          if (acc.index >= v.size()) {
-            return std::nullopt;
-          } else {
-            ff_dim_t value = v.at(acc.index);
-            return OperatorAttributeValue{value};
-          }
+          return transform(at_idx(v, acc.index),
+                           make<OperatorAttributeValue>());
         } else {
           throw mk_runtime_error("Invalid operand");
         }
diff --git a/lib/substitutions/src/substitutions/operator_pattern/eval_list_size.cc b/lib/substitutions/src/substitutions/operator_pattern/eval_list_size.cc
index a3ae9c84d1..fb0fd7f47b 100644
--- a/lib/substitutions/src/substitutions/operator_pattern/eval_list_size.cc
+++ b/lib/substitutions/src/substitutions/operator_pattern/eval_list_size.cc
@@ -1,5 +1,6 @@
 #include "substitutions/operator_pattern/eval_list_size.h"
 #include "substitutions/operator_pattern/get_attribute.h"
+#include "utils/nonnegative_int/num_elements.h"
 #include "utils/overload.h"
 
 namespace FlexFlow {
@@ -18,9 +19,9 @@ std::optional<OperatorAttributeValue>
       [&](auto const &v) -> std::optional<OperatorAttributeValue> {
        using T = std::decay_t<decltype(v)>;
-        if constexpr (std::is_same_v<T, std::vector<int>> ||
+        if constexpr (std::is_same_v<T, std::vector<nonnegative_int>> ||
                       std::is_same_v<T, std::vector<ff_dim_t>>) {
-          size_t size = v.size();
+          nonnegative_int size = num_elements(v);
           return OperatorAttributeValue{size};
         } else {
           throw mk_runtime_error("Invalid operand");
diff --git a/lib/substitutions/src/substitutions/operator_pattern/get_attribute.cc b/lib/substitutions/src/substitutions/operator_pattern/get_attribute.cc
index 442d3345a1..cb733e16ff 100644
--- a/lib/substitutions/src/substitutions/operator_pattern/get_attribute.cc
+++ b/lib/substitutions/src/substitutions/operator_pattern/get_attribute.cc
@@ -8,7 +8,7 @@ std::optional<OperatorAttributeValue> get_attribute(BatchMatmulAttrs const &p,
                                                     OperatorAttributeKey key) {
   switch (key) {
     case OperatorAttributeKey::OP_TYPE:
-      return get_op_type(p);
+      return OperatorAttributeValue{get_op_type(p)};
     default:
       return std::nullopt;
   }
@@ -18,13 +18,13 @@ std::optional<OperatorAttributeValue> get_attribute(BatchNormAttrs const &p,
                                                     OperatorAttributeKey key) {
   switch (key) {
     case OperatorAttributeKey::OP_TYPE:
-      return get_op_type(p);
+      return OperatorAttributeValue{get_op_type(p)};
     case OperatorAttributeKey::EPSILON:
-      return p.eps;
+      return OperatorAttributeValue{p.eps};
     case OperatorAttributeKey::AFFINE:
-      return p.affine;
+      return OperatorAttributeValue{p.affine};
     case OperatorAttributeKey::MOMENTUM:
-      return p.momentum;
+      return OperatorAttributeValue{p.momentum};
     default:
       return std::nullopt;
   }
@@ -34,9 +34,9 @@ std::optional<OperatorAttributeValue> get_attribute(BroadcastAttrs const &p,
                                                     OperatorAttributeKey key) {
   switch (key) {
     case OperatorAttributeKey::OP_TYPE:
-      return get_op_type(p);
+      return OperatorAttributeValue{get_op_type(p)};
     case OperatorAttributeKey::TARGET_DIMS:
-      return p.target_dims;
+      return OperatorAttributeValue{p.target_dims};
     default:
       return std::nullopt;
   }
@@ -46,9 +46,9 @@ std::optional<OperatorAttributeValue> get_attribute(CastAttrs const &p,
                                                     OperatorAttributeKey key) {
   switch (key) {
     case OperatorAttributeKey::OP_TYPE:
-      return get_op_type(p);
+      return OperatorAttributeValue{get_op_type(p)};
     case OperatorAttributeKey::DATA_TYPE:
-      return p.dtype;
+      return OperatorAttributeValue{p.dtype};
     default:
       return std::nullopt;
   }
@@ -58,11 +58,11 @@ std::optional<OperatorAttributeValue> get_attribute(CombineAttrs const &p,
                                                     OperatorAttributeKey key) {
   switch (key) {
     case OperatorAttributeKey::OP_TYPE:
-      return get_op_type(p);
+      return OperatorAttributeValue{get_op_type(p)};
    case OperatorAttributeKey::PARALLEL_OP_DIM:
-      return p.combine_dim;
+      return OperatorAttributeValue{p.combine_dim};
     case OperatorAttributeKey::PARALLEL_DIM:
-      return p.combine_degree;
+      return OperatorAttributeValue{p.combine_degree};
     default:
       return std::nullopt;
   }
@@ -72,9 +72,9 @@ std::optional<OperatorAttributeValue> get_attribute(ConcatAttrs const &p,
                                                     OperatorAttributeKey key) {
   switch (key) {
     case OperatorAttributeKey::OP_TYPE:
-      return get_op_type(p);
+      return OperatorAttributeValue{get_op_type(p)};
     case OperatorAttributeKey::AXIS:
-      return p.axis;
+      return OperatorAttributeValue{p.axis};
     default:
       return std::nullopt;
   }
@@ -84,25 +84,25 @@ std::optional<OperatorAttributeValue> get_attribute(Conv2DAttrs const &p,
                                                     OperatorAttributeKey key) {
   switch (key) {
     case OperatorAttributeKey::OP_TYPE:
-      return get_op_type(p);
+      return OperatorAttributeValue{get_op_type(p)};
     case OperatorAttributeKey::KERNEL_H:
-      return p.kernel_h;
+      return OperatorAttributeValue{p.kernel_h};
     case OperatorAttributeKey::KERNEL_W:
-      return p.kernel_w;
+      return OperatorAttributeValue{p.kernel_w};
     case OperatorAttributeKey::STRIDE_H:
-      return p.stride_h;
+      return OperatorAttributeValue{p.stride_h};
     case OperatorAttributeKey::STRIDE_W:
-      return p.stride_w;
+
return OperatorAttributeValue{p.stride_w}; case OperatorAttributeKey::PADDING_H: - return p.padding_h; + return OperatorAttributeValue{p.padding_h}; case OperatorAttributeKey::PADDING_W: - return p.padding_w; + return OperatorAttributeValue{p.padding_w}; case OperatorAttributeKey::GROUPS: - return p.groups; + return OperatorAttributeValue{p.groups}; case OperatorAttributeKey::ACTIVATION: - return p.activation; + return OperatorAttributeValue{p.activation}; case OperatorAttributeKey::USE_BIAS: - return p.use_bias; + return OperatorAttributeValue{p.use_bias}; default: return std::nullopt; } @@ -112,7 +112,7 @@ std::optional get_attribute(ElementBinaryAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; default: return std::nullopt; } @@ -122,7 +122,7 @@ std::optional get_attribute(ElementUnaryAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; default: return std::nullopt; } @@ -132,7 +132,7 @@ std::optional get_attribute(DropoutAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; default: return std::nullopt; } @@ -142,15 +142,15 @@ std::optional get_attribute(EmbeddingAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::DATA_TYPE: - return p.data_type; + return OperatorAttributeValue{p.data_type}; case OperatorAttributeKey::AGGR: - return p.aggr; + return OperatorAttributeValue{p.aggr}; case OperatorAttributeKey::NUM_ENTRIES: - return p.num_entries; + return OperatorAttributeValue{p.num_entries}; case OperatorAttributeKey::OUT_CHANNELS: - return p.out_channels; + return OperatorAttributeValue{p.out_channels}; default: return std::nullopt; } @@ -160,7 +160,7 @@ std::optional get_attribute(FlatAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; default: return std::nullopt; } @@ -170,9 +170,9 @@ std::optional get_attribute(GatherAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::AXIS: - return p.dim; + return OperatorAttributeValue{p.dim}; default: return std::nullopt; } @@ -182,7 +182,7 @@ std::optional get_attribute(InputAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; default: return std::nullopt; } @@ -192,11 +192,11 @@ std::optional get_attribute(LayerNormAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::AFFINE: - return p.elementwise_affine; + return OperatorAttributeValue{p.elementwise_affine}; case OperatorAttributeKey::AXES: - return vector_of(p.axes); + return OperatorAttributeValue{vector_of(p.axes)}; default: return std::nullopt; } @@ -206,17 +206,17 @@ std::optional get_attribute(LinearAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: 
- return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::OUT_CHANNELS: - return p.out_channels; + return OperatorAttributeValue{p.out_channels}; case OperatorAttributeKey::USE_BIAS: - return p.use_bias; + return OperatorAttributeValue{p.use_bias}; case OperatorAttributeKey::DATA_TYPE: - return p.data_type; + return OperatorAttributeValue{p.data_type}; case OperatorAttributeKey::ACTIVATION: - return p.activation; + return OperatorAttributeValue{p.activation}; case OperatorAttributeKey::REGULARIZER: - return p.regularizer; + return OperatorAttributeValue{p.regularizer}; default: return std::nullopt; } @@ -226,13 +226,13 @@ std::optional get_attribute(MultiHeadAttentionAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::NUM_HEADS: - return p.num_heads; + return OperatorAttributeValue{p.num_heads}; case OperatorAttributeKey::USE_BIAS: - return p.bias; + return OperatorAttributeValue{p.bias}; case OperatorAttributeKey::DROPOUT: - return p.dropout; + return OperatorAttributeValue{p.dropout}; default: return std::nullopt; } @@ -242,7 +242,7 @@ std::optional get_attribute(NoopAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; default: return std::nullopt; } @@ -252,23 +252,23 @@ std::optional get_attribute(Pool2DAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::KERNEL_H: - return p.kernel_h; + return OperatorAttributeValue{p.kernel_h}; case OperatorAttributeKey::KERNEL_W: - return p.kernel_w; + return OperatorAttributeValue{p.kernel_w}; case OperatorAttributeKey::STRIDE_H: - return p.stride_h; + return OperatorAttributeValue{p.stride_h}; case OperatorAttributeKey::STRIDE_W: - return p.stride_w; + return OperatorAttributeValue{p.stride_w}; case OperatorAttributeKey::PADDING_H: - return p.padding_h; + return OperatorAttributeValue{p.padding_h}; case OperatorAttributeKey::PADDING_W: - return p.padding_w; + return OperatorAttributeValue{p.padding_w}; case OperatorAttributeKey::POOL_TYPE: - return p.pool_type; + return OperatorAttributeValue{p.pool_type}; case OperatorAttributeKey::ACTIVATION: - return std::optional{p.activation}; + return OperatorAttributeValue{p.activation}; default: return std::nullopt; } @@ -278,7 +278,7 @@ std::optional get_attribute(ReduceAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; default: return std::nullopt; } @@ -288,9 +288,9 @@ std::optional get_attribute(ReductionAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::PARALLEL_OP_DEGREE: - return p.reduction_degree; + return OperatorAttributeValue{p.reduction_degree}; default: return std::nullopt; } @@ -300,11 +300,11 @@ std::optional get_attribute(RepartitionAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::PARALLEL_OP_DIM: - return p.repartition_dim; + return 
OperatorAttributeValue{p.repartition_dim}; case OperatorAttributeKey::PARALLEL_OP_DEGREE: - return p.repartition_degree; + return OperatorAttributeValue{p.repartition_degree}; default: return std::nullopt; } @@ -314,9 +314,9 @@ std::optional get_attribute(ReplicateAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::PARALLEL_OP_DEGREE: - return p.replicate_degree; + return OperatorAttributeValue{p.replicate_degree}; default: return std::nullopt; } @@ -326,7 +326,7 @@ std::optional get_attribute(ReshapeAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; default: return std::nullopt; } @@ -336,9 +336,9 @@ std::optional get_attribute(ReverseAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::AXIS: - return p.axis; + return OperatorAttributeValue{p.axis}; default: return std::nullopt; } @@ -348,9 +348,9 @@ std::optional get_attribute(SplitAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::AXIS: - return p.axis; + return OperatorAttributeValue{p.axis}; default: return std::nullopt; } @@ -360,9 +360,9 @@ std::optional get_attribute(SoftmaxAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::AXIS: - return p.dim; + return OperatorAttributeValue{p.dim}; default: return std::nullopt; } @@ -372,7 +372,7 @@ std::optional get_attribute(TopKAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; default: return std::nullopt; } @@ -382,9 +382,9 @@ std::optional get_attribute(TransposeAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::PERMUTATION: - return vector_of(p.perm); + return OperatorAttributeValue{vector_of(p.perm)}; default: return std::nullopt; } @@ -394,7 +394,7 @@ std::optional get_attribute(WeightAttrs const &p, OperatorAttributeKey key) { switch (key) { case OperatorAttributeKey::OP_TYPE: - return get_op_type(p); + return OperatorAttributeValue{get_op_type(p)}; default: return std::nullopt; } diff --git a/lib/substitutions/src/substitutions/operator_pattern/get_attribute_map.cc b/lib/substitutions/src/substitutions/operator_pattern/get_attribute_map.cc new file mode 100644 index 0000000000..f1b7440aed --- /dev/null +++ b/lib/substitutions/src/substitutions/operator_pattern/get_attribute_map.cc @@ -0,0 +1,25 @@ +#include "substitutions/operator_pattern/get_attribute_map.h" +#include "substitutions/operator_pattern/get_attribute.h" +#include "substitutions/operator_pattern/operator_attribute_key.dtg.h" +#include "substitutions/operator_pattern/operator_attribute_key.h" +#include "substitutions/operator_pattern/operator_attribute_value.dtg.h" + +namespace FlexFlow { + +std::unordered_map + get_attribute_map(PCGOperatorAttrs const &op_attrs) { + 
std::unordered_map result; + + for (OperatorAttributeKey const &attr_key : all_operator_attribute_keys()) { + std::optional attr_value = + get_attribute(op_attrs, attr_key); + + if (attr_value.has_value()) { + result.insert({attr_key, attr_value.value()}); + } + } + + return result; +} + +} // namespace FlexFlow diff --git a/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_constraint.cc b/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_constraint.cc index 5ab528ed3d..29aef07e3a 100644 --- a/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_constraint.cc +++ b/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_constraint.cc @@ -20,6 +20,16 @@ OperatorAttributeConstraint }; } +OperatorAttributeConstraint + op_attr_key_divisible_by(OperatorAttributeKey key, + nonnegative_int denominator) { + return OperatorAttributeConstraint{ + ConstraintType::DIVISIBLE_BY, + OperatorAttributeExpr{key}, + OperatorAttributeValue{denominator}, + }; +} + OperatorAttributeConstraint make_equals_constraint(OperatorAttributeExpr const &expr, OperatorAttributeValue const &val) { diff --git a/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_key.cc b/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_key.cc new file mode 100644 index 0000000000..232d2c2f12 --- /dev/null +++ b/lib/substitutions/src/substitutions/operator_pattern/operator_attribute_key.cc @@ -0,0 +1,68 @@ +#include "substitutions/operator_pattern/operator_attribute_key.h" + +namespace FlexFlow { + +// This should probably be integrated into proj, +// tracked in https://github.com/flexflow/FlexFlow/issues/1478 +std::vector all_operator_attribute_keys() { + return { + OperatorAttributeKey::OP_TYPE, + OperatorAttributeKey::USE_BIAS, + OperatorAttributeKey::GROUPS, + OperatorAttributeKey::POOL_TYPE, + OperatorAttributeKey::KERNEL_H, + OperatorAttributeKey::KERNEL_W, + OperatorAttributeKey::DATA_TYPE, + OperatorAttributeKey::SCALAR, + OperatorAttributeKey::STRIDE_H, + OperatorAttributeKey::STRIDE_W, + OperatorAttributeKey::PADDING_H, + OperatorAttributeKey::PADDING_W, + OperatorAttributeKey::AGGR, + OperatorAttributeKey::NUM_ENTRIES, + OperatorAttributeKey::OUT_CHANNELS, + OperatorAttributeKey::ACTIVATION, + OperatorAttributeKey::NUMDIM, + OperatorAttributeKey::AXIS, + OperatorAttributeKey::PERMUTATION, + OperatorAttributeKey::OUTSHUFFLE, + OperatorAttributeKey::MERGE_GCONV_COUNT, + OperatorAttributeKey::AXES, + OperatorAttributeKey::KEEP_DIMS, + OperatorAttributeKey::EPSILON, + OperatorAttributeKey::PARALLEL_OP_DIM, + OperatorAttributeKey::PARALLEL_OP_DEGREE, + OperatorAttributeKey::SOFTMAX_DIM, + OperatorAttributeKey::NUM_HEADS, + OperatorAttributeKey::PARALLEL_DIM, + OperatorAttributeKey::PARALLEL_DEGREE, + OperatorAttributeKey::PAD, + OperatorAttributeKey::EMBED_DIM, + OperatorAttributeKey::KDIM, + OperatorAttributeKey::VDIM, + OperatorAttributeKey::DROPOUT, + OperatorAttributeKey::BIAS, + OperatorAttributeKey::ADD_BIAS_KV, + OperatorAttributeKey::ADD_ZERO_ATTN, + OperatorAttributeKey::A_SEQ_LENGTH_DIM, + OperatorAttributeKey::B_SEQ_LENGTH_DIM, + OperatorAttributeKey::RELU, + OperatorAttributeKey::TARGET_DIMS, + OperatorAttributeKey::RATE, + OperatorAttributeKey::SEED, + OperatorAttributeKey::SHOULD_BROADCAST_LHS, + OperatorAttributeKey::SHOULD_BROADCAST_RHS, + OperatorAttributeKey::DIM, + OperatorAttributeKey::ELEMENTWISE_AFFINE, + OperatorAttributeKey::REGULARIZER, + OperatorAttributeKey::SHAPE, + 
OperatorAttributeKey::SPLITS, + OperatorAttributeKey::K, + OperatorAttributeKey::SORTED, + OperatorAttributeKey::COMBINE_DIM, + OperatorAttributeKey::COMBINE_DEGREE, + OperatorAttributeKey::NUM_INPUTS, + }; +} + +} // namespace FlexFlow diff --git a/lib/substitutions/src/substitutions/output_graph/materialize_operator_from_attrs_map.cc b/lib/substitutions/src/substitutions/output_graph/materialize_operator_from_attrs_map.cc index 7d65f687c8..4f11b343f8 100644 --- a/lib/substitutions/src/substitutions/output_graph/materialize_operator_from_attrs_map.cc +++ b/lib/substitutions/src/substitutions/output_graph/materialize_operator_from_attrs_map.cc @@ -33,10 +33,12 @@ PCGOperatorAttrs materialize_operator_from_attrs_map( switch (op_type) { case OperatorType::MULTIHEAD_ATTENTION: return PCGOperatorAttrs{MultiHeadAttentionAttrs{ - /*embed_dim=*/acc.get(OperatorAttributeKey::EMBED_DIM), - /*num_heads=*/acc.get(OperatorAttributeKey::NUM_HEADS), - /*kdim=*/acc.get(OperatorAttributeKey::KDIM), - /*vdim=*/acc.get(OperatorAttributeKey::VDIM), + /*embed_dim=*/acc.get( + OperatorAttributeKey::EMBED_DIM), + /*num_heads=*/ + acc.get(OperatorAttributeKey::NUM_HEADS), + /*kdim=*/acc.get(OperatorAttributeKey::KDIM), + /*vdim=*/acc.get(OperatorAttributeKey::VDIM), /*dropout=*/acc.get(OperatorAttributeKey::DROPOUT), /*bias=*/acc.get(OperatorAttributeKey::BIAS), /*add_bias_kv=*/acc.get(OperatorAttributeKey::ADD_BIAS_KV), @@ -44,12 +46,14 @@ PCGOperatorAttrs materialize_operator_from_attrs_map( }}; case OperatorType::POOL2D: return PCGOperatorAttrs{Pool2DAttrs{ - /*kernel_h=*/acc.get(OperatorAttributeKey::KERNEL_H), - /*kernel_w=*/acc.get(OperatorAttributeKey::KERNEL_W), - /*stride_h=*/acc.get(OperatorAttributeKey::STRIDE_H), - /*stride_w=*/acc.get(OperatorAttributeKey::STRIDE_W), - /*padding_h=*/acc.get(OperatorAttributeKey::PADDING_H), - /*padding_w=*/acc.get(OperatorAttributeKey::PADDING_W), + /*kernel_h=*/acc.get(OperatorAttributeKey::KERNEL_H), + /*kernel_w=*/acc.get(OperatorAttributeKey::KERNEL_W), + /*stride_h=*/acc.get(OperatorAttributeKey::STRIDE_H), + /*stride_w=*/acc.get(OperatorAttributeKey::STRIDE_W), + /*padding_h=*/ + acc.get(OperatorAttributeKey::PADDING_H), + /*padding_w=*/ + acc.get(OperatorAttributeKey::PADDING_W), /*pool_type=*/acc.get(OperatorAttributeKey::POOL_TYPE), /*activation=*/ acc.get>(OperatorAttributeKey::ACTIVATION) @@ -62,7 +66,8 @@ PCGOperatorAttrs materialize_operator_from_attrs_map( case OperatorType::DROPOUT: case OperatorType::LINEAR: return PCGOperatorAttrs{LinearAttrs{ - /*out_channels=*/acc.get(OperatorAttributeKey::OUT_CHANNELS), + /*out_channels=*/acc.get( + OperatorAttributeKey::OUT_CHANNELS), /*use_bias=*/acc.get(OperatorAttributeKey::USE_BIAS), /*data_type=*/acc.get(OperatorAttributeKey::DATA_TYPE), /*activation=*/ diff --git a/lib/substitutions/src/substitutions/output_graph/output_graph_expr.cc b/lib/substitutions/src/substitutions/output_graph/output_graph_expr.cc index 3d6aadc795..f6d1410a07 100644 --- a/lib/substitutions/src/substitutions/output_graph/output_graph_expr.cc +++ b/lib/substitutions/src/substitutions/output_graph/output_graph_expr.cc @@ -1,9 +1,18 @@ #include "substitutions/output_graph/output_graph_expr.h" #include "utils/containers/transform.h" #include "utils/graph/dataflow_graph/algorithms.h" +#include "utils/graph/node/algorithms.h" +#include "utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_graph_inputs.h" namespace FlexFlow { +std::unordered_set get_nodes(OutputGraphExpr const &g) { + std::unordered_set raw_nodes = 
get_nodes(g.raw_graph); + + return transform(raw_nodes, + [](Node const &n) { return OutputGraphExprNode{n}; }); +} + std::vector get_node_outputs(OutputGraphExpr const &g, OutputGraphExprNode const &n) { std::vector raw_outputs = @@ -14,4 +23,13 @@ std::vector }); } +std::unordered_set get_inputs(OutputGraphExpr const &g) { + std::unordered_set raw_inputs = + get_open_dataflow_graph_inputs(g.raw_graph); + + return transform(raw_inputs, [](DataflowGraphInput const &i) { + return OutputGraphExprInput{i}; + }); +} + } // namespace FlexFlow diff --git a/lib/substitutions/src/substitutions/output_graph/output_graph_expr_value.cc b/lib/substitutions/src/substitutions/output_graph/output_graph_expr_value.cc new file mode 100644 index 0000000000..b35f3bbeae --- /dev/null +++ b/lib/substitutions/src/substitutions/output_graph/output_graph_expr_value.cc @@ -0,0 +1,30 @@ +#include "substitutions/output_graph/output_graph_expr_value.h" +#include "utils/overload.h" + +namespace FlexFlow { + +OpenDataflowValue raw_open_dataflow_value_from_output_graph_expr_value( + OutputGraphExprValue const &v) { + return v.visit(overload{ + [](OutputGraphExprNodeOutput const &o) { + return OpenDataflowValue{o.raw_dataflow_output}; + }, + [](OutputGraphExprInput const &i) { + return OpenDataflowValue{i.raw_dataflow_graph_input}; + }, + }); +} + +OutputGraphExprValue output_graph_expr_value_from_raw_open_dataflow_value( + OpenDataflowValue const &v) { + return v.visit(overload{ + [](DataflowOutput const &o) { + return OutputGraphExprValue{OutputGraphExprNodeOutput{o}}; + }, + [](DataflowGraphInput const &i) { + return OutputGraphExprValue{OutputGraphExprInput{i}}; + }, + }); +} + +} // namespace FlexFlow diff --git a/lib/substitutions/src/substitutions/output_graph/output_operator_attrs_assignment.cc b/lib/substitutions/src/substitutions/output_graph/output_operator_attrs_assignment.cc index fa247cd151..f6b90ef054 100644 --- a/lib/substitutions/src/substitutions/output_graph/output_operator_attrs_assignment.cc +++ b/lib/substitutions/src/substitutions/output_graph/output_operator_attrs_assignment.cc @@ -1,7 +1,9 @@ #include "substitutions/output_graph/output_operator_attrs_assignment.h" +#include "substitutions/operator_pattern/get_attribute_map.h" #include "substitutions/output_graph/materialize_operator_from_attrs_map.h" #include "substitutions/output_graph/output_operator_attribute_expr.h" #include "utils/containers/map_values.h" +#include "utils/containers/merge_maps.h" namespace FlexFlow { @@ -12,14 +14,31 @@ OutputOperatorAttrsAssignment output_operator_clone_node(PatternNode const &) { PCGOperatorAttrs materialize_output_operator_from_attrs_assignment( OutputOperatorAttrsAssignment const &attrs_assignment, std::unordered_map const &node_match) { - std::unordered_map attr_map = - map_values(attrs_assignment.assignments, - [&](OutputOperatorAttributeExpr const &expr) { - return evaluate_output_operator_attribute_expr(expr, - node_match); - }); - - return materialize_operator_from_attrs_map(attr_map); + + std::unordered_map + template_attrs_map = [&]() + -> std::unordered_map { + if (attrs_assignment.template_operator.has_value()) { + PatternNode template_node = attrs_assignment.template_operator.value(); + PCGOperatorAttrs template_op_attrs = node_match.at(template_node); + return get_attribute_map(template_op_attrs); + } else { + return {}; + } + }(); + + std::unordered_map + assignments_attrs_map = map_values( + attrs_assignment.assignments, + [&](OutputOperatorAttributeExpr const &expr) { + return 
evaluate_output_operator_attribute_expr(expr, node_match); + }); + + std::unordered_map + joined_attrs_map = + merge_map_right_dominates(template_attrs_map, assignments_attrs_map); + + return materialize_operator_from_attrs_map(joined_attrs_map); } std::pair @@ -39,4 +58,10 @@ std::pair }; } +std::pair + set_op_type_attr(OperatorType op_type) { + return set_attr_to_constant(OperatorAttributeKey::OP_TYPE, + OperatorAttributeValue{op_type}); +} + } // namespace FlexFlow diff --git a/lib/substitutions/src/substitutions/pcg_pattern.cc b/lib/substitutions/src/substitutions/pcg_pattern.cc index e53877006d..a0af875848 100644 --- a/lib/substitutions/src/substitutions/pcg_pattern.cc +++ b/lib/substitutions/src/substitutions/pcg_pattern.cc @@ -3,13 +3,23 @@ #include "substitutions/pcg_pattern_match.h" #include "substitutions/sub_parallel_computation_graph.h" #include "substitutions/tensor_pattern/satisfies_pattern.h" +#include "substitutions/unlabelled/find_pattern_matches.h" #include "substitutions/unlabelled/pattern_value.h" #include "utils/containers/map_values.h" #include "utils/containers/transform.h" #include "utils/graph/dataflow_graph/algorithms.h" +#include "utils/graph/node/algorithms.h" +#include "utils/graph/open_dataflow_graph/algorithms/get_inputs.h" +#include "utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_graph_inputs.h" namespace FlexFlow { +std::unordered_set get_nodes(PCGPattern const &p) { + std::unordered_set raw_nodes = get_nodes(p.raw_graph); + + return transform(raw_nodes, [](Node const &n) { return PatternNode{n}; }); +} + static MatchAdditionalCriterion pcg_pattern_criteria(PCGPattern const &pattern, SubParallelComputationGraph const &pcg) { @@ -63,6 +73,14 @@ OperatorAttributePattern get_operator_pattern(PCGPattern const &p, return p.raw_graph.at(n.raw_node); } +std::unordered_set get_inputs(PCGPattern const &p) { + std::unordered_set raw_inputs = + get_open_dataflow_graph_inputs(p.raw_graph); + + return transform(raw_inputs, + [](DataflowGraphInput const &i) { return PatternInput{i}; }); +} + std::vector get_pattern_node_outputs(PCGPattern const &pattern, PatternNode const &node) { diff --git a/lib/substitutions/src/substitutions/sub_parallel_computation_graph.cc b/lib/substitutions/src/substitutions/sub_parallel_computation_graph.cc index 0c673f0a8a..83df74f21b 100644 --- a/lib/substitutions/src/substitutions/sub_parallel_computation_graph.cc +++ b/lib/substitutions/src/substitutions/sub_parallel_computation_graph.cc @@ -188,33 +188,34 @@ bool sub_pcgs_are_isomorphic(SubParallelComputationGraph const &lhs, } std::string as_dot(SubParallelComputationGraph const &spcg) { - std::function get_node_label = - [](ParallelLayerAttrs const &a) -> std::string { - RecordFormatter r = as_dot(a.op_attrs); - - if (a.name.has_value()) { - RecordFormatter rr; - rr << "Name" << a.name.value(); - r << rr; - } - - std::ostringstream oss; - oss << r; - return oss.str(); - }; - - std::function get_input_label = - [](ParallelTensorAttrs const &a) -> std::string { - RecordFormatter r; - - r << fmt::to_string(a.shape); - - std::ostringstream oss; - oss << r; - return oss.str(); - }; - - return as_dot(spcg.raw_graph, get_node_label, get_input_label); + NOT_IMPLEMENTED(); + // std::function get_node_label = + // [](ParallelLayerAttrs const &a) -> std::string { + // RecordFormatter r = as_dot(a.op_attrs); + // + // if (a.name.has_value()) { + // RecordFormatter rr; + // rr << "Name" << a.name.value(); + // r << rr; + // } + // + // std::ostringstream oss; + // oss << r; + // return 
oss.str(); + // }; + // + // std::function get_input_label = + // [](ParallelTensorAttrs const &a) -> std::string { + // RecordFormatter r; + // + // r << fmt::to_string(a.shape); + // + // std::ostringstream oss; + // oss << r; + // return oss.str(); + // }; + // + // return as_dot(spcg.raw_graph, get_node_label, get_input_label); } void debug_print_dot(SubParallelComputationGraph const &spcg) { diff --git a/lib/substitutions/src/substitutions/sub_parallel_computation_graph_edge.cc b/lib/substitutions/src/substitutions/sub_parallel_computation_graph_edge.cc index bb8cb449bc..0d2b912049 100644 --- a/lib/substitutions/src/substitutions/sub_parallel_computation_graph_edge.cc +++ b/lib/substitutions/src/substitutions/sub_parallel_computation_graph_edge.cc @@ -6,7 +6,7 @@ namespace FlexFlow { SubParallelComputationGraphEdge subpcg_edge_from_tensor_and_dst(parallel_tensor_guid_t const &tensor, parallel_layer_guid_t const &layer, - int input_idx) { + nonnegative_int input_idx) { return SubParallelComputationGraphEdge{ OpenDataflowEdge{ DataflowEdge{ diff --git a/lib/substitutions/src/substitutions/substitution.cc b/lib/substitutions/src/substitutions/substitution.cc index 22e15cb01a..874700d303 100644 --- a/lib/substitutions/src/substitutions/substitution.cc +++ b/lib/substitutions/src/substitutions/substitution.cc @@ -1,169 +1,164 @@ #include "substitutions/substitution.h" -#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.h" -#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.h" -#include "substitutions/open_parallel_tensor_guid_t.h" -#include "substitutions/output_graph/output_operator_attrs_assignment.h" -#include "substitutions/pcg_pattern_match.h" -#include "substitutions/sub_parallel_computation_graph.h" -#include "substitutions/sub_parallel_computation_graph_edge.h" -#include "substitutions/substitution_internal/evaluate_substitution_output.h" -#include "substitutions/substitution_internal/output_expr_to_result_sub_pcg_mapping.h" -#include "utils/containers/merge_maps.h" -#include "utils/containers/restrict_keys.h" -#include "utils/containers/set_minus.h" -#include "utils/containers/values.h" -#include "utils/graph/dataflow_graph/algorithms/get_subgraph_outgoing_edges.h" -#include "utils/graph/node/algorithms.h" -#include "utils/overload.h" +#include "substitutions/output_graph/output_graph_expr.h" +#include "substitutions/pcg_pattern.h" +#include "utils/bidict/algorithms/left_entries.h" +#include "utils/bidict/algorithms/right_entries.h" +#include "utils/containers/map_values.h" +#include "utils/graph/labelled_open_dataflow_graph/algorithms/find_isomorphism.h" +#include "utils/graph/labelled_open_dataflow_graph/algorithms/rewrite_node_labels.h" +#include "utils/graph/open_dataflow_graph/algorithms/open_dataflow_graph_isomorphism.dtg.h" +#include "utils/graph/open_dataflow_graph/algorithms/open_dataflow_graph_isomorphism.h" namespace FlexFlow { -bool is_valid_substitution(Substitution const &) { - NOT_IMPLEMENTED(); -} +bool is_isomorphic_to(Substitution const &l, Substitution const &r) { + OpenDataflowGraphIsomorphism pcg_pattern_isomorphism = ({ + std::optional maybe_isomorphism = + find_isomorphism(l.pcg_pattern.raw_graph, r.pcg_pattern.raw_graph); -SubParallelComputationGraph - apply_substitution(SubParallelComputationGraph const &spcg, - Substitution const &sub, - PCGPatternMatch const &match) { - auto substitution_output_result = - evaluate_substitution_output(spcg, sub, match); - SubParallelComputationGraph substitution_output_graph = - 
substitution_output_result.first; - OutputExprToResultSubPCGMapping output_expr_to_result_sub_pcg_mapping = - substitution_output_result.second; - - SubParallelComputationGraphData output_graph_data = - get_sub_pcg_data(substitution_output_graph); - SubParallelComputationGraphData pre_data = get_sub_pcg_data(spcg); - - std::unordered_set pre_nodes = - keys(pre_data.node_data); - std::unordered_set matched_nodes = - unordered_set_of(values(match.node_assignment)); - std::unordered_set post_nodes_from_original_graph = - set_minus(pre_nodes, matched_nodes); - - std::unordered_map post_node_data = - [&] { - std::unordered_map - post_node_data_from_orig = restrict_keys( - pre_data.node_data, post_nodes_from_original_graph); - std::unordered_map - post_node_data_from_sub = output_graph_data.node_data; - - return merge_maps(post_node_data_from_orig, post_node_data_from_sub); - }(); - - std::unordered_set post_edges = [&] { - std::unordered_set post_edges_from_orig = - filter(pre_data.edges, [&](SubParallelComputationGraphEdge const &e) { - if (e.raw_edge.has()) { - return true; - } else { - DataflowEdge dfe = e.raw_edge.get(); - parallel_layer_guid_t src = parallel_layer_guid_t{dfe.src.node}; - parallel_layer_guid_t dst = parallel_layer_guid_t{dfe.dst.node}; - return !(contains(matched_nodes, src) || - contains(matched_nodes, dst)); - } - }); - - std::unordered_set post_edges_from_sub = - filter(output_graph_data.edges, - [&](SubParallelComputationGraphEdge const &e) { - return !e.raw_edge.has(); - }); - - bidict - output_orig_pattern_mapping = get_output_mapping_for_pcg_pattern_match( - match, sub.pcg_pattern, spcg); - bidict - output_post_outexpr_mapping = get_output_graph_expr_output_mapping( - output_expr_to_result_sub_pcg_mapping, - sub.output_graph_expr, - substitution_output_graph); - - std::unordered_set incoming_to_sub_edges; - for (auto const &[pattern_input, base_graph_tensor] : - match.input_assignment) { - OutputGraphExprInput output_expr_input = - sub.inputs_mapping.at_l(pattern_input); - input_parallel_tensor_guid_t output_graph_input = - output_expr_to_result_sub_pcg_mapping.input_mapping.at_r( - output_expr_input); - std::unordered_set uses = get_parallel_tensor_uses( - substitution_output_graph, - open_parallel_tensor_guid_from_input(output_graph_input)); - for (parallel_tensor_use_t const &use : uses) { - SubParallelComputationGraphEdge new_edge = - subpcg_edge_from_tensor_and_use(base_graph_tensor, use); - incoming_to_sub_edges.insert(new_edge); - } + if (!maybe_isomorphism.has_value()) { + return false; } - std::unordered_set outgoing_from_sub_edges; - for (ParallelComputationGraphEdge const &outgoing_edge : - get_subgraph_outgoing_edges(spcg, matched_nodes)) { - parallel_tensor_guid_t original_tensor = - get_parallel_tensor(outgoing_edge); - PatternNodeOutput pattern_tensor = - output_orig_pattern_mapping.at_r(original_tensor); - OutputGraphExprNodeOutput output_graph_tensor = - sub.outputs_mapping.at_l(pattern_tensor); - parallel_tensor_guid_t new_tensor = - output_post_outexpr_mapping.at_r(output_graph_tensor); - - SubParallelComputationGraphEdge new_edge = - subpcg_edge_from_tensor_and_dst( - new_tensor, - get_dst_layer(outgoing_edge), - get_dst_layer_input_idx(outgoing_edge)); - outgoing_from_sub_edges.insert(new_edge); - } + maybe_isomorphism.value(); + }); + + auto l_from_r_pattern_node = [&](PatternNode const &r_node) { + return PatternNode{ + pcg_pattern_isomorphism.node_mapping.at_r(r_node.raw_node), + }; + }; - return set_union(std::vector{ - post_edges_from_orig, 
- post_edges_from_sub, - incoming_to_sub_edges, - outgoing_from_sub_edges, - }); - }(); - - std::unordered_set post_inputs = - pre_data.inputs; - - std::unordered_map - post_value_data = [&] { - std::unordered_map - post_value_data_from_orig = filter_keys( - pre_data.value_data, [&](open_parallel_tensor_guid_t const &t) { - return visit_open_parallel_tensor_guid( - t, - overload{ - [&](parallel_tensor_guid_t const &t) { - return contains(post_nodes_from_original_graph, - get_source_layer(t)); - }, - [](input_parallel_tensor_guid_t const &) { - return true; - }, - }); + auto l_from_r_output_attrs_assignment = + [&](OutputOperatorAttrsAssignment const &r_attrs) { + std::optional l_template_operator = + transform(r_attrs.template_operator, l_from_r_pattern_node); + std::unordered_map + l_assignments = map_values( + r_attrs.assignments, + [&](OutputOperatorAttributeExpr const &r_expr) { + return r_expr.visit( + overload{[&](AttrConstant const &) { return r_expr; }, + [&](OutputOperatorAttrAccess const &r_acc) { + return OutputOperatorAttributeExpr{ + OutputOperatorAttrAccess{ + l_from_r_pattern_node(r_acc.node), + r_acc.attr_expr, + }, + }; + }}); }); + return OutputOperatorAttrsAssignment{ + l_template_operator, + l_assignments, + }; + }; + + OpenDataflowGraphIsomorphism output_graph_expr_isomorphism = ({ + std::optional maybe_isomorphism = + find_isomorphism( + l.output_graph_expr.raw_graph, + rewrite_node_labels( + r.output_graph_expr.raw_graph, + [&](Node const &, OutputOperatorAttrsAssignment const &a) { + return l_from_r_output_attrs_assignment(a); + })); + if (!maybe_isomorphism.has_value()) { + return false; + } - std::unordered_map - post_value_data_from_sub = output_graph_data.value_data; - return merge_maps(post_value_data_from_orig, post_value_data_from_sub); - }(); + maybe_isomorphism.value(); + }); - SubParallelComputationGraphData post_data = SubParallelComputationGraphData{ - post_node_data, - post_edges, - post_inputs, - post_value_data, + auto l_from_r_pattern_input = [&](PatternInput const &r_input) { + return PatternInput{ + pcg_pattern_isomorphism.input_mapping.at_r( + r_input.raw_dataflow_graph_input), + }; }; - return sub_pcg_from_graph_data(post_data); + auto l_from_r_output_graph_input = [&](OutputGraphExprInput const &r_input) { + return OutputGraphExprInput{ + output_graph_expr_isomorphism.input_mapping.at_r( + r_input.raw_dataflow_graph_input), + }; + }; + + auto l_from_r_pattern_output = [&](PatternNodeOutput const &r_output) { + return PatternNodeOutput{ + isomorphism_map_l_dataflow_output_from_r(pcg_pattern_isomorphism, + r_output.raw_dataflow_output), + }; + }; + + auto l_from_r_output_graph_output = + [&](OutputGraphExprNodeOutput const &r_output) { + return OutputGraphExprNodeOutput{ + isomorphism_map_l_dataflow_output_from_r( + output_graph_expr_isomorphism, r_output.raw_dataflow_output), + }; + }; + + bidict l_input_mapping_from_r = + transform(r.inputs_mapping, + [&](PatternInput const &r_p, OutputGraphExprInput const &r_o) { + return std::pair{ + l_from_r_pattern_input(r_p), + l_from_r_output_graph_input(r_o), + }; + }); + if (l_input_mapping_from_r != l.inputs_mapping) { + return false; + } + + bidict l_output_mapping_from_r = + transform(r.outputs_mapping, + [&](PatternNodeOutput const &r_p, + OutputGraphExprNodeOutput const &r_o) { + return std::pair{ + l_from_r_pattern_output(r_p), + l_from_r_output_graph_output(r_o), + }; + }); + if (l_output_mapping_from_r != l.outputs_mapping) { + return false; + } + + return true; +} + +bool 
is_valid_substitution(Substitution const &sub) {
+  {
+    std::unordered_set<PatternInput> pattern_inputs =
+        get_inputs(sub.pcg_pattern);
+    std::unordered_set<PatternInput> mapped_inputs =
+        left_entries(sub.inputs_mapping);
+
+    if (pattern_inputs != mapped_inputs) {
+      return false;
+    }
+  }
+
+  {
+    std::unordered_set<OutputGraphExprInput> output_graph_inputs =
+        get_inputs(sub.output_graph_expr);
+    std::unordered_set<OutputGraphExprInput> mapped_inputs =
+        right_entries(sub.inputs_mapping);
+
+    if (output_graph_inputs != mapped_inputs) {
+      return false;
+    }
+  }
+
+  if (get_nodes(sub.pcg_pattern).empty()) {
+    return false;
+  }
+
+  if (get_nodes(sub.output_graph_expr).empty()) {
+    return false;
+  }
+
+  return true;
 }
 
 } // namespace FlexFlow
diff --git a/lib/substitutions/src/substitutions/substitution_builder.cc b/lib/substitutions/src/substitutions/substitution_builder.cc
new file mode 100644
index 0000000000..a267b8113f
--- /dev/null
+++ b/lib/substitutions/src/substitutions/substitution_builder.cc
@@ -0,0 +1,162 @@
+#include "substitutions/substitution_builder.h"
+#include "substitutions/output_graph/output_graph_expr_value.h"
+#include "substitutions/substitution.h"
+#include "substitutions/unlabelled/pattern_value.h"
+#include "utils/containers/repeat_element.h"
+#include "utils/graph/instances/unordered_set_labelled_open_dataflow_graph.h"
+#include "utils/overload.h"
+
+namespace FlexFlow {
+
+SubstitutionBuilder::SubstitutionBuilder()
+    : pattern_g(LabelledOpenDataflowGraph<OperatorAttributePattern,
+                                          TensorAttributePattern>::
+                    create<UnorderedSetLabelledOpenDataflowGraph<
+                        OperatorAttributePattern, TensorAttributePattern>>()),
+      output_g(LabelledOpenDataflowGraph<OutputOperatorAttrsAssignment,
+                                         std::monostate>::
+                   create<UnorderedSetLabelledOpenDataflowGraph<
+                       OutputOperatorAttrsAssignment, std::monostate>>()) {}
+
+std::pair<PatternValue, OutputGraphExprValue> SubstitutionBuilder::add_input(
+    TensorAttributePattern const &input_tensor_pattern,
+    std::optional<std::string> const &name) {
+  PatternInput pattern_input = PatternInput{
+      this->pattern_g.add_input(input_tensor_pattern),
+  };
+
+  OutputGraphExprInput output_graph_expr_input = OutputGraphExprInput{
+      this->output_g.add_input(std::monostate{}),
+  };
+
+  this->input_mapping.equate(pattern_input, output_graph_expr_input);
+
+  if (name.has_value()) {
+    this->pattern_input_names.equate(pattern_input, name.value());
+  }
+
+  return {
+      PatternValue{pattern_input},
+      OutputGraphExprValue{output_graph_expr_input},
+  };
+}
+
+std::vector<PatternValue> SubstitutionBuilder::add_pattern_node(
+    OperatorAttributePattern const &node_pattern,
+    std::vector<PatternValue> const &inputs,
+    std::vector<TensorAttributePattern> const &output_patterns,
+    std::optional<std::string> const &maybe_name) {
+  NodeAddedResult node_added = this->pattern_g.add_node(
+      node_pattern,
+      transform(inputs, raw_open_dataflow_value_from_pattern_value),
+      output_patterns);
+
+  if (maybe_name.has_value()) {
+    std::string name = maybe_name.value();
+
+    if (this->pattern_node_names.contains_r(name)) {
+      throw mk_runtime_error(fmt::format("Attempted to name node {}, but a "
+                                         "node with that name already exists!",
+                                         name));
+    }
+
+    this->pattern_node_names.equate(PatternNode{node_added.node}, name);
+  }
+
+  return transform(node_added.outputs, [](DataflowOutput const &o) {
+    return pattern_value_from_raw_open_dataflow_value(OpenDataflowValue{o});
+  });
+}
+
+std::vector<OutputGraphExprValue> SubstitutionBuilder::add_output_graph_node(
+    OutputOperatorAttrsAssignment const &node_expr,
+    std::vector<OutputGraphExprValue> const &inputs,
+    nonnegative_int num_outputs) {
+  NodeAddedResult node_added = this->output_g.add_node(
+      node_expr,
+      transform(inputs, raw_open_dataflow_value_from_output_graph_expr_value),
+      repeat_element(/*num_times=*/num_outputs, /*element=*/std::monostate{}));
+
+  return transform(node_added.outputs, [](DataflowOutput const &o) {
+    return output_graph_expr_value_from_raw_open_dataflow_value(
+        OpenDataflowValue{o});
+  });
+}
+
+void
SubstitutionBuilder::equate_outputs(
+    PatternValue const &maybe_pattern_output,
+    OutputGraphExprValue const &maybe_output_graph_expr_output) {
+  PatternNodeOutput pattern_output =
+      maybe_pattern_output.visit(overload{
+          [](PatternNodeOutput const &o) { return o; },
+          [&](PatternInput const &) -> PatternNodeOutput {
+            throw mk_runtime_error(fmt::format(
+                "SubstitutionBuilder::equate_outputs expected a PatternValue "
+                "holding a PatternNodeOutput, but received {}",
+                maybe_pattern_output));
+          },
+      });
+
+  OutputGraphExprNodeOutput output_graph_expr_output =
+      maybe_output_graph_expr_output.visit(overload{
+          [](OutputGraphExprNodeOutput const &o) { return o; },
+          [&](OutputGraphExprInput const &) -> OutputGraphExprNodeOutput {
+            throw mk_runtime_error(
+                fmt::format("SubstitutionBuilder::equate_outputs expected an "
+                            "OutputGraphExprValue holding a "
+                            "OutputGraphExprNodeOutput, but received {}",
+                            maybe_output_graph_expr_output));
+          },
+      });
+
+  if (this->output_mapping.contains_l(pattern_output)) {
+    throw mk_runtime_error(
+        fmt::format("SubstitutionBuilder::equate_outputs expected a "
+                    "PatternValue holding a PatternNodeOutput "
+                    "that is not already mapped in output_mapping, "
+                    "but received {}",
+                    pattern_output));
+  }
+  if (this->output_mapping.contains_r(output_graph_expr_output)) {
+    throw mk_runtime_error(fmt::format(
+        "SubstitutionBuilder::equate_outputs expected an "
+        "OutputGraphExprValue holding an OutputGraphExprNodeOutput "
+        "that is not already mapped in output_mapping, "
+        "but received {}",
+        output_graph_expr_output));
+  }
+
+  this->output_mapping.equate(pattern_output, output_graph_expr_output);
+}
+
+PatternNode
+    SubstitutionBuilder::pattern_node_named(std::string const &name) const {
+  return this->pattern_node_names.at_r(name);
+}
+
+PatternInput
+    SubstitutionBuilder::pattern_input_named(std::string const &name) const {
+  return this->pattern_input_names.at_r(name);
+}
+
+Substitution SubstitutionBuilder::get_substitution() const {
+  Substitution result = Substitution{
+      PCGPattern{this->pattern_g},
+      OutputGraphExpr{this->output_g},
+      this->input_mapping,
+      this->output_mapping,
+  };
+
+  if (!is_valid_substitution(result)) {
+    throw mk_runtime_error(
+        "get_substitution cannot return a Substitution, as the Substitution is "
+        "currently invalid. Ensure you have finished constructing the "
Ensure you have finished constructing the " + "Substitution and have mapped all of the outputs."); + } + + return result; +} + +} // namespace FlexFlow diff --git a/lib/substitutions/src/substitutions/tensor_pattern/eval_list_access.cc b/lib/substitutions/src/substitutions/tensor_pattern/eval_list_access.cc index efbcf4a6f1..7bfb1f5e9e 100644 --- a/lib/substitutions/src/substitutions/tensor_pattern/eval_list_access.cc +++ b/lib/substitutions/src/substitutions/tensor_pattern/eval_list_access.cc @@ -11,9 +11,8 @@ TensorAttributeValue TensorAttributeValue from_attr = get_attribute(attrs, acc.attribute_key); return from_attr.visit(overload{ - [&](std::vector const &v) -> TensorAttributeValue { - return TensorAttributeValue{ - static_cast(at_idx(v, acc.index).value())}; + [&](std::vector const &v) -> TensorAttributeValue { + return TensorAttributeValue{at_idx(v, acc.index).value()}; }, [](auto &&) -> TensorAttributeValue { throw mk_runtime_error("Invalid operand"); diff --git a/lib/substitutions/src/substitutions/tensor_pattern/eval_list_size.cc b/lib/substitutions/src/substitutions/tensor_pattern/eval_list_size.cc index d1e97adc37..5acfdf406a 100644 --- a/lib/substitutions/src/substitutions/tensor_pattern/eval_list_size.cc +++ b/lib/substitutions/src/substitutions/tensor_pattern/eval_list_size.cc @@ -1,5 +1,6 @@ #include "substitutions/tensor_pattern/eval_list_size.h" #include "substitutions/tensor_pattern/get_attribute.h" +#include "utils/nonnegative_int/num_elements.h" #include "utils/overload.h" namespace FlexFlow { @@ -9,8 +10,8 @@ TensorAttributeValue eval_list_size(ParallelTensorAttrs const &attrs, TensorAttributeValue from_attr = get_attribute(attrs, acc.attribute_key); return from_attr.visit(overload{ - [](std::vector const &v) -> TensorAttributeValue { - return TensorAttributeValue{v.size()}; + [](std::vector const &v) -> TensorAttributeValue { + return TensorAttributeValue{num_elements(v)}; }, [](auto &&) -> TensorAttributeValue { throw mk_runtime_error("Invalid operand"); diff --git a/lib/substitutions/src/substitutions/tensor_pattern/get_attribute.cc b/lib/substitutions/src/substitutions/tensor_pattern/get_attribute.cc index 286bc69b84..3539b06832 100644 --- a/lib/substitutions/src/substitutions/tensor_pattern/get_attribute.cc +++ b/lib/substitutions/src/substitutions/tensor_pattern/get_attribute.cc @@ -10,15 +10,15 @@ TensorAttributeValue get_attribute(ParallelTensorAttrs const &attrs, TensorAttributeKey key) { switch (key) { case TensorAttributeKey::DIM_SIZES: { - std::vector sizes = - transform(vector_of(ff_ordered_shard_dims(attrs.shape.dims)), - [](ShardParallelDim const &d) { return d.size; }); + std::vector sizes = transform( + vector_of(ff_ordered_shard_dims(attrs.shape.dims)), + [](ShardParallelDim const &d) { return nonnegative_int{d.size}; }); return TensorAttributeValue{sizes}; } case TensorAttributeKey::DIM_DEGREES: { - std::vector degrees = transform( + std::vector degrees = transform( vector_of(ff_ordered_shard_dims(attrs.shape.dims)), - [](ShardParallelDim const &d) { return size_t_from_int(d.degree); }); + [](ShardParallelDim const &d) { return nonnegative_int{d.degree}; }); return TensorAttributeValue{degrees}; } default: diff --git a/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_pattern.cc b/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_pattern.cc index 794ab5abda..e1c1fe7cf6 100644 --- a/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_pattern.cc +++ 
b/lib/substitutions/src/substitutions/tensor_pattern/tensor_attribute_pattern.cc @@ -1,4 +1,5 @@ #include "substitutions/tensor_pattern/tensor_attribute_pattern.h" +#include "utils/integer_conversions.h" namespace FlexFlow { @@ -6,4 +7,19 @@ TensorAttributePattern tensor_attribute_pattern_match_all() { return TensorAttributePattern{{}}; } +TensorAttributePattern + tensor_attr_pattern_require_num_dims(nonnegative_int num_dims) { + return TensorAttributePattern{{ + TensorAttributeConstraint{ + ConstraintType::EQUAL, + TensorAttributeExpr{ + TensorAttributeListSize{ + TensorAttributeKey::DIM_SIZES, + }, + }, + TensorAttributeValue{num_dims}, + }, + }}; +} + } // namespace FlexFlow diff --git a/lib/substitutions/src/substitutions/unity_substitution_set.cc b/lib/substitutions/src/substitutions/unity_substitution_set.cc new file mode 100644 index 0000000000..4b00cdd95f --- /dev/null +++ b/lib/substitutions/src/substitutions/unity_substitution_set.cc @@ -0,0 +1,235 @@ +#include "substitutions/unity_substitution_set.h" +#include "pcg/machine_specification.h" +#include "substitutions/operator_pattern/operator_attribute_constraint.h" +#include "substitutions/output_graph/output_operator_attrs_assignment.h" +#include "substitutions/substitution_builder.h" +#include "substitutions/tensor_pattern/tensor_attribute_pattern.h" +#include "utils/containers/get_only.h" +#include "utils/nonnegative_int/nonnegative_int.h" +#include "utils/nonnegative_int/nonnegative_range.h" + +namespace FlexFlow { + +std::vector + get_substitution_set(MachineSpecification const &resources) { + std::vector substitutions; + for (nonnegative_int num_dims : + nonnegative_range(1_n, nonnegative_int{MAX_TENSOR_DIM})) { + for (nonnegative_int degree = 1_n; degree <= get_num_gpus(resources); + degree *= 2_n) { + substitutions.push_back( + create_replicate_linear_combine(num_dims, degree, true)); + substitutions.push_back( + create_replicate_linear_combine(num_dims, degree, false)); + } + } + substitutions.push_back(create_fuse_linear_activation(Activation::RELU)); + substitutions.push_back(create_fuse_linear_activation(Activation::SIGMOID)); + substitutions.push_back(create_fuse_linear_activation(Activation::TANH)); + substitutions.push_back(create_fuse_linear_activation(Activation::GELU)); + return substitutions; +} + +Substitution create_combine_inception(nonnegative_int num_convs, + nonnegative_int num_dims, + nonnegative_int degree) { + NOT_IMPLEMENTED(); +} + +Substitution create_combine_concat(nonnegative_int num_inputs, + nonnegative_int num_dims, + nonnegative_int degree) { + NOT_IMPLEMENTED(); +} + +Substitution create_replicate_linear_combine(nonnegative_int num_dims, + nonnegative_int degree, + bool use_bias) { + SubstitutionBuilder b; + + auto [p_input, o_input] = b.add_input(tensor_attribute_pattern_match_all()); + auto [p_weight, o_weight] = b.add_input(tensor_attribute_pattern_match_all()); + std::vector p_inputs = {p_input, p_weight}; + + std::optional o_bias = std::nullopt; + if (use_bias) { + std::pair bias = + b.add_input(tensor_attribute_pattern_match_all()); + p_inputs.push_back(bias.first); + o_bias = bias.second; + } + + OperatorAttributePattern linear_pattern = OperatorAttributePattern{{ + op_type_equals_constraint(OperatorType::LINEAR), + op_attr_key_equals(OperatorAttributeKey::BIAS, + OperatorAttributeValue{use_bias}), + op_attr_key_divisible_by(OperatorAttributeKey::OUT_CHANNELS, + nonnegative_int{degree}), + }}; + + PatternValue p_linear_output = get_only(b.add_pattern_node( + linear_pattern, + p_inputs, 
+ {tensor_attr_pattern_require_num_dims(nonnegative_int{num_dims})}, + "linear")); + + OutputOperatorAttrsAssignment replicate_input_expr = + OutputOperatorAttrsAssignment{ + std::nullopt, + { + set_op_type_attr(OperatorType::REPLICATE), + set_attr_to_constant(OperatorAttributeKey::PARALLEL_DEGREE, + OperatorAttributeValue{degree}), + }}; + OutputGraphExprValue o_replicate_input_output = + get_only(b.add_output_graph_node(replicate_input_expr, {o_input}, 1_n)); + + OutputOperatorAttrsAssignment partition_weights_expr = + OutputOperatorAttrsAssignment{ + std::nullopt, + { + set_op_type_attr(OperatorType::REPARTITION), + set_attr_to_constant(OperatorAttributeKey::PARALLEL_DEGREE, + OperatorAttributeValue{degree}), + set_attr_to_constant(OperatorAttributeKey::PARALLEL_DIM, + OperatorAttributeValue{ff_dim_t{1_n}}), + }}; + OutputGraphExprValue o_partition_weights_output = get_only( + b.add_output_graph_node(partition_weights_expr, {o_weight}, 1_n)); + + std::vector o_linear_inputs = { + o_replicate_input_output, o_partition_weights_output}; + + if (use_bias) { + OutputOperatorAttrsAssignment partition_bias_expr = + OutputOperatorAttrsAssignment{ + std::nullopt, + { + set_op_type_attr(OperatorType::REPARTITION), + set_attr_to_constant(OperatorAttributeKey::PARALLEL_DEGREE, + OperatorAttributeValue{degree}), + set_attr_to_constant(OperatorAttributeKey::PARALLEL_DIM, + OperatorAttributeValue{ff_dim_t{1_n}}), + }}; + OutputGraphExprValue o_partition_bias_output = get_only( + b.add_output_graph_node(partition_bias_expr, {o_bias.value()}, 1_n)); + o_linear_inputs.push_back(o_partition_bias_output); + } + + OutputOperatorAttrsAssignment linear_expr = OutputOperatorAttrsAssignment{ + b.pattern_node_named("linear"), + {}, + }; + OutputGraphExprValue o_linear_output = + get_only(b.add_output_graph_node(linear_expr, o_linear_inputs, 1_n)); + + OutputOperatorAttrsAssignment combine_expr = OutputOperatorAttrsAssignment{ + std::nullopt, + { + set_op_type_attr(OperatorType::COMBINE), + set_attr_to_constant(OperatorAttributeKey::PARALLEL_DEGREE, + OperatorAttributeValue{degree}), + set_attr_to_constant( + OperatorAttributeKey::PARALLEL_DIM, + OperatorAttributeValue{ff_dim_t{ + nonnegative_int{num_dims.unwrap_nonnegative() - 1}, + }}), + }, + }; + OutputGraphExprValue o_combine_output = + get_only(b.add_output_graph_node(combine_expr, {o_linear_output}, 1_n)); + + b.equate_outputs(p_linear_output, o_combine_output); + + return b.get_substitution(); +} + +Substitution create_partition_linear_combine(nonnegative_int num_dims, + nonnegative_int degree, + Activation activation, + bool use_bias) { + NOT_IMPLEMENTED(); +} + +Substitution create_partition_conv2d_combine(nonnegative_int num_dims, + nonnegative_int degree) { + NOT_IMPLEMENTED(); +} + +Substitution create_partition_attention_combine(nonnegative_int num_heads, + nonnegative_int degree) { + NOT_IMPLEMENTED(); +} + +Substitution create_replicate_attention_reduce(nonnegative_int num_heads, + nonnegative_int degree) { + NOT_IMPLEMENTED(); +} + +Substitution create_partition_add_combine(ff_dim_t parallel_dim, + nonnegative_int degree) { + NOT_IMPLEMENTED(); +} + +Substitution create_partition_relu_combine(ff_dim_t parallel_dim, + nonnegative_int degree) { + NOT_IMPLEMENTED(); +} + +Substitution create_partition_concat_combine(nonnegative_int num_inputs, + ff_dim_t concat_dim, + ff_dim_t parallel_dim, + nonnegative_int degree) { + NOT_IMPLEMENTED(); +} + +Substitution create_partition_softmax_combine(ff_dim_t softmax_dim, + ff_dim_t partition_dim, + 
nonnegative_int degree) { + NOT_IMPLEMENTED(); +} + +Substitution create_fuse_linear_activation(Activation activation) { + SubstitutionBuilder b; + + auto [p_input, o_input] = + b.add_input(tensor_attribute_pattern_match_all(), "input"); + auto [p_weight, o_weight] = + b.add_input(tensor_attribute_pattern_match_all(), "weight"); + + OperatorAttributePattern mm_pattern = OperatorAttributePattern{{ + op_type_equals_constraint(OperatorType::LINEAR), + op_attr_key_equals( + OperatorAttributeKey::ACTIVATION, + OperatorAttributeValue{std::optional{std::nullopt}}), + }}; + PatternValue p_mm_output = + get_only(b.add_pattern_node(mm_pattern, + {p_input, p_weight}, + {tensor_attribute_pattern_match_all()}, + "mm")); + + OperatorAttributePattern relu_pattern = OperatorAttributePattern{{ + op_type_equals_constraint(OperatorType::RELU), + }}; + PatternValue p_relu_output = + get_only(b.add_pattern_node(relu_pattern, + {p_mm_output}, + {tensor_attribute_pattern_match_all()}, + "relu")); + + OutputOperatorAttrsAssignment fused_node_expr = OutputOperatorAttrsAssignment{ + b.pattern_node_named("mm"), + { + set_attr_to_constant(OperatorAttributeKey::ACTIVATION, + OperatorAttributeValue{activation}), + }}; + OutputGraphExprValue o_fused_node_output = get_only( + b.add_output_graph_node(fused_node_expr, {o_input, o_weight}, 1_n)); + + b.equate_outputs(p_relu_output, o_fused_node_output); + + return b.get_substitution(); +} + +} // namespace FlexFlow diff --git a/lib/substitutions/src/substitutions/unlabelled/input_pattern_edge.cc b/lib/substitutions/src/substitutions/unlabelled/input_pattern_edge.cc index e8deacebec..dff600ecf0 100644 --- a/lib/substitutions/src/substitutions/unlabelled/input_pattern_edge.cc +++ b/lib/substitutions/src/substitutions/unlabelled/input_pattern_edge.cc @@ -11,7 +11,7 @@ PatternNode get_dst_node(InputPatternEdge const &e) { return PatternNode{e.raw_edge.dst.node}; } -int get_dst_idx(InputPatternEdge const &e) { +nonnegative_int get_dst_idx(InputPatternEdge const &e) { return e.raw_edge.dst.idx; } diff --git a/lib/substitutions/src/substitutions/unlabelled/pattern_node_output.cc b/lib/substitutions/src/substitutions/unlabelled/pattern_node_output.cc index 9abdc4e83c..24bbb6f4d1 100644 --- a/lib/substitutions/src/substitutions/unlabelled/pattern_node_output.cc +++ b/lib/substitutions/src/substitutions/unlabelled/pattern_node_output.cc @@ -6,7 +6,7 @@ PatternNode get_src_node(PatternNodeOutput const &o) { return PatternNode{o.raw_dataflow_output.node}; } -int get_idx(PatternNodeOutput const &o) { +nonnegative_int get_idx(PatternNodeOutput const &o) { return o.raw_dataflow_output.idx; } diff --git a/lib/substitutions/src/substitutions/unlabelled/standard_pattern_edge.cc b/lib/substitutions/src/substitutions/unlabelled/standard_pattern_edge.cc index dea3e5f500..17d05f1122 100644 --- a/lib/substitutions/src/substitutions/unlabelled/standard_pattern_edge.cc +++ b/lib/substitutions/src/substitutions/unlabelled/standard_pattern_edge.cc @@ -10,11 +10,11 @@ PatternNode get_dst_node(StandardPatternEdge const &e) { return PatternNode{e.raw_edge.dst.node}; } -int get_src_idx(StandardPatternEdge const &e) { +nonnegative_int get_src_idx(StandardPatternEdge const &e) { return e.raw_edge.src.idx; } -int get_dst_idx(StandardPatternEdge const &e) { +nonnegative_int get_dst_idx(StandardPatternEdge const &e) { return e.raw_edge.dst.idx; } diff --git a/lib/substitutions/test/src/substitutions/apply_substitution/apply_substitution.cc 
b/lib/substitutions/test/src/substitutions/apply_substitution/apply_substitution.cc new file mode 100644 index 0000000000..5fd923f71f --- /dev/null +++ b/lib/substitutions/test/src/substitutions/apply_substitution/apply_substitution.cc @@ -0,0 +1,174 @@ +#include "substitutions/apply_substitution/apply_substitution.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h" +#include "substitutions/operator_pattern/operator_attribute_constraint.h" +#include "substitutions/output_graph/output_operator_attrs_assignment.h" +#include "substitutions/sub_parallel_computation_graph.h" +#include "substitutions/substitution_builder.h" +#include "substitutions/tensor_pattern/tensor_attribute_pattern.h" +#include "utils/containers/get_only.h" +#include "utils/integer_conversions.h" +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("apply_substitution") { + SubstitutionBuilder b; + + auto [p_input, o_input] = + b.add_input(tensor_attribute_pattern_match_all(), "input"); + auto [p_weight, o_weight] = + b.add_input(tensor_attribute_pattern_match_all(), "weight"); + + PatternValue p_mm_output = [&] { + auto pattern = OperatorAttributePattern{{ + op_type_equals_constraint(OperatorType::LINEAR), + op_attr_key_equals( + OperatorAttributeKey::ACTIVATION, + OperatorAttributeValue{std::optional{std::nullopt}}), + }}; + + return get_only(b.add_pattern_node(pattern, + {p_input, p_weight}, + {tensor_attribute_pattern_match_all()}, + "mm")); + }(); + + PatternValue p_relu_output = [&] { + auto pattern = OperatorAttributePattern{{ + op_type_equals_constraint(OperatorType::RELU), + }}; + + return get_only(b.add_pattern_node(pattern, + {p_mm_output}, + {tensor_attribute_pattern_match_all()}, + "relu")); + }(); + + OutputGraphExprValue o_fused_output = [&] { + auto node_expr = OutputOperatorAttrsAssignment{ + b.pattern_node_named("mm"), + { + set_attr_to_constant(OperatorAttributeKey::ACTIVATION, + OperatorAttributeValue{Activation::RELU}), + }}; + + return get_only( + b.add_output_graph_node(node_expr, {o_input, o_weight}, 1_n)); + }(); + + b.equate_outputs(p_relu_output, o_fused_output); + + Substitution sub = b.get_substitution(); + + nonnegative_int in_channels = 24_n; + nonnegative_int batch_size = 4_n; + nonnegative_int batch_degree = 2_n; + std::string mm_match = "mm_match"; + std::string relu_match = "relu_match"; + + SubParallelComputationGraph pcg = [&] { + ParallelComputationGraphBuilder b; + parallel_tensor_guid_t t = b.create_input_tensor(ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{batch_size, batch_degree}, + ShardParallelDim{in_channels, 1_n}, + }, + ReplicaParallelDimSet{ + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + }, + }, + DataType::FLOAT, + }); + t = b.dense(t, + /*outDim=*/16_n, + /*activation=*/std::nullopt); + t = b.gelu(t); + t = b.dense(t, + /*outDim=*/12_n, + /*activation=*/std::nullopt, + /*use_bias=*/false, + /*data_type=*/DataType::FLOAT, + /*kernel_initializer=*/std::nullopt, + /*bias_initializer=*/std::nullopt, + /*name=*/mm_match); + t = b.relu(t, + /*name=*/relu_match); + t = b.dense(t, + /*outDim=*/8_n, + /*activation=*/Activation::RELU); + + return sub_pcg_from_full_pcg(b.pcg); + }(); + + PCGPatternMatch match = [&] { + parallel_layer_guid_t mm_match_layer = + get_parallel_layer_by_name(pcg, mm_match); + parallel_layer_guid_t relu_match_layer = + get_parallel_layer_by_name(pcg, relu_match); + open_parallel_tensor_guid_t mm_match_layer_input_activations = + get_layer_inputs(pcg, 
mm_match_layer).at(0); + open_parallel_tensor_guid_t mm_match_layer_input_weights = + get_layer_inputs(pcg, mm_match_layer).at(1); + + return PCGPatternMatch{ + bidict{ + {b.pattern_node_named("mm"), mm_match_layer}, + {b.pattern_node_named("relu"), relu_match_layer}, + }, + std::unordered_map{ + { + b.pattern_input_named("input"), + mm_match_layer_input_activations, + }, + { + b.pattern_input_named("weight"), + mm_match_layer_input_weights, + }}, + }; + }(); + + SubParallelComputationGraph result = apply_substitution(pcg, sub, match); + + SubParallelComputationGraph correct = [&] { + ParallelComputationGraphBuilder b; + parallel_tensor_guid_t t = b.create_input_tensor(ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{batch_size, batch_degree}, + ShardParallelDim{in_channels, 1_n}, + }, + ReplicaParallelDimSet{ + SumDegree{1_n}, + DiscardCopyDegree{1_n}, + }, + }, + DataType::FLOAT, + }); + t = b.dense(t, + /*outDim=*/16_n, + /*activation=*/std::nullopt); + t = b.gelu(t); + t = b.dense(t, + /*outDim=*/12_n, + /*activation=*/Activation::RELU, + /*use_bias=*/false, + /*data_type=*/DataType::FLOAT, + /*kernel_initializer=*/std::nullopt, + /*bias_initializer=*/std::nullopt, + /*name=*/std::nullopt); + t = b.dense(t, + /*outDim=*/8_n, + /*activation=*/Activation::RELU); + + return sub_pcg_from_full_pcg(b.pcg); + }(); + + // since the new nodes produced by the substitution have new ids, it's + // easier/more correct to check that the graphs are isomorphic rather than + // checking their exact graph data + CHECK(sub_pcgs_are_isomorphic(result, correct)); + } +} diff --git a/lib/substitutions/test/src/substitutions/substitution_internal/evaluate_substitution_output.cc b/lib/substitutions/test/src/substitutions/apply_substitution/evaluate_substitution_output.cc similarity index 86% rename from lib/substitutions/test/src/substitutions/substitution_internal/evaluate_substitution_output.cc rename to lib/substitutions/test/src/substitutions/apply_substitution/evaluate_substitution_output.cc index 52b54b32fb..7bdcc5a3bd 100644 --- a/lib/substitutions/test/src/substitutions/substitution_internal/evaluate_substitution_output.cc +++ b/lib/substitutions/test/src/substitutions/apply_substitution/evaluate_substitution_output.cc @@ -1,4 +1,4 @@ -#include "substitutions/substitution_internal/evaluate_substitution_output.h" +#include "substitutions/apply_substitution/evaluate_substitution_output.h" #include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h" #include "substitutions/open_parallel_tensor_guid_t.h" #include "substitutions/operator_pattern/operator_attribute_constraint.h" @@ -64,20 +64,23 @@ TEST_SUITE(FF_TEST_SUITE) { OutputGraphExprInput{output_g.add_input({})}; OutputOperatorAttrsAssignment fused_mm_relu_attrs_assignment = - OutputOperatorAttrsAssignment{{ - set_attr_to_constant(OperatorAttributeKey::OP_TYPE, - OperatorAttributeValue{OperatorType::LINEAR}), - copy_attr_from_pattern_node(OperatorAttributeKey::OUT_CHANNELS, - pattern_mm_node), - copy_attr_from_pattern_node(OperatorAttributeKey::USE_BIAS, - pattern_mm_node), - copy_attr_from_pattern_node(OperatorAttributeKey::DATA_TYPE, - pattern_mm_node), - set_attr_to_constant(OperatorAttributeKey::ACTIVATION, - OperatorAttributeValue{Activation::RELU}), - copy_attr_from_pattern_node(OperatorAttributeKey::REGULARIZER, - pattern_mm_node), - }}; + OutputOperatorAttrsAssignment{ + std::nullopt, + { + set_attr_to_constant( + OperatorAttributeKey::OP_TYPE, + OperatorAttributeValue{OperatorType::LINEAR}), + 
copy_attr_from_pattern_node(OperatorAttributeKey::OUT_CHANNELS, + pattern_mm_node), + copy_attr_from_pattern_node(OperatorAttributeKey::USE_BIAS, + pattern_mm_node), + copy_attr_from_pattern_node(OperatorAttributeKey::DATA_TYPE, + pattern_mm_node), + set_attr_to_constant(OperatorAttributeKey::ACTIVATION, + OperatorAttributeValue{Activation::RELU}), + copy_attr_from_pattern_node(OperatorAttributeKey::REGULARIZER, + pattern_mm_node), + }}; NodeAddedResult fused_mm_relu_added = output_g.add_node( fused_mm_relu_attrs_assignment, {OpenDataflowValue{output_i_activation.raw_dataflow_graph_input}, @@ -108,9 +111,9 @@ TEST_SUITE(FF_TEST_SUITE) { }, }; - int in_channels = 24; - int batch_size = 4; - int batch_degree = 2; + nonnegative_int in_channels = 24_n; + nonnegative_int batch_size = 4_n; + nonnegative_int batch_degree = 2_n; std::string mm_match = "mm_match"; std::string relu_match = "relu_match"; @@ -119,22 +122,22 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_tensor_guid_t t = b.create_input_tensor(ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{size_t_from_int(batch_size), batch_degree}, - ShardParallelDim{size_t_from_int(in_channels), 1}, + ShardParallelDim{batch_size, batch_degree}, + ShardParallelDim{in_channels, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, }); t = b.dense(t, - /*outDim=*/16, + /*outDim=*/16_n, /*activation=*/std::nullopt); t = b.gelu(t); t = b.dense(t, - /*outDim=*/12, + /*outDim=*/12_n, /*activation=*/std::nullopt, /*use_bias=*/false, /*data_type=*/DataType::FLOAT, @@ -144,7 +147,7 @@ TEST_SUITE(FF_TEST_SUITE) { t = b.relu(t, /*name=*/relu_match); t = b.dense(t, - /*outDim=*/8, + /*outDim=*/8_n, /*activation=*/Activation::RELU); return sub_pcg_from_full_pcg(b.pcg); @@ -186,10 +189,10 @@ TEST_SUITE(FF_TEST_SUITE) { result_input_map = result.second.input_mapping; LinearAttrs correct_result_fused_mm_relu_attrs = LinearAttrs{ - 12, + /*out_channels=*/12_n, /*use_bias=*/false, - DataType::FLOAT, - Activation::RELU, + /*data_type=*/DataType::FLOAT, + /*activation=*/Activation::RELU, /*regularizer=*/std::nullopt, }; @@ -228,7 +231,7 @@ TEST_SUITE(FF_TEST_SUITE) { result_i_activation.raw_dataflow_graph_input, DataflowInput{ result_fused_mm_relu_node.raw_graph_node, - 0, + 0_n, }, }, }, @@ -239,7 +242,7 @@ TEST_SUITE(FF_TEST_SUITE) { result_i_weights.raw_dataflow_graph_input, DataflowInput{ result_fused_mm_relu_node.raw_graph_node, - 1, + 1_n, }, }, }, diff --git a/lib/substitutions/test/src/substitutions/substitution_internal/perform_shape_inference.cc b/lib/substitutions/test/src/substitutions/apply_substitution/perform_shape_inference.cc similarity index 78% rename from lib/substitutions/test/src/substitutions/substitution_internal/perform_shape_inference.cc rename to lib/substitutions/test/src/substitutions/apply_substitution/perform_shape_inference.cc index 4d4e557fb8..950e833771 100644 --- a/lib/substitutions/test/src/substitutions/substitution_internal/perform_shape_inference.cc +++ b/lib/substitutions/test/src/substitutions/apply_substitution/perform_shape_inference.cc @@ -1,4 +1,4 @@ -#include "substitutions/substitution_internal/perform_shape_inference.h" +#include "substitutions/apply_substitution/perform_shape_inference.h" #include "op-attrs/ops/element_unary.h" #include "op-attrs/ops/linear.h" #include "op-attrs/parallel_tensor_shape.h" @@ -18,21 +18,21 @@ TEST_SUITE(FF_TEST_SUITE) { UnorderedSetLabelledOpenDataflowGraph>(); - int in_channels = 24; - int 
out_channels = 16; - int batch_size = 4; - int batch_degree = 2; + nonnegative_int in_channels = 24_n; + nonnegative_int out_channels = 16_n; + nonnegative_int batch_size = 4_n; + nonnegative_int batch_degree = 2_n; DataflowGraphInput i0 = g.add_input({}); ParallelTensorShape i0_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ - ShardParallelDim{size_t_from_int(batch_size), batch_degree}, - ShardParallelDim{size_t_from_int(in_channels), 1}, + ShardParallelDim{batch_size, batch_degree}, + ShardParallelDim{in_channels, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -40,28 +40,28 @@ TEST_SUITE(FF_TEST_SUITE) { bool use_bias = false; LinearAttrs n1_op_attrs = LinearAttrs{ - out_channels, - use_bias, - DataType::FLOAT, - std::nullopt, - std::nullopt, + /*out_channels=*/out_channels, + /*use_bias=*/use_bias, + /*data_type=*/DataType::FLOAT, + /*activation=*/std::nullopt, + /*regularizer=*/std::nullopt, }; ParallelLayerAttrs n1_attrs = ParallelLayerAttrs{ - PCGOperatorAttrs{ + /*op_attrs=*/PCGOperatorAttrs{ n1_op_attrs, }, - std::nullopt, + /*name=*/std::nullopt, }; ElementUnaryAttrs n2_op_attrs = ElementUnaryAttrs{ - OperatorType::RELU, - std::nullopt, + /*op_type=*/OperatorType::RELU, + /*scalar=*/std::nullopt, }; ParallelLayerAttrs n2_attrs = ParallelLayerAttrs{ - PCGOperatorAttrs{ + /*op_attrs=*/PCGOperatorAttrs{ n2_op_attrs, }, - std::nullopt, + /*name=*/std::nullopt, }; ParallelTensorShape n1_output_shape = @@ -131,22 +131,22 @@ TEST_SUITE(FF_TEST_SUITE) { OpenDataflowEdge{ DataflowInputEdge{ i0, - DataflowInput{n1, 0}, + DataflowInput{n1, 0_n}, }, }, OpenDataflowEdge{DataflowEdge{ - DataflowOutput{n1_weight_node, 0}, - DataflowInput{n1_weight_replicate_node, 0}, + DataflowOutput{n1_weight_node, 0_n}, + DataflowInput{n1_weight_replicate_node, 0_n}, }}, OpenDataflowEdge{ DataflowEdge{ - DataflowOutput{n1_weight_replicate_node, 0}, - DataflowInput{n1, 1}, + DataflowOutput{n1_weight_replicate_node, 0_n}, + DataflowInput{n1, 1_n}, }, }, OpenDataflowEdge{DataflowEdge{ - DataflowOutput{n1, 0}, - DataflowInput{n2, 0}, + DataflowOutput{n1, 0_n}, + DataflowInput{n2, 0_n}, }}, }, {i0}, @@ -155,19 +155,20 @@ TEST_SUITE(FF_TEST_SUITE) { i0_shape, }, { - OpenDataflowValue{DataflowOutput{n1_weight_node, 0}}, + OpenDataflowValue{DataflowOutput{n1_weight_node, 0_n}}, lift_to_parallel(get_reduced_shape(n1_weight_shape)), }, { - OpenDataflowValue{DataflowOutput{n1_weight_replicate_node, 0}}, + OpenDataflowValue{ + DataflowOutput{n1_weight_replicate_node, 0_n}}, n1_weight_shape, }, { - OpenDataflowValue{DataflowOutput{n1, 0}}, + OpenDataflowValue{DataflowOutput{n1, 0_n}}, n1_output_shape, }, { - OpenDataflowValue{DataflowOutput{n2, 0}}, + OpenDataflowValue{DataflowOutput{n2, 0_n}}, n2_output_shape, }}}; diff --git a/lib/substitutions/test/src/substitutions/operator_pattern/get_attribute.cc b/lib/substitutions/test/src/substitutions/operator_pattern/get_attribute.cc index 95b61e0ef4..24f9e9bd56 100644 --- a/lib/substitutions/test/src/substitutions/operator_pattern/get_attribute.cc +++ b/lib/substitutions/test/src/substitutions/operator_pattern/get_attribute.cc @@ -6,7 +6,7 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_attribute(LinearAttrs, OperatorAttributeKey)") { - int out_channels = 16; + nonnegative_int out_channels = 16_n; bool use_bias = true; std::optional activation = Activation::GELU; std::optional regularizer = RegularizerAttrs{ diff --git 
a/lib/substitutions/test/src/substitutions/pcg_pattern.cc b/lib/substitutions/test/src/substitutions/pcg_pattern.cc index d9273b4bcf..9ff368a8eb 100644 --- a/lib/substitutions/test/src/substitutions/pcg_pattern.cc +++ b/lib/substitutions/test/src/substitutions/pcg_pattern.cc @@ -15,19 +15,19 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("find_pattern_matches(PCGPattern, SubParallelComputationGraph)") { ParallelComputationGraphBuilder builder; - size_t batch_size = 16; - int batch_degree = 2; - size_t num_channels = 24; + nonnegative_int batch_size = 16_n; + nonnegative_int batch_degree = 2_n; + nonnegative_int num_channels = 24_n; ParallelTensorShape a_shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ ShardParallelDim{batch_size, batch_degree}, - ShardParallelDim{num_channels, 1}, + ShardParallelDim{num_channels, 1_n}, }, ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, + SumDegree{1_n}, + DiscardCopyDegree{1_n}, }, }, DataType::FLOAT, @@ -37,7 +37,7 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_tensor_guid_t a_tensor = builder.create_input_tensor(a_shape, CreateGrad::YES, a_name); - int outDim = 16; + nonnegative_int outDim = 16_n; std::string x_matmul_name = "x_matmul"; std::string y_matmul_name = "y_matmul"; parallel_tensor_guid_t t0 = diff --git a/lib/substitutions/test/src/substitutions/substitution.cc b/lib/substitutions/test/src/substitutions/substitution.cc index 1718b03b5c..ef27cb7606 100644 --- a/lib/substitutions/test/src/substitutions/substitution.cc +++ b/lib/substitutions/test/src/substitutions/substitution.cc @@ -4,226 +4,173 @@ #include "substitutions/operator_pattern/operator_attribute_constraint.h" #include "substitutions/output_graph/output_graph_expr_node.dtg.h" #include "substitutions/output_graph/output_operator_attrs_assignment.h" +#include "substitutions/pcg_pattern.h" #include "substitutions/pcg_pattern_builder.h" #include "substitutions/sub_parallel_computation_graph.h" +#include "substitutions/substitution_builder.h" #include "substitutions/tensor_pattern/tensor_attribute_pattern.h" #include "utils/containers/get_only.h" #include "utils/graph/instances/unordered_set_labelled_open_dataflow_graph.h" #include "utils/graph/labelled_open_dataflow_graph/algorithms/get_graph_data.h" +#include "utils/graph/open_dataflow_graph/algorithms/are_isomorphic.h" #include "utils/integer_conversions.h" #include using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { - // TEST_CASE("is_valid_substitution") { - // FAIL("TODO"); - // } - - TEST_CASE("evaluate_substitution_output(SubParallelComputationGraph, " - "Substitution, PCGPatternMatch)") { - // Currently Substitution creation is very verbose. - // This is being addressed in - // https://github.com/flexflow/FlexFlow/issues/1473. 
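// (Aside: the verbosity noted in the comment above is what this patch's new
// SubstitutionBuilder addresses. A minimal sketch of the builder-based flow,
// using only the API introduced in substitution_builder.cc; `some_op_pattern`
// and `some_attrs_assignment` are placeholder values for illustration, not
// names from the patch:
//
//   SubstitutionBuilder b;
//
//   auto [p_in, o_in] = b.add_input(tensor_attribute_pattern_match_all());
//
//   PatternValue p_out = get_only(b.add_pattern_node(
//       /*node_pattern=*/some_op_pattern,
//       /*inputs=*/{p_in},
//       /*output_patterns=*/{tensor_attribute_pattern_match_all()},
//       /*maybe_name=*/"n"));
//
//   OutputGraphExprValue o_out = get_only(b.add_output_graph_node(
//       /*node_expr=*/some_attrs_assignment,
//       /*inputs=*/{o_in},
//       /*num_outputs=*/1_n));
//
//   b.equate_outputs(p_out, o_out);
//   Substitution sub = b.get_substitution();
// )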
- auto pattern_g = LabelledOpenDataflowGraph:: - create>(); - - PatternInput pattern_i_activation = - PatternInput{pattern_g.add_input(tensor_attribute_pattern_match_all())}; - PatternInput pattern_i_weights = - PatternInput{pattern_g.add_input(tensor_attribute_pattern_match_all())}; - - OperatorAttributePattern mm_pattern = OperatorAttributePattern{{ - op_type_equals_constraint(OperatorType::LINEAR), - op_attr_key_equals( - OperatorAttributeKey::ACTIVATION, - OperatorAttributeValue{std::optional{std::nullopt}}), - }}; - NodeAddedResult mm_added = pattern_g.add_node( - mm_pattern, - {OpenDataflowValue{pattern_i_activation.raw_dataflow_graph_input}, - OpenDataflowValue{pattern_i_weights.raw_dataflow_graph_input}}, - {tensor_attribute_pattern_match_all()}); - PatternNode pattern_mm_node = PatternNode{mm_added.node}; - DataflowOutput mm_output = get_only(mm_added.outputs); - - OperatorAttributePattern relu_pattern = OperatorAttributePattern{{ - op_type_equals_constraint(OperatorType::RELU), - }}; - NodeAddedResult relu_added = - pattern_g.add_node(relu_pattern, - {OpenDataflowValue{mm_output}}, - {tensor_attribute_pattern_match_all()}); - PatternNode pattern_relu_node = PatternNode{relu_added.node}; - DataflowOutput relu_output = get_only(relu_added.outputs); - - LabelledOpenDataflowGraph - output_g = LabelledOpenDataflowGraph:: - create>(); - - OutputGraphExprInput output_i_activation = - OutputGraphExprInput{output_g.add_input({})}; - OutputGraphExprInput output_i_weights = - OutputGraphExprInput{output_g.add_input({})}; - - OutputOperatorAttrsAssignment fused_mm_relu_attrs_assignment = - OutputOperatorAttrsAssignment{{ - set_attr_to_constant(OperatorAttributeKey::OP_TYPE, - OperatorAttributeValue{OperatorType::LINEAR}), - copy_attr_from_pattern_node(OperatorAttributeKey::OUT_CHANNELS, - pattern_mm_node), - copy_attr_from_pattern_node(OperatorAttributeKey::USE_BIAS, - pattern_mm_node), - copy_attr_from_pattern_node(OperatorAttributeKey::DATA_TYPE, - pattern_mm_node), - set_attr_to_constant(OperatorAttributeKey::ACTIVATION, - OperatorAttributeValue{Activation::RELU}), - copy_attr_from_pattern_node(OperatorAttributeKey::REGULARIZER, - pattern_mm_node), + TEST_CASE("is_isomorphic_to(Substitution, Substitution)") { + auto make_substitution = [] { + SubstitutionBuilder b; + + auto [p_input, o_input] = + b.add_input(tensor_attribute_pattern_match_all()); + auto [p_weight, o_weight] = + b.add_input(tensor_attribute_pattern_match_all()); + + PatternValue p_mm_output = [&] { + auto pattern = OperatorAttributePattern{{ + op_type_equals_constraint(OperatorType::LINEAR), + op_attr_key_equals(OperatorAttributeKey::ACTIVATION, + OperatorAttributeValue{ + std::optional{std::nullopt}}), }}; - NodeAddedResult fused_mm_relu_added = output_g.add_node( - fused_mm_relu_attrs_assignment, - {OpenDataflowValue{output_i_activation.raw_dataflow_graph_input}, - OpenDataflowValue{output_i_weights.raw_dataflow_graph_input}}, - {{}}); - OutputGraphExprNode fused_mm_relu_node = - OutputGraphExprNode{fused_mm_relu_added.node}; - DataflowOutput fused_mm_relu_output = get_only(fused_mm_relu_added.outputs); - - Substitution sub = Substitution{ - PCGPattern{pattern_g}, - OutputGraphExpr{output_g}, - bidict{ - { - pattern_i_activation, - output_i_activation, - }, - { - pattern_i_weights, - output_i_weights, - }, - }, - bidict{ + + return get_only( + b.add_pattern_node(pattern, + {p_input, p_weight}, + {tensor_attribute_pattern_match_all()}, + "mm")); + }(); + + PatternValue p_relu_output = [&] { + auto pattern = 
OperatorAttributePattern{{ + op_type_equals_constraint(OperatorType::RELU), + }}; + + return get_only( + b.add_pattern_node(pattern, + {p_mm_output}, + {tensor_attribute_pattern_match_all()}, + "relu")); + }(); + + OutputGraphExprValue o_fused_output = [&] { + auto node_expr = OutputOperatorAttrsAssignment{ + b.pattern_node_named("mm"), { - PatternNodeOutput{relu_output}, - OutputGraphExprNodeOutput{fused_mm_relu_output}, - }, - }, + set_attr_to_constant(OperatorAttributeKey::ACTIVATION, + OperatorAttributeValue{Activation::RELU}), + }}; + + return get_only(b.add_output_graph_node( + node_expr, {o_input, o_weight}, nonnegative_int{1})); + }(); + + b.equate_outputs(p_relu_output, o_fused_output); + + return b.get_substitution(); }; - int in_channels = 24; - int batch_size = 4; - int batch_degree = 2; - std::string mm_match = "mm_match"; - std::string relu_match = "relu_match"; - - SubParallelComputationGraph pcg = [&] { - ParallelComputationGraphBuilder b; - parallel_tensor_guid_t t = b.create_input_tensor(ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{size_t_from_int(batch_size), batch_degree}, - ShardParallelDim{size_t_from_int(in_channels), 1}, - }, - ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }); - t = b.dense(t, - /*outDim=*/16, - /*activation=*/std::nullopt); - t = b.gelu(t); - t = b.dense(t, - /*outDim=*/12, - /*activation=*/std::nullopt, - /*use_bias=*/false, - /*data_type=*/DataType::FLOAT, - /*kernel_initializer=*/std::nullopt, - /*bias_initializer=*/std::nullopt, - /*name=*/mm_match); - t = b.relu(t, - /*name=*/relu_match); - t = b.dense(t, - /*outDim=*/8, - /*activation=*/Activation::RELU); - - return sub_pcg_from_full_pcg(b.pcg); + Substitution sub1 = make_substitution(); + Substitution sub2 = make_substitution(); + + CHECK(is_isomorphic_to(sub1, sub1)); + CHECK(is_isomorphic_to(sub1, sub2)); + } + + TEST_CASE("is_valid_substitution") { + SubstitutionBuilder b; + + auto [p_input, o_input] = b.add_input(tensor_attribute_pattern_match_all()); + auto [p_weight, o_weight] = + b.add_input(tensor_attribute_pattern_match_all()); + + PatternValue p_mm_output = [&] { + auto pattern = OperatorAttributePattern{{ + op_type_equals_constraint(OperatorType::LINEAR), + op_attr_key_equals( + OperatorAttributeKey::ACTIVATION, + OperatorAttributeValue{std::optional{std::nullopt}}), + }}; + + return get_only(b.add_pattern_node(pattern, + {p_input, p_weight}, + {tensor_attribute_pattern_match_all()}, + "mm")); }(); - PCGPatternMatch match = [&] { - parallel_layer_guid_t mm_match_layer = - get_parallel_layer_by_name(pcg, mm_match); - parallel_layer_guid_t relu_match_layer = - get_parallel_layer_by_name(pcg, relu_match); - open_parallel_tensor_guid_t mm_match_layer_input_activations = - get_layer_inputs(pcg, mm_match_layer).at(0); - open_parallel_tensor_guid_t mm_match_layer_input_weights = - get_layer_inputs(pcg, mm_match_layer).at(1); - - return PCGPatternMatch{ - bidict{ - {pattern_mm_node, mm_match_layer}, - {pattern_relu_node, relu_match_layer}, - }, - std::unordered_map{ - { - PatternInput{pattern_i_activation}, - mm_match_layer_input_activations, - }, - { - PatternInput{pattern_i_weights}, - mm_match_layer_input_weights, - }}, - }; + PatternValue p_relu_output = [&] { + auto pattern = OperatorAttributePattern{{ + op_type_equals_constraint(OperatorType::RELU), + }}; + + return get_only(b.add_pattern_node(pattern, + {p_mm_output}, + {tensor_attribute_pattern_match_all()}, + "relu")); }(); - SubParallelComputationGraph 
result = apply_substitution(pcg, sub, match); - - SubParallelComputationGraph correct = [&] { - ParallelComputationGraphBuilder b; - parallel_tensor_guid_t t = b.create_input_tensor(ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{size_t_from_int(batch_size), batch_degree}, - ShardParallelDim{size_t_from_int(in_channels), 1}, - }, - ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }); - t = b.dense(t, - /*outDim=*/16, - /*activation=*/std::nullopt); - t = b.gelu(t); - t = b.dense(t, - /*outDim=*/12, - /*activation=*/Activation::RELU, - /*use_bias=*/false, - /*data_type=*/DataType::FLOAT, - /*kernel_initializer=*/std::nullopt, - /*bias_initializer=*/std::nullopt, - /*name=*/std::nullopt); - t = b.dense(t, - /*outDim=*/8, - /*activation=*/Activation::RELU); - - return sub_pcg_from_full_pcg(b.pcg); + OutputGraphExprValue o_fused_output = [&] { + auto node_expr = OutputOperatorAttrsAssignment{ + b.pattern_node_named("mm"), + { + set_attr_to_constant(OperatorAttributeKey::ACTIVATION, + OperatorAttributeValue{Activation::RELU}), + }}; + + return get_only(b.add_output_graph_node( + node_expr, {o_input, o_weight}, nonnegative_int{1})); }(); - // since the new nodes produced by the substitution have new ids, it's - // easier/more correct to check that the graphs are isomorphic rather than - // checking their exact graph data - CHECK(sub_pcgs_are_isomorphic(result, correct)); + b.equate_outputs(p_relu_output, o_fused_output); + + SUBCASE("pattern inputs != mapped inputs") { + Substitution sub = b.get_substitution(); + sub.pcg_pattern.raw_graph.add_input(tensor_attribute_pattern_match_all()); + CHECK_FALSE(is_valid_substitution(sub)); + } + + SUBCASE("output graph inputs != mapped inputs") { + Substitution sub = b.get_substitution(); + sub.output_graph_expr.raw_graph.add_input(std::monostate{}); + CHECK_FALSE(is_valid_substitution(sub)); + } + + SUBCASE("pattern has no nodes") { + // Could revamp this test to only trigger the + // get_nodes(sub.pcg_pattern).empty() case + Substitution sub = b.get_substitution(); + LabelledOpenDataflowGraph + zero_node_pattern = + LabelledOpenDataflowGraph:: + create>(); + sub.pcg_pattern = PCGPattern{zero_node_pattern}; + CHECK_FALSE(is_valid_substitution(sub)); + } + + SUBCASE("output graph has no nodes") { + // Could revamp this test to only trigger the + // get_nodes(sub.output_graph_expr).empty() case + Substitution sub = b.get_substitution(); + LabelledOpenDataflowGraph + zero_node_pattern = + LabelledOpenDataflowGraph:: + create>(); + sub.output_graph_expr = OutputGraphExpr{zero_node_pattern}; + CHECK_FALSE(is_valid_substitution(sub)); + } + + SUBCASE("valid substitution") { + Substitution sub = b.get_substitution(); + CHECK(is_valid_substitution(sub)); + } } } diff --git a/lib/substitutions/test/src/substitutions/substitution_builder.cc b/lib/substitutions/test/src/substitutions/substitution_builder.cc new file mode 100644 index 0000000000..028a4e59c9 --- /dev/null +++ b/lib/substitutions/test/src/substitutions/substitution_builder.cc @@ -0,0 +1,145 @@ +#include "substitutions/substitution_builder.h" +#include "substitutions/operator_pattern/operator_attribute_constraint.h" +#include "substitutions/output_graph/output_graph_expr_node.dtg.h" +#include "substitutions/output_graph/output_operator_attrs_assignment.h" +#include "substitutions/substitution.h" +#include "substitutions/tensor_pattern/tensor_attribute_pattern.h" +#include "utils/containers/get_only.h" +#include 
"utils/graph/instances/unordered_set_labelled_open_dataflow_graph.h" +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("SubstitutionBuilder") { + OperatorAttributePattern relu_pattern = OperatorAttributePattern{{ + op_type_equals_constraint(OperatorType::RELU), + }}; + + OperatorAttributePattern mm_pattern = OperatorAttributePattern{{ + op_type_equals_constraint(OperatorType::LINEAR), + op_attr_key_equals( + OperatorAttributeKey::ACTIVATION, + OperatorAttributeValue{std::optional{std::nullopt}}), + }}; + + std::unordered_map + fused_mm_relu_attr_assignments = { + set_attr_to_constant(OperatorAttributeKey::ACTIVATION, + OperatorAttributeValue{Activation::RELU}), + }; + + Substitution correct = [&] { + auto pattern_g = LabelledOpenDataflowGraph:: + create< + UnorderedSetLabelledOpenDataflowGraph>(); + + PatternInput pattern_i_activation = PatternInput{ + pattern_g.add_input(tensor_attribute_pattern_match_all())}; + PatternInput pattern_i_weights = PatternInput{ + pattern_g.add_input(tensor_attribute_pattern_match_all())}; + + NodeAddedResult mm_added = pattern_g.add_node( + mm_pattern, + {OpenDataflowValue{pattern_i_activation.raw_dataflow_graph_input}, + OpenDataflowValue{pattern_i_weights.raw_dataflow_graph_input}}, + {tensor_attribute_pattern_match_all()}); + PatternNode pattern_mm_node = PatternNode{mm_added.node}; + DataflowOutput mm_output = get_only(mm_added.outputs); + + NodeAddedResult relu_added = + pattern_g.add_node(relu_pattern, + {OpenDataflowValue{mm_output}}, + {tensor_attribute_pattern_match_all()}); + PatternNode pattern_relu_node = PatternNode{relu_added.node}; + DataflowOutput relu_output = get_only(relu_added.outputs); + + LabelledOpenDataflowGraph + output_g = LabelledOpenDataflowGraph:: + create>(); + + OutputGraphExprInput output_i_activation = + OutputGraphExprInput{output_g.add_input({})}; + OutputGraphExprInput output_i_weights = + OutputGraphExprInput{output_g.add_input({})}; + + OutputOperatorAttrsAssignment fused_mm_relu_attrs_assignment = + OutputOperatorAttrsAssignment{ + pattern_mm_node, + fused_mm_relu_attr_assignments, + }; + NodeAddedResult fused_mm_relu_added = output_g.add_node( + fused_mm_relu_attrs_assignment, + {OpenDataflowValue{output_i_activation.raw_dataflow_graph_input}, + OpenDataflowValue{output_i_weights.raw_dataflow_graph_input}}, + {{}}); + OutputGraphExprNode fused_mm_relu_node = + OutputGraphExprNode{fused_mm_relu_added.node}; + DataflowOutput fused_mm_relu_output = + get_only(fused_mm_relu_added.outputs); + + return Substitution{ + PCGPattern{pattern_g}, + OutputGraphExpr{output_g}, + bidict{ + { + pattern_i_activation, + output_i_activation, + }, + { + pattern_i_weights, + output_i_weights, + }, + }, + bidict{ + { + PatternNodeOutput{relu_output}, + OutputGraphExprNodeOutput{fused_mm_relu_output}, + }, + }, + }; + }(); + + Substitution result = [&] { + SubstitutionBuilder b; + + auto [p_input, o_input] = + b.add_input(tensor_attribute_pattern_match_all()); + auto [p_weight, o_weight] = + b.add_input(tensor_attribute_pattern_match_all()); + + PatternValue p_mm_output = + get_only(b.add_pattern_node(mm_pattern, + {p_input, p_weight}, + {tensor_attribute_pattern_match_all()}, + "mm")); + + PatternValue p_relu_output = + get_only(b.add_pattern_node(relu_pattern, + {p_mm_output}, + {tensor_attribute_pattern_match_all()}, + "relu")); + + OutputOperatorAttrsAssignment fused_mm_relu_attrs_assignment = + OutputOperatorAttrsAssignment{ + b.pattern_node_named("mm"), + fused_mm_relu_attr_assignments, + }; + 
OutputGraphExprValue o_fused_output = + get_only(b.add_output_graph_node(fused_mm_relu_attrs_assignment, + {o_input, o_weight}, + nonnegative_int{1})); + + b.equate_outputs(p_relu_output, o_fused_output); + + return b.get_substitution(); + }(); + + CHECK(is_isomorphic_to(result, correct)); + } +} diff --git a/lib/substitutions/test/src/substitutions/unity_substitution_set.cc b/lib/substitutions/test/src/substitutions/unity_substitution_set.cc new file mode 100644 index 0000000000..804fa99bef --- /dev/null +++ b/lib/substitutions/test/src/substitutions/unity_substitution_set.cc @@ -0,0 +1,20 @@ +#include "substitutions/unity_substitution_set.h" +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_substitution_set") { + MachineSpecification machine_spec = MachineSpecification{ + /*num_nodes=*/2_n, + /*num_cpus_per_node=*/8_n, + /*num_gpus_per_node=*/4_n, + /*inter_node_bandwidth=*/0.0, + /*intra_node_bandwidth=*/0.0, + }; + + std::vector result = get_substitution_set(machine_spec); + + CHECK(result.size() == 36); + } +} diff --git a/lib/substitutions/test/src/test_pattern_matches.cc b/lib/substitutions/test/src/substitutions/unlabelled/find_pattern_matches.cc similarity index 94% rename from lib/substitutions/test/src/test_pattern_matches.cc rename to lib/substitutions/test/src/substitutions/unlabelled/find_pattern_matches.cc index aeedd65f82..ab79ad6ff6 100644 --- a/lib/substitutions/test/src/test_pattern_matches.cc +++ b/lib/substitutions/test/src/substitutions/unlabelled/find_pattern_matches.cc @@ -9,7 +9,6 @@ #include "utils/graph/open_dataflow_graph/algorithms/get_subgraph.h" #include "utils/graph/open_dataflow_graph/algorithms/get_subgraph_inputs.h" #include "utils/graph/open_dataflow_graph/open_dataflow_graph.h" -#include "utils/overload.h" #include using namespace FlexFlow; @@ -59,30 +58,30 @@ namespace rc { // OpenMultiDiGraphView subgraph = // get_subgraph(as_openmultidigraph(g), // subgraph_nodes); - +// // std::vector matches = // find_pattern_matches(subgraph, as_openmultidigraph(g), AlwaysTrue{}); - +// // RC_ASSERT(!matches.empty()); - +// // for (MultiDiGraphPatternMatch const &match : matches) { // RC_ASSERT(pattern_matches(subgraph, as_openmultidigraph(g), match, // AlwaysTrue{})); // } // }); -// } TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("find_pattern_matches_small") { + TEST_CASE("find_pattern_matches") { OpenDataflowGraph pattern_graph = OpenDataflowGraph::create(); - NodeAddedResult pattern_n0_added = pattern_graph.add_node({}, 1); + NodeAddedResult pattern_n0_added = pattern_graph.add_node({}, 1_n); Node pattern_n0 = pattern_n0_added.node; OpenDataflowValue pattern_v0 = OpenDataflowValue{get_only(pattern_n0_added.outputs)}; - NodeAddedResult pattern_n1_added = pattern_graph.add_node({pattern_v0}, 1); + NodeAddedResult pattern_n1_added = + pattern_graph.add_node({pattern_v0}, 1_n); Node pattern_n1 = pattern_n1_added.node; OpenDataflowValue pattern_v1 = OpenDataflowValue{get_only(pattern_n1_added.outputs)}; @@ -94,19 +93,19 @@ TEST_SUITE(FF_TEST_SUITE) { OpenDataflowGraph graph = OpenDataflowGraph::create(); - NodeAddedResult n0_added = graph.add_node({}, 1); + NodeAddedResult n0_added = graph.add_node({}, 1_n); Node n0 = n0_added.node; OpenDataflowValue v0 = OpenDataflowValue{get_only(n0_added.outputs)}; - NodeAddedResult n1_added = graph.add_node({v0}, 1); + NodeAddedResult n1_added = graph.add_node({v0}, 1_n); Node n1 = n1_added.node; OpenDataflowValue v1 = OpenDataflowValue{get_only(n1_added.outputs)}; - NodeAddedResult n2_added = 
graph.add_node({v1}, 1); + NodeAddedResult n2_added = graph.add_node({v1}, 1_n); Node n2 = n2_added.node; OpenDataflowValue v2 = OpenDataflowValue{get_only(n2_added.outputs)}; - NodeAddedResult n3_added = graph.add_node({v2}, 1); + NodeAddedResult n3_added = graph.add_node({v2}, 1_n); Node n3 = n3_added.node; OpenDataflowValue v3 = OpenDataflowValue{get_only(n3_added.outputs)}; @@ -128,8 +127,8 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector n1_incoming = {OpenDataflowEdge{ DataflowEdge{ - DataflowOutput{n0, 0}, - DataflowInput{n1, 0}, + DataflowOutput{n0, 0_n}, + DataflowInput{n1, 0_n}, }, }}; @@ -201,7 +200,7 @@ TEST_SUITE(FF_TEST_SUITE) { OpenDataflowGraph::create(); DataflowGraphInput i0 = g.add_input(); - NodeAddedResult g_n0_added = g.add_node({OpenDataflowValue{i0}}, 1); + NodeAddedResult g_n0_added = g.add_node({OpenDataflowValue{i0}}, 1_n); Node g_n0 = g_n0_added.node; OpenDataflowValue g_v0 = OpenDataflowValue{get_only(g_n0_added.outputs)}; PatternNode g_p0 = PatternNode{g_n0}; diff --git a/lib/substitutions/test/src/substitutions/unlabelled/pattern_matching.cc b/lib/substitutions/test/src/substitutions/unlabelled/pattern_matching.cc new file mode 100644 index 0000000000..8fd468d186 --- /dev/null +++ b/lib/substitutions/test/src/substitutions/unlabelled/pattern_matching.cc @@ -0,0 +1,210 @@ +#include "substitutions/unlabelled/pattern_matching.h" +#include "substitutions/unlabelled/find_pattern_matches.h" +#include "substitutions/unlabelled/match_additional_criterion.h" +#include "utils/containers/get_only.h" +#include "utils/graph/instances/unordered_set_dataflow_graph.h" +#include "utils/graph/node/algorithms.h" +#include "utils/graph/open_dataflow_graph/algorithms/get_incoming_edges.h" +#include "utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_values.h" +#include "utils/graph/open_dataflow_graph/algorithms/get_subgraph.h" +#include "utils/graph/open_dataflow_graph/algorithms/get_subgraph_inputs.h" +#include "utils/graph/open_dataflow_graph/open_dataflow_graph.h" +#include "utils/overload.h" +#include + +using namespace FlexFlow; + +namespace rc { + +// template <> +// struct Arbitrary { +// static int const MAX_GRAPH_SIZE = 200; +// static int const MAX_EDGE_SIZE = 1000; +// +// static Gen arbitrary() { +// return gen::exec([&] { +// int num_nodes = *gen::inRange(1, MAX_GRAPH_SIZE + 1); +// MultiDiGraph g = MultiDiGraph::template +// create(); +// +// std::vector nodes; +// for (int i = 0; i < num_nodes; ++i) { +// nodes.push_back(g.add_node()); +// } +// +// int num_edges = *gen::inRange(1, MAX_GRAPH_SIZE + 1); +// for (int i = 0; i < num_edges; ++i) { +// int src_id = *gen::inRange(0, num_nodes); +// int dst_id = *gen::inRange(0, num_nodes); +// if (src_id > dst_id) { +// std::swap(src_id, dst_id); +// } +// +// g.add_edge(MultiDiEdge{nodes[dst_id], +// g.add_node_port(), +// nodes[src_id], +// g.add_node_port()}); +// } +// +// return g; +// }); +// } +// }; + +} // namespace rc + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("find_pattern_matches") { + OpenDataflowGraph pattern_graph = + OpenDataflowGraph::create(); + + NodeAddedResult pattern_n0_added = pattern_graph.add_node({}, 1_n); + Node pattern_n0 = pattern_n0_added.node; + OpenDataflowValue pattern_v0 = + OpenDataflowValue{get_only(pattern_n0_added.outputs)}; + + NodeAddedResult pattern_n1_added = + pattern_graph.add_node({pattern_v0}, 1_n); + Node pattern_n1 = pattern_n1_added.node; + OpenDataflowValue pattern_v1 = + OpenDataflowValue{get_only(pattern_n1_added.outputs)}; + + UnlabelledGraphPattern pattern = 
UnlabelledGraphPattern{pattern_graph}; + PatternNode p0 = PatternNode{pattern_n0}; + PatternNode p1 = PatternNode{pattern_n1}; + + OpenDataflowGraph graph = + OpenDataflowGraph::create(); + + NodeAddedResult n0_added = graph.add_node({}, 1_n); + Node n0 = n0_added.node; + OpenDataflowValue v0 = OpenDataflowValue{get_only(n0_added.outputs)}; + + NodeAddedResult n1_added = graph.add_node({v0}, 1_n); + Node n1 = n1_added.node; + OpenDataflowValue v1 = OpenDataflowValue{get_only(n1_added.outputs)}; + + NodeAddedResult n2_added = graph.add_node({v1}, 1_n); + Node n2 = n2_added.node; + OpenDataflowValue v2 = OpenDataflowValue{get_only(n2_added.outputs)}; + + NodeAddedResult n3_added = graph.add_node({v2}, 1_n); + Node n3 = n3_added.node; + OpenDataflowValue v3 = OpenDataflowValue{get_only(n3_added.outputs)}; + + UnlabelledDataflowGraphPatternMatch match = + UnlabelledDataflowGraphPatternMatch{ + bidict{ + {p0, n0}, + {p1, n1}, + }, + bidict{}}; + + UnlabelledDataflowGraphPatternMatch invalid_match = + UnlabelledDataflowGraphPatternMatch{ + bidict{ + {p0, n1}, + {p1, n2}, + }, + bidict{}}; + + std::vector n1_incoming = {OpenDataflowEdge{ + DataflowEdge{ + DataflowOutput{n0, 0_n}, + DataflowInput{n1, 0_n}, + }, + }}; + + SUBCASE("get_incoming_edges") { + SUBCASE("n0") { + std::vector result = get_incoming_edges(graph, n0); + std::vector correct = {}; + CHECK(result == correct); + } + SUBCASE("n1") { + std::vector result = get_incoming_edges(graph, n1); + std::vector correct = n1_incoming; + CHECK(result == correct); + } + SUBCASE("both") { + std::unordered_map> result = + get_incoming_edges(graph, {n0, n1}); + std::unordered_map> correct = { + {n0, {}}, {n1, n1_incoming}}; + CHECK(result == correct); + } + } + + SUBCASE("get_subgraph_inputs") { + std::unordered_set result = + get_subgraph_inputs(graph, {n0, n1}); + std::unordered_set correct = {}; + CHECK(result == correct); + } + + SUBCASE("get_subgraph") { + OpenDataflowGraphView g = get_subgraph(graph, {n0, n1}).graph; + SUBCASE("nodes") { + std::unordered_set result = get_nodes(g); + std::unordered_set correct = {n0, n1}; + CHECK(result == correct); + } + SUBCASE("inputs") { + std::unordered_set result = g.get_inputs(); + std::unordered_set correct = {}; + CHECK(result == correct); + } + SUBCASE("get_open_dataflow_values") { + std::unordered_set values = + get_open_dataflow_values(g); + CHECK(values.size() == 2); + } + } + + SUBCASE("subgraph_matched") { + OpenDataflowGraphView result = subgraph_matched(graph, match).graph; + std::unordered_set result_nodes = get_nodes(result); + std::unordered_set correct_nodes = {n0, n1}; + CHECK(result_nodes == correct_nodes); + } + + SUBCASE("unlabelled_pattern_does_match") { + CHECK(unlabelled_pattern_does_match( + pattern, graph, match, match_additional_crition_always_true())); + CHECK_FALSE(unlabelled_pattern_does_match( + pattern, + graph, + invalid_match, + match_additional_crition_always_true())); + } + + SUBCASE("unlabelled_pattern_does_match") { + OpenDataflowGraph g = + OpenDataflowGraph::create(); + DataflowGraphInput i0 = g.add_input(); + + NodeAddedResult g_n0_added = g.add_node({OpenDataflowValue{i0}}, 1_n); + Node g_n0 = g_n0_added.node; + OpenDataflowValue g_v0 = OpenDataflowValue{get_only(g_n0_added.outputs)}; + PatternNode g_p0 = PatternNode{g_n0}; + PatternInput g_pi0 = PatternInput{i0}; + + UnlabelledGraphPattern open_pattern = UnlabelledGraphPattern{g}; + + UnlabelledDataflowGraphPatternMatch open_match = + UnlabelledDataflowGraphPatternMatch{ + bidict{ + {g_p0, n1}, + }, + bidict{ + 
{g_pi0, v0}, + }}; + + CHECK(unlabelled_pattern_does_match( + open_pattern, + graph, + open_match, + match_additional_crition_always_true())); + } + } +} diff --git a/lib/substitutions/test/src/substitutions/unlabelled/pattern_split.cc b/lib/substitutions/test/src/substitutions/unlabelled/pattern_split.cc index e4d763d9c3..1bddb9f680 100644 --- a/lib/substitutions/test/src/substitutions/unlabelled/pattern_split.cc +++ b/lib/substitutions/test/src/substitutions/unlabelled/pattern_split.cc @@ -13,11 +13,11 @@ TEST_SUITE(FF_TEST_SUITE) { OpenDataflowGraph g = OpenDataflowGraph::create(); - NodeAddedResult n0_added = g.add_node({}, 1); + NodeAddedResult n0_added = g.add_node({}, 1_n); Node n0 = n0_added.node; OpenDataflowValue v0 = OpenDataflowValue{get_only(n0_added.outputs)}; - NodeAddedResult n1_added = g.add_node({v0}, 1); + NodeAddedResult n1_added = g.add_node({v0}, 1_n); Node n1 = n1_added.node; OpenDataflowValue v1 = OpenDataflowValue{get_only(n1_added.outputs)}; @@ -77,11 +77,11 @@ TEST_SUITE(FF_TEST_SUITE) { DataflowGraphInput i0 = g.add_input(); DataflowGraphInput i1 = g.add_input(); - NodeAddedResult n0_added = g.add_node({OpenDataflowValue{i0}}, 1); + NodeAddedResult n0_added = g.add_node({OpenDataflowValue{i0}}, 1_n); Node n0 = n0_added.node; OpenDataflowValue v0 = OpenDataflowValue{get_only(n0_added.outputs)}; - NodeAddedResult n1_added = g.add_node({OpenDataflowValue{i1}}, 1); + NodeAddedResult n1_added = g.add_node({OpenDataflowValue{i1}}, 1_n); Node n1 = n1_added.node; OpenDataflowValue v1 = OpenDataflowValue{get_only(n1_added.outputs)}; diff --git a/lib/substitutions/test/src/substitutions/unlabelled/unlabelled_graph_pattern.cc b/lib/substitutions/test/src/substitutions/unlabelled/unlabelled_graph_pattern.cc index e0805dbfd4..22d1b8a2a5 100644 --- a/lib/substitutions/test/src/substitutions/unlabelled/unlabelled_graph_pattern.cc +++ b/lib/substitutions/test/src/substitutions/unlabelled/unlabelled_graph_pattern.cc @@ -17,7 +17,7 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK_FALSE(is_singleton_pattern(pattern)); } - NodeAddedResult n0_added = g.add_node({}, 1); + NodeAddedResult n0_added = g.add_node({}, 1_n); OpenDataflowValue v0 = OpenDataflowValue{get_only(n0_added.outputs)}; SUBCASE("1 node") { @@ -26,7 +26,7 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(is_singleton_pattern(pattern)); } - NodeAddedResult n1_added = g.add_node({v0}, 1); + NodeAddedResult n1_added = g.add_node({v0}, 1_n); OpenDataflowValue v1 = OpenDataflowValue{get_only(n1_added.outputs)}; SUBCASE("more than 1 node") { diff --git a/lib/substitutions/test/src/test_substitution.cc b/lib/substitutions/test/src/test_substitution.cc deleted file mode 100644 index dcb06a78fa..0000000000 --- a/lib/substitutions/test/src/test_substitution.cc +++ /dev/null @@ -1,148 +0,0 @@ -#include "doctest/doctest.h" -#include "op-attrs/get_op_type.h" -#include "rapidcheck.h" -#include "substitutions/substitution.h" - -using namespace FlexFlow; - -// TEST_SUITE(FF_TEST_SUITE) { -// TEST_CASE("substitution") { -// PCGPattern pattern; -// OutputGraphExpr output_expr; -// bidict{ -// OperatorAttributeConstraint{ConstraintType::EQUAL, -// OperatorAttributeKey::OP_TYPE, -// OperatorType::LINEAR}}}; -// -// ParallelTensorPattern tensor_pattern_e0{ -// std::vector{ -// TensorAttributeConstraint{ConstraintType::EQUAL, -// ListIndexAccess{ -// TensorAttributeKey::DIM_SIZES, 0}, -// 2}}}; -// -// ParallelTensorPattern tensor_pattern_empty{ -// std::vector{}}; -// -// auto ig = -// OutputLabelledOpenMultiDiGraph:: -// create>(); -// Node n0 = 
ig.add_node(operator_pattern_n0); -// NodePort p0 = ig.add_node_port(); -// InputMultiDiEdge e0{n0, p0, std::make_pair(p0.value(), p0.value())}; -// ig.add_edge(e0); -// ig.add_label(e0, tensor_pattern_e0); -// -// RC_ASSERT(get_nodes(ig).size() == 1); -// RC_ASSERT(get_edges(ig).size() == 1); -// -// GraphPattern input_graph{ig}; -// -// OperatorAttrAssignment op_ass_n1{ -// {{OperatorAttributeKey::OP_TYPE, -// AttrConstant{OperatorType::REPARTITION}}, -// {OperatorAttributeKey::PARALLEL_DIM, -// AttrConstant{ff_dim_t{nonnegative_int{0}}}}, -// {OperatorAttributeKey::PARALLEL_DEGREE, AttrConstant{2}}}}; -// -// OperatorAttrAssignment op_ass_n2{ -// {{OperatorAttributeKey::OP_TYPE, AttrConstant{OperatorType::LINEAR}}, -// {OperatorAttributeKey::OUT_CHANNELS, -// OperatorAttrAccess{n0, OperatorAttributeKey::OUT_CHANNELS}}, -// {OperatorAttributeKey::USE_BIAS, -// OperatorAttrAccess{n0, OperatorAttributeKey::USE_BIAS}}, -// {OperatorAttributeKey::DATA_TYPE, -// OperatorAttrAccess{n0, OperatorAttributeKey::DATA_TYPE}}, -// {OperatorAttributeKey::ACTIVATION, -// OperatorAttrAccess{n0, OperatorAttributeKey::ACTIVATION}}, -// {OperatorAttributeKey::REGULARIZER, -// OperatorAttrAccess{n0, OperatorAttributeKey::REGULARIZER}}}}; -// -// OperatorAttrAssignment op_ass_n3{ -// {{OperatorAttributeKey::OP_TYPE, -// AttrConstant{OperatorType::REDUCTION}}, -// {OperatorAttributeKey::PARALLEL_DIM, -// AttrConstant{ff_dim_t{nonnegative_int{0}}}}, -// {OperatorAttributeKey::PARALLEL_DEGREE, AttrConstant{2}}}}; -// -// auto og = NodeLabelledOpenMultiDiGraph::create< -// UnorderedNodeLabelledOpenMultiDiGraph>(); -// Node n1 = og.add_node(op_ass_n1); -// Node n2 = og.add_node(op_ass_n2); -// Node n3 = og.add_node(op_ass_n3); -// NodePort p1 = og.add_node_port(); -// NodePort p2 = og.add_node_port(); -// NodePort p3 = og.add_node_port(); -// InputMultiDiEdge e1{n1, p1, {p1.value(), p1.value()}}; -// MultiDiEdge e2{n2, p2, n1, p1}; -// MultiDiEdge e3{n3, p3, n2, p2}; -// og.add_edge(e1); -// og.add_edge(e2); -// og.add_edge(e3); -// OutputGraphExpr output_graph_expr{og}; -// -// RC_ASSERT(get_nodes(og).size() == 3); -// RC_ASSERT(get_edges(og).size() == 3); -// -// bidict input_mapping; -// input_mapping.equate(e0, e1); -// bidict output_mapping; -// -// Substitution substitution{ -// input_graph, output_graph_expr, input_mapping, output_mapping}; -// -// SubParallelComputationGraph pcg = -// OutputLabelledOpenMultiDiGraph::create< -// UnorderedOutputLabelledOpenMultiDiGraph>(); -// -// Node n4 = pcg.add_node(Operator{InputAttrs{}, "input"}); -// Node n5 = pcg.add_node(Operator{ -// LinearAttrs{1, false, DataType::FLOAT, Activation::RELU, -// std::nullopt}, "linear"}); -// NodePort p4 = pcg.add_node_port(); -// NodePort p5 = pcg.add_node_port(); -// -// MultiDiEdge e4{n5, p5, n4, p4}; -// pcg.add_edge(e4); -// ParallelDim dim = {2, 1, false}; -// ParallelTensorDims dims = {FFOrdered{dim}}; -// pcg.add_label(e4, ParallelTensor(dims, DataType::FLOAT, -// CreateGrad::YES)); -// -// MatchAdditionalCriterion criterion{ -// [&](Node const &pattern_node, Node const &graph_node) { -// return operator_satisfies(pcg.at(graph_node), -// input_graph.value().at(pattern_node)); -// }, -// [&](OpenMultiDiEdge const &pattern_edge, -// OpenMultiDiEdge const &graph_edge) { -// return parallel_tensor_satisfies( -// pcg.at(graph_edge), input_graph.value().at(pattern_edge)); -// }}; -// -// RC_ASSERT(criterion.node_criterion(n0, n5)); -// -// std::vector matches = -// find_pattern_matches(input_graph, pcg, criterion); -// -// 
RC_ASSERT(matches.size() == 1); -// -// SubParallelComputationGraph new_pcg = -// apply_substitution(pcg, substitution, matches[0]); -// -// RC_ASSERT(get_nodes(new_pcg).size() == 4); -// RC_ASSERT(get_edges(new_pcg).size() == 3); -// } -// } diff --git a/lib/utils/include/utils/bidict/algorithms/bidict_from_enumerating.h b/lib/utils/include/utils/bidict/algorithms/bidict_from_enumerating.h index 86ef6c4b4d..83afc32e0c 100644 --- a/lib/utils/include/utils/bidict/algorithms/bidict_from_enumerating.h +++ b/lib/utils/include/utils/bidict/algorithms/bidict_from_enumerating.h @@ -2,14 +2,16 @@ #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_BIDICT_FROM_ENUMERATING_H #include "utils/bidict/bidict.h" +#include "utils/nonnegative_int/nonnegative_int.h" #include namespace FlexFlow { template -bidict bidict_from_enumerating(std::unordered_set const &s) { - bidict result; - int idx = 0; +bidict + bidict_from_enumerating(std::unordered_set const &s) { + bidict result; + nonnegative_int idx = 0_n; for (T const &t : s) { result.equate(idx, t); idx++; @@ -19,9 +21,9 @@ bidict bidict_from_enumerating(std::unordered_set const &s) { } template -bidict bidict_from_enumerating(std::set const &s) { - bidict result; - int idx = 0; +bidict bidict_from_enumerating(std::set const &s) { + bidict result; + nonnegative_int idx = 0_n; for (T const &t : s) { result.equate(idx, t); idx++; diff --git a/lib/utils/include/utils/cli/cli_flag_key.struct.toml b/lib/utils/include/utils/cli/cli_flag_key.struct.toml index 790a752911..9c02fddc3e 100644 --- a/lib/utils/include/utils/cli/cli_flag_key.struct.toml +++ b/lib/utils/include/utils/cli/cli_flag_key.struct.toml @@ -6,8 +6,10 @@ features = [ "fmt", ] -includes = [] +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] [[fields]] name = "raw_idx" -type = "int" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/utils/include/utils/cli/cli_positional_argument_key.struct.toml b/lib/utils/include/utils/cli/cli_positional_argument_key.struct.toml index d571d0deb3..4c50c277c0 100644 --- a/lib/utils/include/utils/cli/cli_positional_argument_key.struct.toml +++ b/lib/utils/include/utils/cli/cli_positional_argument_key.struct.toml @@ -6,8 +6,10 @@ features = [ "fmt", ] -includes = [] +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] [[fields]] name = "raw_idx" -type = "int" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/utils/include/utils/containers/at_idx.h b/lib/utils/include/utils/containers/at_idx.h index 757da5c548..fdc13a0231 100644 --- a/lib/utils/include/utils/containers/at_idx.h +++ b/lib/utils/include/utils/containers/at_idx.h @@ -1,17 +1,18 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_AT_IDX_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_AT_IDX_H +#include "utils/nonnegative_int/nonnegative_int.h" #include #include namespace FlexFlow { template -std::optional at_idx(std::vector const &v, size_t idx) { +std::optional at_idx(std::vector const &v, nonnegative_int idx) { if (idx >= v.size()) { return std::nullopt; } else { - return v.at(idx); + return v.at(idx.unwrap_nonnegative()); } } diff --git a/lib/utils/include/utils/containers/enumerate.h b/lib/utils/include/utils/containers/enumerate.h index e3722e52c6..1e8bc1f3dc 100644 --- a/lib/utils/include/utils/containers/enumerate.h +++ b/lib/utils/include/utils/containers/enumerate.h @@ -11,14 +11,14 @@ namespace FlexFlow { /** * @brief Generate a map from indices to elements of \p c. 
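+ * For example (a sketch of the behavior implemented below):
+ *   enumerate(std::vector<char>{'a', 'b', 'c'}) yields
+ *   {{0_n, 'a'}, {1_n, 'b'}, {2_n, 'c'}}.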
 *
- * @note We return a std::map<int, T> rather than a
- * std::vector<std::pair<int, T>> for consistency
+ * @note We return a std::map<nonnegative_int, T> rather than a
+ * std::vector<std::pair<nonnegative_int, T>> for consistency
  * with enumerate(FFOrdered<T> const &). Note that std::map
  * provides ordered iteration in increasing order, so iterating through
  * the result of this function should still function as expected.
  */
 template <typename T>
-std::map<int, T> enumerate(std::vector<T> const &c) {
+std::map<nonnegative_int, T> enumerate(std::vector<T> const &c) {
   return enumerate_vector(c);
 }

@@ -27,16 +27,16 @@ std::map<int, T> enumerate(std::vector<T> const &c) {
  * return a map from indices of this ordering to elements of \p c.
  *
- * @note We return a std::map<int, T> rather than a
- * std::vector<std::pair<int, T>> for consistency
+ * @note We return a std::map<nonnegative_int, T> rather than a
+ * std::vector<std::pair<nonnegative_int, T>> for consistency
  * with enumerate(FFOrdered<T> const &). Note that std::map
  * provides ordered iteration in increasing order, so iterating through
  * the result of this function should still function as expected.
  */
 template <typename T>
-std::map<int, T> enumerate(std::unordered_set<T> const &c) {
-  std::map<int, T> result;
-  int idx = 0;
+std::map<nonnegative_int, T> enumerate(std::unordered_set<T> const &c) {
+  std::map<nonnegative_int, T> result;
+  nonnegative_int idx = 0_n;
   for (auto const &v : c) {
     result.insert({idx++, v});
   }
diff --git a/lib/utils/include/utils/containers/enumerate_vector.h b/lib/utils/include/utils/containers/enumerate_vector.h
index 700106ea3f..1e66279306 100644
--- a/lib/utils/include/utils/containers/enumerate_vector.h
+++ b/lib/utils/include/utils/containers/enumerate_vector.h
@@ -1,16 +1,19 @@
 #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ENUMERATE_VECTOR_H
 #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ENUMERATE_VECTOR_H

+#include "utils/nonnegative_int/nonnegative_int.h"
+#include "utils/nonnegative_int/nonnegative_range.h"
+#include "utils/nonnegative_int/num_elements.h"
 #include <map>
 #include <vector>

 namespace FlexFlow {

 template <typename T>
-std::map<int, T> enumerate_vector(std::vector<T> const &v) {
-  std::map<int, T> result;
-  for (int i = 0; i < v.size(); i++) {
-    result.insert({i, v.at(i)});
+std::map<nonnegative_int, T> enumerate_vector(std::vector<T> const &v) {
+  std::map<nonnegative_int, T> result;
+  for (nonnegative_int i : nonnegative_range(num_elements(v))) {
+    result.insert({i, v.at(i.unwrap_nonnegative())});
   }
   return result;
 }
diff --git a/lib/utils/include/utils/containers/flatmap.h b/lib/utils/include/utils/containers/flatmap.h
index b016a1e03d..a7848b88aa 100644
--- a/lib/utils/include/utils/containers/flatmap.h
+++ b/lib/utils/include/utils/containers/flatmap.h
@@ -4,6 +4,7 @@
 #include "utils/containers/extend.h"
 #include "utils/containers/get_element_type.h"
 #include "utils/containers/merge_maps.h"
+#include <string>
 #include <unordered_map>
 #include <vector>

@@ -52,7 +53,19 @@ std::unordered_map<K2, V2> flatmap(std::unordered_map<K, V> const &m,
   std::unordered_map<K2, V2> result;

   for (auto const &[k, v] : m) {
-    result = merge_maps(result, f(k, v));
+    result = merge_disjoint_maps(result, f(k, v));
+  }
+
+  return result;
+}
+
+template <typename F>
+std::string flatmap(std::string const &input, F const &f) {
+  std::string result = "";
+
+  for (char c : input) {
+    std::string for_c = f(c);
+    result += for_c;
   }

   return result;
diff --git a/lib/utils/include/utils/containers/get_all_permutations_with_repetition.h b/lib/utils/include/utils/containers/get_all_permutations_with_repetition.h
index ccdde0131a..0a7e9d16c2 100644
--- a/lib/utils/include/utils/containers/get_all_permutations_with_repetition.h
+++ b/lib/utils/include/utils/containers/get_all_permutations_with_repetition.h
@@ -1,6 +1,7 @@
 #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_GET_ALL_PERMUTATIONS_WITH_REPETITION_H
 #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_GET_ALL_PERMUTATIONS_WITH_REPETITION_H
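+// Generates every n-length sequence over the container's elements by treating
+// the index vector as an odometer: bump the last digit and carry leftwards
+// when a digit wraps (a brief sketch of the loop below).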
+#include "utils/nonnegative_int/nonnegative_int.h"
 #include <unordered_set>
 #include <vector>

@@ -14,7 +15,8 @@ namespace FlexFlow {
  **/
 template <typename C, typename V = typename C::value_type>
 std::unordered_multiset<std::vector<V>>
-    get_all_permutations_with_repetition(C const &container, int n) {
+    get_all_permutations_with_repetition(C const &container,
+                                         nonnegative_int n) {
   std::unordered_multiset<std::vector<V>> result;

   if (container.empty() || n == 0) {
@@ -22,16 +24,16 @@ std::unordered_multiset<std::vector<V>>
   }

   std::vector<V> elements(std::begin(container), std::end(container));
-  std::vector<int> indices(n, 0);
+  std::vector<int> indices(n.unwrap_nonnegative(), 0);

   while (true) {
-    std::vector<V> perm(n);
+    std::vector<V> perm(n.unwrap_nonnegative());
     for (int i = 0; i < n; ++i) {
       perm[i] = elements[indices[i]];
     }
     result.insert(perm);

-    int i = n - 1;
+    int i = n.unwrap_nonnegative() - 1;
     while (i != -1 && ++indices[i] == elements.size()) {
       indices[i] = 0;
       --i;
diff --git a/lib/utils/include/utils/containers/make.h b/lib/utils/include/utils/containers/make.h
new file mode 100644
index 0000000000..f7b15dfa02
--- /dev/null
+++ b/lib/utils/include/utils/containers/make.h
@@ -0,0 +1,13 @@
+#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAKE_H
+#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAKE_H
+
+namespace FlexFlow {
+
+template <typename T>
+decltype(auto) make() {
+  return [](auto const &x) { return T{x}; };
+}
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/utils/include/utils/containers/merge_maps.h b/lib/utils/include/utils/containers/merge_maps.h
index dd886ab8aa..bfc2446d99 100644
--- a/lib/utils/include/utils/containers/merge_maps.h
+++ b/lib/utils/include/utils/containers/merge_maps.h
@@ -3,30 +3,64 @@

 #include "utils/containers/are_disjoint.h"
+#include "utils/containers/intersection.h"
 #include "utils/containers/keys.h"
+#include "utils/containers/merge_method.dtg.h"
 #include "utils/exception.h"
 #include "utils/fmt/unordered_map.h"
+#include "utils/fmt/unordered_set.h"
 #include <unordered_map>

 namespace FlexFlow {

 template <typename K, typename V>
-std::unordered_map<K, V> merge_maps(std::unordered_map<K, V> const &lhs,
-                                    std::unordered_map<K, V> const &rhs) {
-  if (!are_disjoint(keys(lhs), keys(rhs))) {
-    throw mk_runtime_error(fmt::format("Key sets of merge_maps parameters are "
-                                       "non-disjoint: lhs = {}, rhs = {}",
-                                       lhs,
-                                       rhs));
+void merge_in_map(std::unordered_map<K, V> const &m,
+                  std::unordered_map<K, V> &result) {
+  for (auto const &[k, v] : m) {
+    auto it = result.find(k);
+    if (it != result.end()) {
+      it->second = v;
+    } else {
+      result.insert({k, v});
+    }
   }
+}

-  std::unordered_map<K, V> result;
-  for (auto const &kv : lhs) {
-    result.insert(kv);
-  }
-  for (auto const &kv : rhs) {
-    result.insert(kv);
+template <typename K, typename V>
+std::unordered_map<K, V>
+    merge_disjoint_maps(std::unordered_map<K, V> const &lhs,
+                        std::unordered_map<K, V> const &rhs) {
+
+  std::unordered_set<K> lhs_keys = keys(lhs);
+  std::unordered_set<K> rhs_keys = keys(rhs);
+  std::unordered_set<K> shared_keys = intersection(lhs_keys, rhs_keys);
+  if (!shared_keys.empty()) {
+    throw mk_runtime_error(
+        fmt::format("merge_maps expected disjoint maps, but maps share keys {}",
+                    shared_keys));
   }

+  std::unordered_map<K, V> result;
+  merge_in_map(lhs, result);
+  merge_in_map(rhs, result);
+  return result;
+}
+
+template <typename K, typename V>
+std::unordered_map<K, V>
+    merge_map_left_dominates(std::unordered_map<K, V> const &lhs,
+                             std::unordered_map<K, V> const &rhs) {
+  std::unordered_map<K, V> result;
+  merge_in_map(rhs, result);
+  merge_in_map(lhs, result);
+  return result;
+}
+
+template <typename K, typename V>
+std::unordered_map<K, V>
+    merge_map_right_dominates(std::unordered_map<K, V> const &lhs,
+                              std::unordered_map<K, V> const &rhs) {
+  std::unordered_map<K, V> result;
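+  // merge_in_map overwrites keys already present in result, so merging lhs
+  // first and rhs second lets rhs win on shared keys (right dominates);
+  // merge_map_left_dominates above merges in the opposite order.
+  // e.g. merge_map_right_dominates({{1, 'a'}}, {{1, 'b'}}) == {{1, 'b'}}.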
merge_in_map(lhs, result); + merge_in_map(rhs, result); return result; } diff --git a/lib/utils/include/utils/containers/merge_method.enum.toml b/lib/utils/include/utils/containers/merge_method.enum.toml new file mode 100644 index 0000000000..ec0ed067dd --- /dev/null +++ b/lib/utils/include/utils/containers/merge_method.enum.toml @@ -0,0 +1,17 @@ +namespace = "FlexFlow" +name = "MergeMethod" +features = [ + "json", + "hash", + "fmt", + "rapidcheck", +] + +[[values]] +name = "REQUIRE_DISJOINT" + +[[values]] +name = "LEFT_DOMINATES" + +[[values]] +name = "RIGHT_DOMINATES" diff --git a/lib/utils/include/utils/containers/product.h b/lib/utils/include/utils/containers/product.h index af04edcb81..30aac2681a 100644 --- a/lib/utils/include/utils/containers/product.h +++ b/lib/utils/include/utils/containers/product.h @@ -10,7 +10,7 @@ namespace FlexFlow { **/ template Element product(Container const &container) { - Element result = 1; + Element result = Element{1}; for (Element const &element : container) { result *= element; } diff --git a/lib/utils/include/utils/containers/repeat.h b/lib/utils/include/utils/containers/repeat.h index 18de92cf4a..9782d6265a 100644 --- a/lib/utils/include/utils/containers/repeat.h +++ b/lib/utils/include/utils/containers/repeat.h @@ -1,6 +1,7 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPEAT_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPEAT_H +#include "utils/nonnegative_int/nonnegative_int.h" #include #include #include @@ -8,9 +9,7 @@ namespace FlexFlow { template > -std::vector repeat(int n, F const &f) { - assert(n >= 0); - +std::vector repeat(nonnegative_int n, F const &f) { std::vector result; for (int i = 0; i < n; i++) { result.push_back(f()); diff --git a/lib/utils/include/utils/containers/repeat_element.h b/lib/utils/include/utils/containers/repeat_element.h new file mode 100644 index 0000000000..e1ac508116 --- /dev/null +++ b/lib/utils/include/utils/containers/repeat_element.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPLICATE_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPLICATE_H + +#include "utils/exception.h" +#include "utils/nonnegative_int/nonnegative_int.h" +#include +#include + +namespace FlexFlow { + +template +std::vector repeat_element(nonnegative_int num_times, T const &element) { + std::vector result; + for (int i = 0; i < num_times; ++i) { + result.push_back(element); + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/replicate.h b/lib/utils/include/utils/containers/replicate.h deleted file mode 100644 index aa3d0a7e35..0000000000 --- a/lib/utils/include/utils/containers/replicate.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPLICATE_H -#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPLICATE_H - -#include - -namespace FlexFlow { - -template -std::vector replicate(int n, T const &element) { - return std::vector(n, element); -} - -} // namespace FlexFlow - -#endif diff --git a/lib/utils/include/utils/containers/sum.h b/lib/utils/include/utils/containers/sum.h index 135e704045..d6061e396e 100644 --- a/lib/utils/include/utils/containers/sum.h +++ b/lib/utils/include/utils/containers/sum.h @@ -8,7 +8,7 @@ namespace FlexFlow { **/ template Element sum(Container const &container) { - Element result = 0; + Element result = Element{0}; for (Element const &element : container) { result += element; } diff --git 
a/lib/utils/include/utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.h b/lib/utils/include/utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.h new file mode 100644 index 0000000000..b12e20124f --- /dev/null +++ b/lib/utils/include/utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.h @@ -0,0 +1,34 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_DATAFLOW_GRAPH_ALGORITHMS_VIEW_AS_OPEN_DATAFLOW_GRAPH_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_DATAFLOW_GRAPH_ALGORITHMS_VIEW_AS_OPEN_DATAFLOW_GRAPH_H + +#include "utils/graph/dataflow_graph/dataflow_graph_view.h" +#include "utils/graph/open_dataflow_graph/open_dataflow_graph_view.h" + +namespace FlexFlow { + +struct ViewDataflowGraphAsOpenDataflowGraph final + : public IOpenDataflowGraphView { + + ViewDataflowGraphAsOpenDataflowGraph() = delete; + ViewDataflowGraphAsOpenDataflowGraph(DataflowGraphView const &); + + std::unordered_set query_nodes(NodeQuery const &) const override; + std::unordered_set + query_outputs(DataflowOutputQuery const &) const override; + std::unordered_set get_inputs() const override; + std::unordered_set + query_edges(OpenDataflowEdgeQuery const &) const override; + + ViewDataflowGraphAsOpenDataflowGraph *clone() const override; + + virtual ~ViewDataflowGraphAsOpenDataflowGraph() = default; + +private: + DataflowGraphView g; +}; + +OpenDataflowGraphView view_as_open_dataflow_graph(DataflowGraphView const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/graph/dataflow_graph/dataflow_edge_query.struct.toml b/lib/utils/include/utils/graph/dataflow_graph/dataflow_edge_query.struct.toml index 0b0c5a41d8..aed0c28aeb 100644 --- a/lib/utils/include/utils/graph/dataflow_graph/dataflow_edge_query.struct.toml +++ b/lib/utils/include/utils/graph/dataflow_graph/dataflow_edge_query.struct.toml @@ -10,6 +10,7 @@ features = [ includes = [ "utils/graph/query_set.h", "utils/graph/node/node.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] @@ -18,7 +19,7 @@ type = "::FlexFlow::query_set<::FlexFlow::Node>" [[fields]] name = "src_idxs" -type = "::FlexFlow::query_set" +type = "::FlexFlow::query_set<::FlexFlow::nonnegative_int>" [[fields]] name = "dst_nodes" @@ -26,4 +27,4 @@ type = "::FlexFlow::query_set<::FlexFlow::Node>" [[fields]] name = "dst_idxs" -type = "::FlexFlow::query_set" +type = "::FlexFlow::query_set<::FlexFlow::nonnegative_int>" diff --git a/lib/utils/include/utils/graph/dataflow_graph/dataflow_graph.h b/lib/utils/include/utils/graph/dataflow_graph/dataflow_graph.h index 6a1898dd13..58c28aaff6 100644 --- a/lib/utils/include/utils/graph/dataflow_graph/dataflow_graph.h +++ b/lib/utils/include/utils/graph/dataflow_graph/dataflow_graph.h @@ -4,13 +4,14 @@ #include "utils/graph/dataflow_graph/dataflow_graph_view.h" #include "utils/graph/dataflow_graph/i_dataflow_graph.h" #include "utils/graph/dataflow_graph/node_added_result.dtg.h" +#include "utils/nonnegative_int/nonnegative_int.h" namespace FlexFlow { struct DataflowGraph : virtual public DataflowGraphView { public: NodeAddedResult add_node(std::vector const &inputs, - int num_outputs); + nonnegative_int num_outputs); void add_node_unsafe(Node const &node, std::vector const &inputs, diff --git a/lib/utils/include/utils/graph/dataflow_graph/dataflow_input.struct.toml b/lib/utils/include/utils/graph/dataflow_graph/dataflow_input.struct.toml index f322fa63fe..eb9c30d558 100644 --- a/lib/utils/include/utils/graph/dataflow_graph/dataflow_input.struct.toml +++ 
b/lib/utils/include/utils/graph/dataflow_graph/dataflow_input.struct.toml @@ -9,6 +9,7 @@ features = [ includes = [ "utils/graph/node/node.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] @@ -17,4 +18,4 @@ type = "::FlexFlow::Node" [[fields]] name = "idx" -type = "int" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/utils/include/utils/graph/dataflow_graph/dataflow_output.struct.toml b/lib/utils/include/utils/graph/dataflow_graph/dataflow_output.struct.toml index f3ccebe046..19d92a3d4c 100644 --- a/lib/utils/include/utils/graph/dataflow_graph/dataflow_output.struct.toml +++ b/lib/utils/include/utils/graph/dataflow_graph/dataflow_output.struct.toml @@ -9,6 +9,7 @@ features = [ includes = [ "utils/graph/node/node.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] @@ -17,4 +18,4 @@ type = "::FlexFlow::Node" [[fields]] name = "idx" -type = "int" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/utils/include/utils/graph/dataflow_graph/dataflow_output_query.struct.toml b/lib/utils/include/utils/graph/dataflow_graph/dataflow_output_query.struct.toml index 0701855ba6..d1af6d5c0d 100644 --- a/lib/utils/include/utils/graph/dataflow_graph/dataflow_output_query.struct.toml +++ b/lib/utils/include/utils/graph/dataflow_graph/dataflow_output_query.struct.toml @@ -10,6 +10,10 @@ features = [ includes = [ "utils/graph/query_set.h", "utils/graph/node/node.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", +] + +src_includes = [ "utils/fmt/unordered_set.h", ] @@ -19,4 +23,4 @@ type = "::FlexFlow::query_set<::FlexFlow::Node>" [[fields]] name = "output_idxs" -type = "::FlexFlow::query_set" +type = "::FlexFlow::query_set<::FlexFlow::nonnegative_int>" diff --git a/lib/utils/include/utils/graph/dataflow_graph/i_dataflow_graph.h b/lib/utils/include/utils/graph/dataflow_graph/i_dataflow_graph.h index 87882a6242..2572fe5c68 100644 --- a/lib/utils/include/utils/graph/dataflow_graph/i_dataflow_graph.h +++ b/lib/utils/include/utils/graph/dataflow_graph/i_dataflow_graph.h @@ -9,7 +9,7 @@ namespace FlexFlow { struct IDataflowGraph : virtual public IDataflowGraphView { virtual NodeAddedResult add_node(std::vector const &inputs, - int num_outputs) = 0; + nonnegative_int num_outputs) = 0; virtual void add_node_unsafe(Node const &node, std::vector const &inputs, diff --git a/lib/utils/include/utils/graph/instances/unordered_set_dataflow_graph.h b/lib/utils/include/utils/graph/instances/unordered_set_dataflow_graph.h index 4ed83834a2..ecba7921af 100644 --- a/lib/utils/include/utils/graph/instances/unordered_set_dataflow_graph.h +++ b/lib/utils/include/utils/graph/instances/unordered_set_dataflow_graph.h @@ -14,9 +14,9 @@ struct UnorderedSetDataflowGraph final : virtual public IDataflowGraph, UnorderedSetDataflowGraph(); NodeAddedResult add_node(std::vector const &inputs, - int num_outputs) override; + nonnegative_int num_outputs) override; NodeAddedResult add_node(std::vector const &inputs, - int num_outputs) override; + nonnegative_int num_outputs) override; DataflowGraphInput add_input() override; std::unordered_set query_nodes(NodeQuery const &) const override; diff --git a/lib/utils/include/utils/graph/instances/unordered_set_labelled_open_dataflow_graph.h b/lib/utils/include/utils/graph/instances/unordered_set_labelled_open_dataflow_graph.h index f1063c1f21..159778bb6d 100644 --- a/lib/utils/include/utils/graph/instances/unordered_set_labelled_open_dataflow_graph.h +++ b/lib/utils/include/utils/graph/instances/unordered_set_labelled_open_dataflow_graph.h @@ -57,9 +57,10 @@ 
struct UnorderedSetLabelledOpenDataflowGraph final } std::vector new_outputs = - transform(count(output_labels.size()), [&](int output_idx) { - return DataflowOutput{new_node, output_idx}; - }); + transform(nonnegative_range(num_elements(output_labels)), + [&](nonnegative_int output_idx) { + return DataflowOutput{new_node, output_idx}; + }); for (auto const &[output, output_label] : zip(new_outputs, output_labels)) { this->values.insert({OpenDataflowValue{output}, output_label}); diff --git a/lib/utils/include/utils/graph/labelled_open_dataflow_graph/algorithms/get_graph_data.h b/lib/utils/include/utils/graph/labelled_open_dataflow_graph/algorithms/get_graph_data.h index ec8f025ac3..2115a03cda 100644 --- a/lib/utils/include/utils/graph/labelled_open_dataflow_graph/algorithms/get_graph_data.h +++ b/lib/utils/include/utils/graph/labelled_open_dataflow_graph/algorithms/get_graph_data.h @@ -4,6 +4,7 @@ #include "utils/graph/labelled_open_dataflow_graph/algorithms/labelled_open_dataflow_graph_data.dtg.h" #include "utils/graph/labelled_open_dataflow_graph/labelled_open_dataflow_graph_view.h" #include "utils/graph/node/algorithms.h" +#include "utils/graph/open_dataflow_graph/algorithms/get_edges.h" #include "utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_values.h" namespace FlexFlow { diff --git a/lib/utils/include/utils/graph/labelled_open_dataflow_graph/algorithms/permute_node_ids.h b/lib/utils/include/utils/graph/labelled_open_dataflow_graph/algorithms/permute_node_ids.h index 2d1dd03755..88950635d2 100644 --- a/lib/utils/include/utils/graph/labelled_open_dataflow_graph/algorithms/permute_node_ids.h +++ b/lib/utils/include/utils/graph/labelled_open_dataflow_graph/algorithms/permute_node_ids.h @@ -1,6 +1,7 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_LABELLED_OPEN_DATAFLOW_GRAPH_ALGORITHMS_PERMUTE_NODE_IDS_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_LABELLED_OPEN_DATAFLOW_GRAPH_ALGORITHMS_PERMUTE_NODE_IDS_H +#include "utils/containers/generate_map.h" #include "utils/graph/labelled_open_dataflow_graph/algorithms/with_labelling.h" #include "utils/graph/labelled_open_dataflow_graph/labelled_open_dataflow_graph_view.h" #include "utils/graph/node/algorithms.h" diff --git a/lib/utils/include/utils/graph/multidigraph/algorithms/add_nodes.h b/lib/utils/include/utils/graph/multidigraph/algorithms/add_nodes.h index 737f2d0d23..80d0ca3eaf 100644 --- a/lib/utils/include/utils/graph/multidigraph/algorithms/add_nodes.h +++ b/lib/utils/include/utils/graph/multidigraph/algorithms/add_nodes.h @@ -2,10 +2,11 @@ #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_MULTIDIGRAPH_ALGORITHMS_ADD_NODES_H #include "utils/graph/multidigraph/multidigraph.h" +#include "utils/nonnegative_int/nonnegative_int.h" namespace FlexFlow { -std::vector add_nodes(MultiDiGraph &, int num_nodes); +std::vector add_nodes(MultiDiGraph &, nonnegative_int num_nodes); } // namespace FlexFlow diff --git a/lib/utils/include/utils/graph/open_dataflow_graph/algorithms/are_isomorphic.h b/lib/utils/include/utils/graph/open_dataflow_graph/algorithms/are_isomorphic.h new file mode 100644 index 0000000000..ae99e2850f --- /dev/null +++ b/lib/utils/include/utils/graph/open_dataflow_graph/algorithms/are_isomorphic.h @@ -0,0 +1,13 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_OPEN_DATAFLOW_GRAPH_ALGORITHMS_ARE_ISOMORPHIC_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_OPEN_DATAFLOW_GRAPH_ALGORITHMS_ARE_ISOMORPHIC_H + +#include "utils/graph/open_dataflow_graph/open_dataflow_graph_view.h" + +namespace FlexFlow { + +bool 
are_isomorphic(OpenDataflowGraphView const &, + OpenDataflowGraphView const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/graph/open_dataflow_graph/algorithms/open_dataflow_graph_isomorphism.h b/lib/utils/include/utils/graph/open_dataflow_graph/algorithms/open_dataflow_graph_isomorphism.h new file mode 100644 index 0000000000..fe282a8c2e --- /dev/null +++ b/lib/utils/include/utils/graph/open_dataflow_graph/algorithms/open_dataflow_graph_isomorphism.h @@ -0,0 +1,21 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_OPEN_DATAFLOW_GRAPH_ALGORITHMS_OPEN_DATAFLOW_GRAPH_ISOMORPHISM_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_OPEN_DATAFLOW_GRAPH_ALGORITHMS_OPEN_DATAFLOW_GRAPH_ISOMORPHISM_H + +#include "utils/graph/open_dataflow_graph/algorithms/open_dataflow_graph_isomorphism.dtg.h" +#include "utils/graph/open_dataflow_graph/open_dataflow_value.dtg.h" + +namespace FlexFlow { + +OpenDataflowValue isomorphism_map_r_open_dataflow_value_from_l( + OpenDataflowGraphIsomorphism const &iso, OpenDataflowValue const &l_value); +OpenDataflowValue isomorphism_map_l_open_dataflow_value_from_r( + OpenDataflowGraphIsomorphism const &iso, OpenDataflowValue const &r_value); + +DataflowOutput isomorphism_map_r_dataflow_output_from_l( + OpenDataflowGraphIsomorphism const &iso, DataflowOutput const &l_output); +DataflowOutput isomorphism_map_l_dataflow_output_from_r( + OpenDataflowGraphIsomorphism const &iso, DataflowOutput const &r_output); + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/graph/open_dataflow_graph/dataflow_input_edge_query.struct.toml b/lib/utils/include/utils/graph/open_dataflow_graph/dataflow_input_edge_query.struct.toml index 544a05af85..f67e8b88e0 100644 --- a/lib/utils/include/utils/graph/open_dataflow_graph/dataflow_input_edge_query.struct.toml +++ b/lib/utils/include/utils/graph/open_dataflow_graph/dataflow_input_edge_query.struct.toml @@ -11,6 +11,7 @@ includes = [ "utils/graph/query_set.h", "utils/graph/open_dataflow_graph/dataflow_graph_input.dtg.h", "utils/graph/node/node.dtg.h", + "utils/nonnegative_int/nonnegative_int.h", ] [[fields]] @@ -23,4 +24,4 @@ type = "::FlexFlow::query_set<::FlexFlow::Node>" [[fields]] name = "dst_idxs" -type = "::FlexFlow::query_set" +type = "::FlexFlow::query_set<::FlexFlow::nonnegative_int>" diff --git a/lib/utils/include/utils/graph/open_dataflow_graph/i_open_dataflow_graph.h b/lib/utils/include/utils/graph/open_dataflow_graph/i_open_dataflow_graph.h index 6edfa408d4..9b71b06e62 100644 --- a/lib/utils/include/utils/graph/open_dataflow_graph/i_open_dataflow_graph.h +++ b/lib/utils/include/utils/graph/open_dataflow_graph/i_open_dataflow_graph.h @@ -9,7 +9,7 @@ namespace FlexFlow { struct IOpenDataflowGraph : virtual public IOpenDataflowGraphView { virtual NodeAddedResult add_node(std::vector const &inputs, - int num_outputs) = 0; + nonnegative_int num_outputs) = 0; virtual DataflowGraphInput add_input() = 0; virtual IOpenDataflowGraph *clone() const = 0; diff --git a/lib/utils/include/utils/graph/open_dataflow_graph/open_dataflow_edge.h b/lib/utils/include/utils/graph/open_dataflow_graph/open_dataflow_edge.h index 09499f8e5f..1102bf0586 100644 --- a/lib/utils/include/utils/graph/open_dataflow_graph/open_dataflow_edge.h +++ b/lib/utils/include/utils/graph/open_dataflow_graph/open_dataflow_edge.h @@ -7,7 +7,7 @@ namespace FlexFlow { Node get_open_dataflow_edge_dst_node(OpenDataflowEdge const &); -int get_open_dataflow_edge_dst_idx(OpenDataflowEdge const &); +nonnegative_int 
get_open_dataflow_edge_dst_idx(OpenDataflowEdge const &); DataflowInput get_open_dataflow_edge_dst(OpenDataflowEdge const &); OpenDataflowValue get_open_dataflow_edge_src(OpenDataflowEdge const &); OpenDataflowEdge diff --git a/lib/utils/include/utils/graph/open_dataflow_graph/open_dataflow_graph.h b/lib/utils/include/utils/graph/open_dataflow_graph/open_dataflow_graph.h index e8ecce76e8..9d48020d5f 100644 --- a/lib/utils/include/utils/graph/open_dataflow_graph/open_dataflow_graph.h +++ b/lib/utils/include/utils/graph/open_dataflow_graph/open_dataflow_graph.h @@ -11,7 +11,7 @@ namespace FlexFlow { struct OpenDataflowGraph : virtual public OpenDataflowGraphView { public: NodeAddedResult add_node(std::vector const &inputs, - int num_outputs); + nonnegative_int num_outputs); DataflowGraphInput add_input(); template diff --git a/lib/utils/include/utils/graph/open_dataflow_graph/unordered_set_open_dataflow_graph.h b/lib/utils/include/utils/graph/open_dataflow_graph/unordered_set_open_dataflow_graph.h index 7b921772d6..f3d54e4329 100644 --- a/lib/utils/include/utils/graph/open_dataflow_graph/unordered_set_open_dataflow_graph.h +++ b/lib/utils/include/utils/graph/open_dataflow_graph/unordered_set_open_dataflow_graph.h @@ -12,7 +12,7 @@ struct UnorderedSetOpenDataflowGraph : public IOpenDataflowGraph { UnorderedSetOpenDataflowGraph(); NodeAddedResult add_node(std::vector const &inputs, - int num_outputs) override; + nonnegative_int num_outputs) override; std::unordered_set query_nodes(NodeQuery const &) const override; std::unordered_set diff --git a/lib/utils/include/utils/graph/render_dot.h b/lib/utils/include/utils/graph/render_dot.h new file mode 100644 index 0000000000..632ba736ea --- /dev/null +++ b/lib/utils/include/utils/graph/render_dot.h @@ -0,0 +1,19 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_RENDER_DOT_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_RENDER_DOT_H + +#include "utils/graph/labelled_open_dataflow_graph/labelled_open_dataflow_graph_view.h" +#include +#include + +namespace FlexFlow { + +std::string escape_dot_string(std::string const &); +std::string render_dot_node_attrs( + std::unordered_map const &attrs); +std::string render_dot( + LabelledDataflowGraphView, + std::string> const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/nonnegative_int/ceildiv.h b/lib/utils/include/utils/nonnegative_int/ceildiv.h new file mode 100644 index 0000000000..939ea3de51 --- /dev/null +++ b/lib/utils/include/utils/nonnegative_int/ceildiv.h @@ -0,0 +1,11 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_NONNEGATIVE_INT_CEILDIV_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_NONNEGATIVE_INT_CEILDIV_H + +#include "utils/nonnegative_int/nonnegative_int.h" +namespace FlexFlow { + +nonnegative_int ceildiv(nonnegative_int numerator, nonnegative_int denominator); + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/nonnegative_int/nonnegative_int.h b/lib/utils/include/utils/nonnegative_int/nonnegative_int.h index 0749497c56..0bcc8cfd6f 100644 --- a/lib/utils/include/utils/nonnegative_int/nonnegative_int.h +++ b/lib/utils/include/utils/nonnegative_int/nonnegative_int.h @@ -1,12 +1,11 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_NONNEGATIVE_INT_NONNEGATIVE_INT_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_NONNEGATIVE_INT_NONNEGATIVE_INT_H -#include "rapidcheck.h" - #include #include #include #include +#include #include namespace FlexFlow { @@ -14,6 +13,7 @@ class nonnegative_int { public: nonnegative_int() = delete; explicit nonnegative_int(int value); + 
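+  // Both converting constructors are explicit and are assumed to validate
+  // their argument (rejecting negative ints and size_t values that do not
+  // fit in int); the 0_n literal declared below is the checked shorthand
+  // used throughout this patch.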
explicit nonnegative_int(size_t value); explicit operator int() const noexcept; @@ -39,16 +39,31 @@ class nonnegative_int { friend bool operator>=(int const &lhs, nonnegative_int const &rhs); nonnegative_int operator+(nonnegative_int const &other) const; + nonnegative_int &operator++(); + nonnegative_int operator++(int); + nonnegative_int &operator+=(nonnegative_int const &other); + + nonnegative_int operator*(nonnegative_int const &other) const; + nonnegative_int &operator*=(nonnegative_int const &other); + + nonnegative_int operator/(nonnegative_int const &other) const; + nonnegative_int &operator/=(nonnegative_int const &other); + + nonnegative_int operator%(nonnegative_int const &other) const; + nonnegative_int &operator%=(nonnegative_int const &other); friend std::ostream &operator<<(std::ostream &os, nonnegative_int const &n); friend int format_as(nonnegative_int const &); - int get_value() const; + int unwrap_nonnegative() const; private: int value_; }; + +nonnegative_int operator""_n(unsigned long long int); + } // namespace FlexFlow namespace nlohmann { @@ -59,6 +74,13 @@ struct adl_serializer<::FlexFlow::nonnegative_int> { }; } // namespace nlohmann +namespace rc { +template <> +struct Arbitrary<::FlexFlow::nonnegative_int> { + static Gen<::FlexFlow::nonnegative_int> arbitrary(); +}; +} // namespace rc + namespace std { template <> struct hash<::FlexFlow::nonnegative_int> { diff --git a/lib/utils/include/utils/nonnegative_int/nonnegative_range.h b/lib/utils/include/utils/nonnegative_int/nonnegative_range.h new file mode 100644 index 0000000000..af323aef42 --- /dev/null +++ b/lib/utils/include/utils/nonnegative_int/nonnegative_range.h @@ -0,0 +1,14 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_NONNEGATIVE_INT_NONNEGATIVE_RANGE_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_NONNEGATIVE_INT_NONNEGATIVE_RANGE_H + +#include "utils/nonnegative_int/nonnegative_int.h" + +namespace FlexFlow { + +std::vector nonnegative_range(nonnegative_int end); +std::vector + nonnegative_range(nonnegative_int start, nonnegative_int end, int step = 1); + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/nonnegative_int/num_elements.h b/lib/utils/include/utils/nonnegative_int/num_elements.h new file mode 100644 index 0000000000..57bc98ee50 --- /dev/null +++ b/lib/utils/include/utils/nonnegative_int/num_elements.h @@ -0,0 +1,17 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_NONNEGATIVE_INT_NUM_ELEMENTS_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_NONNEGATIVE_INT_NUM_ELEMENTS_H + +#include "utils/exception.h" +#include "utils/nonnegative_int/nonnegative_int.h" + +namespace FlexFlow { + +template +nonnegative_int num_elements(T const &t) { + size_t t_size = t.size(); + return nonnegative_int{t_size}; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/variant.h b/lib/utils/include/utils/variant.h index 241d631200..75a8851362 100644 --- a/lib/utils/include/utils/variant.h +++ b/lib/utils/include/utils/variant.h @@ -4,6 +4,7 @@ #include "rapidcheck.h" #include "utils/type_traits.h" #include +#include #include namespace FlexFlow { diff --git a/lib/utils/src/utils/bidict/algorithms/bidict_from_enumerating.cc b/lib/utils/src/utils/bidict/algorithms/bidict_from_enumerating.cc index 350f08600c..67e0b32d6e 100644 --- a/lib/utils/src/utils/bidict/algorithms/bidict_from_enumerating.cc +++ b/lib/utils/src/utils/bidict/algorithms/bidict_from_enumerating.cc @@ -1 +1,14 @@ #include "utils/bidict/algorithms/bidict_from_enumerating.h" +#include "utils/archetypes/value_type.h" + 
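+// value_type<0> is an archetype: a minimal placeholder type that forces the
+// templates below to be instantiated in this translation unit, so errors in
+// the header-only definitions surface at build time even without a concrete
+// caller.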
+namespace FlexFlow { + +using T = value_type<0>; + +template bidict + bidict_from_enumerating(std::unordered_set const &); + +template bidict + bidict_from_enumerating(std::set const &); + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/cli/cli_parse.cc b/lib/utils/src/utils/cli/cli_parse.cc index 07982c0c2d..36d5837f9c 100644 --- a/lib/utils/src/utils/cli/cli_parse.cc +++ b/lib/utils/src/utils/cli/cli_parse.cc @@ -32,7 +32,7 @@ tl::expected {}, }; - int consumed_positional_args = 0; + nonnegative_int consumed_positional_args = 0_n; auto parse_positional_arg = [&](std::string const &arg) -> std::optional { if (consumed_positional_args >= cli.positional_arguments.size()) { @@ -40,8 +40,8 @@ tl::expected cli.positional_arguments.size()); } - CLIPositionalArgumentSpec arg_spec = - cli.positional_arguments.at(consumed_positional_args); + CLIPositionalArgumentSpec arg_spec = cli.positional_arguments.at( + consumed_positional_args.unwrap_nonnegative()); if (arg_spec.choices.has_value() && !contains(arg_spec.choices.value(), arg)) { diff --git a/lib/utils/src/utils/cli/cli_spec.cc b/lib/utils/src/utils/cli/cli_spec.cc index ca51cfe57f..e314f6fd55 100644 --- a/lib/utils/src/utils/cli/cli_spec.cc +++ b/lib/utils/src/utils/cli/cli_spec.cc @@ -2,6 +2,8 @@ #include "utils/containers/count.h" #include "utils/containers/transform.h" #include "utils/integer_conversions.h" +#include "utils/nonnegative_int/nonnegative_range.h" +#include "utils/nonnegative_int/num_elements.h" namespace FlexFlow { @@ -10,8 +12,8 @@ CLISpec empty_cli_spec() { } std::vector cli_get_flag_keys(CLISpec const &cli) { - return transform(count(cli.flags.size()), - [](int idx) { return CLIFlagKey{idx}; }); + return transform(nonnegative_range(num_elements(cli.flags)), + [](nonnegative_int idx) { return CLIFlagKey{idx}; }); } CLIArgumentKey cli_add_help_flag(CLISpec &cli) { @@ -21,17 +23,18 @@ CLIArgumentKey cli_add_help_flag(CLISpec &cli) { } CLIArgumentKey cli_add_flag(CLISpec &cli, CLIFlagSpec const &flag_spec) { + CLIArgumentKey key = CLIArgumentKey{CLIFlagKey{num_elements(cli.flags)}}; cli.flags.push_back(flag_spec); - - return CLIArgumentKey{CLIFlagKey{int_from_size_t(cli.flags.size()) - 1}}; + return key; } CLIArgumentKey cli_add_positional_argument(CLISpec &cli, CLIPositionalArgumentSpec const &arg) { + CLIArgumentKey key = CLIArgumentKey{ + CLIPositionalArgumentKey{num_elements(cli.positional_arguments)}}; cli.positional_arguments.push_back(arg); - return CLIArgumentKey{CLIPositionalArgumentKey{ - int_from_size_t(cli.positional_arguments.size()) - 1}}; + return key; } } // namespace FlexFlow diff --git a/lib/utils/src/utils/containers/at_idx.cc b/lib/utils/src/utils/containers/at_idx.cc index 45b1a31fce..14a0695c6d 100644 --- a/lib/utils/src/utils/containers/at_idx.cc +++ b/lib/utils/src/utils/containers/at_idx.cc @@ -1 +1,10 @@ #include "utils/containers/at_idx.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using E = value_type<0>; + +template std::optional at_idx(std::vector const &, nonnegative_int); + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/containers/enumerate.cc b/lib/utils/src/utils/containers/enumerate.cc index 0984b6dc63..ca5ad6ddc1 100644 --- a/lib/utils/src/utils/containers/enumerate.cc +++ b/lib/utils/src/utils/containers/enumerate.cc @@ -1 +1,12 @@ #include "utils/containers/enumerate.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using T = value_type<0>; + +template std::map enumerate(std::vector const &); + +template std::map 
enumerate(std::unordered_set const &); + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/containers/enumerate_vector.cc b/lib/utils/src/utils/containers/enumerate_vector.cc index d4fd131af2..0d0bd1c277 100644 --- a/lib/utils/src/utils/containers/enumerate_vector.cc +++ b/lib/utils/src/utils/containers/enumerate_vector.cc @@ -1 +1,10 @@ #include "utils/containers/enumerate_vector.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using T = value_type<0>; + +template std::map enumerate_vector(std::vector const &); + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/containers/make.cc b/lib/utils/src/utils/containers/make.cc new file mode 100644 index 0000000000..29b5bc5184 --- /dev/null +++ b/lib/utils/src/utils/containers/make.cc @@ -0,0 +1,8 @@ +#include "utils/containers/make.h" +#include + +namespace FlexFlow { + +template decltype(auto) make>(); + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/containers/range.cc b/lib/utils/src/utils/containers/range.cc index d3ebd1063b..f3baab3db1 100644 --- a/lib/utils/src/utils/containers/range.cc +++ b/lib/utils/src/utils/containers/range.cc @@ -1,5 +1,6 @@ #include "utils/containers/range.h" #include +#include namespace FlexFlow { diff --git a/lib/utils/src/utils/containers/repeat.cc b/lib/utils/src/utils/containers/repeat.cc index 76e46f0fdc..777996d995 100644 --- a/lib/utils/src/utils/containers/repeat.cc +++ b/lib/utils/src/utils/containers/repeat.cc @@ -1 +1,11 @@ #include "utils/containers/repeat.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using Out = value_type<0>; +using F = std::function; + +template std::vector repeat(nonnegative_int, F const &); + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/containers/repeat_element.cc b/lib/utils/src/utils/containers/repeat_element.cc new file mode 100644 index 0000000000..70889eb971 --- /dev/null +++ b/lib/utils/src/utils/containers/repeat_element.cc @@ -0,0 +1,10 @@ +#include "utils/containers/repeat_element.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using T = value_type<0>; + +template std::vector repeat_element(nonnegative_int, T const &); + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/containers/replicate.cc b/lib/utils/src/utils/containers/replicate.cc deleted file mode 100644 index 2fb2f079f6..0000000000 --- a/lib/utils/src/utils/containers/replicate.cc +++ /dev/null @@ -1 +0,0 @@ -#include "utils/containers/replicate.h" diff --git a/lib/utils/src/utils/graph/dataflow_graph/algorithms.cc b/lib/utils/src/utils/graph/dataflow_graph/algorithms.cc index f0e52d6fc2..7069146057 100644 --- a/lib/utils/src/utils/graph/dataflow_graph/algorithms.cc +++ b/lib/utils/src/utils/graph/dataflow_graph/algorithms.cc @@ -27,7 +27,7 @@ std::vector get_outputs(DataflowGraphView const &g, Node const &n) { return sorted_by(g.query_outputs(DataflowOutputQuery{ query_set{n}, - query_set::matchall(), + query_set::matchall(), }), [](DataflowOutput const &l, DataflowOutput const &r) { return l.idx < r.idx; diff --git a/lib/utils/src/utils/graph/dataflow_graph/algorithms/as_dot.cc b/lib/utils/src/utils/graph/dataflow_graph/algorithms/as_dot.cc index 47c30ce998..2ae903fa0b 100644 --- a/lib/utils/src/utils/graph/dataflow_graph/algorithms/as_dot.cc +++ b/lib/utils/src/utils/graph/dataflow_graph/algorithms/as_dot.cc @@ -1,27 +1,36 @@ #include "utils/graph/dataflow_graph/algorithms/as_dot.h" +#include "utils/containers/generate_map.h" +#include "utils/containers/map_keys.h" #include 
"utils/dot_file.h" #include "utils/graph/dataflow_graph/algorithms.h" +#include "utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.h" +#include "utils/graph/labelled_open_dataflow_graph/algorithms/with_labelling.h" #include "utils/graph/node/algorithms.h" +#include "utils/graph/render_dot.h" #include "utils/record_formatter.h" namespace FlexFlow { -// WARN(@lockshaw): doing this all with string ids is ugly and error prone, -// as it requires duplicating the stringification logic across functions. -// -// Fixing this is tracked in issue std::string as_dot(DataflowGraphView const &g) { - std::ostringstream oss; - DotFile dot = DotFile{oss}; + auto get_node_attrs = [](Node const &) { + return std::unordered_map{}; + }; + + std::unordered_map> + node_labels = generate_map(get_nodes(g), get_node_attrs); - std::function get_node_label = - [](Node const &n) -> std::string { - return fmt::format("n{}", n.raw_uid); + auto get_output_label = [](DataflowOutput const &o) { + return fmt::to_string(o.idx); }; - as_dot(dot, g, get_node_label); - dot.close(); - return oss.str(); + std::unordered_map output_labels = + generate_map(get_all_dataflow_outputs(g), get_output_label); + std::unordered_map value_labels = + map_keys(output_labels, + [](DataflowOutput const &o) { return OpenDataflowValue{o}; }); + + return render_dot(with_labelling( + view_as_open_dataflow_graph(g), node_labels, value_labels)); } void as_dot(DotFile &dot, @@ -29,9 +38,13 @@ void as_dot(DotFile &dot, std::function const &get_node_label) { auto get_node_name = [](Node n) { return fmt::format("n{}", n.raw_uid); }; - auto get_input_field = [](int idx) { return fmt::format("i{}", idx); }; + auto get_input_field = [](nonnegative_int idx) { + return fmt::format("i{}", idx); + }; - auto get_output_field = [](int idx) { return fmt::format("o{}", idx); }; + auto get_output_field = [](nonnegative_int idx) { + return fmt::format("o{}", idx); + }; for (Node const &n : get_nodes(g)) { std::vector n_inputs = get_dataflow_inputs(g, n); diff --git a/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_dataflow_edges_from_node_to_node.cc b/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_dataflow_edges_from_node_to_node.cc index c07d344d05..73afc11acc 100644 --- a/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_dataflow_edges_from_node_to_node.cc +++ b/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_dataflow_edges_from_node_to_node.cc @@ -6,9 +6,9 @@ std::unordered_set get_dataflow_edges_from_node_to_node( DataflowGraphView const &g, Node const &src, Node const &dst) { return g.query_edges(DataflowEdgeQuery{ /*src_nodes=*/query_set{src}, - /*src_idxs=*/query_set::matchall(), + /*src_idxs=*/query_set::matchall(), /*dst_nodes=*/query_set{dst}, - /*dst_idxs=*/query_set::matchall(), + /*dst_idxs=*/query_set::matchall(), }); } diff --git a/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_incoming_edges.cc b/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_incoming_edges.cc index 9500836db1..c4947f967a 100644 --- a/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_incoming_edges.cc +++ b/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_incoming_edges.cc @@ -7,9 +7,9 @@ std::vector get_incoming_edges(DataflowGraphView const &g, Node const &n) { return sorted_by(g.query_edges(DataflowEdgeQuery{ query_set::matchall(), - query_set::matchall(), + query_set::matchall(), {n}, - query_set::matchall(), + query_set::matchall(), }), [](DataflowEdge const &l, DataflowEdge const &r) { return l.dst.idx 
< r.dst.idx; @@ -21,9 +21,9 @@ std::unordered_set std::unordered_set const &ns) { DataflowEdgeQuery query = DataflowEdgeQuery{ query_set::matchall(), - query_set::matchall(), + query_set::matchall(), query_set{ns}, - query_set::matchall(), + query_set::matchall(), }; return g.query_edges(query); } diff --git a/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc b/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc index 2376e4897f..16b2b82b2d 100644 --- a/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc +++ b/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc @@ -7,9 +7,9 @@ std::unordered_set get_outgoing_edges(DataflowGraphView const &g, Node const &n) { return g.query_edges(DataflowEdgeQuery{ {n}, - query_set::matchall(), + query_set::matchall(), query_set::matchall(), - query_set::matchall(), + query_set::matchall(), }); } @@ -18,9 +18,9 @@ std::unordered_set std::unordered_set const &ns) { DataflowEdgeQuery query = DataflowEdgeQuery{ query_set{ns}, - query_set::matchall(), + query_set::matchall(), query_set::matchall(), - query_set::matchall(), + query_set::matchall(), }; return g.query_edges(query); } diff --git a/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_subgraph_incoming_edges.cc b/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_subgraph_incoming_edges.cc index d17a84dd12..a06ec1ab31 100644 --- a/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_subgraph_incoming_edges.cc +++ b/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_subgraph_incoming_edges.cc @@ -13,9 +13,9 @@ std::unordered_set DataflowEdgeQuery query = DataflowEdgeQuery{ src_query, - query_set::matchall(), + query_set::matchall(), query_set{ns}, - query_set::matchall(), + query_set::matchall(), }; return g.query_edges(query); diff --git a/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_subgraph_outgoing_edges.cc b/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_subgraph_outgoing_edges.cc index c442a26dab..f94dd94e11 100644 --- a/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_subgraph_outgoing_edges.cc +++ b/lib/utils/src/utils/graph/dataflow_graph/algorithms/get_subgraph_outgoing_edges.cc @@ -13,9 +13,9 @@ std::unordered_set DataflowEdgeQuery query = DataflowEdgeQuery{ query_set{ns}, - query_set::matchall(), + query_set::matchall(), dst_query, - query_set::matchall(), + query_set::matchall(), }; return g.query_edges(query); diff --git a/lib/utils/src/utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.cc b/lib/utils/src/utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.cc index 0fd0b85b71..703db4bf91 100644 --- a/lib/utils/src/utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.cc +++ b/lib/utils/src/utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.cc @@ -3,16 +3,18 @@ namespace FlexFlow { -ViewDataflowGraphAsOpen::ViewDataflowGraphAsOpen(DataflowGraphView const &g) +ViewDataflowGraphAsOpenDataflowGraph::ViewDataflowGraphAsOpenDataflowGraph( + DataflowGraphView const &g) : g(g) {} -std::unordered_set - ViewDataflowGraphAsOpen::query_nodes(NodeQuery const &q) const { +std::unordered_set ViewDataflowGraphAsOpenDataflowGraph::query_nodes( + NodeQuery const &q) const { return this->g.query_nodes(q); } std::unordered_set - ViewDataflowGraphAsOpen::query_edges(OpenDataflowEdgeQuery const &q) const { + ViewDataflowGraphAsOpenDataflowGraph::query_edges( + OpenDataflowEdgeQuery const &q) const { std::unordered_set 
closed_edges = this->g.query_edges(q.standard_edge_query); @@ -21,21 +23,23 @@ std::unordered_set } std::unordered_set - ViewDataflowGraphAsOpen::query_outputs(DataflowOutputQuery const &q) const { + ViewDataflowGraphAsOpenDataflowGraph::query_outputs( + DataflowOutputQuery const &q) const { return this->g.query_outputs(q); } std::unordered_set - ViewDataflowGraphAsOpen::get_inputs() const { + ViewDataflowGraphAsOpenDataflowGraph::get_inputs() const { return {}; } -ViewDataflowGraphAsOpen *ViewDataflowGraphAsOpen::clone() const { - return new ViewDataflowGraphAsOpen{this->g}; +ViewDataflowGraphAsOpenDataflowGraph * + ViewDataflowGraphAsOpenDataflowGraph::clone() const { + return new ViewDataflowGraphAsOpenDataflowGraph{this->g}; } OpenDataflowGraphView view_as_open_dataflow_graph(DataflowGraphView const &g) { - return OpenDataflowGraphView::create(g); + return OpenDataflowGraphView::create(g); } } // namespace FlexFlow diff --git a/lib/utils/src/utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.h b/lib/utils/src/utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.h deleted file mode 100644 index bec9d0e019..0000000000 --- a/lib/utils/src/utils/graph/dataflow_graph/algorithms/view_as_open_dataflow_graph.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _FLEXFLOW_LIB_UTILS_SRC_UTILS_GRAPH_DATAFLOW_GRAPH_ALGORITHMS_VIEW_AS_OPEN_DATAFLOW_GRAPH_H -#define _FLEXFLOW_LIB_UTILS_SRC_UTILS_GRAPH_DATAFLOW_GRAPH_ALGORITHMS_VIEW_AS_OPEN_DATAFLOW_GRAPH_H - -#include "utils/graph/open_dataflow_graph/open_dataflow_graph_view.h" - -namespace FlexFlow { - -struct ViewDataflowGraphAsOpen final : public IOpenDataflowGraphView { -public: - ViewDataflowGraphAsOpen() = delete; - ViewDataflowGraphAsOpen(DataflowGraphView const &); - - std::unordered_set query_nodes(NodeQuery const &) const override; - std::unordered_set - query_edges(OpenDataflowEdgeQuery const &) const override; - std::unordered_set - query_outputs(DataflowOutputQuery const &) const override; - std::unordered_set get_inputs() const override; - - ViewDataflowGraphAsOpen *clone() const override; - - ~ViewDataflowGraphAsOpen() = default; - -private: - DataflowGraphView g; -}; - -OpenDataflowGraphView view_as_open_dataflow_graph(DataflowGraphView const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/utils/src/utils/graph/dataflow_graph/dataflow_edge_query.cc b/lib/utils/src/utils/graph/dataflow_graph/dataflow_edge_query.cc index 2196f7a028..982969f3a5 100644 --- a/lib/utils/src/utils/graph/dataflow_graph/dataflow_edge_query.cc +++ b/lib/utils/src/utils/graph/dataflow_graph/dataflow_edge_query.cc @@ -5,18 +5,18 @@ namespace FlexFlow { DataflowEdgeQuery dataflow_edge_query_all() { return DataflowEdgeQuery{ query_set::matchall(), - query_set::matchall(), + query_set::matchall(), query_set::matchall(), - query_set::matchall(), + query_set::matchall(), }; } DataflowEdgeQuery dataflow_edge_query_none() { return DataflowEdgeQuery{ query_set::match_none(), - query_set::match_none(), + query_set::match_none(), query_set::match_none(), - query_set::match_none(), + query_set::match_none(), }; } @@ -30,9 +30,9 @@ bool dataflow_edge_query_includes_dataflow_edge(DataflowEdgeQuery const &q, DataflowEdgeQuery dataflow_edge_query_for_edge(DataflowEdge const &e) { return DataflowEdgeQuery{ query_set{e.src.node}, - query_set{e.src.idx}, + query_set{e.src.idx}, query_set{e.dst.node}, - query_set{e.dst.idx}, + query_set{e.dst.idx}, }; } @@ -40,9 +40,9 @@ DataflowEdgeQuery dataflow_edge_query_all_outgoing_from(DataflowOutput const &src) { 
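+  // Pin src to exactly this output's node and idx, and leave both dst fields
+  // as matchall, so the query selects every edge leaving src.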
return DataflowEdgeQuery{ query_set{src.node}, - query_set{src.idx}, + query_set{src.idx}, query_set::matchall(), - query_set::matchall(), + query_set::matchall(), }; } @@ -50,9 +50,9 @@ DataflowEdgeQuery dataflow_edge_query_all_incoming_to(DataflowInput const &dst) { return DataflowEdgeQuery{ query_set::matchall(), - query_set::matchall(), + query_set::matchall(), query_set{dst.node}, - query_set{dst.idx}, + query_set{dst.idx}, }; } diff --git a/lib/utils/src/utils/graph/dataflow_graph/dataflow_graph.cc b/lib/utils/src/utils/graph/dataflow_graph/dataflow_graph.cc index 868dd61c6d..8ed36135e1 100644 --- a/lib/utils/src/utils/graph/dataflow_graph/dataflow_graph.cc +++ b/lib/utils/src/utils/graph/dataflow_graph/dataflow_graph.cc @@ -4,7 +4,7 @@ namespace FlexFlow { NodeAddedResult DataflowGraph::add_node(std::vector const &inputs, - int num_outputs) { + nonnegative_int num_outputs) { return this->get_interface().add_node(inputs, num_outputs); } diff --git a/lib/utils/src/utils/graph/dataflow_graph/dataflow_output_query.cc b/lib/utils/src/utils/graph/dataflow_graph/dataflow_output_query.cc index 64df4c77f2..ceaad2bfdf 100644 --- a/lib/utils/src/utils/graph/dataflow_graph/dataflow_output_query.cc +++ b/lib/utils/src/utils/graph/dataflow_graph/dataflow_output_query.cc @@ -5,14 +5,14 @@ namespace FlexFlow { DataflowOutputQuery dataflow_output_query_all() { return DataflowOutputQuery{ query_set::matchall(), - query_set::matchall(), + query_set::matchall(), }; } DataflowOutputQuery dataflow_output_query_none() { return DataflowOutputQuery{ query_set::match_none(), - query_set::match_none(), + query_set::match_none(), }; } @@ -24,7 +24,7 @@ bool dataflow_output_query_includes_dataflow_output( DataflowOutputQuery dataflow_output_query_for_output(DataflowOutput const &o) { return DataflowOutputQuery{ query_set{o.node}, - query_set{o.idx}, + query_set{o.idx}, }; } diff --git a/lib/utils/src/utils/graph/dataflow_graph/i_dataflow_graph_view.cc b/lib/utils/src/utils/graph/dataflow_graph/i_dataflow_graph_view.cc index 300b5de546..ef9412b939 100644 --- a/lib/utils/src/utils/graph/dataflow_graph/i_dataflow_graph_view.cc +++ b/lib/utils/src/utils/graph/dataflow_graph/i_dataflow_graph_view.cc @@ -7,9 +7,9 @@ std::unordered_set IDataflowGraphView::query_edges(DirectedEdgeQuery const &q) const { DataflowEdgeQuery dataflow_query = DataflowEdgeQuery{ q.srcs, - matchall(), + matchall(), q.dsts, - matchall(), + matchall(), }; std::unordered_set dataflow_edges = this->query_edges(dataflow_query); diff --git a/lib/utils/src/utils/graph/digraph/algorithms/transitive_closure.cc b/lib/utils/src/utils/graph/digraph/algorithms/transitive_closure.cc index 3efea1c138..2de3056068 100644 --- a/lib/utils/src/utils/graph/digraph/algorithms/transitive_closure.cc +++ b/lib/utils/src/utils/graph/digraph/algorithms/transitive_closure.cc @@ -6,6 +6,7 @@ #include "utils/graph/digraph/algorithms/materialize_digraph_view.h" #include "utils/graph/instances/adjacency_digraph.h" #include "utils/graph/node/algorithms.h" +#include "utils/nonnegative_int/num_elements.h" namespace FlexFlow { @@ -15,7 +16,9 @@ DiGraphView transitive_closure(DiGraphView const &g) { // incredibly slow (> minutes) for even moderately sized graphs // (i.e., 200 nodes) without optimization enabled. 
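+  // Nodes are first enumerated into dense indices [0, num_nodes) so that
+  // reachability can be stored and updated in a flat num_nodes * num_nodes
+  // boolean matrix.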
- bidict nodes = bidict_from_enumerating(get_nodes(g)); + bidict nodes = + map_keys(bidict_from_enumerating(get_nodes(g)), + [](nonnegative_int x) { return x.unwrap_nonnegative(); }); std::unordered_set edges = get_edges(g); int num_nodes = nodes.size(); diff --git a/lib/utils/src/utils/graph/digraph/algorithms/transitive_reduction.cc b/lib/utils/src/utils/graph/digraph/algorithms/transitive_reduction.cc index 97a2439263..69b24b716c 100644 --- a/lib/utils/src/utils/graph/digraph/algorithms/transitive_reduction.cc +++ b/lib/utils/src/utils/graph/digraph/algorithms/transitive_reduction.cc @@ -37,7 +37,9 @@ DiGraphView transitive_reduction(DiGraphView const &g) { // transitive_closure inlined to avoid any drifts in node numbering // between transitive_closure and transitive_reduction - bidict nodes = bidict_from_enumerating(get_nodes(g)); + bidict nodes = + map_keys(bidict_from_enumerating(get_nodes(g)), + [](nonnegative_int x) { return x.unwrap_nonnegative(); }); int num_nodes = nodes.size(); std::vector edge_matrix(num_nodes * num_nodes, false); diff --git a/lib/utils/src/utils/graph/instances/unordered_set_dataflow_graph.cc b/lib/utils/src/utils/graph/instances/unordered_set_dataflow_graph.cc index 1ffc5f423f..a5a1fb82bf 100644 --- a/lib/utils/src/utils/graph/instances/unordered_set_dataflow_graph.cc +++ b/lib/utils/src/utils/graph/instances/unordered_set_dataflow_graph.cc @@ -1,6 +1,5 @@ #include "utils/graph/instances/unordered_set_dataflow_graph.h" #include "utils/containers/are_disjoint.h" -#include "utils/containers/count.h" #include "utils/containers/enumerate_vector.h" #include "utils/containers/extend.h" #include "utils/containers/transform.h" @@ -9,6 +8,7 @@ #include "utils/graph/node/algorithms.h" #include "utils/graph/open_dataflow_graph/open_dataflow_edge.h" #include "utils/graph/open_dataflow_graph/open_dataflow_edge_query.h" +#include "utils/nonnegative_int/nonnegative_range.h" namespace FlexFlow { @@ -25,18 +25,18 @@ UnorderedSetDataflowGraph::UnorderedSetDataflowGraph( } NodeAddedResult UnorderedSetDataflowGraph::add_node( - std::vector const &inputs, int num_outputs) { + std::vector const &inputs, nonnegative_int num_outputs) { std::vector open_inputs = transform( inputs, [](DataflowOutput const &o) { return OpenDataflowValue{o}; }); return this->add_node(open_inputs, num_outputs); } NodeAddedResult UnorderedSetDataflowGraph::add_node( - std::vector const &inputs, int num_outputs) { + std::vector const &inputs, nonnegative_int num_outputs) { Node new_node = this->node_source.new_node(); - std::vector new_outputs = - transform(count(num_outputs), [&](int output_idx) { + std::vector new_outputs = transform( + nonnegative_range(num_outputs), [&](nonnegative_int output_idx) { return DataflowOutput{new_node, output_idx}; }); diff --git a/lib/utils/src/utils/graph/multidigraph/algorithms/add_nodes.cc b/lib/utils/src/utils/graph/multidigraph/algorithms/add_nodes.cc index a404423284..fd4a8782a4 100644 --- a/lib/utils/src/utils/graph/multidigraph/algorithms/add_nodes.cc +++ b/lib/utils/src/utils/graph/multidigraph/algorithms/add_nodes.cc @@ -3,7 +3,7 @@ namespace FlexFlow { -std::vector add_nodes(MultiDiGraph &g, int num_nodes) { +std::vector add_nodes(MultiDiGraph &g, nonnegative_int num_nodes) { return repeat(num_nodes, [&]() { return g.add_node(); }); } diff --git a/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/are_isomorphic.cc b/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/are_isomorphic.cc new file mode 100644 index 0000000000..f7f8a9fd34 --- /dev/null 
+++ b/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/are_isomorphic.cc @@ -0,0 +1,11 @@ +#include "utils/graph/open_dataflow_graph/algorithms/are_isomorphic.h" +#include "utils/graph/open_dataflow_graph/algorithms/find_isomorphism.h" + +namespace FlexFlow { + +bool are_isomorphic(OpenDataflowGraphView const &src, + OpenDataflowGraphView const &dst) { + return find_isomorphism(src, dst).has_value(); +} + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/as_dot.cc b/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/as_dot.cc index 9077ea5f9a..261de287a9 100644 --- a/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/as_dot.cc +++ b/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/as_dot.cc @@ -2,13 +2,16 @@ #include "utils/dot_file.h" #include "utils/graph/dataflow_graph/algorithms.h" #include "utils/graph/dataflow_graph/algorithms/as_dot.h" +#include "utils/graph/labelled_dataflow_graph/labelled_dataflow_graph.h" #include "utils/graph/node/algorithms.h" #include "utils/graph/open_dataflow_graph/algorithms/get_incoming_edges.h" +#include "utils/graph/open_dataflow_graph/algorithms/get_inputs.h" #include "utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_graph_inputs.h" namespace FlexFlow { std::string as_dot(OpenDataflowGraphView const &g) { + std::function get_node_label = [](Node const &n) { return fmt::format("n{}", n.raw_uid); }; @@ -36,9 +39,13 @@ std::string auto get_node_name = [](Node n) { return fmt::format("n{}", n.raw_uid); }; - auto get_input_field = [](int idx) { return fmt::format("i{}", idx); }; + auto get_input_field = [](nonnegative_int idx) { + return fmt::format("i{}", idx); + }; - auto get_output_field = [](int idx) { return fmt::format("o{}", idx); }; + auto get_output_field = [](nonnegative_int idx) { + return fmt::format("o{}", idx); + }; auto get_graph_input_name = [](DataflowGraphInput i) { return fmt::format("gi{}", i.idx); diff --git a/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/get_incoming_edges.cc b/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/get_incoming_edges.cc index cad00c71e1..728dc75678 100644 --- a/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/get_incoming_edges.cc +++ b/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/get_incoming_edges.cc @@ -27,13 +27,13 @@ std::vector get_incoming_edges(OpenDataflowGraphView const &g, DataflowInputEdgeQuery{ query_set::matchall(), {n}, - query_set::matchall(), + query_set::matchall(), }, DataflowEdgeQuery{ query_set::matchall(), - query_set::matchall(), + query_set::matchall(), {n}, - query_set::matchall(), + query_set::matchall(), }, }), [](OpenDataflowEdge const &l, OpenDataflowEdge const &r) { diff --git a/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/get_subgraph_incoming_edges.cc b/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/get_subgraph_incoming_edges.cc index 95a8e095fc..6448da9c73 100644 --- a/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/get_subgraph_incoming_edges.cc +++ b/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/get_subgraph_incoming_edges.cc @@ -13,13 +13,13 @@ std::unordered_set DataflowInputEdgeQuery{ query_set::matchall(), query_set{ns}, - query_set::matchall(), + query_set::matchall(), }, DataflowEdgeQuery{ query_set{nodes_not_in_ns}, - query_set::matchall(), + query_set::matchall(), query_set{ns}, - query_set::matchall(), + query_set::matchall(), }, }; diff --git 
a/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/open_dataflow_graph_isomorphism.cc b/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/open_dataflow_graph_isomorphism.cc new file mode 100644 index 0000000000..c55c4fe360 --- /dev/null +++ b/lib/utils/src/utils/graph/open_dataflow_graph/algorithms/open_dataflow_graph_isomorphism.cc @@ -0,0 +1,54 @@ +#include "utils/graph/open_dataflow_graph/algorithms/open_dataflow_graph_isomorphism.h" +#include "utils/overload.h" + +namespace FlexFlow { + +OpenDataflowValue isomorphism_map_r_open_dataflow_value_from_l( + OpenDataflowGraphIsomorphism const &iso, OpenDataflowValue const &l_value) { + return l_value.visit(overload{ + [&](DataflowGraphInput const &l_input) { + return OpenDataflowValue{ + iso.input_mapping.at_l(l_input), + }; + }, + [&](DataflowOutput const &l_output) { + return OpenDataflowValue{ + isomorphism_map_r_dataflow_output_from_l(iso, l_output), + }; + }, + }); +} + +OpenDataflowValue isomorphism_map_l_open_dataflow_value_from_r( + OpenDataflowGraphIsomorphism const &iso, OpenDataflowValue const &r_value) { + return r_value.visit(overload{ + [&](DataflowGraphInput const &r_input) { + return OpenDataflowValue{ + iso.input_mapping.at_r(r_input), + }; + }, + [&](DataflowOutput const &r_output) { + return OpenDataflowValue{ + isomorphism_map_l_dataflow_output_from_r(iso, r_output), + }; + }, + }); +} + +DataflowOutput isomorphism_map_r_dataflow_output_from_l( + OpenDataflowGraphIsomorphism const &iso, DataflowOutput const &l_output) { + return DataflowOutput{ + iso.node_mapping.at_l(l_output.node), + l_output.idx, + }; +} + +DataflowOutput isomorphism_map_l_dataflow_output_from_r( + OpenDataflowGraphIsomorphism const &iso, DataflowOutput const &r_output) { + return DataflowOutput{ + iso.node_mapping.at_r(r_output.node), + r_output.idx, + }; +} + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/graph/open_dataflow_graph/dataflow_input_edge_query.cc b/lib/utils/src/utils/graph/open_dataflow_graph/dataflow_input_edge_query.cc index 8736f2d157..34adea6b09 100644 --- a/lib/utils/src/utils/graph/open_dataflow_graph/dataflow_input_edge_query.cc +++ b/lib/utils/src/utils/graph/open_dataflow_graph/dataflow_input_edge_query.cc @@ -6,14 +6,14 @@ DataflowInputEdgeQuery dataflow_input_edge_query_all() { return DataflowInputEdgeQuery{ query_set::matchall(), query_set::matchall(), - query_set::matchall(), + query_set::matchall(), }; } DataflowInputEdgeQuery dataflow_input_edge_query_none() { return DataflowInputEdgeQuery{ query_set::match_none(), query_set::match_none(), - query_set::match_none(), + query_set::match_none(), }; } @@ -28,7 +28,7 @@ DataflowInputEdgeQuery return DataflowInputEdgeQuery{ query_set{e.src}, query_set{e.dst.node}, - query_set{e.dst.idx}, + query_set{e.dst.idx}, }; } @@ -37,7 +37,7 @@ DataflowInputEdgeQuery return DataflowInputEdgeQuery{ query_set{src}, query_set::matchall(), - query_set::matchall(), + query_set::matchall(), }; } @@ -46,7 +46,7 @@ DataflowInputEdgeQuery return DataflowInputEdgeQuery{ query_set::matchall(), query_set{dst.node}, - query_set{dst.idx}, + query_set{dst.idx}, }; } diff --git a/lib/utils/src/utils/graph/open_dataflow_graph/open_dataflow_edge.cc b/lib/utils/src/utils/graph/open_dataflow_graph/open_dataflow_edge.cc index d5e5b614af..d51562a6c6 100644 --- a/lib/utils/src/utils/graph/open_dataflow_graph/open_dataflow_edge.cc +++ b/lib/utils/src/utils/graph/open_dataflow_graph/open_dataflow_edge.cc @@ -7,7 +7,7 @@ Node get_open_dataflow_edge_dst_node(OpenDataflowEdge const &e) { 
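// The two accessors in this hunk delegate to get_open_dataflow_edge_dst,
// which the patch does not show. A sketch of its plausible shape, assuming
// (as the isomorphism mapping above suggests) that OpenDataflowEdge is a
// two-alternative variant dispatched with FlexFlow's overload helper; the
// alternative type names here are assumptions, not confirmed by the patch.
DataflowInput get_open_dataflow_edge_dst(OpenDataflowEdge const &e) {
  return e.visit(overload{
      // edge entering from a graph input
      [](DataflowInputEdge const &input_edge) { return input_edge.dst; },
      // edge between two internal nodes
      [](DataflowEdge const &internal_edge) { return internal_edge.dst; },
  });
}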
return get_open_dataflow_edge_dst(e).node; } -int get_open_dataflow_edge_dst_idx(OpenDataflowEdge const &e) { +nonnegative_int get_open_dataflow_edge_dst_idx(OpenDataflowEdge const &e) { return get_open_dataflow_edge_dst(e).idx; } diff --git a/lib/utils/src/utils/graph/open_dataflow_graph/open_dataflow_graph.cc b/lib/utils/src/utils/graph/open_dataflow_graph/open_dataflow_graph.cc index 63222dd360..949f837665 100644 --- a/lib/utils/src/utils/graph/open_dataflow_graph/open_dataflow_graph.cc +++ b/lib/utils/src/utils/graph/open_dataflow_graph/open_dataflow_graph.cc @@ -4,7 +4,7 @@ namespace FlexFlow { NodeAddedResult OpenDataflowGraph::add_node(std::vector<OpenDataflowValue> const &inputs, - int num_outputs) { + nonnegative_int num_outputs) { return this->get_interface().add_node(inputs, num_outputs); } diff --git a/lib/utils/src/utils/graph/open_dataflow_graph/unordered_set_open_dataflow_graph.cc b/lib/utils/src/utils/graph/open_dataflow_graph/unordered_set_open_dataflow_graph.cc index 0fdb2f408b..171b321c66 100644 --- a/lib/utils/src/utils/graph/open_dataflow_graph/unordered_set_open_dataflow_graph.cc +++ b/lib/utils/src/utils/graph/open_dataflow_graph/unordered_set_open_dataflow_graph.cc @@ -18,7 +18,7 @@ UnorderedSetOpenDataflowGraph::UnorderedSetOpenDataflowGraph( outputs(outputs), graph_inputs(graph_inputs) {} NodeAddedResult UnorderedSetOpenDataflowGraph::add_node( - std::vector<DataflowOutput> const &inputs, int num_outputs) { + std::vector<DataflowOutput> const &inputs, nonnegative_int num_outputs) { NOT_IMPLEMENTED(); } diff --git a/lib/utils/src/utils/graph/render_dot.cc b/lib/utils/src/utils/graph/render_dot.cc new file mode 100644 index 0000000000..8bdc001c80 --- /dev/null +++ b/lib/utils/src/utils/graph/render_dot.cc @@ -0,0 +1,90 @@ +#include "utils/graph/render_dot.h" +#include "utils/containers/flatmap.h" +#include "utils/containers/try_at.h" +#include "utils/graph/dataflow_graph/algorithms.h" +#include "utils/graph/node/algorithms.h" +#include "utils/record_formatter.h" + +namespace FlexFlow { + +std::string escape_dot_string(std::string const &s) { + auto escape_dot_char = [](char c) -> std::string { + switch (c) { + case '\\': + case '"': + return std::string{'\\'} + c; + default: + return std::string{c}; + } + }; + + return flatmap(s, escape_dot_char); +} + +std::string render_dot_node_attrs( + std::unordered_map<std::string, std::string> const &node_attrs) { + std::ostringstream oss; + for (auto const &[k, v] : node_attrs) { + oss << fmt::format( + "\"{}\"=\"{}\",", escape_dot_string(k), escape_dot_string(v)); + } + return oss.str(); +} + +std::string render_node_label( + LabelledDataflowGraphView<std::unordered_map<std::string, std::string>, + std::string> const &g, + Node const &n) { + std::vector<DataflowInput> n_inputs = get_dataflow_inputs(g, n); + std::vector<DataflowOutput> n_outputs = get_outputs(g, n); + + RecordFormatter inputs_record; + for (DataflowInput const &i : n_inputs) { + inputs_record << fmt::format("<i{}> {}", i.idx, i.idx); + } + + RecordFormatter outputs_record; + for (DataflowOutput const &o : n_outputs) { + outputs_record << fmt::format("<o{}> {}", o.idx, g.at(o)); + } + + RecordFormatter rec; + rec << inputs_record + << try_at(g.at(n), std::string{"label"}) + .value_or(fmt::to_string(n.raw_uid)) + << outputs_record; + + std::ostringstream oss; + oss << rec; + return oss.str(); +} + +std::string render_dot( + LabelledDataflowGraphView<std::unordered_map<std::string, std::string>, + std::string> const &g) { + std::vector<std::string> lines; + lines.push_back("digraph {"); + + for (Node const &n : get_nodes(g)) { + std::unordered_map<std::string, std::string> node_attrs = g.at(n); + node_attrs.at("label") = render_node_label(g, n); + node_attrs["shape"] = "record"; + + lines.push_back(fmt::format( + " n{} [{}];", n.raw_uid, render_dot_node_attrs(node_attrs))); + } + + for (DataflowEdge const &e : get_edges(g)) { + lines.push_back(fmt::format(" n{}:o{} -> n{}:i{};", + e.src.node.raw_uid, + e.src.idx, + e.dst.node.raw_uid, + e.dst.idx)); + } + + lines.push_back("}"); + + return join_strings(lines, "\n"); +} + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/nonnegative_int/ceildiv.cc b/lib/utils/src/utils/nonnegative_int/ceildiv.cc new file mode 100644 index 0000000000..f1115b25b5 --- /dev/null +++ b/lib/utils/src/utils/nonnegative_int/ceildiv.cc @@ -0,0 +1,20 @@ +#include "utils/nonnegative_int/ceildiv.h" +#include "utils/exception.h" + +namespace FlexFlow { + +nonnegative_int ceildiv(nonnegative_int numerator, + nonnegative_int denominator) { + if (denominator == 0) { + throw mk_runtime_error(fmt::format( + "ceildiv expected denominator != 0, but received {}", denominator)); + } + + int n = numerator.unwrap_nonnegative(); + int d = denominator.unwrap_nonnegative(); + + int result = (n + d - 1) / d; + return nonnegative_int{result}; +} + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/nonnegative_int/nonnegative_int.cc b/lib/utils/src/utils/nonnegative_int/nonnegative_int.cc index 9088cc4bf9..e86c242250 100644 --- a/lib/utils/src/utils/nonnegative_int/nonnegative_int.cc +++ b/lib/utils/src/utils/nonnegative_int/nonnegative_int.cc @@ -1,4 +1,5 @@ #include "utils/nonnegative_int/nonnegative_int.h" +#include "utils/exception.h" namespace FlexFlow { @@ -10,6 +11,15 @@ nonnegative_int::nonnegative_int(int value) { this->value_ = value; } +nonnegative_int::nonnegative_int(size_t value) { + if (value > std::numeric_limits<int>::max()) { + throw std::invalid_argument(fmt::format( + "Input {} to nonnegative_int(size_t) is out-of-bounds for int", value)); + } + this->value_ = static_cast<int>(value); + assert(this->value_ >= 0); +} + nonnegative_int::operator int() const noexcept { return this->value_; } @@ -75,18 +85,72 @@ nonnegative_int nonnegative_int::operator+(nonnegative_int const &other) const { return nonnegative_int{this->value_ + other.value_}; } +nonnegative_int &nonnegative_int::operator++() { + this->value_++; + return *this; +} + +nonnegative_int nonnegative_int::operator++(int) { + nonnegative_int result = *this; + this->value_++; + return result; +} + +nonnegative_int &nonnegative_int::operator+=(nonnegative_int const &other) { + this->value_ += other.value_; + return *this; +} + +nonnegative_int nonnegative_int::operator*(nonnegative_int const &other) const { + return nonnegative_int{this->value_ * other.value_}; +} + +nonnegative_int &nonnegative_int::operator*=(nonnegative_int const &other) { + this->value_ *= other.value_; + return *this; +} + +nonnegative_int nonnegative_int::operator/(nonnegative_int const &other) const { + return nonnegative_int{this->value_ / other.value_}; +} + +nonnegative_int &nonnegative_int::operator/=(nonnegative_int const &other) { + this->value_ /= other.value_; + return *this; +} + +nonnegative_int nonnegative_int::operator%(nonnegative_int const &other) const { + return nonnegative_int{this->value_ % other.value_}; +} + +nonnegative_int &nonnegative_int::operator%=(nonnegative_int const &other) { + this->value_ %= other.value_; + return *this; +} + std::ostream &operator<<(std::ostream &os, nonnegative_int const &n) { os << n.value_; return os; } -int nonnegative_int::get_value() const { +int nonnegative_int::unwrap_nonnegative() const { return this->value_; } int format_as(nonnegative_int const &x) { - return x.get_value(); + return x.unwrap_nonnegative(); } + +nonnegative_int operator""_n(unsigned long long int x) { + if (x > + static_cast<unsigned long long int>(std::numeric_limits<int>::max())) { + throw mk_runtime_error( + fmt::format("Value too large to wrap as nonnegative_int: {}", x)); + } + + return nonnegative_int{static_cast<int>(x)}; +} + } // namespace FlexFlow namespace nlohmann { @@ -97,13 +161,20 @@ ::FlexFlow::nonnegative_int void adl_serializer<::FlexFlow::nonnegative_int>::to_json( json &j, ::FlexFlow::nonnegative_int t) { - j = t.get_value(); + j = t.unwrap_nonnegative(); } } // namespace nlohmann +namespace rc { +Gen<::FlexFlow::nonnegative_int> + Arbitrary<::FlexFlow::nonnegative_int>::arbitrary() { + return gen::construct<::FlexFlow::nonnegative_int>(gen::nonNegative<int>()); +} +} // namespace rc + namespace std { std::size_t hash<::FlexFlow::nonnegative_int>::operator()( FlexFlow::nonnegative_int const &n) const noexcept { - return std::hash<int>{}(n.get_value()); + return std::hash<int>{}(n.unwrap_nonnegative()); } } // namespace std diff --git a/lib/utils/src/utils/nonnegative_int/nonnegative_range.cc b/lib/utils/src/utils/nonnegative_int/nonnegative_range.cc new file mode 100644 index 0000000000..f31db6d589 --- /dev/null +++ b/lib/utils/src/utils/nonnegative_int/nonnegative_range.cc @@ -0,0 +1,19 @@ +#include "utils/nonnegative_int/nonnegative_range.h" +#include "utils/containers/range.h" +#include "utils/containers/transform.h" + +namespace FlexFlow { + +std::vector<nonnegative_int> nonnegative_range(nonnegative_int end) { + return transform(range(end.unwrap_nonnegative()), + [](int x) { return nonnegative_int{x}; }); +} + +std::vector<nonnegative_int> + nonnegative_range(nonnegative_int start, nonnegative_int end, int step) { + return transform( + range(start.unwrap_nonnegative(), end.unwrap_nonnegative(), step), + [](int x) { return nonnegative_int{x}; }); +} + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/nonnegative_int/num_elements.cc b/lib/utils/src/utils/nonnegative_int/num_elements.cc new file mode 100644 index 0000000000..21292bf2ab --- /dev/null +++ b/lib/utils/src/utils/nonnegative_int/num_elements.cc @@ -0,0 +1,10 @@ +#include "utils/nonnegative_int/num_elements.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using E = value_type<0>; + +template nonnegative_int num_elements(std::vector<E> const &); + +} // namespace FlexFlow diff --git a/lib/utils/test/src/main.cc b/lib/utils/test/src/main.cc deleted file mode 100644 index 9522fa7fdb..0000000000 --- a/lib/utils/test/src/main.cc +++ /dev/null @@ -1,2 +0,0 @@ -#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN -#include "doctest/doctest.h" diff --git a/lib/utils/test/src/utils/bidict/algorithms/bidict_from_enumerating.cc b/lib/utils/test/src/utils/bidict/algorithms/bidict_from_enumerating.cc index b5a373e5c9..a669869fb8 100644 --- a/lib/utils/test/src/utils/bidict/algorithms/bidict_from_enumerating.cc +++ b/lib/utils/test/src/utils/bidict/algorithms/bidict_from_enumerating.cc @@ -10,10 +10,12 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("bidict_from_enumerating(std::unordered_set)") { std::unordered_set<std::string> input = {"zero", "one", "two"}; - bidict<int, std::string> result = bidict_from_enumerating(input); + bidict<nonnegative_int, std::string> result = + bidict_from_enumerating(input); - std::unordered_set<int> result_left_entries = left_entries(result); - std::unordered_set<int> correct_left_entries = {0, 1, 2}; + std::unordered_set<nonnegative_int> result_left_entries = + left_entries(result); + std::unordered_set<nonnegative_int> correct_left_entries = {0_n, 1_n, 2_n}; CHECK(result_left_entries == correct_left_entries); std::unordered_set<std::string>
result_right_entries = @@ -25,13 +27,14 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("bidict_from_enumerating(std::set)") { std::set input = {"a", "c", "b"}; - bidict correct = { - {0, "a"}, - {1, "b"}, - {2, "c"}, + bidict correct = { + {0_n, "a"}, + {1_n, "b"}, + {2_n, "c"}, }; - bidict result = bidict_from_enumerating(input); + bidict result = + bidict_from_enumerating(input); CHECK(result == correct); } diff --git a/lib/utils/test/src/utils/cli/cli_parse.cc b/lib/utils/test/src/utils/cli/cli_parse.cc index 40dea86ae0..72a09efbde 100644 --- a/lib/utils/test/src/utils/cli/cli_parse.cc +++ b/lib/utils/test/src/utils/cli/cli_parse.cc @@ -24,8 +24,8 @@ TEST_SUITE(FF_TEST_SUITE) { {}, }; - CLIFlagKey key_flag1 = CLIFlagKey{0}; - CLIFlagKey key_flag2 = CLIFlagKey{1}; + CLIFlagKey key_flag1 = CLIFlagKey{0_n}; + CLIFlagKey key_flag2 = CLIFlagKey{1_n}; SUBCASE("correctly parses short flag") { std::string input = "-2"; @@ -94,8 +94,8 @@ TEST_SUITE(FF_TEST_SUITE) { }, {}, }; - CLIFlagKey key_flag1 = CLIFlagKey{0}; - CLIFlagKey key_flag2 = CLIFlagKey{1}; + CLIFlagKey key_flag1 = CLIFlagKey{0_n}; + CLIFlagKey key_flag2 = CLIFlagKey{1_n}; SUBCASE("parses flags in any order") { std::vector inputs = {"prog_name", "-2", "--flag1"}; @@ -180,8 +180,8 @@ TEST_SUITE(FF_TEST_SUITE) { }, }; - CLIPositionalArgumentKey key_posarg1 = CLIPositionalArgumentKey{0}; - CLIPositionalArgumentKey key_posarg2 = CLIPositionalArgumentKey{1}; + CLIPositionalArgumentKey key_posarg1 = CLIPositionalArgumentKey{0_n}; + CLIPositionalArgumentKey key_posarg2 = CLIPositionalArgumentKey{1_n}; SUBCASE("can parse multiple positional arguments") { std::vector inputs = {"prog_name", "hello", "world"}; @@ -266,7 +266,7 @@ TEST_SUITE(FF_TEST_SUITE) { }, }; - CLIPositionalArgumentKey key_posarg = CLIPositionalArgumentKey{0}; + CLIPositionalArgumentKey key_posarg = CLIPositionalArgumentKey{0_n}; SUBCASE( "succeeds if a positional argument is set to a valid choice") { @@ -351,11 +351,11 @@ TEST_SUITE(FF_TEST_SUITE) { }, }, }; - CLIFlagKey key_flag1 = CLIFlagKey{0}; - CLIFlagKey key_flag2 = CLIFlagKey{1}; - CLIFlagKey key_flag3 = CLIFlagKey{2}; - CLIPositionalArgumentKey key_posarg1 = CLIPositionalArgumentKey{0}; - CLIPositionalArgumentKey key_posarg2 = CLIPositionalArgumentKey{1}; + CLIFlagKey key_flag1 = CLIFlagKey{0_n}; + CLIFlagKey key_flag2 = CLIFlagKey{1_n}; + CLIFlagKey key_flag3 = CLIFlagKey{2_n}; + CLIPositionalArgumentKey key_posarg1 = CLIPositionalArgumentKey{0_n}; + CLIPositionalArgumentKey key_posarg2 = CLIPositionalArgumentKey{1_n}; SUBCASE("works if flags are before positional arguments") { std::vector inputs = { @@ -449,11 +449,11 @@ TEST_SUITE(FF_TEST_SUITE) { }, }, }; - CLIFlagKey key_flag1 = CLIFlagKey{0}; - CLIFlagKey key_flag2 = CLIFlagKey{1}; - CLIFlagKey key_flag3 = CLIFlagKey{2}; - CLIPositionalArgumentKey key_posarg1 = CLIPositionalArgumentKey{0}; - CLIPositionalArgumentKey key_posarg2 = CLIPositionalArgumentKey{1}; + CLIFlagKey key_flag1 = CLIFlagKey{0_n}; + CLIFlagKey key_flag2 = CLIFlagKey{1_n}; + CLIFlagKey key_flag3 = CLIFlagKey{2_n}; + CLIPositionalArgumentKey key_posarg1 = CLIPositionalArgumentKey{0_n}; + CLIPositionalArgumentKey key_posarg2 = CLIPositionalArgumentKey{1_n}; int argc = 5; char const *argv[] = {"prog_name", "red", "-f", "world", "--flag3"}; diff --git a/lib/utils/test/src/utils/containers/at_idx.cc b/lib/utils/test/src/utils/containers/at_idx.cc new file mode 100644 index 0000000000..b2a6286b62 --- /dev/null +++ b/lib/utils/test/src/utils/containers/at_idx.cc @@ -0,0 +1,29 @@ +#include 
"utils/containers/at_idx.h" +#include "test/utils/doctest/fmt/optional.h" +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("at_idx(std::vector, nonnegative_int)") { + std::vector vec = {1, 3, 2, 3}; + + SUBCASE("idx is in bounds") { + nonnegative_int idx = 1_n; + + std::optional result = at_idx(vec, idx); + std::optional correct = 3; + + CHECK(result == correct); + } + + SUBCASE("idx is out of bounds") { + nonnegative_int idx = 4_n; + + std::optional result = at_idx(vec, idx); + std::optional correct = std::nullopt; + + CHECK(result == correct); + } + } +} diff --git a/lib/utils/test/src/utils/containers/enumerate.cc b/lib/utils/test/src/utils/containers/enumerate.cc index 2f9a5b3c02..2fdb2e481e 100644 --- a/lib/utils/test/src/utils/containers/enumerate.cc +++ b/lib/utils/test/src/utils/containers/enumerate.cc @@ -17,26 +17,27 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("enumerate(std::vector)") { std::vector input = {"zero", "one", "two", "three"}; - std::map correct = { - {0, "zero"}, - {1, "one"}, - {2, "two"}, - {3, "three"}, + std::map correct = { + {0_n, "zero"}, + {1_n, "one"}, + {2_n, "two"}, + {3_n, "three"}, }; - std::map result = enumerate(input); + std::map result = enumerate(input); CHECK(result == correct); SUBCASE("check iteration order") { - std::vector> iterated_result = - vector_of(result); - std::vector> correct_iteration_order = { - {0, "zero"}, - {1, "one"}, - {2, "two"}, - {3, "three"}, - }; + std::vector> + iterated_result = vector_of(result); + std::vector> + correct_iteration_order = { + {0_n, "zero"}, + {1_n, "one"}, + {2_n, "two"}, + {3_n, "three"}, + }; CHECK(iterated_result == correct_iteration_order); } @@ -45,9 +46,9 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("enumerate(std::unordered_set)") { std::unordered_set input = {"A", "B", "C", "D"}; - std::unordered_set correct_keys = {0, 1, 2, 3}; + std::unordered_set correct_keys = {0_n, 1_n, 2_n, 3_n}; std::unordered_multiset correct_values = {"A", "B", "C", "D"}; - std::map result = enumerate(input); + std::map result = enumerate(input); CHECK(keys(result) == correct_keys); CHECK(unordered_multiset_of(values(result)) == correct_values); diff --git a/lib/utils/test/src/utils/containers/enumerate_vector.cc b/lib/utils/test/src/utils/containers/enumerate_vector.cc new file mode 100644 index 0000000000..fa5c5cf6fb --- /dev/null +++ b/lib/utils/test/src/utils/containers/enumerate_vector.cc @@ -0,0 +1,33 @@ +#include "utils/containers/enumerate_vector.h" +#include "test/utils/doctest/fmt/map.h" +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("enumerate_vector(std::vector)") { + SUBCASE("input vector is empty") { + std::vector input = {}; + + std::map result = enumerate_vector(input); + std::map correct = {}; + + CHECK(result == correct); + } + + SUBCASE("input vector is not empty") { + std::vector input = {2, 3, 1, 3, 3}; + + std::map result = enumerate_vector(input); + std::map correct = { + {0_n, 2}, + {1_n, 3}, + {2_n, 1}, + {3_n, 3}, + {4_n, 3}, + }; + + CHECK(result == correct); + } + } +} diff --git a/lib/utils/test/src/utils/containers/flatmap.cc b/lib/utils/test/src/utils/containers/flatmap.cc index bd6d3ae5be..6a6d3c86a8 100644 --- a/lib/utils/test/src/utils/containers/flatmap.cc +++ b/lib/utils/test/src/utils/containers/flatmap.cc @@ -73,6 +73,38 @@ TEST_SUITE(FF_TEST_SUITE) { } } + TEST_CASE("flatmap(std::string, F)") { + std::string input = "aBabcBc"; + + SUBCASE("replacement length > 1") { + std::string result = flatmap(input, [](char c) -> 
std::string { + if (c == 'B') { + return ".."; + } else { + return std::string{c}; + } + }); + + std::string correct = "a..abc..c"; + + CHECK(result == correct); + } + + SUBCASE("replacement length == 0") { + std::string result = flatmap(input, [](char c) -> std::string { + if (c == 'B') { + return ""; + } else { + return std::string{c}; + } + }); + + std::string correct = "aabcc"; + + CHECK(result == correct); + } + } + TEST_CASE("flatmap(std::unordered_map, F)") { auto de_nest_keys = [](int k1, std::unordered_map const &v) { diff --git a/lib/utils/test/src/utils/containers/get_all_permutations_with_repetition.cc b/lib/utils/test/src/utils/containers/get_all_permutations_with_repetition.cc index f25bcf65b1..9fb4048691 100644 --- a/lib/utils/test/src/utils/containers/get_all_permutations_with_repetition.cc +++ b/lib/utils/test/src/utils/containers/get_all_permutations_with_repetition.cc @@ -13,7 +13,7 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector input = {1, 2, 3}; std::unordered_multiset> result = - get_all_permutations_with_repetition(input, 1); + get_all_permutations_with_repetition(input, 1_n); std::unordered_multiset> correct = { {1}, {2}, @@ -27,7 +27,7 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector input = {1}; std::unordered_multiset> result = - get_all_permutations_with_repetition(input, 2); + get_all_permutations_with_repetition(input, 2_n); std::unordered_multiset> correct = { {1, 1}, }; @@ -39,7 +39,7 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector input = {1, 2}; std::unordered_multiset> result = - get_all_permutations_with_repetition(input, 3); + get_all_permutations_with_repetition(input, 3_n); std::unordered_multiset> correct = { {1, 1, 1}, {1, 1, 2}, @@ -58,7 +58,7 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector input = {1, 2, 2}; std::unordered_multiset> result = - get_all_permutations_with_repetition(input, 2); + get_all_permutations_with_repetition(input, 2_n); std::unordered_multiset> correct = {{1, 1}, {1, 2}, {1, 2}, diff --git a/lib/utils/test/src/utils/containers/make.cc b/lib/utils/test/src/utils/containers/make.cc new file mode 100644 index 0000000000..4070f5b35a --- /dev/null +++ b/lib/utils/test/src/utils/containers/make.cc @@ -0,0 +1,15 @@ +#include "utils/containers/make.h" +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("make") { + auto f = make(); + + int result = f(true); + int correct = 1; + + CHECK(result == correct); + } +} diff --git a/lib/utils/test/src/utils/containers/merge_maps.cc b/lib/utils/test/src/utils/containers/merge_maps.cc index a083e94de3..4ec8054892 100644 --- a/lib/utils/test/src/utils/containers/merge_maps.cc +++ b/lib/utils/test/src/utils/containers/merge_maps.cc @@ -1,30 +1,80 @@ #include "utils/containers/merge_maps.h" #include "test/utils/doctest/fmt/unordered_map.h" #include -#include using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("merge_disjoint_maps") { + std::unordered_map l_map = { + {1, "one"}, + {2, "two"}, + }; - TEST_CASE("merge_maps") { + std::unordered_map r_map = { + {3, "three"}, + }; - SUBCASE("disjoint keys") { - std::unordered_map lhs = {{1, "one"}, {2, "two"}}; - std::unordered_map rhs = {{3, "three"}, {4, "four"}}; - - std::unordered_map result = merge_maps(lhs, rhs); - std::unordered_map correct = { - {1, "one"}, {2, "two"}, {3, "three"}, {4, "four"}}; + std::unordered_map correct = { + {1, "one"}, + {2, "two"}, + {3, "three"}, + }; + SUBCASE("maps are disjoint") { + std::unordered_map result = + merge_disjoint_maps(l_map, r_map); CHECK(result == correct); } - 
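// The three merge flavors exercised below differ only in overlap policy:
// merge_disjoint_maps rejects a shared key, merge_map_left_dominates keeps
// the left value, and merge_map_right_dominates keeps the right one. A
// compact sketch of the disjoint case under that reading; the name and
// error type are illustrative, and the patch's real implementation in
// utils/containers/merge_maps.h may differ.
#include <stdexcept>
#include <unordered_map>

template <typename K, typename V>
std::unordered_map<K, V>
    merge_disjoint_maps_sketch(std::unordered_map<K, V> const &lhs,
                               std::unordered_map<K, V> const &rhs) {
  std::unordered_map<K, V> result = lhs;
  for (auto const &[k, v] : rhs) {
    // emplace returns {iterator, false} when the key is already present
    if (!result.emplace(k, v).second) {
      throw std::runtime_error("merge_disjoint_maps: maps share a key");
    }
  }
  return result;
}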
SUBCASE("overlapping keys") { - std::unordered_map<int, std::string> lhs = {{1, "one"}, {2, "two"}}; - std::unordered_map<int, std::string> rhs = {{2, "three"}, {3, "four"}}; - - CHECK_THROWS(merge_maps(lhs, rhs)); + SUBCASE("maps are not disjoint") { + CHECK_THROWS(merge_disjoint_maps(l_map, l_map)); } } + + TEST_CASE("merge_map_left_dominates") { + std::unordered_map<int, std::string> l_map = { + {1, "one"}, + {2, "left_two"}, + }; + + std::unordered_map<int, std::string> r_map = { + {2, "right_two"}, + {3, "three"}, + }; + + std::unordered_map<int, std::string> correct = { + {1, "one"}, + {2, "left_two"}, + {3, "three"}, + }; + + std::unordered_map<int, std::string> result = + merge_map_left_dominates(l_map, r_map); + + CHECK(result == correct); + } + + TEST_CASE("merge_map_right_dominates") { + std::unordered_map<int, std::string> l_map = { + {1, "one"}, + {2, "left_two"}, + }; + + std::unordered_map<int, std::string> r_map = { + {2, "right_two"}, + {3, "three"}, + }; + + std::unordered_map<int, std::string> correct = { + {1, "one"}, + {2, "right_two"}, + {3, "three"}, + }; + + std::unordered_map<int, std::string> result = + merge_map_right_dominates(l_map, r_map); + + CHECK(result == correct); + } } diff --git a/lib/utils/test/src/utils/containers/product.cc b/lib/utils/test/src/utils/containers/product.cc index 3fa94c8e9e..2278bfba17 100644 --- a/lib/utils/test/src/utils/containers/product.cc +++ b/lib/utils/test/src/utils/containers/product.cc @@ -1,4 +1,6 @@ #include "utils/containers/product.h" +#include "utils/nonnegative_int/nonnegative_int.h" +#include #include #include #include @@ -29,4 +31,20 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(correct == result); } } + + TEST_CASE("product(std::vector)") { + SUBCASE("non-empty container") { + std::vector<nonnegative_int> input = {1_n, 2_n, 3_n, 5_n}; + nonnegative_int correct = 30_n; + auto result = product(input); + CHECK(correct == result); + } + + SUBCASE("single-element container") { + std::vector<nonnegative_int> input = {5_n}; + nonnegative_int correct = 5_n; + nonnegative_int result = product(input); + CHECK(result == correct); + } + } } diff --git a/lib/utils/test/src/utils/containers/repeat.cc b/lib/utils/test/src/utils/containers/repeat.cc index d8ffe76a64..d2fc595f49 100644 --- a/lib/utils/test/src/utils/containers/repeat.cc +++ b/lib/utils/test/src/utils/containers/repeat.cc @@ -7,7 +7,7 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("repeat") { int x = 0; - std::vector<int> result = repeat(3, [&]() { + std::vector<int> result = repeat(3_n, [&]() { int result = x; x += 2; return result; diff --git a/lib/utils/test/src/utils/containers/replicate.cc b/lib/utils/test/src/utils/containers/repeat_element.cc similarity index 69% rename from lib/utils/test/src/utils/containers/replicate.cc rename to lib/utils/test/src/utils/containers/repeat_element.cc index 1c7845642e..08bee8bec8 100644 --- a/lib/utils/test/src/utils/containers/replicate.cc +++ b/lib/utils/test/src/utils/containers/repeat_element.cc @@ -1,4 +1,4 @@ -#include "utils/containers/replicate.h" +#include "utils/containers/repeat_element.h" #include "test/utils/doctest/fmt/unordered_set.h" #include "test/utils/doctest/fmt/vector.h" #include @@ -7,16 +7,17 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("replicate") { + TEST_CASE("repeat_element") { SUBCASE("ints") { int x = 42; - std::vector<int> result = replicate(5, x); + std::vector<int> result = repeat_element(nonnegative_int{5}, x); std::vector<int> correct = {42, 42, 42, 42, 42}; CHECK(result == correct); } SUBCASE("unordered_set") { std::unordered_set<double> x = {1.0, 1.5}; - std::vector<std::unordered_set<double>> result = replicate(3, x); + std::vector<std::unordered_set<double>> result = +
repeat_element(nonnegative_int{3}, x); std::vector> correct = { {1.0, 1.5}, {1.0, 1.5}, {1.0, 1.5}}; CHECK(result == correct); diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms.cc index 25f990f80e..ff491f6b85 100644 --- a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms.cc +++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms.cc @@ -12,19 +12,19 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_inputs/get_outputs") { DataflowGraph g = DataflowGraph::create(); - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; DataflowOutput o1 = get_only(n1_added.outputs); - NodeAddedResult n2_added = g.add_node({}, 1); + NodeAddedResult n2_added = g.add_node({}, 1_n); Node n2 = n2_added.node; DataflowOutput o2 = get_only(n2_added.outputs); - NodeAddedResult n3_added = g.add_node({}, 1); + NodeAddedResult n3_added = g.add_node({}, 1_n); Node n3 = n3_added.node; DataflowOutput o3 = get_only(n3_added.outputs); - NodeAddedResult n4_added = g.add_node({o1, o2, o3}, 1); + NodeAddedResult n4_added = g.add_node({o1, o2, o3}, 1_n); Node n4 = n4_added.node; DataflowOutput o4 = get_only(n4_added.outputs); @@ -44,15 +44,15 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("topological_ordering") { DataflowGraph g = DataflowGraph::create(); - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; DataflowOutput o1 = get_only(n1_added.outputs); - NodeAddedResult n2_added = g.add_node({o1}, 1); + NodeAddedResult n2_added = g.add_node({o1}, 1_n); Node n2 = n2_added.node; DataflowOutput o2 = get_only(n2_added.outputs); - NodeAddedResult n3_added = g.add_node({o2}, 1); + NodeAddedResult n3_added = g.add_node({o2}, 1_n); Node n3 = n3_added.node; DataflowOutput o3 = get_only(n3_added.outputs); diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/dataflow_graphs_are_isomorphic.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/dataflow_graphs_are_isomorphic.cc index f991b4a65e..0f812f2dec 100644 --- a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/dataflow_graphs_are_isomorphic.cc +++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/dataflow_graphs_are_isomorphic.cc @@ -11,21 +11,21 @@ TEST_SUITE(FF_TEST_SUITE) { "dataflow_graphs_are_isomorphic(DataflowGraphView, DataflowGraphView)") { auto g1 = DataflowGraph::create(); - NodeAddedResult g1_n1_added = g1.add_node({}, 1); + NodeAddedResult g1_n1_added = g1.add_node({}, 1_n); Node g1_n1_node = g1_n1_added.node; DataflowOutput g1_n1_output = get_only(g1_n1_added.outputs); - NodeAddedResult g1_n2_added = g1.add_node({g1_n1_output}, 1); + NodeAddedResult g1_n2_added = g1.add_node({g1_n1_output}, 1_n); Node g1_n2_node = g1_n2_added.node; auto g2 = DataflowGraph::create(); SUBCASE("input graphs are isomorphic") { - NodeAddedResult g2_n1_added = g2.add_node({}, 1); + NodeAddedResult g2_n1_added = g2.add_node({}, 1_n); Node g2_n1_node = g2_n1_added.node; DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs); - NodeAddedResult g2_n2_added = g2.add_node({g2_n1_output}, 1); + NodeAddedResult g2_n2_added = g2.add_node({g2_n1_output}, 1_n); Node g2_n2_node = g2_n2_added.node; bool correct = true; @@ -36,12 +36,12 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("input graphs are not isomorphic (different connectivity)") { - NodeAddedResult g2_n1_added = g2.add_node({}, 1); + NodeAddedResult g2_n1_added = g2.add_node({}, 
1_n); Node g2_n1_node = g2_n1_added.node; DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs); NodeAddedResult g2_n2_added = - g2.add_node({g2_n1_output, g2_n1_output}, 1); + g2.add_node({g2_n1_output, g2_n1_output}, 1_n); Node g2_n2_node = g2_n2_added.node; bool correct = false; @@ -53,14 +53,14 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("input graphs are not isomorphic (different number of src and sink " "nodes)") { - NodeAddedResult g2_n1_added = g2.add_node({}, 1); + NodeAddedResult g2_n1_added = g2.add_node({}, 1_n); Node g2_n1_node = g2_n1_added.node; DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs); - NodeAddedResult g2_n2_added = g2.add_node({g2_n1_output}, 1); + NodeAddedResult g2_n2_added = g2.add_node({g2_n1_output}, 1_n); Node g2_n2_node = g2_n2_added.node; - NodeAddedResult g2_n3_added = g2.add_node({}, 1); + NodeAddedResult g2_n3_added = g2.add_node({}, 1_n); Node g2_n3_node = g2_n3_added.node; bool correct = false; @@ -72,15 +72,15 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("input graphs are not isomorphic (different number of internal " "nodes)") { - NodeAddedResult g2_n1_added = g2.add_node({}, 1); + NodeAddedResult g2_n1_added = g2.add_node({}, 1_n); Node g2_n1_node = g2_n1_added.node; DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs); - NodeAddedResult g2_n2_added = g2.add_node({g2_n1_output}, 1); + NodeAddedResult g2_n2_added = g2.add_node({g2_n1_output}, 1_n); Node g2_n2_node = g2_n2_added.node; DataflowOutput g2_n2_output = get_only(g2_n2_added.outputs); - NodeAddedResult g2_n3_added = g2.add_node({g2_n2_output}, 1); + NodeAddedResult g2_n3_added = g2.add_node({g2_n2_output}, 1_n); Node g2_n3_node = g2_n3_added.node; bool correct = false; diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/find_isomorphism.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/find_isomorphism.cc index 160e4c4f73..8974d09832 100644 --- a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/find_isomorphism.cc +++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/find_isomorphism.cc @@ -10,21 +10,21 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("find_isomorphism(DataflowGraphView, DataflowGraphView)") { auto g1 = DataflowGraph::create(); - NodeAddedResult g1_n1_added = g1.add_node({}, 1); + NodeAddedResult g1_n1_added = g1.add_node({}, 1_n); Node g1_n1_node = g1_n1_added.node; DataflowOutput g1_n1_output = get_only(g1_n1_added.outputs); - NodeAddedResult g1_n2_added = g1.add_node({g1_n1_output}, 1); + NodeAddedResult g1_n2_added = g1.add_node({g1_n1_output}, 1_n); Node g1_n2_node = g1_n2_added.node; auto g2 = DataflowGraph::create(); SUBCASE("input graphs are isomorphic") { - NodeAddedResult g2_n1_added = g2.add_node({}, 1); + NodeAddedResult g2_n1_added = g2.add_node({}, 1_n); Node g2_n1_node = g2_n1_added.node; DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs); - NodeAddedResult g2_n2_added = g2.add_node({g2_n1_output}, 1); + NodeAddedResult g2_n2_added = g2.add_node({g2_n1_output}, 1_n); Node g2_n2_node = g2_n2_added.node; std::optional correct_isomorphism = @@ -41,12 +41,12 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("input graphs are not isomorphic (different connectivity)") { - NodeAddedResult g2_n1_added = g2.add_node({}, 1); + NodeAddedResult g2_n1_added = g2.add_node({}, 1_n); Node g2_n1_node = g2_n1_added.node; DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs); NodeAddedResult g2_n2_added = - g2.add_node({g2_n1_output, g2_n1_output}, 1); + g2.add_node({g2_n1_output, g2_n1_output}, 1_n); Node g2_n2_node = 
g2_n2_added.node; std::optional correct_isomorphism = @@ -59,14 +59,14 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("input graphs are not isomorphic (different number of src and sink " "nodes)") { - NodeAddedResult g2_n1_added = g2.add_node({}, 1); + NodeAddedResult g2_n1_added = g2.add_node({}, 1_n); Node g2_n1_node = g2_n1_added.node; DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs); - NodeAddedResult g2_n2_added = g2.add_node({g2_n1_output}, 1); + NodeAddedResult g2_n2_added = g2.add_node({g2_n1_output}, 1_n); Node g2_n2_node = g2_n2_added.node; - NodeAddedResult g2_n3_added = g2.add_node({}, 0); + NodeAddedResult g2_n3_added = g2.add_node({}, 0_n); Node g2_n3_node = g2_n3_added.node; std::optional correct_isomorphism = @@ -79,15 +79,15 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("input graphs are not isomorphic (different number of internal " "nodes)") { - NodeAddedResult g2_n1_added = g2.add_node({}, 1); + NodeAddedResult g2_n1_added = g2.add_node({}, 1_n); Node g2_n1_node = g2_n1_added.node; DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs); - NodeAddedResult g2_n2_added = g2.add_node({g2_n1_output}, 1); + NodeAddedResult g2_n2_added = g2.add_node({g2_n1_output}, 1_n); Node g2_n2_node = g2_n2_added.node; DataflowOutput g2_n2_output = get_only(g2_n2_added.outputs); - NodeAddedResult g2_n3_added = g2.add_node({g2_n2_output}, 1); + NodeAddedResult g2_n3_added = g2.add_node({g2_n2_output}, 1_n); Node g2_n3_node = g2_n3_added.node; std::optional correct_isomorphism = diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_dataflow_edges_from_node_to_node.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_dataflow_edges_from_node_to_node.cc index fec5d3401e..e619cc3b1c 100644 --- a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_dataflow_edges_from_node_to_node.cc +++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_dataflow_edges_from_node_to_node.cc @@ -11,12 +11,12 @@ TEST_SUITE(FF_TEST_SUITE) { DataflowGraph g = DataflowGraph::create(); SUBCASE("gets edges if there are multiple") { - NodeAddedResult n1_added = g.add_node({}, 2); + NodeAddedResult n1_added = g.add_node({}, 2_n); Node n1 = n1_added.node; DataflowOutput n1_o0 = n1_added.outputs.at(0); DataflowOutput n1_o1 = n1_added.outputs.at(1); - NodeAddedResult n2_added = g.add_node({n1_o0, n1_o0, n1_o1}, 0); + NodeAddedResult n2_added = g.add_node({n1_o0, n1_o0, n1_o1}, 0_n); Node n2 = n2_added.node; std::unordered_set result = @@ -24,15 +24,15 @@ TEST_SUITE(FF_TEST_SUITE) { std::unordered_set correct = { DataflowEdge{ n1_o0, - DataflowInput{n2, 0}, + DataflowInput{n2, 0_n}, }, DataflowEdge{ n1_o0, - DataflowInput{n2, 1}, + DataflowInput{n2, 1_n}, }, DataflowEdge{ n1_o1, - DataflowInput{n2, 2}, + DataflowInput{n2, 2_n}, }, }; @@ -40,15 +40,15 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("does not get edges to/from other nodes") { - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; DataflowOutput o1 = get_only(n1_added.outputs); - NodeAddedResult n2_added = g.add_node({o1}, 1); + NodeAddedResult n2_added = g.add_node({o1}, 1_n); Node n2 = n2_added.node; DataflowOutput o2 = get_only(n2_added.outputs); - NodeAddedResult n3_added = g.add_node({o2}, 1); + NodeAddedResult n3_added = g.add_node({o2}, 1_n); Node n3 = n3_added.node; DataflowOutput o3 = get_only(n3_added.outputs); @@ -61,11 +61,11 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE( "does not get flipped edges (i.e., respects from vs to direction)") { - 
NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; DataflowOutput o1 = get_only(n1_added.outputs); - NodeAddedResult n2_added = g.add_node({o1}, 0); + NodeAddedResult n2_added = g.add_node({o1}, 0_n); Node n2 = n2_added.node; std::unordered_set result = @@ -76,10 +76,10 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("returns empty set if no edges exist between the given nodes") { - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; - NodeAddedResult n2_added = g.add_node({}, 1); + NodeAddedResult n2_added = g.add_node({}, 1_n); Node n2 = n2_added.node; std::unordered_set result = @@ -91,7 +91,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("returns empty set if src node == dst node (as cycles cannot exist " "in DataflowGraph") { - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; std::unordered_set result = diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_incoming_edges.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_incoming_edges.cc index 86e4802cdb..f55afbacc1 100644 --- a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_incoming_edges.cc +++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_incoming_edges.cc @@ -10,34 +10,34 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_incoming_edges(DataflowGraphView, Node)") { DataflowGraph g = DataflowGraph::create(); - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; DataflowOutput o1 = get_only(n1_added.outputs); - NodeAddedResult n2_added = g.add_node({}, 1); + NodeAddedResult n2_added = g.add_node({}, 1_n); Node n2 = n2_added.node; DataflowOutput o2 = get_only(n2_added.outputs); - NodeAddedResult n3_added = g.add_node({o2}, 1); + NodeAddedResult n3_added = g.add_node({o2}, 1_n); Node n3 = n3_added.node; DataflowOutput o3 = get_only(n3_added.outputs); - NodeAddedResult n4_added = g.add_node({o2, o3}, 1); + NodeAddedResult n4_added = g.add_node({o2, o3}, 1_n); Node n4 = n4_added.node; DataflowOutput o4 = get_only(n4_added.outputs); SUBCASE("n4 - multiple incoming edges") { std::vector result = get_incoming_edges(g, n4); std::vector correct = { - DataflowEdge{o2, DataflowInput{n4, 0}}, - DataflowEdge{o3, DataflowInput{n4, 1}}}; + DataflowEdge{o2, DataflowInput{n4, 0_n}}, + DataflowEdge{o3, DataflowInput{n4, 1_n}}}; CHECK(result == correct); } SUBCASE("n3- single incoming edge") { std::vector result = get_incoming_edges(g, n3); std::vector correct = { - DataflowEdge{o2, DataflowInput{n3, 0}}, + DataflowEdge{o2, DataflowInput{n3, 0_n}}, }; CHECK(result == correct); } diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc index be874b7e29..c37dcf5be7 100644 --- a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc +++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_outgoing_edges.cc @@ -10,26 +10,26 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_outgoing_edges(DataflowGraphView, Node)") { DataflowGraph g = DataflowGraph::create(); - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; DataflowOutput o1 = get_only(n1_added.outputs); - NodeAddedResult n2_added = g.add_node({o1}, 1); + 
NodeAddedResult n2_added = g.add_node({o1}, 1_n); Node n2 = n2_added.node; DataflowOutput o2 = get_only(n2_added.outputs); - NodeAddedResult n3_added = g.add_node({o1}, 1); + NodeAddedResult n3_added = g.add_node({o1}, 1_n); Node n3 = n3_added.node; DataflowOutput o3 = get_only(n3_added.outputs); - NodeAddedResult n4_added = g.add_node({o2}, 1); + NodeAddedResult n4_added = g.add_node({o2}, 1_n); Node n4 = n4_added.node; DataflowOutput o4 = get_only(n4_added.outputs); SUBCASE("n2 - single outgoing edge") { std::unordered_set result = get_outgoing_edges(g, n2); std::unordered_set correct = { - DataflowEdge{o2, DataflowInput{n4, 0}}, + DataflowEdge{o2, DataflowInput{n4, 0_n}}, }; CHECK(result == correct); } @@ -37,8 +37,8 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("n1 - multiple outgoing edges") { std::unordered_set result = get_outgoing_edges(g, n1); std::unordered_set correct = { - DataflowEdge{o1, DataflowInput{n2, 0}}, - DataflowEdge{o1, DataflowInput{n3, 0}}, + DataflowEdge{o1, DataflowInput{n2, 0_n}}, + DataflowEdge{o1, DataflowInput{n3, 0_n}}, }; CHECK(result == correct); } @@ -53,19 +53,19 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_outgoing_edges(DataflowGraphView, std::unordered_set)") { DataflowGraph g = DataflowGraph::create(); - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; DataflowOutput o1 = get_only(n1_added.outputs); - NodeAddedResult n2_added = g.add_node({o1}, 1); + NodeAddedResult n2_added = g.add_node({o1}, 1_n); Node n2 = n2_added.node; DataflowOutput o2 = get_only(n2_added.outputs); - NodeAddedResult n3_added = g.add_node({o1}, 1); + NodeAddedResult n3_added = g.add_node({o1}, 1_n); Node n3 = n3_added.node; DataflowOutput o3 = get_only(n3_added.outputs); - NodeAddedResult n4_added = g.add_node({o2}, 1); + NodeAddedResult n4_added = g.add_node({o2}, 1_n); Node n4 = n4_added.node; DataflowOutput o4 = get_only(n4_added.outputs); @@ -73,9 +73,9 @@ TEST_SUITE(FF_TEST_SUITE) { std::unordered_set nodes = {n1, n2}; std::unordered_set result = get_outgoing_edges(g, nodes); std::unordered_set correct = { - DataflowEdge{o1, DataflowInput{n2, 0}}, - DataflowEdge{o1, DataflowInput{n3, 0}}, - DataflowEdge{o2, DataflowInput{n4, 0}}, + DataflowEdge{o1, DataflowInput{n2, 0_n}}, + DataflowEdge{o1, DataflowInput{n3, 0_n}}, + DataflowEdge{o2, DataflowInput{n4, 0_n}}, }; CHECK(result == correct); } diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_subgraph_incoming_edges.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_subgraph_incoming_edges.cc index 330628adfd..6c770a9d29 100644 --- a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_subgraph_incoming_edges.cc +++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_subgraph_incoming_edges.cc @@ -11,19 +11,19 @@ TEST_SUITE(FF_TEST_SUITE) { "std::unordered_set") { DataflowGraph g = DataflowGraph::create(); - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; DataflowOutput o1 = get_only(n1_added.outputs); - NodeAddedResult n2_added = g.add_node({o1}, 1); + NodeAddedResult n2_added = g.add_node({o1}, 1_n); Node n2 = n2_added.node; DataflowOutput o2 = get_only(n2_added.outputs); - NodeAddedResult n3_added = g.add_node({o1, o2, o1}, 1); + NodeAddedResult n3_added = g.add_node({o1, o2, o1}, 1_n); Node n3 = n3_added.node; DataflowOutput o3 = get_only(n3_added.outputs); - NodeAddedResult n4_added = g.add_node({o2, o3}, 1); + NodeAddedResult 
n4_added = g.add_node({o2, o3}, 1_n); Node n4 = n4_added.node; DataflowOutput o4 = get_only(n4_added.outputs); @@ -33,9 +33,9 @@ TEST_SUITE(FF_TEST_SUITE) { get_subgraph_incoming_edges(g, input_node_set); std::unordered_set correct = { - DataflowEdge{o1, DataflowInput{n2, 0}}, - DataflowEdge{o1, DataflowInput{n3, 0}}, - DataflowEdge{o1, DataflowInput{n3, 2}}, + DataflowEdge{o1, DataflowInput{n2, 0_n}}, + DataflowEdge{o1, DataflowInput{n3, 0_n}}, + DataflowEdge{o1, DataflowInput{n3, 2_n}}, }; CHECK(result == correct); diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_subgraph_outgoing_edges.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_subgraph_outgoing_edges.cc index 779d0a9560..bb7f3c4c30 100644 --- a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_subgraph_outgoing_edges.cc +++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/get_subgraph_outgoing_edges.cc @@ -11,19 +11,19 @@ TEST_SUITE(FF_TEST_SUITE) { "std::unordered_set") { DataflowGraph g = DataflowGraph::create(); - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; DataflowOutput o1 = get_only(n1_added.outputs); - NodeAddedResult n2_added = g.add_node({o1}, 1); + NodeAddedResult n2_added = g.add_node({o1}, 1_n); Node n2 = n2_added.node; DataflowOutput o2 = get_only(n2_added.outputs); - NodeAddedResult n3_added = g.add_node({o2}, 1); + NodeAddedResult n3_added = g.add_node({o2}, 1_n); Node n3 = n3_added.node; DataflowOutput o3 = get_only(n3_added.outputs); - NodeAddedResult n4_added = g.add_node({o1, o2, o3}, 1); + NodeAddedResult n4_added = g.add_node({o1, o2, o3}, 1_n); Node n4 = n4_added.node; DataflowOutput o4 = get_only(n4_added.outputs); @@ -33,8 +33,8 @@ TEST_SUITE(FF_TEST_SUITE) { get_subgraph_outgoing_edges(g, input_node_set); std::unordered_set correct = { - DataflowEdge{o2, DataflowInput{n4, 1}}, - DataflowEdge{o3, DataflowInput{n4, 2}}, + DataflowEdge{o2, DataflowInput{n4, 1_n}}, + DataflowEdge{o3, DataflowInput{n4, 2_n}}, }; CHECK(result == correct); diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/transitive_reduced_dataflow_graph/get_transitive_reduced_boundary_nodes_for_split.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/transitive_reduced_dataflow_graph/get_transitive_reduced_boundary_nodes_for_split.cc index c35789044d..4e26812315 100644 --- a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/transitive_reduced_dataflow_graph/get_transitive_reduced_boundary_nodes_for_split.cc +++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/transitive_reduced_dataflow_graph/get_transitive_reduced_boundary_nodes_for_split.cc @@ -19,19 +19,19 @@ TEST_SUITE(FF_TEST_SUITE) { DataflowGraph g = DataflowGraph::create(); - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; DataflowOutput o1 = get_only(n1_added.outputs); - NodeAddedResult n2_added = g.add_node({o1}, 1); + NodeAddedResult n2_added = g.add_node({o1}, 1_n); Node n2 = n2_added.node; DataflowOutput o2 = get_only(n2_added.outputs); - NodeAddedResult n3_added = g.add_node({o1, o2}, 1); + NodeAddedResult n3_added = g.add_node({o1, o2}, 1_n); Node n3 = n3_added.node; DataflowOutput o3 = get_only(n3_added.outputs); - NodeAddedResult n4_added = g.add_node({o2, o3}, 1); + NodeAddedResult n4_added = g.add_node({o2, o3}, 1_n); Node n4 = n4_added.node; DataflowOutput o4 = get_only(n4_added.outputs); diff --git 
a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/transitive_reduced_dataflow_graph/get_transitive_reduced_edges_across_split.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/transitive_reduced_dataflow_graph/get_transitive_reduced_edges_across_split.cc index 1f8f66b932..38b722ec70 100644 --- a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/transitive_reduced_dataflow_graph/get_transitive_reduced_edges_across_split.cc +++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/transitive_reduced_dataflow_graph/get_transitive_reduced_edges_across_split.cc @@ -25,19 +25,19 @@ TEST_SUITE(FF_TEST_SUITE) { auto make_leaf = [](Node const &n) { return BinarySPDecompositionTree{n}; }; SUBCASE("multiple nodes with edges across") { - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; DataflowOutput o1 = get_only(n1_added.outputs); - NodeAddedResult n2_added = g.add_node({}, 1); + NodeAddedResult n2_added = g.add_node({}, 1_n); Node n2 = n2_added.node; DataflowOutput o2 = get_only(n2_added.outputs); - NodeAddedResult n3_added = g.add_node({o2, o1}, 1); + NodeAddedResult n3_added = g.add_node({o2, o1}, 1_n); Node n3 = n3_added.node; DataflowOutput o3 = get_only(n3_added.outputs); - NodeAddedResult n4_added = g.add_node({o1}, 1); + NodeAddedResult n4_added = g.add_node({o1}, 1_n); Node n4 = n4_added.node; DataflowOutput o4 = get_only(n4_added.outputs); @@ -54,15 +54,15 @@ TEST_SUITE(FF_TEST_SUITE) { std::unordered_set correct = { DataflowEdge{ o1, - DataflowInput{n3, 1}, + DataflowInput{n3, 1_n}, }, DataflowEdge{ o2, - DataflowInput{n3, 0}, + DataflowInput{n3, 0_n}, }, DataflowEdge{ o1, - DataflowInput{n4, 0}, + DataflowInput{n4, 0_n}, }, }; @@ -70,12 +70,12 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("nodes each have multiple edges across") { - NodeAddedResult n1_added = g.add_node({}, 2); + NodeAddedResult n1_added = g.add_node({}, 2_n); Node n1 = n1_added.node; DataflowOutput n1_o1 = n1_added.outputs.at(0); DataflowOutput n1_o2 = n1_added.outputs.at(1); - NodeAddedResult n2_added = g.add_node({n1_o1, n1_o2, n1_o1}, 1); + NodeAddedResult n2_added = g.add_node({n1_o1, n1_o2, n1_o1}, 1_n); Node n2 = n2_added.node; TransitiveReducedDataflowGraphView tr_g = @@ -91,15 +91,15 @@ TEST_SUITE(FF_TEST_SUITE) { std::unordered_set correct = { DataflowEdge{ n1_o1, - DataflowInput{n2, 0}, + DataflowInput{n2, 0_n}, }, DataflowEdge{ n1_o2, - DataflowInput{n2, 1}, + DataflowInput{n2, 1_n}, }, DataflowEdge{ n1_o1, - DataflowInput{n2, 2}, + DataflowInput{n2, 2_n}, }, }; @@ -107,19 +107,19 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("does not return edges eliminated by transitive reduction") { - NodeAddedResult n1_added = g.add_node({}, 1); + NodeAddedResult n1_added = g.add_node({}, 1_n); Node n1 = n1_added.node; DataflowOutput o1 = get_only(n1_added.outputs); - NodeAddedResult n2_added = g.add_node({o1}, 1); + NodeAddedResult n2_added = g.add_node({o1}, 1_n); Node n2 = n2_added.node; DataflowOutput o2 = get_only(n2_added.outputs); - NodeAddedResult n3_added = g.add_node({o1, o2}, 1); + NodeAddedResult n3_added = g.add_node({o1, o2}, 1_n); Node n3 = n3_added.node; DataflowOutput o3 = get_only(n3_added.outputs); - NodeAddedResult n4_added = g.add_node({o2, o3}, 1); + NodeAddedResult n4_added = g.add_node({o2, o3}, 1_n); Node n4 = n4_added.node; DataflowOutput o4 = get_only(n4_added.outputs); @@ -136,7 +136,7 @@ TEST_SUITE(FF_TEST_SUITE) { std::unordered_set correct = { DataflowEdge{ o2, - DataflowInput{n3, 1}, + 
+              DataflowInput{n3, 1_n},
           },
       };
diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/transitive_reduced_dataflow_graph/get_transitive_reduced_outputs_across_split.cc b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/transitive_reduced_dataflow_graph/get_transitive_reduced_outputs_across_split.cc
index 0e77739434..f922721fde 100644
--- a/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/transitive_reduced_dataflow_graph/get_transitive_reduced_outputs_across_split.cc
+++ b/lib/utils/test/src/utils/graph/dataflow_graph/algorithms/transitive_reduced_dataflow_graph/get_transitive_reduced_outputs_across_split.cc
@@ -19,19 +19,19 @@ TEST_SUITE(FF_TEST_SUITE) {

     DataflowGraph g = DataflowGraph::create();

-    NodeAddedResult n1_added = g.add_node({}, 1);
+    NodeAddedResult n1_added = g.add_node({}, 1_n);
     Node n1 = n1_added.node;
     DataflowOutput o1 = get_only(n1_added.outputs);

-    NodeAddedResult n2_added = g.add_node({o1}, 1);
+    NodeAddedResult n2_added = g.add_node({o1}, 1_n);
     Node n2 = n2_added.node;
     DataflowOutput o2 = get_only(n2_added.outputs);

-    NodeAddedResult n3_added = g.add_node({o1, o2}, 1);
+    NodeAddedResult n3_added = g.add_node({o1, o2}, 1_n);
     Node n3 = n3_added.node;
     DataflowOutput o3 = get_only(n3_added.outputs);

-    NodeAddedResult n4_added = g.add_node({o2, o3}, 1);
+    NodeAddedResult n4_added = g.add_node({o2, o3}, 1_n);
     Node n4 = n4_added.node;
     DataflowOutput o4 = get_only(n4_added.outputs);
diff --git a/lib/utils/test/src/utils/graph/dataflow_graph/unordered_open_dataflow_graph.cc b/lib/utils/test/src/utils/graph/dataflow_graph/unordered_open_dataflow_graph.cc
index 7a3237d432..ec3ad86fe6 100644
--- a/lib/utils/test/src/utils/graph/dataflow_graph/unordered_open_dataflow_graph.cc
+++ b/lib/utils/test/src/utils/graph/dataflow_graph/unordered_open_dataflow_graph.cc
@@ -31,7 +31,7 @@ TEST_SUITE(FF_TEST_SUITE) {
       REQUIRE(result == correct);
     }

-    NodeAddedResult added = g.add_node({}, 2);
+    NodeAddedResult added = g.add_node({}, 2_n);

     {
       std::unordered_set<Node> result = g.query_nodes(node_query_all());
@@ -54,7 +54,7 @@ TEST_SUITE(FF_TEST_SUITE) {
       REQUIRE(result == correct);
     }

-    NodeAddedResult added2 = g.add_node(added.outputs, 3);
+    NodeAddedResult added2 = g.add_node(added.outputs, 3_n);

     {
       std::unordered_set<Node> result = g.query_nodes(node_query_all());
@@ -66,8 +66,8 @@ TEST_SUITE(FF_TEST_SUITE) {
     {
       std::unordered_set<DataflowEdge> result =
           g.query_edges(dataflow_edge_query_all());
       std::unordered_set<DataflowEdge> correct = {
-          DataflowEdge{added.outputs.at(0), DataflowInput{added2.node, 0}},
-          DataflowEdge{added.outputs.at(1), DataflowInput{added2.node, 1}},
+          DataflowEdge{added.outputs.at(0), DataflowInput{added2.node, 0_n}},
+          DataflowEdge{added.outputs.at(1), DataflowInput{added2.node, 1_n}},
       };
       REQUIRE(result == correct);
     }
diff --git a/lib/utils/test/src/utils/graph/multidigraph/algorithms/add_edges.cc b/lib/utils/test/src/utils/graph/multidigraph/algorithms/add_edges.cc
index 93d3d9605b..d9d91a03e9 100644
--- a/lib/utils/test/src/utils/graph/multidigraph/algorithms/add_edges.cc
+++ b/lib/utils/test/src/utils/graph/multidigraph/algorithms/add_edges.cc
@@ -10,7 +10,7 @@ TEST_SUITE(FF_TEST_SUITE) {
   TEST_CASE("add_edges(MultiDiGraph &, std::vector<std::pair<Node, Node>>)") {
     MultiDiGraph g = MultiDiGraph::create();

-    std::vector<Node> n = add_nodes(g, 3);
+    std::vector<Node> n = add_nodes(g, 3_n);

     std::vector<std::pair<Node, Node>> input = {
         {n.at(0), n.at(1)},
diff --git a/lib/utils/test/src/utils/graph/multidigraph/algorithms/add_nodes.cc b/lib/utils/test/src/utils/graph/multidigraph/algorithms/add_nodes.cc
index e41bf33d6c..e3d9ee6a29 100644
--- a/lib/utils/test/src/utils/graph/multidigraph/algorithms/add_nodes.cc
+++ b/lib/utils/test/src/utils/graph/multidigraph/algorithms/add_nodes.cc
@@ -9,7 +9,7 @@ TEST_SUITE(FF_TEST_SUITE) {
   TEST_CASE("add_nodes(MultiDiGraph &, int)") {
     MultiDiGraph g = MultiDiGraph::create();

-    std::unordered_set<Node> result = unordered_set_of(add_nodes(g, 3));
+    std::unordered_set<Node> result = unordered_set_of(add_nodes(g, 3_n));
     std::unordered_set<Node> correct = g.query_nodes(node_query_all());

     CHECK(result == correct);
diff --git a/lib/utils/test/src/utils/graph/multidigraph/algorithms/get_edges.cc b/lib/utils/test/src/utils/graph/multidigraph/algorithms/get_edges.cc
index aef6d9baff..0dfcc8a851 100644
--- a/lib/utils/test/src/utils/graph/multidigraph/algorithms/get_edges.cc
+++ b/lib/utils/test/src/utils/graph/multidigraph/algorithms/get_edges.cc
@@ -11,7 +11,7 @@ TEST_SUITE(FF_TEST_SUITE) {
   TEST_CASE("get_edges(MultiDiGraphView)") {
     MultiDiGraph g = MultiDiGraph::create();

-    std::vector<Node> n = add_nodes(g, 3);
+    std::vector<Node> n = add_nodes(g, 3_n);
     std::vector<MultiDiEdge> e = add_edges(g,
                                            {
                                                {n.at(0), n.at(1)},
diff --git a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/find_isomorphism.cc b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/find_isomorphism.cc
index 78aaa8d9fc..55b7b34e52 100644
--- a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/find_isomorphism.cc
+++ b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/find_isomorphism.cc
@@ -26,12 +26,13 @@ TEST_SUITE(FF_TEST_SUITE) {
     SUBCASE("input graphs are not empty") {
       DataflowGraphInput g1_i1 = g1.add_input();

-      NodeAddedResult g1_n1_added = g1.add_node({OpenDataflowValue{g1_i1}}, 1);
+      NodeAddedResult g1_n1_added =
+          g1.add_node({OpenDataflowValue{g1_i1}}, 1_n);
       Node g1_n1_node = g1_n1_added.node;
       DataflowOutput g1_n1_output = get_only(g1_n1_added.outputs);

       NodeAddedResult g1_n2_added = g1.add_node(
-          {OpenDataflowValue{g1_i1}, OpenDataflowValue{g1_n1_output}}, 1);
+          {OpenDataflowValue{g1_i1}, OpenDataflowValue{g1_n1_output}}, 1_n);
       Node g1_n2_node = g1_n2_added.node;

       SUBCASE("one graph is empty") {
@@ -46,11 +47,11 @@ TEST_SUITE(FF_TEST_SUITE) {
       SUBCASE("input graphs are isomorphic") {
         DataflowGraphInput g2_i1 = g2.add_input();
         NodeAddedResult g2_n1_added =
-            g2.add_node({OpenDataflowValue{g2_i1}}, 1);
+            g2.add_node({OpenDataflowValue{g2_i1}}, 1_n);
         Node g2_n1_node = g2_n1_added.node;
         DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs);
         NodeAddedResult g2_n2_added = g2.add_node(
-            {OpenDataflowValue{g2_i1}, OpenDataflowValue{g2_n1_output}}, 1);
+            {OpenDataflowValue{g2_i1}, OpenDataflowValue{g2_n1_output}}, 1_n);
         Node g2_n2_node = g2_n2_added.node;

         std::optional correct =
@@ -75,11 +76,11 @@ TEST_SUITE(FF_TEST_SUITE) {
         DataflowGraphInput g2_i1 = g2.add_input();
         DataflowGraphInput g2_i2 = g2.add_input();
         NodeAddedResult g2_n1_added =
-            g2.add_node({OpenDataflowValue{g2_i1}}, 1);
+            g2.add_node({OpenDataflowValue{g2_i1}}, 1_n);
         Node g2_n1_node = g2_n1_added.node;
         DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs);
         NodeAddedResult g2_n2_added = g2.add_node(
-            {OpenDataflowValue{g2_i1}, OpenDataflowValue{g2_n1_output}}, 1);
+            {OpenDataflowValue{g2_i1}, OpenDataflowValue{g2_n1_output}}, 1_n);
         Node g2_n2_node = g2_n2_added.node;

         std::optional correct = std::nullopt;
@@ -93,12 +94,12 @@ TEST_SUITE(FF_TEST_SUITE) {
       SUBCASE("input graphs are not isomorphic (different connectivity)") {
         DataflowGraphInput g2_i1 = g2.add_input();
         NodeAddedResult g2_n1_added =
-            g2.add_node({OpenDataflowValue{g2_i1}}, 1);
+            g2.add_node({OpenDataflowValue{g2_i1}}, 1_n);
         Node g2_n1_node = g2_n1_added.node;
         DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs);
         NodeAddedResult g2_n2_added = g2.add_node(
             {OpenDataflowValue{g2_n1_output}, OpenDataflowValue{g2_n1_output}},
-            1);
+            1_n);
         Node g2_n2_node = g2_n2_added.node;

         std::optional correct = std::nullopt;
@@ -112,14 +113,14 @@ TEST_SUITE(FF_TEST_SUITE) {
       SUBCASE("input graphs are not isomorphic (different numbers of nodes)") {
         DataflowGraphInput g2_i1 = g2.add_input();
         NodeAddedResult g2_n1_added =
-            g2.add_node({OpenDataflowValue{g2_i1}}, 1);
+            g2.add_node({OpenDataflowValue{g2_i1}}, 1_n);
         Node g2_n1_node = g2_n1_added.node;
         DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs);
         NodeAddedResult g2_n2_added = g2.add_node(
-            {OpenDataflowValue{g2_i1}, OpenDataflowValue{g2_n1_output}}, 1);
+            {OpenDataflowValue{g2_i1}, OpenDataflowValue{g2_n1_output}}, 1_n);
         Node g2_n2_node = g2_n2_added.node;

-        NodeAddedResult g2_n3_added = g2.add_node({}, 0);
+        NodeAddedResult g2_n3_added = g2.add_node({}, 0_n);
         Node g2_n3_node = g2_n3_added.node;

         std::optional correct = std::nullopt;
diff --git a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_graph_inputs.cc b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_graph_inputs.cc
index ff75e8fe48..fd54b801ce 100644
--- a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_graph_inputs.cc
+++ b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_graph_inputs.cc
@@ -13,7 +13,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     DataflowGraphInput i0 = g.add_input();
     DataflowGraphInput i1 = g.add_input();

-    NodeAddedResult n0_added = g.add_node({}, 1);
+    NodeAddedResult n0_added = g.add_node({}, 1_n);

     std::unordered_set<DataflowGraphInput> result =
         get_open_dataflow_graph_inputs(g);
diff --git a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_value_uses.cc b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_value_uses.cc
index 7496c3009d..c7d294a588 100644
--- a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_value_uses.cc
+++ b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/get_open_dataflow_value_uses.cc
@@ -18,19 +18,19 @@ TEST_SUITE(FF_TEST_SUITE) {
     NodeAddedResult n0_added = g.add_node(
         {OpenDataflowValue{i0}, OpenDataflowValue{i1}, OpenDataflowValue{i0}},
-        1);
+        1_n);
     Node n0 = n0_added.node;
     DataflowOutput o0 = get_only(n0_added.outputs);

     NodeAddedResult n1_added = g.add_node(
         {OpenDataflowValue{i1}, OpenDataflowValue{o0}, OpenDataflowValue{i0}},
-        1);
+        1_n);
     Node n1 = n1_added.node;

     std::unordered_set<DataflowInput> correct = {
-        DataflowInput{n0, 0},
-        DataflowInput{n0, 2},
-        DataflowInput{n1, 2},
+        DataflowInput{n0, 0_n},
+        DataflowInput{n0, 2_n},
+        DataflowInput{n1, 2_n},
     };

     std::unordered_set<DataflowInput> result =
@@ -45,7 +45,7 @@ TEST_SUITE(FF_TEST_SUITE) {

     DataflowGraphInput i0 = g.add_input();

-    NodeAddedResult n0_added = g.add_node({OpenDataflowValue{i0}}, 2);
+    NodeAddedResult n0_added = g.add_node({OpenDataflowValue{i0}}, 2_n);
     Node n0 = n0_added.node;
     DataflowOutput o0_0 = n0_added.outputs.at(0);
     DataflowOutput o0_1 = n0_added.outputs.at(1);
@@ -53,16 +53,16 @@ TEST_SUITE(FF_TEST_SUITE) {
     NodeAddedResult n1_added =
         g.add_node({OpenDataflowValue{i0},
                     OpenDataflowValue{o0_1},
                     OpenDataflowValue{o0_0}},
-        1);
+        1_n);
     Node n1 = n1_added.node;

     NodeAddedResult n2_added =
-        g.add_node({OpenDataflowValue{o0_1}, OpenDataflowValue{i0}}, 1);
+        g.add_node({OpenDataflowValue{o0_1}, OpenDataflowValue{i0}}, 1_n);
     Node n2 = n2_added.node;

     std::unordered_set<DataflowInput> correct = {
-        DataflowInput{n1, 1},
-        DataflowInput{n2, 0},
+        DataflowInput{n1, 1_n},
+        DataflowInput{n2, 0_n},
     };

     std::unordered_set<DataflowInput> result =
diff --git a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/get_unused_open_dataflow_graph_inputs.cc b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/get_unused_open_dataflow_graph_inputs.cc
index ddd6d74119..e1a2062865 100644
--- a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/get_unused_open_dataflow_graph_inputs.cc
+++ b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/get_unused_open_dataflow_graph_inputs.cc
@@ -13,7 +13,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     DataflowGraphInput g_i2 = g.add_input();
     DataflowGraphInput g_i3 = g.add_input();

-    NodeAddedResult g_n1_added = g.add_node({OpenDataflowValue{g_i2}}, 1);
+    NodeAddedResult g_n1_added = g.add_node({OpenDataflowValue{g_i2}}, 1_n);

     std::unordered_set<DataflowGraphInput> result =
         get_unused_open_dataflow_graph_inputs(g);
@@ -28,7 +28,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     DataflowGraphInput g_i2 = g.add_input();

     NodeAddedResult g_n1_added =
-        g.add_node({OpenDataflowValue{g_i1}, OpenDataflowValue{g_i2}}, 1);
+        g.add_node({OpenDataflowValue{g_i1}, OpenDataflowValue{g_i2}}, 1_n);

     std::unordered_set<DataflowGraphInput> result =
         get_unused_open_dataflow_graph_inputs(g);
diff --git a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/open_dataflow_graphs_are_isomorphic.cc b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/open_dataflow_graphs_are_isomorphic.cc
index bdb1bb4814..c53e069f68 100644
--- a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/open_dataflow_graphs_are_isomorphic.cc
+++ b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/open_dataflow_graphs_are_isomorphic.cc
@@ -21,12 +21,13 @@ TEST_SUITE(FF_TEST_SUITE) {
     SUBCASE("input graphs are not empty") {
       DataflowGraphInput g1_i1 = g1.add_input();

-      NodeAddedResult g1_n1_added = g1.add_node({OpenDataflowValue{g1_i1}}, 1);
+      NodeAddedResult g1_n1_added =
+          g1.add_node({OpenDataflowValue{g1_i1}}, 1_n);
       Node g1_n1_node = g1_n1_added.node;
       DataflowOutput g1_n1_output = get_only(g1_n1_added.outputs);

       NodeAddedResult g1_n2_added = g1.add_node(
-          {OpenDataflowValue{g1_i1}, OpenDataflowValue{g1_n1_output}}, 1);
+          {OpenDataflowValue{g1_i1}, OpenDataflowValue{g1_n1_output}}, 1_n);
       Node g1_n2_node = g1_n2_added.node;

       SUBCASE("one input graph is empty") {
@@ -39,11 +40,11 @@ TEST_SUITE(FF_TEST_SUITE) {
       SUBCASE("input graphs are isomorphic") {
         DataflowGraphInput g2_i1 = g2.add_input();
         NodeAddedResult g2_n1_added =
-            g2.add_node({OpenDataflowValue{g2_i1}}, 1);
+            g2.add_node({OpenDataflowValue{g2_i1}}, 1_n);
         Node g2_n1_node = g2_n1_added.node;
         DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs);
         NodeAddedResult g2_n2_added = g2.add_node(
-            {OpenDataflowValue{g2_i1}, OpenDataflowValue{g2_n1_output}}, 1);
+            {OpenDataflowValue{g2_i1}, OpenDataflowValue{g2_n1_output}}, 1_n);
         Node g2_n2_node = g2_n2_added.node;

         bool correct = true;
@@ -57,11 +58,11 @@ TEST_SUITE(FF_TEST_SUITE) {
         DataflowGraphInput g2_i1 = g2.add_input();
         DataflowGraphInput g2_i2 = g2.add_input();
         NodeAddedResult g2_n1_added =
-            g2.add_node({OpenDataflowValue{g2_i1}}, 1);
+            g2.add_node({OpenDataflowValue{g2_i1}}, 1_n);
         Node g2_n1_node = g2_n1_added.node;
         DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs);
         NodeAddedResult g2_n2_added = g2.add_node(
-            {OpenDataflowValue{g2_i1}, OpenDataflowValue{g2_n1_output}}, 1);
+            {OpenDataflowValue{g2_i1}, OpenDataflowValue{g2_n1_output}}, 1_n);
         Node g2_n2_node = g2_n2_added.node;

         bool correct = false;
@@ -73,12 +74,12 @@ TEST_SUITE(FF_TEST_SUITE) {
       SUBCASE("input graphs are not isomorphic (different connectivity)") {
         DataflowGraphInput g2_i1 = g2.add_input();
         NodeAddedResult g2_n1_added =
-            g2.add_node({OpenDataflowValue{g2_i1}}, 1);
+            g2.add_node({OpenDataflowValue{g2_i1}}, 1_n);
         Node g2_n1_node = g2_n1_added.node;
         DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs);
         NodeAddedResult g2_n2_added = g2.add_node(
             {OpenDataflowValue{g2_n1_output}, OpenDataflowValue{g2_n1_output}},
-            1);
+            1_n);
         Node g2_n2_node = g2_n2_added.node;

         bool correct = false;
@@ -90,14 +91,14 @@ TEST_SUITE(FF_TEST_SUITE) {
      SUBCASE("input graphs are not isomorphic (different numbers of nodes)") {
         DataflowGraphInput g2_i1 = g2.add_input();
         NodeAddedResult g2_n1_added =
-            g2.add_node({OpenDataflowValue{g2_i1}}, 1);
+            g2.add_node({OpenDataflowValue{g2_i1}}, 1_n);
         Node g2_n1_node = g2_n1_added.node;
         DataflowOutput g2_n1_output = get_only(g2_n1_added.outputs);
         NodeAddedResult g2_n2_added = g2.add_node(
-            {OpenDataflowValue{g2_i1}, OpenDataflowValue{g2_n1_output}}, 1);
+            {OpenDataflowValue{g2_i1}, OpenDataflowValue{g2_n1_output}}, 1_n);
         Node g2_n2_node = g2_n2_added.node;

-        NodeAddedResult g2_n3_added = g2.add_node({}, 0);
+        NodeAddedResult g2_n3_added = g2.add_node({}, 0_n);
         Node g2_n3_node = g2_n3_added.node;

         bool correct = false;
diff --git a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/permute_input_ids.cc b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/permute_input_ids.cc
index b565e46e67..90682cf0f0 100644
--- a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/permute_input_ids.cc
+++ b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/permute_input_ids.cc
@@ -17,11 +17,11 @@ TEST_SUITE(FF_TEST_SUITE) {
     DataflowGraphInput i0 = g.add_input();
     DataflowGraphInput i1 = g.add_input();

-    NodeAddedResult n0_added = g.add_node({OpenDataflowValue{i0}}, 1);
+    NodeAddedResult n0_added = g.add_node({OpenDataflowValue{i0}}, 1_n);
     Node n0 = n0_added.node;
     DataflowOutput n0_output = get_only(n0_added.outputs);

-    NodeAddedResult n1_added = g.add_node({OpenDataflowValue{n0_output}}, 1);
+    NodeAddedResult n1_added = g.add_node({OpenDataflowValue{n0_output}}, 1_n);
     Node n1 = n1_added.node;
     DataflowOutput n1_output = get_only(n1_added.outputs);

@@ -44,7 +44,7 @@ TEST_SUITE(FF_TEST_SUITE) {
             new_i0,
             DataflowInput{
                 n0,
-                0,
+                0_n,
             },
         },
     },
@@ -52,11 +52,11 @@ TEST_SUITE(FF_TEST_SUITE) {
         DataflowEdge{
             DataflowOutput{
                 n0,
-                0,
+                0_n,
             },
             DataflowInput{
                 n1,
-                0,
+                0_n,
             },
         },
     },
@@ -65,11 +65,11 @@ TEST_SUITE(FF_TEST_SUITE) {
     {
         DataflowOutput{
             n0,
-            0,
+            0_n,
        },
        DataflowOutput{
            n1,
-            0,
+            0_n,
        },
    },
 };
diff --git a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/permute_node_ids.cc b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/permute_node_ids.cc
index 36bcd16dad..1e7ad87d88 100644
--- a/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/permute_node_ids.cc
+++ b/lib/utils/test/src/utils/graph/open_dataflow_graph/algorithms/permute_node_ids.cc
@@ -17,12 +17,12 @@ TEST_SUITE(FF_TEST_SUITE) {

     DataflowGraphInput i0 = g.add_input();

-    NodeAddedResult n0_added = g.add_node({OpenDataflowValue{i0}}, 1);
+    NodeAddedResult n0_added = g.add_node({OpenDataflowValue{i0}}, 1_n);
     Node n0 = n0_added.node;
     DataflowOutput n0_output = get_only(n0_added.outputs);

     NodeAddedResult n1_added =
-        g.add_node({OpenDataflowValue{i0}, OpenDataflowValue{n0_output}}, 1);
+        g.add_node({OpenDataflowValue{i0}, OpenDataflowValue{n0_output}}, 1_n);
     Node n1 = n1_added.node;
     DataflowOutput n1_output = get_only(n1_added.outputs);

@@ -45,7 +45,7 @@ TEST_SUITE(FF_TEST_SUITE) {
             i0,
             DataflowInput{
                 new_node0,
-                0,
+                0_n,
             },
         },
     },
@@ -54,7 +54,7 @@ TEST_SUITE(FF_TEST_SUITE) {
             i0,
             DataflowInput{
                 new_node1,
-                0,
+                0_n,
             },
         },
     },
@@ -62,11 +62,11 @@ TEST_SUITE(FF_TEST_SUITE) {
        DataflowEdge{
            DataflowOutput{
                new_node0,
-                0,
+                0_n,
            },
            DataflowInput{
                new_node1,
-                1,
+                1_n,
            },
        },
    },
@@ -75,11 +75,11 @@ TEST_SUITE(FF_TEST_SUITE) {
    {
        DataflowOutput{
            new_node0,
-            0,
+            0_n,
        },
        DataflowOutput{
            new_node1,
-            0,
+            0_n,
        },
    },
 };
@@ -109,9 +109,9 @@ TEST_SUITE(FF_TEST_SUITE) {
     SUBCASE("check access to old edges") {
       OpenDataflowEdgeQuery query = OpenDataflowEdgeQuery{
           dataflow_input_edge_query_for_edge(
-              DataflowInputEdge{i0, DataflowInput{n0, 0}}),
+              DataflowInputEdge{i0, DataflowInput{n0, 0_n}}),
           dataflow_edge_query_for_edge(
-              DataflowEdge{n0_output, DataflowInput{n1, 1}}),
+              DataflowEdge{n0_output, DataflowInput{n1, 1_n}}),
       };

       std::unordered_set<OpenDataflowEdge> result_nodes =
           result.query_edges(query);
@@ -121,12 +121,12 @@ TEST_SUITE(FF_TEST_SUITE) {
     SUBCASE("check access to new edges") {
       DataflowEdge new_standard_edge = DataflowEdge{
-          DataflowOutput{new_node0, 0},
-          DataflowInput{new_node1, 1},
+          DataflowOutput{new_node0, 0_n},
+          DataflowInput{new_node1, 1_n},
       };
       DataflowInputEdge new_input_edge = DataflowInputEdge{
           i0,
-          DataflowInput{new_node0, 0},
+          DataflowInput{new_node0, 0_n},
       };
       OpenDataflowEdgeQuery query = OpenDataflowEdgeQuery{
           dataflow_input_edge_query_for_edge(new_input_edge),
@@ -159,7 +159,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     }

     SUBCASE("check access to new outputs") {
-      DataflowOutput new_output = DataflowOutput{new_node0, 0};
+      DataflowOutput new_output = DataflowOutput{new_node0, 0_n};

       DataflowOutputQuery query =
           dataflow_output_query_for_output(new_output);
diff --git a/lib/utils/test/src/utils/graph/series_parallel/parallel_reduction.cc b/lib/utils/test/src/utils/graph/series_parallel/parallel_reduction.cc
index a62f528bcf..a2f818b5e9 100644
--- a/lib/utils/test/src/utils/graph/series_parallel/parallel_reduction.cc
+++ b/lib/utils/test/src/utils/graph/series_parallel/parallel_reduction.cc
@@ -14,7 +14,7 @@ TEST_SUITE(FF_TEST_SUITE) {
   TEST_CASE("find_parallel_reduction") {
     MultiDiGraph g = MultiDiGraph::create();
     SUBCASE("base case") {
-      std::vector<Node> n = add_nodes(g, 2);
+      std::vector<Node> n = add_nodes(g, 2_n);
       std::vector<MultiDiEdge> e = add_edges(g,
                                              {
                                                  {n.at(0), n.at(1)},
@@ -28,7 +28,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     }

     SUBCASE("does not apply when there is only one edge") {
-      std::vector<Node> n = add_nodes(g, 2);
+      std::vector<Node> n = add_nodes(g, 2_n);
       std::vector<MultiDiEdge> e = add_edges(g,
                                              {
                                                  {n.at(0), n.at(1)},
@@ -40,7 +40,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     }

     SUBCASE("requires both ends be the same") {
-      std::vector<Node> n = add_nodes(g, 3);
+      std::vector<Node> n = add_nodes(g, 3_n);
       SUBCASE("branch out") {
         std::vector<MultiDiEdge> e = add_edges(g,
                                                {
@@ -67,7 +67,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     }

     SUBCASE("finds one reduction when there are multiple") {
-      std::vector<Node> n = add_nodes(g, 2);
+      std::vector<Node> n = add_nodes(g, 2_n);
       std::vector<MultiDiEdge> e = add_edges(g,
                                              {
                                                  {n.at(0), n.at(1)},
@@ -86,7 +86,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     }

     SUBCASE("in larger graph") {
-      std::vector<Node> n = add_nodes(g, 5);
+      std::vector<Node> n = add_nodes(g, 5_n);
       std::vector<MultiDiEdge> e = add_edges(g,
                                              {
                                                  {n.at(0), n.at(1)},
@@ -109,7 +109,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     MultiDiGraph g = MultiDiGraph::create();

     SUBCASE("base case") {
-      std::vector<Node> n = add_nodes(g, 2);
+      std::vector<Node> n = add_nodes(g, 2_n);
       std::vector<MultiDiEdge> e = add_edges(g,
                                              {
                                                  {n.at(0), n.at(1)},
@@ -142,7 +142,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     }

     SUBCASE("in larger graph") {
-      std::vector<Node> n = add_nodes(g, 5);
+      std::vector<Node> n = add_nodes(g, 5_n);
       std::vector<MultiDiEdge> e = add_edges(g,
                                              {
                                                  {n.at(0), n.at(1)},
diff --git a/lib/utils/test/src/utils/graph/series_parallel/series_reduction.cc b/lib/utils/test/src/utils/graph/series_parallel/series_reduction.cc
index c6b45ec6ce..4bb57aeb0d 100644
--- a/lib/utils/test/src/utils/graph/series_parallel/series_reduction.cc
+++ b/lib/utils/test/src/utils/graph/series_parallel/series_reduction.cc
@@ -12,7 +12,7 @@ using namespace ::FlexFlow;
 TEST_SUITE(FF_TEST_SUITE) {
   TEST_CASE("get_pre/post/center_node") {
     MultiDiGraph g = MultiDiGraph::create();
-    std::vector<Node> n = add_nodes(g, 3);
+    std::vector<Node> n = add_nodes(g, 3_n);
     std::vector<MultiDiEdge> e = add_edges(g,
                                            {
                                                {n.at(0), n.at(1)},
@@ -42,7 +42,7 @@ TEST_SUITE(FF_TEST_SUITE) {
   TEST_CASE("find_series_reduction") {
     MultiDiGraph g = MultiDiGraph::create();
     SUBCASE("base case") {
-      std::vector<Node> n = add_nodes(g, 3);
+      std::vector<Node> n = add_nodes(g, 3_n);
       std::vector<MultiDiEdge> e = add_edges(g,
                                              {
                                                  {n.at(0), n.at(1)},
@@ -57,7 +57,7 @@ TEST_SUITE(FF_TEST_SUITE) {

     SUBCASE("does not find if other edges are involved with center node") {
       SUBCASE("duplicate edge") {
-        std::vector<Node> n = add_nodes(g, 3);
+        std::vector<Node> n = add_nodes(g, 3_n);
         std::vector<MultiDiEdge> e = add_edges(g,
                                                {
                                                    {n.at(0), n.at(1)},
@@ -71,7 +71,7 @@ TEST_SUITE(FF_TEST_SUITE) {
       }

       SUBCASE("misc edge") {
-        std::vector<Node> n = add_nodes(g, 4);
+        std::vector<Node> n = add_nodes(g, 4_n);
         std::vector<MultiDiEdge> e = add_edges(g,
                                                {
                                                    {n.at(0), n.at(1)},
@@ -86,7 +86,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     }

     SUBCASE("does find if other edges are involved with non-center node") {
-      std::vector<Node> n = add_nodes(g, 4);
+      std::vector<Node> n = add_nodes(g, 4_n);
       SUBCASE("edge from dst") {
         std::vector<MultiDiEdge> e = add_edges(g,
                                                {
@@ -107,7 +107,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     }

     SUBCASE("finds one reduction when there are multiple") {
-      std::vector<Node> n = add_nodes(g, 4);
+      std::vector<Node> n = add_nodes(g, 4_n);
       std::vector<MultiDiEdge> e = add_edges(g,
                                              {
                                                  {n.at(0), n.at(1)},
@@ -125,7 +125,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     }

     SUBCASE("in larger graph") {
-      std::vector<Node> n = add_nodes(g, 8);
+      std::vector<Node> n = add_nodes(g, 8_n);
       std::vector<MultiDiEdge> e = add_edges(g,
                                              {
                                                  {n.at(0), n.at(2)},
@@ -149,7 +149,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     MultiDiGraph g = MultiDiGraph::create();

     SUBCASE("base case") {
-      std::vector<Node> n = add_nodes(g, 3);
+      std::vector<Node> n = add_nodes(g, 3_n);
       std::vector<MultiDiEdge> e = add_edges(g,
                                              {
                                                  {n.at(0), n.at(1)},
@@ -188,7 +188,7 @@ TEST_SUITE(FF_TEST_SUITE) {
     }

     SUBCASE("in larger graph") {
-      std::vector<Node> n = add_nodes(g, 8);
+      std::vector<Node> n = add_nodes(g, 8_n);
       std::vector<MultiDiEdge> e = add_edges(g,
                                              {
                                                  {n.at(0), n.at(2)},
diff --git a/lib/utils/test/src/utils/nonnegative_int/ceildiv.cc b/lib/utils/test/src/utils/nonnegative_int/ceildiv.cc
new file mode 100644
index 0000000000..7ac882ff9f
--- /dev/null
+++ b/lib/utils/test/src/utils/nonnegative_int/ceildiv.cc
@@ -0,0 +1,52 @@
+#include "utils/nonnegative_int/ceildiv.h"
+#include <doctest/doctest.h>
+
+using namespace ::FlexFlow;
+
+TEST_SUITE(FF_TEST_SUITE) {
+  TEST_CASE("ceildiv(nonnegative_int, nonnegative_int)") {
+    SUBCASE("divides evenly") {
+      nonnegative_int numerator = 12_n;
+      nonnegative_int denominator = 3_n;
+
+      nonnegative_int result = ceildiv(numerator, denominator);
+      nonnegative_int correct = 4_n;
+
+      CHECK(result == correct);
+    }
+
+    SUBCASE("does not divide evenly") {
+      nonnegative_int numerator = 17_n;
+      nonnegative_int denominator = 4_n;
+
+      nonnegative_int result = ceildiv(numerator, denominator);
+      nonnegative_int correct = 5_n;
+
+      CHECK(result == correct);
+    }
+
+    SUBCASE("denominator is zero") {
+      nonnegative_int numerator = 15_n;
+      nonnegative_int denominator = 0_n;
+
+      CHECK_THROWS(ceildiv(numerator, denominator));
+    }
+
+    SUBCASE("numerator is zero") {
+      nonnegative_int numerator = 0_n;
+      nonnegative_int denominator = 1_n;
+
+      nonnegative_int result = ceildiv(numerator, denominator);
+      nonnegative_int correct = 0_n;
+
+      CHECK(result == correct);
+    }
+
+    SUBCASE("denominator and numerator are zero") {
+      nonnegative_int numerator = 0_n;
+      nonnegative_int denominator = 0_n;
+
+      CHECK_THROWS(ceildiv(numerator, denominator));
+    }
+  }
+}
diff --git a/lib/utils/test/src/utils/nonnegative_int/nonnegative_int.cc b/lib/utils/test/src/utils/nonnegative_int/nonnegative_int.cc
index 73d382d830..dfde11f9bd 100644
--- a/lib/utils/test/src/utils/nonnegative_int/nonnegative_int.cc
+++ b/lib/utils/test/src/utils/nonnegative_int/nonnegative_int.cc
@@ -198,13 +198,89 @@ TEST_SUITE(FF_TEST_SUITE) {
     }
   }

-  TEST_CASE("nonnegative_int + operation") {
-    nonnegative_int nn_int_1a = nonnegative_int{1};
-    nonnegative_int nn_int_1b = nonnegative_int{1};
-    nonnegative_int nn_int_2 = nonnegative_int{2};
-    SUBCASE("LHS: nonnegative_int, RHS: nonnegative_int") {
-      CHECK(nn_int_1a + nn_int_1b == nn_int_2);
-    }
+  TEST_CASE("nonnegative_int::operator+(nonnegative_int)") {
+    nonnegative_int result = nonnegative_int{1} + nonnegative_int{2};
+    nonnegative_int correct = nonnegative_int{3};
+
+    CHECK(result == correct);
+  }
+
+  TEST_CASE("nonnegative_int::operator++() (pre-increment)") {
+    nonnegative_int input = nonnegative_int{1};
+
+    nonnegative_int result = ++input;
+    nonnegative_int correct = nonnegative_int{2};
+
+    CHECK(result == correct);
+    CHECK(input == correct);
+  }
+
+  TEST_CASE("nonnegative_int::operator++(int) (post-increment)") {
+    nonnegative_int input = nonnegative_int{1};
+
+    nonnegative_int result = input++;
+    nonnegative_int correct_input = nonnegative_int{2};
+    nonnegative_int correct_result = nonnegative_int{1};
+
+    CHECK(result == correct_result);
+    CHECK(input == correct_input);
+  }
+
+  TEST_CASE("nonnegative_int::operator+=(nonnegative_int)") {
+    nonnegative_int result = nonnegative_int{1};
+    result += nonnegative_int{3};
+
+    nonnegative_int correct = nonnegative_int{4};
+
+    CHECK(result == correct);
+  }
+
+  TEST_CASE("nonnegative_int::operator*(nonnegative_int)") {
+    nonnegative_int result = nonnegative_int{2} * nonnegative_int{3};
+    nonnegative_int correct = nonnegative_int{6};
+
+    CHECK(result == correct);
+  }
+
+  TEST_CASE("nonnegative_int::operator*=(nonnegative_int)") {
+    nonnegative_int result = nonnegative_int{3};
+    result *= nonnegative_int{6};
+
+    nonnegative_int correct = nonnegative_int{18};
+
+    CHECK(result == correct);
+  }
+
+  TEST_CASE("nonnegative_int::operator/(nonnegative_int)") {
+    nonnegative_int result = nonnegative_int{5} / nonnegative_int{2};
+    nonnegative_int correct = nonnegative_int{2};
+
+    CHECK(result == correct);
+  }
+
+  TEST_CASE("nonnegative_int::operator/=(nonnegative_int)") {
+    nonnegative_int result = nonnegative_int{13};
+    result /= nonnegative_int{3};
+
+    nonnegative_int correct = nonnegative_int{4};
+
+    CHECK(result == correct);
+  }
+
+  TEST_CASE("nonnegative_int::operator%(nonnegative_int)") {
+    nonnegative_int result = nonnegative_int{5} % nonnegative_int{2};
+    nonnegative_int correct = nonnegative_int{1};
+
+    CHECK(result == correct);
+  }
+
TEST_CASE("nonnegative_int::operator%=(nonnegative_int)") { + nonnegative_int result = nonnegative_int{15}; + result %= nonnegative_int{4}; + + nonnegative_int correct = nonnegative_int{3}; + + CHECK(result == correct); } TEST_CASE("adl_serializer") { diff --git a/lib/utils/test/src/utils/nonnegative_int/nonnegative_range.cc b/lib/utils/test/src/utils/nonnegative_int/nonnegative_range.cc new file mode 100644 index 0000000000..db8fca295e --- /dev/null +++ b/lib/utils/test/src/utils/nonnegative_int/nonnegative_range.cc @@ -0,0 +1,42 @@ +#include "utils/nonnegative_int/nonnegative_range.h" +#include "test/utils/doctest/fmt/vector.h" +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("nonnegative_range(nonnegative_int)") { + SUBCASE("bound is greater than zero") { + std::vector result = + nonnegative_range(nonnegative_int{3}); + std::vector correct = { + nonnegative_int{0}, + nonnegative_int{1}, + nonnegative_int{2}, + }; + + CHECK(result == correct); + } + + SUBCASE("bound is zero") { + std::vector result = + nonnegative_range(nonnegative_int{0}); + std::vector correct = {}; + + CHECK(result == correct); + } + } + + TEST_CASE("nonnegative_range(nonnegative_int, nonnegative_int, int)") { + std::vector result = nonnegative_range( + /*start=*/nonnegative_int{7}, + /*end=*/nonnegative_int{3}, + /*step=*/-2); + std::vector correct = { + nonnegative_int{7}, + nonnegative_int{5}, + }; + + CHECK(result == correct); + } +} diff --git a/lib/utils/test/src/utils/nonnegative_int/num_elements.cc b/lib/utils/test/src/utils/nonnegative_int/num_elements.cc new file mode 100644 index 0000000000..0878be0410 --- /dev/null +++ b/lib/utils/test/src/utils/nonnegative_int/num_elements.cc @@ -0,0 +1,15 @@ +#include "utils/nonnegative_int/num_elements.h" +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("num_elements") { + std::vector input = {-1, 3, 3, 1}; + + nonnegative_int result = num_elements(input); + nonnegative_int correct = nonnegative_int{4}; + + CHECK(result == correct); + } +} diff --git a/lib/utils/test/src/utils/random_utils.cc b/lib/utils/test/src/utils/random_utils.cc index 8e7d22138f..fdc48a64dd 100644 --- a/lib/utils/test/src/utils/random_utils.cc +++ b/lib/utils/test/src/utils/random_utils.cc @@ -29,7 +29,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("correct distribution") { auto check_probabilities = [](std::vector const &values, std::vector const &weights) { - int num_iterations = 10'000; + nonnegative_int num_iterations = 10'000_n; std::vector trials = repeat( num_iterations, [&]() { return select_random(values, weights); }); @@ -39,8 +39,8 @@ TEST_SUITE(FF_TEST_SUITE) { float expectedProbability = w / sum(weights); int num_occurrences = filter(trials, [&](int c) { return (c == v); }).size(); - float observedProbability = - static_cast(num_occurrences) / num_iterations; + float observedProbability = static_cast(num_occurrences) / + num_iterations.unwrap_nonnegative(); CHECK(observedProbability == doctest::Approx(expectedProbability).epsilon(0.01f)); }