Skip to content

Commit

Permalink
Merge branch 'inference' into background_worker
Browse files Browse the repository at this point in the history
  • Loading branch information
zwang86 authored Nov 28, 2023
2 parents d908b1a + 457b5f2 commit 938a2d6
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 32 deletions.
32 changes: 2 additions & 30 deletions MULTI-NODE.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,35 +84,7 @@ ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOy5NKYdE8Cwgid59rx6xMqyj9vLaWuXIwy/BSRiK4su

## 5. Test FlexFlow

<<<<<<< HEAD
Follow step 6 in the [Build from source guide](https://flexflow.readthedocs.io/en/latest/installation.html) to set environment variables.
Follow step 6 in the [Build from source guide](https://flexflow.readthedocs.io/en/latest/installation.html) to set the environment variables.

A script to run a Python example on multiple nodes is available at `scripts/mnist_mlp_run.sh`. You can run the script using [`mpirun`](https://www.open-mpi.org/doc/current/man1/mpirun.1.php) (if you configured it in step 3) or [`srun`](https://slurm.schedmd.com/srun.html).
=======
Follow step 6 in [INSTALL.md](INSTALL.md#6-test-flexflow) to set environment variables.
A script to run a Python example on multiple nodes is available at `scripts/mnist_mlp_run.sh`. Run the script to test FlexFlow on MNIST MLP training. You can adjust the script to run any other program. Make sure to change the `FLEXFLOW_DIR` and `UCX_DIR` variables in it to the appropriate paths.

Save the following script as `mnist_mlp_run.sh` and make sure to change `FLEXFLOW_DIR` and `UCX_DIR` to appropriate paths:

```bash
#!/bin/bash
# Multi-node test launcher: runs the MNIST MLP Python example through
# flexflow_python as 2 MPI processes, using the hosts listed in ~/hostfile.
# Edit FLEXFLOW_DIR and UCX_DIR below before running.

# Load conda's bash hook so `conda activate` works inside a script, then
# switch to the flexflow environment.
eval "$(conda shell.bash hook)"
conda activate flexflow

# Path to your FlexFlow build
FLEXFLOW_DIR=/home/ubuntu/FlexFlow/build

# Path to your UCX installation
UCX_DIR=/home/ubuntu/ucx-1.15.0/install

# MPI bootstrap plugin located in the Legion libraries of the FlexFlow build.
export REALM_UCP_BOOTSTRAP_PLUGIN="${FLEXFLOW_DIR}/deps/legion/lib/realm_ucp_bootstrap_mpi.so"

# Library search path, highest priority first: the hard-coded conda env libs,
# UCX, the FlexFlow build dir, the bundled Legion libs, whatever was already
# set, and finally the active conda env's lib directory.
export LD_LIBRARY_PATH="/opt/conda/envs/flexflow/lib:${UCX_DIR}/lib:${FLEXFLOW_DIR}:${FLEXFLOW_DIR}/deps/legion/lib:${LD_LIBRARY_PATH}:${CONDA_PREFIX}/lib"

# -x passes the named environment variables on to the launched processes;
# the TCP transport is limited to the ens5 interface (adjust for your hosts).
mpiexec \
  -x REALM_UCP_BOOTSTRAP_PLUGIN -x PATH -x LD_LIBRARY_PATH \
  --hostfile ~/hostfile \
  --mca btl_tcp_if_include ens5 \
  -np 2 \
  "${FLEXFLOW_DIR}/flexflow_python" \
  "${FLEXFLOW_DIR}/../examples/python/native/mnist_mlp.py" \
  -ll:py 1 -ll:gpu 1 -ll:fsize 8000 -ll:zsize 8000
```

Run the script to test FlexFlow on MNIST MLP training. You can adjust the script to run any other program.
>>>>>>> c031ab1f... fix ucx
2 changes: 1 addition & 1 deletion config/config.linux
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ fi

function get_build_configs() {
  # Create a string with the values of the variables set in this script.
  #
  # Globals written: BUILD_CONFIGS - space-separated NAME=value pairs, in the
  #                  fixed order below. Unset variables yield "NAME=".
  # Globals read:    every variable named on the right-hand sides below.
  #
  # Values are interpolated verbatim (no quoting or escaping), so a value that
  # contains a space cannot be told apart from a field separator by consumers.
  #
  # The string is built one field per line so that adding, removing or
  # renaming a field is a one-line change. Only UCX_DIR is emitted for UCX;
  # the superseded FF_UCX_URL field is no longer part of the output.
  BUILD_CONFIGS="FF_CUDA_ARCH=${FF_CUDA_ARCH}"
  BUILD_CONFIGS+=" FF_HIP_ARCH=${FF_HIP_ARCH}"
  BUILD_CONFIGS+=" CUDNN_DIR=${CUDNN_DIR}"
  BUILD_CONFIGS+=" CUDA_DIR=${CUDA_DIR}"
  BUILD_CONFIGS+=" NCCL_DIR=${NCCL_DIR}"
  BUILD_CONFIGS+=" FF_USE_PYTHON=${FF_USE_PYTHON}"
  BUILD_CONFIGS+=" BUILD_LEGION_ONLY=${BUILD_LEGION_ONLY}"
  BUILD_CONFIGS+=" FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT}"
  BUILD_CONFIGS+=" UCX_DIR=${UCX_DIR}"
  BUILD_CONFIGS+=" FF_LEGION_NETWORKS=${FF_LEGION_NETWORKS}"
  BUILD_CONFIGS+=" FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES}"
  BUILD_CONFIGS+=" FF_BUILD_ALL_INFERENCE_EXAMPLES=${FF_BUILD_ALL_INFERENCE_EXAMPLES}"
  BUILD_CONFIGS+=" FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS}"
  BUILD_CONFIGS+=" FF_USE_PREBUILT_NCCL=${FF_USE_PREBUILT_NCCL}"
  BUILD_CONFIGS+=" FF_USE_PREBUILT_LEGION=${FF_USE_PREBUILT_LEGION}"
  BUILD_CONFIGS+=" FF_USE_ALL_PREBUILT_LIBRARIES=${FF_USE_ALL_PREBUILT_LIBRARIES}"
  BUILD_CONFIGS+=" FF_USE_AVX2=${FF_USE_AVX2}"
  BUILD_CONFIGS+=" FF_MAX_DIM=${FF_MAX_DIM}"
  BUILD_CONFIGS+=" ROCM_PATH=${ROCM_PATH}"
  BUILD_CONFIGS+=" FF_GPU_BACKEND=${FF_GPU_BACKEND}"
  BUILD_CONFIGS+=" INSTALL_DIR=${INSTALL_DIR}"
}

if [[ -n "$1" && ( "$1" == "CMAKE_FLAGS" || "$1" == "CUDA_PATH" ) ]]; then
Expand Down
15 changes: 14 additions & 1 deletion scripts/mnist_mlp_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,17 @@
# Multi-node test launcher: runs the MNIST MLP Python example through
# flexflow_python as 2 MPI processes, using the hosts listed in ~/hostfile.
# Edit FLEXFLOW_DIR and UCX_DIR below before running.
#
# The script performs exactly one launch (the mpiexec call at the bottom).

# Load conda's bash hook so `conda activate` works inside a script, then
# switch to the flexflow environment.
eval "$(conda shell.bash hook)"
conda activate flexflow

# Path to your FlexFlow build
FLEXFLOW_DIR=/home/ubuntu/FlexFlow/build

# Path to your UCX installation
UCX_DIR=/home/ubuntu/ucx-1.15.0/install

# MPI bootstrap plugin located in the Legion libraries of the FlexFlow build.
export REALM_UCP_BOOTSTRAP_PLUGIN="${FLEXFLOW_DIR}/deps/legion/lib/realm_ucp_bootstrap_mpi.so"

# Library search path, highest priority first: the hard-coded conda env libs,
# UCX, the FlexFlow build dir, the bundled Legion libs, whatever was already
# set, and finally the active conda env's lib directory.
export LD_LIBRARY_PATH="/opt/conda/envs/flexflow/lib:${UCX_DIR}/lib:${FLEXFLOW_DIR}:${FLEXFLOW_DIR}/deps/legion/lib:${LD_LIBRARY_PATH}:${CONDA_PREFIX}/lib"

# -x passes the named environment variables on to the launched processes;
# the TCP transport is limited to the ens5 interface (adjust for your hosts).
mpiexec \
  -x REALM_UCP_BOOTSTRAP_PLUGIN -x PATH -x LD_LIBRARY_PATH \
  --hostfile ~/hostfile \
  --mca btl_tcp_if_include ens5 \
  -np 2 \
  "${FLEXFLOW_DIR}/flexflow_python" \
  "${FLEXFLOW_DIR}/../examples/python/native/mnist_mlp.py" \
  -ll:py 1 -ll:gpu 1 -ll:fsize 8000 -ll:zsize 8000

0 comments on commit 938a2d6

Please sign in to comment.