Skip to content

Commit

Permalink
First working version (1 Ray Client)
Browse files Browse the repository at this point in the history
  • Loading branch information
xicko7 committed Mar 17, 2023
0 parents commit 001af42
Show file tree
Hide file tree
Showing 8 changed files with 506 additions and 0 deletions.
13 changes: 13 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
CMakeCache.txt
config.yml
Makefile
reisa.log
simulation
CMakeFiles/**
outputs/**
cmake_install.cmake
SImLauncher.sh
message.sh
derivative.py
cluster.yaml
client.py
24 changes: 24 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#=============================================================================
# Copyright (c) 2020-2022 Centre national de la recherche scientifique (CNRS)
# Copyright (c) 2020-2022 Commissariat a l'énergie atomique et aux énergies alternatives (CEA)
# Copyright (c) 2020-2022 Institut national de recherche en informatique et en automatique (Inria)
# Copyright (c) 2020-2022 Université Paris-Saclay
# Copyright (c) 2020-2022 Université de Versailles Saint-Quentin-en-Yvelines
#
# SPDX-License-Identifier: MIT
#=============================================================================

cmake_minimum_required(VERSION 3.9)
project(Coupling LANGUAGES C CXX)

# Mandatory dependencies; each one provides an imported target used below.
find_package(MPI REQUIRED COMPONENTS C)
find_package(paraconf REQUIRED COMPONENTS C)
find_package(PDI 1.4 REQUIRED COMPONENTS C)
# OpenMP is optional: the simulation is linked against it only when found.
find_package(OpenMP COMPONENTS C)

# The simulation executable, built from a single C99 source file.
add_executable(simulation simulation.c)
target_compile_features(simulation PRIVATE c_std_99)
# PRIVATE: an executable has no consumers, and the keyword form avoids the
# legacy plain signature of target_link_libraries.
target_link_libraries(simulation
  PRIVATE
    m
    MPI::MPI_C
    paraconf::paraconf
    PDI::pdi
)
if(OpenMP_C_FOUND)
  target_link_libraries(simulation PRIVATE OpenMP::OpenMP_C)
endif()
54 changes: 54 additions & 0 deletions Launcher.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/bash
# Builds the simulation, generates config.yml through prescript.py and submits
# Script.sh to Slurm with the node/task counts derived from the values below.
#
# Prerequisites (see README.md): the PDI pycall and mpi plugins must be loaded
# (for example with "spack load") and ray must be installed.

# BASE VALUES
PARALLELISM1=4
PARALLELISM2=4
MPI_PER_NODE=4
DATASIZE1=4096
DATASIZE2=4096
GENERATION=10
NWORKER=4
CPUS_PER_WORKER=4

GR='\033[0;32m'
BL='\033[0;34m'
NC='\033[0m' # No Color

# Print a blue label followed by a green value on one line.
#   $1: label, $2: value
report() {
    echo -e "${BL}$1${GR} $2${NC}"
}

report "WDIR" "$PWD"
report "PDI" "$(which pdirun)"
report "MPI" "$(which mpirun)"
report "PYTHON" "$(which python)"
report "RAY" "$(which ray)"



# AUXILIARY VALUES
MPI_TASKS=$((PARALLELISM1 * PARALLELISM2))
# Round up so that a task count that is not a multiple of MPI_PER_NODE still
# gets enough simulation nodes (identical to plain division when it divides).
SIMUNODES=$(((MPI_TASKS + MPI_PER_NODE - 1) / MPI_PER_NODE))
NNODES=$((1 + NWORKER + SIMUNODES)) # WORKERS + HEAD + SIMULATION
# Script.sh recovers the MPI task count as SLURM_NTASKS - SLURM_NNODES - 1.
NPROC=$((MPI_TASKS + NNODES + 1))

# COMPILING
# Configure quietly, but show the captured log and abort if it fails, so that
# no job is submitted with a missing or stale binary.
if ! cmake_log=$(CC=gcc CXX=g++ pdirun cmake . 2>&1); then
    echo "$cmake_log" >&2
    echo "Error: CMake configuration failed" >&2
    exit 1
fi
if ! pdirun make -B simulation; then
    echo "Error: building the simulation failed" >&2
    exit 1
fi

# MANAGING FILES
OUTPUT_DIR_NAME="outputs"
# ":?" aborts instead of expanding to "/" should the name ever be empty.
rm -rf "${OUTPUT_DIR_NAME:?}/" > /dev/null 2>&1
mkdir -p "$OUTPUT_DIR_NAME"

# RUNNING
echo "Running in $PWD"
# Generate config.yml; do not submit the job if the configuration is invalid.
if ! python prescript.py "$DATASIZE1" "$DATASIZE2" "$PARALLELISM1" "$PARALLELISM2" "$GENERATION" "$NWORKER"; then
    echo "Error: prescript.py failed to generate config.yml" >&2
    exit 1
fi
sbatch -N "$NNODES" --ntasks="$NPROC" Script.sh "$SIMUNODES" "$MPI_PER_NODE" "$CPUS_PER_WORKER"
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
To launch the program, make sure you have loaded the **pycall and mpi plugins from PDI** (v1.6.0) and their dependencies and have installed **ray** (v2.3.0), then run the following script:

`./Launcher.sh`
78 changes: 78 additions & 0 deletions Script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/bin/bash
#SBATCH --time=00:05:00
#SBATCH -o reisa.log
#SBATCH --error reisa.log
#SBATCH --mem-per-cpu=2GB
#SBATCH --wait-all-nodes=1
#SBATCH --oversubscribe
#SBATCH --exclusive
###################################################################################################
# Slurm batch script: starts a Ray head node and Ray workers, then runs the MPI
# simulation on the last nodes of the allocation.
#
# Usage: sbatch -N <nodes> --ntasks=<tasks> Script.sh <num_sim_nodes> <mpi_per_node> <cpus_per_worker>
#   num_sim_nodes   : number of nodes (taken from the end of the node list) running the simulation
#   mpi_per_node    : MPI tasks per simulation node
#   cpus_per_worker : CPUs given to the Ray head and to each Ray worker
#
# Exit status: that of the simulation step.
###################################################################################################

if [ "$#" -lt 3 ]; then
    echo "Usage: $0 <num_sim_nodes> <mpi_per_node> <cpus_per_worker>" >&2
    exit 1
fi

unset RAY_ADDRESS;

num_sim_nodes=$1
mpi_per_node=$2
cpus_per_worker=$3

# A value of 0 would make the "last N nodes" slice below select every node.
if ! [[ "$num_sim_nodes" =~ ^[1-9][0-9]*$ ]]; then
    echo "Error: num_sim_nodes must be a positive integer (got '$num_sim_nodes')" >&2
    exit 1
fi

# The submitter requests (MPI tasks + nodes + 1) tasks; invert that here.
mpi_tasks=$((SLURM_NTASKS - SLURM_NNODES - 1))
worker_num=$((SLURM_JOB_NUM_NODES - 1 - num_sim_nodes))


# Setting nodes
nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")
nodes_array=($nodes)
# The simulation runs on the last num_sim_nodes nodes of the allocation.
simarray=("${nodes_array[@]: -$num_sim_nodes}")
# Comma-separated list for srun --nodelist.
simunodelist=$(IFS=,; echo "${simarray[*]}")

echo "All nodes: ${nodes_array[*]} (head node + $worker_num workers + $num_sim_nodes simulation nodes)"
echo "Simulation nodes: ${simarray[*]} ($mpi_per_node MPI tasks on each node)"

# The head node is the first node of the allocation (--relative=0).
head_node_ip=$(srun -N 1 -n 1 --relative=0 hostname -i)
# "hostname -i" may print several addresses; keep a single one, skipping a
# leading IPv6 address (longer than 16 characters) when another is available.
if [[ "$head_node_ip" == *" "* ]]; then
    read -ra addresses <<< "$head_node_ip"
    if [[ ${#addresses[@]} -gt 1 && ${#addresses[0]} -gt 16 ]]; then
        head_node_ip=${addresses[1]}
    else
        head_node_ip=${addresses[0]}
    fi
fi
if [ -z "$head_node_ip" ]; then
    echo "Error: could not determine the head node IP address" >&2
    exit 1
fi
port=6379
echo -e "Head node: $head_node_ip:$port"


# Launching the head node
srun --nodes=1 --ntasks=1 --relative=0 --cpus-per-task="$cpus_per_worker" \
    ray start --head --node-ip-address="$head_node_ip" --port=$port \
    --num-cpus "$cpus_per_worker" --block --resources='{"data": 0}' &
export RAY_ADDRESS=$head_node_ip:$port
# Fixed delay to let the head node come up before workers try to join.
sleep 15

# Launch Ray workers
for ((i = 1; i <= worker_num; i++)); do
    node_i=${nodes_array[$i]}
    echo "Starting worker node at $node_i"
    srun --nodes=1 --ntasks=1 --relative=$i --cpus-per-task="$cpus_per_worker" \
        ray start --address "$RAY_ADDRESS" \
        --num-cpus "$cpus_per_worker" --block --resources='{"data": 1}' &
done

# Disabled: launch a Ray instance in the simulation nodes
# echo "Starting simulation at $simunodelist"
# for ((; i < $SLURM_JOB_NUM_NODES; i++)); do
# node_i=${nodes_array[$i]}
# srun --mem-per-cpu=1GB --nodes=1 --ntasks=1 --relative=$i \
# ray start --address $RAY_ADDRESS --block &
# done

sleep 10
# Launch the simulation code (python script is here)
srun --exclusive --oversubscribe -N "$num_sim_nodes" --ntasks-per-node="$mpi_per_node" \
    -n "$mpi_tasks" --nodelist="$simunodelist" --cpus-per-task=1 \
    pdirun ./simulation &
sim_pid=$!

# Wait for results and report the simulation's status as the job status.
wait "$sim_pid"
sim_status=$?
echo "Finished"
exit "$sim_status"
29 changes: 29 additions & 0 deletions prescript.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
###################################################################################################
# Copyright (c) 2020-2022 Centre national de la recherche scientifique (CNRS)
# Copyright (c) 2020-2022 Commissariat a l'énergie atomique et aux énergies alternatives (CEA)
# Copyright (c) 2020-2022 Institut national de recherche en informatique et en automatique (Inria)
# Copyright (c) 2020-2022 Université Paris-Saclay
# Copyright (c) 2020-2022 Université de Versailles Saint-Quentin-en-Yvelines
#
# SPDX-License-Identifier: MIT
#
###################################################################################################

import yaml
import sys

# sys.argv[1] = 10 # global_size.height
# sys.argv[2] = 10 # global_size.width
# sys.argv[3] = 2 # parallelism.height
# sys.argv[4] = 2 # parallelism.width
# sys.argv[5] = 1 # generation
# sys.argv[6] = 1 # nworkers

def build_config(argv):
    """Build the simulation configuration from command-line arguments.

    Args:
        argv: Argument list shaped like ``sys.argv``: the program name
            followed by global height, global width, parallelism height,
            parallelism width, number of time steps and number of workers.
            Extra trailing arguments are ignored.

    Returns:
        A dict with the keys ``global_size``, ``parallelism``,
        ``MaxtimeSteps`` and ``workers``, all values converted to ``int``.

    Raises:
        SystemExit: If fewer than six values are supplied.
        ValueError: If a value is not an integer literal.
    """
    if len(argv) < 7:
        raise SystemExit(
            "usage: prescript.py HEIGHT WIDTH PAR_HEIGHT PAR_WIDTH GENERATION NWORKERS")
    height, width, par_height, par_width, steps, workers = (
        int(value) for value in argv[1:7])
    return {"global_size": {"height": height, "width": width},
            "parallelism": {"height": par_height, "width": par_width},
            "MaxtimeSteps": steps,
            "workers": workers}


def main():
    """Write the configuration built from ``sys.argv`` to ``config.yml``."""
    # Parse before opening the file so that invalid arguments cannot leave a
    # truncated config.yml behind.
    data = build_config(sys.argv)
    with open('config.yml', 'w') as file:
        yaml.safe_dump(data, file)


if __name__ == "__main__":
    main()
Loading

0 comments on commit 001af42

Please sign in to comment.