Skip to content

Commit

Permalink
Beneisner/add rlbench eval (#51)
Browse files Browse the repository at this point in the history
* add in the rlbench eval

* rlbench evals seem to work

* fold in remaining rlbench utils for cluster training
  • Loading branch information
beneisner authored May 14, 2024
1 parent 9e8aa27 commit 70a5f1f
Show file tree
Hide file tree
Showing 18 changed files with 1,448 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM nvidia/cuda:12.4.1-base-ubuntu22.04
FROM nvidia/cuda:12.2.2-base-ubuntu20.04

ENV DEBIAN_FRONTEND=noninteractive

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# @package _global_

# RLBench eval preset: builds on /eval_rlbench.yaml, selecting the taxpose_tc
# model and the pick_and_lift task.
defaults:
- /eval_rlbench.yaml
- override /model: taxpose_tc
- override /task: pick_and_lift
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# @package _global_

# RLBench eval preset: builds on /eval_rlbench.yaml, selecting the taxpose_tc
# model and the pick_up_cup task.
defaults:
- /eval_rlbench.yaml
- override /model: taxpose_tc
- override /task: pick_up_cup
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# @package _global_

# RLBench eval preset: builds on /eval_rlbench.yaml, selecting the taxpose_tc
# model and the push_button task.
defaults:
- /eval_rlbench.yaml
- override /model: taxpose_tc
- override /task: push_button
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# @package _global_

# RLBench eval preset: builds on /eval_rlbench.yaml, selecting the taxpose_tc
# model and the put_knife_on_chopping_board task.
defaults:
- /eval_rlbench.yaml
- override /model: taxpose_tc
- override /task: put_knife_on_chopping_board
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# @package _global_

# RLBench eval preset: builds on /eval_rlbench.yaml, selecting the taxpose_tc
# model and the put_money_in_safe task.
defaults:
- /eval_rlbench.yaml
- override /model: taxpose_tc
- override /task: put_money_in_safe
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# @package _global_

# RLBench eval preset: builds on /eval_rlbench.yaml, selecting the taxpose_tc
# model and the reach_target task.
defaults:
- /eval_rlbench.yaml
- override /model: taxpose_tc
- override /task: reach_target
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# @package _global_

# RLBench eval preset: builds on /eval_rlbench.yaml, selecting the taxpose_tc
# model and the slide_block_to_target task.
defaults:
- /eval_rlbench.yaml
- override /model: taxpose_tc
- override /task: slide_block_to_target
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# @package _global_

# RLBench eval preset: builds on /eval_rlbench.yaml, selecting the taxpose_tc
# model and the stack_wine task.
defaults:
- /eval_rlbench.yaml
- override /model: taxpose_tc
- override /task: stack_wine
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# @package _global_

# RLBench eval preset: builds on /eval_rlbench.yaml, selecting the taxpose_tc
# model and the take_money_out_safe task.
defaults:
- /eval_rlbench.yaml
- override /model: taxpose_tc
- override /task: take_money_out_safe
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# @package _global_

# RLBench eval preset: builds on /eval_rlbench.yaml, selecting the taxpose_tc
# model and the take_umbrella_out_of_umbrella_stand task.
defaults:
- /eval_rlbench.yaml
- override /model: taxpose_tc
- override /task: take_umbrella_out_of_umbrella_stand
- _self_
40 changes: 40 additions & 0 deletions configs/eval_rlbench.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Base Hydra config for RLBench evaluation. The per-task eval presets list this
# file in their defaults and override the `model` and `task` groups.

# Job label, suffixed with the name of the selected task.
job_type: eval_rlbench_${task.name}

# Dataset root: the `datasets` directory under the HOME environment variable
# unless overridden (launch.sh passes data_root=/data).
data_root: ${oc.env:HOME}/datasets

defaults:
- _logging
- _self_

- benchmark: rlbench
# `optional`: presumably tolerates a missing task config -- confirm against
# the Hydra defaults-list documentation.
- optional task: stack_wine

# Machinery to load the correct model architecture.
# The model config can be found in configs/model.
- model: taxpose

# Task-specific settings.
- task/${benchmark}/${task}/task@task

policy_spec:
collision_checking: True
num_points: 256
break_symmetry: False
# Interpolates the selected model config.
model: ${model}


# Usually only a single checkpoint, but we could have multiple for each phase.
checkpoints:
# NOTE(review): `???` is OmegaConf's marker for a mandatory value, so this
# presumably has to be supplied per run -- confirm.
ckpt_file: ???

task:
action_mode: object
anchor_mode: single_object

num_trials: 100
headless: True

resources:
num_workers: 10

seed: 123456
12 changes: 12 additions & 0 deletions copy_rlbench_and_unzip.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
#
# Stages the RLBench dataset on scratch storage:
#   1. copies the archive /project_data/held/baeisner/rlbench.zip to
#      /scratch/baeisner/data/rlbench.zip,
#   2. extracts it into /scratch/baeisner/data,
#   3. deletes the copied archive.
#
# The steps are chained with &&, so a failed copy is never extracted and the
# archive is only removed after a successful extraction. The script's exit
# status is that of the first failing step (0 when everything succeeded).

src_zip=/project_data/held/baeisner/rlbench.zip
dest_dir=/scratch/baeisner/data
dest_zip="${dest_dir}/rlbench.zip"

# rsync does not create missing parent directories, so make sure the
# destination exists first (a no-op when it already does).
mkdir -p -- "${dest_dir}" \
  && rsync -av --progress -- "${src_zip}" "${dest_zip}" \
  && unzip "${dest_zip}" -d "${dest_dir}" \
  && rm -- "${dest_zip}"
139 changes: 139 additions & 0 deletions launch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#!/bin/bash

# Launches a command on one of the supported platforms.
#
# Arguments:
#   1. The platform to run on: "autobot", "local-docker", "local-apptainer",
#      or "local".
#   2. The index of which GPU to use.
#   3. The command and arguments to run.
#
# Environment:
#   DOCKER_TAG - optional; when set, used as the tag of the container image.
#
# Example usage:
#   ./launch.sh autobot 0 python scripts/train_residual_flow.py

# First argument: the platform.
PLATFORM=${1:-}

# Second argument: the GPU index.
GPU_INDEX=${2:-}

# Drop the platform and GPU index (or however many of them were supplied) so
# that only the command and its arguments remain.
shift "$(( $# < 2 ? $# : 2 ))"

# Everything left is the command. It is kept as a single space-joined string
# because the rest of the script expands it unquoted to split it into words.
COMMAND="$*"

# Quoted so the values are shown verbatim (no globbing or re-splitting).
echo "Platform: $PLATFORM"
echo "GPU Index: $GPU_INDEX"
echo "Command: $COMMAND"

#######################################
# Chooses the tag of the beisner/taxpose image to run.
#
# Precedence:
#   1. $DOCKER_TAG, when set and non-empty (no registry lookup is made).
#   2. The current git branch name with "/" replaced by "-", when an image
#      with that tag can be inspected via `docker manifest inspect`.
#   3. "latest".
#
# Globals:   DOCKER_TAG (read)
# Arguments: none
# Outputs:   the chosen tag on stdout
#######################################
resolve_image_tag() {
  # An explicit override wins, so skip the branch/registry lookup entirely.
  if [[ -n "${DOCKER_TAG:-}" ]]; then
    printf '%s\n' "${DOCKER_TAG}"
    return 0
  fi

  local branch_name sanitized_branch_name
  branch_name=$(git branch --show-current)
  # "/" is not valid in an image tag, so map it to "-".
  sanitized_branch_name="${branch_name//\//-}"

  # An empty branch name would produce the malformed reference
  # "beisner/taxpose:", so only query the registry when there is a name.
  if [[ -n "${sanitized_branch_name}" ]] \
    && docker manifest inspect "beisner/taxpose:${sanitized_branch_name}" > /dev/null 2>&1; then
    printf '%s\n' "${sanitized_branch_name}"
  else
    printf 'latest\n'
  fi
}

tag=$(resolve_image_tag)

echo "Using image: beisner/taxpose:${tag}"


#######################################
# Runs the user's command on the requested platform.
#
# Globals:
#   COMMAND - command line to run. It is expanded unquoted on purpose so the
#             space-joined string is split back into separate words.
#   tag     - tag of the beisner/taxpose image to use.
# Arguments:
#   $1 - platform: autobot, local-docker, local-apptainer, or local.
#   $2 - index of the GPU to use.
# Returns:
#   The exit status of the launched command, or 1 (with a message on stderr)
#   when the platform is not recognized.
#######################################
run_on_platform() {
  local platform=$1
  local gpu_index=$2
  local user
  user=$(whoami)

  case "${platform}" in
    # On "autobot" the command runs inside the image via apptainer.
    autobot)
      echo "Running on autobot"

      # Per-user scratch directories that are bind-mounted below. mkdir -p
      # leaves directories that already exist untouched.
      local -a scratch_dirs=(
        "/scratch/${user}/data"
        "/scratch/${user}/logs"
        "/scratch/${user}/artifacts"
        "/scratch/${user}/.config"
        "/scratch/${user}/tmp"
        "/scratch/${user}/home"
      )
      mkdir -p -- "${scratch_dirs[@]}"

      # shellcheck disable=SC2086  # COMMAND is split into words on purpose.
      APPTAINERENV_CUDA_VISIBLE_DEVICES="${gpu_index}" \
      APPTAINERENV_WANDB_DOCKER_IMAGE=taxpose \
      APPTAINERENV_MPLCONFIGDIR=/opt/.config \
      apptainer run \
        --nv \
        --no-mount hostfs \
        --pwd "/opt/${user}/code" \
        --workdir /opt/tmp \
        -B "/home/${user}/code/rpad/taxpose:/opt/${user}/code" \
        -B "/scratch/${user}/data:/data" \
        -B "/scratch/${user}/logs:/opt/logs" \
        -B "/scratch/${user}/artifacts:/opt/artifacts" \
        -B "/scratch/${user}/.config:/opt/.config" \
        -B "/scratch/${user}/tmp:/tmp" \
        -B "/scratch/${user}/home:/home/${user}" \
        "docker://beisner/taxpose:${tag}" \
        $COMMAND \
        log_dir=/opt/logs \
        data_root=/data \
        wandb.artifact_dir=/opt/artifacts
      ;;

    # On "local-docker" the command runs inside the image via docker.
    local-docker)
      echo "Running locally with docker"

      # NOTE(review): the host paths below are hard-coded to /home/beisner,
      # unlike the other branches, which derive them from the current user.
      # shellcheck disable=SC2086  # COMMAND is split into words on purpose.
      docker run \
        --gpus "device=${gpu_index}" \
        -it \
        -e WANDB_API_KEY="${WANDB_API_KEY}" \
        -e WANDB_DOCKER_IMAGE=taxpose \
        -v /usr/share/glvnd/egl_vendor.d/10_nvidia.json:/usr/share/glvnd/egl_vendor.d/10_nvidia.json \
        -v /home/beisner/datasets/:/data \
        -v /home/beisner/code/rpad/taxpose/artifacts:/opt/artifacts \
        -v /home/beisner/code/rpad/taxpose/logs:/opt/logs \
        -v /home/beisner/code/rpad/taxpose:/opt/baeisner/code \
        "beisner/taxpose:${tag}" \
        $COMMAND \
        log_dir=/opt/logs \
        data_root=/data \
        wandb.artifact_dir=/opt/artifacts
      ;;

    # On "local-apptainer" the command runs inside the image via apptainer,
    # with directories under the user's home bind-mounted.
    local-apptainer)
      echo "Running locally with apptainer"

      # shellcheck disable=SC2086  # COMMAND is split into words on purpose.
      APPTAINERENV_CUDA_VISIBLE_DEVICES="${gpu_index}" \
      APPTAINERENV_WANDB_DOCKER_IMAGE=taxpose \
      APPTAINERENV_MPLCONFIGDIR=/opt/.config \
      APPTAINERENV_VGL_DEVICE="egl${gpu_index}" \
      APPTAINERENV_PYENV_VERSION= \
      apptainer run \
        --nv \
        --no-mount hostfs \
        --pwd "/opt/${user}/code" \
        --contain \
        -B "/home/${user}/code/rpad/taxpose:/opt/${user}/code" \
        -B "/home/${user}/datasets:/data" \
        -B "/home/${user}/code/rpad/taxpose/logs:/opt/logs" \
        -B "/home/${user}/code/rpad/taxpose/artifacts:/opt/artifacts" \
        -B "/home/${user}/.config:/opt/.config" \
        -B "/home/${user}/.tmp:/tmp" \
        -B "/home/${user}/tmp_home:/home/${user}" \
        -B /usr/share/glvnd/egl_vendor.d/10_nvidia.json:/usr/share/glvnd/egl_vendor.d/10_nvidia.json \
        "docker://beisner/taxpose:${tag}" \
        $COMMAND \
        log_dir=/opt/logs \
        data_root=/data \
        wandb.artifact_dir=/opt/artifacts
      ;;

    # On "local" the command runs directly on the host, without a container
    # and without the extra log/data/artifact overrides.
    local)
      echo "Running locally"

      # shellcheck disable=SC2086  # COMMAND is split into words on purpose.
      CUDA_VISIBLE_DEVICES="${gpu_index}" \
      WANDB_DOCKER_IMAGE=taxpose \
      $COMMAND
      ;;

    # Anything else (including an empty platform) is an error, reported on
    # stderr with a non-zero status so callers can detect it.
    *)
      echo "Platform not recognized" >&2
      return 1
      ;;
  esac
}

# Last command of the script, so its status becomes the script's exit status.
run_on_platform "$PLATFORM" "$GPU_INDEX"
Loading

0 comments on commit 70a5f1f

Please sign in to comment.