Skip to content

Commit

Permalink
Revision updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
Johannes Linder committed Sep 30, 2024
1 parent 24c4e51 commit 970379f
Show file tree
Hide file tree
Showing 78 changed files with 7,941 additions and 3,940 deletions.
68 changes: 68 additions & 0 deletions download_models.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash
#
# Download the Borzoi example assets into ./examples:
#   - model weights (data fold 3, 4 replicates)
#   - GENCODE v41 / PolyADB annotation files
#   - the hg38 genome FASTA, which is then indexed with
#     src/scripts/idx_genome.py
#
# All paths are relative, so run this from the repository root.
# Re-running is safe: any file that already exists is skipped.
#
# Every download is written to "<dest>.part" and only renamed to its final
# name once the transfer (and decompression, if any) succeeded. Without this,
# a failed or interrupted wget leaves an empty/truncated file behind and the
# "already exists" check would skip it forever on later runs.
#
# Exit status: 0 if everything is present afterwards, 1 if any step failed.

# Make "wget | gunzip" report failure when wget fails, not only gunzip.
set -o pipefail

failures=0

#######################################
# Download a URL to a destination file via a temporary ".part" file.
# Arguments:
#   $1  - mode: "raw" (store as-is) or "gunzip" (decompress while streaming)
#   $2  - source URL
#   $3  - destination path
#   $4+ - optional extra wget options
# Returns: 0 on success, non-zero on failure (destination is left untouched)
#######################################
fetch() {
  local mode=$1 url=$2 dest=$3
  shift 3
  local partial="${dest}.part"
  local rc=0

  case "$mode" in
    raw)
      wget "$@" "$url" -O "$partial" || rc=$?
      ;;
    gunzip)
      wget "$@" -O - "$url" | gunzip -c > "$partial" || rc=$?
      ;;
    *)
      printf 'error: unknown fetch mode: %s\n' "$mode" >&2
      return 2
      ;;
  esac

  if [ "$rc" -eq 0 ] && mv -- "$partial" "$dest"; then
    return 0
  fi

  rm -f -- "$partial"
  printf 'error: failed to download %s\n' "$url" >&2
  return 1
}

#######################################
# Download a file unless it is already present.
# Globals:   failures (incremented when the download fails)
# Arguments:
#   $1  - mode, as for fetch
#   $2  - source URL
#   $3  - destination path
#   $4  - message printed when the destination already exists
#   $5+ - optional extra wget options
#######################################
ensure() {
  local mode=$1 url=$2 dest=$3 exists_msg=$4
  shift 4

  if [ -f "$dest" ]; then
    echo "$exists_msg"
  else
    fetch "$mode" "$url" "$dest" "$@" || failures=$((failures + 1))
  fi
}

# download model weights (data fold 3, 4 replicates)
# Each entry is "<local model dir>,<remote fold dir>". The pair is split with
# parameter expansion so IFS and the positional parameters stay untouched.
for rep in f3c0,f0 f3c1,f1 f3c2,f2 f3c3,f3; do
  model_name=${rep%%,*}
  remote_fold=${rep##*,}

  mkdir -p "examples/saved_models/${model_name}/train"
  local_model="examples/saved_models/${model_name}/train/model0_best.h5"

  ensure raw \
    "https://storage.googleapis.com/seqnn-share/borzoi/${remote_fold}/model0_best.h5" \
    "$local_model" \
    "${model_name} model already exists." \
    --progress=bar:force
done

# download and uncompress annotation files
helper_url="https://storage.googleapis.com/seqnn-share/helper"
gencode_dir="examples/hg38/genes/gencode41"
polyadb_dir="examples/hg38/genes/polyadb"

mkdir -p "$gencode_dir"
mkdir -p "$polyadb_dir"

ensure gunzip "${helper_url}/gencode41_basic_nort.gtf.gz" \
  "${gencode_dir}/gencode41_basic_nort.gtf" \
  "Gene annotation already exists."

ensure gunzip "${helper_url}/gencode41_basic_nort_protein.gtf.gz" \
  "${gencode_dir}/gencode41_basic_nort_protein.gtf" \
  "Gene annotation (no read-through, protein-coding) already exists."

ensure gunzip "${helper_url}/gencode41_basic_protein.gtf.gz" \
  "${gencode_dir}/gencode41_basic_protein.gtf" \
  "Gene annotation (protein-coding) already exists."

ensure gunzip "${helper_url}/gencode41_basic_tss2.bed.gz" \
  "${gencode_dir}/gencode41_basic_tss2.bed" \
  "TSS annotation already exists."

# The splice-site CSV and the PolyADB CSV are kept gzip-compressed on disk.
ensure raw "${helper_url}/gencode41_basic_protein_splice.csv.gz" \
  "${gencode_dir}/gencode41_basic_protein_splice.csv.gz" \
  "Splice site annotation already exist."

ensure gunzip "${helper_url}/gencode41_basic_protein_splice.gff.gz" \
  "${gencode_dir}/gencode41_basic_protein_splice.gff" \
  "Splice site annotation already exist."

ensure raw "${helper_url}/polyadb_human_v3.csv.gz" \
  "${polyadb_dir}/polyadb_human_v3.csv.gz" \
  "PolyA site annotation already exist."

# download and index hg38 genome
genome_dir="examples/hg38/assembly/ucsc"
genome_fa="${genome_dir}/hg38.fa"

mkdir -p "$genome_dir"

if [ -f "$genome_fa" ]; then
  echo "Human genome FASTA already exists."
elif fetch gunzip \
  "http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz" \
  "$genome_fa"; then
  # Index only a completely downloaded FASTA.
  python src/scripts/idx_genome.py "$genome_fa" || failures=$((failures + 1))
else
  failures=$((failures + 1))
fi

if [ "$failures" -gt 0 ]; then
  printf 'error: %d download step(s) failed; re-run to retry.\n' "$failures" >&2
  exit 1
fi
exit 0
50 changes: 50 additions & 0 deletions env_vars.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash
#
# Register the Borzoi / Baskerville / Westminster environment variables with
# the currently active conda environment by appending export/unset lines to
# its activate.d / deactivate.d hook scripts, then load them into this shell.
#
# Usage: edit the LOCAL_* values below, activate the target conda
# environment, then run `source env_vars.sh` (sourcing is what makes the
# final step affect the calling shell).
#
# NOTE: lines are appended on every run; running this more than once adds
# duplicate entries to the hook scripts (later exports win on activation).

# set these variables before running the script
LOCAL_BORZOI_PATH="/home/jlinder/borzoi"
LOCAL_BASKERVILLE_PATH="/home/jlinder/baskerville"
LOCAL_WESTMINSTER_PATH="/home/jlinder/westminster"
LOCAL_USER="jlinder"

# Without an active conda env, "$CONDA_PREFIX/etc/..." would expand to
# "/etc/...". Abort instead; `return` covers the sourced case and `exit`
# the executed case, so a sourcing shell is never killed.
if [ -z "${CONDA_PREFIX:-}" ]; then
  echo "error: CONDA_PREFIX is not set; activate the target conda environment first." >&2
  return 1 2>/dev/null || exit 1
fi

# create env_vars sh scripts in local conda env
activate_dir="$CONDA_PREFIX/etc/conda/activate.d"
deactivate_dir="$CONDA_PREFIX/etc/conda/deactivate.d"

if ! mkdir -p "$activate_dir" "$deactivate_dir"; then
  echo "error: cannot create conda hook directories under $CONDA_PREFIX" >&2
  return 1 2>/dev/null || exit 1
fi

file_vars_act="$activate_dir/env_vars.sh"
if ! [ -e "$file_vars_act" ]; then
  echo '#!/bin/sh' > "$file_vars_act"
fi

file_vars_deact="$deactivate_dir/env_vars.sh"
if ! [ -e "$file_vars_deact" ]; then
  echo '#!/bin/sh' > "$file_vars_deact"
fi

# append borzoi (and baskerville/westminster) env variable exports to /activate.d/env_vars.sh
# The LOCAL_* values are expanded now and written shell-quoted (%q) so paths
# containing spaces survive; the single-quoted lines are written literally
# and are expanded only when the hook script runs.
{
  printf 'export BORZOI_DIR=%q\n' "$LOCAL_BORZOI_PATH"
  printf '%s\n' 'export PATH="$BORZOI_DIR/src/scripts:$PATH"'
  printf '%s\n' 'export PYTHONPATH="$BORZOI_DIR/src/scripts:$PYTHONPATH"'

  printf 'export BASKERVILLE_DIR=%q\n' "$LOCAL_BASKERVILLE_PATH"
  printf '%s\n' 'export PATH="$BASKERVILLE_DIR/src/baskerville/scripts:$PATH"'
  printf '%s\n' 'export PYTHONPATH="$BASKERVILLE_DIR/src/baskerville/scripts:$PYTHONPATH"'

  printf 'export WESTMINSTER_DIR=%q\n' "$LOCAL_WESTMINSTER_PATH"
  printf '%s\n' 'export PATH="$WESTMINSTER_DIR/src/westminster/scripts:$PATH"'
  printf '%s\n' 'export PYTHONPATH="$WESTMINSTER_DIR/src/westminster/scripts:$PYTHONPATH"'

  printf '%s\n' 'export BORZOI_HG38="$BORZOI_DIR/examples/hg38"'
  printf '%s\n' 'export BORZOI_MM10="$BORZOI_DIR/examples/mm10"'

  printf 'export BORZOI_CONDA=%q\n' "/home/$LOCAL_USER/anaconda3/etc/profile.d/conda.sh"
} >> "$file_vars_act"

# append borzoi env variable unsets to /deactivate.d/env_vars.sh
# (PATH and PYTHONPATH are not restored on deactivation.)
cat >> "$file_vars_deact" <<'EOF'
unset BASKERVILLE_DIR
unset WESTMINSTER_DIR
unset BORZOI_DIR
unset BORZOI_HG38
unset BORZOI_MM10
unset BORZOI_CONDA
EOF

# finally activate env variables
source "$file_vars_act"
Loading

0 comments on commit 970379f

Please sign in to comment.