forked from paracrawl/cirrus-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path09.clean.sh
executable file
·35 lines (31 loc) · 990 Bytes
/
09.clean.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash
## Schedule the step 09 (clean) array jobs for one collection and one or more
## languages.
##
## Usage: 09.clean.sh COLLECTION LANG...
##
## For every LANG a batch list and a job list are built with the helpers from
## functions.sh. When the job list is non-empty and the user confirms, an
## array job running ${SCRIPTS}/09.clean is submitted through `schedule`.
## SLURM_LOGS, SCRIPTS, COLLECTIONS, TARGET_LANG and BICLEANER_THRESHOLD are
## not set here; presumably they come from the sourced files below.
set -euo pipefail

. ./env/init.sh
. ./config.sh
. ./functions.sh

# Fail with a usage hint instead of a bare "unbound variable" error.
collection=${1:?usage: $0 COLLECTION LANG...}
shift

# "$@" keeps every language argument intact (no word splitting or globbing).
for lang in "$@"; do
  # Load some language-specific bicleaner & bifixer configurations (because
  # they normally don't deal with zh or ko correctly). Read: time for the
  # duct tape!
  bicleaner_ai_model "$lang"

  # Output name embeds the threshold with its first "." removed.
  output="filtered${BICLEANER_THRESHOLD/./}.gz"

  # NOTE(review): the argument semantics of make_batch_list / make_job_list
  # are defined in functions.sh; presumably the file names after $lang are
  # the expected output followed by the inputs it needs - confirm there.
  batch_list=$(make_batch_list 09 "$collection" "$lang" "$output" fixed.gz scored.gz)
  job_list=$(make_job_list "$batch_list")

  # Nothing to schedule for this language.
  if [[ -z "$job_list" ]]; then
    continue
  fi

  prompt "Scheduling $job_list\n"
  if confirm; then
    # ${lang%~*} drops a trailing "~suffix" from the language argument.
    clean_job_args=(
      -J "clean-${lang%~*}-${collection}"
      -a "$job_list"
      --time 24:00:00
      --cpus-per-task 4 # because more memory
      -e "${SLURM_LOGS}/09.clean-%A_%a.err"
      -o "${SLURM_LOGS}/09.clean-%A_%a.out"
      "${SCRIPTS}/generic.slurm" "$batch_list"
      "${SCRIPTS}/09.clean" "${collection}" "${lang%~*}"
      "${COLLECTIONS[$collection]}-shards/${TARGET_LANG}"
    )
    schedule "${clean_job_args[@]}"
  fi
done