#!/bin/bash
set -o errexit
set -o nounset
set -o pipefail
# DATA-SETS
train_set="DATA/txt/semeval_train2016.txt"
test_sets="DATA/txt/Twitter2013_raws.txt
DATA/txt/Twitter2014_raws.txt
DATA/txt/Twitter2015_raws.txt
DATA/txt/Twitter2016_raws.txt"
embeddings=DATA/txt/struc_skip_400.txt
# Model
model=nlse
# Geometry
sub_size=10
# Weight initialization
init_sub=0.1
init_clas=0.7
# Optimization
n_epoch=12
lrate=0.005
randomize=True
dropout=0.
# Cost function
neutral_penalty=0.25
update_emb=True
work_folder=DATA/pkl/$(basename $train_set .txt)/
model_path=$work_folder/models/$(basename $embeddings .txt)/sub_size${sub_size}.$dropout/
model_name=semeval2016.pkl
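# With the values above these resolve (modulo duplicate slashes) to
#   work_folder = DATA/pkl/semeval_train2016/
#   model_path  = DATA/pkl/semeval_train2016/models/struc_skip_400/sub_size10.0./
# so the trained model will be written to $model_path/semeval2016.pkl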
# OTHER
if [ ! -d "$model_path" ];then
mkdir -p "$model_path"
fi
# First you create the index and global vocabulary from the text-based data.
# This will store pickle files with the same file names as the txt files under
#
#   DATA/pkl/
#
# It will also store a wrd2idx.pkl containing a dictionary that maps each word
# to an integer index. Any number of txt files in the same format can be
# indexed this way.
printf "\033[34mIndexing all data\033[0m\n"
python scripts/extract.py -o $work_folder -f $train_set $test_sets
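# Optional sanity check (a sketch, commented out by default). It assumes that
# wrd2idx.pkl is written directly under $work_folder and is a plain pickled
# dict mapping each word to its integer index, as described above.
# python -c "import pickle; d = pickle.load(open('${work_folder}wrd2idx.pkl', 'rb')); print('vocabulary size:', len(d))"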
# The next step is to select the pre-trained embeddings for the words in your
# vocabulary and use them to build the embedding matrix. This is done with
printf "\033[34mExtracting embeddings for all data\033[0m\n"
python scripts/extract.py -o $work_folder -e $embeddings
# Note that this step can be a source of problems: words that are not in your
# embeddings are assigned a zero embedding, which can be counter-productive in
# some cases.
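# Optional coverage check (a sketch, commented out by default). It assumes the
# embeddings txt file has one word per line followed by its vector values, and
# that wrd2idx.pkl is the pickled word-to-index dict from the indexing step.
# python -c "import pickle; vocab = set(pickle.load(open('${work_folder}wrd2idx.pkl', 'rb'))); known = set(line.split(' ', 1)[0] for line in open('$embeddings')) & vocab; print('%d / %d vocabulary words have a pre-trained embedding' % (len(known), len(vocab)))"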
printf "\033[34mTraining\033[0m\n"
python scripts/train.py -o $work_folder -e $embeddings \
-m $model_path/$model_name \
-model $model \
-lrate $lrate \
-n_epoch $n_epoch \
-neutral_penalty $neutral_penalty \
-randomize $randomize \
-update_emb $update_emb \
-sub_size $sub_size \
-init_sub $init_sub \
-init_clas $init_clas \
-dropout $dropout
# Finally, to get the SemEval results, run
printf "\033[34mTesting\033[0m\n"
python scripts/test.py -o $work_folder \
-m $model_path/$model_name \
-f $test_sets -model $model