-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathexample.sh
executable file
·63 lines (47 loc) · 3.35 KB
/
example.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/bin/bash
# Directory that contains train/validation/test data etc.
DATA_HOME=sample_data/java/
# Directory in which the model will be saved.
MODEL_DIR=sample_data/java/model
mkdir $MODEL_DIR
# Filenames
TRAIN_FILE=java_training_slp_pre_enc_bpe_10000
VALIDATION_FILE=java_validation_slp_pre_enc_bpe_10000
TEST_FILE=java_test_slp_pre_enc_bpe_10000
TEST_PROJ_NAMES_FILE=testProjects
ID_MAP_FILE=sample_data/java/id_map_java_test_slp_pre_bpe_10000
# Maximum training epochs
EPOCHS=5
# Initial learning rate
LR=0.1 # This is the default value. You can skip it if you don't want to change it.
# Training batch size
BATCH_SIZE=32 # This is also the default.
# RNN unroll timesteps for gradient calculation.
STEPS=200 # This also the default.
# 1 - Dropout probability
KEEP_PROB=0.5 # This is also the default.
# RNN hidden state size
STATE_DIMS=512 # This is also the default.
# Checkpoint and validation loss calculation frequency.
CHECKPOINT_EVERY=5000 # This is also the default.
# Understanding boolean options.
# Most boolean options are set to False by default.
# For using any boolean option set it to True.
# For instance for using a GRU instead of an LSTM add to your command the option --gru True.
# Train a small java model for 1 epoch.
python code_nlm.py --data_path $DATA_HOME --train_dir $MODEL_DIR --train_filename $TRAIN_FILE --validation_filename $VALIDATION_FILE --gru True --hidden_size $STATE_DIMS --batch_size $BATCH_SIZE --word_level_perplexity True --cross_entropy True --steps_per_checkpoint $CHECKPOINT_EVERY --max_epoch $EPOCHS
# Because we are using the default values we could have shortened the command to:
# python code_nlm.py --data_path $DATA_HOME --train_dir $MODEL_DIR --train_filename $TRAIN_FILE --validation_filename $VALIDATION_FILE --gru True --word_level_perplexity True --cross_entropy True --max_epoch $EPOCHS
# Testing the model (Calculating test set entropy)
python code_nlm.py --test True --data_path $DATA_HOME --train_dir $MODEL_DIR --test_filename $TEST_FILE --gru True --batch_size $BATCH_SIZE --word_level_perplexity True --cross_entropy True
# Test a dynamically adapted version of the model.
# Batch size must always be set to 1 for this scenario!
python code_nlm.py --dynamic_test True --data_path $DATA_HOME --train_dir $MODEL_DIR --test_filename $TEST_FILE --gru True --batch_size 1 --word_level_perplexity True --cross_entropy True --test_proj_filename $TEST_PROJ_NAMES_FILE
# Code completion
python code_nlm.py --completion True --data_path $DATA_HOME --train_dir $MODEL_DIR --test_filename $TEST_FILE --gru True --batch_size $BATCH_SIZE
# Dynamic Code Completion
python code_nlm.py --completion True --dynamic True --data_path $DATA_HOME --train_dir $MODEL_DIR --test_filename $TEST_FILE --gru True --batch_size $BATCH_SIZE --test_proj_filename $TEST_PROJ_NAMES_FILE
# Dynamic Code Completion and Measuring Identifier Performance
python code_nlm.py --completion True --dynamic True --data_path $DATA_HOME --train_dir $MODEL_DIR --test_filename $TEST_FILE --gru True --batch_size $BATCH_SIZE --test_proj_filename $TEST_PROJ_NAMES_FILE --identifier_map $ID_MAP_FILE
# Adding a Simple Identifier n-gram Cache
python code_nlm.py --completion True --dynamic True --data_path $DATA_HOME --train_dir $MODEL_DIR --test_filename $TEST_FILE --gru True --batch_size $BATCH_SIZE --test_proj_filename $TEST_PROJ_NAMES_FILE --identifier_map $ID_MAP_FILE --cache_ids True