starter.sh
#!/bin/bash
set -e
scripts_dir=$(cd "$(dirname "$0")" && pwd -P)
source "$scripts_dir/utils.sh"
# the Jena Java tools run in memory, so we increase the default maximum heap size
JVM_ARGS="-Xmx28G"
export JVM_ARGS
# Defaults, which can be overridden through .env or the 'environment' section in the docker-compose config
if [ -z "$BASE_DIR" ]; then
  BASE_DIR="opt"
fi
if [ -z "$SHAPE_DIR" ]; then
  SHAPE_DIR="shapes"
fi
if [ -z "$DATA_DIR" ]; then
  DATA_DIR="data"
fi
if [ -z "$QUERY_DIR" ]; then
  QUERY_DIR="queries"
fi
if [ -z "$VAR_PROVIDER" ]; then
  VAR_PROVIDER="unknown"
fi
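# For example, a .env file or docker-compose 'environment' entries could set
# (illustrative values; the provider name below is hypothetical):
#   DATA_DIR=data
#   QUERY_DIR=queries
#   VAR_PROVIDER=example-provider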
# default format for map and serialize service
format='RDF/XML'
# supported RDF formats are:
# Turtle
# RDF/XML
# N-Triples
# JSON-LD
# RDF/JSON
# TriG
# N-Quads
# TriX
# RDF Binary
#
# other non-RDF formats are: text, XML, JSON, CSV, TSV.
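# For example, to request N-Triples output instead of the RDF/XML default:
#   --format N-Triples
# For serialize this value is passed to 'riot --output', for map to 'sparql --results'.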
# set the EDM shape file as default for validation
rel_shape_file=$SHAPE_DIR/"shacl_edm.ttl"
shape_file=/$BASE_DIR/$rel_shape_file
# set the schema2edm query as default query for the mapping process
rel_query_file=$QUERY_DIR/"schema2edm.rq"
query_file=/$BASE_DIR/$rel_query_file
# Parse command line arguments
while [[ "$#" > 1 ]]; do case $1 in
--shape)
rel_shape_file=$SHAPE_DIR/$2
shape_file=/$BASE_DIR/$rel_shape_file
shift; shift
;;
--data)
rel_data_file=$DATA_DIR/$2
data_file=/$BASE_DIR/$rel_data_file
shift; shift
;;
--output)
rel_output_file=$DATA_DIR/$2
output_file=/$BASE_DIR/$rel_output_file
shift; shift
;;
--query)
rel_query_file=$QUERY_DIR/$2
query_file=/$BASE_DIR/$rel_query_file
shift; shift
;;
--format)
format=$2
shift; shift
;;
--dataset-uri)
dataset_uri=$2
shift; shift
;;
--description-only)
description_only="true"
shift
;;
--provider)
VAR_PROVIDER=$2
shift; shift
;;
*) break;;
esac;
done
case $TOOL in
  crawl)
    echo
    echo "Checking input parameters..."
    echo
    check_arg_and_exit_on_error "dataset-uri" $dataset_uri
    check_arg_and_exit_on_error "output" $output_file
    if [ -z "$description_only" ]; then
      echo "Starting crawling dataset $dataset_uri..."
      cd /app/crawler/
      ./crawler.sh -dataset_uri $dataset_uri -output_file $output_file &> /$BASE_DIR/$DATA_DIR/crawler.log
      echo
      echo "Ready, results (if any) written to $rel_output_file, check $DATA_DIR/crawler.log for details..."
    else
      echo "Starting crawling dataset $dataset_uri..."
      cd /app/crawler/
      ./crawler.sh -dataset_uri $dataset_uri -dataset_description_only -output_file $output_file &> /$BASE_DIR/$DATA_DIR/crawler.log
      echo
      echo "Ready, results (if any) written to $rel_output_file, check $DATA_DIR/crawler.log for details..."
    fi
    ;;
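  # Example crawl run (hypothetical URI and file name; TOOL is assumed to be set in
  # the environment, e.g. by the docker-compose service definition):
  #   TOOL=crawl ./starter.sh --dataset-uri https://example.org/dataset --output crawled.nt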
  convert)
    echo
    echo "Checking input parameters..."
    echo
    check_arg_and_exit_on_error "data" $data_file
    check_arg_and_exit_on_error "output" $output_file
    echo "Starting conversion to XML for $data_file..."
    cd /app/crawler/
    ./rdf2edm.sh -input_file $data_file -output_file $output_file &> /$BASE_DIR/$DATA_DIR/converter.log
    echo
    echo "Ready, results (if any) written to $rel_output_file, check $DATA_DIR/converter.log for details..."
    ;;
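  # Example convert run (hypothetical file names), turning crawled RDF into EDM XML
  # via rdf2edm.sh:
  #   TOOL=convert ./starter.sh --data crawled.nt --output records.xml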
  validate)
    echo
    echo "Checking input parameters..."
    echo
    check_arg_and_exit_on_error "shape" $shape_file
    check_arg_and_exit_on_error "data" $data_file
    check_arg_and_exit_on_error "output" $output_file
    echo "Starting SHACL validation with shapes: $rel_shape_file on input data $rel_data_file..."
    shacl validate --shapes $shape_file --data $data_file > $output_file
    echo
    echo "Ready, results written to $rel_output_file..."
    ;;
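  # Example validate run (hypothetical file names); without --shape the default
  # shapes/shacl_edm.ttl is used:
  #   TOOL=validate ./starter.sh --data mapped.rdf --output validation-report.txt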
  map)
    echo
    echo "Checking input parameters..."
    echo
    if [ "$VAR_PROVIDER" == "unknown" ]; then
      echo "Error: provider value cannot be 'unknown'!"
      echo "Please specify the provider through the environment variable VAR_PROVIDER or the option {--provider <name>}."
      exit 1
    fi
    check_arg_and_exit_on_error "query" $query_file
    check_arg_and_exit_on_error "data" $data_file
    check_arg_and_exit_on_error "output" $output_file
    # Replace the VAR_PROVIDER placeholder in the query with the value of $VAR_PROVIDER.
    # Work on a copy to keep the original query untouched.
    tmp_query=/$BASE_DIR/$QUERY_DIR/tmp.rq
    cp $query_file $tmp_query
    sed -i "s/VAR_PROVIDER/$VAR_PROVIDER/g" $tmp_query
    echo "Starting conversion with query: $rel_query_file and input data: $rel_data_file..."
    sparql --query $tmp_query --data $data_file --results $format --time > $output_file
    echo "Ready, results written to $rel_output_file..."
    ;;
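  # Example map run (hypothetical names); without --query the default
  # queries/schema2edm.rq is used:
  #   TOOL=map ./starter.sh --data crawled.nt --output mapped.rdf --provider example-provider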
  serialize)
    echo
    echo "Checking input parameters..."
    echo
    check_arg_and_exit_on_error "data" $data_file
    check_arg_and_exit_on_error "output" $output_file
    check_arg_and_exit_on_error "format" $format
    echo "Starting serialization to $format format on input data: $rel_data_file..."
    riot --output=$format $data_file > $output_file
    echo
    echo "Ready, results written to $rel_output_file..."
    ;;
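  # Example serialize run (hypothetical file names), re-serializing the mapped data
  # as Turtle:
  #   TOOL=serialize ./starter.sh --data mapped.rdf --output mapped.ttl --format Turtle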
  *)
    echo "Error: environment variable TOOL invalid or unset: '$TOOL'";;
esac