-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbaseline.jl
81 lines (70 loc) · 1.85 KB
/
baseline.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
using ArgParse
using TextGrams
global settings
"""
    parse_commandline()

Declare this script's command-line interface with ArgParse and parse the
process arguments.

Returns the result of `parse_args(s)` for the table declared below. The
entries are `--precedent`/`-p`, `--verbose`/`-v`, `--ignore-results`,
`--ngrams`/`-n` and the positional `FILES`.
"""
function parse_commandline()
    s = ArgParseSettings()
    @add_arg_table s begin
        "--precedent", "-p"
            help = "set the 'precedent' file upon which all other ngrams will be based"
            nargs = 1
        "--verbose", "-v"
            help = "show progress updates"
            action = :store_true
        "--ignore-results"
            help = "don't show key/value result"
            action = :store_true
        "--ngrams", "-n"
            help = "the N in ngrams (e.g. '3' to create up to 3-grams)"
            # Concrete `Int` rather than the abstract `Integer`: the text given on
            # the command line has to be converted to this type, and a concrete
            # integer type is an unambiguous conversion target. The default `3`
            # is already an `Int`, so the no-flag behaviour is unchanged.
            arg_type = Int
            default = 3
        "FILES"
            help = "files or directories to include in baseline"
            required = true
            nargs = '*'
    end
    return parse_args(s)
end
# Parse the command line once at load time. The functions and top-level code
# below read this global by key (e.g. settings["verbose"]).
settings = parse_commandline()
"""
    msg(m...)

Print the arguments `m...` as one line on `STDERR`, but only when the global
`settings["verbose"]` flag is set. Returns `nothing` either way.
"""
function msg(m...)
    # Guard clause: stay silent unless verbose output was requested.
    settings["verbose"] || return nothing
    return println(STDERR, m...)
end
"""
    maybe_timed(fn::Function, m...)

Announce `m...` through `msg`, then call `fn()`. When the global
`settings["verbose"]` flag is set, the call is bracketed by `tic()` / `toc()`.

Returns the value of `toc()` when verbose, otherwise `nothing`. The result of
`fn()` itself is discarded. An exception raised by `fn()` propagates to the
caller.
"""
function maybe_timed(fn::Function, m...)
    msg(m...)
    # Read the flag once so the tic()/toc() pair is always balanced, even if
    # `fn` were to change `settings` while it runs.
    timed = settings["verbose"]
    elapsed = nothing
    timed && tic()
    try
        fn()
    finally
        # Always stop the timer started above; otherwise a throwing `fn` would
        # leave an unmatched tic() behind for a later toc() to pick up.
        if timed
            elapsed = toc()
        end
    end
    return elapsed
end
# Seed `baseline`: built from the precedent file when exactly one was given with
# --precedent, otherwise an empty `Ngrams()`.
baseline =
    if length(settings["precedent"]) == 1
        msg("Using precedent ", settings["precedent"][1])
        # The do-block form of `open` closes the handle when the block exits,
        # including on error; the bare `open(path)` used before never closed it.
        # NOTE(review): assumes `Ngrams(Document(io), n)` has read everything it
        # needs from `io` by the time it returns - confirm against TextGrams.
        open(settings["precedent"][1]) do io
            Ngrams(Document(io), settings["ngrams"])
        end
    else
        msg("No precedent")
        Ngrams()
    end
# Fold the n-grams of every file yielded by `fileProducer` into `baseline`,
# reporting the size change per file when verbose.
for file in @task(fileProducer(settings["FILES"]))
    baselineSizeBefore = length(baseline)
    msg("Loading ", file)
    # The do-block form of `open` closes the handle when the block exits,
    # including on error; the bare `open(file)` used before leaked one handle
    # per input file.
    # NOTE(review): assumes `Ngrams(Document(io), n)` has read everything it
    # needs from `io` by the time it returns - confirm against TextGrams.
    ngrams = open(file) do io
        Ngrams(Document(io), settings["ngrams"])
    end
    if length(settings["precedent"]) == 1
        # A precedent was supplied: combine with leftJoinAdd!.
        msg("Intersecting")
        leftJoinAdd!(baseline, ngrams)
    else
        # No precedent: combine with unionAdd!.
        msg("Unioning")
        unionAdd!(baseline, ngrams)
    end
    baselineSize = length(baseline)
    msg(baselineSizeBefore, " (+ ", (baselineSize - baselineSizeBefore), " of ", length(ngrams), ")")
end
# Final report: one "key<TAB>value" line on standard output per entry of
# `baseline`, unless --ignore-results was given.
msg("----- RESULTS -----")
if settings["ignore-results"]
    # Output suppressed on request.
else
    for (key, value) in baseline
        println(key, "\t", value)
    end
end