-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
80 lines (70 loc) · 2.02 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
const Sql = require('sql-extra');
const lunr = require('lunr');
const path = require('path');
// Shared module state; every binding below starts empty and is filled in by load().

// Records copied from ./corpus by loadCorpus(), keyed by the corpus key.
const corpus = new Map();
// [begin, end] number pairs -> group code, filled by setupRanges().
const ranges = new Map();
// Single entry strings -> group code, filled by setupRanges().
const exacts = new Map();
// lunr search index; stays null until setupIndex() has run.
let index = null;
// Set to true once load() has completed, so later calls return immediately.
let ready = false;
/**
 * Copies every [key, record] pair yielded by the ./corpus module into the
 * shared `corpus` map.
 */
function loadCorpus() {
  const source = require('./corpus');
  for (const [key, record] of source) {
    corpus.set(key, record);
  }
}
/**
 * Builds the lunr search index over every record currently in `corpus` and
 * stores it in the module-level `index`.
 *
 * Records are referenced by their `code`; the `code`, `group` and `tags`
 * fields are indexed, and lunr's stop-word filter is removed from the pipeline.
 */
function setupIndex() {
  const searchableFields = ['code', 'group', 'tags'];
  index = lunr(function configure() {
    // lunr invokes this callback with the index under construction as `this`.
    const builder = this;
    builder.ref('code');
    for (const name of searchableFields) {
      builder.field(name);
    }
    builder.pipeline.remove(lunr.stopWordFilter);
    for (const record of corpus.values()) {
      builder.add(record);
    }
  });
}
/**
 * Fills the `exacts` and `ranges` lookup maps from the `entries` string of
 * every corpus record.
 *
 * `entries` is split on ';'. A piece without '-' is stored in `exacts` as
 * piece -> code. A piece with '-' is split on '-', each part is parsed as a
 * base-10 integer, and the resulting array is stored in `ranges` as
 * array -> code.
 */
function setupRanges() {
  for (const record of corpus.values()) {
    const code = record.code;
    record.entries.split(';').forEach((entry) => {
      if (entry.includes('-')) {
        const bounds = entry.split('-').map((part) => parseInt(part, 10));
        ranges.set(bounds, code);
      } else {
        exacts.set(entry, code);
      }
    });
  }
}
/**
 * Returns the path of the `index.csv` file located in this module's directory.
 *
 * @returns {string} `__dirname` joined with 'index.csv'.
 */
function csv() {
  const fileName = 'index.csv';
  return path.join(__dirname, fileName);
}
/**
 * Delegates to `Sql.setupTable` to describe the corpus as a table.
 *
 * @param {string} [tab='groups'] - table name passed to Sql.setupTable.
 * @param {object} [opt={}] - options merged over the defaults below; keys in
 *   `opt` override the defaults.
 * @returns whatever `Sql.setupTable` returns.
 */
function sql(tab='groups', opt={}) {
  // NOTE(review): `entries` is declared INT here, while setupRanges() splits it
  // as a ';'-separated string - confirm which type the corpus really holds.
  const columns = {code: 'TEXT', group: 'TEXT', entries: 'INT', tags: 'TEXT'};
  const rows = require('./corpus').values();
  const defaults = {
    pk: 'code',
    index: true,
    tsvector: {code: 'A', group: 'B', tags: 'C'},
  };
  const options = Object.assign(defaults, opt);
  return Sql.setupTable(tab, columns, rows, options);
}
/**
 * Prepares the module for searching: loads the corpus, builds the search
 * index and builds the entry lookup maps. The work is skipped when the
 * `ready` flag is already set.
 *
 * @returns {boolean} always true.
 */
function load() {
  if (!ready) {
    loadCorpus();
    setupIndex();
    setupRanges();
    ready = true;
  }
  return true;
}
/**
 * Resolves an entry string to a group code.
 *
 * An exact hit in `exacts` wins. Otherwise the entry is parsed as a base-10
 * integer and the first range in `ranges` whose bounds include it (both ends
 * inclusive) supplies the code.
 *
 * @param {string} entry - entry text to look up.
 * @returns the matching code, or null when nothing matches.
 */
function findEntry(entry) {
  if (exacts.has(entry)) {
    return exacts.get(entry);
  }
  const value = parseInt(entry, 10);
  for (const [bounds, code] of ranges.entries()) {
    const [low, high] = bounds;
    // Keep this exact comparison form: when `value` is NaN it is false, so
    // unparsable input falls through to null.
    if (low <= value && value <= high) {
      return code;
    }
  }
  return null;
}
/**
 * Finds the corpus records that best match a text query.
 *
 * Every non-word character in `txt` is replaced with a space, then the
 * cleaned text is searched in the lunr index. Only the matches whose
 * `matchData.metadata` has the largest number of keys are kept. The cleaned
 * text is also passed to `findEntry`; when that yields a code, the record for
 * that code is placed first in the result, without being duplicated.
 *
 * @param {string} txt - query text.
 * @returns {Array} matching corpus records; empty when the index has not been
 *   built yet (load() not called) or when nothing matches.
 */
function groups(txt) {
  if (index == null) return [];
  const query = txt.replace(/\W/g, ' ');

  // Count the metadata keys of each match once, tracking the best count.
  const hits = [];
  let best = 0;
  for (const match of index.search(query)) {
    const terms = Object.keys(match.matchData.metadata).length;
    hits.push({ ref: match.ref, terms });
    best = Math.max(best, terms);
  }

  const found = [];
  for (const hit of hits) {
    if (hit.terms === best) found.push(corpus.get(hit.ref));
  }

  const code = findEntry(query);
  if (!code) return found;

  // Move the entry-matched record to the front, dropping its earlier position
  // so it is not listed twice.
  const top = corpus.get(code);
  const at = found.indexOf(top);
  if (at !== -1) found.splice(at, 1);
  found.unshift(top);
  return found;
}
// Public API: the module exports the `groups` search function, with the csv,
// sql and load helpers and the shared corpus map attached as properties.
Object.assign(groups, { csv, sql, load, corpus });
module.exports = groups;