Skip to content

Commit

Permalink
feat(solution): add function which generates a classification mask
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed May 2, 2019
1 parent 65e395a commit fea5eed
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 0 deletions.
29 changes: 29 additions & 0 deletions solver/Solution.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,35 @@ class Solution {
this.score = (score.confidence / score.coverage) * (score.coverage / tokenizer.coverage)
}

// return a mask of the input for this solution
// which shows the areas covered by different types of classification
// N = housenumber, S = street, P = postcode, A = administrative
mask (tokenizer) {
// use the original input, mask should be the same length
let body = tokenizer.span.body
let mask = Array(body.length).fill(' ')
let map = { 'housenumber': 'N', 'street': 'S', 'postcode': 'P', 'default': 'A' }

// scan the input letter-by-letter from left-to-right
for (let i = 0; i < body.length; i++) {
// find which fields cover this character (should only be covered by 0 or 1 field)
let coveredBy = this.pair.filter(p => p.span.start <= i && p.span.end >= i)

if (coveredBy.length) {
let label = coveredBy[0].classification.label
let code = map.hasOwnProperty(label) ? map[label] : map.default
for (let j = coveredBy[0].span.start; j < coveredBy[0].span.end; j++) {
mask[j] = code
}

// skip forward to avoid scanning the same token again
i = coveredBy[0].span.end
}
}

return mask.join('')
}

// @todo implement this
// equals(solution) {}
}
Expand Down
58 changes: 58 additions & 0 deletions solver/Solution.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
const Solution = require('./Solution')
const AddressParser = require('../parser/AddressParser')
const Tokenizer = require('../tokenization/Tokenizer')

module.exports.tests = {}

module.exports.tests.constructor = (test) => {
  // checks the state of a Solution created without any arguments:
  // `pair` must deep-equal an empty array and `score` must equal zero
  test('constructor', (t) => {
    const solution = new Solution()
    const { pair, score } = solution

    t.deepEquals(pair, [])
    t.equals(score, 0)
    t.end()
  })
}

// @todo
// module.exports.tests.copy = (test) => {}

// @todo
// module.exports.tests.covers = (test) => {}

// @todo
// module.exports.tests.coversSameClassification = (test) => {}

// @todo
// module.exports.tests.computeScore = (test) => {}

module.exports.tests.mask = (test) => {
  // Each case tokenizes an input, classifies and solves it, then compares the
  // mask of the first solution against the expected string.
  //
  // A mask has the same length as the input, so the expected value must keep
  // the exact run of spaces under every unclassified stretch of text. The
  // padding is built with String.prototype.repeat so that it stays intact if
  // this file is ever passed through something that collapses whitespace.
  const parser = new AddressParser()

  test('mask', (t) => {
    // 'Kaschk Bar, ' (12 chars) is unclassified, followed by
    // street (12), housenumber (2), postcode (5) and administrative (6)
    const tokenizer = new Tokenizer('Kaschk Bar, Linienstraße 40 10119 Berlin')
    parser.classify(tokenizer)
    parser.solve(tokenizer)

    const expected = `${' '.repeat(12)}SSSSSSSSSSSS NN PPPPP AAAAAA`
    t.equal(tokenizer.solution[0].mask(tokenizer), expected)
    t.end()
  })

  test('mask', (t) => {
    // 'Foo Cafe ' (9 chars) and the trailing ' Earth' (6 chars) are unclassified
    const tokenizer = new Tokenizer('Foo Cafe 10 Main St London 10010 Earth')
    parser.classify(tokenizer)
    parser.solve(tokenizer)

    const expected = `${' '.repeat(9)}NN SSSSSSS AAAAAA PPPPP${' '.repeat(6)}`
    t.equal(tokenizer.solution[0].mask(tokenizer), expected)
    t.end()
  })
}

module.exports.all = (tape, common) => {
  // wrap tape so that every test name is prefixed with the suite name
  const test = (name, testFunction) => tape(`Solution: ${name}`, testFunction)

  // invoke each registered test group with the prefixed runner
  const groups = module.exports.tests
  for (const groupName in groups) {
    groups[groupName](test, common)
  }
}

0 comments on commit fea5eed

Please sign in to comment.