Merge pull request #426 from Gapminder/fix-dup-datapoint
fixed(rules): error in datapoint duplications
buchslava authored Jun 14, 2017
2 parents baeda45 + 0d24466 commit 2227b28
Showing 8 changed files with 70 additions and 70 deletions.
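For orientation before reading the diff: both duplicated-data-point rules stop keying their state by a per-rule Symbol in a shared Map and instead keep a single module-level storage object that is rebuilt via initStorage()/resetStorage(). The duplicate check itself now derives a readable composite key from the sorted primary-key dimensions plus the indicator column(s), rather than from an md5 of the raw key values. A minimal sketch of that key construction — the column names and sample record mirror the test fixture further down, and the numeric value is invented:

```ts
import { difference } from 'lodash';

// Descriptor fields as they appear in the rule below; the record value is made up.
const primaryKey = ['geo', 'year'];
const headers = ['geo', 'year', 'gas_production_bcf'];
const record: Record<string, string> = { geo: 'algeria', year: '1977', gas_production_bcf: '233.9' };

// Sort the key columns (copied here to avoid mutating the descriptor), pair each with its value,
// and append the non-key columns as the indicator name.
const sortedPrimaryKey = [...primaryKey].sort();
const dimensionData = sortedPrimaryKey.map(keyPart => `${keyPart}:${record[keyPart]}`).join(',');
const indicatorName = difference(headers, primaryKey).join(',');
const recordHash = `${dimensionData}@${indicatorName}`;

console.log(recordHash); // geo:algeria,year:1977@gas_production_bcf
```

Because the key now carries both the dimension names and the indicator, the duplicate reported in the translation test below reads as 'anno:2016,company:mic@company_size_string'.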
1 change: 1 addition & 0 deletions .gitignore
@@ -7,3 +7,4 @@ npm-debug.log
coverage
_results
_results-test
package-lock.json
2 changes: 1 addition & 1 deletion package.json
@@ -12,7 +12,7 @@
"test-travis": "istanbul cover mocha _mocha -- -R spec --timeout 200000 --compilers ts:ts-node/register --recursive test/**/*.spec.ts && codecov",
"changelog": "conventional-changelog -i CHANGELOG.md -s -p angular",
"github-release": "conventional-github-releaser -p angular",
"build": "tsc && touch lib/package.json && echo \\{\\\"version\\\": \\\"1.6.1\\\"\\} > lib/package.json",
"build": "tsc && touch lib/package.json && echo \\{\\\"version\\\": \\\"1.6.2\\\"\\} > lib/package.json",
"prepublish": "npm run build",
"preversion": "npm test",
"version": "npm run changelog && git add CHANGELOG.md",
64 changes: 30 additions & 34 deletions src/ddf-rules/data-point-rules/duplicated-data-point-key.ts
@@ -2,58 +2,54 @@ import { uniq, isEmpty, difference } from 'lodash';
import { DUPLICATED_DATA_POINT_KEY } from '../registry';
import { Issue } from '../issue';

const md5 = require('md5');
let storage: any;

let storage: Map<string, any> = new Map();

const initRuleInStorage = (ruleKey: string) => {
if (!storage.has(ruleKey)) {
storage.set(ruleKey, {
hash: new Set(),
duplicatedHashes: [],
content: {}
});
const initStorage = () => {
if (storage && storage.hash) {
storage.hash.clear();
}

storage = {
hash: new Set(),
duplicatedHashes: [],
content: {}
};
};

initStorage();

export const rule = {
resetStorage: () => {
storage.clear();
initStorage();
},
aggregateRecord: (dataPointDescriptor, ruleKey) => {
const ruleKeyString = Symbol.keyFor(ruleKey);

initRuleInStorage(ruleKeyString);

const keyData = dataPointDescriptor.fileDescriptor.primaryKey.map(keyPart => dataPointDescriptor.record[keyPart]).join(',');
const indicatorKey = difference(dataPointDescriptor.fileDescriptor.headers, dataPointDescriptor.fileDescriptor.primaryKey).join(',');
const recordHash = `${keyData}@${indicatorKey}`;

if (storage.get(ruleKeyString).hash.has(recordHash)) {
storage.get(ruleKeyString).duplicatedHashes.push(recordHash);
aggregateRecord: (dataPointDescriptor) => {
const sortedPrimaryKey = dataPointDescriptor.fileDescriptor.primaryKey.sort();
const dimensionData = sortedPrimaryKey.map(keyPart => `${keyPart}:${dataPointDescriptor.record[keyPart]}`).join(',');
const indicatorName = difference(dataPointDescriptor.fileDescriptor.headers, dataPointDescriptor.fileDescriptor.primaryKey).join(',');
const recordHash = `${dimensionData}@${indicatorName}`;

if (storage.hash.has(recordHash)) {
storage.duplicatedHashes.push(recordHash);
}

storage.get(ruleKeyString).hash.add(recordHash);
storage.hash.add(recordHash);

if (!storage.get(ruleKeyString).content[recordHash]) {
storage.get(ruleKeyString).content[recordHash] = [];
if (!storage.content[recordHash]) {
storage.content[recordHash] = [];
}

storage.get(ruleKeyString).content[recordHash].push({
storage.content[recordHash].push({
file: dataPointDescriptor.fileDescriptor.file,
record: dataPointDescriptor.record,
line: dataPointDescriptor.line
});
},
aggregativeRule: (dataPointDescriptor, ruleKey) => {
const ruleKeyString = Symbol.keyFor(ruleKey);

initRuleInStorage(ruleKeyString);

const duplicates: string[] = <string[]>uniq(storage.get(ruleKeyString).duplicatedHashes);
aggregativeRule: (dataPointDescriptor) => {
const duplicates: string[] = <string[]>uniq(storage.duplicatedHashes);
const data: any[] = [];

for (const hash of duplicates) {
data.push(storage.get(ruleKeyString).content[hash]);
data.push(storage.content[hash]);
}

let issue = null;
@@ -62,7 +58,7 @@ export const rule = {
issue = new Issue(DUPLICATED_DATA_POINT_KEY).setPath(dataPointDescriptor.fileDescriptor.fullPath).setData(data);
}

storage.delete(ruleKeyString);
initStorage();

return issue;
}
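Together with the caller changes in src/index.ts and src/shared.ts further down, the rewritten rule is now driven in a plain reset → aggregate → summarize sequence, with no rule key passed around. A self-contained sketch of that sequence, assuming the file above is imported from the repository root; the fullPath, line numbers and indicator values are hypothetical:

```ts
import { rule } from './src/ddf-rules/data-point-rules/duplicated-data-point-key';

const fileDescriptor = {
  file: 'ddf--datapoints--gas_production_bcf--by--geo--year.csv',
  fullPath: '/data/ddf--datapoints--gas_production_bcf--by--geo--year.csv', // hypothetical path
  primaryKey: ['geo', 'year'],
  headers: ['geo', 'year', 'gas_production_bcf']
};

// Two records sharing the same (geo, year) key, i.e. a duplicated data point.
const rows = [
  { record: { geo: 'algeria', year: '1977', gas_production_bcf: '233.9' }, line: 7 },
  { record: { geo: 'algeria', year: '1977', gas_production_bcf: '245.3' }, line: 9 }
];

rule.resetStorage();

let descriptor: any;
for (const { record, line } of rows) {
  descriptor = { fileDescriptor, record, line };
  rule.aggregateRecord(descriptor); // note: no ruleKey argument any more
}

// Returns an Issue listing both offending records, or null when nothing was duplicated.
const issue = rule.aggregativeRule(descriptor);
```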
@@ -1,40 +1,41 @@
import { uniq, isEmpty } from 'lodash';
import { difference, uniq, isEmpty } from 'lodash';
import { DUPLICATED_DATA_POINT_TRANSLATION_KEY } from '../registry';
import { Issue } from '../issue';

const md5 = require('md5');
let storage: any;

let storage: Map<string, any> = new Map();
const initStorage = () => {
if (storage && storage.hash) {
storage.hash.clear();
}

storage = {
hash: new Set(),
duplicatedPrimaryKeys: []
};
};

initStorage();

export const rule = {
resetStorage: () => {
storage.clear();
initStorage();
},
isTranslation: true,
aggregateRecord: (dataPointDescriptor, ruleKey) => {
const ruleKeyString = Symbol.keyFor(ruleKey);

if (!storage.has(ruleKeyString)) {
storage.set(ruleKeyString, {
hash: new Set(),
duplicatedPrimaryKeys: []
});
}

const keyData = dataPointDescriptor.fileDescriptor.primaryKey
.map(keyPart => dataPointDescriptor.record[keyPart])
.join(',');
const recordHash = md5(keyData);

if (storage.get(ruleKeyString).hash.has(recordHash)) {
storage.get(ruleKeyString).duplicatedPrimaryKeys.push(keyData);
aggregateRecord: (dataPointDescriptor) => {
const sortedPrimaryKey = dataPointDescriptor.fileDescriptor.primaryKey.sort();
const dimensionData = sortedPrimaryKey.map(keyPart => `${keyPart}:${dataPointDescriptor.record[keyPart]}`).join(',');
const indicatorName = difference(dataPointDescriptor.fileDescriptor.headers, dataPointDescriptor.fileDescriptor.primaryKey).join(',');
const recordHash = `${dimensionData}@${indicatorName}`;

if (storage.hash.has(recordHash)) {
storage.duplicatedPrimaryKeys.push(recordHash);
}

storage.get(ruleKeyString).hash.add(recordHash);
storage.hash.add(recordHash);
},
aggregativeRule: (dataPointDescriptor, ruleKey) => {
const ruleKeyString = Symbol.keyFor(ruleKey);
const duplicates = uniq(storage.get(ruleKeyString).duplicatedPrimaryKeys);
aggregativeRule: (dataPointDescriptor) => {
const duplicates = uniq(storage.duplicatedPrimaryKeys);

let issue = null;

@@ -44,7 +45,7 @@ export const rule = {
.setData(duplicates);
}

storage.delete(ruleKeyString);
initStorage();

return issue;
}
2 changes: 1 addition & 1 deletion src/index.ts
@@ -57,7 +57,7 @@ export function createRecordBasedRuleProcessor(context, fileDescriptor, resultHa
fileDescriptor,
record,
line
}, context.ruleKey);
});
}
}
},
2 changes: 1 addition & 1 deletion src/shared.ts
@@ -18,7 +18,7 @@ function createRecordAggregationProcessor(context, ddfDataSet, fileDescriptor, r
}

if (isAggregativeRule(ddfRules[key])) {
ddfRules[key].aggregateRecord({ddfDataSet, fileDescriptor, record, line}, key);
ddfRules[key].aggregateRecord({ddfDataSet, fileDescriptor, record, line});
}
});
};
2 changes: 2 additions & 0 deletions test/data-point-rules.spec.ts
@@ -214,6 +214,7 @@ describe('rules for data points', () => {
data: [
[
{
file: 'ddf--datapoints--gas_production_bcf--by--geo--year.csv',
record: {
geo: 'algeria',
year: '1977',
@@ -222,6 +223,7 @@
line: 7
},
{
file: 'ddf--datapoints--gas_production_bcf--by--geo--year.csv',
record: {
geo: 'algeria',
year: '1977',
14 changes: 7 additions & 7 deletions test/translation-rules.spec.ts
@@ -1,17 +1,17 @@
import * as chai from 'chai';
import {endsWith, isEqual, head, flattenDeep, compact, isEmpty} from 'lodash';
import {parallelLimit} from 'async';
import {createRecordBasedRuleProcessor} from '../src/index';
import {DdfDataSet} from '../src/ddf-definitions/ddf-data-set';
import { endsWith, isEqual, head, flattenDeep, compact, isEmpty } from 'lodash';
import { parallelLimit } from 'async';
import { createRecordBasedRuleProcessor } from '../src/index';
import { DdfDataSet } from '../src/ddf-definitions/ddf-data-set';
import {
UNEXPECTED_TRANSLATION_HEADER,
UNEXPECTED_TRANSLATIONS_DATA,
UNEXPECTED_DATA_POINT_TRANSLATIONS_DATA,
DUPLICATED_DATA_POINT_TRANSLATION_KEY,
DUPLICATED_TRANSLATION_KEY
} from '../src/ddf-rules/registry';
import {allRules} from '../src/ddf-rules';
import {Issue} from '../src/ddf-rules/issue';
import { allRules } from '../src/ddf-rules';
import { Issue } from '../src/ddf-rules/issue';

const CONCURRENT_OPERATIONS_AMOUNT = 30;
const expect = chai.expect;
@@ -265,7 +265,7 @@ describe('translation rules', () => {
const results = compact(tempResults);
const EXPECTED_RESULT = {
path: 'lang/nl-nl/ddf--datapoints--company_size_string--by--company--anno.csv',
data: ['mic,2016']
data: ['anno:2016,company:mic@company_size_string']
};

expect(results.length).to.equal(1);
