Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DO NOT MERGE] Add validateValueType function to ensure data type integrity #1002

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ const networkSourceData = {
CIDR: '255.255.255.0',
name: 'My Network',
notInDataModel: 'Not In Data Model',
owner: { name: 'Bob' },
summary: [{ title: 'Summary' }, { description: 'Description' }],
};

const networkResourceEntity = {
Expand Down Expand Up @@ -118,20 +116,6 @@ describe('createIntegrationEntity', () => {
expect('notWhitelisted' in entity).toBe(false);
});

test('ignore source properties that are object types', () => {
const entity = createIntegrationEntity({
entityData,
});
expect('owner' in entity).toBe(false);
});

test('ignore source properties that are object[] types', () => {
const entity = createIntegrationEntity({
entityData,
});
expect('summary' in entity).toBe(false);
});

test('handles empty tags in source data', () => {
const entity = createIntegrationEntity({
entityData: {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/* eslint-disable no-console */
import { validateValueType } from '../createIntegrationEntity';

describe('validateValueType', () => {
test('should accept string type', () => {
expect(() => validateValueType('Sample String', 'testPath')).not.toThrow();
});

test('should accept number type', () => {
expect(() => validateValueType(123, 'testPath')).not.toThrow();
});

test('should accept boolean type', () => {
expect(() => validateValueType(true, 'testPath')).not.toThrow();
});

test('should accept null type', () => {
expect(() => validateValueType(null, 'testPath')).not.toThrow();
});

test('should accept array of supported types', () => {
expect(() => validateValueType([1, 'a', true], 'testPath')).not.toThrow();
});

test('should validate nested arrays with supported types', () => {
expect(() =>
validateValueType([1, ['a', 2], [true, false]], 'testPath'),
).not.toThrow();
});

test('should reject nested arrays with unsupported types', () => {
expect(() =>
validateValueType([1, ['a', { key: 'value' }], true], 'testPath'),
).toThrow();
});

test('should reject object type', () => {
expect(() => validateValueType({ key: 'value' }, 'testPath')).toThrow();
});

test('should reject unsupported type in array', () => {
expect(() =>
validateValueType([1, 2, { key: 'value' }], 'testPath'),
).toThrow();
});
});

describe('validateValueType - successful performance tests', () => {
const largeDatasetSize = 500000;

function generateLargeDataset(type) {
switch (type) {
case 'string':
return new Array(largeDatasetSize).fill('testString');
case 'number':
return Array.from({ length: largeDatasetSize }, (_, i) => i);
case 'boolean':
return new Array(largeDatasetSize).fill(true);
case 'array':
return new Array(largeDatasetSize).fill([1, 2, 3]);
case 'undefined':
return new Array(largeDatasetSize).fill(undefined);
default:
throw new Error(`Unsupported type: ${type}`);
}
}

['string', 'number', 'boolean', 'array', 'undefined'].forEach((type) => {
test(`should handle large dataset of type ${type} efficiently`, () => {
const largeDataset = generateLargeDataset(type);

const start = performance.now();
largeDataset.forEach((item) =>
validateValueType(item, `testPath-${type}`),
);
const end = performance.now();

const executionTime = end - start;

console.log(`Execution time for ${type} dataset: ${executionTime} ms`);

const acceptableTimeLimit = 1000;
expect(executionTime).toBeLessThan(acceptableTimeLimit);
});
});
});

describe('validateValueType - error Scenarios', () => {
const largeDatasetSize = 500000;

function generateErrorDataset() {
const data: any[] = [];
for (let i = 0; i < largeDatasetSize; i++) {
if (i % 50000 === 0) {
data.push({ key: `value-${i}` });
} else {
data.push(i % 2 === 0 ? `string-${i}` : i);
}
}
return data;
}

test(`should throw error for large dataset with unsupported types`, () => {
const errorDataset = generateErrorDataset();
let executionTime = 0;

const start = performance.now();
try {
errorDataset.forEach((item) =>
validateValueType(item, `testPath-errorCase`),
);
const end = performance.now();
executionTime = end - start;
} catch (error) {
const end = performance.now();
executionTime = end - start;

console.log(`Execution time until error: ${executionTime} ms`);
}

expect(executionTime).toBeGreaterThan(0);

const acceptableErrorDetectionTime = 1000;
expect(executionTime).toBeLessThan(acceptableErrorDetectionTime);

expect(() => {
errorDataset.forEach((item) =>
validateValueType(item, `testPath-errorCase`),
);
}).toThrow();
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import { parseTimePropertyValue } from './converters';
import { validateRawData } from './rawData';
import { assignTags, ResourceTagList, ResourceTagMap } from './tagging';

const SUPPORTED_TYPES = ['string', 'number', 'boolean'];
const SUPPORTED_TYPES = ['string', 'number', 'boolean', 'array', 'undefined'];
i5o marked this conversation as resolved.
Show resolved Hide resolved

/**
* Properties to be assigned to a generated entity which are declared in code
Expand Down Expand Up @@ -179,6 +179,40 @@ function generateEntity({
return entity;
}

/**
* Validates that the provided value conforms to the supported types.
* If the value is an array, this function is called recursively on each element
* to ensure nested arrays are also validated. This function throws an error
* if it encounters a value type that is not supported.
*
* @param value The value to be validated. It can be a single value or an array.
* For arrays, each element is validated recursively.
* @param path The path to the current property being validated, used for error messaging.
* This is updated with each recursive call to reflect the current context.
*/
export function validateValueType(value: any, path: string): void {
// Explicitly allow null values
if (value === null) {
return;
}

if (Array.isArray(value)) {
// If the value is an array, validate each element
value.forEach((item, index) => {
validateValueType(item, `${path}[${index}]`);
});
} else {
// For non-array values, check if the type is supported
const valueType = typeof value;
if (!SUPPORTED_TYPES.includes(valueType)) {
throw new IntegrationError({
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can we keep steps from failing on unsupported data? This is a step towards strictness that'll have any overall impact on job statuses and the ingestion team.
How else could we improve the overall health of jobs and data?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should open a bigger discussion for this but this should be a MAJOR, we have some ideas in the AWS Pod as to how to proceed

code: 'UNSUPPORTED_TYPE',
message: `Unsupported type found at "${path}": ${valueType}`,
});
}
}
}

/**
* Answers a form of the provider data with only the properties supported by the
* data model schema.
Expand All @@ -192,12 +226,14 @@ function whitelistedProviderData(
): Omit<ProviderSourceData, 'tags'> {
const whitelistedProviderData: ProviderSourceData = {};
i5o marked this conversation as resolved.
Show resolved Hide resolved
const schemaProperties = schemaWhitelistedPropertyNames(_class);

for (const [key, value] of Object.entries(source)) {
// Ensure the property is part of the schema and not null
if (value != null && schemaProperties.includes(key)) {
const valueType = Array.isArray(value) ? typeof value[0] : typeof value;
if (SUPPORTED_TYPES.includes(valueType)) {
whitelistedProviderData[key] = value;
}
if (key != 'tags') validateValueType(value, key);

// If validation passes, assign the value to the whitelisted data
whitelistedProviderData[key] = value;
}
}
return whitelistedProviderData;
Expand Down
Loading