From a4bd522ebe1df1d9f0a74d5baf9bbe242a84c349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9l=C3=A8ne=20MJ?= Date: Tue, 7 May 2024 10:11:39 +0200 Subject: [PATCH] Refactor codemeta validation code --- cypress/integration/validation.js | 6 +- index.html | 1 + js/codemeta_generation.js | 16 +- js/validation/index.js | 234 +++++++++++++++++++++++++----- js/validation/primitives.js | 4 + js/validation/things.js | 206 ++------------------------ js/validation/utils.js | 35 +++++ 7 files changed, 258 insertions(+), 244 deletions(-) create mode 100644 js/validation/utils.js diff --git a/cypress/integration/validation.js b/cypress/integration/validation.js index 2161406..bd2073c 100644 --- a/cypress/integration/validation.js +++ b/cypress/integration/validation.js @@ -651,7 +651,7 @@ describe('Person validation', function() { ); cy.get('#validateCodemeta').click(); - cy.get('#errorMessage').should('have.text', 'Unknown field "foo" in "author".'); + cy.get('#errorMessage').should('have.text', 'Unknown field "foo".'); }); it('errors on Person with invalid field', function() { @@ -857,7 +857,7 @@ describe('Organization validation', function() { ); cy.get('#validateCodemeta').click(); - cy.get('#errorMessage').should('have.text', 'Unknown field "foo" in "author".'); + cy.get('#errorMessage').should('have.text', 'Unknown field "foo".'); }); it('errors on Organization with invalid field', function() { @@ -1005,7 +1005,7 @@ describe('CreativeWork validation', function() { ); cy.get('#validateCodemeta').click(); - cy.get('#errorMessage').should('have.text', 'Unknown field "foo" in "isPartOf".'); + cy.get('#errorMessage').should('have.text', 'Unknown field "foo".'); }); it('errors on CreativeWork with invalid field', function() { diff --git a/index.html b/index.html index cda6dc8..9ea85b4 100644 --- a/index.html +++ b/index.html @@ -13,6 +13,7 @@ + diff --git a/js/codemeta_generation.js b/js/codemeta_generation.js index 6dd111d..195e9a0 100644 --- a/js/codemeta_generation.js +++ b/js/codemeta_generation.js @@ -7,8 +7,8 @@ "use strict"; -const LOCAL_CONTEXT_PATH = "./data/contexts/codemeta-local.jsonld"; -const LOCAL_CONTEXT_URL = "local"; +const INTERNAL_CONTEXT_PATH = "./data/contexts/codemeta-local.jsonld"; +const INTERNAL_CONTEXT_URL = "internal"; const CODEMETA_CONTEXTS = { "2.0": { path: "./data/contexts/codemeta-2.0.jsonld", @@ -25,12 +25,12 @@ const SPDX_PREFIX = 'https://spdx.org/licenses/'; const loadContextData = async () => { const [contextLocal, contextV2, contextV3] = await Promise.all([ - fetch(LOCAL_CONTEXT_PATH).then(response => response.json()), + fetch(INTERNAL_CONTEXT_PATH).then(response => response.json()), fetch(CODEMETA_CONTEXTS["2.0"].path).then(response => response.json()), fetch(CODEMETA_CONTEXTS["3.0"].path).then(response => response.json()) ]); return { - [LOCAL_CONTEXT_URL]: contextLocal, + [INTERNAL_CONTEXT_URL]: contextLocal, [CODEMETA_CONTEXTS["2.0"].url]: contextV2, [CODEMETA_CONTEXTS["3.0"].url]: contextV3 } @@ -219,7 +219,7 @@ function generateReview() { async function buildExpandedJson() { var doc = { - "@context": LOCAL_CONTEXT_URL, + "@context": INTERNAL_CONTEXT_URL, "@type": "SoftwareSourceCode", }; @@ -272,10 +272,11 @@ async function buildExpandedJson() { async function generateCodemeta(codemetaVersion = "2.0") { var inputForm = document.querySelector('#inputForm'); var codemetaText, errorHTML; + let compacted; if (inputForm.checkValidity()) { const expanded = await buildExpandedJson(); - const compacted = await jsonld.compact(expanded, CODEMETA_CONTEXTS[codemetaVersion].url); + compacted = await jsonld.compact(expanded, CODEMETA_CONTEXTS[codemetaVersion].url); codemetaText = JSON.stringify(compacted, null, 4); errorHTML = ""; } @@ -293,7 +294,8 @@ async function generateCodemeta(codemetaVersion = "2.0") { // If this finds a validation, it means there is a bug in our code (either // generation or validation), and the generation MUST NOT generate an // invalid codemeta file, regardless of user input. - if (codemetaText && !validateDocument(JSON.parse(codemetaText))) { + const isValid = codemetaText && (await parseAndValidateCodemeta(false)); + if (!isValid) { alert('Bug detected! The data you wrote is correct; but for some reason, it seems we generated an invalid codemeta.json. Please report this bug at https://github.com/codemeta/codemeta-generator/issues/new and copy-paste the generated codemeta.json file. Thanks!'); } diff --git a/js/validation/index.js b/js/validation/index.js index 4035f8c..fdfad61 100644 --- a/js/validation/index.js +++ b/js/validation/index.js @@ -12,16 +12,179 @@ * that are easy to understand for users with no understanding of JSON-LD. */ +const softwareFieldValidators = { + "@id": validateUrl, + "id": validateUrl, -function validateDocument(doc) { - if (!Array.isArray(doc) && typeof doc != 'object') { + "codeRepository": validateUrls, + "programmingLanguage": noValidation, + "runtimePlatform": validateTexts, + "targetProduct": noValidation, // TODO: validate SoftwareApplication + "applicationCategory": validateTextsOrUrls, + "applicationSubCategory": validateTextsOrUrls, + "downloadUrl": validateUrls, + "fileSize": validateText, // TODO + "installUrl": validateUrls, + "memoryRequirements": validateTextsOrUrls, + "operatingSystem": validateTexts, + "permissions": validateTexts, + "processorRequirements": validateTexts, + "releaseNotes": validateTextsOrUrls, + "softwareHelp": validateCreativeWorks, + "softwareRequirements": noValidation, // TODO: validate SoftwareSourceCode + "softwareVersion": validateText, // TODO? + "storageRequirements": validateTextsOrUrls, + "supportingData": noValidation, // TODO + "author": validateActors, + "citation": validateCreativeWorks, // TODO + "contributor": validateActors, + "copyrightHolder": validateActors, + "copyrightYear": validateNumbers, + "creator": validateActors, // TODO: still in codemeta 2.0, but removed from master + "dateCreated": validateDate, + "dateModified": validateDate, + "datePublished": validateDate, + "editor": validatePersons, + "encoding": noValidation, + "fileFormat": validateTextsOrUrls, + "funder": validateActors, // TODO: may be other types + "keywords": validateTexts, + "license": validateCreativeWorks, + "producer": validateActors, + "provider": validateActors, + "publisher": validateActors, + "sponsor": validateActors, + "version": validateNumberOrText, + "isAccessibleForFree": validateBoolean, + "isSourceCodeOf": validateTextsOrUrls, + "isPartOf": validateCreativeWorks, + "hasPart": validateCreativeWorks, + "position": noValidation, + "identifier": noValidation, // TODO + "description": validateText, + "name": validateText, + "sameAs": validateUrls, + "url": validateUrls, + "relatedLink": validateUrls, + "review": validateReview, + + "softwareSuggestions": noValidation, // TODO: validate SoftwareSourceCode + "maintainer": validateActors, + "contIntegration": validateUrls, + "continuousIntegration": validateUrls, + "buildInstructions": validateUrls, + "developmentStatus": validateText, // TODO: use only repostatus strings? + "embargoDate": validateDate, + "embargoEndDate": validateDate, + "funding": validateText, + "issueTracker": validateUrls, + "referencePublication": noValidation, // TODO? + "readme": validateUrls, +}; + +const creativeWorkFieldValidators = { + "@id": validateUrl, + "id": validateUrl, + + "author": validateActors, + "citation": validateCreativeWorks, // TODO + "contributor": validateActors, + "copyrightHolder": validateActors, + "copyrightYear": validateNumbers, + "creator": validateActors, // TODO: still in codemeta 2.0, but removed from master + "dateCreated": validateDate, + "dateModified": validateDate, + "datePublished": validateDate, + "editor": validatePersons, + "encoding": noValidation, + "funder": validateActors, // TODO: may be other types + "keywords": validateTexts, + "license": validateCreativeWorks, + "producer": validateActors, + "provider": validateActors, + "publisher": validateActors, + "sponsor": validateActors, + "version": validateNumberOrText, + "isAccessibleForFree": validateBoolean, + "isPartOf": validateCreativeWorks, + "hasPart": validateCreativeWorks, + "position": noValidation, + "identifier": noValidation, // TODO + "description": validateText, + "name": validateText, + "sameAs": validateUrls, + "url": validateUrls, +}; + +const roleFieldValidators = { + "roleName": validateText, + "startDate": validateDate, + "endDate": validateDate, + + "schema:author": validateActor +}; + +const personFieldValidators = { + "@id": validateUrl, + "id": validateUrl, + + "givenName": validateText, + "familyName": validateText, + "email": validateText, + "affiliation": validateOrganizations, + "identifier": validateUrls, + "name": validateText, // TODO: this is technically valid, but should be allowed here? + "url": validateUrls, +}; + +const organizationFieldValidators = { + "@id": validateUrl, + "id": validateUrl, + + "email": validateText, + "identifier": validateUrls, + "name": validateText, + "address": validateText, + "sponsor": validateActors, + "funder": validateActors, // TODO: may be other types + "isPartOf": validateOrganizations, + "url": validateUrls, + + // TODO: add more? +}; + +const reviewFieldValidators = { + "reviewAspect": validateText, + "reviewBody": validateText, +} + +function switchCodemetaContext(codemetaJSON, contextUrl) { + const previousCodemetaContext = codemetaJSON["@context"]; + codemetaJSON["@context"] = contextUrl; + return previousCodemetaContext; +} + +async function validateTerms(codemetaJSON) { + try { + await jsonld.expand(codemetaJSON, { safe: true }); + } catch (validationError) { + if (validationError.details.event.code === "invalid property") { + setError(`Unknown field "${validationError.details.event.details.property}".`); + return false; + } + } + return true; +} + +function validateCodemetaJSON(codemetaJSON) { + if (!Array.isArray(codemetaJSON) && typeof codemetaJSON != 'object') { setError("Document must be an object (starting and ending with { and }), not ${typeof doc}.") return false; } // TODO: validate id/@id // TODO: check there is either type or @type but not both - var type = getDocumentType(doc); + var type = getDocumentType(codemetaJSON); if (type === undefined) { setError("Missing type (must be SoftwareSourceCode or SoftwareApplication).") return false; @@ -32,42 +195,29 @@ function validateDocument(doc) { setError(`Wrong document type: must be "SoftwareSourceCode"/"SoftwareApplication", not ${JSON.stringify(type)}`) return false; } - else { - return Object.entries(doc).every((entry) => { - var fieldName = entry[0]; - var subdoc = entry[1]; - if (fieldName == "@context") { - // Was checked before - return true; - } - else if (fieldName == "type" || fieldName == "@type") { - // Was checked before - return true; - } - else if (isFieldFromOtherVersionToIgnore(fieldName)) { - // Do not check fields from other versions FIXME - return true; - } - else { - var validator = softwareFieldValidators[fieldName]; - if (validator === undefined) { - // TODO: find if it's a field that belongs to another type, - // and suggest that to the user - setError(`Unknown field "${fieldName}".`) - return false; - } - else { - return validator(fieldName, subdoc); - } + return true; +} + +function validateDocument(doc) { + return Object.entries(doc) + .filter(([fieldName]) => !isKeyword(fieldName)) + .every(([fieldName, subdoc]) => { + const compactedFieldName = getCompactType(fieldName); + var validator = softwareFieldValidators[compactedFieldName]; + if (validator === undefined) { + // TODO: find if it's a field that belongs to another type, + // and suggest that to the user + setError(`Unknown field "${compactedFieldName}".`) + return false; + } else { + return validator(compactedFieldName, subdoc); } }); - } } - async function parseAndValidateCodemeta(showPopup) { var codemetaText = document.querySelector('#codemetaText').innerText; - let parsed, doc; + let parsed; try { parsed = JSON.parse(codemetaText); @@ -79,9 +229,20 @@ async function parseAndValidateCodemeta(showPopup) { setError(""); - var isValid = validateDocument(parsed); + let isJSONValid = validateCodemetaJSON(parsed); + + const previousCodemetaContext = switchCodemetaContext(parsed, INTERNAL_CONTEXT_URL); + + let areTermsValid = await validateTerms(parsed); + + const expanded = await jsonld.expand(parsed); + const doc = await jsonld.compact(expanded, INTERNAL_CONTEXT_URL); + + switchCodemetaContext(parsed, previousCodemetaContext) + + let isDocumentValid = validateDocument(doc); if (showPopup) { - if (isValid) { + if (isJSONValid && areTermsValid && isDocumentValid) { alert('Document is valid!') } else { @@ -89,8 +250,5 @@ async function parseAndValidateCodemeta(showPopup) { } } - parsed["@context"] = LOCAL_CONTEXT_URL; - const expanded = await jsonld.expand(parsed); - doc = await jsonld.compact(expanded, LOCAL_CONTEXT_URL); return doc; } diff --git a/js/validation/primitives.js b/js/validation/primitives.js index 2dc1cd0..6488d14 100644 --- a/js/validation/primitives.js +++ b/js/validation/primitives.js @@ -9,6 +9,10 @@ * Validators for native schema.org data types. */ +function noValidation(fieldName, doc) { + return true; +} + // Validates an URL or an array of URLs function validateUrls(fieldName, doc) { return validateListOrSingle(fieldName, doc, (subdoc, inList) => { diff --git a/js/validation/things.js b/js/validation/things.js index c790bb0..542b5bb 100644 --- a/js/validation/things.js +++ b/js/validation/things.js @@ -9,35 +9,6 @@ * Validators for codemeta objects derived from http://schema.org/Thing. */ -function getDocumentType(doc) { - // TODO: check there is at most one. - // FIXME: is the last variant allowed? - return doc["type"] || doc["@type"] || doc["codemeta:type"] -} - -function getDocumentId(doc) { - return doc["id"] || doc["@id"]; -} - -function isCompactTypeEqual(type, compactedType) { - // FIXME: are all variants allowed? - return (type == `${compactedType}` - || type == `schema:${compactedType}` - || type == `codemeta:${compactedType}` - || type == `http://schema.org/${compactedType}` - ); -} - -function isFieldFromOtherVersionToIgnore(fieldName) { - return ["codemeta:contIntegration", "codemeta:continuousIntegration", "codemeta:isSourceCodeOf", - "schema:review", "schema:reviewAspect", "schema:reviewBody"].includes(fieldName); -} - -function noValidation(fieldName, doc) { - return true; -} - - // Validates subtypes of Thing, or URIs // // typeFieldValidators is a map: {type => {fieldName => fieldValidator}} @@ -93,30 +64,21 @@ function validateThing(parentFieldName, typeFieldValidators, doc) { for (expectedType in typeFieldValidators) { if (isCompactTypeEqual(documentType, expectedType)) { var fieldValidators = typeFieldValidators[expectedType]; - return Object.entries(doc).every((entry) => { - var fieldName = entry[0]; - var subdoc = entry[1]; - if (fieldName == "type" || fieldName == "@type") { - // Was checked before - return true; - } - else if (isFieldFromOtherVersionToIgnore(fieldName)) { - // Do not check fields from other versions FIXME - return true; - } - else { - var validator = fieldValidators[fieldName]; + + return Object.entries(doc) + .filter(([fieldName]) => !isKeyword(fieldName)) + .every(([fieldName, subdoc]) => { + const compactedFieldName = getCompactType(fieldName); + var validator = fieldValidators[compactedFieldName]; if (validator === undefined) { // TODO: find if it's a field that belongs to another type, // and suggest that to the user - setError(`Unknown field "${fieldName}" in "${parentFieldName}".`) + setError(`Unknown field "${compactedFieldName}".`) return false; + } else { + return validator(compactedFieldName, subdoc); } - else { - return validator(fieldName, subdoc); - } - } - }); + }); } } @@ -194,151 +156,3 @@ function validateOrganization(fieldName, doc) { function validateReview(fieldName, doc) { return validateThingOrId(fieldName, {"Review": reviewFieldValidators}, doc); } - - -var softwareFieldValidators = { - "@id": validateUrl, - "id": validateUrl, - - "codeRepository": validateUrls, - "programmingLanguage": noValidation, - "runtimePlatform": validateTexts, - "targetProduct": noValidation, // TODO: validate SoftwareApplication - "applicationCategory": validateTextsOrUrls, - "applicationSubCategory": validateTextsOrUrls, - "downloadUrl": validateUrls, - "fileSize": validateText, // TODO - "installUrl": validateUrls, - "memoryRequirements": validateTextsOrUrls, - "operatingSystem": validateTexts, - "permissions": validateTexts, - "processorRequirements": validateTexts, - "releaseNotes": validateTextsOrUrls, - "softwareHelp": validateCreativeWorks, - "softwareRequirements": noValidation, // TODO: validate SoftwareSourceCode - "softwareVersion": validateText, // TODO? - "storageRequirements": validateTextsOrUrls, - "supportingData": noValidation, // TODO - "author": validateActors, - "citation": validateCreativeWorks, // TODO - "contributor": validateActors, - "copyrightHolder": validateActors, - "copyrightYear": validateNumbers, - "creator": validateActors, // TODO: still in codemeta 2.0, but removed from master - "dateCreated": validateDate, - "dateModified": validateDate, - "datePublished": validateDate, - "editor": validatePersons, - "encoding": noValidation, - "fileFormat": validateTextsOrUrls, - "funder": validateActors, // TODO: may be other types - "keywords": validateTexts, - "license": validateCreativeWorks, - "producer": validateActors, - "provider": validateActors, - "publisher": validateActors, - "sponsor": validateActors, - "version": validateNumberOrText, - "isAccessibleForFree": validateBoolean, - "isSourceCodeOf": validateTextsOrUrls, - "isPartOf": validateCreativeWorks, - "hasPart": validateCreativeWorks, - "position": noValidation, - "identifier": noValidation, // TODO - "description": validateText, - "name": validateText, - "sameAs": validateUrls, - "url": validateUrls, - "relatedLink": validateUrls, - "review": validateReview, - - "softwareSuggestions": noValidation, // TODO: validate SoftwareSourceCode - "maintainer": validateActors, - "contIntegration": validateUrls, - "continuousIntegration": validateUrls, - "buildInstructions": validateUrls, - "developmentStatus": validateText, // TODO: use only repostatus strings? - "embargoDate": validateDate, - "embargoEndDate": validateDate, - "funding": validateText, - "issueTracker": validateUrls, - "referencePublication": noValidation, // TODO? - "readme": validateUrls, -}; - -var creativeWorkFieldValidators = { - "@id": validateUrl, - "id": validateUrl, - - "author": validateActors, - "citation": validateCreativeWorks, // TODO - "contributor": validateActors, - "copyrightHolder": validateActors, - "copyrightYear": validateNumbers, - "creator": validateActors, // TODO: still in codemeta 2.0, but removed from master - "dateCreated": validateDate, - "dateModified": validateDate, - "datePublished": validateDate, - "editor": validatePersons, - "encoding": noValidation, - "funder": validateActors, // TODO: may be other types - "keywords": validateTexts, - "license": validateCreativeWorks, - "producer": validateActors, - "provider": validateActors, - "publisher": validateActors, - "sponsor": validateActors, - "version": validateNumberOrText, - "isAccessibleForFree": validateBoolean, - "isPartOf": validateCreativeWorks, - "hasPart": validateCreativeWorks, - "position": noValidation, - "identifier": noValidation, // TODO - "description": validateText, - "name": validateText, - "sameAs": validateUrls, - "url": validateUrls, -}; - -var roleFieldValidators = { - "roleName": validateText, - "startDate": validateDate, - "endDate": validateDate, - - "schema:author": validateActor -}; - -var personFieldValidators = { - "@id": validateUrl, - "id": validateUrl, - - "givenName": validateText, - "familyName": validateText, - "email": validateText, - "affiliation": validateOrganizations, - "identifier": validateUrls, - "name": validateText, // TODO: this is technically valid, but should be allowed here? - "url": validateUrls, -}; - - -var organizationFieldValidators = { - "@id": validateUrl, - "id": validateUrl, - - "email": validateText, - "identifier": validateUrls, - "name": validateText, - "address": validateText, - "sponsor": validateActors, - "funder": validateActors, // TODO: may be other types - "isPartOf": validateOrganizations, - "url": validateUrls, - - // TODO: add more? -}; - -const reviewFieldValidators = { - "reviewAspect": validateText, - "reviewBody": validateText, -} diff --git a/js/validation/utils.js b/js/validation/utils.js new file mode 100644 index 0000000..4f91c86 --- /dev/null +++ b/js/validation/utils.js @@ -0,0 +1,35 @@ +/** + * Copyright (C) 2020-2021 The Software Heritage developers + * See the AUTHORS file at the top-level directory of this distribution + * License: GNU Affero General Public License version 3, or any later version + * See top-level LICENSE file for more information + */ + +function getDocumentType(doc) { + // TODO: check there is at most one. + // FIXME: is the last variant allowed? + return doc["type"] || doc["@type"] || doc["codemeta:type"] +} + +function getDocumentId(doc) { + return doc["id"] || doc["@id"]; +} + +function getCompactType(type) { + return type + .replace("schema:", "") + .replace("codemeta:", ""); +} + +function isCompactTypeEqual(type, compactedType) { + // FIXME: are all variants allowed? + return (type == `${compactedType}` + || type == `schema:${compactedType}` + || type == `codemeta:${compactedType}` + || type == `http://schema.org/${compactedType}` + ); +} + +function isKeyword(term) { + return ["@context", "type"].includes(term); +}