From 2a9e1bce9ad6272bf6a9b10fa21eff3ace7bc399 Mon Sep 17 00:00:00 2001 From: Alex Anderson <191496+alxndrsn@users.noreply.github.com> Date: Sat, 8 Feb 2025 10:49:43 +0300 Subject: [PATCH] analytics/datasets: make query more efficient (#1387) Closes #1388 --- lib/model/query/analytics.js | 212 ++++++++++++++++++----------------- 1 file changed, 112 insertions(+), 100 deletions(-) diff --git a/lib/model/query/analytics.js b/lib/model/query/analytics.js index ac6fd4dd1..69cc060c9 100644 --- a/lib/model/query/analytics.js +++ b/lib/model/query/analytics.js @@ -14,8 +14,8 @@ const { runSequentially } = require('../../util/promise'); const { metricsTemplate } = require('../../data/analytics'); const oidc = require('../../util/oidc'); -const DAY_RANGE = 45; -const _cutoffDate = sql`current_date - cast(${DAY_RANGE} as int)`; +const DAY_RANGE = sql`45`; +const _cutoffDate = sql`current_date - ${DAY_RANGE}`; // Gets a pointer to the project (repeat) in the metrics report for a specific // project, creating it first from the project metrics template if necessary @@ -420,110 +420,122 @@ group by f."projectId"`); // Datasets const getDatasets = () => ({ all }) => all(sql` -SELECT - ds.id, ds."projectId", COUNT(DISTINCT e.id) num_entities_total, - COUNT(DISTINCT CASE WHEN e."createdAt" >= current_date - cast(${DAY_RANGE} as int) THEN e.id END) num_entities_recent, - COUNT(DISTINCT CASE WHEN e."updatedAt" IS NOT NULL THEN e.id END) num_entities_updated_total, - COUNT(DISTINCT CASE WHEN e."updatedAt" >= current_date - cast(${DAY_RANGE} as int) THEN e.id END) num_entities_updated_recent, - COUNT(DISTINCT fd."formId") num_creation_forms, - COUNT(DISTINCT CASE WHEN f."currentDefId" IS NOT NULL THEN fa."formId" END) num_followup_forms, - MAX(COALESCE(errors.total, 0)) num_failed_entities_total, - MAX(COALESCE(errors.recent, 0)) num_failed_entities_recent, - MAX(COALESCE(updates.total, 0)) num_entity_updates_total, - MAX(COALESCE(updates.recent, 0)) num_entity_updates_recent, - MAX(COALESCE(updates.update_sub_total, 0)) num_entity_updates_sub_total, - MAX(COALESCE(updates.update_sub_recent, 0)) num_entity_updates_sub_recent, - MAX(COALESCE(updates.update_api_total, 0)) num_entity_updates_api_total, - MAX(COALESCE(updates.update_api_recent, 0)) num_entity_updates_api_recent, - MAX(COALESCE(conflict_stats.conflicts, 0)) num_entity_conflicts, - MAX(COALESCE(conflict_stats.resolved, 0)) num_entity_conflicts_resolved, - MAX(COALESCE(creates.create_sub_total, 0)) num_entity_create_sub_total, - MAX(COALESCE(creates.create_sub_recent, 0)) num_entity_create_sub_recent, - MAX(COALESCE(creates.create_api_total, 0)) num_entity_create_api_total, - MAX(COALESCE(creates.create_api_recent, 0)) num_entity_create_api_recent, - MAX(COALESCE(bulk_creates.total, 0)) num_entity_create_bulk_total, - MAX(COALESCE(bulk_creates.recent, 0)) num_entity_create_bulk_recent -FROM datasets ds - LEFT JOIN entities e ON e."datasetId" = ds.id - LEFT JOIN (dataset_form_defs dfd - JOIN form_defs fd ON fd.id = dfd."formDefId" - ) ON dfd."datasetId" = ds.id - LEFT JOIN (form_attachments fa - JOIN forms f ON f.id = fa."formId" - ) ON fa."datasetId" = ds.id - LEFT JOIN ( - SELECT - COUNT (a.details -> 'submissionId'::TEXT) total, - SUM (CASE WHEN a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) recent, - dfd."datasetId" - FROM audits a - JOIN submissions s ON CAST((a.details ->> 'submissionId'::TEXT) AS integer) = s.id - JOIN submission_defs sd ON sd."submissionId" = s.id AND sd."current" - JOIN dataset_form_defs dfd ON sd."formDefId" = dfd."formDefId" - WHERE a."action" = 'entity.error' - GROUP BY dfd."datasetId" - ) AS errors ON ds.id = errors."datasetId" - LEFT JOIN ( - SELECT - COUNT (1) total, - SUM (CASE WHEN a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) recent, - SUM (CASE WHEN a."details"->'submissionDefId' IS NOT NULL THEN 1 ELSE 0 END) update_sub_total, - SUM (CASE WHEN a."details"->'submissionDefId' IS NOT NULL AND a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) - THEN 1 ELSE 0 END) update_sub_recent, - SUM (CASE WHEN a."details"->'submissionDefId' IS NULL THEN 1 ELSE 0 END) update_api_total, - SUM (CASE WHEN a."details"->'submissionDefId' IS NULL AND a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) - THEN 1 ELSE 0 END) update_api_recent, - e."datasetId" - FROM audits a - JOIN entities e on CAST((a.details ->> 'entityId'::TEXT) AS integer) = e.id - WHERE a."action" = 'entity.update.version' - GROUP BY e."datasetId" - ) as updates ON ds.id = updates."datasetId" - LEFT JOIN ( - SELECT - COUNT (1) total, - SUM (CASE WHEN a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) recent, - SUM (CASE WHEN a."details"->'submissionDefId' IS NOT NULL THEN 1 ELSE 0 END) create_sub_total, - SUM (CASE WHEN a."details"->'submissionDefId' IS NOT NULL AND a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) - THEN 1 ELSE 0 END) create_sub_recent, - SUM (CASE WHEN a."details"->'submissionDefId' IS NULL THEN 1 ELSE 0 END) create_api_total, - SUM (CASE WHEN a."details"->'submissionDefId' IS NULL AND a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) - THEN 1 ELSE 0 END) create_api_recent, - e."datasetId" - FROM audits a - JOIN entities e on CAST((a.details ->> 'entityId'::TEXT) AS integer) = e.id - WHERE a."action" = 'entity.create' - GROUP BY e."datasetId" - ) as creates ON ds.id = creates."datasetId" - LEFT JOIN ( - SELECT count(1) total, - SUM (CASE WHEN a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) recent, - e."datasetId" - FROM audits a - JOIN entity_def_sources eds on CAST((a.details ->> 'sourceId'::TEXT) AS integer) = eds."id" - JOIN entity_defs ed on ed."sourceId" = eds.id AND root=true - JOIN entities e on ed."entityId" = e.id - WHERE a."action" = 'entity.bulk.create' - GROUP BY e."datasetId" - ) as bulk_creates on ds.id = bulk_creates."datasetId" - LEFT JOIN ( - SELECT COUNT (1) conflicts, - SUM (CASE WHEN e."conflict" IS NULL THEN 1 ELSE 0 END) resolved, - e."datasetId" - FROM entities e - WHERE e.id IN - (SELECT DISTINCT "entityId" FROM entity_defs WHERE "conflictingProperties" IS NOT NULL) - GROUP BY e."datasetId" - ) AS conflict_stats ON ds.id = conflict_stats."datasetId" -WHERE ds."publishedAt" IS NOT NULL -GROUP BY ds.id, ds."projectId" + SELECT ds.id + , ds."projectId" + , COALESCE(entz.num_entities_total , 0) AS num_entities_total + , COALESCE(entz.num_entities_recent , 0) AS num_entities_recent + , COALESCE(entz.num_entities_updated_total , 0) AS num_entities_updated_total + , COALESCE(entz.num_entities_updated_recent, 0) AS num_entities_updated_recent + , COALESCE(num_creation_forms , 0) AS num_creation_forms + , COALESCE(num_followup_forms , 0) AS num_followup_forms + , COALESCE(errors.total , 0) AS num_failed_entities_total + , COALESCE(errors.recent , 0) AS num_failed_entities_recent + , COALESCE(updates.total , 0) AS num_entity_updates_total + , COALESCE(updates.recent , 0) AS num_entity_updates_recent + , COALESCE(updates.update_sub_total , 0) AS num_entity_updates_sub_total + , COALESCE(updates.update_sub_recent , 0) AS num_entity_updates_sub_recent + , COALESCE(updates.update_api_total , 0) AS num_entity_updates_api_total + , COALESCE(updates.update_api_recent , 0) AS num_entity_updates_api_recent + , COALESCE(conflict_stats.conflicts , 0) AS num_entity_conflicts + , COALESCE(conflict_stats.resolved , 0) AS num_entity_conflicts_resolved + , COALESCE(creates.create_sub_total , 0) AS num_entity_create_sub_total + , COALESCE(creates.create_sub_recent , 0) AS num_entity_create_sub_recent + , COALESCE(creates.create_api_total , 0) AS num_entity_create_api_total + , COALESCE(creates.create_api_recent , 0) AS num_entity_create_api_recent + , COALESCE(bulk_creates.total , 0) AS num_entity_create_bulk_total + , COALESCE(bulk_creates.recent , 0) AS num_entity_create_bulk_recent + FROM datasets AS ds + LEFT JOIN ( + SELECT COUNT(*) AS num_entities_total + , COUNT("createdAt" >= ${_cutoffDate} OR NULL) AS num_entities_recent + , COUNT("updatedAt" IS NOT NULL OR NULL) AS num_entities_updated_total + , COUNT("updatedAt" >= ${_cutoffDate} OR NULL) AS num_entities_updated_recent + , "datasetId" + FROM entities + GROUP BY "datasetId" + ) AS entz ON entz."datasetId" = ds.id + LEFT JOIN ( + SELECT COUNT(*) AS conflicts + , COUNT(conflict IS NULL OR NULL) AS resolved + , "datasetId" + FROM entities AS e + WHERE EXISTS ( + SELECT 1 + FROM entity_defs + WHERE "entityId" = e.id + AND "conflictingProperties" IS NOT NULL + ) + GROUP BY "datasetId" + ) AS conflict_stats ON conflict_stats."datasetId" = ds.id + LEFT JOIN ( + SELECT COUNT(fd."formId") AS num_creation_forms + , dfd."datasetId" + FROM dataset_form_defs AS dfd + JOIN form_defs AS fd ON fd.id = dfd."formDefId" + GROUP BY dfd."datasetId" + ) AS fd ON fd."datasetId" = ds.id + LEFT JOIN ( + SELECT COUNT("formId") AS num_followup_forms + , "datasetId" + FROM form_attachments + JOIN forms AS f ON f.id = "formId" + WHERE f."currentDefId" IS NOT NULL + GROUP BY "datasetId" + ) AS fa ON fa."datasetId" = ds.id + LEFT JOIN ( + SELECT COUNT(a.details->'submissionId') AS total + , COUNT(a."loggedAt" >= ${_cutoffDate} OR NULL) AS recent + , dfd."datasetId" + FROM audits AS a + JOIN submissions AS s ON s.id = (a.details->'submissionId')::INT + JOIN submission_defs AS sd ON sd."submissionId" = s.id AND sd.current + JOIN dataset_form_defs AS dfd ON dfd."formDefId" = sd."formDefId" + WHERE a.action = 'entity.error' + GROUP BY dfd."datasetId" + ) AS errors ON errors."datasetId" = ds.id + LEFT JOIN ( + SELECT COUNT(*) AS total + , COUNT(a."loggedAt" >= ${_cutoffDate} OR NULL) AS recent + , COUNT(a.details->'submissionDefId') AS update_sub_total + , COUNT(a.details->'submissionDefId' IS NOT NULL AND a."loggedAt" >= ${_cutoffDate} OR NULL) AS update_sub_recent + , COUNT(a.details->'submissionDefId' IS NULL OR NULL) update_api_total + , COUNT(a.details->'submissionDefId' IS NULL AND a."loggedAt" >= ${_cutoffDate} OR NULL) AS update_api_recent + , e."datasetId" + FROM audits AS a + JOIN entities AS e ON e.id = (a.details->'entityId')::INT + WHERE a.action = 'entity.update.version' + GROUP BY e."datasetId" + ) AS updates ON updates."datasetId" = ds.id + LEFT JOIN ( + SELECT COUNT(a.details->'submissionDefId') AS create_sub_total + , COUNT(a.details->'submissionDefId' IS NOT NULL AND a."loggedAt" >= ${_cutoffDate} OR NULL) AS create_sub_recent + , COUNT(a.details->'submissionDefId' IS NULL OR NULL) AS create_api_total + , COUNT(a.details->'submissionDefId' IS NULL AND a."loggedAt" >= ${_cutoffDate} OR NULL) AS create_api_recent + , e."datasetId" + FROM audits AS a + JOIN entities AS e ON e.id = (a.details->'entityId')::INT + WHERE a.action = 'entity.create' + GROUP BY e."datasetId" + ) AS creates ON creates."datasetId" = ds.id + LEFT JOIN ( + SELECT COUNT(*) AS total + , COUNT(a."loggedAt" >= ${_cutoffDate} OR NULL) AS recent + , e."datasetId" + FROM audits AS a + JOIN entity_def_sources AS eds ON eds.id = (a.details->'sourceId')::INT + JOIN entity_defs AS ed ON ed."sourceId" = eds.id AND root=true + JOIN entities AS e ON e.id = ed."entityId" + WHERE a.action = 'entity.bulk.create' + GROUP BY e."datasetId" + ) AS bulk_creates ON bulk_creates."datasetId" = ds.id + WHERE ds."publishedAt" IS NOT NULL `); const getDatasetEvents = () => ({ all }) => all(sql` SELECT ds.id, ds."projectId", COUNT (*) num_bulk_create_events_total, - SUM (CASE WHEN audits."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) num_bulk_create_events_recent, + SUM (CASE WHEN audits."loggedAt" >= ${_cutoffDate} THEN 1 ELSE 0 END) num_bulk_create_events_recent, MAX (CAST(sources."details"->'count' AS integer)) AS biggest_bulk_upload FROM datasets ds JOIN audits ON ds."acteeId" = audits."acteeId"