From 441cc31e9c52358ea29c44a89c95b8c9d3398cf4 Mon Sep 17 00:00:00 2001
From: Eduardo Filho
Date: Mon, 10 Jul 2023 12:18:25 -0400
Subject: [PATCH] Add non-normalized aggregations to backend (#2379)

* Add support to non-normalized aggs (histogram, legacy telemetry)

* migration script for less downtime

* fix lint

* fix tests + lint

* Use RunSQL.noop to make this migration reversible

* Fix migration reversibility after tests
---
 .../0025_non_normalized_aggregations.py       | 98 +++++++++++++++++++
 glam/api/models.py                            |  2 +
 glam/api/views.py                             |  6 ++
 glam/tests/test_api.py                        | 43 ++++++++
 4 files changed, 149 insertions(+)
 create mode 100644 glam/api/migrations/0025_non_normalized_aggregations.py

Reviewer notes (this section sits after the three-dash separator and before
the first file diff, so it is commentary only and not part of the commit
message or of any hunk):

* Migration 0025, forward direction: adds nullable TextFields
  non_norm_histogram and non_norm_percentiles to the beta, nightly and release
  desktop aggregation models; then, for every value in
  constants.CHANNEL_NAMES, creates
  view_glam_desktop_<channel>_aggregation_temp (with four indexes) from
  glam_desktop_<channel>_aggregation, renames the current view to *_old and
  the *_temp view into its place, and finally drops *_old.
* Migration 0025, reverse direction: only the first RunSQL carries reverse
  SQL; it recreates view_glam_desktop_<channel>_aggregation and its four
  indexes. The three later RunSQL operations reverse as no-ops.
  NOTE(review): the reverse CREATE can only succeed if the view no longer
  exists at that point (presumably it is dropped together with the removed
  columns) -- confirm against the database backend in use.
* views.py: both new response keys reuse the existing
  `value and orjson.loads(value) or ""` form, so a NULL/empty column, and
  also a payload that decodes to a falsy value such as {}, is returned
  as "".
* NOTE(review): this copy of the patch had lost its line breaks. Line
  structure was restored from the hunk line counts; the indentation of the
  context lines in models.py, views.py and test_api.py is a best guess and
  should be checked against the target files before applying.

diff --git a/glam/api/migrations/0025_non_normalized_aggregations.py b/glam/api/migrations/0025_non_normalized_aggregations.py
new file mode 100644
index 000000000..c9c965b5b
--- /dev/null
+++ b/glam/api/migrations/0025_non_normalized_aggregations.py
@@ -0,0 +1,98 @@
+# Generated by Django 3.1.13 on 2023-05-25 22:59
+
+from django.db import migrations, models
+
+from glam.api import constants
+
+sql_create = []
+sql_create_rev = []
+sql_rename = []
+sql_drop = []
+for channel in constants.CHANNEL_NAMES.values():
+    sql_create.extend(
+        [
+            f"CREATE MATERIALIZED VIEW view_glam_desktop_{channel}_aggregation_temp AS SELECT * FROM glam_desktop_{channel}_aggregation;",
+            f"CREATE UNIQUE INDEX ON view_glam_desktop_{channel}_aggregation_temp (id);",
+            f"CREATE INDEX ON view_glam_desktop_{channel}_aggregation_temp (version);",
+            f"CREATE INDEX ON view_glam_desktop_{channel}_aggregation_temp USING HASH (metric);",
+            f"CREATE INDEX ON view_glam_desktop_{channel}_aggregation_temp (os);",
+        ]
+    )
+    sql_create_rev.extend(
+        [
+            f"CREATE MATERIALIZED VIEW view_glam_desktop_{channel}_aggregation AS SELECT * FROM glam_desktop_{channel}_aggregation;",
+            f"CREATE UNIQUE INDEX ON view_glam_desktop_{channel}_aggregation (id);",
+            f"CREATE INDEX ON view_glam_desktop_{channel}_aggregation (version);",
+            f"CREATE INDEX ON view_glam_desktop_{channel}_aggregation USING HASH (metric);",
+            f"CREATE INDEX ON view_glam_desktop_{channel}_aggregation (os);",
+        ]
+    )
+for channel in constants.CHANNEL_NAMES.values():
+    sql_rename.extend(
+        [
+            f"ALTER MATERIALIZED VIEW view_glam_desktop_{channel}_aggregation rename TO view_glam_desktop_{channel}_aggregation_old;",
+            f"ALTER MATERIALIZED VIEW view_glam_desktop_{channel}_aggregation_temp rename TO view_glam_desktop_{channel}_aggregation;",
+        ]
+    )
+for channel in constants.CHANNEL_NAMES.values():
+    sql_drop.extend(
+        [
+            f"DROP MATERIALIZED VIEW view_glam_desktop_{channel}_aggregation_old;",
+        ]
+    )
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("api", "0024_usageinstrumentation"),
+    ]
+
+    operations = [
+        migrations.RunSQL(
+            sql=migrations.RunSQL.noop,
+            reverse_sql=sql_create_rev,
+        ),
+        migrations.AddField(
+            model_name="desktopbetaaggregation",
+            name="non_norm_histogram",
+            field=models.TextField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="desktopbetaaggregation",
+            name="non_norm_percentiles",
+            field=models.TextField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="desktopnightlyaggregation",
+            name="non_norm_histogram",
+            field=models.TextField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="desktopnightlyaggregation",
+            name="non_norm_percentiles",
+            field=models.TextField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="desktopreleaseaggregation",
+            name="non_norm_histogram",
+            field=models.TextField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="desktopreleaseaggregation",
+            name="non_norm_percentiles",
+            field=models.TextField(blank=True, null=True),
+        ),
+        migrations.RunSQL(
+            sql_create,
+            reverse_sql=migrations.RunSQL.noop,
+        ),
+        migrations.RunSQL(
+            sql_rename,
+            reverse_sql=migrations.RunSQL.noop,
+        ),
+        migrations.RunSQL(
+            sql_drop,
+            reverse_sql=migrations.RunSQL.noop,
+        ),
+    ]
diff --git a/glam/api/models.py b/glam/api/models.py
index 2ebdb3bcb..67fff8e14 100644
--- a/glam/api/models.py
+++ b/glam/api/models.py
@@ -126,6 +126,8 @@ class AbstractDesktopAggregation(models.Model):
     histogram = models.TextField(null=True, blank=True)
     percentiles = models.TextField(null=True, blank=True)
     total_sample = models.BigIntegerField(null=True, blank=True)
+    non_norm_histogram = models.TextField(null=True, blank=True)
+    non_norm_percentiles = models.TextField(null=True, blank=True)
     # TODO: Update these fields to not allow NULLs.
 
     class Meta:
diff --git a/glam/api/views.py b/glam/api/views.py
index d51b0c947..84b0561e0 100644
--- a/glam/api/views.py
+++ b/glam/api/views.py
@@ -127,7 +127,13 @@ def get_firefox_aggregations(request, **kwargs):
             "total_users": row.total_users,
             "sample_count": row.total_sample,
             "histogram": row.histogram and orjson.loads(row.histogram) or "",
+            "non_norm_histogram": row.non_norm_histogram
+            and orjson.loads(row.non_norm_histogram)
+            or "",
             "percentiles": row.percentiles and orjson.loads(row.percentiles) or "",
+            "non_norm_percentiles": row.non_norm_percentiles
+            and orjson.loads(row.non_norm_percentiles)
+            or "",
         }
         if row.client_agg_type:
             if row.metric_type == "boolean":
diff --git a/glam/tests/test_api.py b/glam/tests/test_api.py
index d36e6c6c7..a7ee2adb5 100644
--- a/glam/tests/test_api.py
+++ b/glam/tests/test_api.py
@@ -50,6 +50,23 @@ def _create_aggregation(data=None, multiplier=1.0, model=None):
                 "95": 95 * multiplier,
             }
         ),
+        "non_norm_histogram": json.dumps(
+            {
+                "0": round(10.00001111 * multiplier, 4),
+                "1": round(20.00002222 * multiplier, 4),
+                "2": round(30.00003333 * multiplier, 4),
+                "3": round(40.00004444 * multiplier, 4),
+            }
+        ),
+        "non_norm_percentiles": json.dumps(
+            {
+                "5": 5 * multiplier,
+                "25": 25 * multiplier,
+                "50": 50 * multiplier,
+                "75": 75 * multiplier,
+                "95": 95 * multiplier,
+            }
+        ),
     }
     if data:
         _data.update(data)
@@ -316,11 +333,24 @@ def test_histogram(self, client):
             "revision": "",
             "client_agg_type": "summed-histogram",
             "histogram": {"0": 100.0001, "1": 200.0002, "2": 300.0003, "3": 400.0004},
+            "non_norm_histogram": {
+                "0": 100.0001,
+                "1": 200.0002,
+                "2": 300.0003,
+                "3": 400.0004,
+            },
             "metric": "gc_ms",
             "metric_key": "",
             "metric_type": "histogram-exponential",
             "os": "*",
             "percentiles": {"5": 50, "25": 250, "50": 500, "75": 750, "95": 950},
+            "non_norm_percentiles": {
+                "5": 50,
+                "25": 250,
+                "50": 500,
+                "75": 750,
+                "95": 950,
+            },
             "process": "parent",
             # "total_addressable_market": 999,
             "total_users": 1110,
@@ -355,11 +385,24 @@ def test_revision_lookup(self, client):
             "revision": revision,
             "client_agg_type": "summed-histogram",
             "histogram": {"0": 100.0001, "1": 200.0002, "2": 300.0003, "3": 400.0004},
+            "non_norm_histogram": {
+                "0": 100.0001,
+                "1": 200.0002,
+                "2": 300.0003,
+                "3": 400.0004,
+            },
             "metric": "gc_ms",
             "metric_key": "",
             "metric_type": "histogram-exponential",
             "os": "*",
             "percentiles": {"5": 50, "25": 250, "50": 500, "75": 750, "95": 950},
+            "non_norm_percentiles": {
+                "5": 50,
+                "25": 250,
+                "50": 500,
+                "75": 750,
+                "95": 950,
+            },
             "process": "parent",
             # "total_addressable_market": 999,
             "total_users": 1110,