From af67cc5b01e87a52e247c284a48f935092debe72 Mon Sep 17 00:00:00 2001 From: Sanny Ramirez Date: Thu, 1 Jun 2023 20:52:31 +0200 Subject: [PATCH 1/9] set default garbage collector in java options --- bin/supply | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bin/supply b/bin/supply index 5eef947f9..74a4aa697 100755 --- a/bin/supply +++ b/bin/supply @@ -1,3 +1,9 @@ #!/usr/bin/env bash -# Since the Mendix Cloud Foundry buildpack is designed to be used as a final buildpack, this script is empty +BUILD_PATH=$1 + +# setup defaults +mkdir -p "${BUILD_PATH}/.profile.d/" +cat > "${BUILD_PATH}/.profile.d/defaults.sh" < Date: Thu, 1 Jun 2023 21:31:02 +0200 Subject: [PATCH 2/9] set default garbage collector in java options --- bin/supply | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/supply b/bin/supply index 74a4aa697..67132c9b3 100755 --- a/bin/supply +++ b/bin/supply @@ -5,5 +5,6 @@ BUILD_PATH=$1 # setup defaults mkdir -p "${BUILD_PATH}/.profile.d/" cat > "${BUILD_PATH}/.profile.d/defaults.sh" < Date: Mon, 12 Jun 2023 07:59:47 +0200 Subject: [PATCH 3/9] ci: trigger job deploy also on releases/ branches --- .github/workflows/build.yml | 2 +- .github/workflows/deploy.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b4bf92689..2d23d7e4c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,7 +29,7 @@ jobs: - name: Get current version id: get-current-version run: | - CURRENT_VERSION_TAG=$(git tag --list --sort=-version:refname "v*" | head -n 1) + CURRENT_VERSION_TAG=$(git tag --list --merged --sort=-version:refname "v*" | head -n 1) echo "::set-output name=current_version_tag::${CURRENT_VERSION_TAG}" - name: Auto-generate future version id: autogenerate-version diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9f84c0f27..99522ce90 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -5,6 +5,7 @@ on: types: [closed] branches: - master + - 'releases/**' jobs: pre: From e98deae28a624abbbf2f29fb93f51c05b226af77 Mon Sep 17 00:00:00 2001 From: Joost Pastoor Date: Tue, 13 Jun 2023 13:31:49 +0200 Subject: [PATCH 4/9] ci: trigger job deploy also on releases/ branches (fixup) --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2d23d7e4c..558adcac4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,7 +29,7 @@ jobs: - name: Get current version id: get-current-version run: | - CURRENT_VERSION_TAG=$(git tag --list --merged --sort=-version:refname "v*" | head -n 1) + CURRENT_VERSION_TAG=$(git tag --list --merged HEAD --sort=-version:refname "v*" | head -n 1) echo "::set-output name=current_version_tag::${CURRENT_VERSION_TAG}" - name: Auto-generate future version id: autogenerate-version From 842ac7dde324c6842aecc217336e64e93bbe5a7f Mon Sep 17 00:00:00 2001 From: ekremsekerci Date: Wed, 17 May 2023 11:41:54 +0200 Subject: [PATCH 5/9] Add filtering capability for metrics ingestion to third party APMs --- README.md | 39 +++++- buildpack/telemetry/metrics.py | 192 +++++++++++++++++--------- buildpack/telemetry/telegraf.py | 4 +- tests/unit/test_micrometer_metrics.py | 143 ++++++++++++++++++- 4 files changed, 309 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index b4e5f5d0d..de0e9d68e 100644 --- a/README.md +++ b/README.md @@ -583,7 +583,7 @@ Note that: ## Telemetry Configuration -The buildpack includes a variety of telemetry agents, and can configure logging for the Mendix Runtime. +The buildpack includes a variety of telemetry agents that can be configured to collect and forward metrics/logs from the Mendix Runtime. ### New Relic @@ -829,6 +829,43 @@ Example (1000 loglines/second): cf set-env LOG_RATELIMIT '1000' ``` +### Custom Runtime Metrics filtering + +For the third-party integrations explained above, in addition to the metrics collected by the agents, custom runtime metrics are provided via telegraf. +This configuration also has a filtering mechanism that allows users to specify metrics they allow or deny for the vendor they are using. +To filter the ingestion of custom runtime metrics to third party APMs, users should provide a list of prefixes of the metrics they want to allow/deny using the environment variables listed below. + +Note: Custom database metrics cannot be filtered by name, to turn them off, the `APPMETRICS_INCLUDE_DB` environment variable should be set to false. + +#### APM_METRICS_FILTER_ALLOW + +Comma-separated list of prefixes for the metrics to be allowed. By default, all metrics are allowed, even if they are not specified via this env var. + +For example, to allow only the session metrics, `APM_METRICS_FILTER_ALLOW` should be set to `mx.runtime.stats.sessions`: + +```shell +cf set-env APM_METRICS_FILTER_ALLOW 'mx.runtime.stats.sessions' +``` + +#### APM_METRICS_FILTER_DENY + +Comma-separated list of prefixes for the metrics to be denied. + +For example, to deny all metrics starting with jetty or mx.runtime, the environment variable should be set to `jetty,mx.runtime`: + +```shell +cf set-env APM_METRICS_FILTER_DENY 'jetty,mx.runtime' +``` + +#### APM_METRICS_FILTER_DENY_ALL + +If this environment variable is set to `true`, all metrics will be denied regardless of values of `APM_METRICS_FILTER_ALLOW`, `APM_METRICS_FILTER_DENY`, and `APPMETRICS_INCLUDE_DB`. + +```shell +cf set-env APM_METRICS_FILTER_DENY_ALL true +``` + + ## Using the Buildpack without an Internet Connection If you are running Cloud Foundry without a connection to the Internet, you should specify an on-premises web server that hosts Mendix Runtime files and other buildpack dependencies. You can set the endpoint with the following environment variable: diff --git a/buildpack/telemetry/metrics.py b/buildpack/telemetry/metrics.py index a20035458..703984913 100644 --- a/buildpack/telemetry/metrics.py +++ b/buildpack/telemetry/metrics.py @@ -20,69 +20,6 @@ from . import datadog, appdynamics, dynatrace -# Runtime configuration for influx registry -# This enables the new stream of metrics coming from micrometer instead -# of the admin port. -# https://docs.mendix.com/refguide/metrics#registries-configuration -# NOTE: Metrics are usually dot separated. But each registry has its -# own naming format. For instance, a metric like -# `a.name.like.this` would appear as `a_name_like_this` in -# influx-formatted metrics output. Hence the filter names uses the -# dot-separated metric names. -INFLUX_REGISTRY = { - "type": "influx", - "settings": { - "uri": "http://localhost:8086", - "db": "mendix", - "step": "10s", - }, - "filters": [ - # Login metrics needs to be enabled explicitly as it's disabled - # by default - { - "type": "nameStartsWith", - "result": "accept", - "values": ["mx.runtime.user.login"], - }, - # Filter out irrelevant metrics to reduce - # the payload size passed to TSS/TFR - # https://docs.mendix.com/refguide/metrics#filters - { - "type": "nameStartsWith", - "result": "deny", - "values": ["commons.pool", "jvm.buffer"], - }, - ], -} - -STATSD_REGISTRY = { - "type": "statsd", - "settings": {"port": datadog.get_statsd_port()}, -} - -# For freeapps we push only the session & login metrics -FREEAPPS_METRICS_REGISTRY = [ - { - "type": "influx", - "settings": { - "uri": "http://localhost:8086", - "db": "mendix", - "step": "10s", - }, - "filters": [ - { - "type": "nameStartsWith", - "result": "accept", - "values": [ - "mx.runtime.stats.sessions", - "mx.runtime.user.login", - ], - }, - {"type": "nameStartsWith", "result": "deny", "values": [""]}, - ], - } -] - METRICS_REGISTRIES_KEY = "Metrics.Registries" # From this MxRuntime version onwards we gather (available) runtime statistics @@ -186,9 +123,9 @@ def configure_metrics_registry(m2ee): logging.info("Configuring runtime to push metrics to influx via micrometer") if util.is_free_app(): - return FREEAPPS_METRICS_REGISTRY + return [get_freeapps_registry()] - paidapps_registries = [INFLUX_REGISTRY] + paidapps_registries = [get_influx_registry()] if ( datadog.is_enabled() @@ -196,11 +133,134 @@ def configure_metrics_registry(m2ee): or appdynamics.machine_agent_enabled() or dynatrace.is_telegraf_enabled() ): - paidapps_registries.append(STATSD_REGISTRY) + allow_list, deny_list = get_apm_filters() + paidapps_registries.append(get_statsd_registry(allow_list, deny_list)) return paidapps_registries +def get_apm_filters(): + if deny_all_apm_metrics(): + allow_list = [] + deny_list = [""] + else: + allowed_metrics = os.getenv("APM_METRICS_FILTER_ALLOW") + denied_metrics = os.getenv("APM_METRICS_FILTER_DENY") + + if allowed_metrics and (denied_metrics is None): + # if only allowed metrics are specified, deny all the others + denied_metrics = "" + + allow_list = sanitize_metrics_filter(allowed_metrics) + deny_list = sanitize_metrics_filter(denied_metrics) + + logging.info( + "For APM integrations; allowed metric prefixes are: %s, " + "and denied metric prefixes are: %s", + allow_list, + deny_list, + ) + + return allow_list, deny_list + + +def deny_all_apm_metrics(): + return strtobool(os.getenv("APM_METRICS_FILTER_DENY_ALL", default="false")) + + +def sanitize_metrics_filter(metric_filter): + """ + If we use empty string ("") in the filters that we use for statsd registry, + it accepts/denies every metric since we use type as `nameStartsWith`. + To prevent breaking the functionality because of this, we need to make sure + that we pass empty string to the registry filters only if it's intentional. + So, we strip the leading and trailing commas. Additionally we remove all + the white spaces to prevent any unintentional mistakes. + """ + if metric_filter is None: + return [] + return metric_filter.replace(" ", "").strip(",").split(",") + + +def get_influx_registry(): + # Runtime configuration for influx registry + # This enables the new stream of metrics coming from micrometer instead + # of the admin port. + # https://docs.mendix.com/refguide/metrics#registries-configuration + # NOTE: Metrics are usually dot separated. But each registry has its + # own naming format. For instance, a metric like + # `a.name.like.this` would appear as `a_name_like_this` in + # influx-formatted metrics output. Hence the filter names uses the + # dot-separated metric names. + return { + "type": "influx", + "settings": { + "uri": "http://localhost:8086", + "db": "mendix", + "step": "10s", + }, + "filters": [ + # Login metrics needs to be enabled explicitly as it's disabled + # by default + { + "type": "nameStartsWith", + "result": "accept", + "values": ["mx.runtime.user.login"], + }, + # Filter out irrelevant metrics to reduce + # the payload size passed to TSS/TFR + # https://docs.mendix.com/refguide/metrics#filters + { + "type": "nameStartsWith", + "result": "deny", + "values": ["commons.pool", "jvm.buffer"], + }, + ], + } + + +def get_statsd_registry(allow_list, deny_list): + return { + "type": "statsd", + "settings": {"port": datadog.get_statsd_port()}, + "filters": [ + { + "type": "nameStartsWith", + "result": "accept", + "values": allow_list, + }, + { + "type": "nameStartsWith", + "result": "deny", + "values": deny_list, + }, + ], + } + + +def get_freeapps_registry(): + # For freeapps we push only the session & login metrics + return { + "type": "influx", + "settings": { + "uri": "http://localhost:8086", + "db": "mendix", + "step": "10s", + }, + "filters": [ + { + "type": "nameStartsWith", + "result": "accept", + "values": [ + "mx.runtime.stats.sessions", + "mx.runtime.user.login", + ], + }, + {"type": "nameStartsWith", "result": "deny", "values": [""]}, + ], + } + + def bypass_loggregator(): env_var = os.getenv("BYPASS_LOGGREGATOR", "False") # Throws a useful message if you put in a nonsensical value. diff --git a/buildpack/telemetry/telegraf.py b/buildpack/telemetry/telegraf.py index 9e79b9a8b..308603721 100644 --- a/buildpack/telemetry/telegraf.py +++ b/buildpack/telemetry/telegraf.py @@ -75,6 +75,8 @@ def _get_config_file_path(version): def include_db_metrics(): + if metrics.deny_all_apm_metrics(): + return False if util.is_free_app(): # For free apps we are not interested in database metrics return False @@ -169,7 +171,7 @@ def _get_http_outputs(): def _get_db_config(): - if (include_db_metrics() or datadog.get_api_key()) and util.is_cluster_leader(): + if include_db_metrics() and util.is_cluster_leader(): db_config = database.get_config() if db_config and db_config["DatabaseType"] == "PostgreSQL": return db_config diff --git a/tests/unit/test_micrometer_metrics.py b/tests/unit/test_micrometer_metrics.py index 4cba0d282..7d0b4e25d 100644 --- a/tests/unit/test_micrometer_metrics.py +++ b/tests/unit/test_micrometer_metrics.py @@ -97,12 +97,119 @@ def test_paidapps_metrics_registry_statsd(self, is_enabled): "statsd", ) + @patch("buildpack.telemetry.datadog.is_enabled", return_value=True) + def test_apm_metrics_filters_in_registries(self, is_enabled): + with patch.dict( + os.environ, + { + "PROFILE": "some-random-mx-profile", + "APM_METRICS_FILTER_ALLOW": "allowed_metric", + "APM_METRICS_FILTER_DENY": "denied_metric", + }, + ): + result = metrics.configure_metrics_registry(Mock()) + metrics_registries = sorted(result, key=itemgetter("type")) + + # influx registry shouldn't have the filters + self.assertNotIn( + "allowed_metric", metrics_registries[0]["filters"][0]["values"] + ) + + self.assertNotIn( + "denied_metric", metrics_registries[0]["filters"][1]["values"] + ) + + # statsd registry should have the filters + self.assertIn( + "allowed_metric", metrics_registries[1]["filters"][0]["values"] + ) + + self.assertIn( + "denied_metric", metrics_registries[1]["filters"][1]["values"] + ) + + @patch("buildpack.telemetry.datadog.is_enabled", return_value=True) + def test_statsd_registry_when_only_allow_filter_is_provided(self, is_enabled): + with patch.dict( + os.environ, + { + "PROFILE": "some-random-mx-profile", + "APM_METRICS_FILTER_ALLOW": "allowed_metric", + }, + ): + result = metrics.configure_metrics_registry(Mock()) + metrics_registries = sorted(result, key=itemgetter("type")) + + # Allow list should contain allowed_metric + self.assertIn( + "allowed_metric", metrics_registries[1]["filters"][0]["values"] + ) + + # Deny list should be empty to deny all other metrics + self.assertEqual([""], metrics_registries[1]["filters"][1]["values"]) + + @patch("buildpack.telemetry.datadog.is_enabled", return_value=True) + def test_statsd_registry_when_only_deny_filter_is_provided(self, is_enabled): + with patch.dict( + os.environ, + { + "PROFILE": "some-random-mx-profile", + "APM_METRICS_FILTER_DENY": "denied_metric", + }, + ): + result = metrics.configure_metrics_registry(Mock()) + metrics_registries = sorted(result, key=itemgetter("type")) + + # Allow list should be empty list + self.assertEqual([], metrics_registries[1]["filters"][0]["values"]) + + # Deny list should contain denied_metric + self.assertIn( + "denied_metric", metrics_registries[1]["filters"][1]["values"] + ) + + @patch("buildpack.telemetry.datadog.is_enabled", return_value=True) + def test_statsd_registry_when_no_filter_is_provided(self, is_enabled): + with patch.dict( + os.environ, + { + "PROFILE": "some-random-mx-profile", + }, + ): + result = metrics.configure_metrics_registry(Mock()) + metrics_registries = sorted(result, key=itemgetter("type")) + + # Allow list should be empty list + self.assertEqual([], metrics_registries[1]["filters"][0]["values"]) + + # Deny list should contain denied_metric + self.assertEqual([], metrics_registries[1]["filters"][1]["values"]) + + @patch("buildpack.telemetry.datadog.is_enabled", return_value=True) + def test_statsd_registry_when_deny_all_is_set(self, is_enabled): + with patch.dict( + os.environ, + { + "PROFILE": "some-random-mx-profile", + "APM_METRICS_FILTER_ALLOW": "allowed_metric", + "APM_METRICS_FILTER_DENY": "denied_metric", + "APM_METRICS_FILTER_DENY_ALL": "true", + }, + ): + result = metrics.configure_metrics_registry(Mock()) + metrics_registries = sorted(result, key=itemgetter("type")) + + # statsd registry should be configured to deny everything + self.assertEqual([], metrics_registries[1]["filters"][0]["values"]) + + self.assertEqual([""], metrics_registries[1]["filters"][1]["values"]) + def test_freeapps_metrics_registry(self): with patch.dict(os.environ, {"PROFILE": "free"}): result = metrics.configure_metrics_registry(Mock()) self.assertEqual( result, - metrics.FREEAPPS_METRICS_REGISTRY, + [metrics.get_freeapps_registry()], ) @@ -114,3 +221,37 @@ def test_paidapps_less_than_9_7(self, dd_is_enabled, mocked_runtime_version): result = metrics.configure_metrics_registry(Mock()) # nothing to configure for apps below 9.7.0 assert result == [] + + +class TestAPMMetricsFilterSanitization(TestCase): + @parameterized.expand( + [ + [ + "metric_1,metric_2", + ["metric_1", "metric_2"], + ], + [ + "metric_1, metric_2", + ["metric_1", "metric_2"], + ], + [ + "metric_1, metric_2,", + ["metric_1", "metric_2"], + ], + [ + "metric_1,metric_2 ", + ["metric_1", "metric_2"], + ], + [ + " metric_1,metric_2", + ["metric_1", "metric_2"], + ], + [ + "", + [""], + ], + ] + ) + def test_sanitize_metrics_filter(self, input_string, expected_list): + actual_list = metrics.sanitize_metrics_filter(input_string) + self.assertEqual(expected_list, actual_list) From 5ec51bf534171170a1be30f64bf24c09e7b682ec Mon Sep 17 00:00:00 2001 From: ekremsekerci Date: Tue, 13 Jun 2023 11:08:25 +0200 Subject: [PATCH 6/9] Reduce metric_batch_size to 2500 in telegraf config --- etc/telegraf/telegraf.toml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/telegraf/telegraf.toml.j2 b/etc/telegraf/telegraf.toml.j2 index a78032c6d..88f0fbbbe 100644 --- a/etc/telegraf/telegraf.toml.j2 +++ b/etc/telegraf/telegraf.toml.j2 @@ -430,7 +430,7 @@ # Higher flush interval and batch size, so that we # don't bombard TFR and DataLake with too many requests, but # few requests with bigger payloads - metric_batch_size = 3000 + metric_batch_size = 2500 flush_interval = "30s" ## HTTP method, one of: "POST" or "PUT" From 8a18aa11a693ab8f94a5a0031a0818f009d2bf63 Mon Sep 17 00:00:00 2001 From: Matheus Svolenski Date: Wed, 14 Jun 2023 14:29:31 +0200 Subject: [PATCH 7/9] ci: trigger job test also on releases/ branches --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 488c02fd3..46a47d70d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,6 +8,7 @@ on: branches: - develop - master + - 'releases/**' concurrency: group: ${{ github.workflow }}-${{ github.ref }} From 42c6e3bb3e593bd1af6c5436ebe65ac9c54f2f8e Mon Sep 17 00:00:00 2001 From: Sanny Ramirez Date: Mon, 19 Jun 2023 14:26:25 +0200 Subject: [PATCH 8/9] set garbage collector based on container memory or provided env variable --- buildpack/core/java.py | 45 +++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/buildpack/core/java.py b/buildpack/core/java.py index 5481f4c4a..f96452e3e 100644 --- a/buildpack/core/java.py +++ b/buildpack/core/java.py @@ -11,6 +11,8 @@ JAVA_VERSION_OVERRIDE_KEY = "JAVA_VERSION" +DEFAULT_GC_COLLECTOR = "Serial" +SUPPORTED_GC_COLLECTORS = ["Serial", "G1"] def get_java_major_version(runtime_version): @@ -282,13 +284,7 @@ def _set_user_provided_java_options(m2ee): def _set_jvm_memory(m2ee, vcap): - max_memory = os.environ.get("MEMORY_LIMIT") - - if max_memory: - match = re.search("([0-9]+)M", max_memory.upper()) - limit = int(match.group(1)) - else: - limit = int(vcap["limits"]["mem"]) + limit = get_memory_limit(vcap) if limit >= 32768: heap_size = limit - 4096 @@ -335,6 +331,40 @@ def _set_jvm_memory(m2ee, vcap): ) +def _set_garbage_collector(m2ee, vcap_data): + limit = get_memory_limit(vcap_data) + + jvm_garbage_collector = DEFAULT_GC_COLLECTOR + if limit >= 4096: + # override collector if memory > 4G + jvm_garbage_collector = "G1" + + env_jvm_garbage_collector = os.getenv("JVM_GARBAGE_COLLECTOR") + if env_jvm_garbage_collector: + if env_jvm_garbage_collector in SUPPORTED_GC_COLLECTORS: + # override from user-provided variable + jvm_garbage_collector = env_jvm_garbage_collector + else: + logging.warning("Unsupported jvm garbage collector found. The specified garbage collector [%s] " + "is not supported. JVM garbage collector type falling back to default [%s]", + env_jvm_garbage_collector, jvm_garbage_collector) + + util.upsert_javaopts(m2ee, f"-XX:+Use{jvm_garbage_collector}GC") + + logging.info("JVM garbage collector is set to [%s]", jvm_garbage_collector) + + +def get_memory_limit(vcap): + max_memory = os.environ.get("MEMORY_LIMIT") + + if max_memory: + match = re.search("([0-9]+)M", max_memory.upper()) + limit = int(match.group(1)) + else: + limit = int(vcap["limits"]["mem"]) + return limit + + def _set_application_name(m2ee, application_name): util.upsert_javaopts(m2ee, f"-DapplicationName={application_name}") @@ -342,5 +372,6 @@ def _set_application_name(m2ee, application_name): def update_config(m2ee, application_name, vcap_data, runtime_version): _set_application_name(m2ee, application_name) _set_jvm_memory(m2ee, vcap_data) + _set_garbage_collector(m2ee, vcap_data) _set_jvm_locale(m2ee, get_java_major_version(runtime_version)) _set_user_provided_java_options(m2ee) From d72d4ae9a82c68abccc495b75d8ef18574b5f5a4 Mon Sep 17 00:00:00 2001 From: Sanny Ramirez Date: Mon, 19 Jun 2023 14:30:09 +0200 Subject: [PATCH 9/9] set garbage collector based on container memory or provided env variable --- bin/supply | 9 +-------- buildpack/core/java.py | 5 +++-- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/bin/supply b/bin/supply index 67132c9b3..5eef947f9 100755 --- a/bin/supply +++ b/bin/supply @@ -1,10 +1,3 @@ #!/usr/bin/env bash -BUILD_PATH=$1 - -# setup defaults -mkdir -p "${BUILD_PATH}/.profile.d/" -cat > "${BUILD_PATH}/.profile.d/defaults.sh" <