From 93da9b54507cbdfc0f6134a02452941ce95ba1c5 Mon Sep 17 00:00:00 2001 From: Maurizio Branca Date: Tue, 26 Mar 2024 09:35:21 +0100 Subject: [PATCH] Add subobjects definition (at the data stream level) (#727) ## What does this PR do? Add support for `subobjects: false` at the data stream level. Here is an example: ```yaml # From /packages/good_v3/data_stream/subobjects/manifest.yml title: my-data-stream type: logs elasticsearch: index_template: mappings: subobjects: true ``` ## Why is it important? Give integration developers (per data stream) access to the [subobjects](https://www.elastic.co/guide/en/elasticsearch/reference/current/subobjects.html) option in the integration's index template mappings. Since we added the `subobjects` option in stack version 8.3, users could customize how Elasticsearch handles fields that contain dots in their names from `true` (expanded, current default) to `false` (not expanded). However, integration developers could not set this up in the integrations. Note on the per-field option: the `subobjects` option [has been available](https://github.com/elastic/package-spec/pull/573) at the field level since package-spec 3.1.0. However, to make this happen at the data stream level, we needed https://github.com/elastic/elasticsearch/issues/99860 to land in Elasticsearch. ## Checklist - [x] I have added test packages to [`test/packages`](https://github.com/elastic/package-spec/tree/main/test/packages) that prove my change is effective. - [x] I have added an entry in [`spec/changelog.yml`](https://github.com/elastic/package-spec/blob/main/spec/changelog.yml). 
## Related issues - https://github.com/elastic/package-spec/issues/349 - https://github.com/elastic/package-spec/pull/573 - https://github.com/elastic/elasticsearch/issues/99860 (requirement) --------- Co-authored-by: Mario Rodriguez Molins --- spec/changelog.yml | 5 ++ spec/integration/_dev/build/build.spec.yml | 2 +- .../integration/data_stream/manifest.spec.yml | 63 ++++++++++++++++++- .../data_stream/subobjects/manifest.yml | 4 ++ test/packages/good_v3/manifest.yml | 2 +- 5 files changed, 73 insertions(+), 3 deletions(-) diff --git a/spec/changelog.yml b/spec/changelog.yml index 870c951dd..7ee1171a8 100644 --- a/spec/changelog.yml +++ b/spec/changelog.yml @@ -2,6 +2,11 @@ ## This file documents changes in the package specification. It is NOT a package specification file. ## Newer entries go at the bottom of each in-development version. ## +- version: 3.2.0-next + changes: + - description: Add subobjects definition (at the data stream level) + type: enhancement + link: https://github.com/elastic/package-spec/pull/727 - version: 3.1.3-next changes: - description: Prepare for next version diff --git a/spec/integration/_dev/build/build.spec.yml b/spec/integration/_dev/build/build.spec.yml index 121864708..37ab0e72f 100644 --- a/spec/integration/_dev/build/build.spec.yml +++ b/spec/integration/_dev/build/build.spec.yml @@ -32,7 +32,7 @@ spec: - dependencies # JSON patches for newer versions should be placed on top versions: - - before: 3.1.3 + - before: 3.2.0 patch: - op: remove path: "/properties/dependencies/properties/ecs/properties/import_mappings/deprecated" diff --git a/spec/integration/data_stream/manifest.spec.yml b/spec/integration/data_stream/manifest.spec.yml index 666610b5d..f06a87dac 100644 --- a/spec/integration/data_stream/manifest.spec.yml +++ b/spec/integration/data_stream/manifest.spec.yml @@ -279,7 +279,6 @@ spec: examples: - ["strict_date_optional_time","yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"] # This is the default if not set. 
- ["MM/dd/yyyy"] - dynamic_templates: type: array items: @@ -353,6 +352,64 @@ spec: $ref: "./fields/fields.spec.yml#/items/properties/scaling_factor" type: $ref: "./fields/fields.spec.yml#/items/properties/type" + subobjects: + description: > + Turn the support for subobjects on or off in the mapping + configuration for the data stream. + + By default, it's set to `true`, enabling Elasticsearch to + expands fields that contain dots in their names + to their corresponding object structure. + + Setting it to `false` restricts objects + from holding subobjects, enabling storing documents + with dot-contained field names and common prefixes. + + It is still possible to send documents that have + objects to a data stream that has set subobjects + to `false`. + + Example when the subobjects mapping option is `false`: + + PUT my-index-000001 + { + "mappings": { + "subobjects": false + } + } + + PUT my-index-000001/_doc/metric_1 + { + "time" : "100ms", + "time.min" : "10ms", + "time.max" : "900ms" + } + + Before disabling subobjects, consider the following implications: + + - Nested field types cannot be used in data streams. + - The subobjects mapping definition is immutable. + - This setting depends on auto-flattening mappings, + which comes with limitations [^1] for integration + and custom mappings in data streams without + subobjects. + - Auto-flattening is available starting from version + 8.14, integrations must be on at least this + version. + + For comprehensive details on subobjects, refer to + the Elasticsearch documentation [^2]. 
+ + + [^1]: Auto-flattening limitations are documented at: + https://www.elastic.co/guide/en/elasticsearch/reference/master/subobjects.html#_auto_flattening_object_mappings + + [^2]: Elasticsearch documentation on subobjects: + https://www.elastic.co/guide/en/elasticsearch/reference/current/subobjects.html + + type: boolean + default: true + ingest_pipeline: description: Elasticsearch ingest pipeline settings type: object @@ -515,6 +572,10 @@ spec: - title # JSON patches for newer versions should be placed on top versions: + - before: 3.2.0 + patch: + - op: remove + path: /definitions/elasticsearch_index_template/properties/mappings/properties/subobjects - before: 3.0.2 patch: # Required secret for variables that look like secrets. diff --git a/test/packages/good_v3/data_stream/subobjects/manifest.yml b/test/packages/good_v3/data_stream/subobjects/manifest.yml index 1a3b1aea7..32a327683 100644 --- a/test/packages/good_v3/data_stream/subobjects/manifest.yml +++ b/test/packages/good_v3/data_stream/subobjects/manifest.yml @@ -1,5 +1,9 @@ title: subojects type: logs +elasticsearch: + index_template: + mappings: + subobjects: true streams: - input: logfile vars: diff --git a/test/packages/good_v3/manifest.yml b/test/packages/good_v3/manifest.yml index edbfef5ce..648fe11e6 100644 --- a/test/packages/good_v3/manifest.yml +++ b/test/packages/good_v3/manifest.yml @@ -1,4 +1,4 @@ -format_version: 3.1.0 +format_version: 3.2.0 name: good_v3 title: Good package description: This package is good for format version 3