From 978dcddcb04581fe1c4d0dd9cd607a1bee150429 Mon Sep 17 00:00:00 2001 From: Melissa Vagi Date: Tue, 9 Jul 2024 16:37:21 -0600 Subject: [PATCH 1/5] Add composite aggregation Signed-off-by: Melissa Vagi --- _aggregations/bucket/composite.md | 79 +++++ _aggregations/bucket/value-sources.md | 467 ++++++++++++++++++++++++++ 2 files changed, 546 insertions(+) create mode 100644 _aggregations/bucket/composite.md create mode 100644 _aggregations/bucket/value-sources.md diff --git a/_aggregations/bucket/composite.md b/_aggregations/bucket/composite.md new file mode 100644 index 0000000000..09704f3840 --- /dev/null +++ b/_aggregations/bucket/composite.md @@ -0,0 +1,79 @@ +--- +layout: default +title: Composite +parent: Bucket aggregations +grand_parent: Aggregations +nav_order: 20 +has_children: true +--- + +# Composite + +The `composite` aggregation is a multi-bucket aggregation that creates composite buckets from different sources. It is useful for efficiently paginating multi-level aggregations and retrieving all buckets. Composite buckets are built from combinations of values extracted from documents for each specified source field. + +## Syntax + +```json +{ + "composite": { + "sources": [ + { + "source_field_1": { + "terms": { + "field": "field_name" + } + } + }, + { + "source_field_2": { + "terms": { + "field": "another_field_name" + } + } + } + ] + } +} +``` +{% include copy-curl.html %} + +Property | Description | +---------|------------| +`composite` | The aggregation type. +`sources` | An array of source objects, where each object defines a named source field for the composite buckets. +`terms` | The value source type used to extract the values from the specified field for each source. +`field` | The field name in your documents from which the values will be extracted for the corresponding source. 
+ +For example, consider the following document: + +```json +{ + "product": "T-Shirt", + "category": "Clothing", + "brand": "Acme", + "price": 19.99, + "sizes": ["S", "M", "L"], + "colors": ["red", "blue"] +} +``` +{% include copy-curl.html %} + +Using `sizes` and `colors` as source fields for the aggregation results in the following composite buckets: + +```json +{ "sizes": "S", "colors": "red" } +{ "sizes": "S", "colors": "blue" } +{ "sizes": "M", "colors": "red" } +{ "sizes": "M", "colors": "blue" } +{ "sizes": "L", "colors": "red" } +{ "sizes": "L", "colors": "blue" } +``` +{% include copy-curl.html %} + +## Compatibility and limitations + + + +## Performance considerations + + diff --git a/_aggregations/bucket/value-sources.md b/_aggregations/bucket/value-sources.md new file mode 100644 index 0000000000..5396e86216 --- /dev/null +++ b/_aggregations/bucket/value-sources.md @@ -0,0 +1,467 @@ +--- +layout: default +title: Value sources +parent: Composite +grand_parent: Bucket aggregations +great_grand_parent: Aggregations +nav_order: 5 +--- + +# Value sources + +The `sources` parameter defines the source fields to use when building composite buckets. The order in which the sources are defined controls the order in which the keys are returned in the composite buckets. You must use a unique name when defining sources for the composite aggregation. + +The `sources` parameter can be any of the following: terms, histogram, date histogram, or geotile grid. + +## Terms + +The `terms` value source functions similarly to a regular `terms` aggregation. It extracts values from a document or a script, and each unique value becomes a bucket in the `composite` aggregation. 
For example, the following request uses the single value source `product` to create a bucket for each unique value of the `product` field in your dataset: + +```json +GET /test_index/_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "product": { "terms": { "field": "product.keyword" } } } + ] + } + } + } +} +``` +{% include copy-curl.html %} + + +
+   +    Response +   +  {: .text-delta} + +#### Example response +```json +{ + "took": 30, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "my_buckets": { + "after_key": { + "product": "T-Shirt" + }, + "buckets": [ + { + "key": { + "product": "Jeans" + }, + "doc_count": 1 + }, + { + "key": { + "product": "Sneakers" + }, + "doc_count": 1 + }, + { + "key": { + "product": "T-Shirt" + }, + "doc_count": 1 + } + ] + } + } +} +``` + +
+ + +Similar to the regular `terms` aggregation, the composite aggregation allows you to generate bucket values using a script. For example, in the following request, instead of directly referencing the `product` field in the `terms` aggregation, the script is used to retrieve the field value: + +```json +GET /test_index/_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { + "product": { + "terms": { + "script": { + "source": "doc['product.keyword'].value", + "lang": "painless" + } + } + } + } + ] + } + } + } +} +``` +{% include copy-curl.html %} + + +
+   +    Response +   +  {: .text-delta} + +#### Example response +```json +{ + "took": 47, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "my_buckets": { + "after_key": { + "product": "T-Shirt" + }, + "buckets": [ + { + "key": { + "product": "Jeans" + }, + "doc_count": 1 + }, + { + "key": { + "product": "Sneakers" + }, + "doc_count": 1 + }, + { + "key": { + "product": "T-Shirt" + }, + "doc_count": 1 + } + ] + } + } +} +``` + +
+ + +--- + +## Histogram + +The `histogram` value source feature enables you to create fixed-size intervals for numeric data. The interval parameter determines how the numeric values will be grouped. For example, if you set the interval to `5`, any numeric value will be assigned to the closest interval range. So, a value of `101` would be placed in the interval range of `100` to `105`, with `100` serving as the key for that range. For example, the following query performs a composite aggregation using a `histogram` value source on the `price` field. + +```json +GET /test_index/_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "histo": { "histogram": { "field": "price", "interval": 5 } } } + ] + } + } + } +} +``` +{% include copy-curl.html %} + + +
+   +    Response +   +  {: .text-delta} + +#### Example response +```json +{ + "took": 8, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "my_buckets": { + "after_key": { + "histo": 75 + }, + "buckets": [ + { + "key": { + "histo": 15 + }, + "doc_count": 1 + }, + { + "key": { + "histo": 45 + }, + "doc_count": 1 + }, + { + "key": { + "histo": 75 + }, + "doc_count": 1 + } + ] + } + } +} +``` + +
+ + +You can use a numeric field from your data or implement a script that calculates and returns numerical values to populate the values. For example, the following query buckets all documents into histogram ranges of the `price` field with an interval of `5`, allowing you to analyze the distribution and count of documents across different price ranges. + +```json +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { + "histo": { + "histogram": { + "interval": 5, + "script": { + "source": "doc['price'].value", + "lang": "painless" + } + } + } + } + ] + } + } + } +} +``` +{% include copy-curl.html %} + + +
+   +    Response +   +  {: .text-delta} + +#### Example response +```json +{ + "took": 51, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "my_buckets": { + "after_key": { + "histo": 75 + }, + "buckets": [ + { + "key": { + "histo": 15 + }, + "doc_count": 1 + }, + { + "key": { + "histo": 45 + }, + "doc_count": 1 + }, + { + "key": { + "histo": 75 + }, + "doc_count": 1 + } + ] + } + } +} +``` + +
+ +--- + +## Date histogram + +The `date_histogram` value source functions similarly to the `histogram` value source, but instead of using a numeric interval, it employs a date/time expression to define the interval for grouping date/time values. For example, the following query performs a composite aggregation on a date field (`timestamp`) using the `date_histogram` value source: + +```json +GET /test_index/_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d" } } } + ] + } + } + } +} +``` +{% include copy-curl.html %} + +
+   +    Response +   +  {: .text-delta} + +#### Example response +```json +{ + "took": 56, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "my_buckets": { + "buckets": [] + } + } +} +``` + +
+ +--- + +## Geotile grid + +For `geo_point` data, the `geotile_grid` value source provides a way to aggregate points into buckets that correspond to cells in a grid. Each cell is labeled with a `"{zoom}/{x}/{y}"` format, where `zoom` is set to the specified `precision` value. For example, the following query performs a composite aggregation on a `geo_point` field called `location`. It groups the documents into grid cells (`precision` of `8`) using the `geotile_grid` source. + +```json +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "tile": { "geotile_grid": { "field": "location", "precision": 8 } } } + ] + } + } + } +} +``` +{% include copy-curl.html %} + + +
+   +    Response +   +  {: .text-delta} + +#### Example response +```json +{ + "took": 34, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "my_buckets": { + "buckets": [] + } + } +} +``` + +
\ No newline at end of file From c4144e0136b79745f82adcbcedf80e65826c29ea Mon Sep 17 00:00:00 2001 From: Melissa Vagi Date: Tue, 9 Jul 2024 17:04:48 -0600 Subject: [PATCH 2/5] Add composite aggregation Signed-off-by: Melissa Vagi --- _aggregations/bucket/mixing-value-sources.md | 137 +++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 _aggregations/bucket/mixing-value-sources.md diff --git a/_aggregations/bucket/mixing-value-sources.md b/_aggregations/bucket/mixing-value-sources.md new file mode 100644 index 0000000000..005e883bed --- /dev/null +++ b/_aggregations/bucket/mixing-value-sources.md @@ -0,0 +1,137 @@ +--- +layout: default +title: Mixing value sources +parent: Composite +grand_parent: Bucket aggregations +great_grand_parent: Aggregations +nav_order: 10 +--- + +# Mixing value sources + +The `sources` parameter in the composite aggregation defines the source fields and aggregation types to use when building composite buckets. You can mix and match multiple value sources, such as `terms`, `histogram`, `date_histogram`, and `geotile_grid`, to create unique combinations of data aggregations. + +The order in which the sources are defined controls the order in which the keys are returned in the composite buckets. You must use a unique name when defining sources for the composite aggregation. 
+ +--- + +## Example: Mixing terms and histogram value sources + +The following example creates composite buckets that combine the `product` field (using the `terms` value source) and the `price` field (using the `histogram` value source): + +```json +GET /test_index/_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "product": { "terms": { "field": "product.keyword" } } }, + { "price_range": { "histogram": { "field": "price", "interval": 10 } } } + ] + } + } + } +} +``` +{% include copy-curl.html %} + +This query defines two value sources: + +- `product`: This source uses the `terms` value source to create buckets for each unique value of the `product.keyword` field. +- `price_range`: This source uses the `histogram` value source to create buckets based on the `price` field, grouped into intervals of `10`. + +The resulting composite buckets will have a structure similar to the following example: + +```json +{ + "buckets": [ + { + "key": { + "product": "Jeans", + "price_range": 40 + }, + "doc_count": 1 + }, + { + "key": { + "product": "Sneakers", + "price_range": 70 + }, + "doc_count": 1 + }, + { + "key": { + "product": "T-Shirt", + "price_range": 10 + }, + "doc_count": 1 + } + ] +} +``` +Each composite bucket contains the product name and the key (lower bound) of the corresponding price range, allowing you to analyze the distribution of products across different price ranges. 
+ +--- + +## Example: Mixing date histogram and geotile grid value sources + +The following example combines the `date_histogram` and `geotile_grid` value sources to create composite buckets based on timestamps and geographic locations: + +```json +GET /test_index/_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d" } } }, + { "location": { "geotile_grid": { "field": "location", "precision": 3 } } } + ] + } + } + } +} +``` +{% include copy-curl.html %} + +This query defines two value sources: + +- `date`: This source uses the `date_histogram` value source to group documents based on the day of the `timestamp` field. +- `location`: This source uses the `geotile_grid` value source to aggregate `geo_point` data into buckets that correspond to cells in a grid, with a precision of `3`. + +The resulting composite buckets will have a structure similar to the following example: + +```json +{ + "took": 34, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "my_buckets": { + "buckets": [] + } + } +} +``` + +## Considerations + +When mixing value sources in the `composite` aggregation, keep the following point in mind: + +- Each source must have a unique name, and the order in which the sources are defined determines the order of the keys in each composite bucket. \ No newline at end of file From 13d7217a20bc3a8229316729d041e18aec748151 Mon Sep 17 00:00:00 2001 From: Melissa Vagi Date: Tue, 9 Jul 2024 17:06:41 -0600 Subject: [PATCH 3/5] Add composite aggregation Signed-off-by: Melissa Vagi --- _aggregations/bucket/ordering-composite-buckets.md | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 _aggregations/bucket/ordering-composite-buckets.md diff --git a/_aggregations/bucket/ordering-composite-buckets.md b/_aggregations/bucket/ordering-composite-buckets.md new file mode 100644 index 0000000000..ccb7fc4b61 --- 
/dev/null +++ b/_aggregations/bucket/ordering-composite-buckets.md @@ -0,0 +1,8 @@ +--- +layout: default +title: Ordering composite buckets +parent: Composite +grand_parent: Bucket aggregations +great_grand_parent: Aggregations +nav_order: 15 +--- \ No newline at end of file From 621d0c050f73bcde3b292d18b71981b87efe7f64 Mon Sep 17 00:00:00 2001 From: Melissa Vagi Date: Tue, 9 Jul 2024 17:10:09 -0600 Subject: [PATCH 4/5] Add composite aggregation Signed-off-by: Melissa Vagi --- _aggregations/bucket/early-termination.md | 8 ++++++++ _aggregations/bucket/missing-bucket.md | 8 ++++++++ _aggregations/bucket/size-pagination.md | 8 ++++++++ _aggregations/bucket/subaggregations.md | 8 ++++++++ 4 files changed, 32 insertions(+) create mode 100644 _aggregations/bucket/early-termination.md create mode 100644 _aggregations/bucket/missing-bucket.md create mode 100644 _aggregations/bucket/size-pagination.md create mode 100644 _aggregations/bucket/subaggregations.md diff --git a/_aggregations/bucket/early-termination.md b/_aggregations/bucket/early-termination.md new file mode 100644 index 0000000000..db8c2d2d86 --- /dev/null +++ b/_aggregations/bucket/early-termination.md @@ -0,0 +1,8 @@ +--- +layout: default +title: Early termination +parent: Composite +grand_parent: Bucket aggregations +great_grand_parent: Aggregations +nav_order: 35 +--- \ No newline at end of file diff --git a/_aggregations/bucket/missing-bucket.md b/_aggregations/bucket/missing-bucket.md new file mode 100644 index 0000000000..efa29a95dd --- /dev/null +++ b/_aggregations/bucket/missing-bucket.md @@ -0,0 +1,8 @@ +--- +layout: default +title: Missing bucket +parent: Composite +grand_parent: Bucket aggregations +great_grand_parent: Aggregations +nav_order: 20 +--- \ No newline at end of file diff --git a/_aggregations/bucket/size-pagination.md b/_aggregations/bucket/size-pagination.md new file mode 100644 index 0000000000..bd8c734cc3 --- /dev/null +++ b/_aggregations/bucket/size-pagination.md @@ -0,0 +1,8 @@ 
+--- +layout: default +title: Size and pagination +parent: Composite +grand_parent: Bucket aggregations +great_grand_parent: Aggregations +nav_order: 25 +--- \ No newline at end of file diff --git a/_aggregations/bucket/subaggregations.md b/_aggregations/bucket/subaggregations.md new file mode 100644 index 0000000000..68dea3c4e6 --- /dev/null +++ b/_aggregations/bucket/subaggregations.md @@ -0,0 +1,8 @@ +--- +layout: default +title: Subaggregations +parent: Composite +grand_parent: Bucket aggregations +great_grand_parent: Aggregations +nav_order: 30 +--- \ No newline at end of file From 1fdb04dd9bcfbda96a108ed0a2f6abe923a0943a Mon Sep 17 00:00:00 2001 From: Melissa Vagi Date: Wed, 10 Jul 2024 16:15:58 -0600 Subject: [PATCH 5/5] Add composite aggregations content Signed-off-by: Melissa Vagi --- _aggregations/bucket/early-termination.md | 148 ++++++- _aggregations/bucket/missing-bucket.md | 121 +++++- .../bucket/ordering-composite-buckets.md | 8 - _aggregations/bucket/size-pagination.md | 388 +++++++++++++++++- .../bucket/sort-composite-buckets.md | 137 +++++++ _aggregations/bucket/subaggregations.md | 144 ++++++- 6 files changed, 930 insertions(+), 16 deletions(-) delete mode 100644 _aggregations/bucket/ordering-composite-buckets.md create mode 100644 _aggregations/bucket/sort-composite-buckets.md diff --git a/_aggregations/bucket/early-termination.md b/_aggregations/bucket/early-termination.md index db8c2d2d86..9240d76355 100644 --- a/_aggregations/bucket/early-termination.md +++ b/_aggregations/bucket/early-termination.md @@ -1,8 +1,152 @@ --- layout: default -title: Early termination +title: Optimizing composite aggregations with early termination parent: Composite grand_parent: Bucket aggregations great_grand_parent: Aggregations nav_order: 35 ---- \ No newline at end of file +--- + +# Optimizing composite aggregations with early termination + +Composite aggregations can be optimized for better performance by using the early termination feature. 
Early termination stops processing the aggregation as soon as it has found all the relevant buckets. + +## Setting the index sort + +To enable early termination, you need to set the `sort.field` and `sort.order` settings on your index. These settings define the order in which the documents are sorted in the index, which should match the order of the sources in your composite aggregation. + +The following example request shows how to set the index sort when creating an index, sorting by `username` in ascending order and then by the `timestamp` field in descending order: + +```json +PUT my-index +{ + "settings": { + "index": { + "sort.field": ["username", "timestamp"], + "sort.order": ["asc", "desc"] + } + }, + "mappings": { + "properties": { + "username": { + "type": "keyword", + "doc_values": true + }, + "timestamp": { + "type": "date" + } + } + } +} +``` +{% include copy-curl.html %} + + +## Ordering sources + +For optimal early termination, composite aggregation sources should be ordered to match the index sort, with higher cardinality sources placed first, followed by lower cardinality sources. The field order within the aggregation must align with the index sort order. 
+ +For example, if the index is sorted by `username` (ascending) and then `timestamp` (descending), your composite aggregation should use the same order, as shown in the following query: + +```json +GET /my-index/_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "user_name": { "terms": { "field": "username" } } }, + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d", "order": "desc" } } } + ] + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "took": 10, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 0, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "my_buckets": { + "buckets": [] + } + } +} +``` +{% include copy-curl.html %} + +## Disabling total hit tracking + +To further optimize performance, you can disable the tracking of total hits by setting `track_total_hits` to `false` in your query. This prevents OpenSearch from calculating the total number of matching documents for every page of results. Note that if you need to know the total number of matching documents, you can retrieve it from the first request and skip the calculation for subsequent requests. 
See the following example query: + +```json +GET /my-index/_search +{ + "size": 0, + "track_total_hits": false, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "user_name": { "terms": { "field": "username" } } }, + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d", "order": "desc" } } } + ] + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "took": 13, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "max_score": null, + "hits": [] + }, + "aggregations": { + "my_buckets": { + "buckets": [] + } + } +} +``` +{% include copy-curl.html %} + +## Additional considerations + +Keep the following considerations in mind when working with this feature: + +- Multi-valued fields cannot be used for early termination, so it is recommended to place them last in the `sources` array. +- Index sorting can potentially slow down indexing operations, so it is important to test the impact of index sorting on your specific use case and dataset. +- If the index is not sorted, composite aggregations will still attempt early termination if the query matches all documents, for example, a `match_all` query. diff --git a/_aggregations/bucket/missing-bucket.md b/_aggregations/bucket/missing-bucket.md index efa29a95dd..efba33f5ff 100644 --- a/_aggregations/bucket/missing-bucket.md +++ b/_aggregations/bucket/missing-bucket.md @@ -1,8 +1,125 @@ --- layout: default -title: Missing bucket +title: Handling missing buckets parent: Composite grand_parent: Bucket aggregations great_grand_parent: Aggregations nav_order: 20 ---- \ No newline at end of file +--- + +# Handling missing buckets + +By default, composite aggregations exclude documents that do not have a value for a particular source. However, you can choose to include these missing values by setting the `missing_bucket` parameter to `true` for the relevant source. 
+ +## Syntax + +The syntax for handling missing values in a composite aggregation requires you to include the `missing_bucket` parameter with a value of `true` within the relevant source definition, as shown in the following example syntax for the `sources` array. + +```json +"sources": [ + { + "NAME": { + "AGGREGATION": { + "field": "FIELD", + "missing_bucket": true + } + } + } +] +``` +{% include copy-curl.html %} + +--- + +## Example + +For example, the following query groups documents by product name using a `terms` aggregation and includes a bucket for documents that do not have a product name specified: + +```json +GET /sales/_search +{ + "size": 0, + "aggs": { + "sales_by_day_product": { + "composite": { + "sources": [ + { + "day": { + "date_histogram": { + "field": "timestamp", + "calendar_interval": "1d", + "order": "desc" + } + } + }, + { + "product": { + "terms": { + "field": "product.keyword", + "order": "asc", + "missing_bucket": true + } + } + } + ] + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "took": 23, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "sales_by_day_product": { + "after_key": { + "day": 1680307200000, + "product": "Product B" + }, + "buckets": [ + { + "key": { + "day": 1680393600000, + "product": "Product A" + }, + "doc_count": 1 + }, + { + "key": { + "day": 1680307200000, + "product": "Product A" + }, + "doc_count": 1 + }, + { + "key": { + "day": 1680307200000, + "product": "Product B" + }, + "doc_count": 1 + } + ] + } + } +} +``` +{% include copy-curl.html %} \ No newline at end of file diff --git a/_aggregations/bucket/ordering-composite-buckets.md b/_aggregations/bucket/ordering-composite-buckets.md deleted file mode 100644 index ccb7fc4b61..0000000000 --- a/_aggregations/bucket/ordering-composite-buckets.md 
+++ /dev/null @@ -1,8 +0,0 @@ ---- -layout: default -title: Ordering composite buckets -parent: Composite -grand_parent: Bucket aggregations -great_grand_parent: Aggregations -nav_order: 15 ---- \ No newline at end of file diff --git a/_aggregations/bucket/size-pagination.md b/_aggregations/bucket/size-pagination.md index bd8c734cc3..c2d39060f9 100644 --- a/_aggregations/bucket/size-pagination.md +++ b/_aggregations/bucket/size-pagination.md @@ -1,8 +1,392 @@ --- layout: default -title: Size and pagination +title: Limiting and paginating composite aggregation results parent: Composite grand_parent: Bucket aggregations great_grand_parent: Aggregations nav_order: 25 ---- \ No newline at end of file +--- + +# Limiting and paginating composite aggregation results + +When working with composite aggregations, you may need to limit the number of composite buckets returned or paginate through large result sets. You can achieve this by using the `size` and `after` parameters. + +## Limiting the number of composite buckets with `size` + +The `size` parameter defines the maximum number of composite buckets to be included in the response. Each composite bucket is treated as a single bucket, regardless of the number of value sources used to create it. + +### Syntax + +```json +"composite": { + "size": NUMBER, + "sources": [ + { + "NAME": { + "AGGREGATION": { + "field": "FIELD" + } + } + }, + ... + ] +} +``` +{% include copy-curl.html %} + + +## Paginating composite aggregation results with `after` + +When handling large amounts of data or when you need to display the results in a paginated user interface, you can use the `after` parameter in combination with the `size` parameter to retrieve composite buckets in smaller chunks or pages. By combining `size` and `after`, you can efficiently control the number of composite buckets returned and paginate through large result sets. 
+ +### Syntax + +```json +"composite": { + "size": NUMBER, + "after": { "NAME": "VALUE", ... }, + "sources": [ + { + "NAME": { + "AGGREGATION": { + "field": "FIELD" + } + } + }, + ... + ] +} +``` +{% include copy-curl.html %} + +--- + +## Example query with `size` + +In the following example query, the `size` parameter is set to `5`, limiting the response to the first five composite buckets, in the sort order of the specified value sources. The `total_sales` subaggregation is calculated for each returned bucket: + +```json +GET /sales/_search +{ + "size": 0, + "aggs": { + "sales_by_day_product": { + "composite": { + "size": 5, + "sources": [ + { + "day": { + "date_histogram": { + "field": "timestamp", + "calendar_interval": "1d", + "order": "desc" + } + } + }, + { + "product": { + "terms": { + "field": "product.keyword", + "order": "asc" + } + } + } + ] + }, + "aggregations": { + "total_sales": { + "sum": { + "field": "price" + } + } + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "took": 43, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "sales_by_day_product": { + "after_key": { + "day": 1680307200000, + "product": "Product B" + }, + "buckets": [ + { + "key": { + "day": 1680393600000, + "product": "Product A" + }, + "doc_count": 1, + "total_sales": { + "value": 0 + } + }, + { + "key": { + "day": 1680307200000, + "product": "Product A" + }, + "doc_count": 1, + "total_sales": { + "value": 0 + } + }, + { + "key": { + "day": 1680307200000, + "product": "Product B" + }, + "doc_count": 1, + "total_sales": { + "value": 0 + } + } + ] + } + } +} +``` +{% include copy-curl.html %} + + +## Example query with `size` and `after` + +The following example query groups and aggregates data based on multiple criteria while also paginating the results: + +```json +GET /sales/_search +{ + "size": 
0, + "aggs": { + "sales_by_day_product": { + "composite": { + "size": 5, + "sources": [ + { + "day": { + "date_histogram": { + "field": "timestamp", + "calendar_interval": "1d", + "order": "desc" + } + } + }, + { + "product": { + "terms": { + "field": "product.keyword", + "order": "asc" + } + } + } + ] + }, + "aggregations": { + "total_sales": { + "sum": { + "field": "price" + } + } + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "took": 10, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "sales_by_day_product": { + "after_key": { + "day": 1680307200000, + "product": "Product B" + }, + "buckets": [ + { + "key": { + "day": 1680393600000, + "product": "Product A" + }, + "doc_count": 1, + "total_sales": { + "value": 0 + } + }, + { + "key": { + "day": 1680307200000, + "product": "Product A" + }, + "doc_count": 1, + "total_sales": { + "value": 0 + } + }, + { + "key": { + "day": 1680307200000, + "product": "Product B" + }, + "doc_count": 1, + "total_sales": { + "value": 0 + } + } + ] + } + } +} +``` +{% include copy-curl.html %} + +When dealing with large amounts of data or when you need to display the results in a paginated user interface, you can use the `after` parameter in combination with the `size` parameter to retrieve composite buckets in smaller chunks or pages. The `after` parameter expects an object with keys that match the names of the value sources defined in the `sources` array. The values in the `after` object should correspond to the last composite bucket from the previous page, which is returned as `after_key` in the response. 
See the following example query: + +```json +GET /sales/_search +{ + "size": 0, + "aggs": { + "sales_by_day_product": { + "composite": { + "size": 5, + "after": { + "day": 1680403200000, + "product": "Product B" + }, + "sources": [ + { + "day": { + "date_histogram": { + "field": "timestamp", + "calendar_interval": "1d", + "order": "desc" + } + } + }, + { + "product": { + "terms": { + "field": "product.keyword", + "order": "asc" + } + } + } + ] + }, + "aggregations": { + "total_sales": { + "sum": { + "field": "price" + } + } + } + } + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "took": 9, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "sales_by_day_product": { + "after_key": { + "day": 1680307200000, + "product": "Product B" + }, + "buckets": [ + { + "key": { + "day": 1680393600000, + "product": "Product A" + }, + "doc_count": 1, + "total_sales": { + "value": 0 + } + }, + { + "key": { + "day": 1680307200000, + "product": "Product A" + }, + "doc_count": 1, + "total_sales": { + "value": 0 + } + }, + { + "key": { + "day": 1680307200000, + "product": "Product B" + }, + "doc_count": 1, + "total_sales": { + "value": 0 + } + } + ] + } + } +} +``` +{% include copy-curl.html %} diff --git a/_aggregations/bucket/sort-composite-buckets.md b/_aggregations/bucket/sort-composite-buckets.md new file mode 100644 index 0000000000..4674d068a8 --- /dev/null +++ b/_aggregations/bucket/sort-composite-buckets.md @@ -0,0 +1,137 @@ +--- +layout: default +title: Sorting composite buckets +parent: Composite +grand_parent: Bucket aggregations +great_grand_parent: Aggregations +nav_order: 15 +--- + +# Sorting composite buckets + +By default, composite buckets are sorted in natural ascending order based on their values. 
However, you can customize the sort order for each [value source]({{site.url}}{{site.baseurl}}/aggregations/bucket/value-sources/) within a composite bucket aggregation. + +## Syntax + +The `order` parameter is used to specify the sort direction for a value source within the `sources` array of the composite aggregation. It accepts two values: + +- `asc` (default): Sort in ascending order +- `desc`: Sort in descending order + +```json +"composite": { + "sources": [ + { + "NAME": { + "AGGREGATION": { + "field": "FIELD", + "order": "asc|desc" + } + } + }, + ... + ] +} +``` +{% include copy-curl.html %} + + +--- + +## Example + +For example, the following query groups documents by day (`date_histogram`) in descending order, and then by product name (`terms`) in ascending order: + +```json +GET /sales/_search +{ + "size": 0, + "aggs": { + "sales_by_day_product": { + "composite": { + "sources": [ + { + "day": { + "date_histogram": { + "field": "timestamp", + "calendar_interval": "1d", + "order": "desc" + } + } + }, + { + "product": { + "terms": { + "field": "product.keyword", + "order": "asc" + } + } + } + ] + } + } + } +} +``` +{% include copy-curl.html %} + + +In this example, + +- The `day` source uses a `date_histogram` aggregation on the `timestamp` field, with a calendar interval of 1 day, sorted in descending order. +- The `product` source uses a `terms` aggregation on the `product.keyword` field, sorted in ascending order. 
+ +#### Example response + +```json +{ + "took": 65, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "sales_by_day_product": { + "after_key": { + "day": 1680307200000, + "product": "Product B" + }, + "buckets": [ + { + "key": { + "day": 1680393600000, + "product": "Product A" + }, + "doc_count": 1 + }, + { + "key": { + "day": 1680307200000, + "product": "Product A" + }, + "doc_count": 1 + }, + { + "key": { + "day": 1680307200000, + "product": "Product B" + }, + "doc_count": 1 + } + ] + } + } +} +``` +{% include copy-curl.html %} diff --git a/_aggregations/bucket/subaggregations.md b/_aggregations/bucket/subaggregations.md index 68dea3c4e6..7129c83e05 100644 --- a/_aggregations/bucket/subaggregations.md +++ b/_aggregations/bucket/subaggregations.md @@ -1,8 +1,148 @@ --- layout: default -title: Subaggregations +title: Working with subaggregations parent: Composite grand_parent: Bucket aggregations great_grand_parent: Aggregations nav_order: 30 ---- \ No newline at end of file +--- + +# Working with subaggregations + +Composite aggregations support the use of subaggregations, which allows you to compute additional buckets or statistics for each composite bucket created by the parent aggregation. Subaggregations provide a powerful way to analyze and summarize your data at multiple levels within a single query. + +## Syntax + +To include subaggregations in a composite aggregation, you need to add an `aggregations` field within the composite aggregation definition. This field should contain the subaggregation(s) you want to compute for each composite bucket. See the following example definition: + +```json +{ + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "SOURCE_NAME": { "AGGREGATION": { ... } } }, + ... 
+ ] + }, + "aggregations": { + "SUB_AGGREGATION_NAME": { + "AGGREGATION_TYPE": { ... } + } + } + } + } +} +``` +{% include copy-curl.html %} + +--- + +## Example + +Consider an index `sales` with fields `timestamp` (date), `product` (keyword), and `price` (float). You can use a composite aggregation to group sales by `day` and `product` and then calculate the average price for each resulting bucket using the following query: + +```json +GET /sales/_search +{ + "size": 0, + "aggs": { + "sales_by_day_product": { + "composite": { + "sources": [ + { + "day": { + "date_histogram": { + "field": "timestamp", + "calendar_interval": "1d", + "order": "desc" + } + } + }, + { + "product": { + "terms": { + "field": "product.keyword", + "order": "asc" + } + } + } + ] + }, + "aggregations": { + "avg_price": { + "avg": { + "field": "price" + } + } + } + } + } +} +``` +{% include copy-curl.html %} + + +#### Example response + +```json +{ + "took": 12, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "sales_by_day_product": { + "after_key": { + "day": 1680307200000, + "product": "Product B" + }, + "buckets": [ + { + "key": { + "day": 1680393600000, + "product": "Product A" + }, + "doc_count": 1, + "avg_price": { + "value": null + } + }, + { + "key": { + "day": 1680307200000, + "product": "Product A" + }, + "doc_count": 1, + "avg_price": { + "value": null + } + }, + { + "key": { + "day": 1680307200000, + "product": "Product B" + }, + "doc_count": 1, + "avg_price": { + "value": null + } + } + ] + } + } +} +``` +{% include copy-curl.html %}