Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ESQL: Ask for correct field names if only LOOKUP/ENRICH fields are kept #120372

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
6 changes: 6 additions & 0 deletions docs/changelog/120372.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 120372
summary: Ask for correct field names if only LOOKUP/ENRICH fields are kept
area: ES|QL
type: bug
issues:
- 120272
Original file line number Diff line number Diff line change
Expand Up @@ -643,3 +643,19 @@ FROM airports
abbrev:k | city_name:k | city_location:geo_point | country:k | location:geo_point | name:text | region:text | boundary_wkt_length:i
IDR | Indore | POINT(75.8472 22.7167) | India | POINT(75.8092915005895 22.727749187571) | Devi Ahilyabai Holkar Int'l | Indore City | 231
;

enrichDroppingOriginalIndexFields
required_capability: enrich_load
required_capability: fix_field_names_enrich_lookup_no_valid_fields

from employees
| limit 3
| eval x = 2
| enrich languages_policy on x
| keep language_name;

language_name:keyword
French
French
French
;
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,29 @@ emp_no:integer
10001
;

dropAllOriginalIndexFields
required_capability: join_lookup_v11
alex-spies marked this conversation as resolved.
Show resolved Hide resolved
required_capability: fix_field_names_enrich_lookup_no_valid_fields

FROM employees
| LIMIT 2
| EVAL language_code = 1
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
| SORT language_name, country
| KEEP language_name, country
;

language_name:keyword | country:text
English | Canada
English | Canada
English | United States of America
English | United States of America
English | null
English | null
null | United Kingdom
null | United Kingdom
;

###############################################
# Filtering tests with languages_lookup index
###############################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,13 @@ public enum Cap {
*/
SEMANTIC_TEXT_FIELD_CAPS,

/**
* Fix for https://github.com/elastic/elasticsearch/issues/120272.
* When there's an ENRICH/LOOKUP, and all fields from the original indices are discarded,
* field-caps wasn't asking for any existing field.
*/
FIX_FIELD_NAMES_ENRICH_LOOKUP_NO_VALID_FIELDS,

/**
* Support named argument for function in map format.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,10 @@ private static void resolveFieldNames(LogicalPlan parsed, EnrichResolution enric
}
}

/**
* Get the field names from the parsed plan combined with the ENRICH match fields from the ENRICH policy
* and the JOIN ON keys.
*/
static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicyMatchFields, PreAnalysisResult result) {
if (false == parsed.anyMatch(plan -> plan instanceof Aggregate || plan instanceof Project)) {
// no explicit columns selection, for example "from employees"
Expand Down Expand Up @@ -635,17 +639,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
// remove valid metadata attributes because they will be filtered out by the IndexResolver anyway
// otherwise, in some edge cases, we will fail to ask for "*" (all fields) instead
references.removeIf(a -> a instanceof MetadataAttribute || MetadataAttribute.isSupported(a.name()));
Set<String> fieldNames = references.names();

if (fieldNames.isEmpty() && enrichPolicyMatchFields.isEmpty()) {
// there cannot be an empty list of fields, we'll ask the simplest and lightest one instead: _index
return result.withFieldNames(IndexResolver.INDEX_METADATA_FIELD);
} else {
fieldNames.addAll(subfields(fieldNames));
fieldNames.addAll(enrichPolicyMatchFields);
fieldNames.addAll(subfields(enrichPolicyMatchFields));
return result.withFieldNames(fieldNames);
}
Set<String> fieldNames = references.names();
fieldNames.addAll(subfields(fieldNames));
fieldNames.addAll(enrichPolicyMatchFields);
fieldNames.addAll(subfields(enrichPolicyMatchFields));
// We add the "_index" field because it's light, and there must always be some field matching the indices.
// Originally, this was used only when no fields were found, but ENRICH/LOOKUP fields can't currently
// be distinguished from fields of the original index, so we always add it as a safety measure.
fieldNames.addAll(IndexResolver.INDEX_METADATA_FIELD);
alex-spies marked this conversation as resolved.
Show resolved Hide resolved
return result.withFieldNames(fieldNames);
}

private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
package org.elasticsearch.xpack.esql.session;

import org.elasticsearch.Build;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
import org.elasticsearch.xpack.esql.parser.EsqlParser;
Expand Down Expand Up @@ -1321,15 +1322,14 @@ public void testEnrichOnDefaultFieldWithKeep() {
from employees
| enrich languages_policy
| keep emp_no""", Set.of("language_name"));
assertThat(fieldNames, equalTo(Set.of("emp_no", "emp_no.*", "language_name", "language_name.*")));
assertThat(fieldNames, equalTo(expectedFieldNames(Set.of("emp_no", "emp_no.*", "language_name", "language_name.*"))));
}

public void testDissectOverwriteName() {
Set<String> fieldNames = fieldNames("""
assertFieldNames("""
from employees
| dissect first_name "%{first_name} %{more}"
| keep emp_no, first_name, more""", Set.of());
assertThat(fieldNames, equalTo(Set.of("emp_no", "emp_no.*", "first_name", "first_name.*")));
| keep emp_no, first_name, more""", Set.of("emp_no", "emp_no.*", "first_name", "first_name.*"));
}

public void testEnrichOnDefaultField() {
Expand All @@ -1346,20 +1346,17 @@ public void testMetrics() {
assertThat(e.getMessage(), containsString("line 1:1: mismatched input 'METRICS' expecting {"));
return;
}
Set<String> fieldNames = fieldNames(query, Set.of());
assertThat(
fieldNames,
equalTo(
Set.of(
"@timestamp",
"@timestamp.*",
"network.total_bytes_in",
"network.total_bytes_in.*",
"network.total_cost",
"network.total_cost.*",
"cluster",
"cluster.*"
)
assertFieldNames(
query,
Set.of(
"@timestamp",
"@timestamp.*",
"network.total_bytes_in",
"network.total_bytes_in.*",
"network.total_cost",
"network.total_cost.*",
"cluster",
"cluster.*"
)
);
}
Expand Down Expand Up @@ -1575,19 +1572,36 @@ public void testMultiLookupJoinSameIndexKeepAfter() {
);
}

public void testLookupJoinWithEvalKey() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V11.isEnabled());
assertFieldNames("""
FROM some_index
| EVAL key = 5
| LOOKUP JOIN some_lookup ON key
| KEEP test""", Set.of("test", "test.*", "key", "key.*"));
}

private Set<String> fieldNames(String query, Set<String> enrichPolicyMatchFields) {
var preAnalysisResult = new EsqlSession.PreAnalysisResult(null);
return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();
}

private void assertFieldNames(String query, Set<String> expected) {
Set<String> fieldNames = fieldNames(query, Collections.emptySet());
assertThat(fieldNames, equalTo(expected));
assertThat(fieldNames, equalTo(expectedFieldNames(expected)));
}

private void assertFieldNames(String query, Set<String> expected, Set<String> wildCardIndices) {
var preAnalysisResult = EsqlSession.fieldNames(parser.createStatement(query), Set.of(), new EsqlSession.PreAnalysisResult(null));
assertThat("Query-wide field names", preAnalysisResult.fieldNames(), equalTo(expected));
assertThat("Query-wide field names", preAnalysisResult.fieldNames(), equalTo(expectedFieldNames(expected)));
assertThat("Lookup Indices that expect wildcard lookups", preAnalysisResult.wildcardJoinIndices(), equalTo(wildCardIndices));
}

private Set<String> expectedFieldNames(Set<String> expected) {
if (expected.size() == 1 && expected.contains("*")) {
return expected;
}

return Sets.union(expected, IndexResolver.INDEX_METADATA_FIELD);
}
}