Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ESQL: Ask for correct field names if only LOOKUP/ENRICH fields are kept #120372

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ IDR | Indore | POINT(75.8472 22.7167) | India | POINT(75.8
;

enrichDroppingOriginalIndexFields
required_capability: enrich_load
required_capability: fix_field_names_enrich_lookup_no_valid_fields

from employees
| limit 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ emp_no:integer

dropAllOriginalIndexFields
required_capability: join_lookup_v11
alex-spies marked this conversation as resolved.
Show resolved Hide resolved
required_capability: fix_field_names_enrich_lookup_no_valid_fields

FROM employees
| LIMIT 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,13 @@ public enum Cap {
*/
SEMANTIC_TEXT_FIELD_CAPS,

/**
* Fix for https://github.com/elastic/elasticsearch/issues/120272.
* When there's an ENRICH/LOOKUP, and all fields from the original indices are discarded,
* field-caps wasn't asking for any existing field.
*/
FIX_FIELD_NAMES_ENRICH_LOOKUP_NO_VALID_FIELDS,

/**
* Support named argument for function in map format.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,9 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
fieldNames.addAll(subfields(fieldNames));
fieldNames.addAll(enrichPolicyMatchFields);
fieldNames.addAll(subfields(enrichPolicyMatchFields));
// We add the "_field" because it's light, and there must always be some field matching the indices.
ivancea marked this conversation as resolved.
Show resolved Hide resolved
// Originally, this was used only when no fields were found, but ENRICH/LOOKUP fields can't currently
// be distinguished from fields of the original index, so we always add it as a safety measure.
fieldNames.addAll(IndexResolver.INDEX_METADATA_FIELD);
alex-spies marked this conversation as resolved.
Show resolved Hide resolved
return result.withFieldNames(fieldNames);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
package org.elasticsearch.xpack.esql.session;

import org.elasticsearch.Build;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
import org.elasticsearch.xpack.esql.parser.EsqlParser;
Expand Down Expand Up @@ -1321,15 +1322,14 @@ public void testEnrichOnDefaultFieldWithKeep() {
from employees
| enrich languages_policy
| keep emp_no""", Set.of("language_name"));
assertThat(fieldNames, equalTo(Set.of("emp_no", "emp_no.*", "language_name", "language_name.*")));
assertThat(fieldNames, equalTo(expectedFieldNames(Set.of("emp_no", "emp_no.*", "language_name", "language_name.*"))));
}

public void testDissectOverwriteName() {
Set<String> fieldNames = fieldNames("""
assertFieldNames("""
from employees
| dissect first_name "%{first_name} %{more}"
| keep emp_no, first_name, more""", Set.of());
assertThat(fieldNames, equalTo(Set.of("emp_no", "emp_no.*", "first_name", "first_name.*")));
| keep emp_no, first_name, more""", Set.of("emp_no", "emp_no.*", "first_name", "first_name.*"));
}

public void testEnrichOnDefaultField() {
Expand All @@ -1346,20 +1346,17 @@ public void testMetrics() {
assertThat(e.getMessage(), containsString("line 1:1: mismatched input 'METRICS' expecting {"));
return;
}
Set<String> fieldNames = fieldNames(query, Set.of());
assertThat(
fieldNames,
equalTo(
Set.of(
"@timestamp",
"@timestamp.*",
"network.total_bytes_in",
"network.total_bytes_in.*",
"network.total_cost",
"network.total_cost.*",
"cluster",
"cluster.*"
)
assertFieldNames(
query,
Set.of(
"@timestamp",
"@timestamp.*",
"network.total_bytes_in",
"network.total_bytes_in.*",
"network.total_cost",
"network.total_cost.*",
"cluster",
"cluster.*"
)
);
}
Expand Down Expand Up @@ -1575,19 +1572,39 @@ public void testMultiLookupJoinSameIndexKeepAfter() {
);
}

public void testLookupJoinWithEvalKey() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V11.isEnabled());
assertFieldNames(
"""
FROM some_index
| EVAL key = 5
| LOOKUP JOIN some_lookup ON key
| KEEP test""",
Set.of("test", "test.*", "key", "key.*")
);
}

private Set<String> fieldNames(String query, Set<String> enrichPolicyMatchFields) {
var preAnalysisResult = new EsqlSession.PreAnalysisResult(null);
return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();
}

private void assertFieldNames(String query, Set<String> expected) {
Set<String> fieldNames = fieldNames(query, Collections.emptySet());
assertThat(fieldNames, equalTo(expected));
assertThat(fieldNames, equalTo(expectedFieldNames(expected)));
}

private void assertFieldNames(String query, Set<String> expected, Set<String> wildCardIndices) {
var preAnalysisResult = EsqlSession.fieldNames(parser.createStatement(query), Set.of(), new EsqlSession.PreAnalysisResult(null));
assertThat("Query-wide field names", preAnalysisResult.fieldNames(), equalTo(expected));
assertThat("Query-wide field names", preAnalysisResult.fieldNames(), equalTo(expectedFieldNames(expected)));
assertThat("Lookup Indices that expect wildcard lookups", preAnalysisResult.wildcardJoinIndices(), equalTo(wildCardIndices));
}

private Set<String> expectedFieldNames(Set<String> expected) {
if (expected.size() == 1 && expected.contains("*")) {
return expected;
}

return Sets.union(expected, IndexResolver.INDEX_METADATA_FIELD);
}
}