From ff43d5abe58b8503235897b01e63f3011d580bdb Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 17 Jan 2025 15:25:45 +0100 Subject: [PATCH] Fix SearchTimeoutIT Two of the timeout tests have been muted for several months. The reason is that we tightened the assertions to cover for partial results being returned, but there were edge cases in which partial results were not actually returned. The edge case is a typical timing issue where the initial check for timeout in CancellableBulkScorer already triggers the timeout, before any document has been collected. I made several adjustments to the test to make it more robust: - use index random to index documents, that speeds it up - share indexing across test methods, so that it happens once at the suite level - raise a single timeout, rather than one per visited document which causes an unnecessary slowdown - make sure that one document is always visited before a timeout, so that partial results are always guaranteed to be returned Closes #98369 Closes #98053 --- .../elasticsearch/search/SearchTimeoutIT.java | 100 ++++++++++-------- 1 file changed, 57 insertions(+), 43 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/SearchTimeoutIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/SearchTimeoutIT.java index f63f09764621b..df10d8da607dd 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/SearchTimeoutIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/SearchTimeoutIT.java @@ -9,8 +9,9 @@ package org.elasticsearch.search; +import org.apache.lucene.util.ThreadInterruptedException; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; import org.elasticsearch.plugins.Plugin; @@ -20,22 +21,26 @@ import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; import java.util.Collection; import java.util.Collections; import java.util.Map; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; -import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE; import static org.elasticsearch.index.query.QueryBuilders.scriptQuery; import static org.elasticsearch.search.SearchTimeoutIT.ScriptedTimeoutPlugin.SCRIPT_NAME; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE) +@ESIntegTestCase.SuiteScopeTestCase public class SearchTimeoutIT extends ESIntegTestCase { + private static final AtomicInteger scriptExecutions = new AtomicInteger(0); + @Override protected Collection> nodePlugins() { return Collections.singleton(ScriptedTimeoutPlugin.class); @@ -46,53 +51,55 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { return Settings.builder().put(super.nodeSettings(nodeOrdinal, otherSettings)).build(); } - private void indexDocs() { - for (int i = 0; i < 32; i++) { - prepareIndex("test").setId(Integer.toString(i)).setSource("field", "value").get(); - } - refresh("test"); + @Override + protected void setupSuiteScopeCluster() throws Exception { + super.setupSuiteScopeCluster(); + indexRandom(true, "test", randomIntBetween(5, 30)); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + scriptExecutions.set(0); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/98369") public void testTopHitsTimeout() { - indexDocs(); - SearchResponse searchResponse = prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.MILLISECONDS)) - .setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap()))) - .get(); - assertThat(searchResponse.isTimedOut(), equalTo(true)); - assertEquals(0, searchResponse.getShardFailures().length); - assertEquals(0, searchResponse.getFailedShards()); - assertThat(searchResponse.getSuccessfulShards(), greaterThan(0)); - assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards()); - assertThat(searchResponse.getHits().getTotalHits().value(), greaterThan(0L)); - assertThat(searchResponse.getHits().getHits().length, greaterThan(0)); + SearchRequestBuilder searchRequestBuilder = prepareSearch("test").setTimeout(new TimeValue(100, TimeUnit.MILLISECONDS)) + .setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap()))); + ElasticsearchAssertions.assertResponse(searchRequestBuilder, searchResponse -> { + assertThat(searchResponse.isTimedOut(), equalTo(true)); + assertEquals(0, searchResponse.getShardFailures().length); + assertEquals(0, searchResponse.getFailedShards()); + assertThat(searchResponse.getSuccessfulShards(), greaterThan(0)); + assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards()); + assertThat(searchResponse.getHits().getTotalHits().value(), greaterThan(0L)); + assertThat(searchResponse.getHits().getHits().length, greaterThan(0)); + }); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/98053") public void testAggsTimeout() { - indexDocs(); - SearchResponse searchResponse = prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.MILLISECONDS)) + indexRandom(true, "test", randomIntBetween(5, 30)); + SearchRequestBuilder searchRequestBuilder = prepareSearch("test").setTimeout(new TimeValue(100, TimeUnit.MILLISECONDS)) .setSize(0) .setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap()))) - .addAggregation(new TermsAggregationBuilder("terms").field("field.keyword")) - .get(); - assertThat(searchResponse.isTimedOut(), equalTo(true)); - assertEquals(0, searchResponse.getShardFailures().length); - assertEquals(0, searchResponse.getFailedShards()); - assertThat(searchResponse.getSuccessfulShards(), greaterThan(0)); - assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards()); - assertThat(searchResponse.getHits().getTotalHits().value(), greaterThan(0L)); - assertEquals(searchResponse.getHits().getHits().length, 0); - StringTerms terms = searchResponse.getAggregations().get("terms"); - assertEquals(1, terms.getBuckets().size()); - StringTerms.Bucket bucket = terms.getBuckets().get(0); - assertEquals("value", bucket.getKeyAsString()); - assertThat(bucket.getDocCount(), greaterThan(0L)); + .addAggregation(new TermsAggregationBuilder("terms").field("field.keyword")); + ElasticsearchAssertions.assertResponse(searchRequestBuilder, searchResponse -> { + assertThat(searchResponse.isTimedOut(), equalTo(true)); + assertEquals(0, searchResponse.getShardFailures().length); + assertEquals(0, searchResponse.getFailedShards()); + assertThat(searchResponse.getSuccessfulShards(), greaterThan(0)); + assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards()); + assertThat(searchResponse.getHits().getTotalHits().value(), greaterThan(0L)); + assertEquals(0, searchResponse.getHits().getHits().length); + StringTerms terms = searchResponse.getAggregations().get("terms"); + assertEquals(1, terms.getBuckets().size()); + StringTerms.Bucket bucket = terms.getBuckets().get(0); + assertEquals("value", bucket.getKeyAsString()); + assertThat(bucket.getDocCount(), greaterThan(0L)); + }); } - public void testPartialResultsIntolerantTimeout() throws Exception { - prepareIndex("test").setId("1").setSource("field", "value").setRefreshPolicy(IMMEDIATE).get(); - + public void testPartialResultsIntolerantTimeout() { ElasticsearchException ex = expectThrows( ElasticsearchException.class, prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.MILLISECONDS)) @@ -100,6 +107,7 @@ public void testPartialResultsIntolerantTimeout() throws Exception { .setAllowPartialSearchResults(false) // this line causes timeouts to report failures ); assertTrue(ex.toString().contains("Time exceeded")); + assertEquals(504, ex.status().getStatus()); } public static class ScriptedTimeoutPlugin extends MockScriptPlugin { @@ -108,10 +116,16 @@ public static class ScriptedTimeoutPlugin extends MockScriptPlugin { @Override public Map, Object>> pluginScripts() { return Collections.singletonMap(SCRIPT_NAME, params -> { - try { - Thread.sleep(500); - } catch (InterruptedException e) { - throw new RuntimeException(e); + // sleep only once per test, but only after executing the script once without sleeping. + // This ensures that one document is always returned before the timeout happens. + // Also, don't sleep any further to avoid slowing down the test excessively. + // A timeout on a specific slice of a single shard is enough. + if (scriptExecutions.incrementAndGet() == 1) { + try { + Thread.sleep(500); + } catch (InterruptedException e) { + throw new ThreadInterruptedException(e); + } } return true; });