diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LegacyGermanStemmer.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LegacyGermanStemmer.java new file mode 100644 index 0000000000000..3d75795bfd3cc --- /dev/null +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LegacyGermanStemmer.java @@ -0,0 +1,471 @@ +/* + * @notice + * Generated by Snowball 2.0.0 - https://snowballstem.org/ + * + * Modifications copyright (C) 2024 Elasticsearch B.V. + */ + +package org.elasticsearch.analysis.common; + +import org.tartarus.snowball.Among; +import org.tartarus.snowball.ext.GermanStemmer; + +/** + * This class implements the stemming algorithm defined by a snowball script. + * NOTE: This is the GermanStemmer used up until Lucene 9 + * + * @deprecated this class exists for backwards compatibility with Lucene 9 indices + */ +@SuppressWarnings("checkstyle:DescendantToken") +@Deprecated +public class LegacyGermanStemmer extends org.tartarus.snowball.SnowballStemmer { + + private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup(); + + private static final Among a_0[] = { + new Among("", -1, 5), + new Among("U", 0, 2), + new Among("Y", 0, 1), + new Among("\u00E4", 0, 3), + new Among("\u00F6", 0, 4), + new Among("\u00FC", 0, 2) }; + + private static final Among a_1[] = { + new Among("e", -1, 2), + new Among("em", -1, 1), + new Among("en", -1, 2), + new Among("ern", -1, 1), + new Among("er", -1, 1), + new Among("s", -1, 3), + new Among("es", 5, 2) }; + + private static final Among a_2[] = { new Among("en", -1, 1), new Among("er", -1, 1), new Among("st", -1, 2), new Among("est", 2, 1) }; + + private static final Among a_3[] = { new Among("ig", -1, 1), new Among("lich", -1, 1) }; + + private static final Among a_4[] = { + new Among("end", -1, 1), + new Among("ig", -1, 2), + new Among("ung", -1, 1), + new Among("lich", -1, 3), + new 
Among("isch", -1, 2), + new Among("ik", -1, 2), + new Among("heit", -1, 3), + new Among("keit", -1, 4) }; + + private static final char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; + + private static final char g_s_ending[] = { 117, 30, 5 }; + + private static final char g_st_ending[] = { 117, 30, 4 }; + + private int I_x; + private int I_p2; + private int I_p1; + + private boolean r_prelude() { + int v_1 = cursor; + while (true) { + int v_2 = cursor; + lab0: { + lab1: { + int v_3 = cursor; + lab2: { + bra = cursor; + if (!(eq_s("\u00DF"))) { + break lab2; + } + ket = cursor; + slice_from("ss"); + break lab1; + } + cursor = v_3; + if (cursor >= limit) { + break lab0; + } + cursor++; + } + continue; + } + cursor = v_2; + break; + } + cursor = v_1; + while (true) { + int v_4 = cursor; + lab3: { + golab4: while (true) { + int v_5 = cursor; + lab5: { + if (!(in_grouping(g_v, 97, 252))) { + break lab5; + } + bra = cursor; + lab6: { + int v_6 = cursor; + lab7: { + if (!(eq_s("u"))) { + break lab7; + } + ket = cursor; + if (!(in_grouping(g_v, 97, 252))) { + break lab7; + } + slice_from("U"); + break lab6; + } + cursor = v_6; + if (!(eq_s("y"))) { + break lab5; + } + ket = cursor; + if (!(in_grouping(g_v, 97, 252))) { + break lab5; + } + slice_from("Y"); + } + cursor = v_5; + break golab4; + } + cursor = v_5; + if (cursor >= limit) { + break lab3; + } + cursor++; + } + continue; + } + cursor = v_4; + break; + } + return true; + } + + private boolean r_mark_regions() { + I_p1 = limit; + I_p2 = limit; + int v_1 = cursor; + { + int c = cursor + 3; + if (0 > c || c > limit) { + return false; + } + cursor = c; + } + I_x = cursor; + cursor = v_1; + golab0: while (true) { + lab1: { + if (!(in_grouping(g_v, 97, 252))) { + break lab1; + } + break golab0; + } + if (cursor >= limit) { + return false; + } + cursor++; + } + golab2: while (true) { + lab3: { + if (!(out_grouping(g_v, 97, 252))) { + break lab3; + } + break golab2; + } + if (cursor >= limit) 
{ + return false; + } + cursor++; + } + I_p1 = cursor; + lab4: { + if (!(I_p1 < I_x)) { + break lab4; + } + I_p1 = I_x; + } + golab5: while (true) { + lab6: { + if (!(in_grouping(g_v, 97, 252))) { + break lab6; + } + break golab5; + } + if (cursor >= limit) { + return false; + } + cursor++; + } + golab7: while (true) { + lab8: { + if (!(out_grouping(g_v, 97, 252))) { + break lab8; + } + break golab7; + } + if (cursor >= limit) { + return false; + } + cursor++; + } + I_p2 = cursor; + return true; + } + + private boolean r_postlude() { + int among_var; + while (true) { + int v_1 = cursor; + lab0: { + bra = cursor; + among_var = find_among(a_0); + if (among_var == 0) { + break lab0; + } + ket = cursor; + switch (among_var) { + case 1: + slice_from("y"); + break; + case 2: + slice_from("u"); + break; + case 3: + slice_from("a"); + break; + case 4: + slice_from("o"); + break; + case 5: + if (cursor >= limit) { + break lab0; + } + cursor++; + break; + } + continue; + } + cursor = v_1; + break; + } + return true; + } + + private boolean r_R1() { + if (!(I_p1 <= cursor)) { + return false; + } + return true; + } + + private boolean r_R2() { + if (!(I_p2 <= cursor)) { + return false; + } + return true; + } + + private boolean r_standard_suffix() { + int among_var; + int v_1 = limit - cursor; + lab0: { + ket = cursor; + among_var = find_among_b(a_1); + if (among_var == 0) { + break lab0; + } + bra = cursor; + if (!r_R1()) { + break lab0; + } + switch (among_var) { + case 1: + slice_del(); + break; + case 2: + slice_del(); + int v_2 = limit - cursor; + lab1: { + ket = cursor; + if (!(eq_s_b("s"))) { + cursor = limit - v_2; + break lab1; + } + bra = cursor; + if (!(eq_s_b("nis"))) { + cursor = limit - v_2; + break lab1; + } + slice_del(); + } + break; + case 3: + if (!(in_grouping_b(g_s_ending, 98, 116))) { + break lab0; + } + slice_del(); + break; + } + } + cursor = limit - v_1; + int v_3 = limit - cursor; + lab2: { + ket = cursor; + among_var = find_among_b(a_2); + if 
(among_var == 0) { + break lab2; + } + bra = cursor; + if (!r_R1()) { + break lab2; + } + switch (among_var) { + case 1: + slice_del(); + break; + case 2: + if (!(in_grouping_b(g_st_ending, 98, 116))) { + break lab2; + } { + int c = cursor - 3; + if (limit_backward > c || c > limit) { + break lab2; + } + cursor = c; + } + slice_del(); + break; + } + } + cursor = limit - v_3; + int v_4 = limit - cursor; + lab3: { + ket = cursor; + among_var = find_among_b(a_4); + if (among_var == 0) { + break lab3; + } + bra = cursor; + if (!r_R2()) { + break lab3; + } + switch (among_var) { + case 1: + slice_del(); + int v_5 = limit - cursor; + lab4: { + ket = cursor; + if (!(eq_s_b("ig"))) { + cursor = limit - v_5; + break lab4; + } + bra = cursor; + { + int v_6 = limit - cursor; + lab5: { + if (!(eq_s_b("e"))) { + break lab5; + } + cursor = limit - v_5; + break lab4; + } + cursor = limit - v_6; + } + if (!r_R2()) { + cursor = limit - v_5; + break lab4; + } + slice_del(); + } + break; + case 2: { + int v_7 = limit - cursor; + lab6: { + if (!(eq_s_b("e"))) { + break lab6; + } + break lab3; + } + cursor = limit - v_7; + } + slice_del(); + break; + case 3: + slice_del(); + int v_8 = limit - cursor; + lab7: { + ket = cursor; + lab8: { + int v_9 = limit - cursor; + lab9: { + if (!(eq_s_b("er"))) { + break lab9; + } + break lab8; + } + cursor = limit - v_9; + if (!(eq_s_b("en"))) { + cursor = limit - v_8; + break lab7; + } + } + bra = cursor; + if (!r_R1()) { + cursor = limit - v_8; + break lab7; + } + slice_del(); + } + break; + case 4: + slice_del(); + int v_10 = limit - cursor; + lab10: { + ket = cursor; + if (find_among_b(a_3) == 0) { + cursor = limit - v_10; + break lab10; + } + bra = cursor; + if (!r_R2()) { + cursor = limit - v_10; + break lab10; + } + slice_del(); + } + break; + } + } + cursor = limit - v_4; + return true; + } + + @Override + public boolean stem() { + int v_1 = cursor; + r_prelude(); + cursor = v_1; + int v_2 = cursor; + r_mark_regions(); + cursor = v_2; + 
limit_backward = cursor; + cursor = limit; + r_standard_suffix(); + cursor = limit_backward; + int v_4 = cursor; + r_postlude(); + cursor = v_4; + return true; + } + + @Override + public boolean equals(Object o) { + return o instanceof LegacyGermanStemmer; /* this class does not extend GermanStemmer */ + } + + @Override + public int hashCode() { + return LegacyGermanStemmer.class.getName().hashCode(); /* kept consistent with equals */ + } +} diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java index 3c70ff5b18615..07c5f5dcf39b6 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java @@ -48,9 +48,13 @@ import org.apache.lucene.analysis.snowball.SnowballFilter; import org.apache.lucene.analysis.sv.SwedishLightStemFilter; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.logging.DeprecationCategory; +import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; import org.tartarus.snowball.ext.ArmenianStemmer; import org.tartarus.snowball.ext.BasqueStemmer; @@ -82,10 +86,14 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory { private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream(); - private String language; + private final String language; + private final IndexVersion version; + + private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(StemmerTokenFilterFactory.class); StemmerTokenFilterFactory(IndexSettings indexSettings, Environment 
environment, String name, Settings settings) throws IOException { super(name, settings); + this.version = indexSettings.getIndexVersionCreated(); this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter"))); // check that we have a valid language by trying to create a TokenStream create(EMPTY_TOKEN_STREAM).close(); @@ -165,9 +173,19 @@ public TokenStream create(TokenStream tokenStream) { // German stemmers } else if ("german".equalsIgnoreCase(language)) { - return new SnowballFilter(tokenStream, new GermanStemmer()); + if (this.version.onOrAfter(IndexVersions.UPGRADE_TO_LUCENE_10_0_0)) { + return new SnowballFilter(tokenStream, new GermanStemmer()); + } else { + // use pre-L10 GermanStemmer that doesn't normalize umlauts etc... + return new SnowballFilter(tokenStream, new LegacyGermanStemmer()); + } } else if ("german2".equalsIgnoreCase(language)) { - // TODO Lucene 10 upgrade: how about bw comp for users relying on german2 stemmer that is now folded into german stemmer? + DEPRECATION_LOGGER.warn( + DeprecationCategory.ANALYSIS, + "german2_stemmer_deprecation", + "The 'german2' stemmer has been deprecated and folded into the 'german' Stemmer. " + + "Replace all usages of 'german2' with 'german'." 
+ ); return new SnowballFilter(tokenStream, new GermanStemmer()); } else if ("light_german".equalsIgnoreCase(language) || "lightGerman".equalsIgnoreCase(language)) { return new GermanLightStemFilter(tokenStream); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ASCIIFoldingTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ASCIIFoldingTokenFilterFactoryTests.java index a301bc1c851a7..c3017bd3ea237 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ASCIIFoldingTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ASCIIFoldingTokenFilterFactoryTests.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class ASCIIFoldingTokenFilterFactoryTests extends ESTokenStreamTestCase { public void testDefault() throws IOException { ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings( diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/BaseWordDelimiterTokenFilterFactoryTestCase.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/BaseWordDelimiterTokenFilterFactoryTestCase.java index 001f54ee238d4..ab26112005bd6 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/BaseWordDelimiterTokenFilterFactoryTestCase.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/BaseWordDelimiterTokenFilterFactoryTestCase.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + /** * Base class to test {@link WordDelimiterTokenFilterFactory} and * {@link WordDelimiterGraphTokenFilterFactory}. 
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CJKFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CJKFilterFactoryTests.java index 16614f056c05a..95b093b03f9a7 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CJKFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CJKFilterFactoryTests.java @@ -22,6 +22,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class CJKFilterFactoryTests extends ESTokenStreamTestCase { private static final String RESOURCE = "/org/elasticsearch/analysis/common/cjk_analysis.json"; diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java index d5a36b110a7be..ee9701b89c127 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java @@ -24,6 +24,8 @@ import java.io.StringReader; import java.util.Arrays; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class CharGroupTokenizerFactoryTests extends ESTokenStreamTestCase { public void testParseTokenChars() { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java index 115ed1522381a..77902a2ab982f 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java +++ 
b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java @@ -27,6 +27,8 @@ import java.nio.file.Files; import java.nio.file.Path; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { public void testDefault() throws IOException { Settings settings = Settings.builder() diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java index db8b5c92165a2..b0cda5620c3e5 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java @@ -23,6 +23,7 @@ import java.io.StringReader; import java.text.ParseException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.containsString; public class ESSolrSynonymParserTests extends ESTokenStreamTestCase { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java index 16c6aa256009b..17455c431992f 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java @@ -23,6 +23,7 @@ import java.io.StringReader; import java.text.ParseException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.containsString; public class ESWordnetSynonymParserTests 
extends ESTokenStreamTestCase { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactoryTests.java index c4e695cabf695..446cee8f48379 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactoryTests.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class EdgeNGramTokenFilterFactoryTests extends ESTokenStreamTestCase { public void testDefault() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java index 329318a096efb..11d1653439e59 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerTests.java @@ -29,6 +29,9 @@ import java.io.StringReader; import java.util.Collections; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class EdgeNGramTokenizerTests extends ESTokenStreamTestCase { private static IndexAnalyzers buildAnalyzers(IndexVersion version, String tokenizer) throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FingerprintAnalyzerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FingerprintAnalyzerTests.java index 
8049c09025cf2..8783860b8e02e 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FingerprintAnalyzerTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FingerprintAnalyzerTests.java @@ -13,6 +13,8 @@ import org.apache.lucene.analysis.CharArraySet; import org.elasticsearch.test.ESTokenStreamTestCase; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; + public class FingerprintAnalyzerTests extends ESTokenStreamTestCase { public void testFingerprint() throws Exception { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java index d6c2792af7de7..2f3dd1917ebe2 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java @@ -20,6 +20,8 @@ import java.io.IOException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class FlattenGraphTokenFilterFactoryTests extends ESTokenStreamTestCase { public void testBasic() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepFilterFactoryTests.java index 5d84457df1495..a3c9eb2cf3aae 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepFilterFactoryTests.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.StringReader; +import static 
org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.instanceOf; public class KeepFilterFactoryTests extends ESTokenStreamTestCase { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepTypesFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepTypesFilterFactoryTests.java index 5a7ead779621e..e499f6f7eebdc 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepTypesFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeepTypesFilterFactoryTests.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.instanceOf; public class KeepTypesFilterFactoryTests extends ESTokenStreamTestCase { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeywordMarkerFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeywordMarkerFilterFactoryTests.java index c249db706a189..53641602a1c43 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeywordMarkerFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeywordMarkerFilterFactoryTests.java @@ -23,6 +23,7 @@ import java.io.IOException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; import static org.hamcrest.Matchers.instanceOf; /** diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java index c3a9531b4a2ed..ee117de653d95 100644 --- 
a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase { public void testDefault() throws IOException { Settings settings = Settings.builder() diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MinHashFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MinHashFilterFactoryTests.java index aff05dbc4d3a3..020b78a50b213 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MinHashFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MinHashFilterFactoryTests.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertStreamHasNumberOfTokens; + public class MinHashFilterFactoryTests extends ESTokenStreamTestCase { public void testDefault() throws IOException { int default_hash_count = 1; diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java index 7436263f8df9e..eb9032061d134 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java @@ -26,6 +26,8 @@ import java.io.IOException; import java.util.Collections; +import static 
org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; + public class MultiplexerTokenFilterTests extends ESTokenStreamTestCase { public void testMultiplexingFilter() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenFilterFactoryTests.java index 4b0232ed95e0e..ef02f91c30a40 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenFilterFactoryTests.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class NGramTokenFilterFactoryTests extends ESTokenStreamTestCase { public void testDefault() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java index 9c4286d40db77..8c365a1362f85 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java @@ -28,7 +28,7 @@ import java.io.StringReader; import java.util.Arrays; -import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.instanceOf; public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase { @@ -183,6 +183,9 @@ public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception { assertThat(edgeNGramTokenFilter, 
instanceOf(EdgeNGramTokenFilter.class)); } } + assertWarnings( + "The [side] parameter is deprecated and will be removed. Use a [reverse] before and after the [edge_ngram] instead." + ); } /*` diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java index b36bb18529109..5121c6390ceb0 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class PathHierarchyTokenizerFactoryTests extends ESTokenStreamTestCase { public void testDefaults() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternAnalyzerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternAnalyzerTests.java index 6c13c4eac4ab7..91fd74bca9c93 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternAnalyzerTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternAnalyzerTests.java @@ -18,6 +18,9 @@ import java.util.Arrays; import java.util.regex.Pattern; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.checkRandomData; + /** * Verifies the behavior of PatternAnalyzer. 
*/ diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java index 4ac4b44d8ffdd..a9e9cd6a51aa5 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java @@ -19,6 +19,7 @@ import org.elasticsearch.test.ESTokenStreamTestCase; import org.elasticsearch.test.IndexSettingsModule; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.elasticsearch.test.ESTestCase.createTestAnalysis; import static org.hamcrest.Matchers.containsString; diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterTests.java index 48434461fc151..91637f1cb9449 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterTests.java @@ -19,6 +19,8 @@ import java.io.IOException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class PatternReplaceTokenFilterTests extends ESTokenStreamTestCase { public void testNormalizer() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PredicateTokenScriptFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PredicateTokenScriptFilterTests.java index ae8c17decb3b7..40ba9acbc257a 100644 --- 
a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PredicateTokenScriptFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PredicateTokenScriptFilterTests.java @@ -37,6 +37,7 @@ import java.io.IOException; import java.util.Collections; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/RemoveDuplicatesFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/RemoveDuplicatesFilterFactoryTests.java index df0c0aa6e7df6..bb37b9bb7f4ef 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/RemoveDuplicatesFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/RemoveDuplicatesFilterFactoryTests.java @@ -20,6 +20,7 @@ import java.io.IOException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.instanceOf; public class RemoveDuplicatesFilterFactoryTests extends ESTokenStreamTestCase { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ScriptedConditionTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ScriptedConditionTokenFilterTests.java index 2a480f7cb4a75..fb5eee96acffb 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ScriptedConditionTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ScriptedConditionTokenFilterTests.java @@ -36,6 +36,7 @@ import java.util.Collections; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; diff 
--git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/SnowballAnalyzerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/SnowballAnalyzerTests.java index 9153b5d9b3819..8fd8b86047488 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/SnowballAnalyzerTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/SnowballAnalyzerTests.java @@ -13,6 +13,8 @@ import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.elasticsearch.test.ESTokenStreamTestCase; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; + public class SnowballAnalyzerTests extends ESTokenStreamTestCase { public void testEnglish() throws Exception { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerOverrideTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerOverrideTokenFilterFactoryTests.java index 2266d554fcba6..bbe22ff43d52e 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerOverrideTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerOverrideTokenFilterFactoryTests.java @@ -25,6 +25,8 @@ import java.util.List; import java.util.Locale; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class StemmerOverrideTokenFilterFactoryTests extends ESTokenStreamTestCase { @Rule public ExpectedException expectedException = ExpectedException.none(); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java index fe6a0487671a3..40d40bd66abd2 100644 --- 
a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.AnalysisTestsHelper; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -28,7 +29,7 @@ import java.io.IOException; import java.io.StringReader; -import static com.carrotsearch.randomizedtesting.RandomizedTest.scaledRandomIntBetween; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_VERSION_CREATED; import static org.hamcrest.Matchers.instanceOf; @@ -106,15 +107,38 @@ public void testMultipleLanguagesThrowsException() throws IOException { } public void testGermanVsGerman2Stemmer() throws IOException { - Analyzer analyzer = createGermanStemmer("german"); - assertAnalyzesTo(analyzer, "Buecher Bücher", new String[] { "Buech", "Buch" }); - - analyzer = createGermanStemmer("german2"); - assertAnalyzesTo(analyzer, "Buecher Bücher", new String[] { "Buch", "Buch"}); + { + IndexVersion v = IndexVersionUtils.randomVersionBetween( + random(), + IndexVersionUtils.getFirstVersion(), + IndexVersionUtils.getPreviousVersion(IndexVersions.UPGRADE_TO_LUCENE_10_0_0) + ); + Analyzer analyzer = createGermanStemmer("german", v); + assertAnalyzesTo(analyzer, "Buecher Bücher", new String[] { "Buech", "Buch" }); + + analyzer = createGermanStemmer("german2", v); + assertAnalyzesTo(analyzer, "Buecher Bücher", new String[] { "Buch", "Buch" }); + } + { + IndexVersion v = IndexVersionUtils.randomVersionBetween( + random(), + IndexVersions.UPGRADE_TO_LUCENE_10_0_0, + 
IndexVersion.current() + ); + Analyzer analyzer = createGermanStemmer("german", v); + assertAnalyzesTo(analyzer, "Buecher Bücher", new String[] { "Buch", "Buch" }); + + analyzer = createGermanStemmer("german2", v); + assertAnalyzesTo(analyzer, "Buecher Bücher", new String[] { "Buch", "Buch" }); + assertWarnings( + "The 'german2' stemmer has been deprecated and folged into the 'german' Stemmer. " + + "Replace all usages of 'german2' with 'german'." + ); + } } - public Analyzer createGermanStemmer(String variant) throws IOException { - IndexVersion v = IndexVersionUtils.randomVersion(random()); + public Analyzer createGermanStemmer(String variant, IndexVersion v) throws IOException { + Settings settings = Settings.builder() .put("index.analysis.filter.my_german.type", "stemmer") .put("index.analysis.filter.my_german.language", variant) diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StopAnalyzerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StopAnalyzerTests.java index db25d6a0f1845..d72e68fcbab14 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StopAnalyzerTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StopAnalyzerTests.java @@ -19,6 +19,7 @@ import org.elasticsearch.test.ESTokenStreamTestCase; import org.elasticsearch.test.IndexSettingsModule; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.elasticsearch.test.ESTestCase.createTestAnalysis; public class StopAnalyzerTests extends ESTokenStreamTestCase { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/TrimTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/TrimTokenFilterTests.java index f3816f43d2b2b..63e9732f99a8a 100644 --- 
a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/TrimTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/TrimTokenFilterTests.java @@ -19,6 +19,8 @@ import java.io.IOException; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class TrimTokenFilterTests extends ESTokenStreamTestCase { public void testNormalizer() throws IOException { diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterGraphTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterGraphTokenFilterFactoryTests.java index 4a060ab11e2bd..4995fe844c9c5 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterGraphTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterGraphTokenFilterFactoryTests.java @@ -30,6 +30,9 @@ import java.io.StringReader; import java.util.Collections; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class WordDelimiterGraphTokenFilterFactoryTests extends BaseWordDelimiterTokenFilterFactoryTestCase { public WordDelimiterGraphTokenFilterFactoryTests() { super("word_delimiter_graph"); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterTokenFilterFactoryTests.java index 2644303991b8d..636174f5c79cc 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterTokenFilterFactoryTests.java 
@@ -19,6 +19,8 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; + public class WordDelimiterTokenFilterFactoryTests extends BaseWordDelimiterTokenFilterFactoryTestCase { public WordDelimiterTokenFilterFactoryTests() { super("word_delimiter"); diff --git a/plugins/analysis-nori/src/test/java/org/elasticsearch/plugin/analysis/nori/NoriAnalysisTests.java b/plugins/analysis-nori/src/test/java/org/elasticsearch/plugin/analysis/nori/NoriAnalysisTests.java index 1709d02263eea..6efcdb7594fa4 100644 --- a/plugins/analysis-nori/src/test/java/org/elasticsearch/plugin/analysis/nori/NoriAnalysisTests.java +++ b/plugins/analysis-nori/src/test/java/org/elasticsearch/plugin/analysis/nori/NoriAnalysisTests.java @@ -33,6 +33,7 @@ import java.nio.file.Files; import java.nio.file.Path; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.instanceOf; diff --git a/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java b/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java index d2aa11f9f3866..13c58fed1c5ad 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java @@ -27,6 +27,7 @@ import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; public class CustomNormalizerTests extends ESTokenStreamTestCase { private static final AnalysisPlugin MOCK_ANALYSIS_PLUGIN = new MockAnalysisPlugin(); diff --git a/server/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java 
b/server/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java index 3cfdbdcdf37da..0aa7652e5a5f6 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactoryTests.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.io.StringReader; +import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.instanceOf; @ThreadLeakScope(Scope.NONE) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTokenStreamTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTokenStreamTestCase.java index a1ce19f820433..402ab55549609 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTokenStreamTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTokenStreamTestCase.java @@ -12,7 +12,6 @@ import com.carrotsearch.randomizedtesting.annotations.Listeners; import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite; -import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TimeUnits; import org.elasticsearch.bootstrap.BootstrapForTesting; @@ -29,7 +28,7 @@ * Basic test case for token streams. the assertion methods in this class will * run basic checks to enforce correct behavior of the token streams. */ -public abstract class ESTokenStreamTestCase extends BaseTokenStreamTestCase { +public abstract class ESTokenStreamTestCase extends ESTestCase { static { try {