Skip to content

Commit

Permalink
Add tests for "german" and "german2" snowball stemming
Browse files Browse the repository at this point in the history
  • Loading branch information
cbuescher committed Sep 26, 2024
1 parent 05b4b6e commit e16bd8e
Showing 1 changed file with 32 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
*/
package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
Expand Down Expand Up @@ -103,4 +104,35 @@ public void testMultipleLanguagesThrowsException() throws IOException {
);
assertEquals("Invalid stemmer class specified: [english, light_english]", e.getMessage());
}

public void testGermanVsGerman2Stemmer() throws IOException {
Analyzer analyzer = createGermanStemmer("german");
assertAnalyzesTo(analyzer, "Buecher Bücher", new String[] { "Buech", "Buch" });

analyzer = createGermanStemmer("german2");
assertAnalyzesTo(analyzer, "Buecher Bücher", new String[] { "Buch", "Buch"});
}

public Analyzer createGermanStemmer(String variant) throws IOException {
IndexVersion v = IndexVersionUtils.randomVersion(random());
Settings settings = Settings.builder()
.put("index.analysis.filter.my_german.type", "stemmer")
.put("index.analysis.filter.my_german.language", variant)
.put("index.analysis.analyzer.my_german.tokenizer", "whitespace")
.put("index.analysis.analyzer.my_german.filter", "my_german")
.put(SETTING_VERSION_CREATED, v)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();

ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_german");
assertThat(tokenFilter, instanceOf(StemmerTokenFilterFactory.class));
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader("Buecher oder Bücher"));
TokenStream create = tokenFilter.create(tokenizer);
assertThat(create, instanceOf(SnowballFilter.class));
IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers;
NamedAnalyzer analyzer = indexAnalyzers.get("my_german");
return analyzer;
}
}

0 comments on commit e16bd8e

Please sign in to comment.