From 0c1ef66c0493a392d65eece8ae97efae745267bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20B=C3=BCscher?= <christophbuescher@posteo.de>
Date: Tue, 1 Oct 2024 16:04:09 +0200
Subject: [PATCH] Add bwc layer for 'romanian' analyzer

The 'romanian' language analyzer has been improved in Lucene 10 in two important
ways. First, the snowball stemmer has been modified to work with s-comma and
t-comma characters; previously it worked only with their cedilla forms, which
were used when Romanian didn't have full Unicode support
(https://github.com/snowballstem/snowball/pull/177). Second, the
analyzer now contains a normalization step to map cedilla forms to forms with comma.

In order to maintain backwards compatibility with existing indices, this change
moves the Lucene 9 stemmer over to the analysis module as a deprecated variant
and creates the analyzer for existing indices with the "old" stemmer and without
the normalization step. New indices automatically run with the improved
behaviour.
---
 .../common/LegacyRomanianStemmer.java         | 744 ++++++++++++++++++
 .../common/RomanianAnalyzerProvider.java      |  46 +-
 .../common/RomanianAnalyzerTests.java         |  80 ++
 .../upgrades/FullClusterRestartIT.java        | 106 +++
 4 files changed, 969 insertions(+), 7 deletions(-)
 create mode 100644 modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LegacyRomanianStemmer.java
 create mode 100644 modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/RomanianAnalyzerTests.java

diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LegacyRomanianStemmer.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LegacyRomanianStemmer.java
new file mode 100644
index 0000000000000..944fc1dacd880
--- /dev/null
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LegacyRomanianStemmer.java
@@ -0,0 +1,744 @@
+/*
+ * @notice
+ * Generated by Snowball 2.0.0 - https://snowballstem.org/
+ *
+ * Modifications copyright (C) 2024 Elasticsearch B.V.
+ */
+
+package org.elasticsearch.analysis.common;
+
+import org.tartarus.snowball.Among;
+
+/**
+* This class implements the stemming algorithm defined by a snowball script.
+* NOTE: This is the RomanianStemmer used in Lucene 9
+*
+* @deprecated this class exists for backwards compatibility of indices created with Lucene 9
+*/
+@Deprecated
+@SuppressWarnings("checkstyle:DescendantToken")
+public class LegacyRomanianStemmer extends org.tartarus.snowball.SnowballStemmer {
+
+    private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
+
+    private static final Among a_0[] = { new Among("", -1, 3), new Among("I", 0, 1), new Among("U", 0, 2) };
+
+    private static final Among a_1[] = {
+        new Among("ea", -1, 3),
+        new Among("a\u0163ia", -1, 7),
+        new Among("aua", -1, 2),
+        new Among("iua", -1, 4),
+        new Among("a\u0163ie", -1, 7),
+        new Among("ele", -1, 3),
+        new Among("ile", -1, 5),
+        new Among("iile", 6, 4),
+        new Among("iei", -1, 4),
+        new Among("atei", -1, 6),
+        new Among("ii", -1, 4),
+        new Among("ului", -1, 1),
+        new Among("ul", -1, 1),
+        new Among("elor", -1, 3),
+        new Among("ilor", -1, 4),
+        new Among("iilor", 14, 4) };
+
+    private static final Among a_2[] = {
+        new Among("icala", -1, 4),
+        new Among("iciva", -1, 4),
+        new Among("ativa", -1, 5),
+        new Among("itiva", -1, 6),
+        new Among("icale", -1, 4),
+        new Among("a\u0163iune", -1, 5),
+        new Among("i\u0163iune", -1, 6),
+        new Among("atoare", -1, 5),
+        new Among("itoare", -1, 6),
+        new Among("\u0103toare", -1, 5),
+        new Among("icitate", -1, 4),
+        new Among("abilitate", -1, 1),
+        new Among("ibilitate", -1, 2),
+        new Among("ivitate", -1, 3),
+        new Among("icive", -1, 4),
+        new Among("ative", -1, 5),
+        new Among("itive", -1, 6),
+        new Among("icali", -1, 4),
+        new Among("atori", -1, 5),
+        new Among("icatori", 18, 4),
+        new Among("itori", -1, 6),
+        new Among("\u0103tori", -1, 5),
+        new Among("icitati", -1, 4),
+        new Among("abilitati", -1, 1),
+        new Among("ivitati", -1, 3),
+        new Among("icivi", -1, 4),
+        new Among("ativi", -1, 5),
+        new Among("itivi", -1, 6),
+        new Among("icit\u0103i", -1, 4),
+        new Among("abilit\u0103i", -1, 1),
+        new Among("ivit\u0103i", -1, 3),
+        new Among("icit\u0103\u0163i", -1, 4),
+        new Among("abilit\u0103\u0163i", -1, 1),
+        new Among("ivit\u0103\u0163i", -1, 3),
+        new Among("ical", -1, 4),
+        new Among("ator", -1, 5),
+        new Among("icator", 35, 4),
+        new Among("itor", -1, 6),
+        new Among("\u0103tor", -1, 5),
+        new Among("iciv", -1, 4),
+        new Among("ativ", -1, 5),
+        new Among("itiv", -1, 6),
+        new Among("ical\u0103", -1, 4),
+        new Among("iciv\u0103", -1, 4),
+        new Among("ativ\u0103", -1, 5),
+        new Among("itiv\u0103", -1, 6) };
+
+    private static final Among a_3[] = {
+        new Among("ica", -1, 1),
+        new Among("abila", -1, 1),
+        new Among("ibila", -1, 1),
+        new Among("oasa", -1, 1),
+        new Among("ata", -1, 1),
+        new Among("ita", -1, 1),
+        new Among("anta", -1, 1),
+        new Among("ista", -1, 3),
+        new Among("uta", -1, 1),
+        new Among("iva", -1, 1),
+        new Among("ic", -1, 1),
+        new Among("ice", -1, 1),
+        new Among("abile", -1, 1),
+        new Among("ibile", -1, 1),
+        new Among("isme", -1, 3),
+        new Among("iune", -1, 2),
+        new Among("oase", -1, 1),
+        new Among("ate", -1, 1),
+        new Among("itate", 17, 1),
+        new Among("ite", -1, 1),
+        new Among("ante", -1, 1),
+        new Among("iste", -1, 3),
+        new Among("ute", -1, 1),
+        new Among("ive", -1, 1),
+        new Among("ici", -1, 1),
+        new Among("abili", -1, 1),
+        new Among("ibili", -1, 1),
+        new Among("iuni", -1, 2),
+        new Among("atori", -1, 1),
+        new Among("osi", -1, 1),
+        new Among("ati", -1, 1),
+        new Among("itati", 30, 1),
+        new Among("iti", -1, 1),
+        new Among("anti", -1, 1),
+        new Among("isti", -1, 3),
+        new Among("uti", -1, 1),
+        new Among("i\u015Fti", -1, 3),
+        new Among("ivi", -1, 1),
+        new Among("it\u0103i", -1, 1),
+        new Among("o\u015Fi", -1, 1),
+        new Among("it\u0103\u0163i", -1, 1),
+        new Among("abil", -1, 1),
+        new Among("ibil", -1, 1),
+        new Among("ism", -1, 3),
+        new Among("ator", -1, 1),
+        new Among("os", -1, 1),
+        new Among("at", -1, 1),
+        new Among("it", -1, 1),
+        new Among("ant", -1, 1),
+        new Among("ist", -1, 3),
+        new Among("ut", -1, 1),
+        new Among("iv", -1, 1),
+        new Among("ic\u0103", -1, 1),
+        new Among("abil\u0103", -1, 1),
+        new Among("ibil\u0103", -1, 1),
+        new Among("oas\u0103", -1, 1),
+        new Among("at\u0103", -1, 1),
+        new Among("it\u0103", -1, 1),
+        new Among("ant\u0103", -1, 1),
+        new Among("ist\u0103", -1, 3),
+        new Among("ut\u0103", -1, 1),
+        new Among("iv\u0103", -1, 1) };
+
+    private static final Among a_4[] = {
+        new Among("ea", -1, 1),
+        new Among("ia", -1, 1),
+        new Among("esc", -1, 1),
+        new Among("\u0103sc", -1, 1),
+        new Among("ind", -1, 1),
+        new Among("\u00E2nd", -1, 1),
+        new Among("are", -1, 1),
+        new Among("ere", -1, 1),
+        new Among("ire", -1, 1),
+        new Among("\u00E2re", -1, 1),
+        new Among("se", -1, 2),
+        new Among("ase", 10, 1),
+        new Among("sese", 10, 2),
+        new Among("ise", 10, 1),
+        new Among("use", 10, 1),
+        new Among("\u00E2se", 10, 1),
+        new Among("e\u015Fte", -1, 1),
+        new Among("\u0103\u015Fte", -1, 1),
+        new Among("eze", -1, 1),
+        new Among("ai", -1, 1),
+        new Among("eai", 19, 1),
+        new Among("iai", 19, 1),
+        new Among("sei", -1, 2),
+        new Among("e\u015Fti", -1, 1),
+        new Among("\u0103\u015Fti", -1, 1),
+        new Among("ui", -1, 1),
+        new Among("ezi", -1, 1),
+        new Among("\u00E2i", -1, 1),
+        new Among("a\u015Fi", -1, 1),
+        new Among("se\u015Fi", -1, 2),
+        new Among("ase\u015Fi", 29, 1),
+        new Among("sese\u015Fi", 29, 2),
+        new Among("ise\u015Fi", 29, 1),
+        new Among("use\u015Fi", 29, 1),
+        new Among("\u00E2se\u015Fi", 29, 1),
+        new Among("i\u015Fi", -1, 1),
+        new Among("u\u015Fi", -1, 1),
+        new Among("\u00E2\u015Fi", -1, 1),
+        new Among("a\u0163i", -1, 2),
+        new Among("ea\u0163i", 38, 1),
+        new Among("ia\u0163i", 38, 1),
+        new Among("e\u0163i", -1, 2),
+        new Among("i\u0163i", -1, 2),
+        new Among("\u00E2\u0163i", -1, 2),
+        new Among("ar\u0103\u0163i", -1, 1),
+        new Among("ser\u0103\u0163i", -1, 2),
+        new Among("aser\u0103\u0163i", 45, 1),
+        new Among("seser\u0103\u0163i", 45, 2),
+        new Among("iser\u0103\u0163i", 45, 1),
+        new Among("user\u0103\u0163i", 45, 1),
+        new Among("\u00E2ser\u0103\u0163i", 45, 1),
+        new Among("ir\u0103\u0163i", -1, 1),
+        new Among("ur\u0103\u0163i", -1, 1),
+        new Among("\u00E2r\u0103\u0163i", -1, 1),
+        new Among("am", -1, 1),
+        new Among("eam", 54, 1),
+        new Among("iam", 54, 1),
+        new Among("em", -1, 2),
+        new Among("asem", 57, 1),
+        new Among("sesem", 57, 2),
+        new Among("isem", 57, 1),
+        new Among("usem", 57, 1),
+        new Among("\u00E2sem", 57, 1),
+        new Among("im", -1, 2),
+        new Among("\u00E2m", -1, 2),
+        new Among("\u0103m", -1, 2),
+        new Among("ar\u0103m", 65, 1),
+        new Among("ser\u0103m", 65, 2),
+        new Among("aser\u0103m", 67, 1),
+        new Among("seser\u0103m", 67, 2),
+        new Among("iser\u0103m", 67, 1),
+        new Among("user\u0103m", 67, 1),
+        new Among("\u00E2ser\u0103m", 67, 1),
+        new Among("ir\u0103m", 65, 1),
+        new Among("ur\u0103m", 65, 1),
+        new Among("\u00E2r\u0103m", 65, 1),
+        new Among("au", -1, 1),
+        new Among("eau", 76, 1),
+        new Among("iau", 76, 1),
+        new Among("indu", -1, 1),
+        new Among("\u00E2ndu", -1, 1),
+        new Among("ez", -1, 1),
+        new Among("easc\u0103", -1, 1),
+        new Among("ar\u0103", -1, 1),
+        new Among("ser\u0103", -1, 2),
+        new Among("aser\u0103", 84, 1),
+        new Among("seser\u0103", 84, 2),
+        new Among("iser\u0103", 84, 1),
+        new Among("user\u0103", 84, 1),
+        new Among("\u00E2ser\u0103", 84, 1),
+        new Among("ir\u0103", -1, 1),
+        new Among("ur\u0103", -1, 1),
+        new Among("\u00E2r\u0103", -1, 1),
+        new Among("eaz\u0103", -1, 1) };
+
+    private static final Among a_5[] = {
+        new Among("a", -1, 1),
+        new Among("e", -1, 1),
+        new Among("ie", 1, 1),
+        new Among("i", -1, 1),
+        new Among("\u0103", -1, 1) };
+
+    private static final char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 32, 0, 0, 4 };
+
+    private boolean B_standard_suffix_removed;
+    private int I_p2;
+    private int I_p1;
+    private int I_pV;
+
+    private boolean r_prelude() {
+        while (true) {
+            int v_1 = cursor;
+            lab0: {
+                golab1: while (true) {
+                    int v_2 = cursor;
+                    lab2: {
+                        if (!(in_grouping(g_v, 97, 259))) {
+                            break lab2;
+                        }
+                        bra = cursor;
+                        lab3: {
+                            int v_3 = cursor;
+                            lab4: {
+                                if (!(eq_s("u"))) {
+                                    break lab4;
+                                }
+                                ket = cursor;
+                                if (!(in_grouping(g_v, 97, 259))) {
+                                    break lab4;
+                                }
+                                slice_from("U");
+                                break lab3;
+                            }
+                            cursor = v_3;
+                            if (!(eq_s("i"))) {
+                                break lab2;
+                            }
+                            ket = cursor;
+                            if (!(in_grouping(g_v, 97, 259))) {
+                                break lab2;
+                            }
+                            slice_from("I");
+                        }
+                        cursor = v_2;
+                        break golab1;
+                    }
+                    cursor = v_2;
+                    if (cursor >= limit) {
+                        break lab0;
+                    }
+                    cursor++;
+                }
+                continue;
+            }
+            cursor = v_1;
+            break;
+        }
+        return true;
+    }
+
+    private boolean r_mark_regions() {
+        I_pV = limit;
+        I_p1 = limit;
+        I_p2 = limit;
+        int v_1 = cursor;
+        lab0: {
+            lab1: {
+                int v_2 = cursor;
+                lab2: {
+                    if (!(in_grouping(g_v, 97, 259))) {
+                        break lab2;
+                    }
+                    lab3: {
+                        int v_3 = cursor;
+                        lab4: {
+                            if (!(out_grouping(g_v, 97, 259))) {
+                                break lab4;
+                            }
+                            golab5: while (true) {
+                                lab6: {
+                                    if (!(in_grouping(g_v, 97, 259))) {
+                                        break lab6;
+                                    }
+                                    break golab5;
+                                }
+                                if (cursor >= limit) {
+                                    break lab4;
+                                }
+                                cursor++;
+                            }
+                            break lab3;
+                        }
+                        cursor = v_3;
+                        if (!(in_grouping(g_v, 97, 259))) {
+                            break lab2;
+                        }
+                        golab7: while (true) {
+                            lab8: {
+                                if (!(out_grouping(g_v, 97, 259))) {
+                                    break lab8;
+                                }
+                                break golab7;
+                            }
+                            if (cursor >= limit) {
+                                break lab2;
+                            }
+                            cursor++;
+                        }
+                    }
+                    break lab1;
+                }
+                cursor = v_2;
+                if (!(out_grouping(g_v, 97, 259))) {
+                    break lab0;
+                }
+                lab9: {
+                    int v_6 = cursor;
+                    lab10: {
+                        if (!(out_grouping(g_v, 97, 259))) {
+                            break lab10;
+                        }
+                        golab11: while (true) {
+                            lab12: {
+                                if (!(in_grouping(g_v, 97, 259))) {
+                                    break lab12;
+                                }
+                                break golab11;
+                            }
+                            if (cursor >= limit) {
+                                break lab10;
+                            }
+                            cursor++;
+                        }
+                        break lab9;
+                    }
+                    cursor = v_6;
+                    if (!(in_grouping(g_v, 97, 259))) {
+                        break lab0;
+                    }
+                    if (cursor >= limit) {
+                        break lab0;
+                    }
+                    cursor++;
+                }
+            }
+            I_pV = cursor;
+        }
+        cursor = v_1;
+        int v_8 = cursor;
+        lab13: {
+            golab14: while (true) {
+                lab15: {
+                    if (!(in_grouping(g_v, 97, 259))) {
+                        break lab15;
+                    }
+                    break golab14;
+                }
+                if (cursor >= limit) {
+                    break lab13;
+                }
+                cursor++;
+            }
+            golab16: while (true) {
+                lab17: {
+                    if (!(out_grouping(g_v, 97, 259))) {
+                        break lab17;
+                    }
+                    break golab16;
+                }
+                if (cursor >= limit) {
+                    break lab13;
+                }
+                cursor++;
+            }
+            I_p1 = cursor;
+            golab18: while (true) {
+                lab19: {
+                    if (!(in_grouping(g_v, 97, 259))) {
+                        break lab19;
+                    }
+                    break golab18;
+                }
+                if (cursor >= limit) {
+                    break lab13;
+                }
+                cursor++;
+            }
+            golab20: while (true) {
+                lab21: {
+                    if (!(out_grouping(g_v, 97, 259))) {
+                        break lab21;
+                    }
+                    break golab20;
+                }
+                if (cursor >= limit) {
+                    break lab13;
+                }
+                cursor++;
+            }
+            I_p2 = cursor;
+        }
+        cursor = v_8;
+        return true;
+    }
+
+    private boolean r_postlude() {
+        int among_var;
+        while (true) {
+            int v_1 = cursor;
+            lab0: {
+                bra = cursor;
+                among_var = find_among(a_0);
+                if (among_var == 0) {
+                    break lab0;
+                }
+                ket = cursor;
+                switch (among_var) {
+                    case 1:
+                        slice_from("i");
+                        break;
+                    case 2:
+                        slice_from("u");
+                        break;
+                    case 3:
+                        if (cursor >= limit) {
+                            break lab0;
+                        }
+                        cursor++;
+                        break;
+                }
+                continue;
+            }
+            cursor = v_1;
+            break;
+        }
+        return true;
+    }
+
+    private boolean r_RV() {
+        if (!(I_pV <= cursor)) {
+            return false;
+        }
+        return true;
+    }
+
+    private boolean r_R1() {
+        if (!(I_p1 <= cursor)) {
+            return false;
+        }
+        return true;
+    }
+
+    private boolean r_R2() {
+        if (!(I_p2 <= cursor)) {
+            return false;
+        }
+        return true;
+    }
+
+    private boolean r_step_0() {
+        int among_var;
+        ket = cursor;
+        among_var = find_among_b(a_1);
+        if (among_var == 0) {
+            return false;
+        }
+        bra = cursor;
+        if (!r_R1()) {
+            return false;
+        }
+        switch (among_var) {
+            case 1:
+                slice_del();
+                break;
+            case 2:
+                slice_from("a");
+                break;
+            case 3:
+                slice_from("e");
+                break;
+            case 4:
+                slice_from("i");
+                break;
+            case 5: {
+                int v_1 = limit - cursor;
+                lab0: {
+                    if (!(eq_s_b("ab"))) {
+                        break lab0;
+                    }
+                    return false;
+                }
+                cursor = limit - v_1;
+            }
+                slice_from("i");
+                break;
+            case 6:
+                slice_from("at");
+                break;
+            case 7:
+                slice_from("a\u0163i");
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_combo_suffix() {
+        int among_var;
+        int v_1 = limit - cursor;
+        ket = cursor;
+        among_var = find_among_b(a_2);
+        if (among_var == 0) {
+            return false;
+        }
+        bra = cursor;
+        if (!r_R1()) {
+            return false;
+        }
+        switch (among_var) {
+            case 1:
+                slice_from("abil");
+                break;
+            case 2:
+                slice_from("ibil");
+                break;
+            case 3:
+                slice_from("iv");
+                break;
+            case 4:
+                slice_from("ic");
+                break;
+            case 5:
+                slice_from("at");
+                break;
+            case 6:
+                slice_from("it");
+                break;
+        }
+        B_standard_suffix_removed = true;
+        cursor = limit - v_1;
+        return true;
+    }
+
+    private boolean r_standard_suffix() {
+        int among_var;
+        B_standard_suffix_removed = false;
+        while (true) {
+            int v_1 = limit - cursor;
+            lab0: {
+                if (!r_combo_suffix()) {
+                    break lab0;
+                }
+                continue;
+            }
+            cursor = limit - v_1;
+            break;
+        }
+        ket = cursor;
+        among_var = find_among_b(a_3);
+        if (among_var == 0) {
+            return false;
+        }
+        bra = cursor;
+        if (!r_R2()) {
+            return false;
+        }
+        switch (among_var) {
+            case 1:
+                slice_del();
+                break;
+            case 2:
+                if (!(eq_s_b("\u0163"))) {
+                    return false;
+                }
+                bra = cursor;
+                slice_from("t");
+                break;
+            case 3:
+                slice_from("ist");
+                break;
+        }
+        B_standard_suffix_removed = true;
+        return true;
+    }
+
+    private boolean r_verb_suffix() {
+        int among_var;
+        if (cursor < I_pV) {
+            return false;
+        }
+        int v_2 = limit_backward;
+        limit_backward = I_pV;
+        ket = cursor;
+        among_var = find_among_b(a_4);
+        if (among_var == 0) {
+            limit_backward = v_2;
+            return false;
+        }
+        bra = cursor;
+        switch (among_var) {
+            case 1:
+                lab0: {
+                    int v_3 = limit - cursor;
+                    lab1: {
+                        if (!(out_grouping_b(g_v, 97, 259))) {
+                            break lab1;
+                        }
+                        break lab0;
+                    }
+                    cursor = limit - v_3;
+                    if (!(eq_s_b("u"))) {
+                        limit_backward = v_2;
+                        return false;
+                    }
+                }
+                slice_del();
+                break;
+            case 2:
+                slice_del();
+                break;
+        }
+        limit_backward = v_2;
+        return true;
+    }
+
+    private boolean r_vowel_suffix() {
+        ket = cursor;
+        if (find_among_b(a_5) == 0) {
+            return false;
+        }
+        bra = cursor;
+        if (!r_RV()) {
+            return false;
+        }
+        slice_del();
+        return true;
+    }
+
+    @Override
+    public boolean stem() {
+        int v_1 = cursor;
+        r_prelude();
+        cursor = v_1;
+        r_mark_regions();
+        limit_backward = cursor;
+        cursor = limit;
+        int v_3 = limit - cursor;
+        r_step_0();
+        cursor = limit - v_3;
+        int v_4 = limit - cursor;
+        r_standard_suffix();
+        cursor = limit - v_4;
+        int v_5 = limit - cursor;
+        lab0: {
+            lab1: {
+                int v_6 = limit - cursor;
+                lab2: {
+                    if (!(B_standard_suffix_removed)) {
+                        break lab2;
+                    }
+                    break lab1;
+                }
+                cursor = limit - v_6;
+                if (!r_verb_suffix()) {
+                    break lab0;
+                }
+            }
+        }
+        cursor = limit - v_5;
+        int v_7 = limit - cursor;
+        r_vowel_suffix();
+        cursor = limit - v_7;
+        cursor = limit_backward;
+        int v_8 = cursor;
+        r_postlude();
+        cursor = v_8;
+        return true;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        return o instanceof LegacyRomanianStemmer;
+    }
+
+    @Override
+    public int hashCode() {
+        return LegacyRomanianStemmer.class.getName().hashCode();
+    }
+}
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/RomanianAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/RomanianAnalyzerProvider.java
index cf33a38abd634..6c28df83a6d36 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/RomanianAnalyzerProvider.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/RomanianAnalyzerProvider.java
@@ -9,28 +9,60 @@
 
 package org.elasticsearch.analysis.common;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.ro.RomanianAnalyzer;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
 import org.elasticsearch.index.analysis.Analysis;
 
-public class RomanianAnalyzerProvider extends AbstractIndexAnalyzerProvider<RomanianAnalyzer> {
+public class RomanianAnalyzerProvider extends AbstractIndexAnalyzerProvider<StopwordAnalyzerBase> {
 
-    private final RomanianAnalyzer analyzer;
+    private final StopwordAnalyzerBase analyzer;
 
     RomanianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(name, settings);
-        analyzer = new RomanianAnalyzer(
-            Analysis.parseStopWords(env, settings, RomanianAnalyzer.getDefaultStopSet()),
-            Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
-        );
+        CharArraySet stopwords = Analysis.parseStopWords(env, settings, RomanianAnalyzer.getDefaultStopSet());
+        CharArraySet stemExclusionSet = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
+        if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.UPGRADE_TO_LUCENE_10_0_0)) {
+            // since Lucene 10, this analyzer uses a modern unicode form and normalizes cedilla forms to forms with commas
+            analyzer = new RomanianAnalyzer(stopwords, stemExclusionSet);
+        } else {
+            // for older index versions we need the old behaviour: the Lucene 9 stemmer and no cedilla normalization
+            analyzer = new StopwordAnalyzerBase(stopwords) {
+                @Override
+                protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
+                    final Tokenizer source = new StandardTokenizer();
+                    TokenStream result = new LowerCaseFilter(source);
+                    result = new StopFilter(result, stopwords);
+                    if (stemExclusionSet.isEmpty() == false) {
+                        result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+                    }
+                    result = new SnowballFilter(result, new LegacyRomanianStemmer());
+                    return new TokenStreamComponents(source, result);
+                }
+
+                @Override
+                protected TokenStream normalize(String fieldName, TokenStream in) {
+                    return new LowerCaseFilter(in);
+                }
+            };
+        }
     }
 
     @Override
-    public RomanianAnalyzer get() {
+    public StopwordAnalyzerBase get() {
         return this.analyzer;
     }
 }
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/RomanianAnalyzerTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/RomanianAnalyzerTests.java
new file mode 100644
index 0000000000000..1af44bc71f35d
--- /dev/null
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/RomanianAnalyzerTests.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.analysis.common;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.IndexVersion;
+import org.elasticsearch.index.IndexVersions;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.test.ESTokenStreamTestCase;
+import org.elasticsearch.test.IndexSettingsModule;
+import org.elasticsearch.test.index.IndexVersionUtils;
+
+import java.io.IOException;
+
+import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo;
+
+/**
+ * Verifies the behavior of Romanian analyzer.
+ */
+public class RomanianAnalyzerTests extends ESTokenStreamTestCase {
+
+    public void testRomanianAnalyzerPostLucene10() throws IOException {
+        IndexVersion postLucene10Version = IndexVersionUtils.randomVersionBetween(
+            random(),
+            IndexVersions.UPGRADE_TO_LUCENE_10_0_0,
+            IndexVersion.current()
+        );
+        Settings settings = ESTestCase.indexSettings(1, 1)
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+            .put(IndexMetadata.SETTING_VERSION_CREATED, postLucene10Version)
+            .build();
+        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
+        Environment environment = new Environment(settings, null);
+
+        RomanianAnalyzerProvider romanianAnalyzerProvider = new RomanianAnalyzerProvider(
+            idxSettings,
+            environment,
+            "my-analyzer",
+            Settings.EMPTY
+        );
+        Analyzer analyzer = romanianAnalyzerProvider.get();
+        assertAnalyzesTo(analyzer, "absenţa", new String[] { "absenț" });
+        assertAnalyzesTo(analyzer, "cunoştinţă", new String[] { "cunoștinț" });
+    }
+
+    public void testRomanianAnalyzerPreLucene10() throws IOException {
+        IndexVersion preLucene10Version = IndexVersionUtils.randomVersionBetween(
+            random(),
+            IndexVersionUtils.getFirstVersion(),
+            IndexVersionUtils.getPreviousVersion(IndexVersions.UPGRADE_TO_LUCENE_10_0_0)
+        );
+        Settings settings = ESTestCase.indexSettings(1, 1)
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+            .put(IndexMetadata.SETTING_VERSION_CREATED, preLucene10Version)
+            .build();
+        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
+        Environment environment = new Environment(settings, null);
+
+        RomanianAnalyzerProvider romanianAnalyzerProvider = new RomanianAnalyzerProvider(
+            idxSettings,
+            environment,
+            "my-analyzer",
+            Settings.EMPTY
+        );
+        Analyzer analyzer = romanianAnalyzerProvider.get();
+        assertAnalyzesTo(analyzer, "absenţa", new String[] { "absenţ" });
+        assertAnalyzesTo(analyzer, "cunoştinţă", new String[] { "cunoştinţ" });
+    }
+}
diff --git a/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java b/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java
index ee18f8fc2ec4b..5bbf6d536f0f8 100644
--- a/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java
+++ b/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java
@@ -33,6 +33,7 @@
 import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.mapper.DateFieldMapper;
 import org.elasticsearch.rest.action.admin.indices.RestPutIndexTemplateAction;
+import org.elasticsearch.search.SearchFeatures;
 import org.elasticsearch.test.NotEqualMessageBuilder;
 import org.elasticsearch.test.XContentTestUtils;
 import org.elasticsearch.test.cluster.ElasticsearchCluster;
@@ -1726,6 +1727,111 @@ public void testSystemIndexMetadataIsUpgraded() throws Exception {
         }
     }
 
+    /**
+     * This test ensures that search results on old indices using the "romanian" analyzer don't change
+     * after we introduce Lucene 10, while an index created after the upgrade matches both spellings
+     */
+    public void testRomanianAnalyzerBWC() throws Exception {
+        var originalClusterLegacyRomanianAnalyzer = oldClusterHasFeature(SearchFeatures.LUCENE_10_0_0_UPGRADE) == false;
+        assumeTrue("Don't run this test if both versions already support stemming", originalClusterLegacyRomanianAnalyzer);
+        final String indexName = "test_romanian_stemmer";
+        Settings idxSettings = indexSettings(1, 1).build();
+        String cedillaForm = "absenţa";
+        String commaForm = "absența";
+
+        String mapping = """
+                {
+                  "properties": {
+                    "textfield" : {
+                      "type": "text",
+                      "analyzer": "romanian"
+                    }
+                  }
+                }
+            """;
+
+        // match query for the cedilla form of "t"; the same query is sent to the old and to the new index
+        String query = """
+                {
+                  "query": {
+                    "match": {
+                      "textfield": "absenţa"
+                    }
+                  }
+                }
+            """;
+
+        if (isRunningAgainstOldCluster()) { // before the upgrade: create and populate the index
+            createIndex(client(), indexName, idxSettings, mapping);
+            ensureGreen(indexName);
+
+            assertOK(
+                client().performRequest(
+                    newXContentRequest(
+                        HttpMethod.POST,
+                        "/" + indexName + "/" + "_doc/1",
+                        (builder, params) -> builder.field("textfield", cedillaForm)
+                    )
+                )
+            );
+            assertOK(
+                client().performRequest(
+                    newXContentRequest(
+                        HttpMethod.POST,
+                        "/" + indexName + "/" + "_doc/2",
+                        // second doc holds the same word in its comma form
+                        (builder, params) -> builder.field("textfield", commaForm)
+                    )
+                )
+            );
+            refresh(indexName);
+
+            assertNumHits(indexName, 2, 1); // presumably 2 docs in total on 1 shard - confirm against assertNumHits
+
+            Request searchRequest = new Request("POST", "/" + indexName + "/_search");
+            searchRequest.setJsonEntity(query);
+            assertTotalHits(1, entityAsMap(client().performRequest(searchRequest))); // only one of the two docs matches
+        } else { // not the old cluster any more, i.e. after the upgrade
+            // old index should still only return one doc, the same result as before the upgrade
+            Request searchRequest = new Request("POST", "/" + indexName + "/_search");
+            searchRequest.setJsonEntity(query);
+            assertTotalHits(1, entityAsMap(client().performRequest(searchRequest)));
+
+            String newIndexName = indexName + "_new";
+            createIndex(client(), newIndexName, idxSettings, mapping); // same settings and mapping as the old index
+            ensureGreen(newIndexName);
+
+            assertOK(
+                client().performRequest(
+                    newXContentRequest(
+                        HttpMethod.POST,
+                        "/" + newIndexName + "/" + "_doc/1",
+                        (builder, params) -> builder.field("textfield", cedillaForm)
+                    )
+                )
+            );
+            assertOK(
+                client().performRequest(
+                    newXContentRequest(
+                        HttpMethod.POST,
+                        "/" + newIndexName + "/" + "_doc/2",
+                        (builder, params) -> builder.field("textfield", commaForm)
+                    )
+                )
+            );
+            refresh(newIndexName);
+
+            searchRequest = new Request("POST", "/" + newIndexName + "/_search");
+            searchRequest.setJsonEntity(query);
+            assertTotalHits(2, entityAsMap(client().performRequest(searchRequest))); // new index matches both forms
+
+            // searching both indices (old and new analysis version) we should get 1 hit from the old and 2 from the new index
+            searchRequest = new Request("POST", "/" + indexName + "," + newIndexName + "/_search");
+            searchRequest.setJsonEntity(query);
+            assertTotalHits(3, entityAsMap(client().performRequest(searchRequest)));
+        }
+    }
+
     /**
      * This test ensures that soft deletes are enabled a when upgrading a pre-8 cluster to 8.0+
      */