Skip to content

Commit

Permalink
More doc and test
Browse files Browse the repository at this point in the history
  • Loading branch information
zhaih committed Nov 9, 2023
1 parent 61f92fd commit 75eb903
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 2 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ New Features

* LUCENE-10010 Introduce NFARunAutomaton to run NFA directly. (Patrick Zhai)

* GITHUB-12767: Add a flag to enable executing using NFA in RegexpQuery. (Patrick Zhai)

* LUCENE-10626 Hunspell: add tools to aid dictionary editing:
analysis introspection, stem expansion and stem/flag suggestion (Peter Gromov)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,10 @@ public RegexpQuery(
* @param doDeterminization whether to determinize the automaton, forcing the query to use a DFA
* as runAutomaton; if false, the query will not try to determinize the automaton generated from
* the regexp, so it might or might not be a DFA. In case it is an NFA, the query will
* eventually use {@link org.apache.lucene.util.automaton.NFARunAutomaton} to execute.
* eventually use {@link org.apache.lucene.util.automaton.NFARunAutomaton} to execute. Note
* that {@link org.apache.lucene.util.automaton.NFARunAutomaton} is not thread-safe, so it is
* better to avoid rewrite methods such as {@link #CONSTANT_SCORE_BLENDED_REWRITE} when the
* searcher is configured with an executor service.
*/
public RegexpQuery(
Term term,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@

/**
* A RunAutomaton that does not require DFA. It will lazily determinize on-demand, memorizing the
* generated DFA states that has been explored
* generated DFA states that have been explored. Note: the current implementation is NOT thread-safe.
*
* <p>implemented based on: https://swtch.com/~rsc/regexp/regexp1.html
*
* @lucene.internal
*/
public class NFARunAutomaton implements ByteRunnable, TransitionAccessor {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
public class TestRegexpRandom2 extends LuceneTestCase {
protected IndexSearcher searcher1;
protected IndexSearcher searcher2;
protected IndexSearcher searcher3;
private IndexReader reader;
private Directory dir;
protected String fieldName;
Expand Down Expand Up @@ -95,6 +96,7 @@ public void setUp() throws Exception {
reader = writer.getReader();
searcher1 = newSearcher(reader);
searcher2 = newSearcher(reader);
searcher3 = newSearcher(reader);
writer.close();
}

Expand Down Expand Up @@ -172,11 +174,22 @@ public void testRegexps() throws Exception {
/**
 * Runs the given pattern through three query implementations and checks that they agree on the
 * hits: the default {@link RegexpQuery}, a {@link RegexpQuery} built through the long constructor
 * with its final boolean flag set to {@code false} (presumably {@code doDeterminization}, so it
 * may execute as an NFA -- confirm against the constructor), and the very simple
 * {@code DumbRegexpQuery} used as the reference.
 *
 * @param regexp the regular expression to search for in {@code fieldName}
 * @throws IOException if any of the searches fails
 */
protected void assertSame(String regexp) throws IOException {
  final Term pattern = new Term(fieldName, regexp);

  // Reference implementation that the two RegexpQuery variants are compared against.
  final DumbRegexpQuery referenceQuery = new DumbRegexpQuery(pattern, RegExp.NONE);
  final RegexpQuery defaultQuery = new RegexpQuery(pattern, RegExp.NONE);
  final RegexpQuery nonDeterminizedQuery =
      new RegexpQuery(
          pattern,
          RegExp.NONE,
          0,
          RegexpQuery.DEFAULT_PROVIDER,
          0,
          MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE,
          false);

  // Each query runs on its own searcher, top 25 hits each.
  final TopDocs defaultHits = searcher1.search(defaultQuery, 25);
  final TopDocs referenceHits = searcher2.search(referenceQuery, 25);
  final TopDocs nonDeterminizedHits = searcher3.search(nonDeterminizedQuery, 25);

  CheckHits.checkEqual(defaultQuery, defaultHits.scoreDocs, referenceHits.scoreDocs);
  CheckHits.checkEqual(
      nonDeterminizedQuery, nonDeterminizedHits.scoreDocs, referenceHits.scoreDocs);
}
}

0 comments on commit 75eb903

Please sign in to comment.