Skip to content

Commit

Permalink
Ensure #finish is called on all drill-sideways FacetsCollectors even w…
Browse files Browse the repository at this point in the history
…hen no hits are scored (apache#12853)
  • Loading branch information
gsmiller authored Dec 8, 2023
1 parent fb269c9 commit a9b5ef4
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 77 deletions.
4 changes: 3 additions & 1 deletion lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,11 @@ Optimizations

Bug Fixes
---------------------

* GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. (Kaival Parikh)

* GITHUB#12558: Ensure #finish is called on all drill-sideways FacetsCollectors even when no hits are scored.
(Greg Miller)

Other
---------------------

Expand Down
15 changes: 13 additions & 2 deletions lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,14 @@ protected FacetsCollectorManager createDrillDownFacetsCollectorManager() {
return new FacetsCollectorManager();
}

/**
* Creates the {@link FacetsCollectorManager} used to collect "sideways" hits. Subclasses can
* override this to customize the drill sideways facets collector; the default implementation
* returns a new {@code FacetsCollectorManager} on every call.
*
* <p>This should not return {@code null}, as we assume drill sideways is being used to collect
* "sideways" hits.
*
* @return a non-null manager for the drill sideways facets collectors
*/
protected FacetsCollectorManager createDrillSidewaysFacetsCollectorManager() {
return new FacetsCollectorManager();
}

/** Subclass can override to customize per-dim Facets impl. */
protected Facets buildFacetsResult(
FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims)
Expand Down Expand Up @@ -397,7 +405,7 @@ private <R> ConcurrentDrillSidewaysResult<R> searchSequentially(
FacetsCollectorManager[] drillSidewaysFacetsCollectorManagers =
new FacetsCollectorManager[numDims];
for (int i = 0; i < numDims; i++) {
drillSidewaysFacetsCollectorManagers[i] = new FacetsCollectorManager();
drillSidewaysFacetsCollectorManagers[i] = createDrillSidewaysFacetsCollectorManager();
}

DrillSidewaysQuery dsq =
Expand Down Expand Up @@ -467,7 +475,10 @@ private <R> ConcurrentDrillSidewaysResult<R> searchConcurrently(
for (String dim : drillDownDims.keySet())
callableCollectors.add(
new CallableCollector(
i++, searcher, getDrillDownQuery(query, filters, dim), new FacetsCollectorManager()));
i++,
searcher,
getDrillDownQuery(query, filters, dim),
createDrillSidewaysFacetsCollectorManager()));

final FacetsCollector mainFacetsCollector;
final FacetsCollector[] facetsCollectors = new FacetsCollector[drillDownDims.size()];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.facet.DrillSidewaysScorer.DocsAndCost;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
Expand Down Expand Up @@ -175,6 +175,17 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {

int drillDownCount = drillDowns.length;

FacetsCollector drillDownCollector;
LeafCollector drillDownLeafCollector;
if (drillDownCollectorManager != null) {
drillDownCollector = drillDownCollectorManager.newCollector();
managedDrillDownCollectors.add(drillDownCollector);
drillDownLeafCollector = drillDownCollector.getLeafCollector(context);
} else {
drillDownCollector = null;
drillDownLeafCollector = null;
}

FacetsCollector[] sidewaysCollectors = new FacetsCollector[drillDownCount];
managedDrillSidewaysCollectors.add(sidewaysCollectors);

Expand All @@ -193,42 +204,29 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
FacetsCollector sidewaysCollector = drillSidewaysCollectorManagers[dim].newCollector();
sidewaysCollectors[dim] = sidewaysCollector;

dims[dim] = new DrillSidewaysScorer.DocsAndCost(scorer, sidewaysCollector);
dims[dim] =
new DrillSidewaysScorer.DocsAndCost(
scorer, sidewaysCollector.getLeafCollector(context));
}

// If more than one dim has no matches, then there
// are no hits nor drill-sideways counts. Or, if we
// have only one dim and that dim has no matches,
// same thing.
// if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
if (nullCount > 1) {
// If baseScorer is null or the dim nullCount > 1, then we have nothing to score. We return
// a null scorer in this case, but we need to make sure #finish gets called on all facet
// collectors since IndexSearcher won't handle this for us:
if (baseScorer == null || nullCount > 1) {
if (drillDownCollector != null) {
drillDownCollector.finish();
}
for (FacetsCollector fc : sidewaysCollectors) {
fc.finish();
}
return null;
}

// Sort drill-downs by most restrictive first:
Arrays.sort(
dims,
new Comparator<DrillSidewaysScorer.DocsAndCost>() {
@Override
public int compare(DocsAndCost o1, DocsAndCost o2) {
return Long.compare(o1.approximation.cost(), o2.approximation.cost());
}
});

if (baseScorer == null) {
return null;
}

FacetsCollector drillDownCollector;
if (drillDownCollectorManager != null) {
drillDownCollector = drillDownCollectorManager.newCollector();
managedDrillDownCollectors.add(drillDownCollector);
} else {
drillDownCollector = null;
}
Arrays.sort(dims, Comparator.comparingLong(o -> o.approximation.cost()));

return new DrillSidewaysScorer(
context, baseScorer, drillDownCollector, dims, scoreSubDocsAtOnce);
context, baseScorer, drillDownLeafCollector, dims, scoreSubDocsAtOnce);
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Scorable;
Expand All @@ -46,8 +45,7 @@ class DrillSidewaysScorer extends BulkScorer {

// private static boolean DEBUG = false;

private final Collector drillDownCollector;
private LeafCollector drillDownLeafCollector;
private final LeafCollector drillDownLeafCollector;

private final DocsAndCost[] dims;

Expand All @@ -70,7 +68,7 @@ class DrillSidewaysScorer extends BulkScorer {
DrillSidewaysScorer(
LeafReaderContext context,
Scorer baseScorer,
Collector drillDownCollector,
LeafCollector drillDownLeafCollector,
DocsAndCost[] dims,
boolean scoreSubDocsAtOnce) {
this.dims = dims;
Expand All @@ -83,7 +81,7 @@ class DrillSidewaysScorer extends BulkScorer {
} else {
this.baseApproximation = baseIterator;
}
this.drillDownCollector = drillDownCollector;
this.drillDownLeafCollector = drillDownLeafCollector;
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
}

Expand All @@ -101,18 +99,6 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int maxDoc)
if (maxDoc != Integer.MAX_VALUE) {
throw new IllegalArgumentException("maxDoc must be Integer.MAX_VALUE");
}
// if (DEBUG) {
// System.out.println("\nscore: reader=" + context.reader());
// }
// System.out.println("score r=" + context.reader());
if (drillDownCollector != null) {
drillDownLeafCollector = drillDownCollector.getLeafCollector(context);
} else {
drillDownLeafCollector = null;
}
for (DocsAndCost dim : dims) {
dim.sidewaysLeafCollector = dim.sidewaysCollector.getLeafCollector(context);
}

// some scorers, e.g. ReqExclScorer, can hit NPE if cost is called after nextDoc
long baseQueryCost = baseIterator.cost();
Expand Down Expand Up @@ -723,7 +709,7 @@ private void collectHit(LeafCollector collector, DocsAndCost[] dims) throws IOEx
// }

collector.collect(collectDocID);
if (drillDownCollector != null) {
if (drillDownLeafCollector != null) {
drillDownLeafCollector.collect(collectDocID);
}

Expand All @@ -739,7 +725,7 @@ private void collectHit(LeafCollector collector, DocsAndCost[] dims) throws IOEx

private void collectHit(LeafCollector collector, DocsAndCost dim) throws IOException {
collector.collect(collectDocID);
if (drillDownCollector != null) {
if (drillDownLeafCollector != null) {
drillDownLeafCollector.collect(collectDocID);
}

Expand All @@ -749,7 +735,7 @@ private void collectHit(LeafCollector collector, DocsAndCost dim) throws IOExcep

private void collectHit(LeafCollector collector, List<DocsAndCost> dims) throws IOException {
collector.collect(collectDocID);
if (drillDownCollector != null) {
if (drillDownLeafCollector != null) {
drillDownLeafCollector.collect(collectDocID);
}

Expand Down Expand Up @@ -808,10 +794,9 @@ static class DocsAndCost {
// two-phase confirmation, or null if the approximation is accurate
final TwoPhaseIterator twoPhase;
final float matchCost;
final Collector sidewaysCollector;
LeafCollector sidewaysLeafCollector;
final LeafCollector sidewaysLeafCollector;

DocsAndCost(Scorer scorer, Collector sidewaysCollector) {
DocsAndCost(Scorer scorer, LeafCollector sidewaysLeafCollector) {
final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
if (twoPhase == null) {
this.approximation = scorer.iterator();
Expand All @@ -823,7 +808,7 @@ static class DocsAndCost {
this.matchCost = twoPhase.matchCost();
}
this.cost = approximation.cost();
this.sidewaysCollector = sidewaysCollector;
this.sidewaysLeafCollector = sidewaysLeafCollector;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,14 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException {

@Override
public void finish() throws IOException {
matchingDocs.add(new MatchingDocs(this.context, docsBuilder.build(), totalHits, scores));
docsBuilder = null;
DocIdSet bits;
if (docsBuilder != null) {
bits = docsBuilder.build();
docsBuilder = null;
} else {
bits = DocIdSet.EMPTY;
}
matchingDocs.add(new MatchingDocs(this.context, bits, totalHits, scores));
scores = null;
context = null;
}
Expand Down
Loading

0 comments on commit a9b5ef4

Please sign in to comment.