Skip to content

Commit

Permalink
Fix BitSetIterator to correctly honor the contract of `DocIdSetIter…
Browse files Browse the repository at this point in the history
…ator#intoBitSet`. (#14142)

`BitSetIterator#intoBitSet` would currently fail if `upTo - offset` exceeds the
length of the destination bit set. However, `DocIdSetIterator#intoBitSet` only
requires matching docs to be set into the bit set, so having `upTo - offset`
exceed the length of the dest bit set is legal as long as no bits are set
beyond `offset + bitSet.length()`.
  • Loading branch information
jpountz authored Jan 16, 2025
1 parent 5c91f15 commit 16da44a
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 5 deletions.
13 changes: 8 additions & 5 deletions lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,15 @@ public long cost() {

@Override
public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
upTo = Math.min(upTo, bits.length());
if (upTo > doc && bits instanceof FixedBitSet fixedBits) {
FixedBitSet.orRange(fixedBits, doc, bitSet, doc - offset, upTo - doc);
advance(upTo); // set the current doc
} else {
super.intoBitSet(upTo, bitSet, offset);
int actualUpto = Math.min(upTo, length);
// The destination bit set may be shorter than this bit set. This is only legal if all bits
// beyond offset + bitSet.length() are clear. If not, the below call to `super.intoBitSet`
// will throw an exception.
actualUpto = (int) Math.min(actualUpto, offset + (long) bitSet.length());
FixedBitSet.orRange(fixedBits, doc, bitSet, doc - offset, actualUpto - doc);
advance(actualUpto); // set the current doc
}
super.intoBitSet(upTo, bitSet, offset);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

/** Base test class for {@link DocIdSet}s. */
public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTestCase {
Expand Down Expand Up @@ -196,4 +197,71 @@ private long ramBytesUsed(DocIdSet set, int length) throws IOException {
long bytes2 = RamUsageTester.ramUsed(dummy);
return bytes1 - bytes2;
}

public void testIntoBitSet() throws IOException {
Random random = random();
final int numBits = TestUtil.nextInt(random, 100, 1 << 20);
// test various random sets with various load factors
for (float percentSet : new float[] {0f, 0.0001f, random.nextFloat(), 0.9f, 1f}) {
final BitSet set = randomSet(numBits, percentSet);
final T copy = copyOf(set, numBits);
int from = TestUtil.nextInt(random(), 0, numBits - 1);
int to = TestUtil.nextInt(random(), from, numBits + 5);
FixedBitSet actual = new FixedBitSet(to - from);
DocIdSetIterator it1 = copy.iterator();
if (it1 == null) {
continue;
}
int fromDoc = it1.advance(from);
// No docs to set
it1.intoBitSet(from, actual, from);
assertTrue(actual.scanIsEmpty());
assertEquals(fromDoc, it1.docID());

// Now actually set some bits
it1.intoBitSet(to, actual, from);
FixedBitSet expected = new FixedBitSet(to - from);
DocIdSetIterator it2 = copy.iterator();
for (int doc = it2.advance(from); doc < to; doc = it2.nextDoc()) {
expected.set(doc - from);
}
assertEquals(expected, actual);
// Check if docID() / nextDoc() return the same value after #intoBitSet has been called.
assertEquals(it2.docID(), it1.docID());
if (it2.docID() != DocIdSetIterator.NO_MORE_DOCS) {
assertEquals(it2.nextDoc(), it1.nextDoc());
}
}
}

public void testIntoBitSetBoundChecks() throws IOException {
final BitSet set = new BitSet();
set.set(20);
set.set(42);
final T copy = copyOf(set, 256);
int from = TestUtil.nextInt(random(), 0, 20);
int to = TestUtil.nextInt(random(), 43, 256);
int offset = TestUtil.nextInt(random(), 0, from);
FixedBitSet dest1 = new FixedBitSet(42 - offset + 1);
DocIdSetIterator it1 = copy.iterator();
it1.advance(from);
// This call is legal, since all "set" bits are in the range
it1.intoBitSet(to, dest1, offset);
for (int i = 0; i < dest1.length(); ++i) {
assertEquals(offset + i == 20 || offset + i == 42, dest1.get(i));
}

FixedBitSet dest2 = new FixedBitSet(42 - offset);
DocIdSetIterator it2 = copy.iterator();
it2.advance(from);
// This call is not legal, since there is one bit that is set beyond the end of the target bit
// set
expectThrows(Throwable.class, () -> it2.intoBitSet(to, dest2, offset));

FixedBitSet dest3 = new FixedBitSet(42 - offset + 1);
DocIdSetIterator it3 = copy.iterator();
it3.advance(from);
// This call is not legal, since offset is greater than the current doc
expectThrows(Throwable.class, () -> it3.intoBitSet(to, dest3, 21));
}
}

0 comments on commit 16da44a

Please sign in to comment.