From 68051f1b9f5a052b9a2b4274ad7197eafb4a8caf Mon Sep 17 00:00:00 2001
From: Michael Sokolov
Date: Tue, 31 Dec 2024 12:05:12 -0500
Subject: [PATCH] SlowCompositeCodecReaderWrapper must copy its sub-vector values to maintain thread-safety (#14092)

---
 .../SlowCompositeCodecReaderWrapper.java      | 33 +++++++++++--------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java
index 69d557d270ae..48f4e19033e9 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java
@@ -301,7 +301,12 @@ public void checkIntegrity() throws IOException {
     }
   }
 
-  private record DocValuesSub<T extends KnnVectorValues>(T sub, int docStart, int ordStart) {}
+  private record DocValuesSub<T extends KnnVectorValues>(T sub, int docStart, int ordStart) {
+    @SuppressWarnings("unchecked")
+    DocValuesSub<T> copy() throws IOException {
+      return new DocValuesSub<T>((T) (sub.copy()), docStart, ordStart);
+    }
+  }
 
   private static class MergedDocIterator<T extends KnnVectorValues>
       extends KnnVectorValues.DocIndexIterator {
@@ -850,7 +855,7 @@ public FloatVectorValues getFloatVectorValues(String field) throws IOException {
     class MergedFloatVectorValues extends FloatVectorValues {
       final int dimension;
       final int size;
-      final DocValuesSub<?>[] subs;
+      final List<DocValuesSub<FloatVectorValues>> subs;
       final MergedDocIterator<FloatVectorValues> iter;
       final int[] starts;
       int lastSubIndex;
@@ -858,7 +863,7 @@ class MergedFloatVectorValues extends FloatVectorValues {
       MergedFloatVectorValues(int dimension, int size, List<DocValuesSub<FloatVectorValues>> subs) {
         this.dimension = dimension;
         this.size = size;
-        this.subs = subs.toArray(new DocValuesSub<?>[0]);
+        this.subs = subs;
         iter = new MergedDocIterator<>(subs);
         // [0, start(1), ..., size] - we want the extra element
         // to avoid checking for out-of-array bounds
@@ -888,8 +893,8 @@ public int size() {
       @Override
       public FloatVectorValues copy() throws IOException {
         List<DocValuesSub<FloatVectorValues>> subsCopy = new ArrayList<>();
-        for (Object sub : subs) {
-          subsCopy.add((DocValuesSub<FloatVectorValues>) sub);
+        for (DocValuesSub<FloatVectorValues> sub : subs) {
+          subsCopy.add(sub.copy());
         }
         return new MergedFloatVectorValues(dimension, size, subsCopy);
       }
@@ -900,9 +905,9 @@ public float[] vectorValue(int ord) throws IOException {
         // We need to implement fully random-access API here in order to support callers like
         // SortingCodecReader that rely on it.
         lastSubIndex = findSub(ord, lastSubIndex, starts);
-        assert subs[lastSubIndex].sub != null;
-        return ((FloatVectorValues) subs[lastSubIndex].sub)
-            .vectorValue(ord - subs[lastSubIndex].ordStart);
+        DocValuesSub<FloatVectorValues> sub = subs.get(lastSubIndex);
+        assert sub.sub != null;
+        return (sub.sub).vectorValue(ord - sub.ordStart);
       }
     }
 
@@ -929,7 +934,7 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException {
     class MergedByteVectorValues extends ByteVectorValues {
       final int dimension;
       final int size;
-      final DocValuesSub<?>[] subs;
+      final List<DocValuesSub<ByteVectorValues>> subs;
       final MergedDocIterator<ByteVectorValues> iter;
       final int[] starts;
       int lastSubIndex;
@@ -937,7 +942,7 @@ class MergedByteVectorValues extends ByteVectorValues {
       MergedByteVectorValues(int dimension, int size, List<DocValuesSub<ByteVectorValues>> subs) {
         this.dimension = dimension;
         this.size = size;
-        this.subs = subs.toArray(new DocValuesSub<?>[0]);
+        this.subs = subs;
         iter = new MergedDocIterator<>(subs);
         // [0, start(1), ..., size] - we want the extra element
         // to avoid checking for out-of-array bounds
@@ -970,16 +975,16 @@ public byte[] vectorValue(int ord) throws IOException {
         // SortingCodecReader that rely on it. We maintain lastSubIndex since we expect some
         // repetition.
         lastSubIndex = findSub(ord, lastSubIndex, starts);
-        return ((ByteVectorValues) subs[lastSubIndex].sub)
-            .vectorValue(ord - subs[lastSubIndex].ordStart);
+        DocValuesSub<ByteVectorValues> sub = subs.get(lastSubIndex);
+        return sub.sub.vectorValue(ord - sub.ordStart);
       }
 
       @SuppressWarnings("unchecked")
       @Override
       public ByteVectorValues copy() throws IOException {
         List<DocValuesSub<ByteVectorValues>> newSubs = new ArrayList<>();
-        for (Object sub : subs) {
-          newSubs.add((DocValuesSub<ByteVectorValues>) sub);
+        for (DocValuesSub<ByteVectorValues> sub : subs) {
+          newSubs.add(sub.copy());
         }
         return new MergedByteVectorValues(dimension, size, newSubs);
       }