Skip to content
This repository has been archived by the owner on Sep 2, 2024. It is now read-only.

Commit

Permalink
PARQUET-1744: Some filters throws ArrayIndexOutOfBoundsException (apa…
Browse files Browse the repository at this point in the history
  • Loading branch information
gszadovszky authored and rshkv committed Jan 30, 2021
1 parent 79edf7e commit f3402f2
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ OfInt eq(ColumnIndexBase<?>.ValueComparator comparator) {
@Override
OfInt gt(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
if (length == 0) {
// No matching rows if the column index contains null pages only
return IndexIterator.EMPTY;
}
int left = 0;
int right = length;
do {
Expand All @@ -100,6 +104,10 @@ OfInt gt(ColumnIndexBase<?>.ValueComparator comparator) {
@Override
OfInt gtEq(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
if (length == 0) {
// No matching rows if the column index contains null pages only
return IndexIterator.EMPTY;
}
int left = 0;
int right = length;
do {
Expand All @@ -116,6 +124,10 @@ OfInt gtEq(ColumnIndexBase<?>.ValueComparator comparator) {
@Override
OfInt lt(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
if (length == 0) {
// No matching rows if the column index contains null pages only
return IndexIterator.EMPTY;
}
int left = -1;
int right = length - 1;
do {
Expand All @@ -132,6 +144,10 @@ OfInt lt(ColumnIndexBase<?>.ValueComparator comparator) {
@Override
OfInt ltEq(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
if (length == 0) {
// No matching rows if the column index contains null pages only
return IndexIterator.EMPTY;
}
int left = -1;
int right = length - 1;
do {
Expand Down Expand Up @@ -207,6 +223,10 @@ OfInt eq(ColumnIndexBase<?>.ValueComparator comparator) {
@Override
OfInt gt(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
if (length == 0) {
// No matching rows if the column index contains null pages only
return IndexIterator.EMPTY;
}
int left = -1;
int right = length - 1;
do {
Expand All @@ -223,6 +243,10 @@ OfInt gt(ColumnIndexBase<?>.ValueComparator comparator) {
@Override
OfInt gtEq(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
if (length == 0) {
// No matching rows if the column index contains null pages only
return IndexIterator.EMPTY;
}
int left = -1;
int right = length - 1;
do {
Expand All @@ -239,6 +263,10 @@ OfInt gtEq(ColumnIndexBase<?>.ValueComparator comparator) {
@Override
OfInt lt(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
if (length == 0) {
// No matching rows if the column index contains null pages only
return IndexIterator.EMPTY;
}
int left = 0;
int right = length;
do {
Expand All @@ -255,6 +283,10 @@ OfInt lt(ColumnIndexBase<?>.ValueComparator comparator) {
@Override
OfInt ltEq(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
if (length == 0) {
// No matching rows if the column index contains null pages only
return IndexIterator.EMPTY;
}
int left = 0;
int right = length;
do {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ public String toString() {
private static final int RAND_COUNT = 2000;
private static final ColumnIndexBase<?> RAND_ASCENDING;
private static final ColumnIndexBase<?> RAND_DESCENDING;
private static final ColumnIndexBase<?> ALL_NULL_PAGES;
private static final SpyValueComparatorBuilder SPY_COMPARATOR_BUILDER = new SpyValueComparatorBuilder();
static {
ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(TYPE, Integer.MAX_VALUE);
Expand Down Expand Up @@ -233,6 +234,13 @@ public String toString() {
builder.add(stats(it.next(), it.next()));
}
RAND_DESCENDING = (ColumnIndexBase<?>) builder.build();

builder = ColumnIndexBuilder.getBuilder(TYPE, Integer.MAX_VALUE);
// Add a couple of empty stats so column index will contain null pages only
for (int i = 0; i < 10; ++i) {
builder.add(Statistics.createStats(TYPE));
}
ALL_NULL_PAGES = (ColumnIndexBase<?>) builder.build();
}

private static Statistics<?> stats(int min, int max) {
Expand Down Expand Up @@ -267,6 +275,14 @@ public void testEq() {
BoundaryOrder.UNORDERED::eq,
BoundaryOrder.DESCENDING::eq,
DESCENDING.createValueComparator(i));
validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ",
BoundaryOrder.UNORDERED::eq,
BoundaryOrder.ASCENDING::eq,
ALL_NULL_PAGES.createValueComparator(i));
validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ",
BoundaryOrder.UNORDERED::eq,
BoundaryOrder.DESCENDING::eq,
ALL_NULL_PAGES.createValueComparator(i));
}
for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) {
ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
Expand Down Expand Up @@ -305,6 +321,14 @@ public void testGt() {
BoundaryOrder.UNORDERED::gt,
BoundaryOrder.DESCENDING::gt,
DESCENDING.createValueComparator(i));
validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ",
BoundaryOrder.UNORDERED::gt,
BoundaryOrder.ASCENDING::gt,
ALL_NULL_PAGES.createValueComparator(i));
validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ",
BoundaryOrder.UNORDERED::gt,
BoundaryOrder.DESCENDING::gt,
ALL_NULL_PAGES.createValueComparator(i));
}
for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) {
ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
Expand Down Expand Up @@ -343,6 +367,14 @@ public void testGtEq() {
BoundaryOrder.UNORDERED::gtEq,
BoundaryOrder.DESCENDING::gtEq,
DESCENDING.createValueComparator(i));
validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ",
BoundaryOrder.UNORDERED::gtEq,
BoundaryOrder.ASCENDING::gtEq,
ALL_NULL_PAGES.createValueComparator(i));
validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ",
BoundaryOrder.UNORDERED::gtEq,
BoundaryOrder.DESCENDING::gtEq,
ALL_NULL_PAGES.createValueComparator(i));
}
for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) {
ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
Expand Down Expand Up @@ -381,6 +413,14 @@ public void testLt() {
BoundaryOrder.UNORDERED::lt,
BoundaryOrder.DESCENDING::lt,
DESCENDING.createValueComparator(i));
validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ",
BoundaryOrder.UNORDERED::lt,
BoundaryOrder.ASCENDING::lt,
ALL_NULL_PAGES.createValueComparator(i));
validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ",
BoundaryOrder.UNORDERED::lt,
BoundaryOrder.DESCENDING::lt,
ALL_NULL_PAGES.createValueComparator(i));
}
for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) {
ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
Expand Down Expand Up @@ -419,6 +459,14 @@ public void testLtEq() {
BoundaryOrder.UNORDERED::ltEq,
BoundaryOrder.DESCENDING::ltEq,
DESCENDING.createValueComparator(i));
validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ",
BoundaryOrder.UNORDERED::ltEq,
BoundaryOrder.ASCENDING::ltEq,
ALL_NULL_PAGES.createValueComparator(i));
validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ",
BoundaryOrder.UNORDERED::ltEq,
BoundaryOrder.DESCENDING::ltEq,
ALL_NULL_PAGES.createValueComparator(i));
}
for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) {
ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
Expand Down Expand Up @@ -457,6 +505,14 @@ public void testNotEq() {
BoundaryOrder.UNORDERED::notEq,
BoundaryOrder.DESCENDING::notEq,
DESCENDING.createValueComparator(i));
validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ",
BoundaryOrder.UNORDERED::notEq,
BoundaryOrder.ASCENDING::notEq,
ALL_NULL_PAGES.createValueComparator(i));
validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ",
BoundaryOrder.UNORDERED::notEq,
BoundaryOrder.DESCENDING::notEq,
ALL_NULL_PAGES.createValueComparator(i));
}
for (int i = FROM - 1; i <= TO + 1; ++i) {
ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import static org.apache.parquet.filter2.predicate.FilterApi.gt;
import static org.apache.parquet.filter2.predicate.FilterApi.gtEq;
import static org.apache.parquet.filter2.predicate.FilterApi.intColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.longColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.lt;
import static org.apache.parquet.filter2.predicate.FilterApi.ltEq;
import static org.apache.parquet.filter2.predicate.FilterApi.notEq;
Expand All @@ -38,10 +39,11 @@
import static org.apache.parquet.internal.column.columnindex.BoundaryOrder.UNORDERED;
import static org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.calculateRowRanges;
import static org.apache.parquet.io.api.Binary.fromString;
import static org.apache.parquet.schema.OriginalType.UTF8;
import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
import static org.apache.parquet.schema.Types.optional;
import static org.junit.Assert.assertArrayEquals;

Expand Down Expand Up @@ -161,55 +163,56 @@ public boolean inverseCanDrop(Statistics<Integer> statistics) {

/**
* <pre>
* row column1 column2 column3 column4 (no column index)
* ------0------ ------0------ ------0------ ------0------
* 0. 1 Zulu 2.03
* ------1------ ------1------ ------1------ ------1------
* 1. 2 Yankee 4.67
* 2. 3 Xray 3.42
* 3. 4 Whiskey 8.71
* row column1 column2 column3 column4 column5
* (no column index)
* ------0------ ------0------ ------0------ ------0------ ------0------
* 0. 1 Zulu 2.03 null
* ------1------ ------1------ ------1------ ------1------ ------1------
* 1. 2 Yankee 4.67 null
* 2. 3 Xray 3.42 null
* 3. 4 Whiskey 8.71 null
* ------2------ ------2------
* 4. 5 Victor 0.56
* 5. 6 Uniform 4.30
* 4. 5 Victor 0.56 null
* 5. 6 Uniform 4.30 null
* ------2------ ------3------
* 6. null null null
* 6. null null null null
* ------2------ ------4------
* 7. 7 Tango 3.50
* 7. 7 Tango 3.50 null
* ------3------
* 8. 7 null 3.14
* 8. 7 null 3.14 null
* ------3------
* 9. 7 null null
* 9. 7 null null null
* ------3------
* 10. null null 9.99
* 10. null null 9.99 null
* ------4------
* 11. 8 Sierra 8.78
* 11. 8 Sierra 8.78 null
* ------5------
* 12. 9 Romeo 9.56
* 13. 10 Quebec 2.71
* 12. 9 Romeo 9.56 null
* 13. 10 Quebec 2.71 null
* ------4------
* 14. 11 Papa 5.71
* 15. 12 Oscar 4.09
* 14. 11 Papa 5.71 null
* 15. 12 Oscar 4.09 null
* ------5------ ------4------ ------6------
* 16. 13 November null
* 17. 14 Mike null
* 18. 15 Lima 0.36
* 19. 16 Kilo 2.94
* 20. 17 Juliett 4.23
* 16. 13 November null null
* 17. 14 Mike null null
* 18. 15 Lima 0.36 null
* 19. 16 Kilo 2.94 null
* 20. 17 Juliett 4.23 null
* ------5------ ------6------ ------7------
* 21. 18 India null
* 22. 19 Hotel 5.32
* 21. 18 India null null
* 22. 19 Hotel 5.32 null
* ------5------
* 23. 20 Golf 4.17
* 24. 21 Foxtrot 7.92
* 25. 22 Echo 7.95
* 23. 20 Golf 4.17 null
* 24. 21 Foxtrot 7.92 null
* 25. 22 Echo 7.95 null
* ------6------
* 26. 23 Delta null
* 26. 23 Delta null null
* ------6------
* 27. 24 Charlie null
* 27. 24 Charlie null null
* ------8------
* 28. 25 Bravo null
* 28. 25 Bravo null null
* ------7------
* 29. 26 Alfa null
* 29. 26 Alfa null null
* </pre>
*/
private static final long TOTAL_ROW_COUNT = 30;
Expand All @@ -231,7 +234,7 @@ public boolean inverseCanDrop(Statistics<Integer> statistics) {
.addPage(6)
.addPage(3)
.build();
private static final ColumnIndex COLUMN2_CI = new CIBuilder(optional(BINARY).as(UTF8).named("column2"), DESCENDING)
private static final ColumnIndex COLUMN2_CI = new CIBuilder(optional(BINARY).as(stringType()).named("column2"), DESCENDING)
.addPage(0, "Zulu", "Zulu")
.addPage(0, "Whiskey", "Yankee")
.addPage(1, "Tango", "Victor")
Expand Down Expand Up @@ -281,6 +284,14 @@ public boolean inverseCanDrop(Statistics<Integer> statistics) {
.addPage(7)
.addPage(2)
.build();
private static final ColumnIndex COLUMN5_CI = new CIBuilder(optional(INT64).named("column5"), ASCENDING)
.addNullPage(1)
.addNullPage(29)
.build();
private static final OffsetIndex COLUMN5_OI = new OIBuilder()
.addPage(1)
.addPage(29)
.build();
private static final ColumnIndexStore STORE = new ColumnIndexStore() {
@Override
public ColumnIndex getColumnIndex(ColumnPath column) {
Expand All @@ -293,6 +304,8 @@ public ColumnIndex getColumnIndex(ColumnPath column) {
return COLUMN3_CI;
case "column4":
return COLUMN4_CI;
case "column5":
return COLUMN5_CI;
default:
return null;
}
Expand All @@ -309,6 +322,8 @@ public OffsetIndex getOffsetIndex(ColumnPath column) {
return COLUMN3_OI;
case "column4":
return COLUMN4_OI;
case "column5":
return COLUMN5_OI;
default:
throw new MissingOffsetIndexException(column);
}
Expand Down Expand Up @@ -461,4 +476,25 @@ public void testFilteringWithMissingOffsetIndex() {
TOTAL_ROW_COUNT);
}

@Test
public void testFilteringWithAllNullPages() {
Set<ColumnPath> paths = paths("column1", "column5");

assertAllRows(calculateRowRanges(FilterCompat.get(
notEq(longColumn("column5"), 1234567L)),
STORE, paths, TOTAL_ROW_COUNT),
TOTAL_ROW_COUNT);
assertAllRows(calculateRowRanges(FilterCompat.get(
or(gtEq(intColumn("column1"), 10),
notEq(longColumn("column5"), 1234567L))),
STORE, paths, TOTAL_ROW_COUNT),
TOTAL_ROW_COUNT);
assertRows(calculateRowRanges(FilterCompat.get(
eq(longColumn("column5"), 1234567L)),
STORE, paths, TOTAL_ROW_COUNT));
assertRows(calculateRowRanges(FilterCompat.get(
and(lt(intColumn("column1"), 20),
gtEq(longColumn("column5"), 1234567L))),
STORE, paths, TOTAL_ROW_COUNT));
}
}

0 comments on commit f3402f2

Please sign in to comment.