Fix failure when equality delete updated nested fields in Iceberg #24512

Open · wants to merge 1 commit into base: master
@@ -26,12 +26,12 @@
import java.io.UncheckedIOException;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.concurrent.CompletableFuture;
import java.util.function.Function;
import java.util.function.Supplier;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Throwables.throwIfInstanceOf;
import static io.trino.plugin.base.util.Closables.closeAllSuppress;
import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA;
@@ -41,6 +41,7 @@ public class IcebergPageSource
implements ConnectorPageSource
{
private final int[] expectedColumnIndexes;
private final int[] deleteFilterIndexes;
private final ConnectorPageSource delegate;
private final Optional<ReaderProjectionsAdapter> projectionsAdapter;
private final Supplier<Optional<RowPredicate>> deletePredicate;
@@ -53,6 +54,8 @@ public class IcebergPageSource
public IcebergPageSource(
List<IcebergColumnHandle> expectedColumns,
List<IcebergColumnHandle> requiredColumns,
List<IcebergColumnHandle> deleteFilterColumns,
OptionalInt rowPositionColumnIndex,
ConnectorPageSource delegate,
Optional<ReaderProjectionsAdapter> projectionsAdapter,
Supplier<Optional<RowPredicate>> deletePredicate,
@@ -62,15 +65,26 @@ public IcebergPageSource(
// requiredColumns should include all expectedColumns as well as any columns needed by the DeleteFilter
requireNonNull(expectedColumns, "expectedColumns is null");
requireNonNull(requiredColumns, "requiredColumns is null");
requireNonNull(deleteFilterColumns, "deleteFilterColumns is null");
requireNonNull(rowPositionColumnIndex, "rowPositionColumnIndex is null");
this.expectedColumnIndexes = new int[expectedColumns.size()];
for (int i = 0; i < expectedColumns.size(); i++) {
IcebergColumnHandle expectedColumn = expectedColumns.get(i);
checkArgument(expectedColumn.equals(requiredColumns.get(i)), "Expected columns must be a prefix of required columns");
expectedColumnIndexes[i] = i;

if (expectedColumn.isMergeRowIdColumn()) {
this.deleteFilterIndexes = new int[deleteFilterColumns.size()];
for (int i = 0; i < requiredColumns.size(); i++) {
IcebergColumnHandle column = requiredColumns.get(i);
if (i < expectedColumns.size()) {
expectedColumnIndexes[i] = i;
}
if (column.isMergeRowIdColumn()) {
this.rowIdColumnIndex = i;
}
if (deleteFilterColumns.contains(column)) {
if (column.isRowPositionColumn()) {
this.deleteFilterIndexes[deleteFilterColumns.indexOf(column)] = rowPositionColumnIndex.orElseThrow();
}
else {
this.deleteFilterIndexes[deleteFilterColumns.indexOf(column)] = i;
}
}
}

this.delegate = requireNonNull(delegate, "delegate is null");
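The constructor change above is the heart of the fix: besides tracking the expected columns, it now records, for every delete-filter column, which channel of the reader's page holds it, resolving the synthetic row-position column through the new rowPositionColumnIndex argument. A minimal stand-alone sketch of that mapping, using plain strings as hypothetical stand-ins for IcebergColumnHandle, might look like this:

import java.util.Arrays;
import java.util.List;
import java.util.OptionalInt;

public class DeleteFilterIndexSketch
{
    public static void main(String[] args)
    {
        // Hypothetical column names standing in for IcebergColumnHandle; "$row_position"
        // plays the role of the synthetic row-position column used by positional deletes.
        List<String> requiredColumns = List.of("id", "nested", "$row_position");
        List<String> deleteFilterColumns = List.of("nested", "$row_position");
        // Channel of the row-position column in the reader page, as reported by the file reader
        OptionalInt rowPositionColumnIndex = OptionalInt.of(2);

        int[] deleteFilterIndexes = new int[deleteFilterColumns.size()];
        for (int i = 0; i < requiredColumns.size(); i++) {
            String column = requiredColumns.get(i);
            int filterIndex = deleteFilterColumns.indexOf(column);
            if (filterIndex >= 0) {
                // The row-position channel comes from the reader; every other delete-filter
                // column keeps its position within requiredColumns
                deleteFilterIndexes[filterIndex] = "$row_position".equals(column)
                        ? rowPositionColumnIndex.orElseThrow()
                        : i;
            }
        }
        // Prints [1, 2]: the delete predicate sees only the channels it needs,
        // in deleteFilterColumns order
        System.out.println(Arrays.toString(deleteFilterIndexes));
    }
}

getNextPage() then hands this array to filterPage, so the delete predicate is evaluated against exactly those channels rather than the full set of required columns.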
@@ -120,7 +134,7 @@ public Page getNextPage()

Optional<RowPredicate> deleteFilterPredicate = deletePredicate.get();
if (deleteFilterPredicate.isPresent()) {
dataPage = deleteFilterPredicate.get().filterPage(dataPage);
dataPage = deleteFilterPredicate.get().filterPage(dataPage, deleteFilterIndexes);
}

if (projectionsAdapter.isPresent()) {
@@ -113,6 +113,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
@@ -184,7 +185,6 @@
import static java.util.function.Predicate.not;
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.mapping;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toUnmodifiableList;
import static org.apache.iceberg.FileContent.EQUALITY_DELETES;
import static org.apache.iceberg.FileContent.POSITION_DELETES;
@@ -349,23 +349,21 @@ public ConnectorPageSource createPageSource(
column -> ((IcebergColumnHandle) column).getType(),
IcebergPageSourceProvider::applyProjection));

List<IcebergColumnHandle> readColumns = dataPageSource.getReaderColumns()
.map(readerColumns -> readerColumns.get().stream().map(IcebergColumnHandle.class::cast).collect(toList()))
.orElse(requiredColumns);

Supplier<Optional<RowPredicate>> deletePredicate = memoize(() -> getDeleteManager(partitionSpec, partitionData)
.getDeletePredicate(
path,
dataSequenceNumber,
deletes,
readColumns,
deleteFilterRequiredColumns.stream().collect(toImmutableList()),
tableSchema,
readerPageSourceWithRowPositions,
(deleteFile, deleteColumns, tupleDomain) -> openDeletes(session, fileSystem, deleteFile, deleteColumns, tupleDomain)));

return new IcebergPageSource(
icebergColumns,
requiredColumns,
deleteFilterRequiredColumns.stream().collect(toImmutableList()),
readerPageSourceWithRowPositions.rowPositionColumnIndex(),
dataPageSource.get(),
projectionsAdapter,
deletePredicate,
@@ -618,6 +616,7 @@ private static ReaderPageSourceWithRowPositions createOrcPageSource(
List<ProjectedLayout> projectedLayouts = new ArrayList<>(readBaseColumns.size());
List<ColumnAdaptation> columnAdaptations = new ArrayList<>(readBaseColumns.size());

OptionalInt rowPositionColumnIndex = OptionalInt.empty();
for (IcebergColumnHandle column : readBaseColumns) {
verify(column.isBaseColumn(), "Column projections must be based from a root column");
OrcColumn orcColumn = fileColumnsByIcebergId.get(column.getId());
@@ -643,6 +642,7 @@ else if (column.isMergeRowIdColumn()) {
}
else if (column.isRowPositionColumn()) {
columnAdaptations.add(ColumnAdaptation.positionColumn());
rowPositionColumnIndex = OptionalInt.of(readBaseColumns.indexOf(column));
}
else if (orcColumn != null) {
Type readType = getOrcReadType(column.getType(), typeManager);
@@ -700,6 +700,7 @@ else if (orcColumn != null) {
stats,
reader.getCompressionKind()),
baseColumnProjections),
rowPositionColumnIndex,
recordReader.getStartRowPosition(),
recordReader.getEndRowPosition());
}
@@ -911,6 +912,7 @@ private static ReaderPageSourceWithRowPositions createParquetPageSource(
ParquetPageSource.Builder pageSourceBuilder = ParquetPageSource.builder();
int parquetSourceChannel = 0;

OptionalInt rowPositionColumnIndex = OptionalInt.empty();
ImmutableList.Builder<Column> parquetColumnFieldsBuilder = ImmutableList.builder();
for (int columnIndex = 0; columnIndex < readBaseColumns.size(); columnIndex++) {
IcebergColumnHandle column = readBaseColumns.get(columnIndex);
@@ -935,6 +937,7 @@ else if (column.isMergeRowIdColumn()) {
}
else if (column.isRowPositionColumn()) {
pageSourceBuilder.addRowIndexColumn();
rowPositionColumnIndex = OptionalInt.of(columnIndex);
}
else {
org.apache.parquet.schema.Type parquetField = parquetFields.get(columnIndex);
@@ -972,6 +975,7 @@ else if (column.isRowPositionColumn()) {
new ReaderPageSource(
pageSourceBuilder.build(parquetReader),
baseColumnProjections),
rowPositionColumnIndex,
startRowPosition,
endRowPosition);
}
@@ -1076,6 +1080,7 @@ private static ReaderPageSourceWithRowPositions createAvroPageSource(

Map<Integer, org.apache.avro.Schema.Field> fileColumnsByIcebergId = mapIdsToAvroFields(fileFields);

OptionalInt rowPositionColumnIndex = OptionalInt.empty();
ImmutableList.Builder<String> columnNames = ImmutableList.builder();
ImmutableList.Builder<Type> columnTypes = ImmutableList.builder();
ImmutableList.Builder<Boolean> rowIndexChannels = ImmutableList.builder();
@@ -1097,6 +1102,9 @@ else if (column.isMergeRowIdColumn() || column.isRowPositionColumn()) {
columnNames.add(ROW_POSITION.name());
columnTypes.add(BIGINT);
constantPopulatingPageSourceBuilder.addDelegateColumn(avroSourceChannel);
if (column.isRowPositionColumn()) {
rowPositionColumnIndex = OptionalInt.of(readBaseColumns.indexOf(column));
}
avroSourceChannel++;
}
else if (field == null) {
@@ -1124,6 +1132,7 @@ else if (field == null) {
rowIndexChannels.build(),
newSimpleAggregatedMemoryContext())),
baseColumnProjections),
rowPositionColumnIndex,
Optional.empty(),
Optional.empty());
}
@@ -1394,11 +1403,12 @@ private static TrinoException handleException(ParquetDataSourceId dataSourceId,
return new TrinoException(ICEBERG_CURSOR_ERROR, format("Failed to read Parquet file: %s", dataSourceId), exception);
}

public record ReaderPageSourceWithRowPositions(ReaderPageSource readerPageSource, Optional<Long> startRowPosition, Optional<Long> endRowPosition)
public record ReaderPageSourceWithRowPositions(ReaderPageSource readerPageSource, OptionalInt rowPositionColumnIndex, Optional<Long> startRowPosition, Optional<Long> endRowPosition)
{
public ReaderPageSourceWithRowPositions
{
requireNonNull(readerPageSource, "readerPageSource is null");
requireNonNull(rowPositionColumnIndex, "rowPositionColumnIndex is null");
requireNonNull(startRowPosition, "startRowPosition is null");
requireNonNull(endRowPosition, "endRowPosition is null");
}
@@ -432,10 +432,17 @@ private static IcebergColumnHandle createColumnHandle(NestedField baseColumn, Ne

public static Schema schemaFromHandles(List<IcebergColumnHandle> columns)
{
List<NestedField> icebergColumns = columns.stream()
.map(column -> NestedField.optional(column.getId(), column.getName(), toIcebergType(column.getType(), column.getColumnIdentity())))
.collect(toImmutableList());
return new Schema(StructType.of(icebergColumns).asStructType().fields());
Schema schema = new Schema();
List<NestedField> icebergColumns = new ArrayList<>();
for (IcebergColumnHandle column : columns) {
NestedField field = NestedField.optional(column.getId(), column.getName(), toIcebergType(column.getType(), column.getColumnIdentity()));
// Schema disallows duplicate fields
if (schema.findField(field.fieldId()) == null) {
icebergColumns.add(field);
schema = new Schema(StructType.of(icebergColumns).asStructType().fields());
}
}
return schema;
}
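The reworked schemaFromHandles above checks for an existing field id before adding a field, because an Iceberg Schema rejects duplicate field ids; this is the "Multiple entries with same key" failure previously asserted by the test change at the end of this diff. A minimal stand-alone sketch of the same keep-first-occurrence idea, using a hypothetical Field record instead of the Iceberg API, could be:

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class SchemaDedupSketch
{
    // Hypothetical stand-in for org.apache.iceberg.types.Types.NestedField
    record Field(int fieldId, String name, String type) {}

    // Keep the first occurrence of every field id, preserving order, mirroring the
    // "Schema disallows duplicate fields" guard in schemaFromHandles
    static List<Field> dedupeById(List<Field> fields)
    {
        Map<Integer, Field> byId = new LinkedHashMap<>();
        for (Field field : fields) {
            byId.putIfAbsent(field.fieldId(), field);
        }
        return new ArrayList<>(byId.values());
    }

    public static void main(String[] args)
    {
        List<Field> handles = List.of(
                new Field(1, "id", "bigint"),
                new Field(2, "nested", "struct"),
                new Field(2, "nested", "struct")); // duplicate field id (hypothetical illustration)
        // Prints two fields instead of failing on the duplicate id
        System.out.println(dedupeById(handles));
    }
}

The production code achieves the same effect by probing schema.findField(field.fieldId()) before appending the field.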

public static Map<PartitionField, Integer> getIdentityPartitions(PartitionSpec partitionSpec)
@@ -29,13 +29,14 @@ default RowPredicate and(RowPredicate other)
return (page, position) -> test(page, position) && other.test(page, position);
}

default Page filterPage(Page page)
default Page filterPage(Page page, int[] columns)
{
int positionCount = page.getPositionCount();
int[] retained = new int[positionCount];
int retainedCount = 0;
Page selectedPage = columns.length == 0 ? page : page.getColumns(columns);
for (int position = 0; position < positionCount; position++) {
if (test(page, position)) {
if (test(selectedPage, position)) {
Member:
If we extract a subset of columns as input to test, then does it break assumptions about block index in the implementations? E.g. in io.trino.plugin.iceberg.delete.PositionDeleteFilter#createPredicate we have page.getBlock(filePosChannel), and that can select a different column if we pass in a subset of blocks of the page.

Member Author:
I'm not sure if I fully understand your concern. The List<IcebergColumnHandle> columns argument in PositionDeleteFilter#createPredicate is also adjusted in this PR. https://github.com/trinodb/trino/pull/24512/files#diff-705548e3717786bbc076aba181f1eca081ec69200050c18100bfa959e05095dfR357

retained[retainedCount] = position;
retainedCount++;
}
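To illustrate the contract discussed in the thread above: after this change the predicate runs against page.getColumns(columns), so any channel index used inside a delete predicate must refer to a position within the delete-filter columns rather than within the full page. A stand-alone sketch of that behavior, with long[][] arrays standing in for io.trino.spi.Page (an illustration only, not the Trino API), might be:

import java.util.Arrays;

public class FilterPageSketch
{
    // Stand-in for RowPredicate: decides whether a position of the (column-subset) page is kept
    interface RowPredicate
    {
        boolean test(long[][] columns, int position);
    }

    static long[][] getColumns(long[][] page, int[] channels)
    {
        long[][] selected = new long[channels.length][];
        for (int i = 0; i < channels.length; i++) {
            selected[i] = page[channels[i]];
        }
        return selected;
    }

    // Mirrors the reworked RowPredicate#filterPage: the predicate sees only the selected
    // channels, while the surviving positions are applied to every channel of the full page
    static long[][] filterPage(long[][] page, int[] channels, RowPredicate predicate)
    {
        long[][] selected = channels.length == 0 ? page : getColumns(page, channels);
        int positionCount = page[0].length;
        int[] retained = new int[positionCount];
        int retainedCount = 0;
        for (int position = 0; position < positionCount; position++) {
            if (predicate.test(selected, position)) {
                retained[retainedCount++] = position;
            }
        }
        long[][] filtered = new long[page.length][];
        for (int channel = 0; channel < page.length; channel++) {
            long[] kept = new long[retainedCount];
            for (int i = 0; i < retainedCount; i++) {
                kept[i] = page[channel][retained[i]];
            }
            filtered[channel] = kept;
        }
        return filtered;
    }

    public static void main(String[] args)
    {
        // Page with three channels: id, nested, $row_position
        long[][] page = {
                {1, 2, 3},    // id
                {10, 20, 30}, // nested
                {0, 1, 2},    // $row_position
        };
        // The delete predicate only needs "nested" (channel 1): drop rows where nested == 20
        long[][] filtered = filterPage(page, new int[] {1}, (columns, position) -> columns[0][position] != 20);
        System.out.println(Arrays.deepToString(filtered)); // [[1, 3], [10, 30], [0, 2]]
    }
}

Here channel 0 of the subset corresponds to the "nested" column, which is why the channel numbers the delete filters use have to be computed against the delete-filter columns; that is what the deleteFilterIndexes mapping built in IcebergPageSource provides.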
@@ -597,8 +597,8 @@ public void testMultipleEqualityDeletesWithNestedFields()
equalityFieldIds);
}

// TODO: support read equality deletes with nested fields(https://github.com/trinodb/trino/issues/18625)
assertThat(query("SELECT * FROM " + tableName)).failure().hasMessageContaining("Multiple entries with same key");
assertThat(query("SELECT * FROM " + tableName))
.matches("VALUES (BIGINT '1', CAST(row(10, 100) AS ROW(nested BIGINT, nested_other BIGINT)))");
assertUpdate("DROP TABLE " + tableName);
}
