diff --git a/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java b/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java
index 61b90d9fc6e3..295f6fc46d01 100644
--- a/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java
+++ b/core/src/main/java/org/apache/iceberg/actions/SizeBasedDataRewriter.java
@@ -47,12 +47,14 @@ public abstract class SizeBasedDataRewriter extends SizeBasedFileRewriter validOptions() {
   public void init(Map options) {
     super.init(options);
     this.deleteFileThreshold = deleteFileThreshold(options);
+    // NOTE(review): the ratio is looked up under DELETE_FILE_THRESHOLD, which by its name is the
+    // key of the delete file threshold option; a dedicated ratio option key is presumably
+    // intended here -- confirm against the constants declared in this class.
+    this.deleteRatioThreshold = PropertyUtil.propertyAsDouble(
+        options, DELETE_FILE_THRESHOLD, DEFAULT_DELETE_THRESHOLD);
   }
 
   @Override
@@ -116,7 +120,7 @@ private boolean tooHighDeleteRatio(FileScanTask task) {
 
     double deletedRecords = (double) Math.min(knownDeletedRecordCount, task.file().recordCount());
     double deleteRatio = deletedRecords / task.file().recordCount();
-    return deleteRatio >= DELETE_RATIO_THRESHOLD;
+    return deleteRatio >= this.deleteRatioThreshold;
   }
 
   @Override
diff --git a/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedRewriter.java b/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedRewriter.java
index 77d16d3bc821..9a2ee69dee45 100644
--- a/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedRewriter.java
+++ b/core/src/test/java/org/apache/iceberg/actions/TestSizeBasedRewriter.java
@@ -76,6 +76,47 @@ public void testSplitSizeLowerBound() {
     assertThat(splitSize).isLessThan(maxFileSize);
   }
 
+  /** Verifies that init picks up the delete file threshold passed in the options map. */
+  @TestTemplate
+  public void testDeleteFileThresholdOption() {
+    SizeBasedDataFileRewriterImpl rewriter = new SizeBasedDataFileRewriterImpl(table);
+
+    // Key the entry by the option name; DEFAULT_DELETE_THRESHOLD is a default value, not a key.
+    Map<String, String> options =
+        ImmutableMap.of(SizeBasedDataRewriter.DELETE_FILE_THRESHOLD, "5");
+    rewriter.init(options);
+
+    assertThat(rewriter.getDeleteFileThreshold()).isEqualTo(5);
+  }
+
+  /** Verifies that a task built with the values below is reported as having too many deletes. */
+  @TestTemplate
+  public void testHighDeleteRatioTriggersRewrite() {
+    SizeBasedDataFileRewriterImpl rewriter = new SizeBasedDataFileRewriterImpl(table);
+
+    // NOTE(review): init() is not called, so deleteRatioThreshold is whatever the field starts
+    // as; and tooHighDeleteRatio is declared private in SizeBasedDataRewriter, so this call
+    // presumably needs a visibility change or a public entry point -- confirm.
+    FileScanTask task = new MockFileScanTask(100L * 1024 * 1024, 80); // 80% delete ratio
+
+    assertThat(rewriter.tooHighDeleteRatio(task)).isTrue();
+  }
+
+  /**
+   * Rejects thresholds outside the range (0.0, 1.0].
+   *
+   * <p>Plain helper, deliberately not annotated as a test: it is private and takes a parameter.
+   *
+   * @param threshold candidate threshold value
+   * @throws IllegalArgumentException if threshold is not in (0.0, 1.0]
+   */
+  private void validateThreshold(double threshold) {
+    if (threshold <= 0.0 || threshold > 1.0) {
+      throw new IllegalArgumentException(
+          "Threshold must be greater than 0.0 and less than or equal to 1.0");
+    }
+  }
+
   private static class SizeBasedDataFileRewriterImpl extends SizeBasedDataRewriter {
 
     SizeBasedDataFileRewriterImpl(Table table) {