From f16932a1025f3876dbd422701a6a0a3e74c1aed4 Mon Sep 17 00:00:00 2001 From: Khushboo Desai Date: Mon, 13 Jan 2025 11:58:30 -0800 Subject: [PATCH] Adds suggested changes --- .../hive/formats/ion/IonDecoderFactory.java | 143 +++++------------- .../trino/hive/formats/ion/TestIonFormat.java | 4 +- 2 files changed, 39 insertions(+), 108 deletions(-) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java index 76a9bc0feee7..d20247ab6a3f 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java @@ -17,6 +17,8 @@ import com.amazon.ion.IonReader; import com.amazon.ion.IonType; import com.amazon.ion.Timestamp; +import com.amazon.ion.IonWriter; +import com.amazon.ion.system.IonTextWriterBuilder; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.airlift.slice.Slices; @@ -53,6 +55,7 @@ import io.trino.spi.type.VarcharType; import io.trino.spi.type.Varchars; +import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; import java.math.RoundingMode; @@ -128,8 +131,8 @@ private static BlockDecoder decoderForType(Type type) case BooleanType t -> wrapDecoder(boolDecoder, t, IonType.BOOL); case DateType t -> wrapDecoder(dateDecoder, t, IonType.TIMESTAMP); case TimestampType t -> wrapDecoder(timestampDecoder(t), t, IonType.TIMESTAMP); - case VarcharType t -> wrapDecoderWithTextCoercion(varcharDecoder(t), t, IonType.STRING, IonType.SYMBOL); - case CharType t -> wrapDecoderWithTextCoercion(charDecoder(t), t, IonType.STRING, IonType.SYMBOL); + case VarcharType t -> wrapDecoder(varcharDecoder(t), t, IonType.values()); + case CharType t -> wrapDecoder(charDecoder(t), t, IonType.values()); case VarbinaryType t -> wrapDecoder(binaryDecoder, t, IonType.BLOB, IonType.CLOB); case RowType t -> wrapDecoder(RowDecoder.forFields(t.getFields()), t, IonType.STRUCT); case ArrayType t -> wrapDecoder(new ArrayDecoder(decoderForType(t.getElementType())), t, IonType.LIST, IonType.SEXP); @@ -148,38 +151,6 @@ private static BlockDecoder decoderForType(Type type) * This code treats all values as nullable. */ private static BlockDecoder wrapDecoder(BlockDecoder decoder, Type trinoType, IonType... allowedTypes) - { - Set allowedWithNull = new HashSet<>(Arrays.asList(allowedTypes)); - return createConfigurableDecoder(decoder, trinoType, false, allowedTypes); - } - - /** - * Wraps decoders for common handling logic. - *

- * Handles un-typed and correctly typed null values. - * Throws for mistyped values, whether null or not. - * Delegates to Decoder for correctly-typed, non-null values. - * Handles text coercion for Varchar and Char types. - *

- * This code treats all values as nullable. - */ - private static BlockDecoder wrapDecoderWithTextCoercion(BlockDecoder decoder, Type trinoType, IonType... allowedTypes) - { - return createConfigurableDecoder(decoder, trinoType, true, allowedTypes); - } - - /** - * Wraps decoders for common handling logic. - *

- * Handles un-typed and correctly typed null values. - * Throws for mistyped values, whether null or not. - * Delegates to Decoder for correctly-typed, non-null values. - * Handles text coercion for Varchar and Char types. - *

- * This code treats all values as nullable. - */ - private static BlockDecoder createConfigurableDecoder(BlockDecoder decoder, Type trinoType, boolean textCoercion, - IonType... allowedTypes) { final Set allowedWithNull = new HashSet<>(Arrays.asList(allowedTypes)); allowedWithNull.add(IonType.NULL); @@ -187,15 +158,8 @@ private static BlockDecoder createConfigurableDecoder(BlockDecoder decoder, Type return (reader, builder) -> { final IonType ionType = reader.getType(); if (!allowedWithNull.contains(ionType)) { - if (textCoercion) { - String coercedValue = coerceToString(reader, ionType); - VarcharType.VARCHAR.writeSlice(builder, Slices.utf8Slice(coercedValue)); - return; - } - else { - throw new TrinoException(StandardErrorCode.GENERIC_USER_ERROR, - "Cannot coerce IonType %s to Trino type %s".formatted(ionType, trinoType)); - } + throw new TrinoException(StandardErrorCode.GENERIC_USER_ERROR, + "Cannot coerce IonType %s to Trino type %s".formatted(ionType, trinoType)); } if (reader.isNullValue()) { builder.appendNull(); @@ -206,65 +170,6 @@ private static BlockDecoder createConfigurableDecoder(BlockDecoder decoder, Type }; } - /** - * Coerces an Ion value to its string representation. - * - * This method handles all IonTypes and converts them to a string format. - * For complex types (LIST, SEXP, STRUCT), it recursively processes their elements. - * - * @param reader The IonReader containing the value to be coerced. - * @param type The IonType of the value to be coerced. - * @return A string representation of the Ion value. - * @throws IllegalArgumentException if the IonType is not supported for text coercion. - * @throws IonException if there's an error reading from the IonReader. - */ - private static String coerceToString(IonReader reader, IonType type) - { - switch (type) { - case BOOL: - return Boolean.toString(reader.booleanValue()); - case INT: - return Long.toString(reader.longValue()); - case FLOAT: - return Double.toString(reader.doubleValue()); - case DECIMAL: - return reader.decimalValue().toString(); - case TIMESTAMP: - return reader.timestampValue().toString(); - case SYMBOL: - case STRING: - return reader.stringValue(); - case CLOB: - case BLOB: - return new String(reader.newBytes(), StandardCharsets.UTF_8); - case LIST: - case SEXP: - StringBuilder sb = new StringBuilder("["); - reader.stepIn(); - while (reader.next() != null) { - if (sb.length() > 1) { - sb.append(", "); - } - sb.append(coerceToString(reader, reader.getType())); - } - reader.stepOut(); - return sb.append("]").toString(); - case STRUCT: - sb = new StringBuilder("{"); - reader.stepIn(); - while (reader.next() != null) { - if (sb.length() > 1) { - sb.append(", "); - } - sb.append(reader.getFieldName()).append(": ").append(coerceToString(reader, reader.getType())); - } - reader.stepOut(); - return sb.append("}").toString(); - default: - throw new IllegalArgumentException(String.format("Text coercion is not supported for IonType: %s", type)); - } - } - /** * The RowDecoder is used as the BlockDecoder for nested RowTypes and is used for decoding * top-level structs into pages. @@ -459,16 +364,42 @@ private static BlockDecoder decimalDecoder(DecimalType type) }; } + private static String getCoercedValue(IonReader ionReader) + { + IonTextWriterBuilder textWriterBuilder = IonTextWriterBuilder.standard(); + StringBuilder stringBuilder = new StringBuilder(); + IonWriter writer = textWriterBuilder.build(stringBuilder); + try { + writer.writeValue(ionReader); + } + catch (IOException e) { + throw new RuntimeException(e); + } + return stringBuilder.toString(); + } + private static BlockDecoder varcharDecoder(VarcharType type) { - return (ionReader, blockBuilder) -> - type.writeSlice(blockBuilder, Varchars.truncateToLength(Slices.utf8Slice(ionReader.stringValue()), type)); + (ionReader, blockBuilder) -> { + if (valueType == IonType.SYMBOL || valueType == IonType.STRING) { + return type.writeSlice(blockBuilder, Varchars.truncateToLength(Slices.utf8Slice(ionReader.stringValue()), type)); + } + // For any types other than IonType.SYMBOL and IonType.STRING, performs text coercion + String coercedValue = getCoercedValue(ionReader); + return type.writeSlice(blockBuilder, Varchars.truncateToLength(Slices.utf8Slice(coercedValue), type)); + } } private static BlockDecoder charDecoder(CharType type) { - return (ionReader, blockBuilder) -> - type.writeSlice(blockBuilder, Chars.truncateToLengthAndTrimSpaces(Slices.utf8Slice(ionReader.stringValue()), type)); + (ionReader, blockBuilder) -> { + if (valueType == IonType.SYMBOL || valueType == IonType.STRING) { + return type.writeSlice(blockBuilder, Chars.truncateToLengthAndTrimSpaces(Slices.utf8Slice(ionReader.stringValue()), type)); + } + // For any types other than IonType.SYMBOL and IonType.STRING, performs text coercion + String coercedValue = getCoercedValue(ionReader); + return type.writeSlice(blockBuilder, Chars.truncateToLengthAndTrimSpaces(Slices.utf8Slice(coercedValue), type)); + } } private static final BlockDecoder byteDecoder = (ionReader, blockBuilder) -> diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java index 49feeb8c6888..ead606fe3e1b 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/ion/TestIonFormat.java @@ -209,7 +209,7 @@ public void testStringCoercions() RowType.rowType( field("foo", VARCHAR)), "{ foo: [1, 2, 3] }", - List.of("[1, 2, 3]")); + List.of("[1,2,3]")); assertValues( RowType.rowType( field("foo", VARCHAR)), @@ -219,7 +219,7 @@ public void testStringCoercions() RowType.rowType( field("foo", VARCHAR)), "{ foo: { nested_foo: 12 } }", - List.of("{nested_foo: 12}")); + List.of("{nested_foo:12}")); } @Test