Skip to content

Commit

Permalink
Adds support to string coercions
Browse files Browse the repository at this point in the history
  • Loading branch information
desaikd committed Dec 24, 2024
1 parent f18f95d commit a050424
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.util.Arrays;
import java.util.HashSet;
Expand Down Expand Up @@ -127,8 +128,8 @@ private static BlockDecoder decoderForType(Type type)
case BooleanType t -> wrapDecoder(boolDecoder, t, IonType.BOOL);
case DateType t -> wrapDecoder(dateDecoder, t, IonType.TIMESTAMP);
case TimestampType t -> wrapDecoder(timestampDecoder(t), t, IonType.TIMESTAMP);
case VarcharType t -> wrapDecoder(varcharDecoder(t), t, IonType.STRING, IonType.SYMBOL);
case CharType t -> wrapDecoder(charDecoder(t), t, IonType.STRING, IonType.SYMBOL);
case VarcharType t -> wrapDecoderWithTextCoercion(varcharDecoder(t), t, IonType.STRING, IonType.SYMBOL);
case CharType t -> wrapDecoderWithTextCoercion(charDecoder(t), t, IonType.STRING, IonType.SYMBOL);
case VarbinaryType t -> wrapDecoder(binaryDecoder, t, IonType.BLOB, IonType.CLOB);
case RowType t -> wrapDecoder(RowDecoder.forFields(t.getFields()), t, IonType.STRUCT);
case ArrayType t -> wrapDecoder(new ArrayDecoder(decoderForType(t.getElementType())), t, IonType.LIST, IonType.SEXP);
Expand All @@ -149,13 +150,52 @@ private static BlockDecoder decoderForType(Type type)
private static BlockDecoder wrapDecoder(BlockDecoder decoder, Type trinoType, IonType... allowedTypes)
{
Set<IonType> allowedWithNull = new HashSet<>(Arrays.asList(allowedTypes));

Check failure on line 152 in lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java

View workflow job for this annotation

GitHub Actions / error-prone-checks

The local variable 'allowedWithNull' is never read.

Check failure on line 152 in lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java

View workflow job for this annotation

GitHub Actions / error-prone-checks

A collection or proto builder was created, but its values were never accessed.

Check failure on line 152 in lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java

View workflow job for this annotation

GitHub Actions / error-prone-checks

The local variable 'allowedWithNull' is never read.

Check failure on line 152 in lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ion/IonDecoderFactory.java

View workflow job for this annotation

GitHub Actions / error-prone-checks

A collection or proto builder was created, but its values were never accessed.
return createConfigurableDecoder(decoder, trinoType, false, allowedTypes);
}

/**
 * Wraps a text decoder (Varchar or Char) so that Ion values of other types are coerced to text.
 * <p>
 * Delegates to {@code createConfigurableDecoder} with text coercion enabled. With that flag set,
 * a value whose Ion type is not one of {@code allowedTypes} is converted to its string form
 * instead of being rejected; values of an allowed type are still handled by {@code decoder}.
 *
 * @param decoder the decoder used for values of an allowed Ion type
 * @param trinoType the Trino type being decoded into
 * @param allowedTypes the Ion types that {@code decoder} accepts without coercion
 * @return a decoder that applies text coercion to values of any other Ion type
 */
private static BlockDecoder wrapDecoderWithTextCoercion(BlockDecoder decoder, Type trinoType, IonType... allowedTypes)
{
    final boolean coerceOtherTypesToText = true;
    return createConfigurableDecoder(decoder, trinoType, coerceOtherTypesToText, allowedTypes);
}

/**
* Wraps decoders for common handling logic.
* <p>
* Handles un-typed and correctly typed null values.
* Throws for mistyped values, whether null or not.
* Delegates to Decoder for correctly-typed, non-null values.
* Handles text coercion for Varchar and Char types.
* <p>
* This code treats all values as nullable.
*/
private static BlockDecoder createConfigurableDecoder(BlockDecoder decoder, Type trinoType, boolean textCoercion,
IonType... allowedTypes)
{
final Set<IonType> allowedWithNull = new HashSet<>(Arrays.asList(allowedTypes));
allowedWithNull.add(IonType.NULL);

return (reader, builder) -> {
final IonType ionType = reader.getType();
if (!allowedWithNull.contains(ionType)) {
throw new TrinoException(StandardErrorCode.GENERIC_USER_ERROR,
"Cannot coerce IonType %s to Trino type %s".formatted(ionType, trinoType));
if (textCoercion) {
String coercedValue = coerceToString(reader, ionType);
VarcharType.VARCHAR.writeSlice(builder, Slices.utf8Slice(coercedValue));
return;
}
else {
throw new TrinoException(StandardErrorCode.GENERIC_USER_ERROR,
"Cannot coerce IonType %s to Trino type %s".formatted(ionType, trinoType));
}
}
if (reader.isNullValue()) {
builder.appendNull();
Expand All @@ -166,6 +206,65 @@ private static BlockDecoder wrapDecoder(BlockDecoder decoder, Type trinoType, Io
};
}

/**
 * Coerces the Ion value the reader is currently positioned on to a string.
 * <p>
 * Scalars are rendered through their Java string form, STRING and SYMBOL values are returned
 * as-is, and CLOB/BLOB bytes are decoded as UTF-8. Containers are rendered recursively:
 * LIST and SEXP as {@code [a, b]}, STRUCT as {@code {name: value}}.
 *
 * @param reader the IonReader positioned on the value to be coerced
 * @param type the IonType of the value to be coerced
 * @return a string representation of the Ion value
 * @throws IllegalArgumentException if the IonType is not supported for text coercion
 */
private static String coerceToString(IonReader reader, IonType type)
{
    // NOTE(review): null values (typed nulls, or untyped nulls nested in a container) are not
    // special-cased here; the scalar accessors may throw or return null for them. Confirm the
    // intended behavior with the caller.
    return switch (type) {
        case BOOL -> Boolean.toString(reader.booleanValue());
        // Ion integers are arbitrary precision; longValue() cannot represent values wider
        // than 64 bits, so read the full value instead.
        case INT -> reader.bigIntegerValue().toString();
        case FLOAT -> Double.toString(reader.doubleValue());
        case DECIMAL -> reader.decimalValue().toString();
        case TIMESTAMP -> reader.timestampValue().toString();
        case SYMBOL, STRING -> reader.stringValue();
        case CLOB, BLOB -> new String(reader.newBytes(), StandardCharsets.UTF_8);
        case LIST, SEXP -> {
            StringBuilder text = new StringBuilder("[");
            // Track the first element explicitly: checking the builder length would drop the
            // separator after elements that coerce to an empty string.
            boolean first = true;
            reader.stepIn();
            while (reader.next() != null) {
                if (!first) {
                    text.append(", ");
                }
                first = false;
                text.append(coerceToString(reader, reader.getType()));
            }
            reader.stepOut();
            yield text.append("]").toString();
        }
        case STRUCT -> {
            StringBuilder text = new StringBuilder("{");
            boolean first = true;
            reader.stepIn();
            while (reader.next() != null) {
                if (!first) {
                    text.append(", ");
                }
                first = false;
                text.append(reader.getFieldName())
                        .append(": ")
                        .append(coerceToString(reader, reader.getType()));
            }
            reader.stepOut();
            yield text.append("}").toString();
        }
        default -> throw new IllegalArgumentException(String.format("Text coercion is not supported for IonType: %s", type));
    };
}

/**
* The RowDecoder is used as the BlockDecoder for nested RowTypes and is used for decoding
* top-level structs into pages.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,42 @@ public void testCaseInsensitivityOfKeys()
List.of(31, "baz"));
}

// Verifies that non-text Ion values (bool, int, decimal, list, struct) read into a VARCHAR
// column are coerced to their string form, and that plain strings pass through unchanged.
@Test
public void testStringCoercions()
        throws IOException
{
    // Every case decodes a single VARCHAR field named "foo".
    RowType varcharRow = RowType.rowType(field("foo", VARCHAR));

    assertValues(varcharRow, "{ foo: true }", List.of("true"));
    assertValues(varcharRow, "{ foo: 31 }", List.of("31"));
    assertValues(varcharRow, "{ foo: 31.50 }", List.of("31.50"));
    assertValues(varcharRow, "{ foo: [1, 2, 3] }", List.of("[1, 2, 3]"));
    assertValues(varcharRow, "{ foo: \"bar\" }", List.of("bar"));
    assertValues(varcharRow, "{ foo: { nested_foo: 12 } }", List.of("{nested_foo: 12}"));
}

@Test
public void testCaseInsensitivityOfDuplicateKeys()
throws IOException
Expand Down

0 comments on commit a050424

Please sign in to comment.