diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/CharLengthFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/CharLengthFunction.java
new file mode 100644
index 0000000000..a78d16db8f
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/CharLengthFunction.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+
+/**
+ * CharLengthFunction
+ * description: char_length(string)
+ * - return the character length of the string (counted in Unicode code points)
+ * - return NULL if the string is NULL
+ */
+@TransformFunction(names = {"char_length"})
+public class CharLengthFunction implements ValueParser {
+
+    private final ValueParser stringParser;
+
+    /**
+     * Builds the value parser for the first argument of the char_length call.
+     *
+     * @param expr the parsed char_length function expression
+     */
+    public CharLengthFunction(Function expr) {
+        List<Expression> expressions = expr.getParameters().getExpressions();
+        stringParser = OperatorTools.buildParser(expressions.get(0));
+    }
+
+    /**
+     * Evaluates the argument for the given row and measures its string form.
+     *
+     * @return the number of characters as an Integer, or null when the argument evaluates to null
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        Object stringObject = stringParser.parse(sourceData, rowIndex, context);
+        if (stringObject == null) {
+            return null;
+        }
+        String str = OperatorTools.parseString(stringObject);
+        // count code points, not UTF-16 code units, so a surrogate pair is one character
+        return str.codePointCount(0, str.length());
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
index 0c1abba8f0..e7c7df2a22 100644
--- a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
@@ -22,21 +22,34 @@
 import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
 import org.apache.inlong.sdk.transform.process.parser.ValueParser;
 
+import net.sf.jsqlparser.expression.Expression;
 import net.sf.jsqlparser.expression.Function;
 
+import java.nio.charset.Charset;
+import java.util.List;
+
 /**
  * LengthFunction
- * description: length(string)
- * - return the length of the string
+ * description: length(string,[charsetName])
+ * - return the byte length of the string in charsetName (JVM default charset if omitted)
+ * - return NULL if charsetName is given but evaluates to NULL
  * - return NULL if the string is NULL
  */
 @TransformFunction(names = {"length"})
 public class LengthFunction implements ValueParser {
 
+    private static final Charset DEFAULT_CHARSET = Charset.defaultCharset();
+
     private final ValueParser stringParser;
+    // stays null when the optional charsetName argument is not supplied
+    private ValueParser charSetNameParser;
 
     public LengthFunction(Function expr) {
-        stringParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
+        List<Expression> expressions = expr.getParameters().getExpressions();
+        stringParser = OperatorTools.buildParser(expressions.get(0));
+        if (expressions.size() > 1) {
+            charSetNameParser = OperatorTools.buildParser(expressions.get(1));
+        }
     }
 
     @Override
@@ -45,6 +58,16 @@ public Object parse(SourceData sourceData, int rowIndex, Context context) {
         if (stringObject == null) {
             return null;
         }
-        return OperatorTools.parseString(stringObject).length();
+        Charset charset = DEFAULT_CHARSET;
+        if (charSetNameParser != null) {
+            Object charsetNameObject = charSetNameParser.parse(sourceData, rowIndex, context);
+            if (charsetNameObject == null) {
+                // NULL charsetName: propagate NULL instead of handing a missing name to Charset.forName
+                return null;
+            }
+            charset = Charset.forName(OperatorTools.parseString(charsetNameObject));
+        }
+        String str = OperatorTools.parseString(stringObject);
+        return str.getBytes(charset).length;
     }
 }
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCharLengthFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCharLengthFunction.java
new file mode 100644
index 0000000000..35a360c6f6
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCharLengthFunction.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+public class TestCharLengthFunction extends AbstractFunctionStringTestBase {
+
+    @Test
+    public void testCharLengthFunction() throws Exception {
+        String transformSql = null, data = null;
+        TransformConfig config = null;
+        TransformProcessor processor = null;
+        List output = null;
+
+        transformSql = "select char_length(string1) from source";
+        config = new TransformConfig(transformSql);
+        processor = TransformProcessor
+                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case1: char_length('hello world')
+        data = "hello world|";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=11", output.get(0));
+
+        // case2: char_length('应龙')
+        data = "应龙|";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=2", output.get(0));
+
+        transformSql = "select char_length(xxd) from source";
+        config = new TransformConfig(transformSql);
+        processor = TransformProcessor
+                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case3: char_length(null)
+        data = "hello world|apple|cloud|2|1|3";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=", output.get(0));
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCompressFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCompressFunction.java
index dd01c06ecb..c31867be56 100644
--- a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCompressFunction.java
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCompressFunction.java
@@ -32,52 +32,52 @@ public class TestCompressFunction extends AbstractFunctionStringTestBase {
 
     @Test
     public void testCompressFunction() throws Exception {
-        String transformSql = "select length(compress(replicate(string1,100))) from source";
+        String transformSql = "select length(compress(replicate(string1,100)),'ISO_8859_1') from source";
         TransformConfig config = new TransformConfig(transformSql);
         TransformProcessor processor1 = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
-        // case1: length(compress(replicate(string1,100)))
+        // case1: length(compress(replicate(string1,100)),'ISO_8859_1')
         List output1 = processor1.transform("abcdefghijk|apple|cloud|2|1|3", new HashMap<>());
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=33", output1.get(0));
 
-        transformSql = "select length(compress(string1)) from source";
+        transformSql = "select length(compress(string1),'ISO_8859_1') from source";
         config = new TransformConfig(transformSql);
         processor1 = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
-        // case2: length(compress(''))
+        // case2: length(compress(''),'ISO_8859_1')
         output1 = processor1.transform("|apple|cloud|2|1|3", new HashMap<>());
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=0", output1.get(0));
 
-        transformSql = "select length(compress(xxd)) from source";
+        transformSql = "select length(compress(xxd),'ISO_8859_1') from source";
         config = new TransformConfig(transformSql);
         processor1 = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
-        // case3: length(compress(null))
+        // case3: length(compress(null),'ISO_8859_1')
         output1 = processor1.transform("hello world|apple|cloud|2|1|3", new HashMap<>());
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=", output1.get(0));
 
-        transformSql = "select length(compress(string1,string2)) from source";
+        transformSql = "select length(compress(string1,string2),'ISO_8859_1') from source";
         config = new TransformConfig(transformSql);
         processor1 = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
-        // case4: length(compress('hello world','Gzip'))
+        // case4: length(compress('hello world','Gzip'),'ISO_8859_1')
         output1 = processor1.transform("hello world|Gzip|cloud|2|1|3", new HashMap<>());
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=35", output1.get(0));
 
-        // case5: length(compress('hello world','zip'))
+        // case5: length(compress('hello world','zip'),'ISO_8859_1')
         output1 = processor1.transform("hello world|zip|cloud|2|1|3", new HashMap<>());
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=131", output1.get(0));
 
-        // case5: length(compress('hello world','undefinedType'))
+        // case6: length(compress('hello world','undefinedType'),'ISO_8859_1')
         output1 = processor1.transform("hello world|undefinedType|cloud|2|1|3", new HashMap<>());
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=", output1.get(0));
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
index 7181dcedf9..a4c87b68e4 100644
--- a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
@@ -32,24 +32,48 @@ public class TestLengthFunction extends AbstractFunctionStringTestBase {
 
     @Test
     public void testLengthFunction() throws Exception {
-        String transformSql = "select length(string1) from source";
-        TransformConfig config = new TransformConfig(transformSql);
-        TransformProcessor processor1 = TransformProcessor
+        String transformSql = null, data = null;
+        TransformConfig config = null;
+        TransformProcessor processor = null;
+        List output = null;
+
+        transformSql = "select length(string1) from source";
+        config = new TransformConfig(transformSql);
+        processor = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
         // case1: length('hello world')
-        List output1 = processor1.transform("hello world|apple|cloud|2|1|3", new HashMap<>());
-        Assert.assertEquals(1, output1.size());
-        Assert.assertEquals("result=11", output1.get(0));
+        data = "hello world|apple|cloud|2|1|3";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=11", output.get(0));
 
         transformSql = "select length(xxd) from source";
         config = new TransformConfig(transformSql);
-        processor1 = TransformProcessor
+        processor = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
         // case2: length(null)
-        output1 = processor1.transform("hello world|apple|cloud|2|1|3", new HashMap<>());
-        Assert.assertEquals(1, output1.size());
-        Assert.assertEquals("result=", output1.get(0));
+        data = "hello world|apple|cloud|2|1|3";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=", output.get(0));
+
+        transformSql = "select length(string1,string2) from source";
+        config = new TransformConfig(transformSql);
+        processor = TransformProcessor
+                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case3: length(应龙, utf-8)
+        data = "应龙|utf-8|cloud|2|1|3";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=6", output.get(0));
+
+        // case4: length(应龙, gbk)
+        data = "应龙|gbk|cloud|2|1|3";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=4", output.get(0));
     }
 }