diff --git a/src/java/org/rapidcontext/core/data/TextEncoding.java b/src/java/org/rapidcontext/core/data/TextEncoding.java index 0c10e47c..71cea15d 100644 --- a/src/java/org/rapidcontext/core/data/TextEncoding.java +++ b/src/java/org/rapidcontext/core/data/TextEncoding.java @@ -16,8 +16,7 @@ import java.io.UnsupportedEncodingException; import java.net.URLEncoder; - -import org.apache.commons.lang3.CharUtils; +import java.util.Objects; /** * A text encoding/escaping helper. Always encodes to printable ASCII @@ -98,13 +97,12 @@ public static String encode(TextEncoding encoding, String str) { */ public static String encodeAscii(String str, boolean linebreaks) { StringBuilder buffer = new StringBuilder(); - for (int i = 0; str != null && i < str.length(); i ++) { - char c = str.charAt(i); + Objects.requireNonNullElse(str, "").chars().forEach(c -> { buffer.append(switch (c) { - case '\n', '\r' -> linebreaks ? c : ' '; - default -> CharUtils.isAsciiPrintable(c) ? c : ' '; + case '\n', '\r' -> linebreaks ? (char) c : ' '; + default -> (32 <= c && c < 127) ? 
(char) c : ' '; }); - } + }); return buffer.toString(); } @@ -122,40 +120,18 @@ public static String encodeAscii(String str, boolean linebreaks) { */ public static String encodeProperty(String str, boolean linebreaks) { StringBuilder buffer = new StringBuilder(); - for (int i = 0; str != null && i < str.length(); i ++) { - char c = str.charAt(i); - switch (c) { - case '\\': - buffer.append("\\\\"); - break; - case '\t': - buffer.append("\\t"); - break; - case ' ': - if (buffer.lastIndexOf("\n") == buffer.length() - 1) { - buffer.append("\\"); - } - buffer.append(" "); - break; - case '\n': - buffer.append("\\n"); - if (linebreaks) { - buffer.append("\\\n"); - } - break; - case '\r': - if (!linebreaks) { - buffer.append("\\r"); - } - break; - default: - if (CharUtils.isAsciiPrintable(c)) { - buffer.append(c); - } else { - buffer.append(String.format("\\u%04x", Integer.valueOf(c))); - } - } - } + Objects.requireNonNullElse(str, "").chars().forEach(c -> { + int len = buffer.length(); + boolean initial = len == 0 || buffer.charAt(len - 1) == '\n'; + buffer.append(switch (c) { + case '\\' -> "\\\\"; + case '\t' -> "\\t"; + case ' ' -> initial ? "\\ " : " "; + case '\n' -> linebreaks ? "\\n\\\n" : "\\n"; + case '\r' -> linebreaks ? "" : "\\r"; + default -> (32 <= c && c < 127) ? 
(char) c : String.format("\\u%04x", c); + }); + }); return buffer.toString(); } @@ -170,32 +146,16 @@ public static String encodeProperty(String str, boolean linebreaks) { */ public static String encodeJson(String str) { StringBuilder buffer = new StringBuilder(); - for (int i = 0; str != null && i < str.length(); i++) { - char c = str.charAt(i); - switch (c) { - case '\\': - buffer.append("\\\\"); - break; - case '\"': - buffer.append("\\\""); - break; - case '\t': - buffer.append("\\t"); - break; - case '\n': - buffer.append("\\n"); - break; - case '\r': - buffer.append("\\r"); - break; - default: - if (32 <= c && c < 127) { - buffer.append(c); - } else { - buffer.append(String.format("\\u%04x", Integer.valueOf(c))); - } - } - } + Objects.requireNonNullElse(str, "").chars().forEach(c -> { + buffer.append(switch (c) { + case '\\' -> "\\\\"; + case '\"' -> "\\\""; + case '\t' -> "\\t"; + case '\n' -> "\\n"; + case '\r' -> "\\r"; + default -> (32 <= c && c < 127) ? (char) c : String.format("\\u%04x", c); + }); + }); return buffer.toString(); } @@ -230,37 +190,17 @@ public static String encodeJsonString(String str) { */ public static String encodeXml(String str, boolean linebreaks) { StringBuilder buffer = new StringBuilder(); - for (int i = 0; str != null && i < str.length(); i ++) { - char c = str.charAt(i); - switch (c) { - case '<': - buffer.append("&lt;"); - break; - case '>': - buffer.append("&gt;"); - break; - case '&': - buffer.append("&amp;"); - break; - case '"': - buffer.append("&quot;"); - break; - case '\n': - buffer.append(linebreaks ? "\n" : " "); - break; - case '\r': - buffer.append(linebreaks ? 
"\r" : " "); - break; - default: - if (CharUtils.isAsciiPrintable(c)) { - buffer.append(c); - } else { - buffer.append("&#"); - buffer.append(String.valueOf((int) c)); - buffer.append(";"); - } - } - } + Objects.requireNonNullElse(str, "").codePoints().forEach(c -> { + buffer.append(switch (c) { + case '<' -> "&lt;"; + case '>' -> "&gt;"; + case '&' -> "&amp;"; + case '"' -> "&quot;"; + case '\n' -> linebreaks ? "\n" : " "; + case '\r' -> linebreaks ? "\r" : " "; + default -> (32 <= c && c < 127) ? (char) c : String.format("&#%d;", c); + }); + }); return buffer.toString(); } diff --git a/test/src/java/org/rapidcontext/core/data/TextEncodingTest.java b/test/src/java/org/rapidcontext/core/data/TextEncodingTest.java new file mode 100644 index 00000000..d3a624ca --- /dev/null +++ b/test/src/java/org/rapidcontext/core/data/TextEncodingTest.java @@ -0,0 +1,44 @@ +package org.rapidcontext.core.data; + +import static org.junit.Assert.*; + +import org.junit.Test; + +/** + * Unit tests for the TextEncoding helper methods. 
+ */ +@SuppressWarnings("javadoc") +public class TextEncodingTest { + + @Test + public void testEncodeProperty() { + assertEquals("", TextEncoding.encodeProperty(null, true)); + assertEquals("\\ \\t\\n\\\n\\ ", TextEncoding.encodeProperty(" \t\r\n ", true)); + assertEquals("\\ \\t\\r\\n ", TextEncoding.encodeProperty(" \t\r\n ", false)); + assertEquals("\\\\", TextEncoding.encodeProperty("\\", true)); + assertEquals("abc123", TextEncoding.encodeProperty("abc123", true)); + assertEquals("\\u00e5\\u00e4\\u00f6", TextEncoding.encodeProperty("\u00E5\u00E4\u00F6", true)); + assertEquals("emoji", "\\ud83e\\udd16", TextEncoding.encodeProperty("\uD83E\uDD16", true)); + } + + @Test + public void testEncodeJson() { + assertEquals("", TextEncoding.encodeJson(null)); + assertEquals(" \\t\\r\\n", TextEncoding.encodeJson(" \t\r\n")); + assertEquals("\\\\\\\"'", TextEncoding.encodeJson("\\\"'")); + assertEquals("abc123", TextEncoding.encodeJson("abc123")); + assertEquals("\\u00e5\\u00e4\\u00f6", TextEncoding.encodeJson("\u00E5\u00E4\u00F6")); + assertEquals("emoji", "\\ud83e\\udd16", TextEncoding.encodeJson("\uD83E\uDD16")); + } + + @Test + public void testEncodeXml() { + assertEquals("", TextEncoding.encodeXml(null, true)); + assertEquals("\r\n", TextEncoding.encodeXml("\r\n", true)); + assertEquals("  ", TextEncoding.encodeXml("\r\n", false)); + assertEquals("&lt;&gt;&amp;&quot;'", TextEncoding.encodeXml("<>&\"'", true)); + assertEquals("abc123", TextEncoding.encodeXml("abc123", true)); + assertEquals("&#229;&#228;&#246;", TextEncoding.encodeXml("\u00E5\u00E4\u00F6", true)); + assertEquals("emoji", "&#129302;", TextEncoding.encodeXml("\uD83E\uDD16", true)); + } +}