Skip to content

Commit

Permalink
core: Refactored text encoding helper for compactness
Browse files Browse the repository at this point in the history
  • Loading branch information
cederberg committed Dec 16, 2024
1 parent 1da23a2 commit d9dc0d7
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 98 deletions.
136 changes: 38 additions & 98 deletions src/java/org/rapidcontext/core/data/TextEncoding.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;

import org.apache.commons.lang3.CharUtils;
import java.util.Objects;

/**
* A text encoding/escaping helper. Always encodes to printable ASCII
Expand Down Expand Up @@ -98,13 +97,12 @@ public static String encode(TextEncoding encoding, String str) {
*/
public static String encodeAscii(String str, boolean linebreaks) {
StringBuilder buffer = new StringBuilder();
for (int i = 0; str != null && i < str.length(); i ++) {
char c = str.charAt(i);
Objects.requireNonNullElse(str, "").chars().forEach(c -> {
buffer.append(switch (c) {
case '\n', '\r' -> linebreaks ? c : ' ';
default -> CharUtils.isAsciiPrintable(c) ? c : ' ';
case '\n', '\r' -> linebreaks ? (char) c : ' ';
default -> (32 <= c && c < 127) ? (char) c : ' ';
});
}
});
return buffer.toString();
}

Expand All @@ -122,40 +120,18 @@ public static String encodeAscii(String str, boolean linebreaks) {
*/
public static String encodeProperty(String str, boolean linebreaks) {
StringBuilder buffer = new StringBuilder();
for (int i = 0; str != null && i < str.length(); i ++) {
char c = str.charAt(i);
switch (c) {
case '\\':
buffer.append("\\\\");
break;
case '\t':
buffer.append("\\t");
break;
case ' ':
if (buffer.lastIndexOf("\n") == buffer.length() - 1) {
buffer.append("\\");
}
buffer.append(" ");
break;
case '\n':
buffer.append("\\n");
if (linebreaks) {
buffer.append("\\\n");
}
break;
case '\r':
if (!linebreaks) {
buffer.append("\\r");
}
break;
default:
if (CharUtils.isAsciiPrintable(c)) {
buffer.append(c);
} else {
buffer.append(String.format("\\u%04x", Integer.valueOf(c)));
}
}
}
Objects.requireNonNullElse(str, "").chars().forEach(c -> {
int len = buffer.length();
boolean initial = len == 0 || buffer.charAt(len - 1) == '\n';
buffer.append(switch (c) {
case '\\' -> "\\\\";
case '\t' -> "\\t";
case ' ' -> initial ? "\\ " : " ";
case '\n' -> linebreaks ? "\\n\\\n" : "\\n";
case '\r' -> linebreaks ? "" : "\\r";
default -> (32 <= c && c < 127) ? (char) c : String.format("\\u%04x", c);
});
});
return buffer.toString();
}

Expand All @@ -170,32 +146,16 @@ public static String encodeProperty(String str, boolean linebreaks) {
*/
public static String encodeJson(String str) {
StringBuilder buffer = new StringBuilder();
for (int i = 0; str != null && i < str.length(); i++) {
char c = str.charAt(i);
switch (c) {
case '\\':
buffer.append("\\\\");
break;
case '\"':
buffer.append("\\\"");
break;
case '\t':
buffer.append("\\t");
break;
case '\n':
buffer.append("\\n");
break;
case '\r':
buffer.append("\\r");
break;
default:
if (32 <= c && c < 127) {
buffer.append(c);
} else {
buffer.append(String.format("\\u%04x", Integer.valueOf(c)));
}
}
}
Objects.requireNonNullElse(str, "").chars().forEach(c -> {
buffer.append(switch (c) {
case '\\' -> "\\\\";
case '\"' -> "\\\"";
case '\t' -> "\\t";
case '\n' -> "\\n";
case '\r' -> "\\r";
default -> (32 <= c && c < 127) ? (char) c : String.format("\\u%04x", c);
});
});
return buffer.toString();
}

Expand Down Expand Up @@ -230,37 +190,17 @@ public static String encodeJsonString(String str) {
*/
public static String encodeXml(String str, boolean linebreaks) {
StringBuilder buffer = new StringBuilder();
for (int i = 0; str != null && i < str.length(); i ++) {
char c = str.charAt(i);
switch (c) {
case '<':
buffer.append("&lt;");
break;
case '>':
buffer.append("&gt;");
break;
case '&':
buffer.append("&amp;");
break;
case '"':
buffer.append("&quot;");
break;
case '\n':
buffer.append(linebreaks ? "\n" : "&#10;");
break;
case '\r':
buffer.append(linebreaks ? "\r" : "&#13;");
break;
default:
if (CharUtils.isAsciiPrintable(c)) {
buffer.append(c);
} else {
buffer.append("&#");
buffer.append(String.valueOf((int) c));
buffer.append(";");
}
}
}
Objects.requireNonNullElse(str, "").codePoints().forEach(c -> {
buffer.append(switch (c) {
case '<' -> "&lt;";
case '>' -> "&gt;";
case '&' -> "&amp;";
case '"' -> "&quot;";
case '\n' -> linebreaks ? "\n" : "&#10;";
case '\r' -> linebreaks ? "\r" : "&#13;";
default -> (32 <= c && c < 127) ? (char) c : String.format("&#%d;", c);
});
});
return buffer.toString();
}

Expand Down
44 changes: 44 additions & 0 deletions test/src/java/org/rapidcontext/core/data/TextEncodingTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package org.rapidcontext.core.data;

import static org.junit.Assert.*;

import org.junit.Test;

/**
 * Verifies the escaping performed by the TextEncoding helper when
 * producing properties file, JSON and XML output.
 */
@SuppressWarnings("javadoc")
public class TextEncodingTest {

    /** Plain ASCII letters and digits, expected to pass through untouched. */
    private static final String PLAIN = "abc123";

    /** Three non-ASCII letters (U+00E5, U+00E4, U+00F6), one UTF-16 unit each. */
    private static final String NON_ASCII = "\u00E5\u00E4\u00F6";

    /** The code point U+1F916, stored as a UTF-16 surrogate pair. */
    private static final String EMOJI = "\uD83E\uDD16";

    @Test
    public void testEncodeProperty() {
        String whitespace = " \t\r\n ";

        // A null input is encoded as the empty string
        assertEquals("", TextEncoding.encodeProperty(null, true));

        // Keeping line breaks: the carriage return is dropped, the newline
        // becomes an escaped newline plus a line continuation, and a space
        // at the start of a line is escaped
        assertEquals("\\ \\t\\n\\\n\\ ", TextEncoding.encodeProperty(whitespace, true));

        // Not keeping line breaks: both line break characters are escaped
        // and only the very first space counts as leading
        assertEquals("\\ \\t\\r\\n ", TextEncoding.encodeProperty(whitespace, false));

        assertEquals("\\\\", TextEncoding.encodeProperty("\\", true));
        assertEquals(PLAIN, TextEncoding.encodeProperty(PLAIN, true));
        assertEquals("\\u00e5\\u00e4\\u00f6", TextEncoding.encodeProperty(NON_ASCII, true));

        // The surrogate pair is escaped as two separate UTF-16 units
        assertEquals("emoji", "\\ud83e\\udd16", TextEncoding.encodeProperty(EMOJI, true));
    }

    @Test
    public void testEncodeJson() {
        // A null input is encoded as the empty string
        assertEquals("", TextEncoding.encodeJson(null));

        // Spaces are kept, while tab and line break characters are escaped
        assertEquals(" \\t\\r\\n", TextEncoding.encodeJson(" \t\r\n"));

        // Backslash and double quote are escaped, the single quote is not
        assertEquals("\\\\\\\"'", TextEncoding.encodeJson("\\\"'"));

        assertEquals(PLAIN, TextEncoding.encodeJson(PLAIN));
        assertEquals("\\u00e5\\u00e4\\u00f6", TextEncoding.encodeJson(NON_ASCII));

        // The surrogate pair is escaped as two separate UTF-16 units
        assertEquals("emoji", "\\ud83e\\udd16", TextEncoding.encodeJson(EMOJI));
    }

    @Test
    public void testEncodeXml() {
        String lineBreak = "\r\n";

        // A null input is encoded as the empty string
        assertEquals("", TextEncoding.encodeXml(null, true));

        // Line breaks are either kept verbatim or turned into numeric
        // character references, depending on the flag
        assertEquals("\r\n", TextEncoding.encodeXml(lineBreak, true));
        assertEquals("&#13;&#10;", TextEncoding.encodeXml(lineBreak, false));

        // Markup characters and the double quote are replaced by entities,
        // the single quote is left as-is
        assertEquals("&lt;&gt;&amp;&quot;'", TextEncoding.encodeXml("<>&\"'", true));

        assertEquals(PLAIN, TextEncoding.encodeXml(PLAIN, true));
        assertEquals("&#229;&#228;&#246;", TextEncoding.encodeXml(NON_ASCII, true));

        // The surrogate pair is emitted as one reference to the full code point
        assertEquals("emoji", "&#129302;", TextEncoding.encodeXml(EMOJI, true));
    }
}

0 comments on commit d9dc0d7

Please sign in to comment.