Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Branch-less alphanumeric underscore's replacement #1294

Merged
merged 5 commits into from
Feb 27, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 104 additions & 16 deletions common/src/main/java/io/smallrye/config/common/utils/StringUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,43 @@
*/
public class StringUtil {

/**
 * Replacement table indexed by Latin-1 code point (0-255): ASCII letters and digits map to
 * themselves, every other entry maps to an underscore.
 */
private static final byte[] NON_ALPHANUMERIC_UNDERSCORE_REPLACEMENTS = new byte[256];

static {
    final byte[] table = NON_ALPHANUMERIC_UNDERSCORE_REPLACEMENTS;
    // default every Latin-1 slot to an underscore...
    for (int i = 0; i < table.length; i++) {
        table[i] = '_';
    }
    // ...then let the ASCII digits and letters stand for themselves
    for (char digit = '0'; digit <= '9'; digit++) {
        table[digit] = (byte) digit;
    }
    for (char upper = 'A'; upper <= 'Z'; upper++) {
        table[upper] = (byte) upper;
    }
    for (char lower = 'a'; lower <= 'z'; lower++) {
        table[lower] = (byte) lower;
    }
}

/**
 * Tells whether {@code c} is an ASCII letter ({@code a-z}, {@code A-Z}) or digit ({@code 0-9}).
 *
 * @param c the character to test
 * @return {@code true} if the replacement table keeps {@code c} as is, {@code false} if it maps
 *         it to an underscore or {@code c} lies outside the Latin-1 range
 */
public static boolean isAsciiLetterOrDigit(char c) {
    // characters above 255 are not covered by the table and are never ASCII letters or digits
    return c <= 255 && NON_ALPHANUMERIC_UNDERSCORE_REPLACEMENTS[c & 0xFF] != '_';
}

/**
 * Looks up the replacement of {@code c} as a {@code char}.
 *
 * @param c the character to replace
 * @return {@code c} itself when it is an ASCII letter or digit, an underscore otherwise
 */
private static char replacementOf(char c) {
    if (c <= 255) {
        // widen the stored byte as an unsigned value before narrowing it to a char
        final int unsigned = NON_ALPHANUMERIC_UNDERSCORE_REPLACEMENTS[c & 0xFF] & 0xFF;
        return (char) unsigned;
    }
    // outside the Latin-1 range covered by the table
    return '_';
}

/**
 * Looks up the replacement of {@code c} as the raw {@code byte} stored in the table.
 *
 * @param c the character to replace
 * @return the byte value of {@code c} when it is an ASCII letter or digit, the byte value of an
 *         underscore otherwise
 */
private static byte rawReplacementOf(char c) {
    // characters above 255 are not covered by the table and always become an underscore
    return c > 255 ? (byte) '_' : NON_ALPHANUMERIC_UNDERSCORE_REPLACEMENTS[c & 0xFF];
}

// shared zero-length array handed to List.toArray so that it returns a String[]
private static final String[] NO_STRINGS = new String[0];

// three alternatives: a run of commas (group 1), a run of characters that are neither a
// backslash nor a comma (group 2), or a backslash followed by one character (group 3)
private static final Pattern ITEM_PATTERN = Pattern.compile("(,+)|([^\\\\,]+)|\\\\(.)");
Expand Down Expand Up @@ -87,32 +124,83 @@ public static String[] split(String text) {
return list.toArray(NO_STRINGS);
}

/**
 * Tells whether {@code c} is an ASCII letter ({@code a-z}, {@code A-Z}) or digit ({@code 0-9}).
 *
 * @param c the character to test
 * @return {@code true} only for the 62 ASCII alphanumeric characters
 */
public static boolean isAsciiLetterOrDigit(char c) {
    if (c >= '0' && c <= '9') {
        return true;
    }
    if (c >= 'A' && c <= 'Z') {
        return true;
    }
    return c >= 'a' && c <= 'z';
}

/**
 * Returns a copy of {@code name} in which every character that is not an ASCII letter or digit
 * is replaced by an underscore. When the last character of {@code name} is a double quote, one
 * additional underscore is appended.
 *
 * @param name the name to convert; must not be {@code null}
 * @return {@code name} itself when it is empty, otherwise the converted string
 */
public static String replaceNonAlphanumericByUnderscores(final String name) {
    final int length = name.length();
    if (length == 0) {
        return name;
    }
    // size it accounting for worst case scenario: a trailing double quote adds one underscore
    final byte[] result = new byte[length + 1];
    // holds the last character seen once the loop is over (length > 0 here, so it is always set)
    char c = 0;
    for (int i = 0; i < length; i++) {
        c = name.charAt(i);
        result[i] = rawReplacementOf(c);
    }
    // every byte in result is an ASCII letter, digit or underscore, so the
    // String(byte[], hibyte, offset, count) constructor with hibyte 0 rebuilds the text as is
    if (c == '"') {
        result[length] = '_';
        return new String(result, 0, 0, length + 1);
    } else {
        return new String(result, 0, 0, length);
    }
}

/**
 * Appends to {@code sb} the characters of {@code name}, replacing every character that is not
 * an ASCII letter or digit by an underscore. When the last character of {@code name} is a
 * double quote, one additional underscore is appended.
 *
 * @param name the name to convert; must not be {@code null}
 * @param sb the builder receiving the converted characters; any content it already holds is
 *        part of the returned string
 * @return the content of {@code sb} after the append
 */
public static String replaceNonAlphanumericByUnderscores(final String name, final StringBuilder sb) {
    int length = name.length();
    // bogus value: only read after the loop, where it holds the last character of a non-empty name
    char c = 0;
    for (int i = 0; i < length; i++) {
        c = name.charAt(i);
        sb.append(replacementOf(c));
    }
    if (c == '"') {
        sb.append('_');
    }
    return sb.toString();
}

/**
 * Holder of a byte array that is swapped for a larger one whenever more room is requested.
 */
public static final class ResizableByteArray {

    private byte[] array;

    /**
     * @param initialSize the length of the array allocated up front
     */
    public ResizableByteArray(int initialSize) {
        array = new byte[initialSize];
    }

    /**
     * Returns an array holding at least {@code capacity} bytes, allocating a new one of exactly
     * that length when the current one is too short.
     */
    private byte[] ensureCapacity(int capacity) {
        final byte[] current = this.array;
        if (current.length >= capacity) {
            return current;
        }
        // the old content is deliberately dropped: callers overwrite the array from index 0
        final byte[] grown = new byte[capacity];
        this.array = grown;
        return grown;
    }
}

/**
 * Returns a copy of {@code name} in which every character that is not an ASCII letter or digit
 * is replaced by an underscore, using the array held by {@code sb} as scratch space. When the
 * last character of {@code name} is a double quote, one additional underscore is appended.
 *
 * @param name the name to convert; must not be {@code null}
 * @param sb the reusable scratch buffer; its content is overwritten
 * @return {@code name} itself when it is empty, otherwise the converted string
 */
public static String replaceNonAlphanumericByUnderscores(final String name, final ResizableByteArray sb) {
    final int nameLength = name.length();
    if (nameLength == 0) {
        return name;
    }
    // worst case: a trailing double quote needs one extra underscore
    final byte[] buffer = sb.ensureCapacity(nameLength + 1);
    // this loop could live in a helper shared with the other overloads, but the code may run
    // at the Tier1 compilation level and C1MaxInlineSize is 35, so C1 would not inline such a
    // helper: duplicating it is the better option
    char last = 0;
    int index = 0;
    while (index < nameLength) {
        last = name.charAt(index);
        buffer[index] = rawReplacementOf(last);
        index++;
    }
    int resultLength = nameLength;
    if (last == '"') {
        buffer[resultLength++] = '_';
    }
    return new String(buffer, 0, 0, resultLength);
}

public static String toLowerCaseAndDotted(final String name) {
int length = name.length();

Expand Down