Skip to content

Commit

Permalink
add unicode normalization transform
Browse files: browse the repository at this point in the history
  • Loading branch information
ayoisaiah committed Jan 1, 2025
1 parent 69054a4 commit dd14dbd
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 1 deletion.
12 changes: 12 additions & 0 deletions replace/replace_test/variables_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,18 @@ func TestVariables(t *testing.T) {
"$1 - {<$2>.up}{ext.ti}",
},
},
{
Name: "normalize unicode NFKC",
Changes: file.Changes{
{
Source: "𝐇𝐞𝐥𝐥𝐨 World Ⓗⓘ ᵂᵒʳˡᵈ 𝟙𝟚𝟛 ℍ𝕚 ℕ𝕦𝕞𝕓𝕖𝕣𝕤 123 ①②③ ⒈⒉⒊",
},
},
Want: []string{
"Hello World Hi World 123 Hi Numbers 123 123 1.2.3.",
},
Args: []string{"-f", ".*", "-r", "{.norm}"},
},
{
Name: "remove diacritics",
Changes: file.Changes{
Expand Down
2 changes: 1 addition & 1 deletion replace/variables/variable_regex.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func init() {
tokenString := strings.Join(tokens, "|")

transformTokens = fmt.Sprintf(
"(up|lw|ti|win|mac|di|(?:dt\\.(%s)))",
"(up|lw|ti|win|mac|di|norm|(?:dt\\.(%s)))",
tokenString,
)

Expand Down
7 changes: 7 additions & 0 deletions replace/variables/variables.go
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,13 @@ func transformString(source, token string) string {
return source
}

return result
case "norm":
result, _, err := transform.String(norm.NFKC, source)
if err != nil {
return source
}

return result
}

Expand Down

0 comments on commit dd14dbd

Please sign in to comment.