Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Quick Check #2769

Merged
merged 1 commit into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions gcc/rust/rust-lang.cc
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ run_rust_tests ()
{
// Call tests for the rust frontend here
rust_input_source_test ();
rust_nfc_qc_test ();
rust_utf8_normalize_test ();
rust_punycode_encode_test ();
rust_cfg_parser_test ();
Expand Down
1 change: 1 addition & 0 deletions gcc/rust/rust-system.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include <utility>
#include <fstream>
#include <array>
#include <algorithm>

// Rust frontend requires C++11 minimum, so will have unordered_map and set
#include <unordered_map>
Expand Down
39 changes: 31 additions & 8 deletions gcc/rust/util/make-rust-unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,30 @@ def write_numeric() -> None:
print("}};")


def _write_range_table(name, ranges) -> None:
    """Print one C++ ``std::array`` of codepoint pairs to stdout.

    `name` is the C++ identifier of the emitted array. `ranges` is a sized
    iterable whose elements are indexed as ``r[0]`` and ``r[1]``; both values
    are printed as ``0x``-prefixed hex, zero-padded to at least four digits.
    """
    # The array length is emitted as a literal template argument, so it is
    # taken from the very list that is printed below.
    print(
        "const std::array<std::pair<uint32_t, uint32_t>, {}> {} = {{{{".format(
            len(ranges), name
        )
    )
    print(" // clang-format off")
    for r in ranges:
        print(" {{{:#06x}, {:#06x}}},".format(r[0], r[1]))
    print(" // clang-format on")
    # Not a format string: the braces are printed literally and close the
    # double-brace initializer opened in the header line.
    print("}};")


def write_nfc_qc() -> None:
    """Print the NFC_QC_NO_RANGES and NFC_QC_MAYBE_RANGES C++ tables.

    Reads the module-level lists `nfc_qc_no_ranges` and `nfc_qc_maybe_ranges`
    and writes the two array definitions back to back on stdout, the "NO"
    table first, with no blank line between them.
    """
    _write_range_table("NFC_QC_NO_RANGES", nfc_qc_no_ranges)
    _write_range_table("NFC_QC_MAYBE_RANGES", nfc_qc_maybe_ranges)


def main() -> None:
if len(sys.argv) != 4:
print("too few arguments", file=sys.stderr)
Expand All @@ -265,13 +289,12 @@ def main() -> None:
print(COPYRIGHT)
print()

print('#include "rust-system.h"')
print()
print("namespace Rust {")
print()
print('#include "rust-system.h"\n')
print("namespace Rust {\n")
print("const uint32_t NUM_ALPHABETIC_RANGES = {};".format(len(alphabetic_ranges)))
print("const uint32_t NUM_NUMERIC_CODEPOINTS = {};".format(len(numeric_codepoints)))
print()
print(
"const uint32_t NUM_NUMERIC_CODEPOINTS = {};\n".format(len(numeric_codepoints))
)

write_decomposition()
print()
Expand All @@ -283,8 +306,8 @@ def main() -> None:
print()
write_numeric()
print()

# TODO: write NFC_QC table
write_nfc_qc()
print()

print("} // namespace Rust")

Expand Down
1 change: 1 addition & 0 deletions gcc/rust/util/rust-codepoint.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ struct Codepoint
static Codepoint eof () { return Codepoint (UINT32_MAX); }
bool is_eof () const { return value == UINT32_MAX; }
bool is_ascii () const { return value <= MAX_ASCII_CODEPOINT; }
bool is_supplementary_character () const { return value > 0xFFFF; }

// Returns a C++ string containing string value of codepoint.
std::string as_string ();
Expand Down
158 changes: 154 additions & 4 deletions gcc/rust/util/rust-unicode-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

namespace Rust {

const uint32_t NUM_ALPHABETIC_RANGES = 1117;
const uint32_t NUM_ALPHABETIC_RANGES = 1141;
const uint32_t NUM_NUMERIC_CODEPOINTS = 1831;

const std::map<uint32_t, std::vector<uint32_t>> DECOMPOSITION_MAP = {
Expand Down Expand Up @@ -4167,6 +4167,7 @@ const std::array<std::pair<uint32_t, uint32_t>, NUM_ALPHABETIC_RANGES>
{0x0bd7, 0x0bd8},
{0x0c00, 0x0c01},
{0x0c01, 0x0c04},
{0x0c04, 0x0c05},
{0x0c05, 0x0c0d},
{0x0c0e, 0x0c11},
{0x0c12, 0x0c29},
Expand Down Expand Up @@ -4202,6 +4203,7 @@ const std::array<std::pair<uint32_t, uint32_t>, NUM_ALPHABETIC_RANGES>
{0x0ce0, 0x0ce2},
{0x0ce2, 0x0ce4},
{0x0cf1, 0x0cf3},
{0x0cf3, 0x0cf4},
{0x0d00, 0x0d02},
{0x0d02, 0x0d04},
{0x0d04, 0x0d0d},
Expand Down Expand Up @@ -4257,7 +4259,7 @@ const std::array<std::pair<uint32_t, uint32_t>, NUM_ALPHABETIC_RANGES>
{0x0f49, 0x0f6d},
{0x0f71, 0x0f7f},
{0x0f7f, 0x0f80},
{0x0f80, 0x0f82},
{0x0f80, 0x0f84},
{0x0f88, 0x0f8d},
{0x0f8d, 0x0f98},
{0x0f99, 0x0fbd},
Expand Down Expand Up @@ -4758,6 +4760,7 @@ const std::array<std::pair<uint32_t, uint32_t>, NUM_ALPHABETIC_RANGES>
{0x11071, 0x11073},
{0x11073, 0x11075},
{0x11075, 0x11076},
{0x11080, 0x11082},
{0x11082, 0x11083},
{0x11083, 0x110b0},
{0x110b0, 0x110b3},
Expand Down Expand Up @@ -4794,6 +4797,8 @@ const std::array<std::pair<uint32_t, uint32_t>, NUM_ALPHABETIC_RANGES>
{0x11234, 0x11235},
{0x11237, 0x11238},
{0x1123e, 0x1123f},
{0x1123f, 0x11241},
{0x11241, 0x11242},
{0x11280, 0x11287},
{0x11288, 0x11289},
{0x1128a, 0x1128e},
Expand Down Expand Up @@ -4948,12 +4953,22 @@ const std::array<std::pair<uint32_t, uint32_t>, NUM_ALPHABETIC_RANGES>
{0x11ee0, 0x11ef3},
{0x11ef3, 0x11ef5},
{0x11ef5, 0x11ef7},
{0x11f00, 0x11f02},
{0x11f02, 0x11f03},
{0x11f03, 0x11f04},
{0x11f04, 0x11f11},
{0x11f12, 0x11f34},
{0x11f34, 0x11f36},
{0x11f36, 0x11f3b},
{0x11f3e, 0x11f40},
{0x11f40, 0x11f41},
{0x11fb0, 0x11fb1},
{0x12000, 0x1239a},
{0x12400, 0x1246f},
{0x12480, 0x12544},
{0x12f90, 0x12ff1},
{0x13000, 0x1342f},
{0x13000, 0x13430},
{0x13441, 0x13447},
{0x14400, 0x14647},
{0x16800, 0x16a39},
{0x16a40, 0x16a5f},
Expand All @@ -4980,7 +4995,9 @@ const std::array<std::pair<uint32_t, uint32_t>, NUM_ALPHABETIC_RANGES>
{0x1aff5, 0x1affc},
{0x1affd, 0x1afff},
{0x1b000, 0x1b123},
{0x1b132, 0x1b133},
{0x1b150, 0x1b153},
{0x1b155, 0x1b156},
{0x1b164, 0x1b168},
{0x1b170, 0x1b2fc},
{0x1bc00, 0x1bc6b},
Expand Down Expand Up @@ -5021,16 +5038,21 @@ const std::array<std::pair<uint32_t, uint32_t>, NUM_ALPHABETIC_RANGES>
{0x1df00, 0x1df0a},
{0x1df0a, 0x1df0b},
{0x1df0b, 0x1df1f},
{0x1df25, 0x1df2b},
{0x1e000, 0x1e007},
{0x1e008, 0x1e019},
{0x1e01b, 0x1e022},
{0x1e023, 0x1e025},
{0x1e026, 0x1e02b},
{0x1e030, 0x1e06e},
{0x1e08f, 0x1e090},
{0x1e100, 0x1e12d},
{0x1e137, 0x1e13e},
{0x1e14e, 0x1e14f},
{0x1e290, 0x1e2ae},
{0x1e2c0, 0x1e2ec},
{0x1e4d0, 0x1e4eb},
{0x1e4eb, 0x1e4ec},
{0x1e7e0, 0x1e7e7},
{0x1e7e8, 0x1e7ec},
{0x1e7ed, 0x1e7ef},
Expand Down Expand Up @@ -5076,12 +5098,14 @@ const std::array<std::pair<uint32_t, uint32_t>, NUM_ALPHABETIC_RANGES>
{0x1f150, 0x1f16a},
{0x1f170, 0x1f18a},
{0x20000, 0x2a6e0},
{0x2a700, 0x2b739},
{0x2a700, 0x2b73a},
{0x2b740, 0x2b81e},
{0x2b820, 0x2cea2},
{0x2ceb0, 0x2ebe1},
{0x2ebf0, 0x2ee5e},
{0x2f800, 0x2fa1e},
{0x30000, 0x3134b},
{0x31350, 0x323b0},
// clang-format on
}};

Expand Down Expand Up @@ -5205,4 +5229,130 @@ const std::array<uint32_t, NUM_NUMERIC_CODEPOINTS> NUMERIC_CODEPOINTS = {{
// clang-format on
}};

// Table of 74 codepoint ranges, each stored as a {first, second} pair and
// listed in ascending order. The declaration has the exact shape printed by
// write_nfc_qc () in make-rust-unicode.py, so prefer regenerating it with
// that script over editing entries by hand.
// NOTE(review): the name suggests these are the codepoints whose Unicode
// NFC_Quick_Check property is "No" -- confirm against the consumer.
// NOTE(review): adjacent entries share an endpoint (e.g. {0x2329, 0x232a}
// followed by {0x232a, 0x232b}), which suggests each pair is the half-open
// interval [first, second) -- confirm against the lookup code.
const std::array<std::pair<uint32_t, uint32_t>, 74> NFC_QC_NO_RANGES = {{
// clang-format off
{0x0340, 0x0342},
{0x0343, 0x0345},
{0x0374, 0x0375},
{0x037e, 0x037f},
{0x0387, 0x0388},
{0x0958, 0x0960},
{0x09dc, 0x09de},
{0x09df, 0x09e0},
{0x0a33, 0x0a34},
{0x0a36, 0x0a37},
{0x0a59, 0x0a5c},
{0x0a5e, 0x0a5f},
{0x0b5c, 0x0b5e},
{0x0f43, 0x0f44},
{0x0f4d, 0x0f4e},
{0x0f52, 0x0f53},
{0x0f57, 0x0f58},
{0x0f5c, 0x0f5d},
{0x0f69, 0x0f6a},
{0x0f73, 0x0f74},
{0x0f75, 0x0f77},
{0x0f78, 0x0f79},
{0x0f81, 0x0f82},
{0x0f93, 0x0f94},
{0x0f9d, 0x0f9e},
{0x0fa2, 0x0fa3},
{0x0fa7, 0x0fa8},
{0x0fac, 0x0fad},
{0x0fb9, 0x0fba},
{0x1f71, 0x1f72},
{0x1f73, 0x1f74},
{0x1f75, 0x1f76},
{0x1f77, 0x1f78},
{0x1f79, 0x1f7a},
{0x1f7b, 0x1f7c},
{0x1f7d, 0x1f7e},
{0x1fbb, 0x1fbc},
{0x1fbe, 0x1fbf},
{0x1fc9, 0x1fca},
{0x1fcb, 0x1fcc},
{0x1fd3, 0x1fd4},
{0x1fdb, 0x1fdc},
{0x1fe3, 0x1fe4},
{0x1feb, 0x1fec},
{0x1fee, 0x1ff0},
{0x1ff9, 0x1ffa},
{0x1ffb, 0x1ffc},
{0x1ffd, 0x1ffe},
{0x2000, 0x2002},
{0x2126, 0x2127},
{0x212a, 0x212c},
{0x2329, 0x232a},
{0x232a, 0x232b},
{0x2adc, 0x2add},
{0xf900, 0xfa0e},
{0xfa10, 0xfa11},
{0xfa12, 0xfa13},
{0xfa15, 0xfa1f},
{0xfa20, 0xfa21},
{0xfa22, 0xfa23},
{0xfa25, 0xfa27},
{0xfa2a, 0xfa6e},
{0xfa70, 0xfada},
{0xfb1d, 0xfb1e},
{0xfb1f, 0xfb20},
{0xfb2a, 0xfb37},
{0xfb38, 0xfb3d},
{0xfb3e, 0xfb3f},
{0xfb40, 0xfb42},
{0xfb43, 0xfb45},
{0xfb46, 0xfb4f},
{0x1d15e, 0x1d165},
{0x1d1bb, 0x1d1c1},
{0x2f800, 0x2fa1e},
// clang-format on
}};
// Table of 43 codepoint ranges, each stored as a {first, second} pair and
// listed in ascending order. The declaration has the exact shape printed by
// write_nfc_qc () in make-rust-unicode.py, so prefer regenerating it with
// that script over editing entries by hand.
// NOTE(review): the name suggests these are the codepoints whose Unicode
// NFC_Quick_Check property is "Maybe" -- confirm against the consumer.
// NOTE(review): adjacent entries share an endpoint (e.g. {0x0b56, 0x0b57}
// followed by {0x0b57, 0x0b58}), which suggests each pair is the half-open
// interval [first, second) -- confirm against the lookup code.
const std::array<std::pair<uint32_t, uint32_t>, 43> NFC_QC_MAYBE_RANGES = {{
// clang-format off
{0x0300, 0x0305},
{0x0306, 0x030d},
{0x030f, 0x0310},
{0x0311, 0x0312},
{0x0313, 0x0315},
{0x031b, 0x031c},
{0x0323, 0x0329},
{0x032d, 0x032f},
{0x0330, 0x0332},
{0x0338, 0x0339},
{0x0342, 0x0343},
{0x0345, 0x0346},
{0x0653, 0x0656},
{0x093c, 0x093d},
{0x09be, 0x09bf},
{0x09d7, 0x09d8},
{0x0b3e, 0x0b3f},
{0x0b56, 0x0b57},
{0x0b57, 0x0b58},
{0x0bbe, 0x0bbf},
{0x0bd7, 0x0bd8},
{0x0c56, 0x0c57},
{0x0cc2, 0x0cc3},
{0x0cd5, 0x0cd7},
{0x0d3e, 0x0d3f},
{0x0d57, 0x0d58},
{0x0dca, 0x0dcb},
{0x0dcf, 0x0dd0},
{0x0ddf, 0x0de0},
{0x102e, 0x102f},
{0x1161, 0x1176},
{0x11a8, 0x11c3},
{0x1b35, 0x1b36},
{0x3099, 0x309b},
{0x110ba, 0x110bb},
{0x11127, 0x11128},
{0x1133e, 0x1133f},
{0x11357, 0x11358},
{0x114b0, 0x114b1},
{0x114ba, 0x114bb},
{0x114bd, 0x114be},
{0x115af, 0x115b0},
{0x11930, 0x11931},
// clang-format on
}};

} // namespace Rust
Loading
Loading