// Classifies a single byte `u` by value range, in the way a UTF-8 lead byte is
// classified. The value produced by each branch is not visible in this excerpt;
// presumably it is the total sequence length, since the caller stores the
// result in a variable named `len` -- TODO confirm against the full source.
3_meta_inline _Pure uint32 _strUTF8SeqLen(uint8 u)
// 0x80..0xbf has the bit pattern 10xxxxxx: the same range the decoder accepts
// for continuation bytes, so it cannot begin a sequence.
9 if (u >= 0x80 && u <= 0xbf)
// 0xc0 and 0xc1 are singled out from the rest of the 110xxxxx range; as
// two-byte leads they could only encode values below 0x80 (overlong forms).
11 else if (u == 0xc0 || u == 0xc1)
// 0xc2..0xdf: remaining 110xxxxx values (lead byte of a two-byte sequence).
13 else if (u >= 0xc2 && u <= 0xdf)
// 0xe0..0xef: 1110xxxx (lead byte of a three-byte sequence).
15 else if (u >= 0xe0 && u <= 0xef)
// 0xf0..0xf4: 11110xxx, capped at 0xf4; larger lead bytes fall through to
// whatever default follows (not visible here).
17 else if (u >= 0xf0 && u <= 0xf4)
// Consumes the trailing bytes of one multi-byte UTF-8 sequence and validates
// the accumulated value.
//   it        - string iterator; how bytes are pulled from it is not visible
//               in this excerpt.
//   len       - sequence length as computed by the caller.
//   ch        - byte value supplied by the caller (the caller passes the lead
//               byte); presumably reloaded from `it` inside the loop -- confirm.
//   codepoint - optional output location (declared _Nullable).
// The meaning of the bool result is not visible here.
23_meta_inline
bool _strUTF8DecodeSeq(
striter *_Nonnull it, uint32 len, uint8 ch, int32 *_Nullable codepoint)
// One iteration per byte after the lead byte; note that this counts down
// `len` itself.
36 for (; len > 1; --len) {
// Anything outside 0x80..0xbf (10xxxxxx) is not a continuation byte.
40 if (ch < 0x80 || ch > 0xbf)
// Append the 6 payload bits of the continuation byte to the accumulator.
43 ret = (ret << 6) | (ch & 0x3f);
// Validation of the accumulated value: the UTF-16 surrogate range
// 0xd800..0xdfff, and values small enough to fit a shorter encoding
// (overlong forms) for 2-, 3- and 4-byte sequences.
// NOTE(review): the loop above decrements `len`, so unless a line not shown
// here preserves the original length, `len` is at most 1 by this point and the
// three `len == N` overlong checks below can never be true -- confirm against
// the full source.
47 (ret >= 0xd800 && ret <= 0xdfff) ||
48 (len == 2 && ret < 0x80) ||
49 (len == 3 && ret < 0x800) ||
50 (len == 4 && ret < 0x10000))
// Decodes one UTF-8 sequence through iterator `it`, optionally storing the
// result via `codepoint` (declared _Nullable). The returned uint32 is not
// visible in this excerpt; presumably the number of bytes consumed -- confirm.
59_meta_inline uint32 _strUTF8Decode(
striter *_Nonnull it, int32 *_Nullable codepoint)
// Classify the first byte (`first` is obtained on a line not shown here).
65 uint32 len = _strUTF8SeqLen(first);
// Hand the first byte and the computed length to the sequence decoder, which
// processes the remaining bytes.
73 if (_strUTF8DecodeSeq(it, len, first, codepoint))
// Encodes `codepoint` as UTF-8 into `buffer`, writing 1 to 4 bytes depending on
// the magnitude of the value. The caller must supply room for up to 4 bytes.
// The return statements are not visible in this excerpt; presumably the number
// of bytes written -- confirm against the full source.
// NOTE(review): no branch visible here rejects the surrogate range
// 0xd800..0xdfff, which the decoder refuses; check whether the leading
// condition (not shown) covers it.
78_meta_inline uint32 _strUTF8Encode(uint8 *_Nonnull buffer, int32 codepoint)
// Up to 7 bits: the byte is the code point itself.
82 else if (codepoint < 0x80) {
83 buffer[0] = (uint8)codepoint;
85 }
// Up to 11 bits: 110xxxxx 10xxxxxx (5 + 6 payload bits).
else if (codepoint < 0x800) {
86 buffer[0] = 0xc0 | ((codepoint & 0x7c0) >> 6);
87 buffer[1] = 0x80 | ((codepoint & 0x03f));
89 }
// Up to 16 bits: 1110xxxx 10xxxxxx 10xxxxxx (4 + 6 + 6 payload bits).
else if (codepoint < 0x10000) {
90 buffer[0] = 0xe0 | ((codepoint & 0xf000) >> 12);
91 buffer[1] = 0x80 | ((codepoint & 0x0fc0) >> 6);
92 buffer[2] = 0x80 | ((codepoint & 0x003f));
94 }
// Up to 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (3 + 6 + 6 + 6 bits).
// The bound is inclusive: U+10FFFF is the last valid Unicode code point and
// must be encodable.
else if (codepoint <= 0x10ffff) {
95 buffer[0] = 0xf0 | ((codepoint & 0x1c0000) >> 18);
96 buffer[1] = 0x80 | ((codepoint & 0x03f000) >> 12);
97 buffer[2] = 0x80 | ((codepoint & 0x000fc0) >> 6);
98 buffer[3] = 0x80 | ((codepoint & 0x00003f));
bool striChar(striter *i, uint8 *out)