Fix UTF-8 auto alignment
This commit is contained in:
parent
be917d9f84
commit
550eb6bbaa
@ -333,9 +333,34 @@ void
|
||||
/* Returns the total length in bytes (1 to 4) of the UTF-8 sequence that
   `byte` introduces, judged from its high bits alone.  Returns 0 when
   `byte` is not a first byte recognized here: 0b10xxxxxx continuation
   bytes and 0b11111xxx bytes. */
static size_t
utf8_sequence_length_from_lead(unsigned char byte)
{
  if ((byte & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
    return 1;
  }
  if ((byte & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
    return 2;
  }
  if ((byte & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
    return 3;
  }
  if ((byte & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
    return 4;
  }
  return 0;
}

/* Moves the limit *fromLimRef backwards (never forwards, never before
   `from`) until the range [from, *fromLimRef) no longer ends in the middle
   of a multi-byte UTF-8 character.

   from        start of the byte range
   fromLimRef  in: one past the last byte of the range
               out: the adjusted limit, with from <= *fromLimRef <= input

   The range is scanned from its end towards `from`.  A first byte whose
   sequence fits completely before the current candidate end settles the
   limit right after that sequence.  A first byte whose sequence does not
   fit is dropped together with the bytes that followed it, and scanning
   resumes in front of it.  If no complete character is found the limit
   ends up equal to `from`. */
void
align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
{
  const char * fromLim = *fromLimRef;
  /* Number of bytes lying between the byte under inspection and the
     current candidate end of the range. */
  size_t trailing = 0;

  while (fromLim > from) {
    const size_t needed
        = utf8_sequence_length_from_lead((unsigned char)fromLim[-1]);

    if (needed == 0) {
      /* Not a first byte: keep walking back, it may belong to a
         sequence that starts further to the left. */
      fromLim--;
      trailing++;
      continue;
    }

    if (trailing + 1 >= needed) {
      /* Complete character: fromLim currently points just past the first
         byte, so step over the remaining bytes of the sequence. */
      fromLim += needed - 1;
      break;
    }

    /* Incomplete character: cut in front of its first byte.  The counter
       restarts at zero so that the dropped first byte is not mistaken for
       a continuation byte of whatever precedes it. */
    fromLim--;
    trailing = 0;
  }

  *fromLimRef = fromLim;
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user