Fix UTF-8 auto alignment

This commit is contained in:
Sebastian Pipping 2016-05-20 22:28:18 +02:00
parent be917d9f84
commit 550eb6bbaa

View File

@ -333,9 +333,34 @@ void
align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
{
const char * fromLim = *fromLimRef;
for (; fromLim > from; fromLim--)
if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
size_t walked = 0;
for (; fromLim > from; fromLim--, walked++) {
const unsigned char prev = (unsigned char)fromLim[-1];
if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
if (walked + 1 >= 4) {
fromLim += 4 - 1;
break;
} else {
walked = 0;
}
} else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
if (walked + 1 >= 3) {
fromLim += 3 - 1;
break;
} else {
walked = 0;
}
} else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
if (walked + 1 >= 2) {
fromLim += 2 - 1;
break;
} else {
walked = 0;
}
} else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
break;
}
}
*fromLimRef = fromLim;
}