Fix UTF-8 character corruption every 32kb of text buffer

2014-01-08 15:03:25 +01:00 · 2014-01-08 15:03:25 +01:00 · fc4a4d2471
commit fc4a4d2471
parent 0331970d64
2 changed files with 13 additions and 0 deletions
--- a/src/fe-common/core/utf8.h
+++ b/src/fe-common/core/utf8.h
@@ -12,5 +12,6 @@
 int mk_wcwidth(unichar c);

 #define unichar_isprint(c) (((c) & ~0x80) >= 32)
+#define is_utf8_leading(c) (((c) & 0xc0) != 0x80)

 #endif
--- a/src/fe-text/textbuffer.c
+++ b/src/fe-text/textbuffer.c
@@ -23,6 +23,7 @@
 #include "module.h"
 #include "misc.h"
 #include "formats.h"
+#include "utf8.h"

 #include "textbuffer.h"

@@ -154,6 +155,17 @@ static void text_chunk_append(TEXT_BUFFER_REC *buffer,
        chunk = buffer->cur_text;
 	while (chunk->pos + len >= TEXT_CHUNK_USABLE_SIZE) {
 		left = TEXT_CHUNK_USABLE_SIZE - chunk->pos;
+
+		/* don't split a UTF-8 character (we assume non-UTF-8 data can be split anywhere) */
+		if (left < len && !is_utf8_leading(data[left])) {
+			int i;
+			for (i = 1; i < 4 && left >= i; i++)
+				if (is_utf8_leading(data[left - i])) {
+					left -= i;
+					break;
+				}
+		}
+
 		if (left > 0 && data[left-1] == 0)
 			left--; /* don't split the commands */