whitespace + Add message.[ch]

Signed-off-by: Mattias Andrée <maandree@kth.se>
author: Mattias Andrée <maandree@kth.se> 2016-07-11 20:28:28 +0200
committer: Mattias Andrée <maandree@kth.se> 2016-07-11 20:28:28 +0200
commit: 564cbb378e1480ff736a14c832d39fe6db9f7891 (patch)
tree: 4f5e3377f21c173b6a7f72a11071fa8aa0637fd9 /src/util.c
parent: Add signal handlers (diff)
download: coopgammad-564cbb378e1480ff736a14c832d39fe6db9f7891.tar.gz
coopgammad-564cbb378e1480ff736a14c832d39fe6db9f7891.tar.bz2
coopgammad-564cbb378e1480ff736a14c832d39fe6db9f7891.tar.xz
1 files changed, 75 insertions, 0 deletions
diff --git a/src/util.c b/src/util.c
index 81926c6..f73634a 100644
--- a/src/util.c
+++ b/src/util.c
@@ -152,3 +152,78 @@ void msleep(int ms)
     nanosleep(&ts, NULL);
 }
 
/**
 * Check whether a NUL-terminated string is encoded in UTF-8
 * 
 * The original, extended form of UTF-8 is accepted: sequences of up
 * to 6 bytes, encoding code points of up to 31 bits. No check is made
 * for surrogates or for code points above U+10FFFF. Overlong encodings
 * (a code point encoded with more bytes than necessary) are rejected,
 * with the optional exception of the two-byte encoding of NUL.
 * 
 * @param   string              The string
 * @param   allow_modified_nul  Whether Modified UTF-8 is allowed, which allows a two-byte encoding for NUL
 * @return                      Zero if good, -1 on encoding error
 */
int verify_utf8(const char* string, int allow_modified_nul)
{
  /* Indexed by the number of bytes in the sequence. */
  static const long BYTES_TO_MIN_BITS[] = {0, 0,  8, 12, 17, 22, 27};
  static const long BYTES_TO_MAX_BITS[] = {0, 7, 11, 16, 21, 26, 31};
  long bytes = 0, read_bytes = 0, bits = 0, c, character = 0;
  
  /*                                                      min bits  max bits
    0.......                                                 0         7
    110..... 10......                                        8        11
    1110.... 10...... 10......                              12        16
    11110... 10...... 10...... 10......                     17        21
    111110.. 10...... 10...... 10...... 10......            22        26
    1111110. 10...... 10...... 10...... 10...... 10......   27        31
   */
  
  /* Read each byte as unsigned char: plain char may be signed, and
     left-shifting a negative value is undefined behaviour. */
  while ((c = (long)(unsigned char)(*string++)))
    if (read_bytes == 0)
      {
        /* First byte of the character. */
        
        if ((c & 0x80) == 0x00)
          /* Single-byte character. */
          continue;
        
        if ((c & 0xC0) == 0x80)
          /* Single-byte character marked as multibyte, or
             a non-first byte in a multibyte character. */
          return -1;
        
        /* Multibyte character: the number of leading
           1-bits is the length of the sequence. */
        while ((c & 0x80))
          bytes++, c <<= 1;
        if (bytes > 6)
          /* 31-bit characters can be encoded with 6-bytes,
             and UTF-8 does not cover higher code points. */
          return -1;
        read_bytes = 1;
        /* `c` has been shifted left `bytes` steps, undo that
           to get the payload bits of the first byte. */
        character = (c & 0x7F) >> bytes;
      }
    else
      {
        /* Not first byte of the character. */
        
        if ((c & 0xC0) != 0x80)
          /* Beginning of new character before a
             multibyte character has ended. */
          return -1;
        
        character = (character << 6) | (c & 0x3F);
        
        if (++read_bytes < bytes)
          /* Not at last byte yet. */
          continue;
        
        /* Check that the character is not unnecessarily long. */
        while (character)
          character >>= 1, bits++;
        /* Modified UTF-8 encodes NUL as C0 80: pretend it needs
           the minimum number of bits for a two-byte sequence. */
        if ((bits == 0) && (bytes == 2) && allow_modified_nul)
          bits = 8;
        if ((bits < BYTES_TO_MIN_BITS[bytes]) || (BYTES_TO_MAX_BITS[bytes] < bits))
          return -1;
        
        read_bytes = bytes = bits = 0;
      }
  
  /* Make sure we did not stop at the middle of a multibyte character. */
  return read_bytes == 0 ? 0 : -1;
}

+
author	Mattias Andrée <maandree@kth.se>	2016-07-11 20:28:28 +0200
committer	Mattias Andrée <maandree@kth.se>	2016-07-11 20:28:28 +0200
commit	564cbb378e1480ff736a14c832d39fe6db9f7891 (patch)
tree	4f5e3377f21c173b6a7f72a11071fa8aa0637fd9 /src/util.c
parent	Add signal handlers (diff)
download	coopgammad-564cbb378e1480ff736a14c832d39fe6db9f7891.tar.gz coopgammad-564cbb378e1480ff736a14c832d39fe6db9f7891.tar.bz2 coopgammad-564cbb378e1480ff736a14c832d39fe6db9f7891.tar.xz