diff options
Diffstat (limited to 'isutf8.c')
-rw-r--r-- | isutf8.c | 41 |
1 files changed, 35 insertions, 6 deletions
@@ -1,11 +1,12 @@ /* See LICENSE file for copyright and license details. */ #include "libsimple.h" +#ifndef TEST int libsimple_isutf8(const char *string, int allow_modified_nul) { - static long BYTES_TO_MIN_BITS[] = {0, 0, 8, 12, 17, 22, 37}; + static long BYTES_TO_MIN_BITS[] = {0, 0, 8, 12, 17, 22, 27}; static long BYTES_TO_MAX_BITS[] = {0, 7, 11, 16, 21, 26, 31}; long int bytes = 0, read_bytes = 0, bits = 0, c, character; @@ -35,7 +36,7 @@ libsimple_isutf8(const char *string, int allow_modified_nul) while ((c & 0x80)) bytes++, c <<= 1; read_bytes = 1; - character = c & 0x7F; + character = (c & 0xFF) >> bytes; if (bytes > 6) /* 31-bit characters can be encoded with 6-bytes, and UTF-8 does not cover higher code points. */ @@ -70,8 +71,8 @@ libsimple_isutf8(const char *string, int allow_modified_nul) } -#ifdef TEST -#include <assert.h> +#else +#include "test.h" int main(void) @@ -93,12 +94,40 @@ main(void) assert(libsimple_isutf8("\xC0", i) == 0); assert(libsimple_isutf8("\xC0\x80", i) == i); assert(libsimple_isutf8("\xC0\x81", i) == 0); + assert(libsimple_isutf8("\xCF", i) == 0); + assert(libsimple_isutf8("\xEF", i) == 0); + assert(libsimple_isutf8("\xEF\x8F", i) == 0); + assert(libsimple_isutf8("\xF7", i) == 0); + assert(libsimple_isutf8("\xF7\x8F", i) == 0); + assert(libsimple_isutf8("\xF7\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFA", i) == 0); + assert(libsimple_isutf8("\xFA\x8F", i) == 0); + assert(libsimple_isutf8("\xFA\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFA\x8F\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFD", i) == 0); + assert(libsimple_isutf8("\xFD\x8F", i) == 0); + assert(libsimple_isutf8("\xFD\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFD\x8F\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFD\x8F\x8F\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFE", i) == 0); + assert(libsimple_isutf8("\xFE\x8F", i) == 0); + assert(libsimple_isutf8("\xFE\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFE\x8F\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFE\x8F\x8F\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFE\x8F\x8F\x8F\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFF", i) == 0); + assert(libsimple_isutf8("\xFF\x8F", i) == 0); + assert(libsimple_isutf8("\xFF\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFF\x8F\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F\x8F", i) == 0); + assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F\x8F\x8F", i) == 0); assert(libsimple_isutf8("\xC1\x80", i) == 0); assert(libsimple_isutf8("\xC2\x80", i) == 1); - assert(libsimple_isutf8("\xE1\x80\x80\x80", i) == 1); + assert(libsimple_isutf8("\xE1\x80\x80\x80", i) == 0); assert(libsimple_isutf8("\xE1\x80\xC0\x80", i) == 0); assert(libsimple_isutf8("\xE1\x80\x00\x80", i) == 0); - assert(libsimple_isutf8("\xF1\x80\x80\x80", i) == 0); + assert(libsimple_isutf8("\xF1\x80\x80\x80", i) == 1); assert(libsimple_isutf8("\xFF\x80\x80\x80\x80\x80\x80\x80", i) == 0); } return 0; |