about | summary | refs | log | tree | commit | diff | stats
path: root/isutf8.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- isutf8.c | 41
1 files changed, 35 insertions, 6 deletions
diff --git a/isutf8.c b/isutf8.c
index 557aafb..232be5d 100644
--- a/isutf8.c
+++ b/isutf8.c
@@ -1,11 +1,12 @@
/* See LICENSE file for copyright and license details. */
#include "libsimple.h"
+#ifndef TEST
int
libsimple_isutf8(const char *string, int allow_modified_nul)
{
- static long BYTES_TO_MIN_BITS[] = {0, 0, 8, 12, 17, 22, 37};
+ static long BYTES_TO_MIN_BITS[] = {0, 0, 8, 12, 17, 22, 27};
static long BYTES_TO_MAX_BITS[] = {0, 7, 11, 16, 21, 26, 31};
long int bytes = 0, read_bytes = 0, bits = 0, c, character;
@@ -35,7 +36,7 @@ libsimple_isutf8(const char *string, int allow_modified_nul)
while ((c & 0x80))
bytes++, c <<= 1;
read_bytes = 1;
- character = c & 0x7F;
+ character = (c & 0xFF) >> bytes;
if (bytes > 6)
/* 31-bit characters can be encoded with 6-bytes,
and UTF-8 does not cover higher code points. */
@@ -70,8 +71,8 @@ libsimple_isutf8(const char *string, int allow_modified_nul)
}
-#ifdef TEST
-#include <assert.h>
+#else
+#include "test.h"
int
main(void)
@@ -93,12 +94,40 @@ main(void)
assert(libsimple_isutf8("\xC0", i) == 0);
assert(libsimple_isutf8("\xC0\x80", i) == i);
assert(libsimple_isutf8("\xC0\x81", i) == 0);
+ assert(libsimple_isutf8("\xCF", i) == 0);
+ assert(libsimple_isutf8("\xEF", i) == 0);
+ assert(libsimple_isutf8("\xEF\x8F", i) == 0);
+ assert(libsimple_isutf8("\xF7", i) == 0);
+ assert(libsimple_isutf8("\xF7\x8F", i) == 0);
+ assert(libsimple_isutf8("\xF7\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFA", i) == 0);
+ assert(libsimple_isutf8("\xFA\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFA\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFA\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFD", i) == 0);
+ assert(libsimple_isutf8("\xFD\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFD\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFD\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFD\x8F\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFE", i) == 0);
+ assert(libsimple_isutf8("\xFE\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFE\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFE\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFE\x8F\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFE\x8F\x8F\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFF", i) == 0);
+ assert(libsimple_isutf8("\xFF\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFF\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFF\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F\x8F\x8F", i) == 0);
assert(libsimple_isutf8("\xC1\x80", i) == 0);
assert(libsimple_isutf8("\xC2\x80", i) == 1);
- assert(libsimple_isutf8("\xE1\x80\x80\x80", i) == 1);
+ assert(libsimple_isutf8("\xE1\x80\x80\x80", i) == 0);
assert(libsimple_isutf8("\xE1\x80\xC0\x80", i) == 0);
assert(libsimple_isutf8("\xE1\x80\x00\x80", i) == 0);
- assert(libsimple_isutf8("\xF1\x80\x80\x80", i) == 0);
+ assert(libsimple_isutf8("\xF1\x80\x80\x80", i) == 1);
assert(libsimple_isutf8("\xFF\x80\x80\x80\x80\x80\x80\x80", i) == 0);
}
return 0;