Rename isutf8 to strisutf8 and add man page

Signed-off-by: Mattias Andrée <maandree@kth.se>
author: Mattias Andrée <maandree@kth.se> 2018-11-05 18:54:48 +0100
committer: Mattias Andrée <maandree@kth.se> 2018-11-05 18:54:48 +0100
commit: e19f3d5e8c0e9576a3fda2b77ad69be135bef047 (patch)
tree: a3aa862ed02b6f17483fe8f6607c05d1fac6e4e9 /isutf8.c
parent: Add man pages for unlist (diff)
download: libsimple-e19f3d5e8c0e9576a3fda2b77ad69be135bef047.tar.gz
libsimple-e19f3d5e8c0e9576a3fda2b77ad69be135bef047.tar.bz2
libsimple-e19f3d5e8c0e9576a3fda2b77ad69be135bef047.tar.xz
1 file changed, 0 insertions, 136 deletions
diff --git a/isutf8.c b/isutf8.c
deleted file mode 100644
index 232be5d..0000000
--- a/isutf8.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#include "libsimple.h"
-#ifndef TEST
-
-
-int
-libsimple_isutf8(const char *string, int allow_modified_nul)
-{
-	static long BYTES_TO_MIN_BITS[] = {0, 0,  8, 12, 17, 22, 27};
-        static long BYTES_TO_MAX_BITS[] = {0, 7, 11, 16, 21, 26, 31};
-        long int bytes = 0, read_bytes = 0, bits = 0, c, character;
-
-        /*                                                      min bits  max bits
-          0.......                                                 0         7
-          110..... 10......                                        8        11
-          1110.... 10...... 10......                              12        16
-          11110... 10...... 10...... 10......                     17        21
-          111110.. 10...... 10...... 10...... 10......            22        26
-          1111110. 10...... 10...... 10...... 10...... 10......   27        31
-        */
-
-	while ((c = (long int)(*string++))) {
-                if (!read_bytes) {
-                        /* First byte of the character. */
-
-                        if (!(c & 0x80))
-                                /* Single-byte character. */
-                                continue;
-
-                        if ((c & 0xC0) == 0x80)
-                                /* Single-byte character marked as multibyte, or
-                                   a non-first byte in a multibyte character. */
-                                return 0;
-
-                        /* Multibyte character. */
-                        while ((c & 0x80))
-                                bytes++, c <<= 1;
-                        read_bytes = 1;
-			character = (c & 0xFF) >> bytes;
-                        if (bytes > 6)
-                                /* 31-bit characters can be encoded with 6-bytes,
-                                   and UTF-8 does not cover higher code points. */
-                                return 0;
-                } else {
-                        /* Not first byte of the character. */
-
-                        if ((c & 0xC0) != 0x80)
-                                /* Beginning of new character before a
-                                   multibyte character has ended. */
-                                return 0;
-
-                        character = (character << 6) | (c & 0x7F);
-
-                        if (++read_bytes < bytes)
-                                /* Not at last byte yet. */
-                                continue;
-
-                        /* Check that the character is not unnecessarily long. */
-                        while (character)
-                                character >>= 1, bits++;
-                        bits = (!bits && bytes == 2 && allow_modified_nul) ? 8 : bits;
-                        if (bits < BYTES_TO_MIN_BITS[bytes] || BYTES_TO_MAX_BITS[bytes] < bits)
-                                return 0;
-
-                        read_bytes = bytes = bits = 0;
-                }
-        }
-
-        /* Make sure we did not stop at the middle of a multibyte character. */
-        return !read_bytes;
-}
-
-
-#else
-#include "test.h"
-
-int
-main(void)
-{
-	int i;
-	for (i = 0; i < 2; i++) {
-		assert(libsimple_isutf8("", i) == 1);
-		assert(libsimple_isutf8("a", i) == 1);
-		assert(libsimple_isutf8("abc", i) == 1);
-		assert(libsimple_isutf8("123", i) == 1);
-		assert(libsimple_isutf8("åäö", i) == 1);
-		assert(libsimple_isutf8("𝖆𝖇𝖈", i) == 1);
-		assert(libsimple_isutf8("\x1b", i) == 1);
-		assert(libsimple_isutf8("\n\r\t\f", i) == 1);
-		assert(libsimple_isutf8("\xFF", i) == 0);
-		assert(libsimple_isutf8("\x01", i) == 1);
-		assert(libsimple_isutf8("\x7F", i) == 1);
-		assert(libsimple_isutf8("\x80", i) == 0);
-		assert(libsimple_isutf8("\xC0", i) == 0);
-		assert(libsimple_isutf8("\xC0\x80", i) == i);
-		assert(libsimple_isutf8("\xC0\x81", i) == 0);
-		assert(libsimple_isutf8("\xCF", i) == 0);
-		assert(libsimple_isutf8("\xEF", i) == 0);
-		assert(libsimple_isutf8("\xEF\x8F", i) == 0);
-		assert(libsimple_isutf8("\xF7", i) == 0);
-		assert(libsimple_isutf8("\xF7\x8F", i) == 0);
-		assert(libsimple_isutf8("\xF7\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFA", i) == 0);
-		assert(libsimple_isutf8("\xFA\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFA\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFA\x8F\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFD", i) == 0);
-		assert(libsimple_isutf8("\xFD\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFD\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFD\x8F\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFD\x8F\x8F\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFE", i) == 0);
-		assert(libsimple_isutf8("\xFE\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFE\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFE\x8F\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFE\x8F\x8F\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFE\x8F\x8F\x8F\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFF", i) == 0);
-		assert(libsimple_isutf8("\xFF\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFF\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFF\x8F\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F\x8F\x8F", i) == 0);
-		assert(libsimple_isutf8("\xC1\x80", i) == 0);
-		assert(libsimple_isutf8("\xC2\x80", i) == 1);
-		assert(libsimple_isutf8("\xE1\x80\x80\x80", i) == 0);
-		assert(libsimple_isutf8("\xE1\x80\xC0\x80", i) == 0);
-		assert(libsimple_isutf8("\xE1\x80\x00\x80", i) == 0);
-		assert(libsimple_isutf8("\xF1\x80\x80\x80", i) == 1);
-		assert(libsimple_isutf8("\xFF\x80\x80\x80\x80\x80\x80\x80", i) == 0);
-	}
-	return 0;
-}
-
-#endif
author	Mattias Andrée <maandree@kth.se>	2018-11-05 18:54:48 +0100
committer	Mattias Andrée <maandree@kth.se>	2018-11-05 18:54:48 +0100
commit	e19f3d5e8c0e9576a3fda2b77ad69be135bef047 (patch)
tree	a3aa862ed02b6f17483fe8f6607c05d1fac6e4e9 /isutf8.c
parent	Add man pages for unlist (diff)
download	libsimple-e19f3d5e8c0e9576a3fda2b77ad69be135bef047.tar.gz libsimple-e19f3d5e8c0e9576a3fda2b77ad69be135bef047.tar.bz2 libsimple-e19f3d5e8c0e9576a3fda2b77ad69be135bef047.tar.xz