diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | libsimple/str.h | 6 | ||||
-rw-r--r-- | man/libsimple_inchrset.3 | 2 | ||||
-rw-r--r-- | man/libsimple_strisutf8.3 | 74 | ||||
-rw-r--r-- | strisutf8.c (renamed from isutf8.c) | 102 |
5 files changed, 130 insertions, 56 deletions
@@ -58,7 +58,6 @@ OBJ =\ envposix_memalignzn.o\ envputenvf.o\ envreallocn.o\ - isutf8.o\ memcasechr.o\ memcasecmp.o\ memcaseends.o\ @@ -94,6 +93,7 @@ OBJ =\ strchrnul.o\ strends.o\ streqlen.o\ + strisutf8.o\ strncasechr.o\ strncasechrnul.o\ strncaseends.o\ diff --git a/libsimple/str.h b/libsimple/str.h index 3fd1e21..b990fe9 100644 --- a/libsimple/str.h +++ b/libsimple/str.h @@ -383,7 +383,7 @@ static inline int libsimple_inchrcaseset(int __c, const char *__s) * @return 1 if good, 0 on encoding error */ _LIBSIMPLE_GCC_ONLY(__attribute__((__pure__, __nonnull__, __warn_unused_result__))) -int libsimple_isutf8(const char *, int); -#ifndef isutf8 -# define isutf8 libsimple_isutf8 +int libsimple_strisutf8(const char *, int); +#ifndef strisutf8 +# define strisutf8 libsimple_strisutf8 #endif diff --git a/man/libsimple_inchrset.3 b/man/libsimple_inchrset.3 index 09b1ca1..f07263c 100644 --- a/man/libsimple_inchrset.3 +++ b/man/libsimple_inchrset.3 @@ -1,4 +1,4 @@ -.TH LIBSIMPLE_STRCHRNUL 3 2018-11-05 libsimple +.TH LIBSIMPLE_INCHRSET 3 2018-11-05 libsimple .SH NAME libsimple_inchrset, libsimple_inchrcaseset \- the whether a character belongs to a set .SH SYNOPSIS diff --git a/man/libsimple_strisutf8.3 b/man/libsimple_strisutf8.3 new file mode 100644 index 0000000..24dcd96 --- /dev/null +++ b/man/libsimple_strisutf8.3 @@ -0,0 +1,74 @@ +.TH LIBSIMPLE_STRISUTF8 3 2018-11-05 libsimple +.SH NAME +libsimple_strisutf8 \- check if a string is encoded in UTF-8 +.SH SYNOPSIS +.nf +#include <libsimple.h> + +int libsimple_strisutf8(const char *\fIstring\fP, int \fIallow_modified_nul\fP); + +#ifndef strisutf8 +# define strisutf8 libsimple_strisutf8 +#endif +.fi +.PP +Link with +.IR \-lsimple . +.SH DESCRIPTION +The +.BR libsimple_strisutf8 () +function checks if +.I string +is in valid UTF-8. If +.I allow_modified_nul +is non-zero, NUL encoded with 2 bytes is accepted. 
+.SH RETURN VALUE +The +.BR libsimple_strisutf8 () +function returns 1 if the +.I string +is in valid UTF-8 (Modified UTF-8 if +.I allow_modified_nul +is non-zero); otherwise 0 is returned. +.SH ERRORS +The +.BR libsimple_strisutf8 () +function cannot fail. +.SH ATTRIBUTES +For an explanation of the terms used in this section, see +.BR attributes (7). +.TS +allbox; +lb lb lb +l l l. +Interface Attribute Value +T{ +.BR libsimple_strisutf8 (), +.br +.BR strisutf8 () +T} Thread safety MT-Safe +T{ +.BR libsimple_strisutf8 (), +.br +.BR strisutf8 () +T} Async-signal safety AS-Safe +T{ +.BR libsimple_strisutf8 (), +.br +.BR strisutf8 () +T} Async-cancel safety AC-Safe +.TE +.SH EXAMPLES +None. +.SH APPLICATION USAGE +None. +.SH RATIONALE +None. +.SH FUTURE DIRECTIONS +None. +.SH NOTES +None. +.SH BUGS +None. +.SH SEE ALSO +None. @@ -4,7 +4,7 @@ int -libsimple_isutf8(const char *string, int allow_modified_nul) +libsimple_strisutf8(const char *string, int allow_modified_nul) { static long BYTES_TO_MIN_BITS[] = {0, 0, 8, 12, 17, 22, 27}; static long BYTES_TO_MAX_BITS[] = {0, 7, 11, 16, 21, 26, 31}; @@ -79,56 +79,56 @@ main(void) { int i; for (i = 0; i < 2; i++) { - assert(libsimple_isutf8("", i) == 1); - assert(libsimple_isutf8("a", i) == 1); - assert(libsimple_isutf8("abc", i) == 1); - assert(libsimple_isutf8("123", i) == 1); - assert(libsimple_isutf8("åäö", i) == 1); - assert(libsimple_isutf8("𝖆𝖇𝖈", i) == 1); - assert(libsimple_isutf8("\x1b", i) == 1); - assert(libsimple_isutf8("\n\r\t\f", i) == 1); - assert(libsimple_isutf8("\xFF", i) == 0); - assert(libsimple_isutf8("\x01", i) == 1); - assert(libsimple_isutf8("\x7F", i) == 1); - assert(libsimple_isutf8("\x80", i) == 0); - assert(libsimple_isutf8("\xC0", i) == 0); - assert(libsimple_isutf8("\xC0\x80", i) == i); - assert(libsimple_isutf8("\xC0\x81", i) == 0); - assert(libsimple_isutf8("\xCF", i) == 0); - assert(libsimple_isutf8("\xEF", i) == 0); - assert(libsimple_isutf8("\xEF\x8F", i) == 0); - 
assert(libsimple_isutf8("\xF7", i) == 0); - assert(libsimple_isutf8("\xF7\x8F", i) == 0); - assert(libsimple_isutf8("\xF7\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFA", i) == 0); - assert(libsimple_isutf8("\xFA\x8F", i) == 0); - assert(libsimple_isutf8("\xFA\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFA\x8F\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFD", i) == 0); - assert(libsimple_isutf8("\xFD\x8F", i) == 0); - assert(libsimple_isutf8("\xFD\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFD\x8F\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFD\x8F\x8F\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFE", i) == 0); - assert(libsimple_isutf8("\xFE\x8F", i) == 0); - assert(libsimple_isutf8("\xFE\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFE\x8F\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFE\x8F\x8F\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFE\x8F\x8F\x8F\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFF", i) == 0); - assert(libsimple_isutf8("\xFF\x8F", i) == 0); - assert(libsimple_isutf8("\xFF\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFF\x8F\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F\x8F\x8F", i) == 0); - assert(libsimple_isutf8("\xC1\x80", i) == 0); - assert(libsimple_isutf8("\xC2\x80", i) == 1); - assert(libsimple_isutf8("\xE1\x80\x80\x80", i) == 0); - assert(libsimple_isutf8("\xE1\x80\xC0\x80", i) == 0); - assert(libsimple_isutf8("\xE1\x80\x00\x80", i) == 0); - assert(libsimple_isutf8("\xF1\x80\x80\x80", i) == 1); - assert(libsimple_isutf8("\xFF\x80\x80\x80\x80\x80\x80\x80", i) == 0); + assert(libsimple_strisutf8("", i) == 1); + assert(libsimple_strisutf8("a", i) == 1); + assert(libsimple_strisutf8("abc", i) == 1); + assert(libsimple_strisutf8("123", i) == 1); + assert(libsimple_strisutf8("åäö", i) == 1); + assert(libsimple_strisutf8("𝖆𝖇𝖈", i) == 1); + 
assert(libsimple_strisutf8("\x1b", i) == 1); + assert(libsimple_strisutf8("\n\r\t\f", i) == 1); + assert(libsimple_strisutf8("\xFF", i) == 0); + assert(libsimple_strisutf8("\x01", i) == 1); + assert(libsimple_strisutf8("\x7F", i) == 1); + assert(libsimple_strisutf8("\x80", i) == 0); + assert(libsimple_strisutf8("\xC0", i) == 0); + assert(libsimple_strisutf8("\xC0\x80", i) == i); + assert(libsimple_strisutf8("\xC0\x81", i) == 0); + assert(libsimple_strisutf8("\xCF", i) == 0); + assert(libsimple_strisutf8("\xEF", i) == 0); + assert(libsimple_strisutf8("\xEF\x8F", i) == 0); + assert(libsimple_strisutf8("\xF7", i) == 0); + assert(libsimple_strisutf8("\xF7\x8F", i) == 0); + assert(libsimple_strisutf8("\xF7\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFA", i) == 0); + assert(libsimple_strisutf8("\xFA\x8F", i) == 0); + assert(libsimple_strisutf8("\xFA\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFA\x8F\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFD", i) == 0); + assert(libsimple_strisutf8("\xFD\x8F", i) == 0); + assert(libsimple_strisutf8("\xFD\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFD\x8F\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFD\x8F\x8F\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFE", i) == 0); + assert(libsimple_strisutf8("\xFE\x8F", i) == 0); + assert(libsimple_strisutf8("\xFE\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFE\x8F\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFE\x8F\x8F\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFE\x8F\x8F\x8F\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFF", i) == 0); + assert(libsimple_strisutf8("\xFF\x8F", i) == 0); + assert(libsimple_strisutf8("\xFF\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFF\x8F\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFF\x8F\x8F\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFF\x8F\x8F\x8F\x8F\x8F", i) == 0); + assert(libsimple_strisutf8("\xFF\x8F\x8F\x8F\x8F\x8F\x8F", i) == 0); + 
assert(libsimple_strisutf8("\xC1\x80", i) == 0); + assert(libsimple_strisutf8("\xC2\x80", i) == 1); + assert(libsimple_strisutf8("\xE1\x80\x80\x80", i) == 0); + assert(libsimple_strisutf8("\xE1\x80\xC0\x80", i) == 0); + assert(libsimple_strisutf8("\xE1\x80\x00\x80", i) == 0); + assert(libsimple_strisutf8("\xF1\x80\x80\x80", i) == 1); + assert(libsimple_strisutf8("\xFF\x80\x80\x80\x80\x80\x80\x80", i) == 0); } return 0; } |