aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--libsimple/str.h6
-rw-r--r--man/libsimple_inchrset.32
-rw-r--r--man/libsimple_strisutf8.374
-rw-r--r--strisutf8.c (renamed from isutf8.c)102
5 files changed, 130 insertions, 56 deletions
diff --git a/Makefile b/Makefile
index 3469282..dbd993c 100644
--- a/Makefile
+++ b/Makefile
@@ -58,7 +58,6 @@ OBJ =\
envposix_memalignzn.o\
envputenvf.o\
envreallocn.o\
- isutf8.o\
memcasechr.o\
memcasecmp.o\
memcaseends.o\
@@ -94,6 +93,7 @@ OBJ =\
strchrnul.o\
strends.o\
streqlen.o\
+ strisutf8.o\
strncasechr.o\
strncasechrnul.o\
strncaseends.o\
diff --git a/libsimple/str.h b/libsimple/str.h
index 3fd1e21..b990fe9 100644
--- a/libsimple/str.h
+++ b/libsimple/str.h
@@ -383,7 +383,7 @@ static inline int libsimple_inchrcaseset(int __c, const char *__s)
* @return 1 if good, 0 on encoding error
*/
_LIBSIMPLE_GCC_ONLY(__attribute__((__pure__, __nonnull__, __warn_unused_result__)))
-int libsimple_isutf8(const char *, int);
-#ifndef isutf8
-# define isutf8 libsimple_isutf8
+int libsimple_strisutf8(const char *, int);
+#ifndef strisutf8
+# define strisutf8 libsimple_strisutf8
#endif
diff --git a/man/libsimple_inchrset.3 b/man/libsimple_inchrset.3
index 09b1ca1..f07263c 100644
--- a/man/libsimple_inchrset.3
+++ b/man/libsimple_inchrset.3
@@ -1,4 +1,4 @@
-.TH LIBSIMPLE_STRCHRNUL 3 2018-11-05 libsimple
+.TH LIBSIMPLE_INCHRSET 3 2018-11-05 libsimple
.SH NAME
libsimple_inchrset, libsimple_inchrcaseset \- test whether a character belongs to a set
.SH SYNOPSIS
diff --git a/man/libsimple_strisutf8.3 b/man/libsimple_strisutf8.3
new file mode 100644
index 0000000..24dcd96
--- /dev/null
+++ b/man/libsimple_strisutf8.3
@@ -0,0 +1,74 @@
+.TH LIBSIMPLE_STRISUTF8 3 2018-11-05 libsimple
+.SH NAME
+libsimple_strisutf8 \- check if a string is encoded in UTF-8
+.SH SYNOPSIS
+.nf
+#include <libsimple.h>
+
+int libsimple_strisutf8(const char *\fIstring\fP, int \fIallow_modified_nul\fP);
+
+#ifndef strisutf8
+# define strisutf8 libsimple_strisutf8
+#endif
+.fi
+.PP
+Link with
+.IR \-lsimple .
+.SH DESCRIPTION
+The
+.BR libsimple_strisutf8 ()
+function checks if
+.I string
+is in valid UTF-8. If
+.I allow_modified_nul
+is non-zero, NUL encoded with 2 bytes is accepted.
+.SH RETURN VALUE
+The
+.BR libsimple_strisutf8 ()
+function returns 1 if the
+.I string
+is in valid UTF-8 (Modified UTF-8 if
+.I allow_modified_nul
+is non-zero); otherwise 0 is returned.
+.SH ERRORS
+The
+.BR libsimple_strisutf8 ()
+function cannot fail.
+.SH ATTRIBUTES
+For an explanation of the terms used in this section, see
+.BR attributes (7).
+.TS
+allbox;
+lb lb lb
+l l l.
+Interface Attribute Value
+T{
+.BR libsimple_strisutf8 ()
+T} Thread safety MT-Safe
+T{
+.BR libsimple_strisutf8 ()
+T} Async-signal safety AS-Safe
+T{
+.BR libsimple_strisutf8 ()
+T} Async-cancel safety AC-Safe
+.TE
+.SH EXAMPLES
+None.
+.SH APPLICATION USAGE
+None.
+.SH RATIONALE
+None.
+.SH FUTURE DIRECTIONS
+None.
+.SH NOTES
+None.
+.SH BUGS
+None.
+.SH SEE ALSO
+None.
diff --git a/isutf8.c b/strisutf8.c
index 232be5d..ac4d0cb 100644
--- a/isutf8.c
+++ b/strisutf8.c
@@ -4,7 +4,7 @@
int
-libsimple_isutf8(const char *string, int allow_modified_nul)
+libsimple_strisutf8(const char *string, int allow_modified_nul)
{
static long BYTES_TO_MIN_BITS[] = {0, 0, 8, 12, 17, 22, 27};
static long BYTES_TO_MAX_BITS[] = {0, 7, 11, 16, 21, 26, 31};
@@ -79,56 +79,56 @@ main(void)
{
int i;
for (i = 0; i < 2; i++) {
- assert(libsimple_isutf8("", i) == 1);
- assert(libsimple_isutf8("a", i) == 1);
- assert(libsimple_isutf8("abc", i) == 1);
- assert(libsimple_isutf8("123", i) == 1);
- assert(libsimple_isutf8("åäö", i) == 1);
- assert(libsimple_isutf8("𝖆𝖇𝖈", i) == 1);
- assert(libsimple_isutf8("\x1b", i) == 1);
- assert(libsimple_isutf8("\n\r\t\f", i) == 1);
- assert(libsimple_isutf8("\xFF", i) == 0);
- assert(libsimple_isutf8("\x01", i) == 1);
- assert(libsimple_isutf8("\x7F", i) == 1);
- assert(libsimple_isutf8("\x80", i) == 0);
- assert(libsimple_isutf8("\xC0", i) == 0);
- assert(libsimple_isutf8("\xC0\x80", i) == i);
- assert(libsimple_isutf8("\xC0\x81", i) == 0);
- assert(libsimple_isutf8("\xCF", i) == 0);
- assert(libsimple_isutf8("\xEF", i) == 0);
- assert(libsimple_isutf8("\xEF\x8F", i) == 0);
- assert(libsimple_isutf8("\xF7", i) == 0);
- assert(libsimple_isutf8("\xF7\x8F", i) == 0);
- assert(libsimple_isutf8("\xF7\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFA", i) == 0);
- assert(libsimple_isutf8("\xFA\x8F", i) == 0);
- assert(libsimple_isutf8("\xFA\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFA\x8F\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFD", i) == 0);
- assert(libsimple_isutf8("\xFD\x8F", i) == 0);
- assert(libsimple_isutf8("\xFD\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFD\x8F\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFD\x8F\x8F\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFE", i) == 0);
- assert(libsimple_isutf8("\xFE\x8F", i) == 0);
- assert(libsimple_isutf8("\xFE\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFE\x8F\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFE\x8F\x8F\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFE\x8F\x8F\x8F\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFF", i) == 0);
- assert(libsimple_isutf8("\xFF\x8F", i) == 0);
- assert(libsimple_isutf8("\xFF\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFF\x8F\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xFF\x8F\x8F\x8F\x8F\x8F\x8F", i) == 0);
- assert(libsimple_isutf8("\xC1\x80", i) == 0);
- assert(libsimple_isutf8("\xC2\x80", i) == 1);
- assert(libsimple_isutf8("\xE1\x80\x80\x80", i) == 0);
- assert(libsimple_isutf8("\xE1\x80\xC0\x80", i) == 0);
- assert(libsimple_isutf8("\xE1\x80\x00\x80", i) == 0);
- assert(libsimple_isutf8("\xF1\x80\x80\x80", i) == 1);
- assert(libsimple_isutf8("\xFF\x80\x80\x80\x80\x80\x80\x80", i) == 0);
+ assert(libsimple_strisutf8("", i) == 1);
+ assert(libsimple_strisutf8("a", i) == 1);
+ assert(libsimple_strisutf8("abc", i) == 1);
+ assert(libsimple_strisutf8("123", i) == 1);
+ assert(libsimple_strisutf8("åäö", i) == 1);
+ assert(libsimple_strisutf8("𝖆𝖇𝖈", i) == 1);
+ assert(libsimple_strisutf8("\x1b", i) == 1);
+ assert(libsimple_strisutf8("\n\r\t\f", i) == 1);
+ assert(libsimple_strisutf8("\xFF", i) == 0);
+ assert(libsimple_strisutf8("\x01", i) == 1);
+ assert(libsimple_strisutf8("\x7F", i) == 1);
+ assert(libsimple_strisutf8("\x80", i) == 0);
+ assert(libsimple_strisutf8("\xC0", i) == 0);
+ assert(libsimple_strisutf8("\xC0\x80", i) == i);
+ assert(libsimple_strisutf8("\xC0\x81", i) == 0);
+ assert(libsimple_strisutf8("\xCF", i) == 0);
+ assert(libsimple_strisutf8("\xEF", i) == 0);
+ assert(libsimple_strisutf8("\xEF\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xF7", i) == 0);
+ assert(libsimple_strisutf8("\xF7\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xF7\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFA", i) == 0);
+ assert(libsimple_strisutf8("\xFA\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFA\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFA\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFD", i) == 0);
+ assert(libsimple_strisutf8("\xFD\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFD\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFD\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFD\x8F\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFE", i) == 0);
+ assert(libsimple_strisutf8("\xFE\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFE\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFE\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFE\x8F\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFE\x8F\x8F\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFF", i) == 0);
+ assert(libsimple_strisutf8("\xFF\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFF\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFF\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFF\x8F\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFF\x8F\x8F\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xFF\x8F\x8F\x8F\x8F\x8F\x8F", i) == 0);
+ assert(libsimple_strisutf8("\xC1\x80", i) == 0);
+ assert(libsimple_strisutf8("\xC2\x80", i) == 1);
+ assert(libsimple_strisutf8("\xE1\x80\x80\x80", i) == 0);
+ assert(libsimple_strisutf8("\xE1\x80\xC0\x80", i) == 0);
+ assert(libsimple_strisutf8("\xE1\x80\x00\x80", i) == 0);
+ assert(libsimple_strisutf8("\xF1\x80\x80\x80", i) == 1);
+ assert(libsimple_strisutf8("\xFF\x80\x80\x80\x80\x80\x80\x80", i) == 0);
}
return 0;
}