From 120d0259be2e9796757572ea5b7e43ef5715402e Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Sun, 23 Feb 2025 15:48:02 +0100 Subject: Add some encodings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- maps/SYNTAX | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ maps/iso-ir-014 | 5 +++++ maps/ucs-2-be | 1 + maps/ucs-2-le | 1 + maps/utf-1 | 35 +++++++++++++++++++++++++++++++++++ maps/utf-16-be | 4 ++++ maps/utf-16-le | 4 ++++ maps/utf-32-be | 1 + maps/utf-32-le | 1 + maps/utf-8 | 26 ++++++++++++++++++++++++++ maps/wtf-8 | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 186 insertions(+) create mode 100644 maps/SYNTAX create mode 100644 maps/iso-ir-014 create mode 100644 maps/ucs-2-be create mode 100644 maps/ucs-2-le create mode 100644 maps/utf-1 create mode 100644 maps/utf-16-be create mode 100644 maps/utf-16-le create mode 100644 maps/utf-32-be create mode 100644 maps/utf-32-le create mode 100644 maps/utf-8 create mode 100644 maps/wtf-8 (limited to 'maps') diff --git a/maps/SYNTAX b/maps/SYNTAX new file mode 100644 index 0000000..a3ad110 --- /dev/null +++ b/maps/SYNTAX @@ -0,0 +1,52 @@ +"include" + Include the contents of . + +"unmap" + Remove mapping for byte sequence range. + +"remap" "to" "U+" ["using" ] + Like "map", except assume all sequence is already defined + and redefine them instead. + +["readonly"] "map" "to" "U+" ["using" ] + Map a byte sequence range to a code point range. + If a sequence is already defined, fail. + (hexadecimal) shall be the code point for the + lowest valued sequence in + If multiple byte sequences, may be "big-endian" or "little-endian" + to specify the significance of the bytes in the sequence, alternative + value calculations can be specified in + If "readonly" is present, the mapping should only be used when decoding, + not when encoding. 
For each mapped code point, there must exist exactly + one mapping that is not "readonly". + + + Single byte range. + [] (", " []) ... + A sequence of bytes, in storage order. + shall appear on all or no , and + shall appear if and only if was not used but at + least two s are valued (have multiple legal values). + + + + Single byte, not valued. + (["-"|"+"])... + The s should be ordered by mapping value. + The first is the lowest valued byte in the domain (valued 0). + "-" creates a range from the previous up to and including + the following it. + "+" creates a domain disjunction, skipping over the values + between, but excluding, the s + on either side of it. + + + "<<<" + Set the relative significance of the byte; 0 is least significant. + + + + 0x + +\ may be used at the end of a line to replace the with a space. diff --git a/maps/iso-ir-014 b/maps/iso-ir-014 new file mode 100644 index 0000000..1e660e3 --- /dev/null +++ b/maps/iso-ir-014 @@ -0,0 +1,5 @@ +map 0x20-0x5B to U+0020 +map 0x5C to U+00A5 +map 0x5D-0x7D to U+005D +map 0x7E to U+00AF +map 0x7F to U+007F diff --git a/maps/ucs-2-be b/maps/ucs-2-be new file mode 100644 index 0000000..97719bb --- /dev/null +++ b/maps/ucs-2-be @@ -0,0 +1 @@ +map 0x00-0xFF, 0x00-0xFF to U+0000 using big-endian diff --git a/maps/ucs-2-le b/maps/ucs-2-le new file mode 100644 index 0000000..1ed78ee --- /dev/null +++ b/maps/ucs-2-le @@ -0,0 +1 @@ +map 0x00-0xFF, 0x00-0xFF to U+0000 using little-endian diff --git a/maps/utf-1 b/maps/utf-1 new file mode 100644 index 0000000..f81a7b6 --- /dev/null +++ b/maps/utf-1 @@ -0,0 +1,35 @@ +map 0x00-0x9F to U+0000 + +map 0xA0,\ + 0xA0-0xFF to U+00A0 + +map 0xA1-0xF5,\ + 0x21-0x7E+0xA0-0xFF to U+0100 using big-endian + +map 0xF6-0xFB,\ + 0x21-0x7E+0xA0-0xFF,\ + 0x21-0x7E+0xA0-0xFF to U+4016 using big-endian + +map 0xFC,\ + 0x21-0x7E+0xA0-0xFF,\ + 0x21-0x7E+0xA0-0xFF,\ + 0x21-0x7E+0xA0-0xFF,\ + 0x21-0x7E+0xA0-0xFF to U+00038E2E using big-endian + +map 0xFD,\ + 0x21-0x7E+0xA0-0xBC,\ + 
0x21-0x7E+0xA0-0xFF,\ + 0x21-0x7E+0xA0-0xFF,\ + 0x21-0x7E+0xA0-0xFF to U+4DB0F63E using big-endian + +map 0xFD, 0xBD,\ + 0x21-0x2A,\ + 0x21-0x7E+0xA0-0xFF,\ + 0x21-0x7E+0xA0-0xFF to U+7FFA2566 using big-endian + +map 0xFD, 0xBD, 0x2B,\ + 0x21-0x7E+0xA0-0xB8,\ + 0x21-0x7E+0xA0-0xFF to U+7FFFA78E using big-endian + +map 0xFD, 0xBD, 0x2B, 0xB9,\ + 0x21-0x40 to U+7FFFFFE0 diff --git a/maps/utf-16-be b/maps/utf-16-be new file mode 100644 index 0000000..7199108 --- /dev/null +++ b/maps/utf-16-be @@ -0,0 +1,4 @@ +map 0x00-0xD7, 0x00-0xFF to U+0000 using big-endian +map 0xE0-0xFF, 0x00-0xFF to U+E000 using big-endian +map 0xD8-0xDB, 0x00-0xFF,\ + 0xDC-0xDF, 0x00-0xFF to U+010000 using big-endian diff --git a/maps/utf-16-le b/maps/utf-16-le new file mode 100644 index 0000000..0985f1f --- /dev/null +++ b/maps/utf-16-le @@ -0,0 +1,4 @@ +map 0x00-0xFF, 0x00-0xD7 to U+0000 using little-endian +map 0x00-0xFF, 0xE0-0xFF to U+E000 using little-endian +map 0x00-0xFF <<< 2, 0xD8-0xDB <<< 3,\ + 0x00-0xFF <<< 0, 0xDC-0xDF <<< 1 to U+010000 diff --git a/maps/utf-32-be b/maps/utf-32-be new file mode 100644 index 0000000..27a74ec --- /dev/null +++ b/maps/utf-32-be @@ -0,0 +1 @@ +map 0x00-0x7F, 0x00-0xFF, 0x00-0xFF, 0x00-0xFF to U+00000000 using big-endian diff --git a/maps/utf-32-le b/maps/utf-32-le new file mode 100644 index 0000000..994884e --- /dev/null +++ b/maps/utf-32-le @@ -0,0 +1 @@ +map 0x00-0xFF, 0x00-0xFF, 0x00-0xFF, 0x00-0x7F to U+00000000 using little-endian diff --git a/maps/utf-8 b/maps/utf-8 new file mode 100644 index 0000000..f707552 --- /dev/null +++ b/maps/utf-8 @@ -0,0 +1,48 @@ +map 0x00-0x7F to U+0000 + +map 0xC2-0xDF,\ + 0x80-0xBF to U+0080 using big-endian + +map 0xE0,\ + 0xA0-0xBF,\ + 0x80-0xBF to U+0800 using big-endian + +map 0xE1-0xEF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+1000 using big-endian + +map 0xF0,\ + 0x90-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+010000 using big-endian + +map 0xF1-0xF7,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+040000 using big-endian + +map 0xF8,\ + 0x88-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+200000 using big-endian + +map 0xF9-0xFB,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+01000000 using big-endian + +map 0xFD,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+40000000 using big-endian + +map 0xFC,\ + 0x84-0xBF,\ + 0x80-0xBF,\ + 
0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+04000000 using big-endian diff --git a/maps/wtf-8 b/maps/wtf-8 new file mode 100644 index 0000000..43757ee --- /dev/null +++ b/maps/wtf-8 @@ -0,0 +1,78 @@ +map 0x00-0x7F to U+0000 + +map 0xC2-0xDF,\ + 0x80-0xBF to U+0080 using big-endian + +map 0xE0,\ + 0xA0-0xBF,\ + 0x80-0xBF to U+0800 using big-endian + +map 0xE1-0xEC,\ + 0x80-0xBF,\ + 0x80-0xBF to U+1000 using big-endian + +map 0xED,\ + 0x80-0x9F,\ + 0x80-0xBF to U+D000 using big-endian + +map 0xED,\ + 0xA0-0xAF,\ + 0x80-0xBF,\ + 0xED,\ + 0xB0-0xBF,\ + 0x80-0xBF to U+010000 using big-endian + +map 0xEE-0xEF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+E000 using big-endian + +readonly map 0xF0,\ + 0x90-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+010000 using big-endian + +readonly map 0xF1-0xF3,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+040000 using big-endian + +readonly map 0xF4,\ + 0x80-0x8F,\ + 0x80-0xBF,\ + 0x80-0xBF to U+100000 using big-endian + +map 0xF4,\ + 0x90-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+110000 using big-endian + +map 0xF5-0xF7,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+140000 using big-endian + +map 0xF8,\ + 0x88-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+200000 using big-endian + +map 0xF9-0xFB,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+01000000 using big-endian + +map 0xFC,\ + 0x84-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+04000000 using big-endian + +map 0xFD,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF,\ + 0x80-0xBF to U+40000000 using big-endian -- cgit v1.2.3-70-g09d2