blob: 5b6957b8940a5d1ec16fea081516bfe2e6695d8c (
plain) (
tree)
|
|
# Based on the Unicode Character Database 7.0
# dead_ascii/1: dead-key rules for a character \1 that has no alternate key.
#   * <dead letter \1> followed by " " or by <dead letter \1> again yields "\1".
#   * <compose> followed by "\1" yields the <dead letter \1> state.
# NOTE(review): this reads [ ... ] as "any one of the listed keys" -- confirm
# against the parser.
macro dead_ascii/1
<dead letter \1> [" " <dead letter \1>] : "\1"
<compose> "\1" : <dead letter \1>
end macro
# dead_ascii/2: dead-key rules for a character \1 that also has an alternate
# key \2 (in the calls visible in this file, an ASCII look-alike).
#   * <dead letter \1> followed by " ", by "\2", or by <dead letter \1> again
#     yields "\1".
#   * <compose> followed by either "\1" or "\2" yields the <dead letter \1>
#     state.
# NOTE(review): this reads [ ... ] as "any one of the listed keys" -- confirm
# against the parser.
macro dead_ascii/2
<dead letter \1> [" " "\2" <dead letter \1>] : "\1"
<compose> ["\1" "\2"] : <dead letter \1>
end macro
# composite/3: composite(dead, base, result)
#   \1 = character naming the dead letter, \2 = base key, \3 = output.
#   * <dead letter \1> followed by the base key yields the output.
#   * <compose>, base key, then "\1" yields the output as well.
# Every call in this file passes the dead-letter character first
# (e.g. composite("`" "A" "À")), so the dead letter must be \1 and the base
# key \2; the previous body had the two swapped and expanded to rules such as
# <dead letter A> "`".
macro composite/3
<dead letter \1> "\2" : "\3"
<compose> "\2" "\1" : "\3"
end macro
# composite/4: composite(dead, alt, base, result)
#   \1 = character naming the dead letter, \2 = alternate key for that mark,
#   \3 = base key, \4 = output.
#   * <dead letter \1> followed by the base key yields the output.
#   * <compose>, base key, then either "\1" or "\2" yields the output as well.
# Every call in this file passes (dead, alt, base, result), e.g.
# composite("´" "'" "A" "Á"); the previous body treated \2 as the dead letter
# and \1 as the base key, which expanded to rules such as <dead letter '> "´".
# NOTE(review): this reads [ ... ] as "any one of the listed keys" -- confirm
# against the parser.
macro composite/4
<dead letter \1> "\3" : "\4"
<compose> "\3" ["\1" "\2"] : "\4"
end macro
# <compose> u + and <compose> U + switch to the hexcompose / longhexcompose
# dead states. Nothing in this part of the file defines how those states
# consume further keys.
<compose> "u" "+" : <dead hexcompose>
<compose> "U" "+" : <dead longhexcompose>
# TODO C0 Controls
# Basic Latin:
# Dead keys for ^, ` and ~ via dead_ascii/1 (no alternate key).
dead_ascii("^")
dead_ascii("`")
dead_ascii("~")
# TODO C1 Controls
# Latin-1 Supplement:
# Symbols U+00A1 through U+00BF, in code point order.
# NOTE(review): ( ... ) appears to group keys that may be typed in any order
# and [ ... ] to list alternatives -- confirm against the parser.
<compose> "!" "!" : "¡"
<compose> ("/" "c") : "¢"
# Was ["c" "L"]; "L" broke the lower/upper pair used by the neighbouring rules.
<compose> ("|" ["c" "C"]) : "¢"
<compose> ("-" ["l" "L"]) : "£"
<compose> (["o" "O"] ["x" "X"]) : "¤"
<compose> (["-" "="] ["y" "Y"]) : "¥"
<compose> ("!" "|") : "¦"
<compose> (["o" "O"] ["s" "S"]) : "§"
# Macro arguments are whitespace-separated like every other call in this file.
dead_ascii("¨" "\"")
<compose> (["o" "O"] ["c" "C"]) : "©"
(<compose> <dead letter ~>) "_" "a" : "ª"
<compose> "<" "<" : "«"
<compose> ("-" ",") : "¬"
# Written as an escape because the character itself is invisible in an editor.
<compose> "-" "-" " " : "\u00AD" # soft hyphen
<compose> (["o" "O"] ["r" "R"]) : "®"
<compose> "-" : <dead letter ¯>
<compose> "o" : <dead letter °>
dead_ascii("°")
<compose> "+" "-" : "±"
<dead letter ^> "2" : "²"
<dead letter ^> "3" : "³"
dead_ascii("´" "'")
<compose> "m" "u" : "µ"
<compose> ["p" "P"] ["p" "P" "!"] : "¶"
<compose> "!" ["p" "P"] : "¶"
<compose> "." "-" : "·"
# Cedilla: defines <dead letter ¸>, which composite("¸" "," ...) relies on.
dead_ascii("¸" ",")
<dead letter ^> "1" : "¹"
(<compose> <dead letter ~>) "_" "o" : "º"
<compose> ">" ">" : "»"
<compose> "1" "4" : "¼"
<compose> "1" "2" : "½"
<compose> "3" "4" : "¾"
<compose> "?" "?" : "¿"
# Letters U+00C0 through U+00FF, in code point order.
# composite(dead, base, result) and composite(dead, alt, base, result) attach
# the mark named by the first argument to the base letter.
composite("`" "A" "À")
composite("´" "'" "A" "Á")
composite("^" "A" "Â")
composite("~" "A" "Ã")
composite("¨" "\"" "A" "Ä")
<dead letter °> "A" : "Å"
<compose> "A" "E" : "Æ"
composite("¸" "," "C" "Ç")
composite("`" "E" "È")
composite("´" "'" "E" "É")
composite("^" "E" "Ê")
composite("¨" "\"" "E" "Ë")
composite("`" "I" "Ì")
composite("´" "'" "I" "Í")
composite("^" "I" "Î")
composite("¨" "\"" "I" "Ï")
# NOTE(review): <compose> "-" is also bound to <dead letter ¯> earlier in this
# file; confirm which binding the parser keeps.
<compose> "-" : <dead letter ->
# U+00D0 (capital eth, the uppercase of "ð" below), not the look-alike U+0110.
composite("-" "D" "Ð")
<compose> "/" : <dead letter />
composite("/" "D" "Ð")
composite("~" "N" "Ñ")
composite("`" "O" "Ò")
composite("´" "'" "O" "Ó")
composite("^" "O" "Ô")
composite("~" "O" "Õ")
composite("¨" "\"" "O" "Ö")
<compose> "x" "x" : "×"
composite("/" "O" "Ø")
composite("`" "U" "Ù")
composite("´" "'" "U" "Ú")
composite("^" "U" "Û")
composite("¨" "\"" "U" "Ü")
composite("´" "'" "Y" "Ý")
<compose> "T" "H" : "Þ"
<compose> "s" "s" : "ß"
<compose> "ſ" "s" : "ß"
composite("`" "a" "à")
composite("´" "'" "a" "á")
composite("^" "a" "â")
composite("~" "a" "ã")
composite("¨" "\"" "a" "ä")
<dead letter °> "a" : "å"
<compose> "a" "e" : "æ"
composite("¸" "," "c" "ç")
composite("`" "e" "è")
composite("´" "'" "e" "é")
composite("^" "e" "ê")
composite("¨" "\"" "e" "ë")
composite("`" "i" "ì")
composite("´" "'" "i" "í")
composite("^" "i" "î")
composite("¨" "\"" "i" "ï")
composite("/" "d" "ð")
composite("~" "n" "ñ")
composite("`" "o" "ò")
composite("´" "'" "o" "ó")
composite("^" "o" "ô")
composite("~" "o" "õ")
composite("¨" "\"" "o" "ö")
<compose> ":" "-" : "÷"
composite("/" "o" "ø")
composite("`" "u" "ù")
composite("´" "'" "u" "ú")
composite("^" "u" "û")
composite("¨" "\"" "u" "ü")
composite("´" "'" "y" "ý")
<compose> "t" "h" : "þ"
composite("¨" "\"" "y" "ÿ")
# TODO Latin Extended-A
# TODO Latin Extended-B
# TODO IPA Extensions
# TODO Spacing Modifier Letter
# TODO Combining Diacritical Marks
# TODO Greek and Coptic
# TODO Cyrillic
# TODO Cyrillic Supplement
# TODO Armenian
# TODO Hebrew
# TODO Arabic
# TODO Syriac
# TODO Arabic Supplement
# TODO Thaana
# TODO NKo
# TODO Samaritan
# TODO Mandaic
# TODO Arabic Extended-A
# TODO Devanagari
# TODO Bengali
# TODO Gurmukhi
# TODO Gujarati
# TODO Oriya
# TODO Tamil
# TODO Telugu
# TODO Kannada
# TODO Malayalam
# TODO Sinhala
# TODO Thai
# TODO Lao
# TODO Tibetan
# TODO Myanmar
# TODO Georgian
# TODO Hangul Jamo
# TODO Ethiopic
# TODO Ethiopic Supplement
# TODO Cherokee
# TODO Unified Canadian Aboriginal Syllabics
# TODO Ogham
# TODO Runic
# TODO Tagalog
# TODO Hanunoo
# TODO Buhid
# TODO Tagbanwa
# TODO Khmer
# TODO Mongolian
# TODO Unified Canadian Aboriginal Syllabics Extended
# TODO Limbu
# TODO Tai Le
# TODO New Tai Lue
# TODO Khmer Symbols
# TODO Buginese
# TODO Tai Tham
# TODO Combining Diacritical Marks Extended
# TODO Balinese
# TODO Sundanese
# TODO Batak
# TODO Lepcha
# TODO Ol Chiki
# TODO Sundanese Supplement
# TODO Vedic Extensions
# TODO Phonetic Extensions
# TODO Phonetic Extensions Supplement
# TODO Combining Diacritical Marks Supplement
# TODO Latin Extended Additional
# TODO Greek Extended
# General Punctuation:
# U+2000..U+200F: spaces and zero-width / directional marks, all entered as
# <compose> <space> followed by one or two selector keys.
<compose> <space> "_" "." : "\u2000" # en quad
<compose> <space> "_" "_" : "\u2001" # em quad
<compose> <space> "-" "." : "\u2002" # en space
<compose> <space> "-" "-" : "\u2003" # em space
<compose> <space> "3" : "\u2004" # three-per-em space
<compose> <space> "4" : "\u2005" # four-per-em space
<compose> <space> "6" : "\u2006" # six-per-em space
<compose> <space> "f" : "\u2007" # figure space
<compose> <space> "." : "\u2008" # punctuation space
<compose> <space> "t" : "\u2009" # thin space
<compose> <space> "h" : "\u200A" # hairspace
<compose> <space> "0" : "\u200B" # zero width space
<compose> <space> "J" : "\u200C" # zero width non-joiner
<compose> <space> "j" : "\u200D" # zero width joiner
<compose> <space> "l" : "\u200E" # left-to-right mark
<compose> <space> "r" : "\u200F" # right-to-left mark
# U+2010..U+2017: hyphens, dashes and lines.
# NOTE(review): the quoted " " in the non-breaking hyphen rule is distinct from
# the <space> key used on the line before it; confirm which character it is.
<compose> "-" <space> : "\u2010" # hyphen
<compose> "-" " " : "\u2011" # non-breaking hyphen
<compose> "-" "-" "f" : "\u2012" # figure dash
<compose> "-" "-" "." : "\u2013" # en dash
<compose> "-" "-" "-" : "\u2014" # em dash
<compose> "-" "-" "_" : "\u2015" # horizontal bar
<compose> "|" "|" : "\u2016" # double vertical line
<compose> "-" "_" : "\u2017" # double low line
# U+2018..U+201F: quotation marks. "(" selects the opening / left form and ")"
# the closing / reversed form; the second key selects the mark.
# NOTE(review): ( ... ) appears to allow the grouped keys in any order --
# confirm against the parser.
<compose> ("(" "'") : "‘"
<compose> (")" "'") : "’"
<compose> ("(" ",") : "‚"
<compose> (")" ",") : "‛"
<compose> ("(" "\"") : "“"
<compose> (")" "\"") : "”"
<compose> ("(" ";") : "„"
<compose> (")" ";") : "‟"
# U+2020..U+2027: daggers, bullets, dot leaders, ellipsis, hyphenation point.
<compose> "+" "T" : "†"
<compose> "+" "+" "T" : "‡"
<shift compose> "." "." : "•"
<shift compose> "|" ">" : "‣"
<compose> "." "," : "\u2024" # one dot leader
<compose> "." ";" : "\u2025" # two dot leader
<compose> "." "." : "…"
<compose> "-" "." : "\u2027" # hyphenation point
# U+2028..U+202F: separators, directional embedding / override controls and
# the narrow no-break space.
<compose> "l" <space> : "\u2028" # line separator
<compose> "p" <space> : "\u2029" # paragraph separator
<compose> <space> "e" "l" : "\u202A" # left-to-right embedding
<compose> <space> "e" "r" : "\u202B" # right-to-left embedding
<compose> <space> "e" "p" : "\u202C" # pop directional formatting
<compose> <space> "o" "l" : "\u202D" # left-to-right override
<compose> <space> "o" "r" : "\u202E" # right-to-left override
<compose> "m" " " : "\u202F" # narrow no-break space
# U+2030..U+2031: per mille and per ten thousand.
<compose> "%" ["o" "."] : "‰"
<compose> "%" ["O" ":"] : "‱"
# U+2032..U+2037: primes and reversed primes. The digit gives the number of
# strokes; "'" or "´" selects a prime, "`" a reversed prime.
# Alternatives inside [ ... ] are whitespace-separated, as everywhere else in
# this file (the commas that used to be here were the only ones).
<compose> ("1" ["'" "´"]) : "′"
<compose> ("2" ["'" "´"]) : "″"
<compose> ("3" ["'" "´"]) : "‴"
<compose> ("1" "`") : "‵"
<compose> ("2" "`") : "‶"
<compose> ("3" "`") : "‷"
# U+2038..U+2053: miscellaneous punctuation. <shift compose> carries the
# variants whose key sequence is already taken by a <compose> rule (for
# example "!" "!" and "?" "?").
# Every rule keeps a space on both sides of the ":" separator.
<shift compose> ("." "^") : "‸"
<shift compose> ("." "<") : "‹"
<shift compose> ("." ">") : "›"
<compose> ("x" "+") : "※"
<shift compose> "!" "!" : "‼"
<compose> "!" "?" : "‽"
<shift compose> "^" "_" : "‾"
<shift compose> ("_" "u") : "‿"
<shift compose> ("^" "u") : "⁀"
<shift compose> "/" "/" : "⁁"
<compose> "*" "*" : "⁂"
<shift compose> "-" "-" : "⁃"
<compose> "/" "/" : "⁄"
<compose> ("[" "-") : "⁅"
<compose> ("]" "-") : "⁆"
<shift compose> "?" "?" : "⁇"
<shift compose> "?" "!" : "⁈"
<shift compose> "!" "?" : "⁉"
<compose> ("/" "7") : "⁊"
<compose> "¶" "¶" : "⁋"
<compose> ("?" ["p" "P"]) : "⁋"
<compose> "(" "|" : "⁌"
<compose> "|" ")" : "⁍"
<compose> ("*" ".") : "⁎"
<compose> ";" ";" : "⁏"
<shift compose> "(" ")" : "⁐"
<compose> ("*" ":") : "⁑"
<compose> "%" "%" : "⁒"
<shift compose> "~" ["-" "~"] : "⁓"
<shift compose> "-" "~" : "⁓"
# U+2054..U+2069: remaining punctuation, dot patterns, mathematical spaces,
# invisible operators and directional isolates.
# The first rule uses <shift compose> like its sibling ("_" "u") rule for "‿";
# it was previously misspelled "<sjift compose>".
<shift compose> ("_" "U") : "⁔"
<compose> ("x" "*") : "⁕"
<compose> "." ":" <space> : "⁖"
# Alternatives inside [ ... ] are whitespace-separated, without commas.
<compose> ("4" ["'" "´"]) : "⁗"
<compose> "." ":" "." : "⁘"
<compose> ":" "." ":" : "⁙"
<compose> "|" "." "." : "⁚"
<shift compose> "." ":" "." : "⁛"
<compose> ("x" ":") : "⁜"
<compose> "|" ":" "." : "⁝"
<compose> "|" ":" ":" : "⁞"
<compose> <space> "m" : "\u205F" # medium mathematical space
<compose> <space> "w" : "\u2060" # word joiner
<shift compose> ("f" " ") : "\u2061" # function application
<shift compose> (["x" "×" "*"] " ") : "\u2062" # invisible times
<shift compose> (["," "."] " ") : "\u2063" # invisible separator
<shift compose> ("+" " ") : "\u2064" # invisible plus
<compose> <space> "i" "l" : "\u2066" # left-to-right isolate
<compose> <space> "i" "r" : "\u2067" # right-to-left isolate
<compose> <space> "i" "s" : "\u2068" # first strong isolate
<compose> <space> "i" "p" : "\u2069" # pop directional isolate
# U+206A through U+206C are deprecated
# TODO Superscripts and Subscripts
# TODO Currency Symbols
# TODO Combining Diacritical Marks for Symbols
# TODO Letterlike Symbols
# TODO Number Forms
# TODO Arrows
# TODO Mathematical Operators
# TODO Miscellaneous Technical
# TODO Control Pictures
# TODO Optical Character Recognition
# TODO Enclosed Alphanumerics
# TODO Box Drawing
# TODO Block Elements
# TODO Geometric Shapes
# TODO Miscellaneous Symbols
# TODO Dingbats
# TODO Miscellaneous Mathematical Symbols-A
# TODO Supplemental Arrows-A
# TODO Braille Patterns
# TODO Supplemental Arrows-B
# TODO Miscellaneous Mathematical Symbols-B
# TODO Supplemental Mathematical Operators
# TODO Miscellaneous Symbols and Arrows
# TODO Glagolitic
# TODO Latin Extended-C
# TODO Coptic
# TODO Georgian Supplement
# TODO Tifinagh
# TODO Ethiopic Extended
# TODO Cyrillic Extended-A
# TODO Supplemental Punctuation
# TODO CJK Radicals Supplement
# TODO Kangxi Radicals
# TODO Ideographic Description Characters
# TODO CJK Symbols and Punctuation
# TODO Hiragana
# TODO Katakana
# TODO Bopomofo
# TODO Hangul Compatibility Jamo
# TODO Kanbun
# TODO Bopomofo Extended
# TODO CJK Strokes
# TODO Katakana Phonetic Extensions
# TODO Enclosed CJK Letters and Months
# TODO CJK Compatibility
# TODO CJK Unified Ideographs Extension A
# TODO Yijing Hexagram Symbols
# TODO CJK Unified Ideographs
# TODO Yi Syllables
# TODO Yi Radicals
# TODO Lisu
# TODO Vai
# TODO Cyrillic Extended-B
# TODO Bamum
# TODO Modifier Tone Letters
# TODO Latin Extended-D
# TODO Syloti Nagri
# TODO Common Indic Number Forms
# TODO Phags-pa
# TODO Saurashtra
# TODO Devanagari Extended
# TODO Kayah Li
# TODO Rejang
# TODO Hangul Jamo Extended-A
# TODO Javanese
# TODO Myanmar Extended-B
# TODO Cham
# TODO Myanmar Extended-A
# TODO Tai Viet
# TODO Meetei Mayek Extensions
# TODO Ethiopic Extended-A
# TODO Latin Extended-E
# TODO Meetei Mayek
# TODO Hangul Syllables
# TODO Hangul Jamo Extended-B
# TODO CJK Compatibility Ideographs
# TODO Alphabetic Presentation Forms
# TODO Arabic Presentation Forms-A
# TODO Variation Selectors
# TODO Vertical Forms
# TODO Combining Half Marks
# TODO CJK Compatibility Forms
# TODO Small Form Variants
# TODO Arabic Presentation Forms-B
# TODO Halfwidth and Fullwidth Forms
# TODO Specials
# TODO Linear B Syllabary
# TODO Linear B Ideograms
# TODO Aegean Numbers
# TODO Ancient Greek Numbers
# TODO Ancient Symbols
# TODO Phaistos Disc
# TODO Lycian
# TODO Carian
# TODO Coptic Epact Numbers
# TODO Old Italic
# TODO Gothic
# TODO Old Permic
# TODO Ugaritic
# TODO Old Persian
# TODO Deseret
# TODO Shavian
# TODO Osmanya
# TODO Elbasan
# TODO Caucasian Albanian
# TODO Linear A
# TODO Cypriot Syllabary
# TODO Imperial Aramaic
# TODO Palmyrene
# TODO Nabataean
# TODO Phoenician
# TODO Lydian
# TODO Meroitic Hieroglyphs
# TODO Meroitic Cursive
# TODO Kharoshthi
# TODO Old South Arabian
# TODO Old North Arabian
# TODO Manichaean
# TODO Avestan
# TODO Inscriptional Parthian
# TODO Psalter Pahlavi
# TODO Old Turkic
# TODO Rumi Numeral Symbols
# TODO Brahmi
# TODO Kaithi
# TODO Sora Sompeng
# TODO Chakma
# TODO Mahajani
# TODO Sharada
# TODO Sinhala Archaic Numbers
# TODO Khojki
# TODO Khudawadi
# TODO Grantha
# TODO Tirhuta
# TODO Siddham
# TODO Modi
# TODO Takri
# TODO Warang Citi
# TODO Pau Cin Hau
# TODO Cuneiform
# TODO Cuneiform Numbers and Punctuation
# TODO Egyptian Hieroglyphs
# TODO Bamum Supplement
# TODO Mro
# TODO Bassa Vah
# TODO Pahawh Hmong
# TODO Miao
# TODO Kana Supplement
# TODO Duployan
# TODO Shorthand Format Controls
# TODO Byzantine Musical Symbols
# TODO Musical Symbols
# TODO Ancient Greek Musical Notation
# TODO Tai Xuan Jing Symbols
# TODO Counting Rod Numerals
# TODO Mathematical Alphanumeric Symbols
# TODO Mende Kikakui
# TODO Arabic Mathematical Alphabetic Symbols
# TODO Mahjong Tiles
# TODO Domino Tiles
# TODO Playing Cards
# TODO Enclosed Alphanumeric Supplement
# TODO Enclosed Ideographic Supplement
# TODO Miscellaneous Symbols and Pictographs
# TODO Emoticons
# TODO Ornamental Dingbats
# TODO Transport and Map Symbols
# TODO Alchemical Symbols
# TODO Geometric Shapes Extended
# TODO Supplemental Arrows-C
# TODO CJK Unified Ideographs Extension B
# TODO CJK Unified Ideographs Extension C
# TODO CJK Unified Ideographs Extension D
# TODO CJK Compatibility Ideographs Supplement
# TODO Tags
# TODO Variation Selectors Supplement
|