Implement script detection
This commit is contained in:
parent
04940cea04
commit
2097b64da5
4 changed files with 4288 additions and 2716 deletions
|
@ -13,6 +13,10 @@ export fn main() void = {
|
|||
case => break;
|
||||
};
|
||||
const gc = unicode::rune_gc(rn);
|
||||
fmt::printfln("'{}'/{:x}: {}", rn, rn: u32, unicode::gc_code(gc))!;
|
||||
const sc = unicode::rune_script(rn);
|
||||
fmt::printfln("'{}'/{:x}: {} : {}",
|
||||
rn, rn: u32,
|
||||
unicode::gc_code(gc),
|
||||
unicode::script_code(sc))!;
|
||||
};
|
||||
};
|
||||
|
|
|
@ -61,6 +61,7 @@ NAME_ALIASES = "NameAliases%s.txt"
|
|||
NAMED_SEQUENCES = "NamedSequences%s.txt"
|
||||
SPECIAL_CASING = "SpecialCasing%s.txt"
|
||||
CASE_FOLDING = "CaseFolding%s.txt"
|
||||
SCRIPTS = "Scripts%s.txt"
|
||||
|
||||
# Private Use Areas -- in planes 1, 15, 16
|
||||
PUA_1 = range(0xE000, 0xF900)
|
||||
|
@ -88,6 +89,173 @@ EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ]
|
|||
|
||||
MANDATORY_LINE_BREAKS = [ "BK", "CR", "LF", "NL" ]
|
||||
|
||||
SCRIPT_NAMES = [
|
||||
"Common",
|
||||
"Inherited",
|
||||
"Unknown",
|
||||
"Adlam",
|
||||
"Caucasian_Albanian",
|
||||
"Ahom",
|
||||
"Arabic",
|
||||
"Imperial_Aramaic",
|
||||
"Armenian",
|
||||
"Avestan",
|
||||
"Balinese",
|
||||
"Bamum",
|
||||
"Bassa_Vah",
|
||||
"Batak",
|
||||
"Bengali",
|
||||
"Bhaiksuki",
|
||||
"Bopomofo",
|
||||
"Brahmi",
|
||||
"Braille",
|
||||
"Buginese",
|
||||
"Buhid",
|
||||
"Chakma",
|
||||
"Canadian_Aboriginal",
|
||||
"Carian",
|
||||
"Cham",
|
||||
"Cherokee",
|
||||
"Chorasmian",
|
||||
"Coptic",
|
||||
"Cypro_Minoan",
|
||||
"Cypriot",
|
||||
"Cyrillic",
|
||||
"Devanagari",
|
||||
"Dives_Akuru",
|
||||
"Dogra",
|
||||
"Deseret",
|
||||
"Duployan",
|
||||
"Egyptian_Hieroglyphs",
|
||||
"Elbasan",
|
||||
"Elymaic",
|
||||
"Ethiopic",
|
||||
"Georgian",
|
||||
"Glagolitic",
|
||||
"Gunjala_Gondi",
|
||||
"Masaram_Gondi",
|
||||
"Gothic",
|
||||
"Grantha",
|
||||
"Greek",
|
||||
"Gujarati",
|
||||
"Gurmukhi",
|
||||
"Hangul",
|
||||
"Han",
|
||||
"Hanunoo",
|
||||
"Hatran",
|
||||
"Hebrew",
|
||||
"Hiragana",
|
||||
"Anatolian_Hieroglyphs",
|
||||
"Pahawh_Hmong",
|
||||
"Nyiakeng_Puachue_Hmong",
|
||||
"Old_Hungarian",
|
||||
"Old_Italic",
|
||||
"Javanese",
|
||||
"Kayah_Li",
|
||||
"Katakana",
|
||||
"Kawi",
|
||||
"Kharoshthi",
|
||||
"Khmer",
|
||||
"Khojki",
|
||||
"Khitan_Small_Script",
|
||||
"Kannada",
|
||||
"Kaithi",
|
||||
"Tai_Tham",
|
||||
"Lao",
|
||||
"Latin",
|
||||
"Lepcha",
|
||||
"Limbu",
|
||||
"Linear_A",
|
||||
"Linear_B",
|
||||
"Lisu",
|
||||
"Lycian",
|
||||
"Lydian",
|
||||
"Mahajani",
|
||||
"Makasar",
|
||||
"Mandaic",
|
||||
"Manichaean",
|
||||
"Marchen",
|
||||
"Medefaidrin",
|
||||
"Mende_Kikakui",
|
||||
"Meroitic_Cursive",
|
||||
"Meroitic_Hieroglyphs",
|
||||
"Malayalam",
|
||||
"Modi",
|
||||
"Mongolian",
|
||||
"Mro",
|
||||
"Meetei_Mayek",
|
||||
"Multani",
|
||||
"Myanmar",
|
||||
"Nag_Mundari",
|
||||
"Nandinagari",
|
||||
"Old_North_Arabian",
|
||||
"Nabataean",
|
||||
"Newa",
|
||||
"Nko",
|
||||
"Nushu",
|
||||
"Ogham",
|
||||
"Ol_Chiki",
|
||||
"Old_Turkic",
|
||||
"Oriya",
|
||||
"Osage",
|
||||
"Osmanya",
|
||||
"Old_Uyghur",
|
||||
"Palmyrene",
|
||||
"Pau_Cin_Hau",
|
||||
"Old_Permic",
|
||||
"Phags_Pa",
|
||||
"Inscriptional_Pahlavi",
|
||||
"Psalter_Pahlavi",
|
||||
"Phoenician",
|
||||
"Miao",
|
||||
"Inscriptional_Parthian",
|
||||
"Rejang",
|
||||
"Hanifi_Rohingya",
|
||||
"Runic",
|
||||
"Samaritan",
|
||||
"Old_South_Arabian",
|
||||
"Saurashtra",
|
||||
"SignWriting",
|
||||
"Shavian",
|
||||
"Sharada",
|
||||
"Siddham",
|
||||
"Khudawadi",
|
||||
"Sinhala",
|
||||
"Sogdian",
|
||||
"Old_Sogdian",
|
||||
"Sora_Sompeng",
|
||||
"Soyombo",
|
||||
"Sundanese",
|
||||
"Syloti_Nagri",
|
||||
"Syriac",
|
||||
"Tagbanwa",
|
||||
"Takri",
|
||||
"Tai_Le",
|
||||
"New_Tai_Lue",
|
||||
"Tamil",
|
||||
"Tangut",
|
||||
"Tai_Viet",
|
||||
"Telugu",
|
||||
"Tifinagh",
|
||||
"Tagalog",
|
||||
"Thaana",
|
||||
"Thai",
|
||||
"Tibetan",
|
||||
"Tirhuta",
|
||||
"Tangsa",
|
||||
"Toto",
|
||||
"Ugaritic",
|
||||
"Vai",
|
||||
"Vithkuqi",
|
||||
"Warang_Citi",
|
||||
"Wancho",
|
||||
"Old_Persian",
|
||||
"Cuneiform",
|
||||
"Yezidi",
|
||||
"Yi",
|
||||
"Zanabazar_Square",
|
||||
]
|
||||
|
||||
# note: should match definitions in Objects/unicodectype.c
|
||||
ALPHA_MASK = 0x01
|
||||
DECIMAL_MASK = 0x02
|
||||
|
@ -133,7 +301,7 @@ def maketables(trace=0):
|
|||
|
||||
def makeunicodedata(unicode, trace):
|
||||
|
||||
dummy = (0, 0, 0, 0, 0)
|
||||
dummy = (0, 0, 0, 0, 0, 0)
|
||||
table = [dummy]
|
||||
cache = {0: dummy}
|
||||
index = [0] * len(unicode.chars)
|
||||
|
@ -151,8 +319,9 @@ def makeunicodedata(unicode, trace):
|
|||
bidirectional = BIDIRECTIONAL_NAMES.index(record.bidi_class)
|
||||
mirrored = record.bidi_mirrored == "Y"
|
||||
eastasianwidth = EASTASIANWIDTH_NAMES.index(record.east_asian_width)
|
||||
script = SCRIPT_NAMES.index(record.script or "Unknown")
|
||||
item = (
|
||||
category, combining, bidirectional, mirrored, eastasianwidth,
|
||||
category, combining, bidirectional, mirrored, eastasianwidth, script,
|
||||
)
|
||||
# add entry to index and item tables
|
||||
i = cache.get(item)
|
||||
|
@ -176,7 +345,7 @@ def makeunicodedata(unicode, trace):
|
|||
fprint("// List of unique database records")
|
||||
fprint("const ucd_records: [_]ucd_encodedrec = [")
|
||||
for item in table:
|
||||
fprint(" (%d, %d, %d, %d, %d)," % item)
|
||||
fprint(" (%d, %d, %d, %d, %d, %d)," % item)
|
||||
fprint("];")
|
||||
fprint()
|
||||
|
||||
|
@ -288,9 +457,12 @@ class UcdRecord:
|
|||
# We store them as a bitmask.
|
||||
quick_check: int
|
||||
|
||||
# From Scripts.txt
|
||||
script: str
|
||||
|
||||
|
||||
def from_row(row: List[str]) -> UcdRecord:
|
||||
return UcdRecord(*row, None, set(), 0)
|
||||
return UcdRecord(*row, None, set(), 0, "Unknown")
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
|
@ -386,6 +558,14 @@ class UnicodeData:
|
|||
if table[i] is not None:
|
||||
table[i].east_asian_width = widths[i]
|
||||
|
||||
scripts = [None] * 0x110000
|
||||
for char, (script,) in UcdFile(SCRIPTS, version).expanded():
|
||||
scripts[char] = script
|
||||
|
||||
for i in range(0, 0x110000):
|
||||
if table[i] is not None:
|
||||
table[i].script = scripts[i]
|
||||
|
||||
for char, (p,) in UcdFile(DERIVED_CORE_PROPERTIES, version).expanded():
|
||||
if table[char]:
|
||||
# Some properties (e.g. Default_Ignorable_Code_Point)
|
||||
|
|
350
unicode/ucd.ha
350
unicode/ucd.ha
|
@ -1,4 +1,4 @@
|
|||
type ucd_encodedrec = (u8, u8, u8, u8, u8);
|
||||
type ucd_encodedrec = (u8, u8, u8, u8, u8, u16);
|
||||
|
||||
type ucd_record = struct {
|
||||
category: u8,
|
||||
|
@ -6,11 +6,12 @@ type ucd_record = struct {
|
|||
bidirectional: u8,
|
||||
mirrored: u8,
|
||||
east_asian_width: u8,
|
||||
script: u16,
|
||||
};
|
||||
|
||||
fn get_ucdrecord(rn: rune) *ucd_record = {
|
||||
const code = rn: u32;
|
||||
let index = 0u8;
|
||||
let index = 0u16;
|
||||
if (code < 0x110000) {
|
||||
index = index1[(code>>UCD_RECORD_SHIFT)];
|
||||
index = index2[(index<<UCD_RECORD_SHIFT)+(code&((1<<UCD_RECORD_SHIFT)-1))];
|
||||
|
@ -92,3 +93,348 @@ export fn gc_code(v: gc) const str = {
|
|||
case gc::SPACE_SEPARATOR => return "Zs";
|
||||
};
|
||||
};
|
||||
|
||||
// Unicode character Script attribute.
|
||||
export type script = enum u16 {
|
||||
COMMON, // Zyyy
|
||||
INHERITED, // Zinh
|
||||
UNKNOWN, // Zzzz
|
||||
ADLAM, // Adlm
|
||||
CAUCASIAN_ALBANIAN, // Aghb
|
||||
AHOM, // Ahom
|
||||
ARABIC, // Arab
|
||||
IMPERIAL_ARAMAIC, // Armi
|
||||
ARMENIAN, // Armn
|
||||
AVESTAN, // Avst
|
||||
BALINESE, // Bali
|
||||
BAMUM, // Bamu
|
||||
BASSA_VAH, // Bass
|
||||
BATAK, // Batk
|
||||
BENGALI, // Beng
|
||||
BHAIKSUKI, // Bhks
|
||||
BOPOMOFO, // Bopo
|
||||
BRAHMI, // Brah
|
||||
BRAILLE, // Brai
|
||||
BUGINESE, // Bugi
|
||||
BUHID, // Buhd
|
||||
CHAKMA, // Cakm
|
||||
CANADIAN_SYLLABICS, // Cans
|
||||
CARIAN, // Cari
|
||||
CHAM, // Cham
|
||||
CHEROKEE, // Cher
|
||||
CHORASMIAN, // Chrs
|
||||
COPTIC, // Copt
|
||||
CYPRO_MINOAN, // Cpmn
|
||||
CYPRIOT, // Cprt
|
||||
CYRILLIC, // Cyrl
|
||||
DEVANAGARI, // Deva
|
||||
DIVES_AKURU, // Diak
|
||||
DOGRA, // Dogr
|
||||
DESERET, // Dsrt
|
||||
DUPLOYAN, // Dupl
|
||||
EGYPTIAN_HIEROGLYPHS, // Egyp
|
||||
ELBASAN, // Elba
|
||||
ELYMAIC, // Elym
|
||||
ETHIOPIC, // Ethi
|
||||
GEORGIAN, // Geor
|
||||
GLAGOLITIC, // Glag
|
||||
GUNJALA_GONDI, // Gong
|
||||
MASARAM_GONDI, // Gonm
|
||||
GOTHIC, // Goth
|
||||
GRANTHA, // Gran
|
||||
GREEK, // Grek
|
||||
GUJARATI, // Gujr
|
||||
GURMUKHI, // Guru
|
||||
HANGUL, // Hang
|
||||
HAN, // Hani
|
||||
HANUNOO, // Hano
|
||||
HATRAN, // Hatr
|
||||
HEBREW, // Hebr
|
||||
HIRAGANA, // Hira
|
||||
ANATOLIAN_HIEROGLYPHS, // Hluw
|
||||
PAHAWH_HMONG, // Hmng
|
||||
NYIAKENG_PUACHUE_HMONG, // Hmnp
|
||||
OLD_HUNGARIAN, // Hung
|
||||
OLD_ITALIC, // Ital
|
||||
JAVANESE, // Java
|
||||
KAYAH_LI, // Kali
|
||||
KATAKANA, // Kana
|
||||
KAWI, // Kawi
|
||||
KHAROSHTHI, // Khar
|
||||
KHMER, // Khmr
|
||||
KHOJKI, // Khoj
|
||||
KHITAN_SMALL_SCRIPT, // Kits
|
||||
KANNADA, // Knda
|
||||
KAITHI, // Kthi
|
||||
TAI_THAM, // Lana
|
||||
LAO, // Laoo
|
||||
LATIN, // Latn
|
||||
LEPCHA, // Lepc
|
||||
LIMBU, // Limb
|
||||
LINEAR_A, // Lina
|
||||
LINEAR_B, // Linb
|
||||
LISU, // Lisu
|
||||
LYCIAN, // Lyci
|
||||
LYDIAN, // Lydi
|
||||
MAHAJANI, // Mahj
|
||||
MAKASAR, // Maka
|
||||
MANDAIC, // Mand
|
||||
MANICHAEAN, // Mani
|
||||
MARCHEN, // Marc
|
||||
MEDEFAIDRIN, // Medf
|
||||
MENDE_KIKAKUI, // Mend
|
||||
MEROITIC_CURSIVE, // Merc
|
||||
MEROITIC_HIEROGLYPHS, // Mero
|
||||
MALAYALAM, // Mlym
|
||||
MODI, // Modi
|
||||
MONGOLIAN, // Mong
|
||||
MRO, // Mroo
|
||||
MEETEI_MAYEK, // Mtei
|
||||
MULTANI, // Mult
|
||||
MYANMAR, // Mymr
|
||||
NAG_MUNDARI, // Nagm
|
||||
NANDINAGARI, // Nand
|
||||
OLD_NORTH_ARABIAN, // Narb
|
||||
NABATAEAN, // Nbat
|
||||
NEWA, // Newa
|
||||
NKO, // Nkoo
|
||||
NUSHU, // Nshu
|
||||
OGHAM, // Ogam
|
||||
OL_CHIKI, // Olck
|
||||
OLD_TURKIC, // Orkh
|
||||
ORIYA, // Orya
|
||||
OSAGE, // Osge
|
||||
OSMANYA, // Osma
|
||||
OLD_UYGHUR, // Ougr
|
||||
PALMYRENE, // Palm
|
||||
PAU_CIN_HAU, // Pauc
|
||||
OLD_PERMIC, // Perm
|
||||
PHAGS_PA, // Phag
|
||||
INSCRIPTIONAL_PAHLAVI, // Phli
|
||||
PSALTER_PAHLAVI, // Phlp
|
||||
PHOENICIAN, // Phnx
|
||||
MIAO, // Plrd
|
||||
INSCRIPTIONAL_PARTHIAN, // Prti
|
||||
REJANG, // Rjng
|
||||
HANIFI_ROHINGYA, // Rohg
|
||||
RUNIC, // Runr
|
||||
SAMARITAN, // Samr
|
||||
OLD_SOUTH_ARABIAN, // Sarb
|
||||
SAURASHTRA, // Saur
|
||||
SIGNWRITING, // Sgnw
|
||||
SHAVIAN, // Shaw
|
||||
SHARADA, // Shrd
|
||||
SIDDHAM, // Sidd
|
||||
KHUDAWADI, // Sind
|
||||
SINHALA, // Sinh
|
||||
SOGDIAN, // Sogd
|
||||
OLD_SOGDIAN, // Sogo
|
||||
SORA_SOMPENG, // Sora
|
||||
SOYOMBO, // Soyo
|
||||
SUNDANESE, // Sund
|
||||
SYLOTI_NAGRI, // Sylo
|
||||
SYRIAC, // Syrc
|
||||
TAGBANWA, // Tagb
|
||||
TAKRI, // Takr
|
||||
TAI_LE, // Tale
|
||||
NEW_TAI_LUE, // Talu
|
||||
TAMIL, // Taml
|
||||
TANGUT, // Tang
|
||||
TAI_VIET, // Tavt
|
||||
TELUGU, // Telu
|
||||
TIFINAGH, // Tfng
|
||||
TAGALOG, // Tglg
|
||||
THAANA, // Thaa
|
||||
THAI, // Thai
|
||||
TIBETAN, // Tibt
|
||||
TIRHUTA, // Tirh
|
||||
TANGSA, // Tnsa
|
||||
TOTO, // Toto
|
||||
UGARITIC, // Ugar
|
||||
VAI, // Vaii
|
||||
VITHKUQI, // Vith
|
||||
WARANG_CITI, // Wara
|
||||
WANCHO, // Wcho
|
||||
OLD_PERSIAN, // Xpeo
|
||||
CUNEIFORM, // Xsux
|
||||
YEZIDI, // Yezi
|
||||
YI, // Yiii
|
||||
ZANABAZAR_SQUARE, // Zanb
|
||||
MATH, // Zmth
|
||||
};
|
||||
|
||||
// Returns the [[script]] corresponding to this rune.
|
||||
export fn rune_script(rn: rune) script = {
|
||||
return get_ucdrecord(rn).script: script;
|
||||
};
|
||||
|
||||
// Returns the four-character code associated with a [[script]] value.
|
||||
export fn script_code(sc: script) const str = {
|
||||
switch (sc) {
|
||||
case script::COMMON => return "Zyyy";
|
||||
case script::INHERITED => return "Zinh";
|
||||
case script::UNKNOWN => return "Zzzz";
|
||||
case script::ARABIC => return "Arab";
|
||||
case script::ARMENIAN => return "Armn";
|
||||
case script::BENGALI => return "Beng";
|
||||
case script::CYRILLIC => return "Cyrl";
|
||||
case script::DEVANAGARI => return "Deva";
|
||||
case script::GEORGIAN => return "Geor";
|
||||
case script::GREEK => return "Grek";
|
||||
case script::GUJARATI => return "Gujr";
|
||||
case script::GURMUKHI => return "Guru";
|
||||
case script::HANGUL => return "Hang";
|
||||
case script::HAN => return "Hani";
|
||||
case script::HEBREW => return "Hebr";
|
||||
case script::HIRAGANA => return "Hira";
|
||||
case script::KANNADA => return "Knda";
|
||||
case script::KATAKANA => return "Kana";
|
||||
case script::LAO => return "Laoo";
|
||||
case script::LATIN => return "Latn";
|
||||
case script::MALAYALAM => return "Mlym";
|
||||
case script::ORIYA => return "Orya";
|
||||
case script::TAMIL => return "Taml";
|
||||
case script::TELUGU => return "Telu";
|
||||
case script::THAI => return "Thai";
|
||||
case script::TIBETAN => return "Tibt";
|
||||
case script::BOPOMOFO => return "Bopo";
|
||||
case script::BRAILLE => return "Brai";
|
||||
case script::CANADIAN_SYLLABICS => return "Cans";
|
||||
case script::CHEROKEE => return "Cher";
|
||||
case script::ETHIOPIC => return "Ethi";
|
||||
case script::KHMER => return "Khmr";
|
||||
case script::MONGOLIAN => return "Mong";
|
||||
case script::MYANMAR => return "Mymr";
|
||||
case script::OGHAM => return "Ogam";
|
||||
case script::RUNIC => return "Runr";
|
||||
case script::SINHALA => return "Sinh";
|
||||
case script::SYRIAC => return "Syrc";
|
||||
case script::THAANA => return "Thaa";
|
||||
case script::YI => return "Yiii";
|
||||
case script::DESERET => return "Dsrt";
|
||||
case script::GOTHIC => return "Goth";
|
||||
case script::OLD_ITALIC => return "Ital";
|
||||
case script::BUHID => return "Buhd";
|
||||
case script::HANUNOO => return "Hano";
|
||||
case script::TAGALOG => return "Tglg";
|
||||
case script::TAGBANWA => return "Tagb";
|
||||
case script::CYPRIOT => return "Cprt";
|
||||
case script::LIMBU => return "Limb";
|
||||
case script::LINEAR_B => return "Linb";
|
||||
case script::OSMANYA => return "Osma";
|
||||
case script::SHAVIAN => return "Shaw";
|
||||
case script::TAI_LE => return "Tale";
|
||||
case script::UGARITIC => return "Ugar";
|
||||
case script::BUGINESE => return "Bugi";
|
||||
case script::COPTIC => return "Copt";
|
||||
case script::GLAGOLITIC => return "Glag";
|
||||
case script::KHAROSHTHI => return "Khar";
|
||||
case script::NEW_TAI_LUE => return "Talu";
|
||||
case script::OLD_PERSIAN => return "Xpeo";
|
||||
case script::SYLOTI_NAGRI => return "Sylo";
|
||||
case script::TIFINAGH => return "Tfng";
|
||||
case script::BALINESE => return "Bali";
|
||||
case script::CUNEIFORM => return "Xsux";
|
||||
case script::NKO => return "Nkoo";
|
||||
case script::PHAGS_PA => return "Phag";
|
||||
case script::PHOENICIAN => return "Phnx";
|
||||
case script::CARIAN => return "Cari";
|
||||
case script::CHAM => return "Cham";
|
||||
case script::KAYAH_LI => return "Kali";
|
||||
case script::LEPCHA => return "Lepc";
|
||||
case script::LYCIAN => return "Lyci";
|
||||
case script::LYDIAN => return "Lydi";
|
||||
case script::OL_CHIKI => return "Olck";
|
||||
case script::REJANG => return "Rjng";
|
||||
case script::SAURASHTRA => return "Saur";
|
||||
case script::SUNDANESE => return "Sund";
|
||||
case script::VAI => return "Vaii";
|
||||
case script::AVESTAN => return "Avst";
|
||||
case script::BAMUM => return "Bamu";
|
||||
case script::EGYPTIAN_HIEROGLYPHS => return "Egyp";
|
||||
case script::IMPERIAL_ARAMAIC => return "Armi";
|
||||
case script::INSCRIPTIONAL_PAHLAVI => return "Phli";
|
||||
case script::INSCRIPTIONAL_PARTHIAN => return "Prti";
|
||||
case script::JAVANESE => return "Java";
|
||||
case script::KAITHI => return "Kthi";
|
||||
case script::LISU => return "Lisu";
|
||||
case script::MEETEI_MAYEK => return "Mtei";
|
||||
case script::OLD_SOUTH_ARABIAN => return "Sarb";
|
||||
case script::OLD_TURKIC => return "Orkh";
|
||||
case script::SAMARITAN => return "Samr";
|
||||
case script::TAI_THAM => return "Lana";
|
||||
case script::TAI_VIET => return "Tavt";
|
||||
case script::BATAK => return "Batk";
|
||||
case script::BRAHMI => return "Brah";
|
||||
case script::MANDAIC => return "Mand";
|
||||
case script::CHAKMA => return "Cakm";
|
||||
case script::MEROITIC_CURSIVE => return "Merc";
|
||||
case script::MEROITIC_HIEROGLYPHS => return "Mero";
|
||||
case script::MIAO => return "Plrd";
|
||||
case script::SHARADA => return "Shrd";
|
||||
case script::SORA_SOMPENG => return "Sora";
|
||||
case script::TAKRI => return "Takr";
|
||||
case script::BASSA_VAH => return "Bass";
|
||||
case script::CAUCASIAN_ALBANIAN => return "Aghb";
|
||||
case script::DUPLOYAN => return "Dupl";
|
||||
case script::ELBASAN => return "Elba";
|
||||
case script::GRANTHA => return "Gran";
|
||||
case script::KHOJKI => return "Khoj";
|
||||
case script::KHUDAWADI => return "Sind";
|
||||
case script::LINEAR_A => return "Lina";
|
||||
case script::MAHAJANI => return "Mahj";
|
||||
case script::MANICHAEAN => return "Mani";
|
||||
case script::MENDE_KIKAKUI => return "Mend";
|
||||
case script::MODI => return "Modi";
|
||||
case script::MRO => return "Mroo";
|
||||
case script::NABATAEAN => return "Nbat";
|
||||
case script::OLD_NORTH_ARABIAN => return "Narb";
|
||||
case script::OLD_PERMIC => return "Perm";
|
||||
case script::PAHAWH_HMONG => return "Hmng";
|
||||
case script::PALMYRENE => return "Palm";
|
||||
case script::PAU_CIN_HAU => return "Pauc";
|
||||
case script::PSALTER_PAHLAVI => return "Phlp";
|
||||
case script::SIDDHAM => return "Sidd";
|
||||
case script::TIRHUTA => return "Tirh";
|
||||
case script::WARANG_CITI => return "Wara";
|
||||
case script::AHOM => return "Ahom";
|
||||
case script::ANATOLIAN_HIEROGLYPHS => return "Hluw";
|
||||
case script::HATRAN => return "Hatr";
|
||||
case script::MULTANI => return "Mult";
|
||||
case script::OLD_HUNGARIAN => return "Hung";
|
||||
case script::SIGNWRITING => return "Sgnw";
|
||||
case script::ADLAM => return "Adlm";
|
||||
case script::BHAIKSUKI => return "Bhks";
|
||||
case script::MARCHEN => return "Marc";
|
||||
case script::OSAGE => return "Osge";
|
||||
case script::TANGUT => return "Tang";
|
||||
case script::NEWA => return "Newa";
|
||||
case script::MASARAM_GONDI => return "Gonm";
|
||||
case script::NUSHU => return "Nshu";
|
||||
case script::SOYOMBO => return "Soyo";
|
||||
case script::ZANABAZAR_SQUARE => return "Zanb";
|
||||
case script::DOGRA => return "Dogr";
|
||||
case script::GUNJALA_GONDI => return "Gong";
|
||||
case script::HANIFI_ROHINGYA => return "Rohg";
|
||||
case script::MAKASAR => return "Maka";
|
||||
case script::MEDEFAIDRIN => return "Medf";
|
||||
case script::OLD_SOGDIAN => return "Sogo";
|
||||
case script::SOGDIAN => return "Sogd";
|
||||
case script::ELYMAIC => return "Elym";
|
||||
case script::NANDINAGARI => return "Nand";
|
||||
case script::NYIAKENG_PUACHUE_HMONG => return "Hmnp";
|
||||
case script::WANCHO => return "Wcho";
|
||||
case script::CHORASMIAN => return "Chrs";
|
||||
case script::DIVES_AKURU => return "Diak";
|
||||
case script::KHITAN_SMALL_SCRIPT => return "Kits";
|
||||
case script::YEZIDI => return "Yezi";
|
||||
case script::CYPRO_MINOAN => return "Cpmn";
|
||||
case script::OLD_UYGHUR => return "Ougr";
|
||||
case script::TANGSA => return "Tnsa";
|
||||
case script::TOTO => return "Toto";
|
||||
case script::VITHKUQI => return "Vith";
|
||||
case script::MATH => return "Zmth";
|
||||
case script::KAWI => return "Kawi";
|
||||
case script::NAG_MUNDARI => return "Nagm";
|
||||
};
|
||||
};
|
||||
|
|
6460
unicode/ucd_gen.ha
6460
unicode/ucd_gen.ha
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue