Implement script detection
This commit is contained in:
parent
04940cea04
commit
2097b64da5
4 changed files with 4288 additions and 2716 deletions
|
@ -13,6 +13,10 @@ export fn main() void = {
|
||||||
case => break;
|
case => break;
|
||||||
};
|
};
|
||||||
const gc = unicode::rune_gc(rn);
|
const gc = unicode::rune_gc(rn);
|
||||||
fmt::printfln("'{}'/{:x}: {}", rn, rn: u32, unicode::gc_code(gc))!;
|
const sc = unicode::rune_script(rn);
|
||||||
|
fmt::printfln("'{}'/{:x}: {} : {}",
|
||||||
|
rn, rn: u32,
|
||||||
|
unicode::gc_code(gc),
|
||||||
|
unicode::script_code(sc))!;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
|
@ -61,6 +61,7 @@ NAME_ALIASES = "NameAliases%s.txt"
|
||||||
NAMED_SEQUENCES = "NamedSequences%s.txt"
|
NAMED_SEQUENCES = "NamedSequences%s.txt"
|
||||||
SPECIAL_CASING = "SpecialCasing%s.txt"
|
SPECIAL_CASING = "SpecialCasing%s.txt"
|
||||||
CASE_FOLDING = "CaseFolding%s.txt"
|
CASE_FOLDING = "CaseFolding%s.txt"
|
||||||
|
SCRIPTS = "Scripts%s.txt"
|
||||||
|
|
||||||
# Private Use Areas -- in planes 1, 15, 16
|
# Private Use Areas -- in planes 1, 15, 16
|
||||||
PUA_1 = range(0xE000, 0xF900)
|
PUA_1 = range(0xE000, 0xF900)
|
||||||
|
@ -88,6 +89,173 @@ EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ]
|
||||||
|
|
||||||
MANDATORY_LINE_BREAKS = [ "BK", "CR", "LF", "NL" ]
|
MANDATORY_LINE_BREAKS = [ "BK", "CR", "LF", "NL" ]
|
||||||
|
|
||||||
|
SCRIPT_NAMES = [
|
||||||
|
"Common",
|
||||||
|
"Inherited",
|
||||||
|
"Unknown",
|
||||||
|
"Adlam",
|
||||||
|
"Caucasian_Albanian",
|
||||||
|
"Ahom",
|
||||||
|
"Arabic",
|
||||||
|
"Imperial_Aramaic",
|
||||||
|
"Armenian",
|
||||||
|
"Avestan",
|
||||||
|
"Balinese",
|
||||||
|
"Bamum",
|
||||||
|
"Bassa_Vah",
|
||||||
|
"Batak",
|
||||||
|
"Bengali",
|
||||||
|
"Bhaiksuki",
|
||||||
|
"Bopomofo",
|
||||||
|
"Brahmi",
|
||||||
|
"Braille",
|
||||||
|
"Buginese",
|
||||||
|
"Buhid",
|
||||||
|
"Chakma",
|
||||||
|
"Canadian_Aboriginal",
|
||||||
|
"Carian",
|
||||||
|
"Cham",
|
||||||
|
"Cherokee",
|
||||||
|
"Chorasmian",
|
||||||
|
"Coptic",
|
||||||
|
"Cypro_Minoan",
|
||||||
|
"Cypriot",
|
||||||
|
"Cyrillic",
|
||||||
|
"Devanagari",
|
||||||
|
"Dives_Akuru",
|
||||||
|
"Dogra",
|
||||||
|
"Deseret",
|
||||||
|
"Duployan",
|
||||||
|
"Egyptian_Hieroglyphs",
|
||||||
|
"Elbasan",
|
||||||
|
"Elymaic",
|
||||||
|
"Ethiopic",
|
||||||
|
"Georgian",
|
||||||
|
"Glagolitic",
|
||||||
|
"Gunjala_Gondi",
|
||||||
|
"Masaram_Gondi",
|
||||||
|
"Gothic",
|
||||||
|
"Grantha",
|
||||||
|
"Greek",
|
||||||
|
"Gujarati",
|
||||||
|
"Gurmukhi",
|
||||||
|
"Hangul",
|
||||||
|
"Han",
|
||||||
|
"Hanunoo",
|
||||||
|
"Hatran",
|
||||||
|
"Hebrew",
|
||||||
|
"Hiragana",
|
||||||
|
"Anatolian_Hieroglyphs",
|
||||||
|
"Pahawh_Hmong",
|
||||||
|
"Nyiakeng_Puachue_Hmong",
|
||||||
|
"Old_Hungarian",
|
||||||
|
"Old_Italic",
|
||||||
|
"Javanese",
|
||||||
|
"Kayah_Li",
|
||||||
|
"Katakana",
|
||||||
|
"Kawi",
|
||||||
|
"Kharoshthi",
|
||||||
|
"Khmer",
|
||||||
|
"Khojki",
|
||||||
|
"Khitan_Small_Script",
|
||||||
|
"Kannada",
|
||||||
|
"Kaithi",
|
||||||
|
"Tai_Tham",
|
||||||
|
"Lao",
|
||||||
|
"Latin",
|
||||||
|
"Lepcha",
|
||||||
|
"Limbu",
|
||||||
|
"Linear_A",
|
||||||
|
"Linear_B",
|
||||||
|
"Lisu",
|
||||||
|
"Lycian",
|
||||||
|
"Lydian",
|
||||||
|
"Mahajani",
|
||||||
|
"Makasar",
|
||||||
|
"Mandaic",
|
||||||
|
"Manichaean",
|
||||||
|
"Marchen",
|
||||||
|
"Medefaidrin",
|
||||||
|
"Mende_Kikakui",
|
||||||
|
"Meroitic_Cursive",
|
||||||
|
"Meroitic_Hieroglyphs",
|
||||||
|
"Malayalam",
|
||||||
|
"Modi",
|
||||||
|
"Mongolian",
|
||||||
|
"Mro",
|
||||||
|
"Meetei_Mayek",
|
||||||
|
"Multani",
|
||||||
|
"Myanmar",
|
||||||
|
"Nag_Mundari",
|
||||||
|
"Nandinagari",
|
||||||
|
"Old_North_Arabian",
|
||||||
|
"Nabataean",
|
||||||
|
"Newa",
|
||||||
|
"Nko",
|
||||||
|
"Nushu",
|
||||||
|
"Ogham",
|
||||||
|
"Ol_Chiki",
|
||||||
|
"Old_Turkic",
|
||||||
|
"Oriya",
|
||||||
|
"Osage",
|
||||||
|
"Osmanya",
|
||||||
|
"Old_Uyghur",
|
||||||
|
"Palmyrene",
|
||||||
|
"Pau_Cin_Hau",
|
||||||
|
"Old_Permic",
|
||||||
|
"Phags_Pa",
|
||||||
|
"Inscriptional_Pahlavi",
|
||||||
|
"Psalter_Pahlavi",
|
||||||
|
"Phoenician",
|
||||||
|
"Miao",
|
||||||
|
"Inscriptional_Parthian",
|
||||||
|
"Rejang",
|
||||||
|
"Hanifi_Rohingya",
|
||||||
|
"Runic",
|
||||||
|
"Samaritan",
|
||||||
|
"Old_South_Arabian",
|
||||||
|
"Saurashtra",
|
||||||
|
"SignWriting",
|
||||||
|
"Shavian",
|
||||||
|
"Sharada",
|
||||||
|
"Siddham",
|
||||||
|
"Khudawadi",
|
||||||
|
"Sinhala",
|
||||||
|
"Sogdian",
|
||||||
|
"Old_Sogdian",
|
||||||
|
"Sora_Sompeng",
|
||||||
|
"Soyombo",
|
||||||
|
"Sundanese",
|
||||||
|
"Syloti_Nagri",
|
||||||
|
"Syriac",
|
||||||
|
"Tagbanwa",
|
||||||
|
"Takri",
|
||||||
|
"Tai_Le",
|
||||||
|
"New_Tai_Lue",
|
||||||
|
"Tamil",
|
||||||
|
"Tangut",
|
||||||
|
"Tai_Viet",
|
||||||
|
"Telugu",
|
||||||
|
"Tifinagh",
|
||||||
|
"Tagalog",
|
||||||
|
"Thaana",
|
||||||
|
"Thai",
|
||||||
|
"Tibetan",
|
||||||
|
"Tirhuta",
|
||||||
|
"Tangsa",
|
||||||
|
"Toto",
|
||||||
|
"Ugaritic",
|
||||||
|
"Vai",
|
||||||
|
"Vithkuqi",
|
||||||
|
"Warang_Citi",
|
||||||
|
"Wancho",
|
||||||
|
"Old_Persian",
|
||||||
|
"Cuneiform",
|
||||||
|
"Yezidi",
|
||||||
|
"Yi",
|
||||||
|
"Zanabazar_Square",
|
||||||
|
]
|
||||||
|
|
||||||
# note: should match definitions in Objects/unicodectype.c
|
# note: should match definitions in Objects/unicodectype.c
|
||||||
ALPHA_MASK = 0x01
|
ALPHA_MASK = 0x01
|
||||||
DECIMAL_MASK = 0x02
|
DECIMAL_MASK = 0x02
|
||||||
|
@ -133,7 +301,7 @@ def maketables(trace=0):
|
||||||
|
|
||||||
def makeunicodedata(unicode, trace):
|
def makeunicodedata(unicode, trace):
|
||||||
|
|
||||||
dummy = (0, 0, 0, 0, 0)
|
dummy = (0, 0, 0, 0, 0, 0)
|
||||||
table = [dummy]
|
table = [dummy]
|
||||||
cache = {0: dummy}
|
cache = {0: dummy}
|
||||||
index = [0] * len(unicode.chars)
|
index = [0] * len(unicode.chars)
|
||||||
|
@ -151,8 +319,9 @@ def makeunicodedata(unicode, trace):
|
||||||
bidirectional = BIDIRECTIONAL_NAMES.index(record.bidi_class)
|
bidirectional = BIDIRECTIONAL_NAMES.index(record.bidi_class)
|
||||||
mirrored = record.bidi_mirrored == "Y"
|
mirrored = record.bidi_mirrored == "Y"
|
||||||
eastasianwidth = EASTASIANWIDTH_NAMES.index(record.east_asian_width)
|
eastasianwidth = EASTASIANWIDTH_NAMES.index(record.east_asian_width)
|
||||||
|
script = SCRIPT_NAMES.index(record.script or "Unknown")
|
||||||
item = (
|
item = (
|
||||||
category, combining, bidirectional, mirrored, eastasianwidth,
|
category, combining, bidirectional, mirrored, eastasianwidth, script,
|
||||||
)
|
)
|
||||||
# add entry to index and item tables
|
# add entry to index and item tables
|
||||||
i = cache.get(item)
|
i = cache.get(item)
|
||||||
|
@ -176,7 +345,7 @@ def makeunicodedata(unicode, trace):
|
||||||
fprint("// List of unique database records")
|
fprint("// List of unique database records")
|
||||||
fprint("const ucd_records: [_]ucd_encodedrec = [")
|
fprint("const ucd_records: [_]ucd_encodedrec = [")
|
||||||
for item in table:
|
for item in table:
|
||||||
fprint(" (%d, %d, %d, %d, %d)," % item)
|
fprint(" (%d, %d, %d, %d, %d, %d)," % item)
|
||||||
fprint("];")
|
fprint("];")
|
||||||
fprint()
|
fprint()
|
||||||
|
|
||||||
|
@ -288,9 +457,12 @@ class UcdRecord:
|
||||||
# We store them as a bitmask.
|
# We store them as a bitmask.
|
||||||
quick_check: int
|
quick_check: int
|
||||||
|
|
||||||
|
# From Scripts.txt
|
||||||
|
script: str
|
||||||
|
|
||||||
|
|
||||||
def from_row(row: List[str]) -> UcdRecord:
|
def from_row(row: List[str]) -> UcdRecord:
|
||||||
return UcdRecord(*row, None, set(), 0)
|
return UcdRecord(*row, None, set(), 0, "Unknown")
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
|
@ -386,6 +558,14 @@ class UnicodeData:
|
||||||
if table[i] is not None:
|
if table[i] is not None:
|
||||||
table[i].east_asian_width = widths[i]
|
table[i].east_asian_width = widths[i]
|
||||||
|
|
||||||
|
scripts = [None] * 0x110000
|
||||||
|
for char, (script,) in UcdFile(SCRIPTS, version).expanded():
|
||||||
|
scripts[char] = script
|
||||||
|
|
||||||
|
for i in range(0, 0x110000):
|
||||||
|
if table[i] is not None:
|
||||||
|
table[i].script = scripts[i]
|
||||||
|
|
||||||
for char, (p,) in UcdFile(DERIVED_CORE_PROPERTIES, version).expanded():
|
for char, (p,) in UcdFile(DERIVED_CORE_PROPERTIES, version).expanded():
|
||||||
if table[char]:
|
if table[char]:
|
||||||
# Some properties (e.g. Default_Ignorable_Code_Point)
|
# Some properties (e.g. Default_Ignorable_Code_Point)
|
||||||
|
|
350
unicode/ucd.ha
350
unicode/ucd.ha
|
@ -1,4 +1,4 @@
|
||||||
type ucd_encodedrec = (u8, u8, u8, u8, u8);
|
type ucd_encodedrec = (u8, u8, u8, u8, u8, u16);
|
||||||
|
|
||||||
type ucd_record = struct {
|
type ucd_record = struct {
|
||||||
category: u8,
|
category: u8,
|
||||||
|
@ -6,11 +6,12 @@ type ucd_record = struct {
|
||||||
bidirectional: u8,
|
bidirectional: u8,
|
||||||
mirrored: u8,
|
mirrored: u8,
|
||||||
east_asian_width: u8,
|
east_asian_width: u8,
|
||||||
|
script: u16,
|
||||||
};
|
};
|
||||||
|
|
||||||
fn get_ucdrecord(rn: rune) *ucd_record = {
|
fn get_ucdrecord(rn: rune) *ucd_record = {
|
||||||
const code = rn: u32;
|
const code = rn: u32;
|
||||||
let index = 0u8;
|
let index = 0u16;
|
||||||
if (code < 0x110000) {
|
if (code < 0x110000) {
|
||||||
index = index1[(code>>UCD_RECORD_SHIFT)];
|
index = index1[(code>>UCD_RECORD_SHIFT)];
|
||||||
index = index2[(index<<UCD_RECORD_SHIFT)+(code&((1<<UCD_RECORD_SHIFT)-1))];
|
index = index2[(index<<UCD_RECORD_SHIFT)+(code&((1<<UCD_RECORD_SHIFT)-1))];
|
||||||
|
@ -92,3 +93,348 @@ export fn gc_code(v: gc) const str = {
|
||||||
case gc::SPACE_SEPARATOR => return "Zs";
|
case gc::SPACE_SEPARATOR => return "Zs";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Unicode character Script attribute.
|
||||||
|
export type script = enum u16 {
|
||||||
|
COMMON, // Zyyy
|
||||||
|
INHERITED, // Zinh
|
||||||
|
UNKNOWN, // Zzzz
|
||||||
|
ADLAM, // Adlm
|
||||||
|
CAUCASIAN_ALBANIAN, // Aghb
|
||||||
|
AHOM, // Ahom
|
||||||
|
ARABIC, // Arab
|
||||||
|
IMPERIAL_ARAMAIC, // Armi
|
||||||
|
ARMENIAN, // Armn
|
||||||
|
AVESTAN, // Avst
|
||||||
|
BALINESE, // Bali
|
||||||
|
BAMUM, // Bamu
|
||||||
|
BASSA_VAH, // Bass
|
||||||
|
BATAK, // Batk
|
||||||
|
BENGALI, // Beng
|
||||||
|
BHAIKSUKI, // Bhks
|
||||||
|
BOPOMOFO, // Bopo
|
||||||
|
BRAHMI, // Brah
|
||||||
|
BRAILLE, // Brai
|
||||||
|
BUGINESE, // Bugi
|
||||||
|
BUHID, // Buhd
|
||||||
|
CHAKMA, // Cakm
|
||||||
|
CANADIAN_SYLLABICS, // Cans
|
||||||
|
CARIAN, // Cari
|
||||||
|
CHAM, // Cham
|
||||||
|
CHEROKEE, // Cher
|
||||||
|
CHORASMIAN, // Chrs
|
||||||
|
COPTIC, // Copt
|
||||||
|
CYPRO_MINOAN, // Cpmn
|
||||||
|
CYPRIOT, // Cprt
|
||||||
|
CYRILLIC, // Cyrl
|
||||||
|
DEVANAGARI, // Deva
|
||||||
|
DIVES_AKURU, // Diak
|
||||||
|
DOGRA, // Dogr
|
||||||
|
DESERET, // Dsrt
|
||||||
|
DUPLOYAN, // Dupl
|
||||||
|
EGYPTIAN_HIEROGLYPHS, // Egyp
|
||||||
|
ELBASAN, // Elba
|
||||||
|
ELYMAIC, // Elym
|
||||||
|
ETHIOPIC, // Ethi
|
||||||
|
GEORGIAN, // Geor
|
||||||
|
GLAGOLITIC, // Glag
|
||||||
|
GUNJALA_GONDI, // Gong
|
||||||
|
MASARAM_GONDI, // Gonm
|
||||||
|
GOTHIC, // Goth
|
||||||
|
GRANTHA, // Gran
|
||||||
|
GREEK, // Grek
|
||||||
|
GUJARATI, // Gujr
|
||||||
|
GURMUKHI, // Guru
|
||||||
|
HANGUL, // Hang
|
||||||
|
HAN, // Hani
|
||||||
|
HANUNOO, // Hano
|
||||||
|
HATRAN, // Hatr
|
||||||
|
HEBREW, // Hebr
|
||||||
|
HIRAGANA, // Hira
|
||||||
|
ANATOLIAN_HIEROGLYPHS, // Hluw
|
||||||
|
PAHAWH_HMONG, // Hmng
|
||||||
|
NYIAKENG_PUACHUE_HMONG, // Hmnp
|
||||||
|
OLD_HUNGARIAN, // Hung
|
||||||
|
OLD_ITALIC, // Ital
|
||||||
|
JAVANESE, // Java
|
||||||
|
KAYAH_LI, // Kali
|
||||||
|
KATAKANA, // Kana
|
||||||
|
KAWI, // Kawi
|
||||||
|
KHAROSHTHI, // Khar
|
||||||
|
KHMER, // Khmr
|
||||||
|
KHOJKI, // Khoj
|
||||||
|
KHITAN_SMALL_SCRIPT, // Kits
|
||||||
|
KANNADA, // Knda
|
||||||
|
KAITHI, // Kthi
|
||||||
|
TAI_THAM, // Lana
|
||||||
|
LAO, // Laoo
|
||||||
|
LATIN, // Latn
|
||||||
|
LEPCHA, // Lepc
|
||||||
|
LIMBU, // Limb
|
||||||
|
LINEAR_A, // Lina
|
||||||
|
LINEAR_B, // Linb
|
||||||
|
LISU, // Lisu
|
||||||
|
LYCIAN, // Lyci
|
||||||
|
LYDIAN, // Lydi
|
||||||
|
MAHAJANI, // Mahj
|
||||||
|
MAKASAR, // Maka
|
||||||
|
MANDAIC, // Mand
|
||||||
|
MANICHAEAN, // Mani
|
||||||
|
MARCHEN, // Marc
|
||||||
|
MEDEFAIDRIN, // Medf
|
||||||
|
MENDE_KIKAKUI, // Mend
|
||||||
|
MEROITIC_CURSIVE, // Merc
|
||||||
|
MEROITIC_HIEROGLYPHS, // Mero
|
||||||
|
MALAYALAM, // Mlym
|
||||||
|
MODI, // Modi
|
||||||
|
MONGOLIAN, // Mong
|
||||||
|
MRO, // Mroo
|
||||||
|
MEETEI_MAYEK, // Mtei
|
||||||
|
MULTANI, // Mult
|
||||||
|
MYANMAR, // Mymr
|
||||||
|
NAG_MUNDARI, // Nagm
|
||||||
|
NANDINAGARI, // Nand
|
||||||
|
OLD_NORTH_ARABIAN, // Narb
|
||||||
|
NABATAEAN, // Nbat
|
||||||
|
NEWA, // Newa
|
||||||
|
NKO, // Nkoo
|
||||||
|
NUSHU, // Nshu
|
||||||
|
OGHAM, // Ogam
|
||||||
|
OL_CHIKI, // Olck
|
||||||
|
OLD_TURKIC, // Orkh
|
||||||
|
ORIYA, // Orya
|
||||||
|
OSAGE, // Osge
|
||||||
|
OSMANYA, // Osma
|
||||||
|
OLD_UYGHUR, // Ougr
|
||||||
|
PALMYRENE, // Palm
|
||||||
|
PAU_CIN_HAU, // Pauc
|
||||||
|
OLD_PERMIC, // Perm
|
||||||
|
PHAGS_PA, // Phag
|
||||||
|
INSCRIPTIONAL_PAHLAVI, // Phli
|
||||||
|
PSALTER_PAHLAVI, // Phlp
|
||||||
|
PHOENICIAN, // Phnx
|
||||||
|
MIAO, // Plrd
|
||||||
|
INSCRIPTIONAL_PARTHIAN, // Prti
|
||||||
|
REJANG, // Rjng
|
||||||
|
HANIFI_ROHINGYA, // Rohg
|
||||||
|
RUNIC, // Runr
|
||||||
|
SAMARITAN, // Samr
|
||||||
|
OLD_SOUTH_ARABIAN, // Sarb
|
||||||
|
SAURASHTRA, // Saur
|
||||||
|
SIGNWRITING, // Sgnw
|
||||||
|
SHAVIAN, // Shaw
|
||||||
|
SHARADA, // Shrd
|
||||||
|
SIDDHAM, // Sidd
|
||||||
|
KHUDAWADI, // Sind
|
||||||
|
SINHALA, // Sinh
|
||||||
|
SOGDIAN, // Sogd
|
||||||
|
OLD_SOGDIAN, // Sogo
|
||||||
|
SORA_SOMPENG, // Sora
|
||||||
|
SOYOMBO, // Soyo
|
||||||
|
SUNDANESE, // Sund
|
||||||
|
SYLOTI_NAGRI, // Sylo
|
||||||
|
SYRIAC, // Syrc
|
||||||
|
TAGBANWA, // Tagb
|
||||||
|
TAKRI, // Takr
|
||||||
|
TAI_LE, // Tale
|
||||||
|
NEW_TAI_LUE, // Talu
|
||||||
|
TAMIL, // Taml
|
||||||
|
TANGUT, // Tang
|
||||||
|
TAI_VIET, // Tavt
|
||||||
|
TELUGU, // Telu
|
||||||
|
TIFINAGH, // Tfng
|
||||||
|
TAGALOG, // Tglg
|
||||||
|
THAANA, // Thaa
|
||||||
|
THAI, // Thai
|
||||||
|
TIBETAN, // Tibt
|
||||||
|
TIRHUTA, // Tirh
|
||||||
|
TANGSA, // Tnsa
|
||||||
|
TOTO, // Toto
|
||||||
|
UGARITIC, // Ugar
|
||||||
|
VAI, // Vaii
|
||||||
|
VITHKUQI, // Vith
|
||||||
|
WARANG_CITI, // Wara
|
||||||
|
WANCHO, // Wcho
|
||||||
|
OLD_PERSIAN, // Xpeo
|
||||||
|
CUNEIFORM, // Xsux
|
||||||
|
YEZIDI, // Yezi
|
||||||
|
YI, // Yiii
|
||||||
|
ZANABAZAR_SQUARE, // Zanb
|
||||||
|
MATH, // Zmth
|
||||||
|
};
|
||||||
|
|
||||||
|
// Returns the [[script]] corresponding to this rune.
|
||||||
|
export fn rune_script(rn: rune) script = {
|
||||||
|
return get_ucdrecord(rn).script: script;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Returns the four-character code associated with a [[script]] value.
|
||||||
|
export fn script_code(sc: script) const str = {
|
||||||
|
switch (sc) {
|
||||||
|
case script::COMMON => return "Zyyy";
|
||||||
|
case script::INHERITED => return "Zinh";
|
||||||
|
case script::UNKNOWN => return "Zzzz";
|
||||||
|
case script::ARABIC => return "Arab";
|
||||||
|
case script::ARMENIAN => return "Armn";
|
||||||
|
case script::BENGALI => return "Beng";
|
||||||
|
case script::CYRILLIC => return "Cyrl";
|
||||||
|
case script::DEVANAGARI => return "Deva";
|
||||||
|
case script::GEORGIAN => return "Geor";
|
||||||
|
case script::GREEK => return "Grek";
|
||||||
|
case script::GUJARATI => return "Gujr";
|
||||||
|
case script::GURMUKHI => return "Guru";
|
||||||
|
case script::HANGUL => return "Hang";
|
||||||
|
case script::HAN => return "Hani";
|
||||||
|
case script::HEBREW => return "Hebr";
|
||||||
|
case script::HIRAGANA => return "Hira";
|
||||||
|
case script::KANNADA => return "Knda";
|
||||||
|
case script::KATAKANA => return "Kana";
|
||||||
|
case script::LAO => return "Laoo";
|
||||||
|
case script::LATIN => return "Latn";
|
||||||
|
case script::MALAYALAM => return "Mlym";
|
||||||
|
case script::ORIYA => return "Orya";
|
||||||
|
case script::TAMIL => return "Taml";
|
||||||
|
case script::TELUGU => return "Telu";
|
||||||
|
case script::THAI => return "Thai";
|
||||||
|
case script::TIBETAN => return "Tibt";
|
||||||
|
case script::BOPOMOFO => return "Bopo";
|
||||||
|
case script::BRAILLE => return "Brai";
|
||||||
|
case script::CANADIAN_SYLLABICS => return "Cans";
|
||||||
|
case script::CHEROKEE => return "Cher";
|
||||||
|
case script::ETHIOPIC => return "Ethi";
|
||||||
|
case script::KHMER => return "Khmr";
|
||||||
|
case script::MONGOLIAN => return "Mong";
|
||||||
|
case script::MYANMAR => return "Mymr";
|
||||||
|
case script::OGHAM => return "Ogam";
|
||||||
|
case script::RUNIC => return "Runr";
|
||||||
|
case script::SINHALA => return "Sinh";
|
||||||
|
case script::SYRIAC => return "Syrc";
|
||||||
|
case script::THAANA => return "Thaa";
|
||||||
|
case script::YI => return "Yiii";
|
||||||
|
case script::DESERET => return "Dsrt";
|
||||||
|
case script::GOTHIC => return "Goth";
|
||||||
|
case script::OLD_ITALIC => return "Ital";
|
||||||
|
case script::BUHID => return "Buhd";
|
||||||
|
case script::HANUNOO => return "Hano";
|
||||||
|
case script::TAGALOG => return "Tglg";
|
||||||
|
case script::TAGBANWA => return "Tagb";
|
||||||
|
case script::CYPRIOT => return "Cprt";
|
||||||
|
case script::LIMBU => return "Limb";
|
||||||
|
case script::LINEAR_B => return "Linb";
|
||||||
|
case script::OSMANYA => return "Osma";
|
||||||
|
case script::SHAVIAN => return "Shaw";
|
||||||
|
case script::TAI_LE => return "Tale";
|
||||||
|
case script::UGARITIC => return "Ugar";
|
||||||
|
case script::BUGINESE => return "Bugi";
|
||||||
|
case script::COPTIC => return "Copt";
|
||||||
|
case script::GLAGOLITIC => return "Glag";
|
||||||
|
case script::KHAROSHTHI => return "Khar";
|
||||||
|
case script::NEW_TAI_LUE => return "Talu";
|
||||||
|
case script::OLD_PERSIAN => return "Xpeo";
|
||||||
|
case script::SYLOTI_NAGRI => return "Sylo";
|
||||||
|
case script::TIFINAGH => return "Tfng";
|
||||||
|
case script::BALINESE => return "Bali";
|
||||||
|
case script::CUNEIFORM => return "Xsux";
|
||||||
|
case script::NKO => return "Nkoo";
|
||||||
|
case script::PHAGS_PA => return "Phag";
|
||||||
|
case script::PHOENICIAN => return "Phnx";
|
||||||
|
case script::CARIAN => return "Cari";
|
||||||
|
case script::CHAM => return "Cham";
|
||||||
|
case script::KAYAH_LI => return "Kali";
|
||||||
|
case script::LEPCHA => return "Lepc";
|
||||||
|
case script::LYCIAN => return "Lyci";
|
||||||
|
case script::LYDIAN => return "Lydi";
|
||||||
|
case script::OL_CHIKI => return "Olck";
|
||||||
|
case script::REJANG => return "Rjng";
|
||||||
|
case script::SAURASHTRA => return "Saur";
|
||||||
|
case script::SUNDANESE => return "Sund";
|
||||||
|
case script::VAI => return "Vaii";
|
||||||
|
case script::AVESTAN => return "Avst";
|
||||||
|
case script::BAMUM => return "Bamu";
|
||||||
|
case script::EGYPTIAN_HIEROGLYPHS => return "Egyp";
|
||||||
|
case script::IMPERIAL_ARAMAIC => return "Armi";
|
||||||
|
case script::INSCRIPTIONAL_PAHLAVI => return "Phli";
|
||||||
|
case script::INSCRIPTIONAL_PARTHIAN => return "Prti";
|
||||||
|
case script::JAVANESE => return "Java";
|
||||||
|
case script::KAITHI => return "Kthi";
|
||||||
|
case script::LISU => return "Lisu";
|
||||||
|
case script::MEETEI_MAYEK => return "Mtei";
|
||||||
|
case script::OLD_SOUTH_ARABIAN => return "Sarb";
|
||||||
|
case script::OLD_TURKIC => return "Orkh";
|
||||||
|
case script::SAMARITAN => return "Samr";
|
||||||
|
case script::TAI_THAM => return "Lana";
|
||||||
|
case script::TAI_VIET => return "Tavt";
|
||||||
|
case script::BATAK => return "Batk";
|
||||||
|
case script::BRAHMI => return "Brah";
|
||||||
|
case script::MANDAIC => return "Mand";
|
||||||
|
case script::CHAKMA => return "Cakm";
|
||||||
|
case script::MEROITIC_CURSIVE => return "Merc";
|
||||||
|
case script::MEROITIC_HIEROGLYPHS => return "Mero";
|
||||||
|
case script::MIAO => return "Plrd";
|
||||||
|
case script::SHARADA => return "Shrd";
|
||||||
|
case script::SORA_SOMPENG => return "Sora";
|
||||||
|
case script::TAKRI => return "Takr";
|
||||||
|
case script::BASSA_VAH => return "Bass";
|
||||||
|
case script::CAUCASIAN_ALBANIAN => return "Aghb";
|
||||||
|
case script::DUPLOYAN => return "Dupl";
|
||||||
|
case script::ELBASAN => return "Elba";
|
||||||
|
case script::GRANTHA => return "Gran";
|
||||||
|
case script::KHOJKI => return "Khoj";
|
||||||
|
case script::KHUDAWADI => return "Sind";
|
||||||
|
case script::LINEAR_A => return "Lina";
|
||||||
|
case script::MAHAJANI => return "Mahj";
|
||||||
|
case script::MANICHAEAN => return "Mani";
|
||||||
|
case script::MENDE_KIKAKUI => return "Mend";
|
||||||
|
case script::MODI => return "Modi";
|
||||||
|
case script::MRO => return "Mroo";
|
||||||
|
case script::NABATAEAN => return "Nbat";
|
||||||
|
case script::OLD_NORTH_ARABIAN => return "Narb";
|
||||||
|
case script::OLD_PERMIC => return "Perm";
|
||||||
|
case script::PAHAWH_HMONG => return "Hmng";
|
||||||
|
case script::PALMYRENE => return "Palm";
|
||||||
|
case script::PAU_CIN_HAU => return "Pauc";
|
||||||
|
case script::PSALTER_PAHLAVI => return "Phlp";
|
||||||
|
case script::SIDDHAM => return "Sidd";
|
||||||
|
case script::TIRHUTA => return "Tirh";
|
||||||
|
case script::WARANG_CITI => return "Wara";
|
||||||
|
case script::AHOM => return "Ahom";
|
||||||
|
case script::ANATOLIAN_HIEROGLYPHS => return "Hluw";
|
||||||
|
case script::HATRAN => return "Hatr";
|
||||||
|
case script::MULTANI => return "Mult";
|
||||||
|
case script::OLD_HUNGARIAN => return "Hung";
|
||||||
|
case script::SIGNWRITING => return "Sgnw";
|
||||||
|
case script::ADLAM => return "Adlm";
|
||||||
|
case script::BHAIKSUKI => return "Bhks";
|
||||||
|
case script::MARCHEN => return "Marc";
|
||||||
|
case script::OSAGE => return "Osge";
|
||||||
|
case script::TANGUT => return "Tang";
|
||||||
|
case script::NEWA => return "Newa";
|
||||||
|
case script::MASARAM_GONDI => return "Gonm";
|
||||||
|
case script::NUSHU => return "Nshu";
|
||||||
|
case script::SOYOMBO => return "Soyo";
|
||||||
|
case script::ZANABAZAR_SQUARE => return "Zanb";
|
||||||
|
case script::DOGRA => return "Dogr";
|
||||||
|
case script::GUNJALA_GONDI => return "Gong";
|
||||||
|
case script::HANIFI_ROHINGYA => return "Rohg";
|
||||||
|
case script::MAKASAR => return "Maka";
|
||||||
|
case script::MEDEFAIDRIN => return "Medf";
|
||||||
|
case script::OLD_SOGDIAN => return "Sogo";
|
||||||
|
case script::SOGDIAN => return "Sogd";
|
||||||
|
case script::ELYMAIC => return "Elym";
|
||||||
|
case script::NANDINAGARI => return "Nand";
|
||||||
|
case script::NYIAKENG_PUACHUE_HMONG => return "Hmnp";
|
||||||
|
case script::WANCHO => return "Wcho";
|
||||||
|
case script::CHORASMIAN => return "Chrs";
|
||||||
|
case script::DIVES_AKURU => return "Diak";
|
||||||
|
case script::KHITAN_SMALL_SCRIPT => return "Kits";
|
||||||
|
case script::YEZIDI => return "Yezi";
|
||||||
|
case script::CYPRO_MINOAN => return "Cpmn";
|
||||||
|
case script::OLD_UYGHUR => return "Ougr";
|
||||||
|
case script::TANGSA => return "Tnsa";
|
||||||
|
case script::TOTO => return "Toto";
|
||||||
|
case script::VITHKUQI => return "Vith";
|
||||||
|
case script::MATH => return "Zmth";
|
||||||
|
case script::KAWI => return "Kawi";
|
||||||
|
case script::NAG_MUNDARI => return "Nagm";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
6460
unicode/ucd_gen.ha
6460
unicode/ucd_gen.ha
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue