// Tuple layout of one encoded record: five u8 values followed by one u16,
// which matches the field types of [[ucd_record]] in declaration order.
type ucd_encodedrec = (u8, u8, u8, u8, u8, u16);

// Per-rune record returned by [[get_ucdrecord]]. All fields are stored as raw
// integers; [[rune_bidi]] and [[rune_script]] cast the bidirectional and script
// fields to their respective enum types.
type ucd_record = struct {
	category: u8,
	combining: u8,
	bidirectional: u8,
	mirrored: u8,
	east_asian_width: u8,
	script: u16,
};

// Looks up the [[ucd_record]] for a rune. For code points below 0x110000 the
// lookup first indexes the index1 table by (code >> UCD_RECORD_SHIFT) and then
// indexes the index2 table.
//
// NOTE(review): in this copy of the file the text is truncated partway through
// the index2 lookup. Everything between that point and the first visible case
// arm below is missing (apparently the rest of this function, the declaration
// of the gc enum, and the head of the function mapping gc values to two-letter
// codes) and must be restored from the upstream source.
fn get_ucdrecord(rn: rune) *ucd_record = {
	const code = rn: u32;
	let index = 0u16;
	if (code < 0x110000) {
		index = index1[(code>>UCD_RECORD_SHIFT)];
		// NOTE(review): truncation point; the remainder of this statement
		// is lost and the text resumes inside a switch over gc values.
		index = index2[(index< return "Cc";
	case gc::FORMAT => return "Cf";
	case gc::UNASSIGNED => return "Cn";
	case gc::PRIVATE_USE => return "Co";
	case gc::SURROGATE => return "Cs";
	case gc::LOWERCASE_LETTER => return "Ll";
	case gc::MODIFIER_LETTER => return "Lm";
	case gc::OTHER_LETTER => return "Lo";
	case gc::TITLECASE_LETTER => return "Lt";
	case gc::UPPERCASE_LETTER => return "Lu";
	case gc::SPACING_MARK => return "Mc";
	case gc::ENCLOSING_MARK => return "Me";
	case gc::NON_SPACING_MARK => return "Mn";
	case gc::DECIMAL_NUMBER => return "Nd";
	case gc::LETTER_NUMBER => return "Nl";
	case gc::OTHER_NUMBER => return "No";
	case gc::CONNECT_PUNCTUATION => return "Pc";
	case gc::DASH_PUNCTUATION => return "Pd";
	case gc::CLOSE_PUNCTUATION => return "Pe";
	case gc::FINAL_PUNCTUATION => return "Pf";
	case gc::INITIAL_PUNCTUATION => return "Pi";
	case gc::OTHER_PUNCTUATION => return "Po";
	case gc::OPEN_PUNCTUATION => return "Ps";
	case gc::CURRENCY_SYMBOL => return "Sc";
	case gc::MODIFIER_SYMBOL => return "Sk";
	case gc::MATH_SYMBOL => return "Sm";
	case gc::OTHER_SYMBOL => return "So";
	case gc::LINE_SEPARATOR => return "Zl";
	case gc::PARAGRAPH_SEPARATOR => return "Zp";
	case gc::SPACE_SEPARATOR => return "Zs";
	};
};

// Bidirectional classification.
//
// Members are implicitly numbered from zero in declaration order, and
// [[rune_bidi]] casts the stored u8 straight to this type, so the order is
// significant.
// NOTE(review): the member names look like the Unicode Bidi_Class short
// aliases; confirm against the table generator before relying on that.
export type bidi = enum u8 {
	UNKNOWN,
	L,
	LRE,
	LRO,
	R,
	AL,
	RLE,
	RLO,
	PDF,
	EN,
	ES,
	ET,
	AN,
	CS,
	NSM,
	BN,
	B,
	S,
	WS,
	ON,
	LRI,
	RLI,
	FSI,
	PDI,
};

// Returns the [[bidi]] classification corresponding to this rune.
//
// The value is the bidirectional field of the rune's [[ucd_record]], cast to
// [[bidi]] without further validation.
export fn rune_bidi(rn: rune) bidi = {
	return get_ucdrecord(rn).bidirectional: bidi;
};

// Unicode character Script attribute.
//
// Each member is annotated with its four-character code, the same string that
// [[script_code]] returns for it. Members are implicitly numbered from zero in
// declaration order, and [[rune_script]] casts the stored u16 straight to this
// type, so members must not be reordered (presumably the numbering has to stay
// in sync with the generated record tables; verify against the generator).
export type script = enum u16 {
	COMMON, // Zyyy
	INHERITED, // Zinh
	UNKNOWN, // Zzzz
	ADLAM, // Adlm
	CAUCASIAN_ALBANIAN, // Aghb
	AHOM, // Ahom
	ARABIC, // Arab
	IMPERIAL_ARAMAIC, // Armi
	ARMENIAN, // Armn
	AVESTAN, // Avst
	BALINESE, // Bali
	BAMUM, // Bamu
	BASSA_VAH, // Bass
	BATAK, // Batk
	BENGALI, // Beng
	BHAIKSUKI, // Bhks
	BOPOMOFO, // Bopo
	BRAHMI, // Brah
	BRAILLE, // Brai
	BUGINESE, // Bugi
	BUHID, // Buhd
	CHAKMA, // Cakm
	CANADIAN_SYLLABICS, // Cans
	CARIAN, // Cari
	CHAM, // Cham
	CHEROKEE, // Cher
	CHORASMIAN, // Chrs
	COPTIC, // Copt
	CYPRO_MINOAN, // Cpmn
	CYPRIOT, // Cprt
	CYRILLIC, // Cyrl
	DEVANAGARI, // Deva
	DIVES_AKURU, // Diak
	DOGRA, // Dogr
	DESERET, // Dsrt
	DUPLOYAN, // Dupl
	EGYPTIAN_HIEROGLYPHS, // Egyp
	ELBASAN, // Elba
	ELYMAIC, // Elym
	ETHIOPIC, // Ethi
	GEORGIAN, // Geor
	GLAGOLITIC, // Glag
	GUNJALA_GONDI, // Gong
	MASARAM_GONDI, // Gonm
	GOTHIC, // Goth
	GRANTHA, // Gran
	GREEK, // Grek
	GUJARATI, // Gujr
	GURMUKHI, // Guru
	HANGUL, // Hang
	HAN, // Hani
	HANUNOO, // Hano
	HATRAN, // Hatr
	HEBREW, // Hebr
	HIRAGANA, // Hira
	ANATOLIAN_HIEROGLYPHS, // Hluw
	PAHAWH_HMONG, // Hmng
	NYIAKENG_PUACHUE_HMONG, // Hmnp
	OLD_HUNGARIAN, // Hung
	OLD_ITALIC, // Ital
	JAVANESE, // Java
	KAYAH_LI, // Kali
	KATAKANA, // Kana
	KAWI, // Kawi
	KHAROSHTHI, // Khar
	KHMER, // Khmr
	KHOJKI, // Khoj
	KHITAN_SMALL_SCRIPT, // Kits
	KANNADA, // Knda
	KAITHI, // Kthi
	TAI_THAM, // Lana
	LAO, // Laoo
	LATIN, // Latn
	LEPCHA, // Lepc
	LIMBU, // Limb
	LINEAR_A, // Lina
	LINEAR_B, // Linb
	LISU, // Lisu
	LYCIAN, // Lyci
	LYDIAN, // Lydi
	MAHAJANI, // Mahj
	MAKASAR, // Maka
	MANDAIC, // Mand
	MANICHAEAN, // Mani
	MARCHEN, // Marc
	MEDEFAIDRIN, // Medf
	MENDE_KIKAKUI, // Mend
	MEROITIC_CURSIVE, // Merc
	MEROITIC_HIEROGLYPHS, // Mero
	MALAYALAM, // Mlym
	MODI, // Modi
	MONGOLIAN, // Mong
	MRO, // Mroo
	MEETEI_MAYEK, // Mtei
	MULTANI, // Mult
	MYANMAR, // Mymr
	NAG_MUNDARI, // Nagm
	NANDINAGARI, // Nand
	OLD_NORTH_ARABIAN, // Narb
	NABATAEAN, // Nbat
	NEWA, // Newa
	NKO, // Nkoo
	NUSHU, // Nshu
	OGHAM, // Ogam
	OL_CHIKI, // Olck
	OLD_TURKIC, // Orkh
	ORIYA, // Orya
	OSAGE, // Osge
	OSMANYA, // Osma
	OLD_UYGHUR, // Ougr
	PALMYRENE, // Palm
	PAU_CIN_HAU, // Pauc
	OLD_PERMIC, // Perm
	PHAGS_PA, // Phag
	INSCRIPTIONAL_PAHLAVI, // Phli
	PSALTER_PAHLAVI, // Phlp
	PHOENICIAN, // Phnx
	MIAO, // Plrd
	INSCRIPTIONAL_PARTHIAN, // Prti
	REJANG, // Rjng
	HANIFI_ROHINGYA, // Rohg
	RUNIC, // Runr
	SAMARITAN, // Samr
	OLD_SOUTH_ARABIAN, // Sarb
	SAURASHTRA, // Saur
	SIGNWRITING, // Sgnw
	SHAVIAN, // Shaw
	SHARADA, // Shrd
	SIDDHAM, // Sidd
	KHUDAWADI, // Sind
	SINHALA, // Sinh
	SOGDIAN, // Sogd
	OLD_SOGDIAN, // Sogo
	SORA_SOMPENG, // Sora
	SOYOMBO, // Soyo
	SUNDANESE, // Sund
	SYLOTI_NAGRI, // Sylo
	SYRIAC, // Syrc
	TAGBANWA, // Tagb
	TAKRI, // Takr
	TAI_LE, // Tale
	NEW_TAI_LUE, // Talu
	TAMIL, // Taml
	TANGUT, // Tang
	TAI_VIET, // Tavt
	TELUGU, // Telu
	TIFINAGH, // Tfng
	TAGALOG, // Tglg
	THAANA, // Thaa
	THAI, // Thai
	TIBETAN, // Tibt
	TIRHUTA, // Tirh
	TANGSA, // Tnsa
	TOTO, // Toto
	UGARITIC, // Ugar
	VAI, // Vaii
	VITHKUQI, // Vith
	WARANG_CITI, // Wara
	WANCHO, // Wcho
	OLD_PERSIAN, // Xpeo
	CUNEIFORM, // Xsux
	YEZIDI, // Yezi
	YI, // Yiii
	ZANABAZAR_SQUARE, // Zanb
	MATH, // Zmth
};

// Returns the [[script]] corresponding to this rune.
//
// The value is the script field of the rune's [[ucd_record]], cast to
// [[script]] without further validation.
export fn rune_script(rn: rune) script = {
	return get_ucdrecord(rn).script: script;
};

// Returns the four-character code associated with a [[script]] value.
//
// The result is always one of the string literals below, so the caller must
// not free it. The arms are listed in the same order as the members of
// [[script]]; the switch covers every member, so a newly added member has to
// be given an arm here as well.
export fn script_code(sc: script) const str = {
	return switch (sc) {
	case script::COMMON => yield "Zyyy";
	case script::INHERITED => yield "Zinh";
	case script::UNKNOWN => yield "Zzzz";
	case script::ADLAM => yield "Adlm";
	case script::CAUCASIAN_ALBANIAN => yield "Aghb";
	case script::AHOM => yield "Ahom";
	case script::ARABIC => yield "Arab";
	case script::IMPERIAL_ARAMAIC => yield "Armi";
	case script::ARMENIAN => yield "Armn";
	case script::AVESTAN => yield "Avst";
	case script::BALINESE => yield "Bali";
	case script::BAMUM => yield "Bamu";
	case script::BASSA_VAH => yield "Bass";
	case script::BATAK => yield "Batk";
	case script::BENGALI => yield "Beng";
	case script::BHAIKSUKI => yield "Bhks";
	case script::BOPOMOFO => yield "Bopo";
	case script::BRAHMI => yield "Brah";
	case script::BRAILLE => yield "Brai";
	case script::BUGINESE => yield "Bugi";
	case script::BUHID => yield "Buhd";
	case script::CHAKMA => yield "Cakm";
	case script::CANADIAN_SYLLABICS => yield "Cans";
	case script::CARIAN => yield "Cari";
	case script::CHAM => yield "Cham";
	case script::CHEROKEE => yield "Cher";
	case script::CHORASMIAN => yield "Chrs";
	case script::COPTIC => yield "Copt";
	case script::CYPRO_MINOAN => yield "Cpmn";
	case script::CYPRIOT => yield "Cprt";
	case script::CYRILLIC => yield "Cyrl";
	case script::DEVANAGARI => yield "Deva";
	case script::DIVES_AKURU => yield "Diak";
	case script::DOGRA => yield "Dogr";
	case script::DESERET => yield "Dsrt";
	case script::DUPLOYAN => yield "Dupl";
	case script::EGYPTIAN_HIEROGLYPHS => yield "Egyp";
	case script::ELBASAN => yield "Elba";
	case script::ELYMAIC => yield "Elym";
	case script::ETHIOPIC => yield "Ethi";
	case script::GEORGIAN => yield "Geor";
	case script::GLAGOLITIC => yield "Glag";
	case script::GUNJALA_GONDI => yield "Gong";
	case script::MASARAM_GONDI => yield "Gonm";
	case script::GOTHIC => yield "Goth";
	case script::GRANTHA => yield "Gran";
	case script::GREEK => yield "Grek";
	case script::GUJARATI => yield "Gujr";
	case script::GURMUKHI => yield "Guru";
	case script::HANGUL => yield "Hang";
	case script::HAN => yield "Hani";
	case script::HANUNOO => yield "Hano";
	case script::HATRAN => yield "Hatr";
	case script::HEBREW => yield "Hebr";
	case script::HIRAGANA => yield "Hira";
	case script::ANATOLIAN_HIEROGLYPHS => yield "Hluw";
	case script::PAHAWH_HMONG => yield "Hmng";
	case script::NYIAKENG_PUACHUE_HMONG => yield "Hmnp";
	case script::OLD_HUNGARIAN => yield "Hung";
	case script::OLD_ITALIC => yield "Ital";
	case script::JAVANESE => yield "Java";
	case script::KAYAH_LI => yield "Kali";
	case script::KATAKANA => yield "Kana";
	case script::KAWI => yield "Kawi";
	case script::KHAROSHTHI => yield "Khar";
	case script::KHMER => yield "Khmr";
	case script::KHOJKI => yield "Khoj";
	case script::KHITAN_SMALL_SCRIPT => yield "Kits";
	case script::KANNADA => yield "Knda";
	case script::KAITHI => yield "Kthi";
	case script::TAI_THAM => yield "Lana";
	case script::LAO => yield "Laoo";
	case script::LATIN => yield "Latn";
	case script::LEPCHA => yield "Lepc";
	case script::LIMBU => yield "Limb";
	case script::LINEAR_A => yield "Lina";
	case script::LINEAR_B => yield "Linb";
	case script::LISU => yield "Lisu";
	case script::LYCIAN => yield "Lyci";
	case script::LYDIAN => yield "Lydi";
	case script::MAHAJANI => yield "Mahj";
	case script::MAKASAR => yield "Maka";
	case script::MANDAIC => yield "Mand";
	case script::MANICHAEAN => yield "Mani";
	case script::MARCHEN => yield "Marc";
	case script::MEDEFAIDRIN => yield "Medf";
	case script::MENDE_KIKAKUI => yield "Mend";
	case script::MEROITIC_CURSIVE => yield "Merc";
	case script::MEROITIC_HIEROGLYPHS => yield "Mero";
	case script::MALAYALAM => yield "Mlym";
	case script::MODI => yield "Modi";
	case script::MONGOLIAN => yield "Mong";
	case script::MRO => yield "Mroo";
	case script::MEETEI_MAYEK => yield "Mtei";
	case script::MULTANI => yield "Mult";
	case script::MYANMAR => yield "Mymr";
	case script::NAG_MUNDARI => yield "Nagm";
	case script::NANDINAGARI => yield "Nand";
	case script::OLD_NORTH_ARABIAN => yield "Narb";
	case script::NABATAEAN => yield "Nbat";
	case script::NEWA => yield "Newa";
	case script::NKO => yield "Nkoo";
	case script::NUSHU => yield "Nshu";
	case script::OGHAM => yield "Ogam";
	case script::OL_CHIKI => yield "Olck";
	case script::OLD_TURKIC => yield "Orkh";
	case script::ORIYA => yield "Orya";
	case script::OSAGE => yield "Osge";
	case script::OSMANYA => yield "Osma";
	case script::OLD_UYGHUR => yield "Ougr";
	case script::PALMYRENE => yield "Palm";
	case script::PAU_CIN_HAU => yield "Pauc";
	case script::OLD_PERMIC => yield "Perm";
	case script::PHAGS_PA => yield "Phag";
	case script::INSCRIPTIONAL_PAHLAVI => yield "Phli";
	case script::PSALTER_PAHLAVI => yield "Phlp";
	case script::PHOENICIAN => yield "Phnx";
	case script::MIAO => yield "Plrd";
	case script::INSCRIPTIONAL_PARTHIAN => yield "Prti";
	case script::REJANG => yield "Rjng";
	case script::HANIFI_ROHINGYA => yield "Rohg";
	case script::RUNIC => yield "Runr";
	case script::SAMARITAN => yield "Samr";
	case script::OLD_SOUTH_ARABIAN => yield "Sarb";
	case script::SAURASHTRA => yield "Saur";
	case script::SIGNWRITING => yield "Sgnw";
	case script::SHAVIAN => yield "Shaw";
	case script::SHARADA => yield "Shrd";
	case script::SIDDHAM => yield "Sidd";
	case script::KHUDAWADI => yield "Sind";
	case script::SINHALA => yield "Sinh";
	case script::SOGDIAN => yield "Sogd";
	case script::OLD_SOGDIAN => yield "Sogo";
	case script::SORA_SOMPENG => yield "Sora";
	case script::SOYOMBO => yield "Soyo";
	case script::SUNDANESE => yield "Sund";
	case script::SYLOTI_NAGRI => yield "Sylo";
	case script::SYRIAC => yield "Syrc";
	case script::TAGBANWA => yield "Tagb";
	case script::TAKRI => yield "Takr";
	case script::TAI_LE => yield "Tale";
	case script::NEW_TAI_LUE => yield "Talu";
	case script::TAMIL => yield "Taml";
	case script::TANGUT => yield "Tang";
	case script::TAI_VIET => yield "Tavt";
	case script::TELUGU => yield "Telu";
	case script::TIFINAGH => yield "Tfng";
	case script::TAGALOG => yield "Tglg";
	case script::THAANA => yield "Thaa";
	case script::THAI => yield "Thai";
	case script::TIBETAN => yield "Tibt";
	case script::TIRHUTA => yield "Tirh";
	case script::TANGSA => yield "Tnsa";
	case script::TOTO => yield "Toto";
	case script::UGARITIC => yield "Ugar";
	case script::VAI => yield "Vaii";
	case script::VITHKUQI => yield "Vith";
	case script::WARANG_CITI => yield "Wara";
	case script::WANCHO => yield "Wcho";
	case script::OLD_PERSIAN => yield "Xpeo";
	case script::CUNEIFORM => yield "Xsux";
	case script::YEZIDI => yield "Yezi";
	case script::YI => yield "Yiii";
	case script::ZANABAZAR_SQUARE => yield "Zanb";
	case script::MATH => yield "Zmth";
	};
};