unicode::gc: use two-character identifiers
Signed-off-by: Drew DeVault <sir@cmpwn.com>
This commit is contained in:
parent
8183289d6f
commit
c56f5d40af
1 changed file with 96 additions and 60 deletions
156
unicode/ucd.ha
156
unicode/ucd.ha
|
@ -22,36 +22,36 @@ fn get_ucdrecord(rn: rune) *ucd_record = {
|
|||
|
||||
// Unicode character General_Category attribute
|
||||
export type gc = enum u8 {
|
||||
CONTROL, // Cc
|
||||
FORMAT, // Cf
|
||||
UNASSIGNED, // Cn
|
||||
PRIVATE_USE, // Co
|
||||
SURROGATE, // Cs
|
||||
LOWERCASE_LETTER, // Ll
|
||||
MODIFIER_LETTER, // Lm
|
||||
OTHER_LETTER, // Lo
|
||||
TITLECASE_LETTER, // Lt
|
||||
UPPERCASE_LETTER, // Lu
|
||||
SPACING_MARK, // Mc
|
||||
ENCLOSING_MARK, // Me
|
||||
NON_SPACING_MARK, // Mn
|
||||
DECIMAL_NUMBER, // Nd
|
||||
LETTER_NUMBER, // Nl
|
||||
OTHER_NUMBER, // No
|
||||
CONNECT_PUNCTUATION, // Pc
|
||||
DASH_PUNCTUATION, // Pd
|
||||
CLOSE_PUNCTUATION, // Pe
|
||||
FINAL_PUNCTUATION, // Pf
|
||||
INITIAL_PUNCTUATION, // Pi
|
||||
OTHER_PUNCTUATION, // Po
|
||||
OPEN_PUNCTUATION, // Ps
|
||||
CURRENCY_SYMBOL, // Sc
|
||||
MODIFIER_SYMBOL, // Sk
|
||||
MATH_SYMBOL, // Sm
|
||||
OTHER_SYMBOL, // So
|
||||
LINE_SEPARATOR, // Zl
|
||||
PARAGRAPH_SEPARATOR, // Zp
|
||||
SPACE_SEPARATOR, // Zs
|
||||
Cc, // Control
|
||||
Cf, // Format
|
||||
Cn, // Unassigned
|
||||
Co, // Private use
|
||||
Cs, // Surrogate
|
||||
Ll, // Lowercase letter
|
||||
Lm, // Modifier letter
|
||||
Lo, // Other letter
|
||||
Lt, // Titlecase letter
|
||||
Lu, // Uppercase letter
|
||||
Mc, // Spacing mark
|
||||
Me, // Enclosing mark
|
||||
Mn, // Non-spacing mark
|
||||
Nd, // Decimal number
|
||||
Nl, // Letter number
|
||||
No, // Other number
|
||||
Pc, // Connect punctuation
|
||||
Pd, // Dash punctuation
|
||||
Pe, // Close punctuation
|
||||
Pf, // Final punctuation
|
||||
Pi, // Initial punctuation
|
||||
Po, // Other punctuation
|
||||
Ps, // Open punctuation
|
||||
Sc, // Currency symbol
|
||||
Sk, // Modifier symbol
|
||||
Sm, // Math symbol
|
||||
So, // Other symbol
|
||||
Zl, // Line separator
|
||||
Zp, // Paragraph separator
|
||||
Zs, // Space separator
|
||||
};
|
||||
|
||||
// Returns the [[gc]] (General_Category) corresponding to this rune.
|
||||
|
@ -59,39 +59,75 @@ export fn rune_gc(rn: rune) gc = {
|
|||
return get_ucdrecord(rn).category: gc;
|
||||
};
|
||||
|
||||
// Returns the name associated with a [[gc]] value.
|
||||
export fn gc_name(v: gc) const str = {
|
||||
switch (v) {
|
||||
case gc::Cc => return "Control";
|
||||
case gc::Cf => return "Format";
|
||||
case gc::Cn => return "Unassigned";
|
||||
case gc::Co => return "Private use";
|
||||
case gc::Cs => return "Surrogate";
|
||||
case gc::Ll => return "Lowercase letter";
|
||||
case gc::Lm => return "Modifier letter";
|
||||
case gc::Lo => return "Other letter";
|
||||
case gc::Lt => return "Titlecase letter";
|
||||
case gc::Lu => return "Uppercase letter";
|
||||
case gc::Mc => return "Spacing mark";
|
||||
case gc::Me => return "Enclosing mark";
|
||||
case gc::Mn => return "Non-spacing mark";
|
||||
case gc::Nd => return "Decimal number";
|
||||
case gc::Nl => return "Letter number";
|
||||
case gc::No => return "Other number";
|
||||
case gc::Pc => return "Connect punctuation";
|
||||
case gc::Pd => return "Dash punctuation";
|
||||
case gc::Pe => return "Close punctuation";
|
||||
case gc::Pf => return "Final punctuation";
|
||||
case gc::Pi => return "Initial punctuation";
|
||||
case gc::Po => return "Other punctuation";
|
||||
case gc::Ps => return "Open punctuation";
|
||||
case gc::Sc => return "Currency symbol";
|
||||
case gc::Sk => return "Modifier symbol";
|
||||
case gc::Sm => return "Math symbol";
|
||||
case gc::So => return "Other symbol";
|
||||
case gc::Zl => return "Line separator";
|
||||
case gc::Zp => return "Paragraph separator";
|
||||
case gc::Zs => return "Space separator";
|
||||
};
|
||||
};
|
||||
|
||||
// Returns the two-character code associated with a [[gc]] value.
|
||||
export fn gc_code(v: gc) const str = {
|
||||
switch (v) {
|
||||
case gc::CONTROL => return "Cc";
|
||||
case gc::FORMAT => return "Cf";
|
||||
case gc::UNASSIGNED => return "Cn";
|
||||
case gc::PRIVATE_USE => return "Co";
|
||||
case gc::SURROGATE => return "Cs";
|
||||
case gc::LOWERCASE_LETTER => return "Ll";
|
||||
case gc::MODIFIER_LETTER => return "Lm";
|
||||
case gc::OTHER_LETTER => return "Lo";
|
||||
case gc::TITLECASE_LETTER => return "Lt";
|
||||
case gc::UPPERCASE_LETTER => return "Lu";
|
||||
case gc::SPACING_MARK => return "Mc";
|
||||
case gc::ENCLOSING_MARK => return "Me";
|
||||
case gc::NON_SPACING_MARK => return "Mn";
|
||||
case gc::DECIMAL_NUMBER => return "Nd";
|
||||
case gc::LETTER_NUMBER => return "Nl";
|
||||
case gc::OTHER_NUMBER => return "No";
|
||||
case gc::CONNECT_PUNCTUATION => return "Pc";
|
||||
case gc::DASH_PUNCTUATION => return "Pd";
|
||||
case gc::CLOSE_PUNCTUATION => return "Pe";
|
||||
case gc::FINAL_PUNCTUATION => return "Pf";
|
||||
case gc::INITIAL_PUNCTUATION => return "Pi";
|
||||
case gc::OTHER_PUNCTUATION => return "Po";
|
||||
case gc::OPEN_PUNCTUATION => return "Ps";
|
||||
case gc::CURRENCY_SYMBOL => return "Sc";
|
||||
case gc::MODIFIER_SYMBOL => return "Sk";
|
||||
case gc::MATH_SYMBOL => return "Sm";
|
||||
case gc::OTHER_SYMBOL => return "So";
|
||||
case gc::LINE_SEPARATOR => return "Zl";
|
||||
case gc::PARAGRAPH_SEPARATOR => return "Zp";
|
||||
case gc::SPACE_SEPARATOR => return "Zs";
|
||||
case gc::Cc => return "Cc";
|
||||
case gc::Cf => return "Cf";
|
||||
case gc::Cn => return "Cn";
|
||||
case gc::Co => return "Co";
|
||||
case gc::Cs => return "Cs";
|
||||
case gc::Ll => return "Ll";
|
||||
case gc::Lm => return "Lm";
|
||||
case gc::Lo => return "Lo";
|
||||
case gc::Lt => return "Lt";
|
||||
case gc::Lu => return "Lu";
|
||||
case gc::Mc => return "Mc";
|
||||
case gc::Me => return "Me";
|
||||
case gc::Mn => return "Mn";
|
||||
case gc::Nd => return "Nd";
|
||||
case gc::Nl => return "Nl";
|
||||
case gc::No => return "No";
|
||||
case gc::Pc => return "Pc";
|
||||
case gc::Pd => return "Pd";
|
||||
case gc::Pe => return "Pe";
|
||||
case gc::Pf => return "Pf";
|
||||
case gc::Pi => return "Pi";
|
||||
case gc::Po => return "Po";
|
||||
case gc::Ps => return "Ps";
|
||||
case gc::Sc => return "Sc";
|
||||
case gc::Sk => return "Sk";
|
||||
case gc::Sm => return "Sm";
|
||||
case gc::So => return "So";
|
||||
case gc::Zl => return "Zl";
|
||||
case gc::Zp => return "Zp";
|
||||
case gc::Zs => return "Zs";
|
||||
};
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue