unicode::gc: use two-character identifiers

Signed-off-by: Drew DeVault <sir@cmpwn.com>
This commit is contained in:
Drew DeVault 2024-04-16 18:32:11 +02:00
parent 8183289d6f
commit c56f5d40af

View file

@ -22,36 +22,36 @@ fn get_ucdrecord(rn: rune) *ucd_record = {
// Unicode character General_Category attribute // Unicode character General_Category attribute
export type gc = enum u8 { export type gc = enum u8 {
CONTROL, // Cc Cc, // Control
FORMAT, // Cf Cf, // Format
UNASSIGNED, // Cn Cn, // Unassigned
PRIVATE_USE, // Co Co, // Private use
SURROGATE, // Cs Cs, // Surrogate
LOWERCASE_LETTER, // Ll Ll, // Lowercase letter
MODIFIER_LETTER, // Lm Lm, // Modifier letter
OTHER_LETTER, // Lo Lo, // Other letter
TITLECASE_LETTER, // Lt Lt, // Titlecase letter
UPPERCASE_LETTER, // Lu Lu, // Uppercase letter
SPACING_MARK, // Mc Mc, // Spacing mark
ENCLOSING_MARK, // Me Me, // Enclosing mark
NON_SPACING_MARK, // Mn Mn, // Non-spacing mark
DECIMAL_NUMBER, // Nd Nd, // Decimal number
LETTER_NUMBER, // Nl Nl, // Letter number
OTHER_NUMBER, // No No, // Other number
CONNECT_PUNCTUATION, // Pc Pc, // Connect punctuation
DASH_PUNCTUATION, // Pd Pd, // Dash punctuation
CLOSE_PUNCTUATION, // Pe Pe, // Close punctuation
FINAL_PUNCTUATION, // Pf Pf, // Final punctuation
INITIAL_PUNCTUATION, // Pi Pi, // Initial punctuation
OTHER_PUNCTUATION, // Po Po, // Other punctuation
OPEN_PUNCTUATION, // Ps Ps, // Open punctuation
CURRENCY_SYMBOL, // Sc Sc, // Currency symbol
MODIFIER_SYMBOL, // Sk Sk, // Modifier symbol
MATH_SYMBOL, // Sm Sm, // Math symbol
OTHER_SYMBOL, // So So, // Other symbol
LINE_SEPARATOR, // Zl Zl, // Line separator
PARAGRAPH_SEPARATOR, // Zp Zp, // Paragraph separator
SPACE_SEPARATOR, // Zs Zs, // Space separator
}; };
// Returns the [[gc]] (General_Category) corresponding to this rune. // Returns the [[gc]] (General_Category) corresponding to this rune.
@ -59,39 +59,75 @@ export fn rune_gc(rn: rune) gc = {
return get_ucdrecord(rn).category: gc; return get_ucdrecord(rn).category: gc;
}; };
// Maps a [[gc]] value to the human-readable name of its Unicode
// General_Category, for example "Lowercase letter" for gc::Ll. The returned
// string is a static literal; the caller must not free it.
export fn gc_name(v: gc) const str = {
	// The switch is used as an expression: every arm yields its literal and
	// the function returns whichever one was selected. All members of gc are
	// listed, so no default arm is needed.
	return switch (v) {
	// C*: other
	case gc::Cc => yield "Control";
	case gc::Cf => yield "Format";
	case gc::Cn => yield "Unassigned";
	case gc::Co => yield "Private use";
	case gc::Cs => yield "Surrogate";
	// L*: letters
	case gc::Ll => yield "Lowercase letter";
	case gc::Lm => yield "Modifier letter";
	case gc::Lo => yield "Other letter";
	case gc::Lt => yield "Titlecase letter";
	case gc::Lu => yield "Uppercase letter";
	// M*: marks
	case gc::Mc => yield "Spacing mark";
	case gc::Me => yield "Enclosing mark";
	case gc::Mn => yield "Non-spacing mark";
	// N*: numbers
	case gc::Nd => yield "Decimal number";
	case gc::Nl => yield "Letter number";
	case gc::No => yield "Other number";
	// P*: punctuation
	case gc::Pc => yield "Connect punctuation";
	case gc::Pd => yield "Dash punctuation";
	case gc::Pe => yield "Close punctuation";
	case gc::Pf => yield "Final punctuation";
	case gc::Pi => yield "Initial punctuation";
	case gc::Po => yield "Other punctuation";
	case gc::Ps => yield "Open punctuation";
	// S*: symbols
	case gc::Sc => yield "Currency symbol";
	case gc::Sk => yield "Modifier symbol";
	case gc::Sm => yield "Math symbol";
	case gc::So => yield "Other symbol";
	// Z*: separators
	case gc::Zl => yield "Line separator";
	case gc::Zp => yield "Paragraph separator";
	case gc::Zs => yield "Space separator";
	};
};
// Returns the two-character code associated with a [[gc]] value. // Returns the two-character code associated with a [[gc]] value.
export fn gc_code(v: gc) const str = { export fn gc_code(v: gc) const str = {
switch (v) { switch (v) {
case gc::CONTROL => return "Cc"; case gc::Cc => return "Cc";
case gc::FORMAT => return "Cf"; case gc::Cf => return "Cf";
case gc::UNASSIGNED => return "Cn"; case gc::Cn => return "Cn";
case gc::PRIVATE_USE => return "Co"; case gc::Co => return "Co";
case gc::SURROGATE => return "Cs"; case gc::Cs => return "Cs";
case gc::LOWERCASE_LETTER => return "Ll"; case gc::Ll => return "Ll";
case gc::MODIFIER_LETTER => return "Lm"; case gc::Lm => return "Lm";
case gc::OTHER_LETTER => return "Lo"; case gc::Lo => return "Lo";
case gc::TITLECASE_LETTER => return "Lt"; case gc::Lt => return "Lt";
case gc::UPPERCASE_LETTER => return "Lu"; case gc::Lu => return "Lu";
case gc::SPACING_MARK => return "Mc"; case gc::Mc => return "Mc";
case gc::ENCLOSING_MARK => return "Me"; case gc::Me => return "Me";
case gc::NON_SPACING_MARK => return "Mn"; case gc::Mn => return "Mn";
case gc::DECIMAL_NUMBER => return "Nd"; case gc::Nd => return "Nd";
case gc::LETTER_NUMBER => return "Nl"; case gc::Nl => return "Nl";
case gc::OTHER_NUMBER => return "No"; case gc::No => return "No";
case gc::CONNECT_PUNCTUATION => return "Pc"; case gc::Pc => return "Pc";
case gc::DASH_PUNCTUATION => return "Pd"; case gc::Pd => return "Pd";
case gc::CLOSE_PUNCTUATION => return "Pe"; case gc::Pe => return "Pe";
case gc::FINAL_PUNCTUATION => return "Pf"; case gc::Pf => return "Pf";
case gc::INITIAL_PUNCTUATION => return "Pi"; case gc::Pi => return "Pi";
case gc::OTHER_PUNCTUATION => return "Po"; case gc::Po => return "Po";
case gc::OPEN_PUNCTUATION => return "Ps"; case gc::Ps => return "Ps";
case gc::CURRENCY_SYMBOL => return "Sc"; case gc::Sc => return "Sc";
case gc::MODIFIER_SYMBOL => return "Sk"; case gc::Sk => return "Sk";
case gc::MATH_SYMBOL => return "Sm"; case gc::Sm => return "Sm";
case gc::OTHER_SYMBOL => return "So"; case gc::So => return "So";
case gc::LINE_SEPARATOR => return "Zl"; case gc::Zl => return "Zl";
case gc::PARAGRAPH_SEPARATOR => return "Zp"; case gc::Zp => return "Zp";
case gc::SPACE_SEPARATOR => return "Zs"; case gc::Zs => return "Zs";
}; };
}; };