Initial commit
This commit is contained in:
commit
04940cea04
6 changed files with 3915 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
*.zip
|
||||
/.data
|
367
COPYING
Normal file
367
COPYING
Normal file
|
@ -0,0 +1,367 @@
|
|||
Mozilla Public License Version 2.0
|
||||
==================================
|
||||
|
||||
1. Definitions
|
||||
--------------
|
||||
|
||||
1.1. "Contributor"
|
||||
means each individual or legal entity that creates, contributes to
|
||||
the creation of, or owns Covered Software.
|
||||
|
||||
1.2. "Contributor Version"
|
||||
means the combination of the Contributions of others (if any) used
|
||||
by a Contributor and that particular Contributor's Contribution.
|
||||
|
||||
1.3. "Contribution"
|
||||
means Covered Software of a particular Contributor.
|
||||
|
||||
1.4. "Covered Software"
|
||||
means Source Code Form to which the initial Contributor has attached
|
||||
the notice in Exhibit A, the Executable Form of such Source Code
|
||||
Form, and Modifications of such Source Code Form, in each case
|
||||
including portions thereof.
|
||||
|
||||
1.5. "Incompatible With Secondary Licenses"
|
||||
means
|
||||
|
||||
(a) that the initial Contributor has attached the notice described
|
||||
in Exhibit B to the Covered Software; or
|
||||
|
||||
(b) that the Covered Software was made available under the terms of
|
||||
version 1.1 or earlier of the License, but not also under the
|
||||
terms of a Secondary License.
|
||||
|
||||
1.6. "Executable Form"
|
||||
means any form of the work other than Source Code Form.
|
||||
|
||||
1.7. "Larger Work"
|
||||
means a work that combines Covered Software with other material, in
|
||||
a separate file or files, that is not Covered Software.
|
||||
|
||||
1.8. "License"
|
||||
means this document.
|
||||
|
||||
1.9. "Licensable"
|
||||
means having the right to grant, to the maximum extent possible,
|
||||
whether at the time of the initial grant or subsequently, any and
|
||||
all of the rights conveyed by this License.
|
||||
|
||||
1.10. "Modifications"
|
||||
means any of the following:
|
||||
|
||||
(a) any file in Source Code Form that results from an addition to,
|
||||
deletion from, or modification of the contents of Covered
|
||||
Software; or
|
||||
|
||||
(b) any new file in Source Code Form that contains any Covered
|
||||
Software.
|
||||
|
||||
1.11. "Patent Claims" of a Contributor
|
||||
means any patent claim(s), including without limitation, method,
|
||||
process, and apparatus claims, in any patent Licensable by such
|
||||
Contributor that would be infringed, but for the grant of the
|
||||
License, by the making, using, selling, offering for sale, having
|
||||
made, import, or transfer of either its Contributions or its
|
||||
Contributor Version.
|
||||
|
||||
1.12. "Secondary License"
|
||||
means either the GNU General Public License, Version 2.0, the GNU
|
||||
Lesser General Public License, Version 2.1, the GNU Affero General
|
||||
Public License, Version 3.0, or any later versions of those
|
||||
licenses.
|
||||
|
||||
1.13. "Source Code Form"
|
||||
means the form of the work preferred for making modifications.
|
||||
|
||||
1.14. "You" (or "Your")
|
||||
means an individual or a legal entity exercising rights under this
|
||||
License. For legal entities, "You" includes any entity that
|
||||
controls, is controlled by, or is under common control with You. For
|
||||
purposes of this definition, "control" means (a) the power, direct
|
||||
or indirect, to cause the direction or management of such entity,
|
||||
whether by contract or otherwise, or (b) ownership of more than
|
||||
fifty percent (50%) of the outstanding shares or beneficial
|
||||
ownership of such entity.
|
||||
|
||||
2. License Grants and Conditions
|
||||
--------------------------------
|
||||
|
||||
2.1. Grants
|
||||
|
||||
Each Contributor hereby grants You a world-wide, royalty-free,
|
||||
non-exclusive license:
|
||||
|
||||
(a) under intellectual property rights (other than patent or trademark)
|
||||
Licensable by such Contributor to use, reproduce, make available,
|
||||
modify, display, perform, distribute, and otherwise exploit its
|
||||
Contributions, either on an unmodified basis, with Modifications, or
|
||||
as part of a Larger Work; and
|
||||
|
||||
(b) under Patent Claims of such Contributor to make, use, sell, offer
|
||||
for sale, have made, import, and otherwise transfer either its
|
||||
Contributions or its Contributor Version.
|
||||
|
||||
2.2. Effective Date
|
||||
|
||||
The licenses granted in Section 2.1 with respect to any Contribution
|
||||
become effective for each Contribution on the date the Contributor first
|
||||
distributes such Contribution.
|
||||
|
||||
2.3. Limitations on Grant Scope
|
||||
|
||||
The licenses granted in this Section 2 are the only rights granted under
|
||||
this License. No additional rights or licenses will be implied from the
|
||||
distribution or licensing of Covered Software under this License.
|
||||
Notwithstanding Section 2.1(b) above, no patent license is granted by a
|
||||
Contributor:
|
||||
|
||||
(a) for any code that a Contributor has removed from Covered Software;
|
||||
or
|
||||
|
||||
(b) for infringements caused by: (i) Your and any other third party's
|
||||
modifications of Covered Software, or (ii) the combination of its
|
||||
Contributions with other software (except as part of its Contributor
|
||||
Version); or
|
||||
|
||||
(c) under Patent Claims infringed by Covered Software in the absence of
|
||||
its Contributions.
|
||||
|
||||
This License does not grant any rights in the trademarks, service marks,
|
||||
or logos of any Contributor (except as may be necessary to comply with
|
||||
the notice requirements in Section 3.4).
|
||||
|
||||
2.4. Subsequent Licenses
|
||||
|
||||
No Contributor makes additional grants as a result of Your choice to
|
||||
distribute the Covered Software under a subsequent version of this
|
||||
License (see Section 10.2) or under the terms of a Secondary License (if
|
||||
permitted under the terms of Section 3.3).
|
||||
|
||||
2.5. Representation
|
||||
|
||||
Each Contributor represents that the Contributor believes its
|
||||
Contributions are its original creation(s) or it has sufficient rights
|
||||
to grant the rights to its Contributions conveyed by this License.
|
||||
|
||||
2.6. Fair Use
|
||||
|
||||
This License is not intended to limit any rights You have under
|
||||
applicable copyright doctrines of fair use, fair dealing, or other
|
||||
equivalents.
|
||||
|
||||
2.7. Conditions
|
||||
|
||||
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
|
||||
in Section 2.1.
|
||||
|
||||
3. Responsibilities
|
||||
-------------------
|
||||
|
||||
3.1. Distribution of Source Form
|
||||
|
||||
All distribution of Covered Software in Source Code Form, including any
|
||||
Modifications that You create or to which You contribute, must be under
|
||||
the terms of this License. You must inform recipients that the Source
|
||||
Code Form of the Covered Software is governed by the terms of this
|
||||
License, and how they can obtain a copy of this License. You may not
|
||||
attempt to alter or restrict the recipients' rights in the Source Code
|
||||
Form.
|
||||
|
||||
3.2. Distribution of Executable Form
|
||||
|
||||
If You distribute Covered Software in Executable Form then:
|
||||
|
||||
(a) such Covered Software must also be made available in Source Code
|
||||
Form, as described in Section 3.1, and You must inform recipients of
|
||||
the Executable Form how they can obtain a copy of such Source Code
|
||||
Form by reasonable means in a timely manner, at a charge no more
|
||||
than the cost of distribution to the recipient; and
|
||||
|
||||
(b) You may distribute such Executable Form under the terms of this
|
||||
License, or sublicense it under different terms, provided that the
|
||||
license for the Executable Form does not attempt to limit or alter
|
||||
the recipients' rights in the Source Code Form under this License.
|
||||
|
||||
3.3. Distribution of a Larger Work
|
||||
|
||||
You may create and distribute a Larger Work under terms of Your choice,
|
||||
provided that You also comply with the requirements of this License for
|
||||
the Covered Software. If the Larger Work is a combination of Covered
|
||||
Software with a work governed by one or more Secondary Licenses, and the
|
||||
Covered Software is not Incompatible With Secondary Licenses, this
|
||||
License permits You to additionally distribute such Covered Software
|
||||
under the terms of such Secondary License(s), so that the recipient of
|
||||
the Larger Work may, at their option, further distribute the Covered
|
||||
Software under the terms of either this License or such Secondary
|
||||
License(s).
|
||||
|
||||
3.4. Notices
|
||||
|
||||
You may not remove or alter the substance of any license notices
|
||||
(including copyright notices, patent notices, disclaimers of warranty,
|
||||
or limitations of liability) contained within the Source Code Form of
|
||||
the Covered Software, except that You may alter any license notices to
|
||||
the extent required to remedy known factual inaccuracies.
|
||||
|
||||
3.5. Application of Additional Terms
|
||||
|
||||
You may choose to offer, and to charge a fee for, warranty, support,
|
||||
indemnity or liability obligations to one or more recipients of Covered
|
||||
Software. However, You may do so only on Your own behalf, and not on
|
||||
behalf of any Contributor. You must make it absolutely clear that any
|
||||
such warranty, support, indemnity, or liability obligation is offered by
|
||||
You alone, and You hereby agree to indemnify every Contributor for any
|
||||
liability incurred by such Contributor as a result of warranty, support,
|
||||
indemnity or liability terms You offer. You may include additional
|
||||
disclaimers of warranty and limitations of liability specific to any
|
||||
jurisdiction.
|
||||
|
||||
4. Inability to Comply Due to Statute or Regulation
|
||||
---------------------------------------------------
|
||||
|
||||
If it is impossible for You to comply with any of the terms of this
|
||||
License with respect to some or all of the Covered Software due to
|
||||
statute, judicial order, or regulation then You must: (a) comply with
|
||||
the terms of this License to the maximum extent possible; and (b)
|
||||
describe the limitations and the code they affect. Such description must
|
||||
be placed in a text file included with all distributions of the Covered
|
||||
Software under this License. Except to the extent prohibited by statute
|
||||
or regulation, such description must be sufficiently detailed for a
|
||||
recipient of ordinary skill to be able to understand it.
|
||||
|
||||
5. Termination
|
||||
--------------
|
||||
|
||||
5.1. The rights granted under this License will terminate automatically
|
||||
if You fail to comply with any of its terms. However, if You become
|
||||
compliant, then the rights granted under this License from a particular
|
||||
Contributor are reinstated (a) provisionally, unless and until such
|
||||
Contributor explicitly and finally terminates Your grants, and (b) on an
|
||||
ongoing basis, if such Contributor fails to notify You of the
|
||||
non-compliance by some reasonable means prior to 60 days after You have
|
||||
come back into compliance. Moreover, Your grants from a particular
|
||||
Contributor are reinstated on an ongoing basis if such Contributor
|
||||
notifies You of the non-compliance by some reasonable means, this is the
|
||||
first time You have received notice of non-compliance with this License
|
||||
from such Contributor, and You become compliant prior to 30 days after
|
||||
Your receipt of the notice.
|
||||
|
||||
5.2. If You initiate litigation against any entity by asserting a patent
|
||||
infringement claim (excluding declaratory judgment actions,
|
||||
counter-claims, and cross-claims) alleging that a Contributor Version
|
||||
directly or indirectly infringes any patent, then the rights granted to
|
||||
You by any and all Contributors for the Covered Software under Section
|
||||
2.1 of this License shall terminate.
|
||||
|
||||
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
|
||||
end user license agreements (excluding distributors and resellers) which
|
||||
have been validly granted by You or Your distributors under this License
|
||||
prior to termination shall survive termination.
|
||||
|
||||
************************************************************************
|
||||
* *
|
||||
* 6. Disclaimer of Warranty *
|
||||
* ------------------------- *
|
||||
* *
|
||||
* Covered Software is provided under this License on an "as is" *
|
||||
* basis, without warranty of any kind, either expressed, implied, or *
|
||||
* statutory, including, without limitation, warranties that the *
|
||||
* Covered Software is free of defects, merchantable, fit for a *
|
||||
* particular purpose or non-infringing. The entire risk as to the *
|
||||
* quality and performance of the Covered Software is with You. *
|
||||
* Should any Covered Software prove defective in any respect, You *
|
||||
* (not any Contributor) assume the cost of any necessary servicing, *
|
||||
* repair, or correction. This disclaimer of warranty constitutes an *
|
||||
* essential part of this License. No use of any Covered Software is *
|
||||
* authorized under this License except under this disclaimer. *
|
||||
* *
|
||||
************************************************************************
|
||||
|
||||
************************************************************************
|
||||
* *
|
||||
* 7. Limitation of Liability *
|
||||
* -------------------------- *
|
||||
* *
|
||||
* Under no circumstances and under no legal theory, whether tort *
|
||||
* (including negligence), contract, or otherwise, shall any *
|
||||
* Contributor, or anyone who distributes Covered Software as *
|
||||
* permitted above, be liable to You for any direct, indirect, *
|
||||
* special, incidental, or consequential damages of any character *
|
||||
* including, without limitation, damages for lost profits, loss of *
|
||||
* goodwill, work stoppage, computer failure or malfunction, or any *
|
||||
* and all other commercial damages or losses, even if such party *
|
||||
* shall have been informed of the possibility of such damages. This *
|
||||
* limitation of liability shall not apply to liability for death or *
|
||||
* personal injury resulting from such party's negligence to the *
|
||||
* extent applicable law prohibits such limitation. Some *
|
||||
* jurisdictions do not allow the exclusion or limitation of *
|
||||
* incidental or consequential damages, so this exclusion and *
|
||||
* limitation may not apply to You. *
|
||||
* *
|
||||
************************************************************************
|
||||
|
||||
8. Litigation
|
||||
-------------
|
||||
|
||||
Any litigation relating to this License may be brought only in the
|
||||
courts of a jurisdiction where the defendant maintains its principal
|
||||
place of business and such litigation shall be governed by laws of that
|
||||
jurisdiction, without reference to its conflict-of-law provisions.
|
||||
Nothing in this Section shall prevent a party's ability to bring
|
||||
cross-claims or counter-claims.
|
||||
|
||||
9. Miscellaneous
|
||||
----------------
|
||||
|
||||
This License represents the complete agreement concerning the subject
|
||||
matter hereof. If any provision of this License is held to be
|
||||
unenforceable, such provision shall be reformed only to the extent
|
||||
necessary to make it enforceable. Any law or regulation which provides
|
||||
that the language of a contract shall be construed against the drafter
|
||||
shall not be used to construe this License against a Contributor.
|
||||
|
||||
10. Versions of the License
|
||||
---------------------------
|
||||
|
||||
10.1. New Versions
|
||||
|
||||
Mozilla Foundation is the license steward. Except as provided in Section
|
||||
10.3, no one other than the license steward has the right to modify or
|
||||
publish new versions of this License. Each version will be given a
|
||||
distinguishing version number.
|
||||
|
||||
10.2. Effect of New Versions
|
||||
|
||||
You may distribute the Covered Software under the terms of the version
|
||||
of the License under which You originally received the Covered Software,
|
||||
or under the terms of any subsequent version published by the license
|
||||
steward.
|
||||
|
||||
10.3. Modified Versions
|
||||
|
||||
If you create software not governed by this License, and you want to
|
||||
create a new license for such software, you may create and use a
|
||||
modified version of this License if you rename the license and remove
|
||||
any references to the name of the license steward (except to note that
|
||||
such modified license differs from this License).
|
||||
|
||||
10.4. Distributing Source Code Form that is Incompatible With Secondary
|
||||
Licenses
|
||||
|
||||
If You choose to distribute Source Code Form that is Incompatible With
|
||||
Secondary Licenses under the terms of this version of the License, the
|
||||
notice described in Exhibit B of this License must be attached.
|
||||
|
||||
Exhibit A - Source Code Form License Notice
|
||||
-------------------------------------------
|
||||
|
||||
This Source Code Form is subject to the terms of the Mozilla Public
|
||||
License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
If it is not possible or desirable to put the notice in a particular
|
||||
file, then You may include the notice in a location (such as a LICENSE
|
||||
file in a relevant directory) where a recipient would be likely to look
|
||||
for such a notice.
|
||||
|
||||
You may add additional accurate notices of copyright ownership.
|
18
cmd/ucdtest/main.ha
Normal file
18
cmd/ucdtest/main.ha
Normal file
|
@ -0,0 +1,18 @@
|
|||
use fmt;
|
||||
use os;
|
||||
use strings;
|
||||
use unicode;
|
||||
|
||||
// Iterates over the runes of the first command-line argument and, for each
// one, prints the rune, its code point in hexadecimal, and the result of
// unicode::gc_code(unicode::rune_gc(rn)).
//
// Exits with status 1 and a usage message when no argument is supplied.
export fn main() void = {
	// Indexing os::args[1] without this check would abort at runtime
	// when the program is started with no arguments.
	if (len(os::args) < 2) {
		fmt::errorln("Usage: ucdtest <string>")!;
		os::exit(1);
	};
	const in = os::args[1];
	const iter = strings::iter(in);
	for (true) {
		// Stop as soon as the iterator yields anything other than a rune.
		const rn = match (strings::next(&iter)) {
		case let rn: rune =>
			yield rn;
		case => break;
		};
		const gc = unicode::rune_gc(rn);
		fmt::printfln("'{}'/{:x}: {}", rn, rn: u32, unicode::gc_code(gc))!;
	};
};
|
665
scripts/gen-ucd.py
Executable file
665
scripts/gen-ucd.py
Executable file
|
@ -0,0 +1,665 @@
|
|||
#!/usr/bin/python3
|
||||
# Based on CPython's unicodedata generation script,
|
||||
# Tools/unicode/makeunicodedata.py, forked and adapted for Hare
|
||||
#
|
||||
# PSF License
|
||||
#
|
||||
# (re)generate unicode property and type databases
|
||||
#
|
||||
# This script converts Unicode database files to unicode/ucd_gen.ha
|
||||
#
|
||||
# history:
|
||||
# 2000-09-24 fl created (based on bits and pieces from unidb)
|
||||
# 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table
|
||||
# 2000-09-25 fl added character type table
|
||||
# 2000-09-26 fl added LINEBREAK, DECIMAL, and DIGIT flags/fields (2.0)
|
||||
# 2000-11-03 fl expand first/last ranges
|
||||
# 2001-01-19 fl added character name tables (2.1)
|
||||
# 2001-01-21 fl added decomp compression; dynamic phrasebook threshold
|
||||
# 2002-09-11 wd use string methods
|
||||
# 2002-10-18 mvl update to Unicode 3.2
|
||||
# 2002-10-22 mvl generate NFC tables
|
||||
# 2002-11-24 mvl expand all ranges, sort names version-independently
|
||||
# 2002-11-25 mvl add UNIDATA_VERSION
|
||||
# 2004-05-29 perky add east asian width information
|
||||
# 2006-03-10 mvl update to Unicode 4.1; add UCD 3.2 delta
|
||||
# 2008-06-11 gb add PRINTABLE_MASK for Atsuo Ishimoto's ascii() patch
|
||||
# 2011-10-21 ezio add support for name aliases and named sequences
|
||||
# 2012-01 benjamin add full case mappings
|
||||
#
|
||||
# written by Fredrik Lundh (fredrik@pythonware.com)
|
||||
#
|
||||
|
||||
import dataclasses
|
||||
import os
|
||||
import sys
|
||||
import zipfile
|
||||
|
||||
from functools import partial
|
||||
from textwrap import dedent
|
||||
from typing import Iterator, List, Optional, Set, Tuple
|
||||
|
||||
SCRIPT = sys.argv[0]
|
||||
VERSION = "3.3"
|
||||
|
||||
# The Unicode Database
|
||||
# --------------------
|
||||
# When changing UCD version please update
|
||||
# * Doc/library/stdtypes.rst, and
|
||||
# * Doc/library/unicodedata.rst
|
||||
# * Doc/reference/lexical_analysis.rst (two occurrences)
|
||||
UNIDATA_VERSION = "13.0.0"
|
||||
UNICODE_DATA = "UnicodeData%s.txt"
|
||||
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
|
||||
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
|
||||
UNIHAN = "Unihan%s.zip"
|
||||
DERIVED_CORE_PROPERTIES = "DerivedCoreProperties%s.txt"
|
||||
DERIVEDNORMALIZATION_PROPS = "DerivedNormalizationProps%s.txt"
|
||||
LINE_BREAK = "LineBreak%s.txt"
|
||||
NAME_ALIASES = "NameAliases%s.txt"
|
||||
NAMED_SEQUENCES = "NamedSequences%s.txt"
|
||||
SPECIAL_CASING = "SpecialCasing%s.txt"
|
||||
CASE_FOLDING = "CaseFolding%s.txt"
|
||||
|
||||
# Private Use Areas -- in planes 0 (BMP), 15, 16
|
||||
PUA_1 = range(0xE000, 0xF900)
|
||||
PUA_15 = range(0xF0000, 0xFFFFE)
|
||||
PUA_16 = range(0x100000, 0x10FFFE)
|
||||
|
||||
# we use these ranges of PUA_15 to store name aliases and named sequences
|
||||
NAME_ALIASES_START = 0xF0000
|
||||
NAMED_SEQUENCES_START = 0xF0200
|
||||
|
||||
old_versions = []
|
||||
|
||||
# Order must match ucd.ha
|
||||
CATEGORY_NAMES = [
|
||||
"Cc", "Cf", "Cn", "Co", "Cs", "Ll", "Lm", "Lo", "Lt", "Lu", "Mc", "Me",
|
||||
"Mn", "Nd", "Nl", "No", "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps", "Sc",
|
||||
"Sk", "Sm", "So", "Zl", "Zp", "Zs",
|
||||
]
|
||||
|
||||
BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
|
||||
"PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
|
||||
"ON", "LRI", "RLI", "FSI", "PDI" ]
|
||||
|
||||
EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ]
|
||||
|
||||
MANDATORY_LINE_BREAKS = [ "BK", "CR", "LF", "NL" ]
|
||||
|
||||
# note: should match definitions in Objects/unicodectype.c
|
||||
ALPHA_MASK = 0x01
|
||||
DECIMAL_MASK = 0x02
|
||||
DIGIT_MASK = 0x04
|
||||
LOWER_MASK = 0x08
|
||||
LINEBREAK_MASK = 0x10
|
||||
SPACE_MASK = 0x20
|
||||
TITLE_MASK = 0x40
|
||||
UPPER_MASK = 0x80
|
||||
XID_START_MASK = 0x100
|
||||
XID_CONTINUE_MASK = 0x200
|
||||
PRINTABLE_MASK = 0x400
|
||||
NUMERIC_MASK = 0x800
|
||||
CASE_IGNORABLE_MASK = 0x1000
|
||||
CASED_MASK = 0x2000
|
||||
EXTENDED_CASE_MASK = 0x4000
|
||||
|
||||
# these ranges need to match unicodedata.c:is_unified_ideograph
|
||||
cjk_ranges = [
|
||||
('3400', '4DBF'),
|
||||
('4E00', '9FFC'),
|
||||
('20000', '2A6DD'),
|
||||
('2A700', '2B734'),
|
||||
('2B740', '2B81D'),
|
||||
('2B820', '2CEA1'),
|
||||
('2CEB0', '2EBE0'),
|
||||
('30000', '3134A'),
|
||||
]
|
||||
|
||||
def maketables(trace=0):
    """Read the Unicode database and pass it on to makeunicodedata().

    Progress is reported on stdout: the name of the file being read and
    the number of populated entries in the loaded table.

    trace is forwarded unchanged to makeunicodedata().
    """
    source_name = UNICODE_DATA % ""
    print("--- Reading", source_name, "...")

    database = UnicodeData(UNIDATA_VERSION)

    # Count only the code points that actually carry a record.
    assigned = sum(1 for entry in database.table if entry)
    print(assigned, "characters")

    makeunicodedata(database, trace)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# unicode character properties
|
||||
|
||||
def makeunicodedata(unicode, trace):
    """Write the deduplicated property records and index tables to a Hare file.

    unicode -- object exposing ``chars`` (iterable of code points) and
               ``table`` (records indexed by code point, falsy when there
               is no record for that code point).
    trace   -- forwarded to splitbins() and Array.dump().

    Each record is reduced to a 5-tuple (category, combining class, bidi
    class, mirrored, east asian width).  Identical tuples are stored once in
    ``table`` and every code point refers to its tuple through ``index``,
    which is then split into two tables by splitbins().
    """
    # Record 0 is the placeholder used by every code point without a record.
    # NOTE(review): category index 0 is "Cc" in CATEGORY_NAMES, so code points
    # without a record decode as Cc rather than Cn -- confirm this is what
    # ucd.ha expects.
    dummy = (0, 0, 0, 0, 0)
    table = [dummy]
    # Maps record tuple -> position in `table`.  It must be keyed by the tuple
    # (not by the index) so that the lookups below can find the placeholder.
    cache = {dummy: 0}
    index = [0] * len(unicode.chars)

    FILE = "unicode/ucd_gen.ha"

    print("--- Preparing", FILE, "...")

    for char in unicode.chars:
        record = unicode.table[char]
        if record:
            # extract database properties
            category = CATEGORY_NAMES.index(record.general_category)
            combining = int(record.canonical_combining_class)
            bidirectional = BIDIRECTIONAL_NAMES.index(record.bidi_class)
            mirrored = record.bidi_mirrored == "Y"
            eastasianwidth = EASTASIANWIDTH_NAMES.index(record.east_asian_width)
            item = (
                category, combining, bidirectional, mirrored, eastasianwidth,
            )
            # add entry to index and item tables
            i = cache.get(item)
            if i is None:
                cache[item] = i = len(table)
                table.append(item)
            index[char] = i

    print(len(table), "unique properties")

    print("--- Writing", FILE, "...")

    with open(FILE, "w") as fp:
        fprint = partial(print, file=fp)

        fprint("// Generated by scripts/gen-ucd.py")
        fprint()
        fprint('// Unicode database version supported by this module')
        fprint('export def UNIDATA_VERSION: str = "%s";' % UNIDATA_VERSION)
        fprint('')
        fprint("// List of unique database records")
        fprint("const ucd_records: [_]ucd_encodedrec = [")
        for item in table:
            fprint(" (%d, %d, %d, %d, %d)," % item)
        fprint("];")
        fprint()

        # split record index table
        index1, index2, shift = splitbins(index, trace)

        fprint("// index tables for the database records")
        fprint("def UCD_RECORD_SHIFT: size = %d;" % shift)
        Array("index1", index1).dump(fp, trace)
        Array("index2", index2).dump(fp, trace)
|
||||
|
||||
|
||||
# Directory in which downloaded Unicode data files are cached.
DATA_DIR = os.path.join('.data')


def open_data(template, version):
    """Open a Unicode data file, downloading it into DATA_DIR if needed.

    template -- file name pattern containing one ``%s`` (e.g. "LineBreak%s.txt").
    version  -- Unicode version string; the cached copy is named with a
                ``-<version>`` suffix.

    Returns an open file object: text mode (UTF-8) for ``.txt`` files,
    binary mode for anything else.  The caller is responsible for closing it.
    """
    local = os.path.join(DATA_DIR, template % ('-'+version,))
    if not os.path.exists(local):
        import urllib.request
        if version == '3.2.0':
            # irregular url structure
            url = ('https://www.unicode.org/Public/3.2-Update/'+template) % ('-'+version,)
        else:
            url = ('https://www.unicode.org/Public/%s/ucd/'+template) % (version, '')
        os.makedirs(DATA_DIR, exist_ok=True)
        # Download to a temporary name and move it into place only once the
        # transfer has completed; otherwise an interrupted download would
        # leave a truncated file that later runs mistake for a valid cache.
        partial_path = local + '.part'
        try:
            urllib.request.urlretrieve(url, filename=partial_path)
            os.replace(partial_path, local)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)
    if local.endswith('.txt'):
        return open(local, encoding='utf-8')
    else:
        # Unihan.zip
        return open(local, 'rb')
|
||||
|
||||
|
||||
def expand_range(char_range: str) -> Iterator[int]:
    '''
    Yield every code point covered by a UAX #44 code point range.

    The argument is either a single hexadecimal code point ("0041") or an
    inclusive range written as "first..last" ("0041..005A").
    See: https://www.unicode.org/reports/tr44/#Code_Point_Ranges
    '''
    start_text, separator, end_text = char_range.partition('..')
    start = int(start_text, 16)
    # A lone code point is treated as a range of length one.
    stop = int(end_text, 16) if separator else start
    yield from range(start, stop + 1)
|
||||
|
||||
|
||||
class UcdFile:
    '''
    Reader for a data file that uses the standard UCD text layout.

    See: https://www.unicode.org/reports/tr44/#Format_Conventions

    The Unihan data files use a different format (as noted in that
    document) and are not handled by this class.
    '''

    def __init__(self, template: str, version: str) -> None:
        # Both values are handed to open_data() each time the file is read.
        self.template = template
        self.version = version

    def records(self) -> Iterator[List[str]]:
        '''Yield the ';'-separated, whitespace-stripped fields of each data line.'''
        with open_data(self.template, self.version) as handle:
            for raw_line in handle:
                # Everything from the first '#' onwards is a comment.
                content = raw_line.partition('#')[0].strip()
                if content:
                    yield [part.strip() for part in content.split(';')]

    def __iter__(self) -> Iterator[List[str]]:
        return self.records()

    def expanded(self) -> Iterator[Tuple[int, List[str]]]:
        '''Yield (code point, remaining fields) for every code point of every record.'''
        for fields in self.records():
            char_range, *rest = fields
            for codepoint in expand_range(char_range):
                yield codepoint, rest
|
||||
|
||||
|
||||
@dataclasses.dataclass
class UcdRecord:
    # One record per code point.  The first 15 fields are filled positionally
    # from a row of UnicodeData.txt.  See:
    # https://www.unicode.org/reports/tr44/#UnicodeData.txt
    # NOTE(review): the names of the decomposition/numeric fields follow the
    # positional order used by this script; confirm against UAX #44 before
    # relying on a name to describe the column it holds.
    codepoint: str
    name: str
    general_category: str
    canonical_combining_class: str
    bidi_class: str
    decomposition_type: str
    decomposition_mapping: str
    numeric_type: str
    numeric_value: str
    bidi_mirrored: str
    unicode_1_name: str  # obsolete
    iso_comment: str  # obsolete
    simple_uppercase_mapping: str
    simple_lowercase_mapping: str
    simple_titlecase_mapping: str

    # The remaining fields are not part of UnicodeData.txt; they have
    # defaults so a record can be built from the 15 row fields alone.
    # Passing them explicitly, as before, still works.

    # https://www.unicode.org/reports/tr44/#EastAsianWidth.txt
    east_asian_width: Optional[str] = None

    # Binary properties, as a set of those that are true.
    # Taken from multiple files:
    # https://www.unicode.org/reports/tr44/#DerivedCoreProperties.txt
    # https://www.unicode.org/reports/tr44/#LineBreak.txt
    # default_factory gives every record its own set instead of a shared one.
    binary_properties: Set[str] = dataclasses.field(default_factory=set)

    # The Quick_Check properties related to normalization:
    # https://www.unicode.org/reports/tr44/#Decompositions_and_Normalization
    # We store them as a bitmask.
    quick_check: int = 0
|
||||
|
||||
|
||||
def from_row(row: List[str]) -> UcdRecord:
    """Build a UcdRecord from the fields of one UnicodeData.txt row.

    The row supplies the leading positional fields; the derived fields
    start out empty (no east asian width, no binary properties, quick
    check mask 0).
    """
    return UcdRecord(
        *row,
        east_asian_width=None,
        binary_properties=set(),
        quick_check=0,
    )
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# the following support code is taken from the unidb utilities
|
||||
# Copyright (c) 1999-2000 by Secret Labs AB
|
||||
|
||||
# load a unicode-data file from disk
|
||||
|
||||
class UnicodeData:
|
||||
# table: List[Optional[UcdRecord]] # index is codepoint; None means unassigned
|
||||
|
||||
def __init__(self, version, cjk_check=True):
|
||||
self.changed = []
|
||||
table = [None] * 0x110000
|
||||
for s in UcdFile(UNICODE_DATA, version):
|
||||
char = int(s[0], 16)
|
||||
table[char] = from_row(s)
|
||||
|
||||
cjk_ranges_found = []
|
||||
|
||||
# expand first-last ranges
|
||||
field = None
|
||||
for i in range(0, 0x110000):
|
||||
# The file UnicodeData.txt has its own distinct way of
|
||||
# expressing ranges. See:
|
||||
# https://www.unicode.org/reports/tr44/#Code_Point_Ranges
|
||||
s = table[i]
|
||||
if s:
|
||||
if s.name[-6:] == "First>":
|
||||
s.name = ""
|
||||
field = dataclasses.astuple(s)[:15]
|
||||
elif s.name[-5:] == "Last>":
|
||||
if s.name.startswith("<CJK Ideograph"):
|
||||
cjk_ranges_found.append((field[0],
|
||||
s.codepoint))
|
||||
s.name = ""
|
||||
field = None
|
||||
elif field:
|
||||
table[i] = from_row(('%X' % i,) + field[1:])
|
||||
if cjk_check and cjk_ranges != cjk_ranges_found:
|
||||
raise ValueError("CJK ranges deviate: have %r" % cjk_ranges_found)
|
||||
|
||||
# public attributes
|
||||
self.filename = UNICODE_DATA % ''
|
||||
self.table = table
|
||||
self.chars = list(range(0x110000)) # unicode 3.2
|
||||
|
||||
# check for name aliases and named sequences, see #12753
|
||||
# aliases and named sequences are not in 3.2.0
|
||||
if version != '3.2.0':
|
||||
self.aliases = []
|
||||
# store aliases in the Private Use Area 15, in range U+F0000..U+F00FF,
|
||||
# in order to take advantage of the compression and lookup
|
||||
# algorithms used for the other characters
|
||||
pua_index = NAME_ALIASES_START
|
||||
for char, name, abbrev in UcdFile(NAME_ALIASES, version):
|
||||
char = int(char, 16)
|
||||
self.aliases.append((name, char))
|
||||
# also store the name in the PUA 1
|
||||
self.table[pua_index].name = name
|
||||
pua_index += 1
|
||||
assert pua_index - NAME_ALIASES_START == len(self.aliases)
|
||||
|
||||
self.named_sequences = []
|
||||
# store named sequences in the PUA 1, in range U+F0100..,
|
||||
# in order to take advantage of the compression and lookup
|
||||
# algorithms used for the other characters.
|
||||
|
||||
assert pua_index < NAMED_SEQUENCES_START
|
||||
pua_index = NAMED_SEQUENCES_START
|
||||
for name, chars in UcdFile(NAMED_SEQUENCES, version):
|
||||
chars = tuple(int(char, 16) for char in chars.split())
|
||||
# check that the structure defined in makeunicodename is OK
|
||||
assert 2 <= len(chars) <= 4, "change the Py_UCS2 array size"
|
||||
assert all(c <= 0xFFFF for c in chars), ("use Py_UCS4 in "
|
||||
"the NamedSequence struct and in unicodedata_lookup")
|
||||
self.named_sequences.append((name, chars))
|
||||
# also store these in the PUA 1
|
||||
self.table[pua_index].name = name
|
||||
pua_index += 1
|
||||
assert pua_index - NAMED_SEQUENCES_START == len(self.named_sequences)
|
||||
|
||||
self.exclusions = {}
|
||||
for char, in UcdFile(COMPOSITION_EXCLUSIONS, version):
|
||||
char = int(char, 16)
|
||||
self.exclusions[char] = 1
|
||||
|
||||
widths = [None] * 0x110000
|
||||
for char, (width,) in UcdFile(EASTASIAN_WIDTH, version).expanded():
|
||||
widths[char] = width
|
||||
|
||||
for i in range(0, 0x110000):
|
||||
if table[i] is not None:
|
||||
table[i].east_asian_width = widths[i]
|
||||
|
||||
for char, (p,) in UcdFile(DERIVED_CORE_PROPERTIES, version).expanded():
|
||||
if table[char]:
|
||||
# Some properties (e.g. Default_Ignorable_Code_Point)
|
||||
# apply to unassigned code points; ignore them
|
||||
table[char].binary_properties.add(p)
|
||||
|
||||
for char_range, value in UcdFile(LINE_BREAK, version):
|
||||
if value not in MANDATORY_LINE_BREAKS:
|
||||
continue
|
||||
for char in expand_range(char_range):
|
||||
table[char].binary_properties.add('Line_Break')
|
||||
|
||||
# We only want the quickcheck properties
|
||||
# Format: NF?_QC; Y(es)/N(o)/M(aybe)
|
||||
# Yes is the default, hence only N and M occur
|
||||
# In 3.2.0, the format was different (NF?_NO)
|
||||
# The parsing will incorrectly determine these as
|
||||
# "yes", however, unicodedata.c will not perform quickchecks
|
||||
# for older versions, and no delta records will be created.
|
||||
quickchecks = [0] * 0x110000
|
||||
qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC'.split()
|
||||
for s in UcdFile(DERIVEDNORMALIZATION_PROPS, version):
|
||||
if len(s) < 2 or s[1] not in qc_order:
|
||||
continue
|
||||
quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No
|
||||
quickcheck_shift = qc_order.index(s[1])*2
|
||||
quickcheck <<= quickcheck_shift
|
||||
for char in expand_range(s[0]):
|
||||
assert not (quickchecks[char]>>quickcheck_shift)&3
|
||||
quickchecks[char] |= quickcheck
|
||||
for i in range(0, 0x110000):
|
||||
if table[i] is not None:
|
||||
table[i].quick_check = quickchecks[i]
|
||||
|
||||
with open_data(UNIHAN, version) as file:
|
||||
zip = zipfile.ZipFile(file)
|
||||
if version == '3.2.0':
|
||||
data = zip.open('Unihan-3.2.0.txt').read()
|
||||
else:
|
||||
data = zip.open('Unihan_NumericValues.txt').read()
|
||||
for line in data.decode("utf-8").splitlines():
|
||||
if not line.startswith('U+'):
|
||||
continue
|
||||
code, tag, value = line.split(None, 3)[:3]
|
||||
if tag not in ('kAccountingNumeric', 'kPrimaryNumeric',
|
||||
'kOtherNumeric'):
|
||||
continue
|
||||
value = value.strip().replace(',', '')
|
||||
i = int(code[2:], 16)
|
||||
# Patch the numeric field
|
||||
if table[i] is not None:
|
||||
table[i].numeric_value = value
|
||||
|
||||
sc = self.special_casing = {}
|
||||
for data in UcdFile(SPECIAL_CASING, version):
|
||||
if data[4]:
|
||||
# We ignore all conditionals (since they depend on
|
||||
# languages) except for one, which is hardcoded. See
|
||||
# handle_capital_sigma in unicodeobject.c.
|
||||
continue
|
||||
c = int(data[0], 16)
|
||||
lower = [int(char, 16) for char in data[1].split()]
|
||||
title = [int(char, 16) for char in data[2].split()]
|
||||
upper = [int(char, 16) for char in data[3].split()]
|
||||
sc[c] = (lower, title, upper)
|
||||
|
||||
cf = self.case_folding = {}
|
||||
if version != '3.2.0':
|
||||
for data in UcdFile(CASE_FOLDING, version):
|
||||
if data[1] in "CF":
|
||||
c = int(data[0], 16)
|
||||
cf[c] = [int(char, 16) for char in data[2].split()]
|
||||
|
||||
def uselatin1(self):
    """Restrict the character range to ISO Latin 1 (code points 0..255)."""
    latin1_limit = 0x100
    self.chars = [code for code in range(latin1_limit)]
|
||||
|
||||
|
||||
# hash table tools
|
||||
|
||||
# this is a straight-forward reimplementation of Python's built-in
|
||||
# dictionary type, using a static data structure, and a custom string
|
||||
# hash algorithm.
|
||||
|
||||
def myhash(s, magic):
    """Return the case-insensitive string hash of s for the given multiplier.

    Each character of the upper-cased string is mixed in as
    acc * magic + ord(char).  Whenever any of bits 24..31 are set, that
    byte is XOR-ed back into the low bits and the value is cut down to
    24 bits.
    """
    acc = 0
    for ch in s.upper():
        acc = acc * magic + ord(ch)
        high = acc & 0xff000000
        if high != 0:
            acc = (acc ^ ((high >> 24) & 0xff)) & 0x00ffffff
    return acc
|
||||
|
||||
|
||||
# (table size, polynomial offset) pairs, one per power of two from 4 up to
# 16777216.  Hash.__init__ takes the first entry whose size is larger than
# the number of items and uses size + offset as its probing polynomial.
SIZES = [
    (4 << exponent, offset)
    for exponent, offset in enumerate((
        3, 3, 3, 5, 3, 3, 29, 17,
        9, 5, 83, 27, 43, 3,
        45, 9, 39, 39, 9,
        5, 3, 33, 27,
    ))
]
|
||||
|
||||
|
||||
class Hash:
    """Static open-addressing hash table built from a (key, value) list.

    The finished table is kept as an Array named "<name>_hash"; unused
    slots hold 0.  The parameters needed to repeat the probe sequence at
    lookup time are kept in the magic, size and poly attributes.
    """

    def __init__(self, name, data, magic):
        """Build the table.

        name  -- prefix used for the emitted array and constants
        data  -- iterable of (key, value) pairs; keys are hashed with myhash
        magic -- multiplier handed to myhash

        Raises AssertionError when data has more entries than the largest
        size listed in SIZES can hold.
        """
        # Pick the first table size strictly larger than the item count.
        for size, poly in SIZES:
            if size > len(data):
                poly = size + poly
                break
        else:
            raise AssertionError("ran out of polynomials")

        print(size, "slots in hash table")

        table = [None] * size
        mask = size - 1
        collisions = 0

        for key, value in data:
            h = myhash(key, magic)
            slot = (~h) & mask
            if table[slot] is None:
                table[slot] = value
                continue
            # First slot is taken: walk the probe sequence until a free
            # slot turns up.
            incr = (h ^ (h >> 3)) & mask
            if not incr:
                incr = mask
            while True:
                collisions += 1
                slot = (slot + incr) & mask
                if table[slot] is None:
                    table[slot] = value
                    break
                incr = incr << 1
                if incr > mask:
                    incr = incr ^ poly

        print(collisions, "collisions")
        self.collisions = collisions

        # Slots that were never filled are written out as 0.
        table = [0 if entry is None else entry for entry in table]

        self.data = Array(name + "_hash", table)
        self.magic = magic
        self.name = name
        self.size = size
        self.poly = poly

    def dump(self, file, trace):
        """Write the table and its parameters to file as Hare declarations.

        The array itself is written by Array.dump, which emits a Hare
        array declaration.  The three parameters are therefore written as
        Hare "def" constants rather than C "#define" lines, so the output
        stays in one language.
        """
        self.data.dump(file, trace)
        for suffix, number in (("magic", self.magic),
                               ("size", self.size),
                               ("poly", self.poly)):
            file.write("def %s_%s: u32 = %d;\n" % (self.name, suffix, number))
|
||||
|
||||
|
||||
# stuff to deal with arrays of unsigned integers
|
||||
|
||||
class Array:
    """A named list of unsigned integers that can be written as a Hare array."""

    def __init__(self, name, data):
        """Remember the identifier to emit (name) and the items (data)."""
        self.name = name
        self.data = data

    def dump(self, file, trace=0):
        """Write the data to file as a Hare array declaration.

        The element type (u8, u16 or u32) is the narrowest one that holds
        the largest item.  If trace is true, the array's byte size is
        reported on sys.stderr.
        """
        # An empty array has no largest item to measure, so use the
        # narrowest element type instead of failing inside getsize().
        size = getsize(self.data) if self.data else 1
        if trace:
            print(self.name+":", size*len(self.data), "bytes", file=sys.stderr)
        element_type = {1: "u8", 2: "u16"}.get(size, "u32")
        file.write("const " + self.name + ": [_]")
        file.write(element_type)
        file.write(" = [\n")
        if self.data:
            s = " "
            for item in self.data:
                i = str(item) + ", "
                # Start a new output line before exceeding 78 columns.
                if len(s) + len(i) > 78:
                    file.write(s.rstrip() + "\n")
                    s = " " + i
                else:
                    s = s + i
            if s.strip():
                file.write(s.rstrip() + "\n")
        file.write("];\n\n")
|
||||
|
||||
|
||||
def getsize(data):
    """Return the smallest integer size, in bytes, that fits every item.

    The result is 1 when all items are below 256, 2 when all are below
    65536, and 4 otherwise.  Empty data has nothing to fit and yields 1
    rather than raising ValueError from max().
    """
    maxdata = max(data, default=0)
    if maxdata < 256:
        return 1
    if maxdata < 65536:
        return 2
    return 4
|
||||
|
||||
|
||||
def splitbins(t, trace=0):
    """t, trace=0 -> (t1, t2, shift).  Split a table to save space.

    t is a sequence of ints.  Splitting pays off when many of the ints
    repeat.  The result is a pair of int lists t1 and t2 plus an int
    shift, picked so that the combined size of t1 and t2 is as small as
    possible and, for every i in range(len(t)),
        t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
    with mask being the bitmask that keeps the lowest "shift" bits.

    A non-zero trace (default zero) prints progress information to
    sys.stderr; larger values print more.
    """

    def report(first, second, sh, nbytes):
        print("%d+%d bins at shift %d; %d bytes" % (
            len(first), len(second), sh, nbytes), file=sys.stderr)

    if trace:
        print("Size of original table:", len(t)*getsize(t), "bytes",
              file=sys.stderr)

    # The largest useful shift still leaves one bit of the last valid index.
    last_index = len(t) - 1
    maxshift = last_index.bit_length() - 1 if last_index > 0 else 0

    smallest = sys.maxsize  # smallest total size seen so far
    t = tuple(t)  # tuple slices are hashable, so they can key the cache
    for shift in range(maxshift + 1):
        width = 1 << shift
        seen = {}
        t1, t2 = [], []
        for start in range(0, len(t), width):
            chunk = t[start:start + width]
            if chunk not in seen:
                # First time this bin shows up: append it to t2.
                seen[chunk] = len(t2)
                t2.extend(chunk)
            t1.append(seen[chunk] >> shift)
        # Memory needed by this candidate split.
        cost = len(t1)*getsize(t1) + len(t2)*getsize(t2)
        if trace > 1:
            report(t1, t2, shift, cost)
        if cost < smallest:
            best = t1, t2, shift
            smallest = cost

    t1, t2, shift = best
    if trace:
        print("Best:", end=' ', file=sys.stderr)
        report(t1, t2, shift, smallest)
    if __debug__:
        # Check every entry against the chosen decomposition.
        mask = (1 << shift) - 1  # low-bit mask of shift bits
        for i, expected in enumerate(t):
            assert expected == t2[(t1[i >> shift] << shift) + (i & mask)]
    return best
|
||||
|
||||
|
||||
# Script entry point: only generate the tables when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    maketables(1)
|
94
unicode/ucd.ha
Normal file
94
unicode/ucd.ha
Normal file
|
@ -0,0 +1,94 @@
|
|||
// A record encoded as a tuple of five u8 values.
// NOTE(review): presumably the element type of ucd_records in ucd_gen.ha,
// with one value per [[ucd_record]] field in declaration order - confirm.
type ucd_encodedrec = (u8, u8, u8, u8, u8);
|
||||
|
||||
// Named view of one character property record. get_ucdrecord casts a
// pointer to an element of ucd_records to a pointer to this type, so the
// field order and sizes here must stay as they are.
type ucd_record = struct {
	// Read by rune_gc and cast to [[gc]].
	category: u8,
	combining: u8,
	bidirectional: u8,
	mirrored: u8,
	east_asian_width: u8,
};
|
||||
|
||||
// Returns a pointer to the property record for a rune, found through the
// two-stage index1/index2 tables. Code points of 0x110000 and above map to
// record 0.
fn get_ucdrecord(rn: rune) *ucd_record = {
	const code = rn: u32;
	if (code >= 0x110000) {
		return &ucd_records[0u8]: *ucd_record;
	};

	// Stage one: the high bits of the code point select a block.
	// NOTE(review): index is a u8 and is shifted left below; confirm that
	// the shift cannot wrap for the block numbers stored in index1.
	let index: u8 = index1[(code>>UCD_RECORD_SHIFT)];
	// Stage two: the block and the low bits select the record number.
	index = index2[(index<<UCD_RECORD_SHIFT)
		+(code&((1<<UCD_RECORD_SHIFT)-1))];
	return &ucd_records[index]: *ucd_record;
};
|
||||
|
||||
// Unicode character General_Category attribute. The comment after each
// member is its two-character code, as returned by [[gc_code]].
// NOTE(review): rune_gc casts the stored category number straight to this
// type, so the member order presumably has to match the numbering used by
// the generated tables - confirm before reordering.
export type gc = enum u8 {
	// Other
	CONTROL, // Cc
	FORMAT, // Cf
	UNASSIGNED, // Cn
	PRIVATE_USE, // Co
	SURROGATE, // Cs

	// Letter
	LOWERCASE_LETTER, // Ll
	MODIFIER_LETTER, // Lm
	OTHER_LETTER, // Lo
	TITLECASE_LETTER, // Lt
	UPPERCASE_LETTER, // Lu

	// Mark
	SPACING_MARK, // Mc
	ENCLOSING_MARK, // Me
	NON_SPACING_MARK, // Mn

	// Number
	DECIMAL_NUMBER, // Nd
	LETTER_NUMBER, // Nl
	OTHER_NUMBER, // No

	// Punctuation
	CONNECT_PUNCTUATION, // Pc
	DASH_PUNCTUATION, // Pd
	CLOSE_PUNCTUATION, // Pe
	FINAL_PUNCTUATION, // Pf
	INITIAL_PUNCTUATION, // Pi
	OTHER_PUNCTUATION, // Po
	OPEN_PUNCTUATION, // Ps

	// Symbol
	CURRENCY_SYMBOL, // Sc
	MODIFIER_SYMBOL, // Sk
	MATH_SYMBOL, // Sm
	OTHER_SYMBOL, // So

	// Separator
	LINE_SEPARATOR, // Zl
	PARAGRAPH_SEPARATOR, // Zp
	SPACE_SEPARATOR, // Zs
};
|
||||
|
||||
// Returns the General_Category ([[gc]]) of the given rune.
export fn rune_gc(rn: rune) gc = {
	const record = get_ucdrecord(rn);
	return record.category: gc;
};
|
||||
|
||||
// Returns the two-character General_Category code (for example "Lu" or
// "Zs") that corresponds to a [[gc]] value.
export fn gc_code(v: gc) const str = {
	const code = switch (v) {
	case gc::CONTROL =>
		yield "Cc";
	case gc::FORMAT =>
		yield "Cf";
	case gc::UNASSIGNED =>
		yield "Cn";
	case gc::PRIVATE_USE =>
		yield "Co";
	case gc::SURROGATE =>
		yield "Cs";
	case gc::LOWERCASE_LETTER =>
		yield "Ll";
	case gc::MODIFIER_LETTER =>
		yield "Lm";
	case gc::OTHER_LETTER =>
		yield "Lo";
	case gc::TITLECASE_LETTER =>
		yield "Lt";
	case gc::UPPERCASE_LETTER =>
		yield "Lu";
	case gc::SPACING_MARK =>
		yield "Mc";
	case gc::ENCLOSING_MARK =>
		yield "Me";
	case gc::NON_SPACING_MARK =>
		yield "Mn";
	case gc::DECIMAL_NUMBER =>
		yield "Nd";
	case gc::LETTER_NUMBER =>
		yield "Nl";
	case gc::OTHER_NUMBER =>
		yield "No";
	case gc::CONNECT_PUNCTUATION =>
		yield "Pc";
	case gc::DASH_PUNCTUATION =>
		yield "Pd";
	case gc::CLOSE_PUNCTUATION =>
		yield "Pe";
	case gc::FINAL_PUNCTUATION =>
		yield "Pf";
	case gc::INITIAL_PUNCTUATION =>
		yield "Pi";
	case gc::OTHER_PUNCTUATION =>
		yield "Po";
	case gc::OPEN_PUNCTUATION =>
		yield "Ps";
	case gc::CURRENCY_SYMBOL =>
		yield "Sc";
	case gc::MODIFIER_SYMBOL =>
		yield "Sk";
	case gc::MATH_SYMBOL =>
		yield "Sm";
	case gc::OTHER_SYMBOL =>
		yield "So";
	case gc::LINE_SEPARATOR =>
		yield "Zl";
	case gc::PARAGRAPH_SEPARATOR =>
		yield "Zp";
	case gc::SPACE_SEPARATOR =>
		yield "Zs";
	};
	return code;
};
|
2769
unicode/ucd_gen.ha
Normal file
2769
unicode/ucd_gen.ha
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue