Add 'vendor/hare-unicode/' from commit '1488c26f46f7f8568235eaee6224983ac46e78ff'

git-subtree-dir: vendor/hare-unicode git-subtree-mainline: 57979aa6fc git-subtree-split: 1488c26f46
2024-12-04 13:29:21 -03:00 · 2024-12-04 13:29:21 -03:00 · c70ec9f648
commit c70ec9f648
parent 57979aa6fc 1488c26f46
9 changed files with 6329 additions and 0 deletions
--- a/vendor/hare-unicode/.gitignore
+++ b/vendor/hare-unicode/.gitignore
@ -0,0 +1,2 @@
 *.zip
 /.data
--- a/vendor/hare-unicode/COPYING
+++ b/vendor/hare-unicode/COPYING
@ -0,0 +1,367 @@
 Mozilla Public License Version 2.0
 ==================================
 1. Definitions
 --------------
 1.1. "Contributor"
    means each individual or legal entity that creates, contributes to
    the creation of, or owns Covered Software.
 1.2. "Contributor Version"
    means the combination of the Contributions of others (if any) used
    by a Contributor and that particular Contributor's Contribution.
 1.3. "Contribution"
    means Covered Software of a particular Contributor.
 1.4. "Covered Software"
    means Source Code Form to which the initial Contributor has attached
    the notice in Exhibit A, the Executable Form of such Source Code
    Form, and Modifications of such Source Code Form, in each case
    including portions thereof.
 1.5. "Incompatible With Secondary Licenses"
    means
    (a) that the initial Contributor has attached the notice described
        in Exhibit B to the Covered Software; or
    (b) that the Covered Software was made available under the terms of
        version 1.1 or earlier of the License, but not also under the
        terms of a Secondary License.
 1.6. "Executable Form"
    means any form of the work other than Source Code Form.
 1.7. "Larger Work"
    means a work that combines Covered Software with other material, in 
    a separate file or files, that is not Covered Software.
 1.8. "License"
    means this document.
 1.9. "Licensable"
    means having the right to grant, to the maximum extent possible,
    whether at the time of the initial grant or subsequently, any and
    all of the rights conveyed by this License.
 1.10. "Modifications"
    means any of the following:
    (a) any file in Source Code Form that results from an addition to,
        deletion from, or modification of the contents of Covered
        Software; or
    (b) any new file in Source Code Form that contains any Covered
        Software.
 1.11. "Patent Claims" of a Contributor
    means any patent claim(s), including without limitation, method,
    process, and apparatus claims, in any patent Licensable by such
    Contributor that would be infringed, but for the grant of the
    License, by the making, using, selling, offering for sale, having
    made, import, or transfer of either its Contributions or its
    Contributor Version.
 1.12. "Secondary License"
    means either the GNU General Public License, Version 2.0, the GNU
    Lesser General Public License, Version 2.1, the GNU Affero General
    Public License, Version 3.0, or any later versions of those
    licenses.
 1.13. "Source Code Form"
    means the form of the work preferred for making modifications.
 1.14. "You" (or "Your")
    means an individual or a legal entity exercising rights under this
    License. For legal entities, "You" includes any entity that
    controls, is controlled by, or is under common control with You. For
    purposes of this definition, "control" means (a) the power, direct
    or indirect, to cause the direction or management of such entity,
    whether by contract or otherwise, or (b) ownership of more than
    fifty percent (50%) of the outstanding shares or beneficial
    ownership of such entity.
 2. License Grants and Conditions
 --------------------------------
 2.1. Grants
 Each Contributor hereby grants You a world-wide, royalty-free,
 non-exclusive license:
 (a) under intellectual property rights (other than patent or trademark)
    Licensable by such Contributor to use, reproduce, make available,
    modify, display, perform, distribute, and otherwise exploit its
    Contributions, either on an unmodified basis, with Modifications, or
    as part of a Larger Work; and
 (b) under Patent Claims of such Contributor to make, use, sell, offer
    for sale, have made, import, and otherwise transfer either its
    Contributions or its Contributor Version.
 2.2. Effective Date
 The licenses granted in Section 2.1 with respect to any Contribution
 become effective for each Contribution on the date the Contributor first
 distributes such Contribution.
 2.3. Limitations on Grant Scope
 The licenses granted in this Section 2 are the only rights granted under
 this License. No additional rights or licenses will be implied from the
 distribution or licensing of Covered Software under this License.
 Notwithstanding Section 2.1(b) above, no patent license is granted by a
 Contributor:
 (a) for any code that a Contributor has removed from Covered Software;
    or
 (b) for infringements caused by: (i) Your and any other third party's
    modifications of Covered Software, or (ii) the combination of its
    Contributions with other software (except as part of its Contributor
    Version); or
 (c) under Patent Claims infringed by Covered Software in the absence of
    its Contributions.
 This License does not grant any rights in the trademarks, service marks,
 or logos of any Contributor (except as may be necessary to comply with
 the notice requirements in Section 3.4).
 2.4. Subsequent Licenses
 No Contributor makes additional grants as a result of Your choice to
 distribute the Covered Software under a subsequent version of this
 License (see Section 10.2) or under the terms of a Secondary License (if
 permitted under the terms of Section 3.3).
 2.5. Representation
 Each Contributor represents that the Contributor believes its
 Contributions are its original creation(s) or it has sufficient rights
 to grant the rights to its Contributions conveyed by this License.
 2.6. Fair Use
 This License is not intended to limit any rights You have under
 applicable copyright doctrines of fair use, fair dealing, or other
 equivalents.
 2.7. Conditions
 Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
 in Section 2.1.
 3. Responsibilities
 -------------------
 3.1. Distribution of Source Form
 All distribution of Covered Software in Source Code Form, including any
 Modifications that You create or to which You contribute, must be under
 the terms of this License. You must inform recipients that the Source
 Code Form of the Covered Software is governed by the terms of this
 License, and how they can obtain a copy of this License. You may not
 attempt to alter or restrict the recipients' rights in the Source Code
 Form.
 3.2. Distribution of Executable Form
 If You distribute Covered Software in Executable Form then:
 (a) such Covered Software must also be made available in Source Code
    Form, as described in Section 3.1, and You must inform recipients of
    the Executable Form how they can obtain a copy of such Source Code
    Form by reasonable means in a timely manner, at a charge no more
    than the cost of distribution to the recipient; and
 (b) You may distribute such Executable Form under the terms of this
    License, or sublicense it under different terms, provided that the
    license for the Executable Form does not attempt to limit or alter
    the recipients' rights in the Source Code Form under this License.
 3.3. Distribution of a Larger Work
 You may create and distribute a Larger Work under terms of Your choice,
 provided that You also comply with the requirements of this License for
 the Covered Software. If the Larger Work is a combination of Covered
 Software with a work governed by one or more Secondary Licenses, and the
 Covered Software is not Incompatible With Secondary Licenses, this
 License permits You to additionally distribute such Covered Software
 under the terms of such Secondary License(s), so that the recipient of
 the Larger Work may, at their option, further distribute the Covered
 Software under the terms of either this License or such Secondary
 License(s).
 3.4. Notices
 You may not remove or alter the substance of any license notices
 (including copyright notices, patent notices, disclaimers of warranty,
 or limitations of liability) contained within the Source Code Form of
 the Covered Software, except that You may alter any license notices to
 the extent required to remedy known factual inaccuracies.
 3.5. Application of Additional Terms
 You may choose to offer, and to charge a fee for, warranty, support,
 indemnity or liability obligations to one or more recipients of Covered
 Software. However, You may do so only on Your own behalf, and not on
 behalf of any Contributor. You must make it absolutely clear that any
 such warranty, support, indemnity, or liability obligation is offered by
 You alone, and You hereby agree to indemnify every Contributor for any
 liability incurred by such Contributor as a result of warranty, support,
 indemnity or liability terms You offer. You may include additional
 disclaimers of warranty and limitations of liability specific to any
 jurisdiction.
 4. Inability to Comply Due to Statute or Regulation
 ---------------------------------------------------
 If it is impossible for You to comply with any of the terms of this
 License with respect to some or all of the Covered Software due to
 statute, judicial order, or regulation then You must: (a) comply with
 the terms of this License to the maximum extent possible; and (b)
 describe the limitations and the code they affect. Such description must
 be placed in a text file included with all distributions of the Covered
 Software under this License. Except to the extent prohibited by statute
 or regulation, such description must be sufficiently detailed for a
 recipient of ordinary skill to be able to understand it.
 5. Termination
 --------------
 5.1. The rights granted under this License will terminate automatically
 if You fail to comply with any of its terms. However, if You become
 compliant, then the rights granted under this License from a particular
 Contributor are reinstated (a) provisionally, unless and until such
 Contributor explicitly and finally terminates Your grants, and (b) on an
 ongoing basis, if such Contributor fails to notify You of the
 non-compliance by some reasonable means prior to 60 days after You have
 come back into compliance. Moreover, Your grants from a particular
 Contributor are reinstated on an ongoing basis if such Contributor
 notifies You of the non-compliance by some reasonable means, this is the
 first time You have received notice of non-compliance with this License
 from such Contributor, and You become compliant prior to 30 days after
 Your receipt of the notice.
 5.2. If You initiate litigation against any entity by asserting a patent
 infringement claim (excluding declaratory judgment actions,
 counter-claims, and cross-claims) alleging that a Contributor Version
 directly or indirectly infringes any patent, then the rights granted to
 You by any and all Contributors for the Covered Software under Section
 2.1 of this License shall terminate.
 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
 end user license agreements (excluding distributors and resellers) which
 have been validly granted by You or Your distributors under this License
 prior to termination shall survive termination.
 ************************************************************************
 *                                                                      *
 *  6. Disclaimer of Warranty                                           *
 *  -------------------------                                           *
 *                                                                      *
 *  Covered Software is provided under this License on an "as is"       *
 *  basis, without warranty of any kind, either expressed, implied, or  *
 *  statutory, including, without limitation, warranties that the       *
 *  Covered Software is free of defects, merchantable, fit for a        *
 *  particular purpose or non-infringing. The entire risk as to the     *
 *  quality and performance of the Covered Software is with You.        *
 *  Should any Covered Software prove defective in any respect, You     *
 *  (not any Contributor) assume the cost of any necessary servicing,   *
 *  repair, or correction. This disclaimer of warranty constitutes an   *
 *  essential part of this License. No use of any Covered Software is   *
 *  authorized under this License except under this disclaimer.         *
 *                                                                      *
 ************************************************************************
 ************************************************************************
 *                                                                      *
 *  7. Limitation of Liability                                          *
 *  --------------------------                                          *
 *                                                                      *
 *  Under no circumstances and under no legal theory, whether tort      *
 *  (including negligence), contract, or otherwise, shall any           *
 *  Contributor, or anyone who distributes Covered Software as          *
 *  permitted above, be liable to You for any direct, indirect,         *
 *  special, incidental, or consequential damages of any character      *
 *  including, without limitation, damages for lost profits, loss of    *
 *  goodwill, work stoppage, computer failure or malfunction, or any    *
 *  and all other commercial damages or losses, even if such party      *
 *  shall have been informed of the possibility of such damages. This   *
 *  limitation of liability shall not apply to liability for death or   *
 *  personal injury resulting from such party's negligence to the       *
 *  extent applicable law prohibits such limitation. Some               *
 *  jurisdictions do not allow the exclusion or limitation of           *
 *  incidental or consequential damages, so this exclusion and          *
 *  limitation may not apply to You.                                    *
 *                                                                      *
 ************************************************************************
 8. Litigation
 -------------
 Any litigation relating to this License may be brought only in the
 courts of a jurisdiction where the defendant maintains its principal
 place of business and such litigation shall be governed by laws of that
 jurisdiction, without reference to its conflict-of-law provisions.
 Nothing in this Section shall prevent a party's ability to bring
 cross-claims or counter-claims.
 9. Miscellaneous
 ----------------
 This License represents the complete agreement concerning the subject
 matter hereof. If any provision of this License is held to be
 unenforceable, such provision shall be reformed only to the extent
 necessary to make it enforceable. Any law or regulation which provides
 that the language of a contract shall be construed against the drafter
 shall not be used to construe this License against a Contributor.
 10. Versions of the License
 ---------------------------
 10.1. New Versions
 Mozilla Foundation is the license steward. Except as provided in Section
 10.3, no one other than the license steward has the right to modify or
 publish new versions of this License. Each version will be given a
 distinguishing version number.
 10.2. Effect of New Versions
 You may distribute the Covered Software under the terms of the version
 of the License under which You originally received the Covered Software,
 or under the terms of any subsequent version published by the license
 steward.
 10.3. Modified Versions
 If you create software not governed by this License, and you want to
 create a new license for such software, you may create and use a
 modified version of this License if you rename the license and remove
 any references to the name of the license steward (except to note that
 such modified license differs from this License).
 10.4. Distributing Source Code Form that is Incompatible With Secondary
 Licenses
 If You choose to distribute Source Code Form that is Incompatible With
 Secondary Licenses under the terms of this version of the License, the
 notice described in Exhibit B of this License must be attached.
 Exhibit A - Source Code Form License Notice
 -------------------------------------------
  This Source Code Form is subject to the terms of the Mozilla Public
  License, v. 2.0. If a copy of the MPL was not distributed with this
  file, You can obtain one at http://mozilla.org/MPL/2.0/.
 If it is not possible or desirable to put the notice in a particular
 file, then You may include the notice in a location (such as a LICENSE
 file in a relevant directory) where a recipient would be likely to look
 for such a notice.
 You may add additional accurate notices of copyright ownership.
--- a/vendor/hare-unicode/cmd/linebreak/main.ha
+++ b/vendor/hare-unicode/cmd/linebreak/main.ha
@ -0,0 +1,40 @@
 use encoding::hex;
 use fmt;
 use os;
 use strings;
 use unicode;
 // Line-breaking demo. Takes the text to analyse as the first command-line
 // argument, hex-dumps its UTF-8 encoding, echoes the text, prints a marker row
 // beneath it and finally lists every break opportunity that
 // unicode::next_line_break reports.
 export fn main() void = {
 	// Exit with a usage message rather than aborting on an out-of-bounds
 	// index when no argument was supplied.
 	if (len(os::args) < 2) {
 		fmt::fatal("Usage: linebreak <text>");
 	};
 	const input = os::args[1];
 	const data = strings::toutf8(input);
 	hex::dump(os::stdout, data)!;
 	fmt::println(input)!;
 	// Marker row: pad with spaces up to each reported position, then print
 	// '^' for an optional break or '|' plus a newline for a mandatory one.
 	let ix = 0u;
 	const lb = unicode::new_line_breaker(input);
 	for (const (pos, _, mand) => unicode::next_line_break(&lb)) {
 		for (ix < pos; ix += 1) {
 			fmt::print(' ')!;
 		};
 		// Account for the column taken by the marker itself.
 		ix += 1;
 		if (mand) {
 			fmt::println('|')!;
 		} else {
 			fmt::print('^')!;
 		};
 	};
 	fmt::println()!;
 	fmt::println()!;
 	fmt::println("Line break opportunities:")!;
 	// Second pass with a fresh breaker for the detailed listing.
 	// NOTE(review): pos is used as an index into the string and bpos as an
 	// index into the UTF-8 bytes; presumably a break reported at the very
 	// end of the input would index past both -- confirm against
 	// unicode::next_line_break.
 	const lb = unicode::new_line_breaker(input);
 	for (const (pos, bpos, mand) => unicode::next_line_break(&lb)) {
 		fmt::printfln("- {}:{} {} (before '{}'/0x{:x})", pos, bpos,
 			if (mand) "(mandatory)" else "",
 			strings::sub(input, pos, pos+1),
 			data[bpos])!;
 	};
 };
--- a/vendor/hare-unicode/cmd/ucdtest/main.ha
+++ b/vendor/hare-unicode/cmd/ucdtest/main.ha
@ -0,0 +1,25 @@
 use fmt;
 use os;
 use strings;
 use unicode;
 // UCD lookup demo. For every rune of the first command-line argument, prints
 // the rune, its code point in hex, and the codes of its general category,
 // script and line-break class.
 export fn main() void = {
 	// Exit with a usage message rather than aborting on an out-of-bounds
 	// index when no argument was supplied.
 	if (len(os::args) < 2) {
 		fmt::fatal("Usage: ucdtest <text>");
 	};
 	const input = os::args[1];
 	const iter = strings::iter(input);
 	// for-each stops once strings::next stops yielding runes.
 	for (const rn => strings::next(&iter)) {
 		const gc = unicode::rune_gc(rn);
 		const sc = unicode::rune_script(rn);
 		const lb = unicode::rune_line_break(rn);
 		fmt::printfln("'{}'/{:x}: {} : {} : {}",
 			rn, rn: u32,
 			unicode::gc_code(gc),
 			unicode::script_code(sc),
 			unicode::line_break_code(lb))!;
 	};
 };
--- a/vendor/hare-unicode/scripts/gen-ucd.py
+++ b/vendor/hare-unicode/scripts/gen-ucd.py
@ -0,0 +1,860 @@
 #!/usr/bin/python3
 # Based on CPython's unicodedata generation script,
 # Tools/unicode/makeunicodedata.py, forked and adapted for Hare
 #
 # PSF License
 #
 # (re)generate unicode property and type databases
 #
 # This script converts Unicode database files to Modules/unicodedata_db.h,
 # Modules/unicodename_db.h, and Objects/unicodetype_db.h
 #
 # history:
 # 2000-09-24 fl   created (based on bits and pieces from unidb)
 # 2000-09-25 fl   merged tim's splitbin fixes, separate decomposition table
 # 2000-09-25 fl   added character type table
 # 2000-09-26 fl   added LINEBREAK, DECIMAL, and DIGIT flags/fields (2.0)
 # 2000-11-03 fl   expand first/last ranges
 # 2001-01-19 fl   added character name tables (2.1)
 # 2001-01-21 fl   added decomp compression; dynamic phrasebook threshold
 # 2002-09-11 wd   use string methods
 # 2002-10-18 mvl  update to Unicode 3.2
 # 2002-10-22 mvl  generate NFC tables
 # 2002-11-24 mvl  expand all ranges, sort names version-independently
 # 2002-11-25 mvl  add UNIDATA_VERSION
 # 2004-05-29 perky add east asian width information
 # 2006-03-10 mvl  update to Unicode 4.1; add UCD 3.2 delta
 # 2008-06-11 gb   add PRINTABLE_MASK for Atsuo Ishimoto's ascii() patch
 # 2011-10-21 ezio add support for name aliases and named sequences
 # 2012-01    benjamin add full case mappings
 #
 # written by Fredrik Lundh (fredrik@pythonware.com)
 #
 import dataclasses
 import os
 import sys
 import zipfile
 from functools import partial
 from textwrap import dedent
 from typing import Iterator, List, Optional, Set, Tuple
 SCRIPT = sys.argv[0]
 VERSION = "3.3"
 # The Unicode Database
 # --------------------
 # When changing UCD version please update
 #   * Doc/library/stdtypes.rst, and
 #   * Doc/library/unicodedata.rst
 #   * Doc/reference/lexical_analysis.rst (two occurrences)
 UNIDATA_VERSION = "13.0.0"
 UNICODE_DATA = "UnicodeData%s.txt"
 COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
 EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
 UNIHAN = "Unihan%s.zip"
 DERIVED_CORE_PROPERTIES = "DerivedCoreProperties%s.txt"
 DERIVEDNORMALIZATION_PROPS = "DerivedNormalizationProps%s.txt"
 LINE_BREAK = "LineBreak%s.txt"
 NAME_ALIASES = "NameAliases%s.txt"
 NAMED_SEQUENCES = "NamedSequences%s.txt"
 SPECIAL_CASING = "SpecialCasing%s.txt"
 CASE_FOLDING = "CaseFolding%s.txt"
 SCRIPTS = "Scripts%s.txt"
 # Private Use Areas -- in planes 0 (the BMP), 15, 16
 PUA_1 = range(0xE000, 0xF900)
 PUA_15 = range(0xF0000, 0xFFFFE)
 PUA_16 = range(0x100000, 0x10FFFE)
 # we use these ranges of PUA_15 to store name aliases and named sequences
 NAME_ALIASES_START = 0xF0000
 NAMED_SEQUENCES_START = 0xF0200
 old_versions = []
 # Order must match ucd.ha
 CATEGORY_NAMES = [
    "Cc", "Cf", "Cn", "Co", "Cs", "Ll", "Lm", "Lo", "Lt", "Lu", "Mc", "Me",
    "Mn", "Nd", "Nl", "No", "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps", "Sc",
    "Sk", "Sm", "So", "Zl", "Zp", "Zs",
 ]
 BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
    "PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
    "ON", "LRI", "RLI", "FSI", "PDI" ]
 EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ]
 MANDATORY_LINE_BREAKS = [ "BK", "CR", "LF", "NL" ]
 LINE_BREAKS = [
    "XX", "AI", "BK", "CJ", "CR", "LF", "NL", "SA", "SG", "SP", "OP", "CL",
    "CP", "QU", "GL", "NS", "EX", "SY", "IS", "PR", "PO", "NU", "AL", "HL",
    "ID", "IN", "HY", "BA", "BB", "B2", "ZW", "CM", "WJ", "H2", "H3", "JL",
    "JV", "JT", "RI", "EB", "EM", "ZWJ", "CB",
 ]
 SCRIPT_NAMES = [
    "Common",
    "Inherited",
    "Unknown",
    "Adlam",
    "Caucasian_Albanian",
    "Ahom",
    "Arabic",
    "Imperial_Aramaic",
    "Armenian",
    "Avestan",
    "Balinese",
    "Bamum",
    "Bassa_Vah",
    "Batak",
    "Bengali",
    "Bhaiksuki",
    "Bopomofo",
    "Brahmi",
    "Braille",
    "Buginese",
    "Buhid",
    "Chakma",
    "Canadian_Aboriginal",
    "Carian",
    "Cham",
    "Cherokee",
    "Chorasmian",
    "Coptic",
    "Cypro_Minoan",
    "Cypriot",
    "Cyrillic",
    "Devanagari",
    "Dives_Akuru",
    "Dogra",
    "Deseret",
    "Duployan",
    "Egyptian_Hieroglyphs",
    "Elbasan",
    "Elymaic",
    "Ethiopic",
    "Georgian",
    "Glagolitic",
    "Gunjala_Gondi",
    "Masaram_Gondi",
    "Gothic",
    "Grantha",
    "Greek",
    "Gujarati",
    "Gurmukhi",
    "Hangul",
    "Han",
    "Hanunoo",
    "Hatran",
    "Hebrew",
    "Hiragana",
    "Anatolian_Hieroglyphs",
    "Pahawh_Hmong",
    "Nyiakeng_Puachue_Hmong",
    "Old_Hungarian",
    "Old_Italic",
    "Javanese",
    "Kayah_Li",
    "Katakana",
    "Kawi",
    "Kharoshthi",
    "Khmer",
    "Khojki",
    "Khitan_Small_Script",
    "Kannada",
    "Kaithi",
    "Tai_Tham",
    "Lao",
    "Latin",
    "Lepcha",
    "Limbu",
    "Linear_A",
    "Linear_B",
    "Lisu",
    "Lycian",
    "Lydian",
    "Mahajani",
    "Makasar",
    "Mandaic",
    "Manichaean",
    "Marchen",
    "Medefaidrin",
    "Mende_Kikakui",
    "Meroitic_Cursive",
    "Meroitic_Hieroglyphs",
    "Malayalam",
    "Modi",
    "Mongolian",
    "Mro",
    "Meetei_Mayek",
    "Multani",
    "Myanmar",
    "Nag_Mundari",
    "Nandinagari",
    "Old_North_Arabian",
    "Nabataean",
    "Newa",
    "Nko",
    "Nushu",
    "Ogham",
    "Ol_Chiki",
    "Old_Turkic",
    "Oriya",
    "Osage",
    "Osmanya",
    "Old_Uyghur",
    "Palmyrene",
    "Pau_Cin_Hau",
    "Old_Permic",
    "Phags_Pa",
    "Inscriptional_Pahlavi",
    "Psalter_Pahlavi",
    "Phoenician",
    "Miao",
    "Inscriptional_Parthian",
    "Rejang",
    "Hanifi_Rohingya",
    "Runic",
    "Samaritan",
    "Old_South_Arabian",
    "Saurashtra",
    "SignWriting",
    "Shavian",
    "Sharada",
    "Siddham",
    "Khudawadi",
    "Sinhala",
    "Sogdian",
    "Old_Sogdian",
    "Sora_Sompeng",
    "Soyombo",
    "Sundanese",
    "Syloti_Nagri",
    "Syriac",
    "Tagbanwa",
    "Takri",
    "Tai_Le",
    "New_Tai_Lue",
    "Tamil",
    "Tangut",
    "Tai_Viet",
    "Telugu",
    "Tifinagh",
    "Tagalog",
    "Thaana",
    "Thai",
    "Tibetan",
    "Tirhuta",
    "Tangsa",
    "Toto",
    "Ugaritic",
    "Vai",
    "Vithkuqi",
    "Warang_Citi",
    "Wancho",
    "Old_Persian",
    "Cuneiform",
    "Yezidi",
    "Yi",
    "Zanabazar_Square",
 ]
 # note: should match definitions in Objects/unicodectype.c
 ALPHA_MASK = 0x01
 DECIMAL_MASK = 0x02
 DIGIT_MASK = 0x04
 LOWER_MASK = 0x08
 LINEBREAK_MASK = 0x10
 SPACE_MASK = 0x20
 TITLE_MASK = 0x40
 UPPER_MASK = 0x80
 XID_START_MASK = 0x100
 XID_CONTINUE_MASK = 0x200
 PRINTABLE_MASK = 0x400
 NUMERIC_MASK = 0x800
 CASE_IGNORABLE_MASK = 0x1000
 CASED_MASK = 0x2000
 EXTENDED_CASE_MASK = 0x4000
 # these ranges need to match unicodedata.c:is_unified_ideograph
 cjk_ranges = [
    ('3400', '4DBF'),
    ('4E00', '9FFC'),
    ('20000', '2A6DD'),
    ('2A700', '2B734'),
    ('2B740', '2B81D'),
    ('2B820', '2CEA1'),
    ('2CEB0', '2EBE0'),
    ('30000', '3134A'),
 ]
 def maketables(trace=0):
    '''
    Load the Unicode database for UNIDATA_VERSION, report how many code
    points have a record, and hand the data to makeunicodedata().
    `trace` is passed through unchanged.
    '''
    print("--- Reading", UNICODE_DATA % "", "...")
    ucd = UnicodeData(UNIDATA_VERSION)
    # Count only the populated slots of the per-code-point table.
    populated = sum(1 for record in ucd.table if record)
    print(populated, "characters")
    makeunicodedata(ucd, trace)
 # --------------------------------------------------------------------
 # unicode character properties
 def makeunicodedata(unicode, trace):
    '''
    Collapse the per-code-point properties into a list of unique records
    plus an index, and write both to unicode/ucd_gen.ha.

    unicode -- object exposing `chars` (the code points to visit) and
               `table` (record per code point, falsy when there is none)
    trace   -- passed through to splitbins() and Array.dump()

    Raises ValueError when a property value is missing from the
    corresponding *_NAMES / LINE_BREAKS list.
    '''
    # Record 0 is the all-zero placeholder shared by every code point
    # that has no entry in unicode.table.
    # NOTE(review): index 0 of CATEGORY_NAMES / SCRIPT_NAMES is "Cc" /
    # "Common", so such code points decode as Cc/Common rather than
    # Cn/Unknown -- confirm this is what ucd.ha expects.
    dummy = (0, 0, 0, 0, 0, 0, 0)
    table = [dummy]
    # Maps a property tuple to its position in `table`.  The placeholder
    # is registered under its own tuple (not under the integer 0) so a
    # lookup with an all-zero tuple finds slot 0 instead of appending a
    # duplicate record.
    cache = {dummy: 0}
    index = [0] * len(unicode.chars)
    FILE = "unicode/ucd_gen.ha"
    print("--- Preparing", FILE, "...")
    for char in unicode.chars:
        record = unicode.table[char]
        if record:
            # extract database properties
            category = CATEGORY_NAMES.index(record.general_category)
            combining = int(record.canonical_combining_class)
            bidirectional = BIDIRECTIONAL_NAMES.index(record.bidi_class)
            # bool; rendered as 0/1 by the %d format below
            mirrored = record.bidi_mirrored == "Y"
            eastasianwidth = EASTASIANWIDTH_NAMES.index(record.east_asian_width)
            script = SCRIPT_NAMES.index(record.script or "Unknown")
            line_break = LINE_BREAKS.index(record.line_break)
            item = (
                category, combining, bidirectional,
                mirrored, eastasianwidth, script,
                line_break,
                )
            # add entry to index and item tables
            i = cache.get(item)
            if i is None:
                cache[item] = i = len(table)
                table.append(item)
            index[char] = i
    print(len(table), "unique properties")
    print("--- Writing", FILE, "...")
    with open(FILE, "w") as fp:
        fprint = partial(print, file=fp)
        fprint("// Generated by scripts/gen-ucd.py")
        fprint()
        fprint('// Unicode database version supported by this module')
        fprint('export def UNIDATA_VERSION: str = "%s";' % UNIDATA_VERSION)
        fprint('')
        fprint("// List of unique database records")
        fprint("const ucd_records: [_]ucd_encodedrec = [")
        for item in table:
            fprint("    (%d, %d, %d, %d, %d, %d, %d)," % item)
        fprint("];")
        fprint()
        # split record index table
        index1, index2, shift = splitbins(index, trace)
        fprint("// index tables for the database records")
        fprint("def UCD_RECORD_SHIFT: size = %d;" % shift)
        Array("index1", index1).dump(fp, trace)
        Array("index2", index2).dump(fp, trace)
 DATA_DIR = os.path.join('.data')
 def open_data(template, version):
    '''
    Open one UCD data file, downloading it into DATA_DIR on first use.

    template -- file name pattern with a single %s placeholder
    version  -- Unicode version string, e.g. "13.0.0"

    Returns a text-mode (UTF-8) file object for *.txt files and a binary
    file object for anything else.
    '''
    cached = os.path.join(DATA_DIR, template % ('-'+version,))
    if not os.path.exists(cached):
        import urllib.request
        # 3.2.0 lives under an irregular url structure
        url = (
            ('https://www.unicode.org/Public/3.2-Update/'+template) % ('-'+version,)
            if version == '3.2.0'
            else ('https://www.unicode.org/Public/%s/ucd/'+template) % (version, '')
        )
        os.makedirs(DATA_DIR, exist_ok=True)
        urllib.request.urlretrieve(url, filename=cached)
    if cached.endswith('.txt'):
        return open(cached, encoding='utf-8')
    # Unihan.zip
    return open(cached, 'rb')
 def expand_range(char_range: str) -> Iterator[int]:
    '''
    Yield every code point named by a UAX #44 code point field: either a
    single hexadecimal value ("0041") or an inclusive "first..last" pair.
      https://www.unicode.org/reports/tr44/#Code_Point_Ranges
    '''
    if '..' not in char_range:
        first = last = int(char_range, 16)
    else:
        first, last = (int(bound, 16) for bound in char_range.split('..'))
    # `last` is inclusive.
    yield from range(first, last+1)
 class UcdFile:
    '''
    Reader for a data file in the standard UCD layout: one record per
    line, fields separated by ';', with '#' starting a comment.
    See: https://www.unicode.org/reports/tr44/#Format_Conventions
    The Unihan data files use their own separate format, as described
    there.
    '''
    def __init__(self, template: str, version: str) -> None:
        # Both values are handed to open_data() whenever the file is read.
        self.template = template
        self.version = version
    def records(self) -> Iterator[List[str]]:
        # Yield the stripped fields of every line that still has content
        # once its comment part is removed.
        with open_data(self.template, self.version) as file:
            for raw in file:
                content = raw.partition('#')[0].strip()
                if content:
                    yield [field.strip() for field in content.split(';')]
    def __iter__(self) -> Iterator[List[str]]:
        return self.records()
    def expanded(self) -> Iterator[Tuple[int, List[str]]]:
        # Treat the first field as a code point range and repeat the
        # remaining fields for each code point in it.
        for char_range, *rest in self.records():
            for char in expand_range(char_range):
                yield char, rest
@dataclasses.dataclass
 class UcdRecord:
    # One code point's worth of data.  The first 15 fields mirror a row of
    # UnicodeData.txt in file order (from_row relies on that order); the
    # remaining fields are filled in later from other data files.

    # 15 fields from UnicodeData.txt .  See:
    #   https://www.unicode.org/reports/tr44/#UnicodeData.txt
    codepoint: str
    name: str
    general_category: str
    canonical_combining_class: str
    bidi_class: str
    decomposition_type: str
    decomposition_mapping: str
    numeric_type: str
    numeric_value: str
    bidi_mirrored: str
    unicode_1_name: str  # obsolete
    iso_comment: str  # obsolete
    simple_uppercase_mapping: str
    simple_lowercase_mapping: str
    simple_titlecase_mapping: str
    # https://www.unicode.org/reports/tr44/#EastAsianWidth.txt
    # None until (and unless) a width is loaded for this code point.
    east_asian_width: Optional[str]
    # Binary properties, as a set of those that are true.
    # Taken from multiple files:
    #   https://www.unicode.org/reports/tr44/#DerivedCoreProperties.txt
    #   https://www.unicode.org/reports/tr44/#LineBreak.txt
    # (from LineBreak.txt only the synthetic 'Line_Break' entry is added,
    # for code points whose class is a mandatory break).
    binary_properties: Set[str]
    # The Quick_Check properties related to normalization:
    #   https://www.unicode.org/reports/tr44/#Decompositions_and_Normalization
    # We store them as a bitmask.
    quick_check: int
    # Script property, loaded from the SCRIPTS data file; from_row
    # initialises it to "Unknown".
    script: str
    # Line_Break class, loaded from the LINE_BREAK data file; from_row
    # initialises it to "XX".
    line_break: str
 def from_row(row: List[str]) -> UcdRecord:
    # Build a record from the 15 UnicodeData.txt fields in `row`; the
    # fields sourced from other files start out at their defaults.
    return UcdRecord(
        *row,
        east_asian_width=None,
        binary_properties=set(),
        quick_check=0,
        script="Unknown",
        line_break="XX",
    )
 # --------------------------------------------------------------------
 # the following support code is taken from the unidb utilities
 # Copyright (c) 1999-2000 by Secret Labs AB
 # load a unicode-data file from disk
 class UnicodeData:
    '''
    In-memory view of one version of the Unicode Character Database.

    The constructor reads the data files for `version` (through UcdFile
    and open_data) and merges them into `self.table`, a list indexed by
    code point whose entries are UcdRecord objects or None.

    Attributes set by the constructor:
      table            per-code-point records (None where nothing is defined)
      chars            list of code points to process (all of 0..0x10FFFF)
      filename         UNICODE_DATA formatted with an empty version suffix
      changed          empty list; not populated here
      aliases          (name, code point) pairs; only if version != '3.2.0'
      named_sequences  (name, code points) pairs; only if version != '3.2.0'
      exclusions       dict keyed by composition-excluded code point
      special_casing   code point -> (lower, title, upper) lists
      case_folding     code point -> folded code point list
    '''
    # table: List[Optional[UcdRecord]]  # index is codepoint; None means unassigned
    def __init__(self, version, cjk_check=True):
        # version:   UCD version string, e.g. '3.2.0'.
        # cjk_check: when true, raise ValueError if the CJK ideograph ranges
        #            found in UnicodeData.txt differ from the module-level
        #            cjk_ranges.
        self.changed = []
        table = [None] * 0x110000
        for s in UcdFile(UNICODE_DATA, version):
            char = int(s[0], 16)
            table[char] = from_row(s)
        cjk_ranges_found = []
        # expand first-last ranges
        # `field` holds the 15 UnicodeData fields of the most recent
        # "<..., First>" row while we are inside such a range.
        field = None
        for i in range(0, 0x110000):
            # The file UnicodeData.txt has its own distinct way of
            # expressing ranges.  See:
            #   https://www.unicode.org/reports/tr44/#Code_Point_Ranges
            s = table[i]
            if s:
                if s.name[-6:] == "First>":
                    s.name = ""
                    field = dataclasses.astuple(s)[:15]
                elif s.name[-5:] == "Last>":
                    if s.name.startswith("<CJK Ideograph"):
                        cjk_ranges_found.append((field[0],
                                                 s.codepoint))
                    s.name = ""
                    field = None
            elif field:
                # Inside a First..Last range: clone the First row, replacing
                # only the code point field.
                table[i] = from_row(('%X' % i,) + field[1:])
        if cjk_check and cjk_ranges != cjk_ranges_found:
            raise ValueError("CJK ranges deviate: have %r" % cjk_ranges_found)
        # public attributes
        self.filename = UNICODE_DATA % ''
        self.table = table
        self.chars = list(range(0x110000)) # unicode 3.2
        # check for name aliases and named sequences, see #12753
        # aliases and named sequences are not in 3.2.0
        if version != '3.2.0':
            self.aliases = []
            # store aliases in the Private Use Area 15, in range U+F0000..U+F00FF,
            # in order to take advantage of the compression and lookup
            # algorithms used for the other characters
            pua_index = NAME_ALIASES_START
            for char, name, abbrev in UcdFile(NAME_ALIASES, version):
                char = int(char, 16)
                self.aliases.append((name, char))
                # also store the name in the Private Use Area 15 record
                self.table[pua_index].name = name
                pua_index += 1
            assert pua_index - NAME_ALIASES_START == len(self.aliases)
            self.named_sequences = []
            # store named sequences in the Private Use Area 15, in range
            # U+F0100.., in order to take advantage of the compression and
            # lookup algorithms used for the other characters.
            assert pua_index < NAMED_SEQUENCES_START
            pua_index = NAMED_SEQUENCES_START
            for name, chars in UcdFile(NAMED_SEQUENCES, version):
                chars = tuple(int(char, 16) for char in chars.split())
                # check that the structure defined in makeunicodename is OK
                assert 2 <= len(chars) <= 4, "change the Py_UCS2 array size"
                assert all(c <= 0xFFFF for c in chars), ("use Py_UCS4 in "
                    "the NamedSequence struct and in unicodedata_lookup")
                self.named_sequences.append((name, chars))
                # also store these in the Private Use Area 15 records
                self.table[pua_index].name = name
                pua_index += 1
            assert pua_index - NAMED_SEQUENCES_START == len(self.named_sequences)
        # Composition exclusions, stored as a dict used like a set.
        self.exclusions = {}
        for char, in UcdFile(COMPOSITION_EXCLUSIONS, version):
            char = int(char, 16)
            self.exclusions[char] = 1
        # East Asian width: collect per code point, then copy onto every
        # existing record (None where the file lists nothing).
        widths = [None] * 0x110000
        for char, (width,) in UcdFile(EASTASIAN_WIDTH, version).expanded():
            widths[char] = width
        for i in range(0, 0x110000):
            if table[i] is not None:
                table[i].east_asian_width = widths[i]
        # Script: same two-pass scheme as for the widths.
        # NOTE(review): a record whose code point is absent from the SCRIPTS
        # file ends up with script=None here, replacing the "Unknown" default
        # assigned by from_row -- confirm that consumers expect None.
        scripts = [None] * 0x110000
        for char, (script,) in UcdFile(SCRIPTS, version).expanded():
            scripts[char] = script
        for i in range(0, 0x110000):
            if table[i] is not None:
                table[i].script = scripts[i]
        for char, (p,) in UcdFile(DERIVED_CORE_PROPERTIES, version).expanded():
            if table[char]:
                # Some properties (e.g. Default_Ignorable_Code_Point)
                # apply to unassigned code points; ignore them
                table[char].binary_properties.add(p)
        # Line break class; mandatory-break classes are additionally
        # recorded as the binary property 'Line_Break'.
        for char_range, value in UcdFile(LINE_BREAK, version):
            for char in expand_range(char_range):
                if not table[char]:
                    continue
                if value in MANDATORY_LINE_BREAKS:
                    table[char].binary_properties.add('Line_Break')
                table[char].line_break = value
        # We only want the quickcheck properties
        # Format: NF?_QC; Y(es)/N(o)/M(aybe)
        # Yes is the default, hence only N and M occur
        # In 3.2.0, the format was different (NF?_NO)
        # The parsing will incorrectly determine these as
        # "yes", however, unicodedata.c will not perform quickchecks
        # for older versions, and no delta records will be created.
        quickchecks = [0] * 0x110000
        # Each property gets a 2-bit slot, in this order, starting at bit 0.
        qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC'.split()
        for s in UcdFile(DERIVEDNORMALIZATION_PROPS, version):
            if len(s) < 2 or s[1] not in qc_order:
                continue
            quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No
            quickcheck_shift = qc_order.index(s[1])*2
            quickcheck <<= quickcheck_shift
            for char in expand_range(s[0]):
                # A slot must not be written twice for the same code point.
                assert not (quickchecks[char]>>quickcheck_shift)&3
                quickchecks[char] |= quickcheck
        for i in range(0, 0x110000):
            if table[i] is not None:
                table[i].quick_check = quickchecks[i]
        # Numeric values of ideographs come from the Unihan archive; the
        # member to read depends on the version.
        with open_data(UNIHAN, version) as file:
            zip = zipfile.ZipFile(file)
            if version == '3.2.0':
                data = zip.open('Unihan-3.2.0.txt').read()
            else:
                data = zip.open('Unihan_NumericValues.txt').read()
        for line in data.decode("utf-8").splitlines():
            if not line.startswith('U+'):
                continue
            # Whitespace-separated: "U+XXXX", tag, value.
            code, tag, value = line.split(None, 3)[:3]
            if tag not in ('kAccountingNumeric', 'kPrimaryNumeric',
                           'kOtherNumeric'):
                continue
            value = value.strip().replace(',', '')
            i = int(code[2:], 16)
            # Patch the numeric field
            if table[i] is not None:
                table[i].numeric_value = value
        sc = self.special_casing = {}
        for data in UcdFile(SPECIAL_CASING, version):
            if data[4]:
                # We ignore all conditionals (since they depend on
                # languages) except for one, which is hardcoded. See
                # handle_capital_sigma in unicodeobject.c.
                continue
            c = int(data[0], 16)
            lower = [int(char, 16) for char in data[1].split()]
            title = [int(char, 16) for char in data[2].split()]
            upper = [int(char, 16) for char in data[3].split()]
            sc[c] = (lower, title, upper)
        cf = self.case_folding = {}
        if version != '3.2.0':
            for data in UcdFile(CASE_FOLDING, version):
                # Keep only rows whose status field is "C" or "F".
                if data[1] in "CF":
                    c = int(data[0], 16)
                    cf[c] = [int(char, 16) for char in data[2].split()]
    def uselatin1(self):
        # restrict character range to ISO Latin 1
        self.chars = list(range(256))
 # hash table tools
 # this is a straight-forward reimplementation of Python's built-in
 # dictionary type, using a static data structure, and a custom string
 # hash algorithm.
 def myhash(s, magic):
    # Case-insensitive multiplicative string hash, folded to 24 bits:
    # whenever bits 24..31 become non-zero they are XORed back into the
    # low byte and everything above bit 23 is discarded.
    acc = 0
    for ch in s.upper():
        acc = acc * magic + ord(ch)
        overflow = acc & 0xff000000
        if overflow:
            acc = (acc ^ ((overflow >> 24) & 0xff)) & 0x00ffffff
    return acc
 # (table size, polynomial) pairs consumed by Hash.__init__, which picks the
 # first entry whose size exceeds the number of items and uses size + poly
 # as the value XORed into the probe increment when it overflows the mask.
 SIZES = [
    (4,3), (8,3), (16,3), (32,5), (64,3), (128,3), (256,29), (512,17),
    (1024,9), (2048,5), (4096,83), (8192,27), (16384,43), (32768,3),
    (65536,45), (131072,9), (262144,39), (524288,39), (1048576,9),
    (2097152,5), (4194304,3), (8388608,33), (16777216,27)
 ]
 class Hash:
    # Static open-addressing hash table built from (key, value) pairs and
    # exposed as an Array of values (0 marks an unused slot).
    def __init__(self, name, data, magic):
        # Pick the smallest table size that is strictly larger than the
        # number of entries, together with its probing polynomial.
        entry_count = len(data)
        for size, poly in SIZES:
            if size > entry_count:
                poly += size
                break
        else:
            raise AssertionError("ran out of polynomials")
        print(size, "slots in hash table")
        slots = [None] * size
        mask = size - 1
        collisions = 0
        # Insert every entry, probing on collision.
        for key, value in data:
            h = myhash(key, magic)
            slot = ~h & mask
            if slots[slot] is not None:
                # Initial step derived from the hash; never zero.
                step = (h ^ (h >> 3)) & mask or mask
                while True:
                    collisions += 1
                    slot = (slot + step) & mask
                    if slots[slot] is None:
                        break
                    step <<= 1
                    if step > mask:
                        step ^= poly
            slots[slot] = value
        print(collisions, "collisions")
        self.collisions = collisions
        # Unused slots are emitted as 0.
        filled = [0 if entry is None else entry for entry in slots]
        self.data = Array(name + "_hash", filled)
        self.magic = magic
        self.name = name
        self.size = size
        self.poly = poly
    def dump(self, file, trace):
        # Write the slot array, then the three parameters needed to
        # reproduce the lookup.
        self.data.dump(file, trace)
        prefix = self.name
        file.write("#define %s_magic %d\n" % (prefix, self.magic))
        file.write("#define %s_size %d\n" % (prefix, self.size))
        file.write("#define %s_poly %d\n" % (prefix, self.poly))
 # stuff to deal with arrays of unsigned integers
 class Array:
    # A named sequence of unsigned integers that can be written out as a
    # source-level array declaration.
    def __init__(self, name, data):
        self.data = data
        self.name = name
    def dump(self, file, trace=0):
        # Emit "const <name>: [_]<type> = [ ... ];" with the element type
        # chosen by getsize() and the element list wrapped at 78 columns.
        width = getsize(self.data)
        if trace:
            print(self.name+":", width*len(self.data), "bytes", file=sys.stderr)
        file.write("const " + self.name + ": [_]")
        file.write({1: "u8", 2: "u16"}.get(width, "u32"))
        file.write(" = [\n")
        if self.data:
            line = "    "
            for item in self.data:
                cell = str(item) + ", "
                if len(line) + len(cell) <= 78:
                    line += cell
                    continue
                # Current line is full: flush it and start a new one.
                file.write(line.rstrip() + "\n")
                line = "    " + cell
            if line.strip():
                file.write(line.rstrip() + "\n")
        file.write("];\n\n")
 def getsize(data):
    # Width in bytes (1, 2 or 4) of the narrowest unsigned integer type
    # able to hold the largest element of `data`.
    largest = max(data)
    for width, limit in ((1, 256), (2, 65536)):
        if largest < limit:
            return width
    return 4
 def splitbins(t, trace=0):
    """t, trace=0 -> (t1, t2, shift).  Compress a table into two levels.

    t is a sequence of ints, typically with many repeated runs.  The
    result is a pair of int lists t1 and t2 plus an int shift, picked so
    that the combined size of t1 and t2 (as sized by getsize) is minimal
    and, for every i in range(len(t)),
        t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
    with mask being the low "shift" bits.

    A non-zero trace prints progress information to sys.stderr; larger
    values print more.
    """
    def report(t1, t2, shift, nbytes):
        print("%d+%d bins at shift %d; %d bytes" % (
            len(t1), len(t2), shift, nbytes), file=sys.stderr)
    if trace:
        print("Size of original table:", len(t)*getsize(t), "bytes",
              file=sys.stderr)
    # Largest shift that still leaves a non-zero high part for the last
    # valid index (0 for tables of at most two elements).
    maxshift = max(len(t) - 1, 1).bit_length() - 1
    t = tuple(t)    # tuple slices are hashable, so bins can be dict keys
    smallest = sys.maxsize  # smallest total size so far
    for shift in range(maxshift + 1):
        binsize = 1 << shift
        t1, t2 = [], []
        seen = {}
        for start in range(0, len(t), binsize):
            chunk = t[start:start+binsize]
            offset = seen.get(chunk)
            if offset is None:
                # First occurrence of this bin: append it to t2.
                offset = seen[chunk] = len(t2)
                t2.extend(chunk)
            t1.append(offset >> shift)
        # determine memory size
        cost = len(t1)*getsize(t1) + len(t2)*getsize(t2)
        if trace > 1:
            report(t1, t2, shift, cost)
        if cost < smallest:
            best = t1, t2, shift
            smallest = cost
    t1, t2, shift = best
    if trace:
        print("Best:", end=' ', file=sys.stderr)
        report(t1, t2, shift, smallest)
    if __debug__:
        # exhaustively verify that the decomposition is correct
        mask = (1 << shift) - 1
        for i, expected in enumerate(t):
            assert expected == t2[(t1[i >> shift] << shift) + (i & mask)]
    return best
 # Script entry point: run the table generator.  The argument 1 is passed
 # straight to maketables (presumably its trace level -- confirm there).
 if __name__ == "__main__":
    maketables(1)
--- a/vendor/hare-unicode/unicode/linebreak.ha
+++ b/vendor/hare-unicode/unicode/linebreak.ha
@ -0,0 +1,196 @@
 use encoding::utf8;
 use strings;
 // State of the line breaking state machine. Create one with
 // [[new_line_breaker]] and advance it with [[next_line_break]].
 export type line_breaker = struct {
 	// String being analysed
 	input: str,
 	// Rune iterator over input; (re)initialised by next_line_break while
 	// pos is still zero
 	iter: strings::iterator,
 	// Current position, in runes
 	pos: size,
 	// Current position, bytes
 	bpos: size,
 	// Current line break class
 	cur: line_break,
 	// Next line break class
 	next: line_break,
 	// State for specific rules
 	// LB8a: true when the most recently read class is ZWJ
 	lb8a: bool,
 	// LB21a: true when the previous pair lookup had HL as current class
 	lb21a: bool,
 	// LB30a: counter maintained while the current class is RI
 	lb30a: uint,
 };
 // Initialises a line breaking state machine over the given input; all other
 // state starts out zeroed. Pass the result to [[next_line_break]] to enumerate
 // the line break opportunities in the input string.
 export fn new_line_breaker(input: str) line_breaker = line_breaker {
 	input = input,
 	...
 };
 // Returns the next line break opportunity as a tuple of the rune-wise index,
 // byte-wise index, and a boolean indicating whether or not the break is
 // mandatory at this location. The line break opportunity directly precedes the
 // index returned from this function. Returns done once the input has been
 // exhausted (or immediately, for an empty input).
 //
 // 	Hello world!
 // 	      ^ Line break opportunity at index 6
 export fn next_line_break(lb: *line_breaker) ((size, size, bool) | done) = {
 	if (lb.pos == 0) {
 		if (len(lb.input) == 0) {
 			return done; // special case
 		};
 		lb.iter = strings::iter(lb.input);
 		const (first, rn) = next_lb1_class(lb) as (line_break, rune);
 		const class = resolve_lb2_class(first);
 		lb.cur = class;
 		lb.next = class;
 		lb.lb8a = class == line_break::ZWJ;
 		// The first rune has now been consumed, so the byte offset of
 		// the rune examined next is the encoded size of the first one.
 		// Without this, byte indices are wrong whenever the first rune
 		// and the rune following a break differ in encoded size.
 		lb.bpos = utf8::runesz(rn);
 	};
 	// Loop invariant: at the top of each iteration, lb.pos + 1 is the rune
 	// index and lb.bpos the byte offset of the rune just read into "rn".
 	for (const (next, rn) => next_lb1_class(lb)) {
 		const prev = lb.next;
 		lb.next = next;
 		const rnsz = utf8::runesz(rn);
 		// Runs on every exit from this iteration, including the returns
 		// below (after their result has been evaluated).
 		defer {
 			lb.pos += 1;
 			lb.bpos += rnsz;
 		};
 		// Break after BK, and after CR unless it is followed by LF.
 		const mandatory = lb.cur == line_break::BK
 			|| (lb.cur == line_break::CR
 				&& lb.next != line_break::LF);
 		if (mandatory) {
 			lb.cur = resolve_lb2_class(next);
 			return (lb.pos + 1, lb.bpos, true);
 		};
 		lb.lb8a = next == line_break::ZWJ;
 		// Try the cheap class-only rules first and fall back to the
 		// pair table for everything else.
 		const can_break = match (lb_simple_case(lb)) {
 		case let simple: bool =>
 			yield simple;
 		case void =>
 			yield lb_complex_case(lb, prev);
 		};
 		if (can_break) {
 			return (lb.pos + 1, lb.bpos, false);
 		};
 	};
 	return done;
 };
 // Reads the next rune from the iterator and returns it together with its line
 // break class, after applying the LB1 suggested resolution of
 // context-dependent classes: AI, SG and XX become AL; SA becomes CM for
 // general categories Mn and Mc and AL otherwise; CJ becomes NS. Returns done
 // when the iterator is exhausted.
 fn next_lb1_class(lb: *line_breaker) ((line_break, rune) | done) = {
 	const rn = match (strings::next(&lb.iter)) {
 	case done =>
 		return done;
 	case let r: rune =>
 		yield r;
 	};
 	const class = rune_line_break(rn);
 	const resolved = switch (class) {
 	case line_break::AI, line_break::SG, line_break::XX =>
 		yield line_break::AL;
 	case line_break::SA =>
 		yield switch (rune_gc(rn)) {
 		case gc::Mn, gc::Mc =>
 			yield line_break::CM;
 		case =>
 			yield line_break::AL;
 		};
 	case line_break::CJ =>
 		yield line_break::NS;
 	case =>
 		yield class;
 	};
 	return (resolved, rn);
 };
 // Maps a line-break class to the class used for it at the start of text (LB2
 // suggested rules): LF and NL are treated as BK, SP as WJ, and every other
 // class is returned unchanged.
 fn resolve_lb2_class(lb: line_break) line_break = {
 	return switch (lb) {
 	case line_break::LF, line_break::NL =>
 		yield line_break::BK;
 	case line_break::SP =>
 		yield line_break::WJ;
 	case =>
 		yield lb;
 	};
 };
 // Handles the cases that can be decided from the next class alone. For SP, BK,
 // LF, NL and CR there is no break opportunity here and false is returned; the
 // hard line break classes additionally update the current class (BK, LF and NL
 // set it to BK, CR sets it to CR). Every other class returns void, meaning the
 // caller has to consult the complex rules.
 fn lb_simple_case(lb: *line_breaker) (bool | void) = {
 	switch (lb.next) {
 	case line_break::SP =>
 		void;
 	case line_break::BK, line_break::LF, line_break::NL =>
 		lb.cur = line_break::BK;
 	case line_break::CR =>
 		lb.cur = line_break::CR;
 	case =>
 		return;
 	};
 	return false;
 };
 // Handles more complex rules, including pair table lookups via
 // linebreak_table.ha.
 //
 // "prev" is the class that was in lb.next before the current rune was read; it
 // is only compared against SP, to tell direct from indirect breaks. Returns
 // whether a break is allowed before the current rune. Unless one of the
 // combining-mark cases returns early, lb.cur is advanced to lb.next and the
 // LB21a/LB30a state is updated as a side effect.
 fn lb_complex_case(lb: *line_breaker, prev: line_break) bool = {
 	let can_break = false;
 	// Table indices are class values relative to OP; classes outside the
 	// table skip the lookup and leave can_break false.
 	const ucur = lb.cur: uint - line_break::OP: uint;
 	const unext = lb.next: uint - line_break::OP: uint;
 	if (ucur < len(lb_pairs) && unext < len(lb_pairs[0])) {
 		switch (lb_pairs[ucur][unext]) {
 		case bo::DI => // Direct break
 			can_break = true;
 		case bo::IN => // Indirect break opportunity
 			can_break = prev == line_break::SP;
 		case bo::CI => // Indirect opportunity for combining marks
 			can_break = prev == line_break::SP;
 			if (!can_break) {
 				// No break; note that lb.cur is left unchanged
 				return false;
 			};
 		case bo::CP => // Prohibited for combining marks
 			if (prev != line_break::SP) {
 				// No break; note that lb.cur is left unchanged
 				return false;
 			};
 		case bo::PR => void;
 		};
 	};
 	// Rule LB8a
 	if (lb.lb8a) {
 		can_break = false;
 	};
 	// Rule LB21a
 	// Suppress the break after HY/BA when the flag was armed by a
 	// preceding HL; otherwise (re)arm the flag if the current class is HL.
 	if (lb.lb21a && (lb.cur == line_break::HY || lb.cur == line_break::BA)) {
 		can_break = false;
 		lb.lb21a = false;
 	} else {
 		lb.lb21a = lb.cur == line_break::HL;
 	};
 	// Rule LB30a
 	// Count current-class RI occurrences; once two have been counted and
 	// another RI follows, allow a break and restart the count.
 	if (lb.cur == line_break::RI) {
 		lb.lb30a += 1;
 		if (lb.lb30a == 2 && lb.next == line_break::RI) {
 			can_break = true;
 			lb.lb30a = 0;
 		};
 	} else {
 		lb.lb30a = 0;
 	};
 	lb.cur = lb.next;
 	return can_break;
 };
--- a/vendor/hare-unicode/unicode/linebreak_table.ha
+++ b/vendor/hare-unicode/unicode/linebreak_table.ha
@ -0,0 +1,63 @@
 // Break opportunity: the kind of entry stored in the lb_pairs table for a
 // (current class, next class) pair.
 type bo = enum {
 	// Direct opportunity: a break is allowed
 	DI,
 	// Indirect opportunity: a break is allowed only if the preceding class
 	// was SP
 	IN,
 	// Indirect opportunity for combining marks: as IN, but handled
 	// separately when no break is allowed
 	CI,
 	// Prohibited break for combining marks
 	CP,
 	// Prohibited break
 	PR,
 };
 // Based on JavaScript implementation here:
 //
 // https://github.com/foliojs/linebreak/blob/master/src/pairs.js
 //
 // This is itself based on the example pair table from Unicode, which was last
 // published in revision 37 of the line break algorithm, and has since been
 // touched up by the JavaScript maintainers to incorporate later changes to the
 // algorithm.
 //
 // - ZWJ special processing for LB8a of Revision 41
 // - CB manually added as per Rule LB20
 // - CL, CP, NS, SY, IS, PR, PO, HY, BA, B2 and RI manually adjusted as per LB22 of Revision 45
 const lb_pairs = [
 	//OP   , CL    , CP    , QU    , GL    , NS    , EX    , SY    , IS    , PR    , PO    , NU    , AL    , HL    , ID    , IN    , HY    , BA    , BB    , B2    , ZW    , CM    , WJ    , H2    , H3    , JL    , JV    , JT    , RI    , EB    , EM    , ZWJ   , CB
 	[bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::CP, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR], // OP
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // CL
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // CP
 	[bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::PR, bo::CI, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN], // QU
 	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::PR, bo::CI, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN], // GL
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // NS
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // EX
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::IN, bo::DI, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // SY
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // IS
 	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI], // PR
 	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // PO
 	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // NU
 	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // AL
 	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // HL
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // ID
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // IN
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::DI, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // HY
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::DI, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // BA
 	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::PR, bo::CI, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI], // BB
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::PR, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // B2
 	[bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI], // ZW
 	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // CM
 	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::PR, bo::CI, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN], // WJ
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // H2
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // H3
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // JL
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // JV
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // JT
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI, bo::DI, bo::IN, bo::DI], // RI
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::DI], // EB
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // EM
 	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // ZWJ
 	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::DI, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI]  // CB
 ];
--- a/vendor/hare-unicode/unicode/ucd.ha
+++ b/vendor/hare-unicode/unicode/ucd.ha
@ -0,0 +1,654 @@
 // Tuple form of a UCD record. Its element types (five u8, one u16, one u8)
 // line up one-to-one, in order, with the field types of [[ucd_record]].
 // NOTE(review): presumably the element type of the ucd_records table indexed
 // by get_ucdrecord; that table is declared elsewhere -- confirm.
 type ucd_encodedrec = (u8, u8, u8, u8, u8, u16, u8);
 // Per-code-point property record, as handed out by get_ucdrecord. Each field
 // holds the raw numeric value of a property; the rune_* accessors in this file
 // cast those values to the matching enum type.
 type ucd_record = struct {
 	// Cast to [[gc]] by [[rune_gc]].
 	category: u8,
 	// No accessor for this field is visible in this part of the file.
 	combining: u8,
 	// Cast to [[bidi]] by [[rune_bidi]].
 	bidirectional: u8,
 	// No accessor for this field is visible in this part of the file.
 	mirrored: u8,
 	// No accessor for this field is visible in this part of the file.
 	east_asian_width: u8,
 	// Cast to [[script]] by [[rune_script]].
 	script: u16,
 	// Cast to [[line_break]] by [[rune_line_break]].
 	line_break: u8,
 };
 // Looks up the property record for a rune through a two-stage table.
 //
 // index1 is indexed by the high bits of the code point
 // (code >> UCD_RECORD_SHIFT) and yields a block number. index2 is then indexed
 // by that block number shifted left by UCD_RECORD_SHIFT plus the low
 // UCD_RECORD_SHIFT bits of the code point, and yields an index into
 // ucd_records. Code points at or above 0x110000 skip both lookups and use
 // ucd_records[0].
 fn get_ucdrecord(rn: rune) *ucd_record = {
 	const code = rn: u32;
 	let index = 0u16;
 	if (code < 0x110000) {
 		index = index1[(code>>UCD_RECORD_SHIFT)];
 		// NOTE(review): whether `index<<UCD_RECORD_SHIFT` is evaluated
 		// at u16 width depends on the declared type of
 		// UCD_RECORD_SHIFT, which is not visible here -- confirm that
 		// the shifted value cannot wrap for the generated tables.
 		index = index2[(index<<UCD_RECORD_SHIFT)+(code&((1<<UCD_RECORD_SHIFT)-1))];
 	};
 	// The table entry is reinterpreted as a ucd_record through a pointer
 	// cast. NOTE(review): this relies on the entry's memory layout
 	// matching the struct; the declaration of ucd_records is not visible
 	// here -- confirm.
 	return &ucd_records[index]: *ucd_record;
 };
 // Unicode character General_Category attribute.
 //
 // Member names are the two-letter category codes returned by [[gc_code]]; the
 // trailing comment on each member is the description returned by [[gc_name]].
 // [[rune_gc]] produces values of this type by casting the u8 category field of
 // a record, so the numeric value of each member (its position in this list)
 // must agree with the stored record data. Do not reorder.
 export type gc = enum u8 {
 	Cc,	// Control
 	Cf,	// Format
 	Cn,	// Unassigned
 	Co,	// Private use
 	Cs,	// Surrogate
 	Ll,	// Lowercase letter
 	Lm,	// Modifier letter
 	Lo,	// Other letter
 	Lt,	// Titlecase letter
 	Lu,	// Uppercase letter
 	Mc,	// Spacing mark
 	Me,	// Enclosing mark
 	Mn,	// Non-spacing mark
 	Nd,	// Decimal number
 	Nl,	// Letter number
 	No,	// Other number
 	Pc,	// Connect punctuation
 	Pd,	// Dash punctuation
 	Pe,	// Close punctuation
 	Pf,	// Final punctuation
 	Pi,	// Initial punctuation
 	Po,	// Other punctuation
 	Ps,	// Open punctuation
 	Sc,	// Currency symbol
 	Sk,	// Modifier symbol
 	Sm,	// Math symbol
 	So,	// Other symbol
 	Zl,	// Line separator
 	Zp,	// Paragraph separator
 	Zs,	// Space separator
 };
 // Looks up the record for a rune and returns its General_Category as a [[gc]]
 // value.
 export fn rune_gc(rn: rune) gc = {
 	const record = get_ucdrecord(rn);
 	return record.category: gc;
 };
 // Maps a [[gc]] value to its human-readable description, for example
 // "Lowercase letter" for gc::Ll. The returned string is a static literal.
 export fn gc_name(v: gc) const str = {
 	return switch (v) {
 	case gc::Cc =>
 		yield "Control";
 	case gc::Cf =>
 		yield "Format";
 	case gc::Cn =>
 		yield "Unassigned";
 	case gc::Co =>
 		yield "Private use";
 	case gc::Cs =>
 		yield "Surrogate";
 	case gc::Ll =>
 		yield "Lowercase letter";
 	case gc::Lm =>
 		yield "Modifier letter";
 	case gc::Lo =>
 		yield "Other letter";
 	case gc::Lt =>
 		yield "Titlecase letter";
 	case gc::Lu =>
 		yield "Uppercase letter";
 	case gc::Mc =>
 		yield "Spacing mark";
 	case gc::Me =>
 		yield "Enclosing mark";
 	case gc::Mn =>
 		yield "Non-spacing mark";
 	case gc::Nd =>
 		yield "Decimal number";
 	case gc::Nl =>
 		yield "Letter number";
 	case gc::No =>
 		yield "Other number";
 	case gc::Pc =>
 		yield "Connect punctuation";
 	case gc::Pd =>
 		yield "Dash punctuation";
 	case gc::Pe =>
 		yield "Close punctuation";
 	case gc::Pf =>
 		yield "Final punctuation";
 	case gc::Pi =>
 		yield "Initial punctuation";
 	case gc::Po =>
 		yield "Other punctuation";
 	case gc::Ps =>
 		yield "Open punctuation";
 	case gc::Sc =>
 		yield "Currency symbol";
 	case gc::Sk =>
 		yield "Modifier symbol";
 	case gc::Sm =>
 		yield "Math symbol";
 	case gc::So =>
 		yield "Other symbol";
 	case gc::Zl =>
 		yield "Line separator";
 	case gc::Zp =>
 		yield "Paragraph separator";
 	case gc::Zs =>
 		yield "Space separator";
 	};
 };
 // Maps a [[gc]] value to its two-character category code, which is spelled
 // the same as the enum member (gc::Lu gives "Lu"). The returned string is a
 // static literal.
 export fn gc_code(v: gc) const str = {
 	return switch (v) {
 	case gc::Cc =>
 		yield "Cc";
 	case gc::Cf =>
 		yield "Cf";
 	case gc::Cn =>
 		yield "Cn";
 	case gc::Co =>
 		yield "Co";
 	case gc::Cs =>
 		yield "Cs";
 	case gc::Ll =>
 		yield "Ll";
 	case gc::Lm =>
 		yield "Lm";
 	case gc::Lo =>
 		yield "Lo";
 	case gc::Lt =>
 		yield "Lt";
 	case gc::Lu =>
 		yield "Lu";
 	case gc::Mc =>
 		yield "Mc";
 	case gc::Me =>
 		yield "Me";
 	case gc::Mn =>
 		yield "Mn";
 	case gc::Nd =>
 		yield "Nd";
 	case gc::Nl =>
 		yield "Nl";
 	case gc::No =>
 		yield "No";
 	case gc::Pc =>
 		yield "Pc";
 	case gc::Pd =>
 		yield "Pd";
 	case gc::Pe =>
 		yield "Pe";
 	case gc::Pf =>
 		yield "Pf";
 	case gc::Pi =>
 		yield "Pi";
 	case gc::Po =>
 		yield "Po";
 	case gc::Ps =>
 		yield "Ps";
 	case gc::Sc =>
 		yield "Sc";
 	case gc::Sk =>
 		yield "Sk";
 	case gc::Sm =>
 		yield "Sm";
 	case gc::So =>
 		yield "So";
 	case gc::Zl =>
 		yield "Zl";
 	case gc::Zp =>
 		yield "Zp";
 	case gc::Zs =>
 		yield "Zs";
 	};
 };
 // Bidirectional classification.
 //
 // [[rune_bidi]] produces values of this type by casting the u8 bidirectional
 // field of a record, so the numeric value of each member (its position in this
 // list) must agree with the stored record data. Do not reorder.
 // NOTE(review): apart from UNKNOWN, the member names look like the Unicode
 // Bidi_Class short aliases (UAX #9) -- confirm against the table generator.
 export type bidi = enum u8 {
 	UNKNOWN,
 	L,
 	LRE,
 	LRO,
 	R,
 	AL,
 	RLE,
 	RLO,
 	PDF,
 	EN,
 	ES,
 	ET,
 	AN,
 	CS,
 	NSM,
 	BN,
 	B,
 	S,
 	WS,
 	ON,
 	LRI,
 	RLI,
 	FSI,
 	PDI,
 };
 // Looks up the record for a rune and returns its bidirectional class as a
 // [[bidi]] value.
 export fn rune_bidi(rn: rune) bidi = {
 	const record = get_ucdrecord(rn);
 	return record.bidirectional: bidi;
 };
 // Unicode character Script attribute.
 //
 // The trailing comment on each member is the four-character code that
 // [[script_code]] returns for it. [[rune_script]] produces values of this type
 // by casting the u16 script field of a record, so the numeric value of each
 // member (its position in this list) must agree with the stored record data.
 // Do not reorder.
 export type script = enum u16 {
 	COMMON,			// Zyyy
 	INHERITED,		// Zinh
 	UNKNOWN,		// Zzzz
 	ADLAM,			// Adlm
 	CAUCASIAN_ALBANIAN,	// Aghb
 	AHOM,			// Ahom
 	ARABIC,			// Arab
 	IMPERIAL_ARAMAIC,	// Armi
 	ARMENIAN,		// Armn
 	AVESTAN,		// Avst
 	BALINESE,		// Bali
 	BAMUM,			// Bamu
 	BASSA_VAH,		// Bass
 	BATAK,			// Batk
 	BENGALI,		// Beng
 	BHAIKSUKI,		// Bhks
 	BOPOMOFO,		// Bopo
 	BRAHMI,			// Brah
 	BRAILLE,		// Brai
 	BUGINESE,		// Bugi
 	BUHID,			// Buhd
 	CHAKMA,			// Cakm
 	CANADIAN_SYLLABICS,	// Cans
 	CARIAN,			// Cari
 	CHAM,			// Cham
 	CHEROKEE,		// Cher
 	CHORASMIAN,		// Chrs
 	COPTIC,			// Copt
 	CYPRO_MINOAN,		// Cpmn
 	CYPRIOT,		// Cprt
 	CYRILLIC,		// Cyrl
 	DEVANAGARI,		// Deva
 	DIVES_AKURU,		// Diak
 	DOGRA,			// Dogr
 	DESERET,		// Dsrt
 	DUPLOYAN,		// Dupl
 	EGYPTIAN_HIEROGLYPHS,	// Egyp
 	ELBASAN,		// Elba
 	ELYMAIC,		// Elym
 	ETHIOPIC,		// Ethi
 	GEORGIAN,		// Geor
 	GLAGOLITIC,		// Glag
 	GUNJALA_GONDI,		// Gong
 	MASARAM_GONDI,		// Gonm
 	GOTHIC,			// Goth
 	GRANTHA,		// Gran
 	GREEK,			// Grek
 	GUJARATI,		// Gujr
 	GURMUKHI,		// Guru
 	HANGUL,			// Hang
 	HAN,			// Hani
 	HANUNOO,		// Hano
 	HATRAN,			// Hatr
 	HEBREW,			// Hebr
 	HIRAGANA,		// Hira
 	ANATOLIAN_HIEROGLYPHS,	// Hluw
 	PAHAWH_HMONG,		// Hmng
 	NYIAKENG_PUACHUE_HMONG,	// Hmnp
 	OLD_HUNGARIAN,		// Hung
 	OLD_ITALIC,		// Ital
 	JAVANESE,		// Java
 	KAYAH_LI,		// Kali
 	KATAKANA,		// Kana
 	KAWI,			// Kawi
 	KHAROSHTHI,		// Khar
 	KHMER,			// Khmr
 	KHOJKI,			// Khoj
 	KHITAN_SMALL_SCRIPT,	// Kits
 	KANNADA,		// Knda
 	KAITHI,			// Kthi
 	TAI_THAM,		// Lana
 	LAO,			// Laoo
 	LATIN,			// Latn
 	LEPCHA,			// Lepc
 	LIMBU,			// Limb
 	LINEAR_A,		// Lina
 	LINEAR_B,		// Linb
 	LISU,			// Lisu
 	LYCIAN,			// Lyci
 	LYDIAN,			// Lydi
 	MAHAJANI,		// Mahj
 	MAKASAR,		// Maka
 	MANDAIC,		// Mand
 	MANICHAEAN,		// Mani
 	MARCHEN,		// Marc
 	MEDEFAIDRIN,		// Medf
 	MENDE_KIKAKUI,		// Mend
 	MEROITIC_CURSIVE,	// Merc
 	MEROITIC_HIEROGLYPHS,	// Mero
 	MALAYALAM,		// Mlym
 	MODI,			// Modi
 	MONGOLIAN,		// Mong
 	MRO,			// Mroo
 	MEETEI_MAYEK,		// Mtei
 	MULTANI,		// Mult
 	MYANMAR,		// Mymr
 	NAG_MUNDARI,		// Nagm
 	NANDINAGARI,		// Nand
 	OLD_NORTH_ARABIAN,	// Narb
 	NABATAEAN,		// Nbat
 	NEWA,			// Newa
 	NKO,			// Nkoo
 	NUSHU,			// Nshu
 	OGHAM,			// Ogam
 	OL_CHIKI,		// Olck
 	OLD_TURKIC,		// Orkh
 	ORIYA,			// Orya
 	OSAGE,			// Osge
 	OSMANYA,		// Osma
 	OLD_UYGHUR,		// Ougr
 	PALMYRENE,		// Palm
 	PAU_CIN_HAU,		// Pauc
 	OLD_PERMIC,		// Perm
 	PHAGS_PA,		// Phag
 	INSCRIPTIONAL_PAHLAVI,	// Phli
 	PSALTER_PAHLAVI,	// Phlp
 	PHOENICIAN,		// Phnx
 	MIAO,			// Plrd
 	INSCRIPTIONAL_PARTHIAN,	// Prti
 	REJANG,			// Rjng
 	HANIFI_ROHINGYA,	// Rohg
 	RUNIC,			// Runr
 	SAMARITAN,		// Samr
 	OLD_SOUTH_ARABIAN,	// Sarb
 	SAURASHTRA,		// Saur
 	SIGNWRITING,		// Sgnw
 	SHAVIAN,		// Shaw
 	SHARADA,		// Shrd
 	SIDDHAM,		// Sidd
 	KHUDAWADI,		// Sind
 	SINHALA,		// Sinh
 	SOGDIAN,		// Sogd
 	OLD_SOGDIAN,		// Sogo
 	SORA_SOMPENG,		// Sora
 	SOYOMBO,		// Soyo
 	SUNDANESE,		// Sund
 	SYLOTI_NAGRI,		// Sylo
 	SYRIAC,			// Syrc
 	TAGBANWA,		// Tagb
 	TAKRI,			// Takr
 	TAI_LE,			// Tale
 	NEW_TAI_LUE,		// Talu
 	TAMIL,			// Taml
 	TANGUT,			// Tang
 	TAI_VIET,		// Tavt
 	TELUGU,			// Telu
 	TIFINAGH,		// Tfng
 	TAGALOG,		// Tglg
 	THAANA,			// Thaa
 	THAI,			// Thai
 	TIBETAN,		// Tibt
 	TIRHUTA,		// Tirh
 	TANGSA,			// Tnsa
 	TOTO,			// Toto
 	UGARITIC,		// Ugar
 	VAI,			// Vaii
 	VITHKUQI,		// Vith
 	WARANG_CITI,		// Wara
 	WANCHO,			// Wcho
 	OLD_PERSIAN,		// Xpeo
 	CUNEIFORM,		// Xsux
 	YEZIDI,			// Yezi
 	YI,			// Yiii
 	ZANABAZAR_SQUARE,	// Zanb
 	MATH,			// Zmth
 };
 // Looks up the record for a rune and returns its Script property as a
 // [[script]] value.
 export fn rune_script(rn: rune) script = {
 	const record = get_ucdrecord(rn);
 	return record.script: script;
 };
 // Maps a [[script]] value to its four-character script code, for example
 // "Latn" for script::LATIN. The arms are listed in the same order as the
 // members of [[script]]. The returned string is a static literal.
 export fn script_code(sc: script) const str = {
 	return switch (sc) {
 	case script::COMMON => yield "Zyyy";
 	case script::INHERITED => yield "Zinh";
 	case script::UNKNOWN => yield "Zzzz";
 	case script::ADLAM => yield "Adlm";
 	case script::CAUCASIAN_ALBANIAN => yield "Aghb";
 	case script::AHOM => yield "Ahom";
 	case script::ARABIC => yield "Arab";
 	case script::IMPERIAL_ARAMAIC => yield "Armi";
 	case script::ARMENIAN => yield "Armn";
 	case script::AVESTAN => yield "Avst";
 	case script::BALINESE => yield "Bali";
 	case script::BAMUM => yield "Bamu";
 	case script::BASSA_VAH => yield "Bass";
 	case script::BATAK => yield "Batk";
 	case script::BENGALI => yield "Beng";
 	case script::BHAIKSUKI => yield "Bhks";
 	case script::BOPOMOFO => yield "Bopo";
 	case script::BRAHMI => yield "Brah";
 	case script::BRAILLE => yield "Brai";
 	case script::BUGINESE => yield "Bugi";
 	case script::BUHID => yield "Buhd";
 	case script::CHAKMA => yield "Cakm";
 	case script::CANADIAN_SYLLABICS => yield "Cans";
 	case script::CARIAN => yield "Cari";
 	case script::CHAM => yield "Cham";
 	case script::CHEROKEE => yield "Cher";
 	case script::CHORASMIAN => yield "Chrs";
 	case script::COPTIC => yield "Copt";
 	case script::CYPRO_MINOAN => yield "Cpmn";
 	case script::CYPRIOT => yield "Cprt";
 	case script::CYRILLIC => yield "Cyrl";
 	case script::DEVANAGARI => yield "Deva";
 	case script::DIVES_AKURU => yield "Diak";
 	case script::DOGRA => yield "Dogr";
 	case script::DESERET => yield "Dsrt";
 	case script::DUPLOYAN => yield "Dupl";
 	case script::EGYPTIAN_HIEROGLYPHS => yield "Egyp";
 	case script::ELBASAN => yield "Elba";
 	case script::ELYMAIC => yield "Elym";
 	case script::ETHIOPIC => yield "Ethi";
 	case script::GEORGIAN => yield "Geor";
 	case script::GLAGOLITIC => yield "Glag";
 	case script::GUNJALA_GONDI => yield "Gong";
 	case script::MASARAM_GONDI => yield "Gonm";
 	case script::GOTHIC => yield "Goth";
 	case script::GRANTHA => yield "Gran";
 	case script::GREEK => yield "Grek";
 	case script::GUJARATI => yield "Gujr";
 	case script::GURMUKHI => yield "Guru";
 	case script::HANGUL => yield "Hang";
 	case script::HAN => yield "Hani";
 	case script::HANUNOO => yield "Hano";
 	case script::HATRAN => yield "Hatr";
 	case script::HEBREW => yield "Hebr";
 	case script::HIRAGANA => yield "Hira";
 	case script::ANATOLIAN_HIEROGLYPHS => yield "Hluw";
 	case script::PAHAWH_HMONG => yield "Hmng";
 	case script::NYIAKENG_PUACHUE_HMONG => yield "Hmnp";
 	case script::OLD_HUNGARIAN => yield "Hung";
 	case script::OLD_ITALIC => yield "Ital";
 	case script::JAVANESE => yield "Java";
 	case script::KAYAH_LI => yield "Kali";
 	case script::KATAKANA => yield "Kana";
 	case script::KAWI => yield "Kawi";
 	case script::KHAROSHTHI => yield "Khar";
 	case script::KHMER => yield "Khmr";
 	case script::KHOJKI => yield "Khoj";
 	case script::KHITAN_SMALL_SCRIPT => yield "Kits";
 	case script::KANNADA => yield "Knda";
 	case script::KAITHI => yield "Kthi";
 	case script::TAI_THAM => yield "Lana";
 	case script::LAO => yield "Laoo";
 	case script::LATIN => yield "Latn";
 	case script::LEPCHA => yield "Lepc";
 	case script::LIMBU => yield "Limb";
 	case script::LINEAR_A => yield "Lina";
 	case script::LINEAR_B => yield "Linb";
 	case script::LISU => yield "Lisu";
 	case script::LYCIAN => yield "Lyci";
 	case script::LYDIAN => yield "Lydi";
 	case script::MAHAJANI => yield "Mahj";
 	case script::MAKASAR => yield "Maka";
 	case script::MANDAIC => yield "Mand";
 	case script::MANICHAEAN => yield "Mani";
 	case script::MARCHEN => yield "Marc";
 	case script::MEDEFAIDRIN => yield "Medf";
 	case script::MENDE_KIKAKUI => yield "Mend";
 	case script::MEROITIC_CURSIVE => yield "Merc";
 	case script::MEROITIC_HIEROGLYPHS => yield "Mero";
 	case script::MALAYALAM => yield "Mlym";
 	case script::MODI => yield "Modi";
 	case script::MONGOLIAN => yield "Mong";
 	case script::MRO => yield "Mroo";
 	case script::MEETEI_MAYEK => yield "Mtei";
 	case script::MULTANI => yield "Mult";
 	case script::MYANMAR => yield "Mymr";
 	case script::NAG_MUNDARI => yield "Nagm";
 	case script::NANDINAGARI => yield "Nand";
 	case script::OLD_NORTH_ARABIAN => yield "Narb";
 	case script::NABATAEAN => yield "Nbat";
 	case script::NEWA => yield "Newa";
 	case script::NKO => yield "Nkoo";
 	case script::NUSHU => yield "Nshu";
 	case script::OGHAM => yield "Ogam";
 	case script::OL_CHIKI => yield "Olck";
 	case script::OLD_TURKIC => yield "Orkh";
 	case script::ORIYA => yield "Orya";
 	case script::OSAGE => yield "Osge";
 	case script::OSMANYA => yield "Osma";
 	case script::OLD_UYGHUR => yield "Ougr";
 	case script::PALMYRENE => yield "Palm";
 	case script::PAU_CIN_HAU => yield "Pauc";
 	case script::OLD_PERMIC => yield "Perm";
 	case script::PHAGS_PA => yield "Phag";
 	case script::INSCRIPTIONAL_PAHLAVI => yield "Phli";
 	case script::PSALTER_PAHLAVI => yield "Phlp";
 	case script::PHOENICIAN => yield "Phnx";
 	case script::MIAO => yield "Plrd";
 	case script::INSCRIPTIONAL_PARTHIAN => yield "Prti";
 	case script::REJANG => yield "Rjng";
 	case script::HANIFI_ROHINGYA => yield "Rohg";
 	case script::RUNIC => yield "Runr";
 	case script::SAMARITAN => yield "Samr";
 	case script::OLD_SOUTH_ARABIAN => yield "Sarb";
 	case script::SAURASHTRA => yield "Saur";
 	case script::SIGNWRITING => yield "Sgnw";
 	case script::SHAVIAN => yield "Shaw";
 	case script::SHARADA => yield "Shrd";
 	case script::SIDDHAM => yield "Sidd";
 	case script::KHUDAWADI => yield "Sind";
 	case script::SINHALA => yield "Sinh";
 	case script::SOGDIAN => yield "Sogd";
 	case script::OLD_SOGDIAN => yield "Sogo";
 	case script::SORA_SOMPENG => yield "Sora";
 	case script::SOYOMBO => yield "Soyo";
 	case script::SUNDANESE => yield "Sund";
 	case script::SYLOTI_NAGRI => yield "Sylo";
 	case script::SYRIAC => yield "Syrc";
 	case script::TAGBANWA => yield "Tagb";
 	case script::TAKRI => yield "Takr";
 	case script::TAI_LE => yield "Tale";
 	case script::NEW_TAI_LUE => yield "Talu";
 	case script::TAMIL => yield "Taml";
 	case script::TANGUT => yield "Tang";
 	case script::TAI_VIET => yield "Tavt";
 	case script::TELUGU => yield "Telu";
 	case script::TIFINAGH => yield "Tfng";
 	case script::TAGALOG => yield "Tglg";
 	case script::THAANA => yield "Thaa";
 	case script::THAI => yield "Thai";
 	case script::TIBETAN => yield "Tibt";
 	case script::TIRHUTA => yield "Tirh";
 	case script::TANGSA => yield "Tnsa";
 	case script::TOTO => yield "Toto";
 	case script::UGARITIC => yield "Ugar";
 	case script::VAI => yield "Vaii";
 	case script::VITHKUQI => yield "Vith";
 	case script::WARANG_CITI => yield "Wara";
 	case script::WANCHO => yield "Wcho";
 	case script::OLD_PERSIAN => yield "Xpeo";
 	case script::CUNEIFORM => yield "Xsux";
 	case script::YEZIDI => yield "Yezi";
 	case script::YI => yield "Yiii";
 	case script::ZANABAZAR_SQUARE => yield "Zanb";
 	case script::MATH => yield "Zmth";
 	};
 };
 // Line break classification.
 //
 // Member names are the codes returned by [[line_break_code]].
 // [[rune_line_break]] produces values of this type by casting the u8
 // line_break field of a record, so the numeric value of each member (its
 // position in this list) must agree with the stored record data. Do not
 // reorder.
 // NOTE(review): the member names look like the Unicode Line_Break short
 // aliases (UAX #14) -- confirm against the table generator.
 export type line_break = enum u8 {
 	XX,
 	AI,
 	BK,
 	CJ,
 	CR,
 	LF,
 	NL,
 	SA,
 	SG,
 	SP,
 	OP,
 	CL,
 	CP,
 	QU,
 	GL,
 	NS,
 	EX,
 	SY,
 	IS,
 	PR,
 	PO,
 	NU,
 	AL,
 	HL,
 	ID,
 	IN,
 	HY,
 	BA,
 	BB,
 	B2,
 	ZW,
 	CM,
 	WJ,
 	H2,
 	H3,
 	JL,
 	JV,
 	JT,
 	RI,
 	EB,
 	EM,
 	ZWJ,
 	CB,
 };
 // Looks up the record for a rune and returns its line break class as a
 // [[line_break]] value.
 export fn rune_line_break(rn: rune) line_break = {
 	const record = get_ucdrecord(rn);
 	return record.line_break: line_break;
 };
 // Maps a [[line_break]] value to its short code, which is spelled the same as
 // the enum member. Codes are two characters long, except "ZWJ", which is
 // three. The arms are listed in the same order as the members of
 // [[line_break]]. The returned string is a static literal.
 export fn line_break_code(lb: line_break) const str = {
 	return switch (lb) {
 	case line_break::XX => yield "XX";
 	case line_break::AI => yield "AI";
 	case line_break::BK => yield "BK";
 	case line_break::CJ => yield "CJ";
 	case line_break::CR => yield "CR";
 	case line_break::LF => yield "LF";
 	case line_break::NL => yield "NL";
 	case line_break::SA => yield "SA";
 	case line_break::SG => yield "SG";
 	case line_break::SP => yield "SP";
 	case line_break::OP => yield "OP";
 	case line_break::CL => yield "CL";
 	case line_break::CP => yield "CP";
 	case line_break::QU => yield "QU";
 	case line_break::GL => yield "GL";
 	case line_break::NS => yield "NS";
 	case line_break::EX => yield "EX";
 	case line_break::SY => yield "SY";
 	case line_break::IS => yield "IS";
 	case line_break::PR => yield "PR";
 	case line_break::PO => yield "PO";
 	case line_break::NU => yield "NU";
 	case line_break::AL => yield "AL";
 	case line_break::HL => yield "HL";
 	case line_break::ID => yield "ID";
 	case line_break::IN => yield "IN";
 	case line_break::HY => yield "HY";
 	case line_break::BA => yield "BA";
 	case line_break::BB => yield "BB";
 	case line_break::B2 => yield "B2";
 	case line_break::ZW => yield "ZW";
 	case line_break::CM => yield "CM";
 	case line_break::WJ => yield "WJ";
 	case line_break::H2 => yield "H2";
 	case line_break::H3 => yield "H3";
 	case line_break::JL => yield "JL";
 	case line_break::JV => yield "JV";
 	case line_break::JT => yield "JT";
 	case line_break::RI => yield "RI";
 	case line_break::EB => yield "EB";
 	case line_break::EM => yield "EM";
 	case line_break::ZWJ => yield "ZWJ";
 	case line_break::CB => yield "CB";
 	};
 };
--- a/vendor/hare-unicode/unicode/ucd_gen.ha
+++ b/vendor/hare-unicode/unicode/ucd_gen.ha