Add 'vendor/hare-unicode/' from commit '1488c26f46f7f8568235eaee6224983ac46e78ff'
git-subtree-dir: vendor/hare-unicode git-subtree-mainline:57979aa6fc
git-subtree-split:1488c26f46
This commit is contained in:
commit
c70ec9f648
9 changed files with 6329 additions and 0 deletions
2
vendor/hare-unicode/.gitignore
vendored
Normal file
2
vendor/hare-unicode/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
*.zip
|
||||
/.data
|
367
vendor/hare-unicode/COPYING
vendored
Normal file
367
vendor/hare-unicode/COPYING
vendored
Normal file
|
@ -0,0 +1,367 @@
|
|||
Mozilla Public License Version 2.0
|
||||
==================================
|
||||
|
||||
1. Definitions
|
||||
--------------
|
||||
|
||||
1.1. "Contributor"
|
||||
means each individual or legal entity that creates, contributes to
|
||||
the creation of, or owns Covered Software.
|
||||
|
||||
1.2. "Contributor Version"
|
||||
means the combination of the Contributions of others (if any) used
|
||||
by a Contributor and that particular Contributor's Contribution.
|
||||
|
||||
1.3. "Contribution"
|
||||
means Covered Software of a particular Contributor.
|
||||
|
||||
1.4. "Covered Software"
|
||||
means Source Code Form to which the initial Contributor has attached
|
||||
the notice in Exhibit A, the Executable Form of such Source Code
|
||||
Form, and Modifications of such Source Code Form, in each case
|
||||
including portions thereof.
|
||||
|
||||
1.5. "Incompatible With Secondary Licenses"
|
||||
means
|
||||
|
||||
(a) that the initial Contributor has attached the notice described
|
||||
in Exhibit B to the Covered Software; or
|
||||
|
||||
(b) that the Covered Software was made available under the terms of
|
||||
version 1.1 or earlier of the License, but not also under the
|
||||
terms of a Secondary License.
|
||||
|
||||
1.6. "Executable Form"
|
||||
means any form of the work other than Source Code Form.
|
||||
|
||||
1.7. "Larger Work"
|
||||
means a work that combines Covered Software with other material, in
|
||||
a separate file or files, that is not Covered Software.
|
||||
|
||||
1.8. "License"
|
||||
means this document.
|
||||
|
||||
1.9. "Licensable"
|
||||
means having the right to grant, to the maximum extent possible,
|
||||
whether at the time of the initial grant or subsequently, any and
|
||||
all of the rights conveyed by this License.
|
||||
|
||||
1.10. "Modifications"
|
||||
means any of the following:
|
||||
|
||||
(a) any file in Source Code Form that results from an addition to,
|
||||
deletion from, or modification of the contents of Covered
|
||||
Software; or
|
||||
|
||||
(b) any new file in Source Code Form that contains any Covered
|
||||
Software.
|
||||
|
||||
1.11. "Patent Claims" of a Contributor
|
||||
means any patent claim(s), including without limitation, method,
|
||||
process, and apparatus claims, in any patent Licensable by such
|
||||
Contributor that would be infringed, but for the grant of the
|
||||
License, by the making, using, selling, offering for sale, having
|
||||
made, import, or transfer of either its Contributions or its
|
||||
Contributor Version.
|
||||
|
||||
1.12. "Secondary License"
|
||||
means either the GNU General Public License, Version 2.0, the GNU
|
||||
Lesser General Public License, Version 2.1, the GNU Affero General
|
||||
Public License, Version 3.0, or any later versions of those
|
||||
licenses.
|
||||
|
||||
1.13. "Source Code Form"
|
||||
means the form of the work preferred for making modifications.
|
||||
|
||||
1.14. "You" (or "Your")
|
||||
means an individual or a legal entity exercising rights under this
|
||||
License. For legal entities, "You" includes any entity that
|
||||
controls, is controlled by, or is under common control with You. For
|
||||
purposes of this definition, "control" means (a) the power, direct
|
||||
or indirect, to cause the direction or management of such entity,
|
||||
whether by contract or otherwise, or (b) ownership of more than
|
||||
fifty percent (50%) of the outstanding shares or beneficial
|
||||
ownership of such entity.
|
||||
|
||||
2. License Grants and Conditions
|
||||
--------------------------------
|
||||
|
||||
2.1. Grants
|
||||
|
||||
Each Contributor hereby grants You a world-wide, royalty-free,
|
||||
non-exclusive license:
|
||||
|
||||
(a) under intellectual property rights (other than patent or trademark)
|
||||
Licensable by such Contributor to use, reproduce, make available,
|
||||
modify, display, perform, distribute, and otherwise exploit its
|
||||
Contributions, either on an unmodified basis, with Modifications, or
|
||||
as part of a Larger Work; and
|
||||
|
||||
(b) under Patent Claims of such Contributor to make, use, sell, offer
|
||||
for sale, have made, import, and otherwise transfer either its
|
||||
Contributions or its Contributor Version.
|
||||
|
||||
2.2. Effective Date
|
||||
|
||||
The licenses granted in Section 2.1 with respect to any Contribution
|
||||
become effective for each Contribution on the date the Contributor first
|
||||
distributes such Contribution.
|
||||
|
||||
2.3. Limitations on Grant Scope
|
||||
|
||||
The licenses granted in this Section 2 are the only rights granted under
|
||||
this License. No additional rights or licenses will be implied from the
|
||||
distribution or licensing of Covered Software under this License.
|
||||
Notwithstanding Section 2.1(b) above, no patent license is granted by a
|
||||
Contributor:
|
||||
|
||||
(a) for any code that a Contributor has removed from Covered Software;
|
||||
or
|
||||
|
||||
(b) for infringements caused by: (i) Your and any other third party's
|
||||
modifications of Covered Software, or (ii) the combination of its
|
||||
Contributions with other software (except as part of its Contributor
|
||||
Version); or
|
||||
|
||||
(c) under Patent Claims infringed by Covered Software in the absence of
|
||||
its Contributions.
|
||||
|
||||
This License does not grant any rights in the trademarks, service marks,
|
||||
or logos of any Contributor (except as may be necessary to comply with
|
||||
the notice requirements in Section 3.4).
|
||||
|
||||
2.4. Subsequent Licenses
|
||||
|
||||
No Contributor makes additional grants as a result of Your choice to
|
||||
distribute the Covered Software under a subsequent version of this
|
||||
License (see Section 10.2) or under the terms of a Secondary License (if
|
||||
permitted under the terms of Section 3.3).
|
||||
|
||||
2.5. Representation
|
||||
|
||||
Each Contributor represents that the Contributor believes its
|
||||
Contributions are its original creation(s) or it has sufficient rights
|
||||
to grant the rights to its Contributions conveyed by this License.
|
||||
|
||||
2.6. Fair Use
|
||||
|
||||
This License is not intended to limit any rights You have under
|
||||
applicable copyright doctrines of fair use, fair dealing, or other
|
||||
equivalents.
|
||||
|
||||
2.7. Conditions
|
||||
|
||||
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
|
||||
in Section 2.1.
|
||||
|
||||
3. Responsibilities
|
||||
-------------------
|
||||
|
||||
3.1. Distribution of Source Form
|
||||
|
||||
All distribution of Covered Software in Source Code Form, including any
|
||||
Modifications that You create or to which You contribute, must be under
|
||||
the terms of this License. You must inform recipients that the Source
|
||||
Code Form of the Covered Software is governed by the terms of this
|
||||
License, and how they can obtain a copy of this License. You may not
|
||||
attempt to alter or restrict the recipients' rights in the Source Code
|
||||
Form.
|
||||
|
||||
3.2. Distribution of Executable Form
|
||||
|
||||
If You distribute Covered Software in Executable Form then:
|
||||
|
||||
(a) such Covered Software must also be made available in Source Code
|
||||
Form, as described in Section 3.1, and You must inform recipients of
|
||||
the Executable Form how they can obtain a copy of such Source Code
|
||||
Form by reasonable means in a timely manner, at a charge no more
|
||||
than the cost of distribution to the recipient; and
|
||||
|
||||
(b) You may distribute such Executable Form under the terms of this
|
||||
License, or sublicense it under different terms, provided that the
|
||||
license for the Executable Form does not attempt to limit or alter
|
||||
the recipients' rights in the Source Code Form under this License.
|
||||
|
||||
3.3. Distribution of a Larger Work
|
||||
|
||||
You may create and distribute a Larger Work under terms of Your choice,
|
||||
provided that You also comply with the requirements of this License for
|
||||
the Covered Software. If the Larger Work is a combination of Covered
|
||||
Software with a work governed by one or more Secondary Licenses, and the
|
||||
Covered Software is not Incompatible With Secondary Licenses, this
|
||||
License permits You to additionally distribute such Covered Software
|
||||
under the terms of such Secondary License(s), so that the recipient of
|
||||
the Larger Work may, at their option, further distribute the Covered
|
||||
Software under the terms of either this License or such Secondary
|
||||
License(s).
|
||||
|
||||
3.4. Notices
|
||||
|
||||
You may not remove or alter the substance of any license notices
|
||||
(including copyright notices, patent notices, disclaimers of warranty,
|
||||
or limitations of liability) contained within the Source Code Form of
|
||||
the Covered Software, except that You may alter any license notices to
|
||||
the extent required to remedy known factual inaccuracies.
|
||||
|
||||
3.5. Application of Additional Terms
|
||||
|
||||
You may choose to offer, and to charge a fee for, warranty, support,
|
||||
indemnity or liability obligations to one or more recipients of Covered
|
||||
Software. However, You may do so only on Your own behalf, and not on
|
||||
behalf of any Contributor. You must make it absolutely clear that any
|
||||
such warranty, support, indemnity, or liability obligation is offered by
|
||||
You alone, and You hereby agree to indemnify every Contributor for any
|
||||
liability incurred by such Contributor as a result of warranty, support,
|
||||
indemnity or liability terms You offer. You may include additional
|
||||
disclaimers of warranty and limitations of liability specific to any
|
||||
jurisdiction.
|
||||
|
||||
4. Inability to Comply Due to Statute or Regulation
|
||||
---------------------------------------------------
|
||||
|
||||
If it is impossible for You to comply with any of the terms of this
|
||||
License with respect to some or all of the Covered Software due to
|
||||
statute, judicial order, or regulation then You must: (a) comply with
|
||||
the terms of this License to the maximum extent possible; and (b)
|
||||
describe the limitations and the code they affect. Such description must
|
||||
be placed in a text file included with all distributions of the Covered
|
||||
Software under this License. Except to the extent prohibited by statute
|
||||
or regulation, such description must be sufficiently detailed for a
|
||||
recipient of ordinary skill to be able to understand it.
|
||||
|
||||
5. Termination
|
||||
--------------
|
||||
|
||||
5.1. The rights granted under this License will terminate automatically
|
||||
if You fail to comply with any of its terms. However, if You become
|
||||
compliant, then the rights granted under this License from a particular
|
||||
Contributor are reinstated (a) provisionally, unless and until such
|
||||
Contributor explicitly and finally terminates Your grants, and (b) on an
|
||||
ongoing basis, if such Contributor fails to notify You of the
|
||||
non-compliance by some reasonable means prior to 60 days after You have
|
||||
come back into compliance. Moreover, Your grants from a particular
|
||||
Contributor are reinstated on an ongoing basis if such Contributor
|
||||
notifies You of the non-compliance by some reasonable means, this is the
|
||||
first time You have received notice of non-compliance with this License
|
||||
from such Contributor, and You become compliant prior to 30 days after
|
||||
Your receipt of the notice.
|
||||
|
||||
5.2. If You initiate litigation against any entity by asserting a patent
|
||||
infringement claim (excluding declaratory judgment actions,
|
||||
counter-claims, and cross-claims) alleging that a Contributor Version
|
||||
directly or indirectly infringes any patent, then the rights granted to
|
||||
You by any and all Contributors for the Covered Software under Section
|
||||
2.1 of this License shall terminate.
|
||||
|
||||
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
|
||||
end user license agreements (excluding distributors and resellers) which
|
||||
have been validly granted by You or Your distributors under this License
|
||||
prior to termination shall survive termination.
|
||||
|
||||
************************************************************************
|
||||
* *
|
||||
* 6. Disclaimer of Warranty *
|
||||
* ------------------------- *
|
||||
* *
|
||||
* Covered Software is provided under this License on an "as is" *
|
||||
* basis, without warranty of any kind, either expressed, implied, or *
|
||||
* statutory, including, without limitation, warranties that the *
|
||||
* Covered Software is free of defects, merchantable, fit for a *
|
||||
* particular purpose or non-infringing. The entire risk as to the *
|
||||
* quality and performance of the Covered Software is with You. *
|
||||
* Should any Covered Software prove defective in any respect, You *
|
||||
* (not any Contributor) assume the cost of any necessary servicing, *
|
||||
* repair, or correction. This disclaimer of warranty constitutes an *
|
||||
* essential part of this License. No use of any Covered Software is *
|
||||
* authorized under this License except under this disclaimer. *
|
||||
* *
|
||||
************************************************************************
|
||||
|
||||
************************************************************************
|
||||
* *
|
||||
* 7. Limitation of Liability *
|
||||
* -------------------------- *
|
||||
* *
|
||||
* Under no circumstances and under no legal theory, whether tort *
|
||||
* (including negligence), contract, or otherwise, shall any *
|
||||
* Contributor, or anyone who distributes Covered Software as *
|
||||
* permitted above, be liable to You for any direct, indirect, *
|
||||
* special, incidental, or consequential damages of any character *
|
||||
* including, without limitation, damages for lost profits, loss of *
|
||||
* goodwill, work stoppage, computer failure or malfunction, or any *
|
||||
* and all other commercial damages or losses, even if such party *
|
||||
* shall have been informed of the possibility of such damages. This *
|
||||
* limitation of liability shall not apply to liability for death or *
|
||||
* personal injury resulting from such party's negligence to the *
|
||||
* extent applicable law prohibits such limitation. Some *
|
||||
* jurisdictions do not allow the exclusion or limitation of *
|
||||
* incidental or consequential damages, so this exclusion and *
|
||||
* limitation may not apply to You. *
|
||||
* *
|
||||
************************************************************************
|
||||
|
||||
8. Litigation
|
||||
-------------
|
||||
|
||||
Any litigation relating to this License may be brought only in the
|
||||
courts of a jurisdiction where the defendant maintains its principal
|
||||
place of business and such litigation shall be governed by laws of that
|
||||
jurisdiction, without reference to its conflict-of-law provisions.
|
||||
Nothing in this Section shall prevent a party's ability to bring
|
||||
cross-claims or counter-claims.
|
||||
|
||||
9. Miscellaneous
|
||||
----------------
|
||||
|
||||
This License represents the complete agreement concerning the subject
|
||||
matter hereof. If any provision of this License is held to be
|
||||
unenforceable, such provision shall be reformed only to the extent
|
||||
necessary to make it enforceable. Any law or regulation which provides
|
||||
that the language of a contract shall be construed against the drafter
|
||||
shall not be used to construe this License against a Contributor.
|
||||
|
||||
10. Versions of the License
|
||||
---------------------------
|
||||
|
||||
10.1. New Versions
|
||||
|
||||
Mozilla Foundation is the license steward. Except as provided in Section
|
||||
10.3, no one other than the license steward has the right to modify or
|
||||
publish new versions of this License. Each version will be given a
|
||||
distinguishing version number.
|
||||
|
||||
10.2. Effect of New Versions
|
||||
|
||||
You may distribute the Covered Software under the terms of the version
|
||||
of the License under which You originally received the Covered Software,
|
||||
or under the terms of any subsequent version published by the license
|
||||
steward.
|
||||
|
||||
10.3. Modified Versions
|
||||
|
||||
If you create software not governed by this License, and you want to
|
||||
create a new license for such software, you may create and use a
|
||||
modified version of this License if you rename the license and remove
|
||||
any references to the name of the license steward (except to note that
|
||||
such modified license differs from this License).
|
||||
|
||||
10.4. Distributing Source Code Form that is Incompatible With Secondary
|
||||
Licenses
|
||||
|
||||
If You choose to distribute Source Code Form that is Incompatible With
|
||||
Secondary Licenses under the terms of this version of the License, the
|
||||
notice described in Exhibit B of this License must be attached.
|
||||
|
||||
Exhibit A - Source Code Form License Notice
|
||||
-------------------------------------------
|
||||
|
||||
This Source Code Form is subject to the terms of the Mozilla Public
|
||||
License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
If it is not possible or desirable to put the notice in a particular
|
||||
file, then You may include the notice in a location (such as a LICENSE
|
||||
file in a relevant directory) where a recipient would be likely to look
|
||||
for such a notice.
|
||||
|
||||
You may add additional accurate notices of copyright ownership.
|
40
vendor/hare-unicode/cmd/linebreak/main.ha
vendored
Normal file
40
vendor/hare-unicode/cmd/linebreak/main.ha
vendored
Normal file
|
@ -0,0 +1,40 @@
|
|||
use encoding::hex;
|
||||
use fmt;
|
||||
use os;
|
||||
use strings;
|
||||
use unicode;
|
||||
|
||||
export fn main() void = {
|
||||
const input = os::args[1];
|
||||
const data = strings::toutf8(input);
|
||||
hex::dump(os::stdout, data)!;
|
||||
|
||||
fmt::println(input)!;
|
||||
|
||||
let ix = 0u;
|
||||
const lb = unicode::new_line_breaker(input);
|
||||
for (const (pos, _, mand) => unicode::next_line_break(&lb)) {
|
||||
for (ix < pos; ix += 1) {
|
||||
fmt::print(' ')!;
|
||||
};
|
||||
ix += 1;
|
||||
|
||||
if (mand) {
|
||||
fmt::println('|')!;
|
||||
} else {
|
||||
fmt::print('^')!;
|
||||
};
|
||||
};
|
||||
|
||||
fmt::println()!;
|
||||
fmt::println()!;
|
||||
|
||||
fmt::println("Line break opportunities:")!;
|
||||
const lb = unicode::new_line_breaker(input);
|
||||
for (const (pos, bpos, mand) => unicode::next_line_break(&lb)) {
|
||||
fmt::printfln("- {}:{} {} (before '{}'/0x{:x})", pos, bpos,
|
||||
if (mand) "(mandatory)" else "",
|
||||
strings::sub(input, pos, pos+1),
|
||||
data[bpos])!;
|
||||
};
|
||||
};
|
25
vendor/hare-unicode/cmd/ucdtest/main.ha
vendored
Normal file
25
vendor/hare-unicode/cmd/ucdtest/main.ha
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
use fmt;
|
||||
use os;
|
||||
use strings;
|
||||
use unicode;
|
||||
|
||||
export fn main() void = {
|
||||
const in = os::args[1];
|
||||
const iter = strings::iter(in);
|
||||
|
||||
for (true) {
|
||||
const rn = match (strings::next(&iter)) {
|
||||
case let rn: rune =>
|
||||
yield rn;
|
||||
case => break;
|
||||
};
|
||||
const gc = unicode::rune_gc(rn);
|
||||
const sc = unicode::rune_script(rn);
|
||||
const lb = unicode::rune_line_break(rn);
|
||||
fmt::printfln("'{}'/{:x}: {} : {} : {}",
|
||||
rn, rn: u32,
|
||||
unicode::gc_code(gc),
|
||||
unicode::script_code(sc),
|
||||
unicode::line_break_code(lb))!;
|
||||
};
|
||||
};
|
860
vendor/hare-unicode/scripts/gen-ucd.py
vendored
Executable file
860
vendor/hare-unicode/scripts/gen-ucd.py
vendored
Executable file
|
@ -0,0 +1,860 @@
|
|||
#!/usr/bin/python3
|
||||
# Based on CPython's unicodedata generation script,
|
||||
# Tools/unicode/makeunicodedata.py, forked and adapted for Hare
|
||||
#
|
||||
# PSF License
|
||||
#
|
||||
# (re)generate unicode property and type databases
|
||||
#
|
||||
# This script converts Unicode database files to Modules/unicodedata_db.h,
|
||||
# Modules/unicodename_db.h, and Objects/unicodetype_db.h
|
||||
#
|
||||
# history:
|
||||
# 2000-09-24 fl created (based on bits and pieces from unidb)
|
||||
# 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table
|
||||
# 2000-09-25 fl added character type table
|
||||
# 2000-09-26 fl added LINEBREAK, DECIMAL, and DIGIT flags/fields (2.0)
|
||||
# 2000-11-03 fl expand first/last ranges
|
||||
# 2001-01-19 fl added character name tables (2.1)
|
||||
# 2001-01-21 fl added decomp compression; dynamic phrasebook threshold
|
||||
# 2002-09-11 wd use string methods
|
||||
# 2002-10-18 mvl update to Unicode 3.2
|
||||
# 2002-10-22 mvl generate NFC tables
|
||||
# 2002-11-24 mvl expand all ranges, sort names version-independently
|
||||
# 2002-11-25 mvl add UNIDATA_VERSION
|
||||
# 2004-05-29 perky add east asian width information
|
||||
# 2006-03-10 mvl update to Unicode 4.1; add UCD 3.2 delta
|
||||
# 2008-06-11 gb add PRINTABLE_MASK for Atsuo Ishimoto's ascii() patch
|
||||
# 2011-10-21 ezio add support for name aliases and named sequences
|
||||
# 2012-01 benjamin add full case mappings
|
||||
#
|
||||
# written by Fredrik Lundh (fredrik@pythonware.com)
|
||||
#
|
||||
|
||||
import dataclasses
|
||||
import os
|
||||
import sys
|
||||
import zipfile
|
||||
|
||||
from functools import partial
|
||||
from textwrap import dedent
|
||||
from typing import Iterator, List, Optional, Set, Tuple
|
||||
|
||||
SCRIPT = sys.argv[0]
|
||||
VERSION = "3.3"
|
||||
|
||||
# The Unicode Database
|
||||
# --------------------
|
||||
# When changing UCD version please update
|
||||
# * Doc/library/stdtypes.rst, and
|
||||
# * Doc/library/unicodedata.rst
|
||||
# * Doc/reference/lexical_analysis.rst (two occurrences)
|
||||
UNIDATA_VERSION = "13.0.0"
|
||||
UNICODE_DATA = "UnicodeData%s.txt"
|
||||
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
|
||||
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
|
||||
UNIHAN = "Unihan%s.zip"
|
||||
DERIVED_CORE_PROPERTIES = "DerivedCoreProperties%s.txt"
|
||||
DERIVEDNORMALIZATION_PROPS = "DerivedNormalizationProps%s.txt"
|
||||
LINE_BREAK = "LineBreak%s.txt"
|
||||
NAME_ALIASES = "NameAliases%s.txt"
|
||||
NAMED_SEQUENCES = "NamedSequences%s.txt"
|
||||
SPECIAL_CASING = "SpecialCasing%s.txt"
|
||||
CASE_FOLDING = "CaseFolding%s.txt"
|
||||
SCRIPTS = "Scripts%s.txt"
|
||||
|
||||
# Private Use Areas -- in planes 1, 15, 16
|
||||
PUA_1 = range(0xE000, 0xF900)
|
||||
PUA_15 = range(0xF0000, 0xFFFFE)
|
||||
PUA_16 = range(0x100000, 0x10FFFE)
|
||||
|
||||
# we use this ranges of PUA_15 to store name aliases and named sequences
|
||||
NAME_ALIASES_START = 0xF0000
|
||||
NAMED_SEQUENCES_START = 0xF0200
|
||||
|
||||
old_versions = []
|
||||
|
||||
# Order must match ucd.ha
|
||||
CATEGORY_NAMES = [
|
||||
"Cc", "Cf", "Cn", "Co", "Cs", "Ll", "Lm", "Lo", "Lt", "Lu", "Mc", "Me",
|
||||
"Mn", "Nd", "Nl", "No", "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps", "Sc",
|
||||
"Sk", "Sm", "So", "Zl", "Zp", "Zs",
|
||||
]
|
||||
|
||||
BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
|
||||
"PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
|
||||
"ON", "LRI", "RLI", "FSI", "PDI" ]
|
||||
|
||||
EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ]
|
||||
|
||||
MANDATORY_LINE_BREAKS = [ "BK", "CR", "LF", "NL" ]
|
||||
|
||||
LINE_BREAKS = [
|
||||
"XX", "AI", "BK", "CJ", "CR", "LF", "NL", "SA", "SG", "SP", "OP", "CL",
|
||||
"CP", "QU", "GL", "NS", "EX", "SY", "IS", "PR", "PO", "NU", "AL", "HL",
|
||||
"ID", "IN", "HY", "BA", "BB", "B2", "ZW", "CM", "WJ", "H2", "H3", "JL",
|
||||
"JV", "JT", "RI", "EB", "EM", "ZWJ", "CB",
|
||||
]
|
||||
|
||||
SCRIPT_NAMES = [
|
||||
"Common",
|
||||
"Inherited",
|
||||
"Unknown",
|
||||
"Adlam",
|
||||
"Caucasian_Albanian",
|
||||
"Ahom",
|
||||
"Arabic",
|
||||
"Imperial_Aramaic",
|
||||
"Armenian",
|
||||
"Avestan",
|
||||
"Balinese",
|
||||
"Bamum",
|
||||
"Bassa_Vah",
|
||||
"Batak",
|
||||
"Bengali",
|
||||
"Bhaiksuki",
|
||||
"Bopomofo",
|
||||
"Brahmi",
|
||||
"Braille",
|
||||
"Buginese",
|
||||
"Buhid",
|
||||
"Chakma",
|
||||
"Canadian_Aboriginal",
|
||||
"Carian",
|
||||
"Cham",
|
||||
"Cherokee",
|
||||
"Chorasmian",
|
||||
"Coptic",
|
||||
"Cypro_Minoan",
|
||||
"Cypriot",
|
||||
"Cyrillic",
|
||||
"Devanagari",
|
||||
"Dives_Akuru",
|
||||
"Dogra",
|
||||
"Deseret",
|
||||
"Duployan",
|
||||
"Egyptian_Hieroglyphs",
|
||||
"Elbasan",
|
||||
"Elymaic",
|
||||
"Ethiopic",
|
||||
"Georgian",
|
||||
"Glagolitic",
|
||||
"Gunjala_Gondi",
|
||||
"Masaram_Gondi",
|
||||
"Gothic",
|
||||
"Grantha",
|
||||
"Greek",
|
||||
"Gujarati",
|
||||
"Gurmukhi",
|
||||
"Hangul",
|
||||
"Han",
|
||||
"Hanunoo",
|
||||
"Hatran",
|
||||
"Hebrew",
|
||||
"Hiragana",
|
||||
"Anatolian_Hieroglyphs",
|
||||
"Pahawh_Hmong",
|
||||
"Nyiakeng_Puachue_Hmong",
|
||||
"Old_Hungarian",
|
||||
"Old_Italic",
|
||||
"Javanese",
|
||||
"Kayah_Li",
|
||||
"Katakana",
|
||||
"Kawi",
|
||||
"Kharoshthi",
|
||||
"Khmer",
|
||||
"Khojki",
|
||||
"Khitan_Small_Script",
|
||||
"Kannada",
|
||||
"Kaithi",
|
||||
"Tai_Tham",
|
||||
"Lao",
|
||||
"Latin",
|
||||
"Lepcha",
|
||||
"Limbu",
|
||||
"Linear_A",
|
||||
"Linear_B",
|
||||
"Lisu",
|
||||
"Lycian",
|
||||
"Lydian",
|
||||
"Mahajani",
|
||||
"Makasar",
|
||||
"Mandaic",
|
||||
"Manichaean",
|
||||
"Marchen",
|
||||
"Medefaidrin",
|
||||
"Mende_Kikakui",
|
||||
"Meroitic_Cursive",
|
||||
"Meroitic_Hieroglyphs",
|
||||
"Malayalam",
|
||||
"Modi",
|
||||
"Mongolian",
|
||||
"Mro",
|
||||
"Meetei_Mayek",
|
||||
"Multani",
|
||||
"Myanmar",
|
||||
"Nag_Mundari",
|
||||
"Nandinagari",
|
||||
"Old_North_Arabian",
|
||||
"Nabataean",
|
||||
"Newa",
|
||||
"Nko",
|
||||
"Nushu",
|
||||
"Ogham",
|
||||
"Ol_Chiki",
|
||||
"Old_Turkic",
|
||||
"Oriya",
|
||||
"Osage",
|
||||
"Osmanya",
|
||||
"Old_Uyghur",
|
||||
"Palmyrene",
|
||||
"Pau_Cin_Hau",
|
||||
"Old_Permic",
|
||||
"Phags_Pa",
|
||||
"Inscriptional_Pahlavi",
|
||||
"Psalter_Pahlavi",
|
||||
"Phoenician",
|
||||
"Miao",
|
||||
"Inscriptional_Parthian",
|
||||
"Rejang",
|
||||
"Hanifi_Rohingya",
|
||||
"Runic",
|
||||
"Samaritan",
|
||||
"Old_South_Arabian",
|
||||
"Saurashtra",
|
||||
"SignWriting",
|
||||
"Shavian",
|
||||
"Sharada",
|
||||
"Siddham",
|
||||
"Khudawadi",
|
||||
"Sinhala",
|
||||
"Sogdian",
|
||||
"Old_Sogdian",
|
||||
"Sora_Sompeng",
|
||||
"Soyombo",
|
||||
"Sundanese",
|
||||
"Syloti_Nagri",
|
||||
"Syriac",
|
||||
"Tagbanwa",
|
||||
"Takri",
|
||||
"Tai_Le",
|
||||
"New_Tai_Lue",
|
||||
"Tamil",
|
||||
"Tangut",
|
||||
"Tai_Viet",
|
||||
"Telugu",
|
||||
"Tifinagh",
|
||||
"Tagalog",
|
||||
"Thaana",
|
||||
"Thai",
|
||||
"Tibetan",
|
||||
"Tirhuta",
|
||||
"Tangsa",
|
||||
"Toto",
|
||||
"Ugaritic",
|
||||
"Vai",
|
||||
"Vithkuqi",
|
||||
"Warang_Citi",
|
||||
"Wancho",
|
||||
"Old_Persian",
|
||||
"Cuneiform",
|
||||
"Yezidi",
|
||||
"Yi",
|
||||
"Zanabazar_Square",
|
||||
]
|
||||
|
||||
# note: should match definitions in Objects/unicodectype.c
|
||||
ALPHA_MASK = 0x01
|
||||
DECIMAL_MASK = 0x02
|
||||
DIGIT_MASK = 0x04
|
||||
LOWER_MASK = 0x08
|
||||
LINEBREAK_MASK = 0x10
|
||||
SPACE_MASK = 0x20
|
||||
TITLE_MASK = 0x40
|
||||
UPPER_MASK = 0x80
|
||||
XID_START_MASK = 0x100
|
||||
XID_CONTINUE_MASK = 0x200
|
||||
PRINTABLE_MASK = 0x400
|
||||
NUMERIC_MASK = 0x800
|
||||
CASE_IGNORABLE_MASK = 0x1000
|
||||
CASED_MASK = 0x2000
|
||||
EXTENDED_CASE_MASK = 0x4000
|
||||
|
||||
# these ranges need to match unicodedata.c:is_unified_ideograph
|
||||
cjk_ranges = [
|
||||
('3400', '4DBF'),
|
||||
('4E00', '9FFC'),
|
||||
('20000', '2A6DD'),
|
||||
('2A700', '2B734'),
|
||||
('2B740', '2B81D'),
|
||||
('2B820', '2CEA1'),
|
||||
('2CEB0', '2EBE0'),
|
||||
('30000', '3134A'),
|
||||
]
|
||||
|
||||
def maketables(trace=0):
|
||||
|
||||
print("--- Reading", UNICODE_DATA % "", "...")
|
||||
|
||||
unicode = UnicodeData(UNIDATA_VERSION)
|
||||
|
||||
print(len(list(filter(None, unicode.table))), "characters")
|
||||
|
||||
makeunicodedata(unicode, trace)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# unicode character properties
|
||||
|
||||
def makeunicodedata(unicode, trace):
|
||||
|
||||
dummy = (0, 0, 0, 0, 0, 0, 0)
|
||||
table = [dummy]
|
||||
cache = {0: dummy}
|
||||
index = [0] * len(unicode.chars)
|
||||
|
||||
FILE = "unicode/ucd_gen.ha"
|
||||
|
||||
print("--- Preparing", FILE, "...")
|
||||
|
||||
for char in unicode.chars:
|
||||
record = unicode.table[char]
|
||||
if record:
|
||||
# extract database properties
|
||||
category = CATEGORY_NAMES.index(record.general_category)
|
||||
combining = int(record.canonical_combining_class)
|
||||
bidirectional = BIDIRECTIONAL_NAMES.index(record.bidi_class)
|
||||
mirrored = record.bidi_mirrored == "Y"
|
||||
eastasianwidth = EASTASIANWIDTH_NAMES.index(record.east_asian_width)
|
||||
script = SCRIPT_NAMES.index(record.script or "Unknown")
|
||||
line_break = LINE_BREAKS.index(record.line_break)
|
||||
item = (
|
||||
category, combining, bidirectional,
|
||||
mirrored, eastasianwidth, script,
|
||||
line_break,
|
||||
)
|
||||
# add entry to index and item tables
|
||||
i = cache.get(item)
|
||||
if i is None:
|
||||
cache[item] = i = len(table)
|
||||
table.append(item)
|
||||
index[char] = i
|
||||
|
||||
print(len(table), "unique properties")
|
||||
|
||||
print("--- Writing", FILE, "...")
|
||||
|
||||
with open(FILE, "w") as fp:
|
||||
fprint = partial(print, file=fp)
|
||||
|
||||
fprint("// Generated by scripts/gen-ucd.py")
|
||||
fprint()
|
||||
fprint('// Unicode database version supported by this module')
|
||||
fprint('export def UNIDATA_VERSION: str = "%s";' % UNIDATA_VERSION)
|
||||
fprint('')
|
||||
fprint("// List of unique database records")
|
||||
fprint("const ucd_records: [_]ucd_encodedrec = [")
|
||||
for item in table:
|
||||
fprint(" (%d, %d, %d, %d, %d, %d, %d)," % item)
|
||||
fprint("];")
|
||||
fprint()
|
||||
|
||||
# split record index table
|
||||
index1, index2, shift = splitbins(index, trace)
|
||||
|
||||
fprint("// index tables for the database records")
|
||||
fprint("def UCD_RECORD_SHIFT: size = %d;" % shift)
|
||||
Array("index1", index1).dump(fp, trace)
|
||||
Array("index2", index2).dump(fp, trace)
|
||||
|
||||
|
||||
DATA_DIR = os.path.join('.data')
|
||||
|
||||
def open_data(template, version):
|
||||
local = os.path.join(DATA_DIR, template % ('-'+version,))
|
||||
if not os.path.exists(local):
|
||||
import urllib.request
|
||||
if version == '3.2.0':
|
||||
# irregular url structure
|
||||
url = ('https://www.unicode.org/Public/3.2-Update/'+template) % ('-'+version,)
|
||||
else:
|
||||
url = ('https://www.unicode.org/Public/%s/ucd/'+template) % (version, '')
|
||||
os.makedirs(DATA_DIR, exist_ok=True)
|
||||
urllib.request.urlretrieve(url, filename=local)
|
||||
if local.endswith('.txt'):
|
||||
return open(local, encoding='utf-8')
|
||||
else:
|
||||
# Unihan.zip
|
||||
return open(local, 'rb')
|
||||
|
||||
|
||||
def expand_range(char_range: str) -> Iterator[int]:
    '''
    Parses ranges of code points, as described in UAX #44:
    https://www.unicode.org/reports/tr44/#Code_Point_Ranges

    Yields every code point in the inclusive range; a bare code point is
    treated as a one-element range.
    '''
    if '..' in char_range:
        lo, hi = (int(bound, 16) for bound in char_range.split('..'))
    else:
        lo = hi = int(char_range, 16)
    yield from range(lo, hi + 1)
|
||||
|
||||
|
||||
class UcdFile:
    '''
    A file in the standard format of the UCD.

    See: https://www.unicode.org/reports/tr44/#Format_Conventions

    Note that, as described there, the Unihan data files have their
    own separate format.
    '''

    def __init__(self, template: str, version: str) -> None:
        self.template = template
        self.version = version

    def records(self) -> Iterator[List[str]]:
        # Yield each data line as a list of ';'-separated, stripped fields,
        # skipping comments ('#' to end of line) and blank lines.
        with open_data(self.template, self.version) as file:
            for raw in file:
                data = raw.split('#', 1)[0].strip()
                if not data:
                    continue
                yield [field.strip() for field in data.split(';')]

    def __iter__(self) -> Iterator[List[str]]:
        return self.records()

    def expanded(self) -> Iterator[Tuple[int, List[str]]]:
        # Like records(), but the first field is interpreted as a code point
        # range and one (codepoint, remaining-fields) pair is yielded per
        # code point in the range.
        for first, *rest in self.records():
            for char in expand_range(first):
                yield char, rest
|
||||
|
||||
|
||||
@dataclasses.dataclass
class UcdRecord:
    # One per-codepoint record, aggregated from several UCD source files.

    # 15 fields from UnicodeData.txt . See:
    # https://www.unicode.org/reports/tr44/#UnicodeData.txt
    codepoint: str
    name: str
    general_category: str
    canonical_combining_class: str
    bidi_class: str
    decomposition_type: str
    decomposition_mapping: str
    numeric_type: str
    numeric_value: str
    bidi_mirrored: str
    unicode_1_name: str  # obsolete
    iso_comment: str  # obsolete
    simple_uppercase_mapping: str
    simple_lowercase_mapping: str
    simple_titlecase_mapping: str

    # From EastAsianWidth.txt; None until patched in later.
    # https://www.unicode.org/reports/tr44/#EastAsianWidth.txt
    east_asian_width: Optional[str]

    # Binary properties, as a set of those that are true.
    # Taken from multiple files:
    # https://www.unicode.org/reports/tr44/#DerivedCoreProperties.txt
    # https://www.unicode.org/reports/tr44/#LineBreak.txt
    binary_properties: Set[str]

    # The Quick_Check properties related to normalization:
    # https://www.unicode.org/reports/tr44/#Decompositions_and_Normalization
    # We store them as a bitmask: 2 bits per property, in the order
    # NFD_QC, NFKD_QC, NFC_QC, NFKC_QC.
    quick_check: int

    # From Script.txt
    script: str

    # From LineBreak.txt ("XX" = unknown/unassigned)
    line_break: str
|
||||
|
||||
|
||||
def from_row(row: List[str]) -> UcdRecord:
    # Build a UcdRecord from one UnicodeData.txt row, filling the fields
    # sourced from other files with their "unknown" defaults:
    # east_asian_width=None, no binary properties, quick_check bitmask 0,
    # script "Unknown", line_break "XX".
    defaults = (None, set(), 0, "Unknown", "XX")
    return UcdRecord(*row, *defaults)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# the following support code is taken from the unidb utilities
|
||||
# Copyright (c) 1999-2000 by Secret Labs AB
|
||||
|
||||
# load a unicode-data file from disk
|
||||
|
||||
class UnicodeData:
    '''In-memory aggregation of the Unicode Character Database.

    Loads UnicodeData.txt plus the auxiliary files (EastAsianWidth, Scripts,
    DerivedCoreProperties, LineBreak, DerivedNormalizationProps, Unihan,
    SpecialCasing, CaseFolding) for one UCD version and merges them into a
    single per-codepoint table of UcdRecord (None = unassigned).
    '''
    # table: List[Optional[UcdRecord]] # index is codepoint; None means unassigned

    def __init__(self, version, cjk_check=True):
        self.changed = []
        # One slot per possible code point (U+0000..U+10FFFF).
        table = [None] * 0x110000
        for s in UcdFile(UNICODE_DATA, version):
            char = int(s[0], 16)
            table[char] = from_row(s)

        cjk_ranges_found = []

        # expand first-last ranges
        field = None
        for i in range(0, 0x110000):
            # The file UnicodeData.txt has its own distinct way of
            # expressing ranges.  See:
            # https://www.unicode.org/reports/tr44/#Code_Point_Ranges
            s = table[i]
            if s:
                if s.name[-6:] == "First>":
                    # Start of a <..., First>/<..., Last> pair: remember the
                    # shared fields so the gap can be filled in below.
                    s.name = ""
                    field = dataclasses.astuple(s)[:15]
                elif s.name[-5:] == "Last>":
                    if s.name.startswith("<CJK Ideograph"):
                        cjk_ranges_found.append((field[0],
                                                 s.codepoint))
                    s.name = ""
                    field = None
            elif field:
                # Inside a First/Last range: clone the range's fields,
                # substituting this code point.
                table[i] = from_row(('%X' % i,) + field[1:])
        if cjk_check and cjk_ranges != cjk_ranges_found:
            raise ValueError("CJK ranges deviate: have %r" % cjk_ranges_found)

        # public attributes
        self.filename = UNICODE_DATA % ''
        self.table = table
        self.chars = list(range(0x110000)) # unicode 3.2

        # check for name aliases and named sequences, see #12753
        # aliases and named sequences are not in 3.2.0
        if version != '3.2.0':
            self.aliases = []
            # store aliases in the Private Use Area 15, in range U+F0000..U+F00FF,
            # in order to take advantage of the compression and lookup
            # algorithms used for the other characters
            pua_index = NAME_ALIASES_START
            for char, name, abbrev in UcdFile(NAME_ALIASES, version):
                char = int(char, 16)
                self.aliases.append((name, char))
                # also store the name in the PUA 1
                self.table[pua_index].name = name
                pua_index += 1
            assert pua_index - NAME_ALIASES_START == len(self.aliases)

            self.named_sequences = []
            # store named sequences in the PUA 1, in range U+F0100..,
            # in order to take advantage of the compression and lookup
            # algorithms used for the other characters.

            assert pua_index < NAMED_SEQUENCES_START
            pua_index = NAMED_SEQUENCES_START
            for name, chars in UcdFile(NAMED_SEQUENCES, version):
                chars = tuple(int(char, 16) for char in chars.split())
                # check that the structure defined in makeunicodename is OK
                assert 2 <= len(chars) <= 4, "change the Py_UCS2 array size"
                assert all(c <= 0xFFFF for c in chars), ("use Py_UCS4 in "
                    "the NamedSequence struct and in unicodedata_lookup")
                self.named_sequences.append((name, chars))
                # also store these in the PUA 1
                self.table[pua_index].name = name
                pua_index += 1
            assert pua_index - NAMED_SEQUENCES_START == len(self.named_sequences)

        # Composition exclusions, keyed by code point.
        self.exclusions = {}
        for char, in UcdFile(COMPOSITION_EXCLUSIONS, version):
            char = int(char, 16)
            self.exclusions[char] = 1

        # Patch East_Asian_Width into the table.
        widths = [None] * 0x110000
        for char, (width,) in UcdFile(EASTASIAN_WIDTH, version).expanded():
            widths[char] = width

        for i in range(0, 0x110000):
            if table[i] is not None:
                table[i].east_asian_width = widths[i]

        # Patch Script into the table.
        scripts = [None] * 0x110000
        for char, (script,) in UcdFile(SCRIPTS, version).expanded():
            scripts[char] = script

        for i in range(0, 0x110000):
            if table[i] is not None:
                table[i].script = scripts[i]

        for char, (p,) in UcdFile(DERIVED_CORE_PROPERTIES, version).expanded():
            if table[char]:
                # Some properties (e.g. Default_Ignorable_Code_Point)
                # apply to unassigned code points; ignore them
                table[char].binary_properties.add(p)

        # Patch Line_Break class; mandatory break classes also get the
        # 'Line_Break' binary property.
        for char_range, value in UcdFile(LINE_BREAK, version):
            for char in expand_range(char_range):
                if not table[char]:
                    continue
                if value in MANDATORY_LINE_BREAKS:
                    table[char].binary_properties.add('Line_Break')
                table[char].line_break = value

        # We only want the quickcheck properties
        # Format: NF?_QC; Y(es)/N(o)/M(aybe)
        # Yes is the default, hence only N and M occur
        # In 3.2.0, the format was different (NF?_NO)
        # The parsing will incorrectly determine these as
        # "yes", however, unicodedata.c will not perform quickchecks
        # for older versions, and no delta records will be created.
        quickchecks = [0] * 0x110000
        qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC'.split()
        for s in UcdFile(DERIVEDNORMALIZATION_PROPS, version):
            if len(s) < 2 or s[1] not in qc_order:
                continue
            quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No
            quickcheck_shift = qc_order.index(s[1])*2
            quickcheck <<= quickcheck_shift
            for char in expand_range(s[0]):
                # Each property occupies its own 2-bit lane; it must not
                # have been set already for this code point.
                assert not (quickchecks[char]>>quickcheck_shift)&3
                quickchecks[char] |= quickcheck
        for i in range(0, 0x110000):
            if table[i] is not None:
                table[i].quick_check = quickchecks[i]

        # Patch numeric values from the Unihan database.
        # NOTE(review): 'zip' shadows the builtin within this scope.
        with open_data(UNIHAN, version) as file:
            zip = zipfile.ZipFile(file)
            if version == '3.2.0':
                data = zip.open('Unihan-3.2.0.txt').read()
            else:
                data = zip.open('Unihan_NumericValues.txt').read()
        for line in data.decode("utf-8").splitlines():
            if not line.startswith('U+'):
                continue
            code, tag, value = line.split(None, 3)[:3]
            if tag not in ('kAccountingNumeric', 'kPrimaryNumeric',
                           'kOtherNumeric'):
                continue
            value = value.strip().replace(',', '')
            i = int(code[2:], 16)
            # Patch the numeric field
            if table[i] is not None:
                table[i].numeric_value = value

        # Special casing: codepoint -> ([lower], [title], [upper]).
        sc = self.special_casing = {}
        for data in UcdFile(SPECIAL_CASING, version):
            if data[4]:
                # We ignore all conditionals (since they depend on
                # languages) except for one, which is hardcoded. See
                # handle_capital_sigma in unicodeobject.c.
                continue
            c = int(data[0], 16)
            lower = [int(char, 16) for char in data[1].split()]
            title = [int(char, 16) for char in data[2].split()]
            upper = [int(char, 16) for char in data[3].split()]
            sc[c] = (lower, title, upper)

        # Case folding (Common + Full statuses only); absent in 3.2.0.
        cf = self.case_folding = {}
        if version != '3.2.0':
            for data in UcdFile(CASE_FOLDING, version):
                if data[1] in "CF":
                    c = int(data[0], 16)
                    cf[c] = [int(char, 16) for char in data[2].split()]

    def uselatin1(self):
        # restrict character range to ISO Latin 1
        self.chars = list(range(256))
|
||||
|
||||
|
||||
# hash table tools
|
||||
|
||||
# this is a straight-forward reimplementation of Python's built-in
|
||||
# dictionary type, using a static data structure, and a custom string
|
||||
# hash algorithm.
|
||||
|
||||
def myhash(s, magic):
    '''Case-insensitive multiplicative string hash.

    Accumulates h = h*magic + ord(c) over the uppercased string, folding the
    top byte back into the low 24 bits whenever the value overflows 24 bits,
    so the result always fits in 24 bits.
    '''
    h = 0
    for code in map(ord, s.upper()):
        h = h * magic + code
        overflow = h & 0xff000000
        if overflow:
            # Fold the overflowing byte back into the low 24 bits.
            h = (h ^ ((overflow >> 24) & 0xff)) & 0x00ffffff
    return h
|
||||
|
||||
|
||||
# Candidate hash-table sizes (powers of two), each paired with the value
# used to build the probe polynomial for that size (see Hash.__init__,
# which picks the first entry with size > len(data)).
SIZES = [
    (4,3), (8,3), (16,3), (32,5), (64,3), (128,3), (256,29), (512,17),
    (1024,9), (2048,5), (4096,83), (8192,27), (16384,43), (32768,3),
    (65536,45), (131072,9), (262144,39), (524288,39), (1048576,9),
    (2097152,5), (4194304,3), (8388608,33), (16777216,27)
]
|
||||
|
||||
|
||||
class Hash:
    '''A static hash table built from a (key, value) list.

    Keys are hashed with myhash(); collisions are resolved by a form of
    double hashing where the increment is doubled and reduced by a
    polynomial when it overflows the table mask.  dump() writes the table
    as a C array plus #define'd magic/size/poly parameters.
    '''

    def __init__(self, name, data, magic):
        # turn a (key, value) list into a static hash table structure

        # determine table size: the first power of two from SIZES that is
        # strictly larger than the data set, plus its probe polynomial
        for size, poly in SIZES:
            if size > len(data):
                poly = size + poly
                break
        else:
            raise AssertionError("ran out of polynomials")

        print(size, "slots in hash table")

        table = [None] * size

        mask = size-1

        n = 0  # collision counter

        hash = myhash

        # initialize hash table
        for key, value in data:
            h = hash(key, magic)
            i = (~h) & mask
            v = table[i]
            if v is None:
                # primary slot was free
                table[i] = value
                continue
            # probe with a second hash derived from h; never zero
            incr = (h ^ (h >> 3)) & mask
            if not incr:
                incr = mask
            while 1:
                n = n + 1
                i = (i + incr) & mask
                v = table[i]
                if v is None:
                    table[i] = value
                    break
                # double the increment, reducing by the polynomial on
                # overflow so the probe sequence keeps covering the table
                incr = incr << 1
                if incr > mask:
                    incr = incr ^ poly

        print(n, "collisions")
        self.collisions = n

        # empty slots become 0 so the table can be dumped as integers
        for i in range(len(table)):
            if table[i] is None:
                table[i] = 0

        self.data = Array(name + "_hash", table)
        self.magic = magic
        self.name = name
        self.size = size
        self.poly = poly

    def dump(self, file, trace):
        # write data to file, as a C array plus the table parameters
        self.data.dump(file, trace)
        file.write("#define %s_magic %d\n" % (self.name, self.magic))
        file.write("#define %s_size %d\n" % (self.name, self.size))
        file.write("#define %s_poly %d\n" % (self.name, self.poly))
|
||||
|
||||
|
||||
# stuff to deal with arrays of unsigned integers
|
||||
|
||||
class Array:
    '''A named array of unsigned integers that can be written out as a
    Hare constant declaration.'''

    def __init__(self, name, data):
        self.name = name
        self.data = data

    def dump(self, file, trace=0):
        # Emit "const <name>: [_]uN = [ ... ];", picking the narrowest
        # element type that fits all values and wrapping items at roughly
        # 78 columns.  With trace, report the byte size to stderr.
        size = getsize(self.data)
        if trace:
            print(self.name+":", size*len(self.data), "bytes", file=sys.stderr)
        if size == 1:
            elemtype = "u8"
        elif size == 2:
            elemtype = "u16"
        else:
            elemtype = "u32"
        file.write("const " + self.name + ": [_]" + elemtype)
        file.write(" = [\n")
        if self.data:
            line = " "
            for item in self.data:
                piece = str(item) + ", "
                if len(line) + len(piece) > 78:
                    # Current line is full; flush it and start a new one.
                    file.write(line.rstrip() + "\n")
                    line = " " + piece
                else:
                    line = line + piece
            if line.strip():
                file.write(line.rstrip() + "\n")
        file.write("];\n\n")
|
||||
|
||||
|
||||
def getsize(data):
    '''Return the smallest unsigned-integer byte width (1, 2 or 4) that
    can hold every value in the given array.'''
    largest = max(data)
    if largest < 256:
        return 1
    if largest < 65536:
        return 2
    return 4
|
||||
|
||||
|
||||
def splitbins(t, trace=0):
    """t, trace=0 -> (t1, t2, shift).  Split a table to save space.

    t is a sequence of ints.  This function can be useful to save space if
    many of the ints are the same.  t1 and t2 are lists of ints, and shift
    is an int, chosen to minimize the combined size of t1 and t2 (in C
    code), and where for each i in range(len(t)),
        t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
    where mask is a bitmask isolating the last "shift" bits.

    If optional arg trace is non-zero (default zero), progress info
    is printed to sys.stderr.  The higher the value, the more info
    you'll get.
    """
    if trace:
        def dump(t1, t2, shift, bytes):
            print("%d+%d bins at shift %d; %d bytes" % (
                len(t1), len(t2), shift, bytes), file=sys.stderr)
        print("Size of original table:", len(t)*getsize(t), "bytes",
              file=sys.stderr)

    # The largest shift worth trying: the bin size 2**shift must not
    # exceed the table length.
    top = len(t)-1
    maxshift = 0
    if top > 0:
        while top >> 1:
            top >>= 1
            maxshift += 1
    del top

    bytes = sys.maxsize              # smallest total size seen so far
    t = tuple(t)                     # so slices can be dict keys
    for shift in range(maxshift + 1):
        t1 = []
        t2 = []
        binsize = 2**shift
        bincache = {}
        # Deduplicate consecutive bins of length 2**shift: t2 holds the
        # unique bins concatenated, t1 maps bin number -> offset in t2.
        for start in range(0, len(t), binsize):
            chunk = t[start:start+binsize]
            where = bincache.get(chunk)
            if where is None:
                where = len(t2)
                bincache[chunk] = where
                t2.extend(chunk)
            t1.append(where >> shift)
        # determine memory size of this candidate split
        b = len(t1)*getsize(t1) + len(t2)*getsize(t2)
        if trace > 1:
            dump(t1, t2, shift, b)
        if b < bytes:
            best = t1, t2, shift
            bytes = b
    t1, t2, shift = best
    if trace:
        print("Best:", end=' ', file=sys.stderr)
        dump(t1, t2, shift, bytes)
    if __debug__:
        # exhaustively verify that the decomposition is correct
        mask = ~((~0) << shift)      # i.e., low-bit mask of shift bits
        for i in range(len(t)):
            assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
    return best
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Regenerate the tables with tracing enabled.
    maketables(1)
|
196
vendor/hare-unicode/unicode/linebreak.ha
vendored
Normal file
196
vendor/hare-unicode/unicode/linebreak.ha
vendored
Normal file
|
@ -0,0 +1,196 @@
|
|||
use encoding::utf8;
|
||||
use strings;
|
||||
|
||||
// State machine for enumerating line break opportunities in a string.
// Initialize with [[new_line_breaker]]; advance with [[next_line_break]].
export type line_breaker = struct {
	input: str,
	iter: strings::iterator,
	// Current position, in runes
	pos: size,
	// Current position, in bytes
	bpos: size,
	// Line break class of the current rune
	cur: line_break,
	// Line break class of the lookahead rune
	next: line_break,
	// Rule LB8a state: previous rune was ZWJ
	lb8a: bool,
	// Rule LB21a state: set when the previous class was HL
	lb21a: bool,
	// Rule LB30a state: run length of regional indicators
	lb30a: uint,
};
|
||||
|
||||
// Creates a new line breaking algorithm state machine. See [[next_line_break]]
// to enumerate the line break opportunities in the input string.
//
// All fields other than the input start zeroed; the iterator and break
// classes are initialized lazily on the first [[next_line_break]] call.
export fn new_line_breaker(input: str) line_breaker = {
	return line_breaker {
		input = input,
		...
	};
};
|
||||
|
||||
// Returns the next line break opportunity as a tuple of the rune-wise index,
// byte-wise index, and a boolean indicating whether or not the break is
// mandatory at this location. The line break opportunity directly precedes the
// index returned from this function.
//
//	Hello world!
//	       ^ Line break opportunity at index 6
export fn next_line_break(lb: *line_breaker) ((size, size, bool) | done) = {
	if (lb.pos == 0) {
		if (len(lb.input) == 0) {
			return done; // special case: nothing to break
		};

		lb.iter = strings::iter(lb.input);

		// Prime the state with the first rune's class, resolved per
		// rules LB1 and LB2.
		// NOTE(review): 'class' is bound const but reassigned on the
		// following line -- confirm this compiles on current Hare.
		const (class, rn) = next_lb1_class(lb) as (line_break, rune);
		class = resolve_lb2_class(class);
		lb.cur = class;
		lb.next = class;
		lb.lb8a = class == line_break::ZWJ;
	};

	for (const (next, rn) => next_lb1_class(lb)) {
		const prev = lb.next;
		lb.next = next;
		const rnsz = utf8::runesz(rn);
		// Advance the rune/byte cursors however this iteration exits.
		defer {
			lb.pos += 1;
			lb.bpos += rnsz;
		};

		// Rules LB4/LB5: always break after a hard break class, and
		// after CR unless followed by LF (CRLF stays together).
		const mandatory = lb.cur == line_break::BK
			|| (lb.cur == line_break::CR
				&& lb.next != line_break::LF);
		if (mandatory) {
			lb.cur = resolve_lb2_class(next);
			return (lb.pos + 1, lb.bpos + rnsz, true);
		};

		lb.lb8a = next == line_break::ZWJ;

		// Try the trivial classes first; otherwise fall back to the
		// pair table and stateful rules.
		let can_break = lb_simple_case(lb);
		match (can_break) {
		case bool => void;
		case void =>
			can_break = lb_complex_case(lb, prev);
		};

		assert(can_break is bool);
		const can_break = can_break as bool;
		if (can_break) {
			return (lb.pos + 1, lb.bpos + rnsz, false);
		};
	};

	return done;
};
|
||||
|
||||
// Applies LB1 suggested rules for resolving context-dependent classes
// (AI, SG, XX, SA, CJ). Returns the resolved class together with the rune,
// or done at the end of input.
fn next_lb1_class(lb: *line_breaker) ((line_break, rune) | done) = {
	const rn = match (strings::next(&lb.iter)) {
	case let rn: rune =>
		yield rn;
	case done =>
		return done;
	};

	const class = rune_line_break(rn);
	switch (class) {
	case line_break::AI, line_break::SG, line_break::XX =>
		// Ambiguous, surrogate, and unknown resolve to AL.
		return (line_break::AL, rn);
	case line_break::SA =>
		// South East Asian: combining marks become CM, else AL.
		switch (rune_gc(rn)) {
		case gc::Mn, gc::Mc =>
			return (line_break::CM, rn);
		case =>
			return (line_break::AL, rn);
		};
	case line_break::CJ =>
		// Conditional Japanese starter: treated as non-starter.
		return (line_break::NS, rn);
	case =>
		return (class, rn);
	};
};
|
||||
|
||||
// Applies LB2 suggested rules for resolving the start-of-text line-break
// class: an initial LF/NL is treated as a hard break (BK) and an initial
// space as a word joiner (WJ); all other classes pass through unchanged.
fn resolve_lb2_class(lb: line_break) line_break = {
	switch (lb) {
	case line_break::LF, line_break::NL =>
		return line_break::BK;
	case line_break::SP =>
		return line_break::WJ;
	case =>
		return lb;
	};
};
|
||||
|
||||
// If this is a simple case, return whether or not this is a break opportunity
// as a boolean. Returns void for special cases that need the pair table.
fn lb_simple_case(lb: *line_breaker) (bool | void) = {
	switch (lb.next) {
	case line_break::SP =>
		// Never break before a space (rule LB7).
		return false;
	case line_break::BK, line_break::LF, line_break::NL =>
		// Hard break class: recorded in lb.cur so the next
		// iteration of next_line_break treats it as mandatory.
		lb.cur = line_break::BK;
		return false;
	case line_break::CR =>
		lb.cur = line_break::CR;
		return false;
	case =>
		// Not a trivial class; defer to lb_complex_case.
		return;
	};
};
|
||||
|
||||
// Handles more complex rules, including pair table lookups via
// linebreak_table.ha, plus the stateful rules LB8a, LB21a and LB30a.
// Returns whether a break opportunity exists between lb.cur and lb.next,
// and advances lb.cur to lb.next.
fn lb_complex_case(lb: *line_breaker, prev: line_break) bool = {
	let can_break = false;

	// lb_pairs is indexed by (current class, next class), with both
	// offset so that line_break::OP maps to row/column 0. Classes
	// outside the table (out of range after the offset) are skipped.
	const ucur = lb.cur: uint - line_break::OP: uint;
	const unext = lb.next: uint - line_break::OP: uint;
	if (ucur < len(lb_pairs) && unext < len(lb_pairs[0])) {
		switch (lb_pairs[ucur][unext]) {
		case bo::DI => // Direct break
			can_break = true;
		case bo::IN => // Indirect break opportunity
			can_break = prev == line_break::SP;
		case bo::CI => // Indirect opportunity for combining marks
			can_break = prev == line_break::SP;
			if (!can_break) {
				// NOTE(review): returns without updating
				// lb.cur, unlike the other arms.
				return false;
			};
		case bo::CP => // Prohibited for combining marks
			if (prev != line_break::SP) {
				// NOTE(review): also returns without
				// updating lb.cur.
				return false;
			};
		case bo::PR => void;
		};
	};

	// Rule LB8a: no break after a zero-width joiner.
	if (lb.lb8a) {
		can_break = false;
	};

	// Rule LB21a: no break after Hebrew letter + hyphen/break-after.
	if (lb.lb21a && (lb.cur == line_break::HY || lb.cur == line_break::BA)) {
		can_break = false;
		lb.lb21a = false;
	} else {
		lb.lb21a = lb.cur == line_break::HL;
	};

	// Rule LB30a: break only between pairs of regional indicators.
	if (lb.cur == line_break::RI) {
		lb.lb30a += 1;
		if (lb.lb30a == 2 && lb.next == line_break::RI) {
			can_break = true;
			lb.lb30a = 0;
		};
	} else {
		lb.lb30a = 0;
	};

	lb.cur = lb.next;
	return can_break;
};
|
63
vendor/hare-unicode/unicode/linebreak_table.ha
vendored
Normal file
63
vendor/hare-unicode/unicode/linebreak_table.ha
vendored
Normal file
|
@ -0,0 +1,63 @@
|
|||
// Break opportunity: the action recorded in [[lb_pairs]] for a pair of
// line break classes.
type bo = enum {
	// Direct opportunity
	DI,
	// Indirect opportunity (break only if separated by spaces)
	IN,
	// Indirect opportunity for combining marks
	CI,
	// Prohibited break for combining marks
	CP,
	// Prohibited break
	PR,
};
|
||||
|
||||
// Based on JavaScript implementation here:
//
// https://github.com/foliojs/linebreak/blob/master/src/pairs.js
//
// This is itself based on the example pair table from Unicode, which was last
// published in revision 37 of the line break algorithm, and has since been
// touched up by the JavaScript maintainers to incorporate later changes to the
// algorithm.
//
// - ZWJ special processing for LB8a of Revision 41
// - CB manually added as per Rule LB20
// - CL, CP, NS, SY, IS, PR, PO, HY, BA, B2 and RI manually adjusted as per LB22 of Revision 45
//
// Rows are indexed by the current class and columns by the next class,
// both offset from line_break::OP (see lb_complex_case).
const lb_pairs = [
	//OP   , CL   , CP   , QU   , GL   , NS   , EX   , SY   , IS   , PR   , PO   , NU   , AL   , HL   , ID   , IN   , HY   , BA   , BB   , B2   , ZW   , CM   , WJ   , H2   , H3   , JL   , JV   , JT   , RI   , EB   , EM   , ZWJ  , CB
	[bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::CP, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR, bo::PR], // OP
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // CL
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // CP
	[bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::PR, bo::CI, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN], // QU
	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::PR, bo::CI, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN], // GL
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // NS
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // EX
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::IN, bo::DI, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // SY
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // IS
	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI], // PR
	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // PO
	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // NU
	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // AL
	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // HL
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // ID
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // IN
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::DI, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // HY
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::DI, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // BA
	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::PR, bo::CI, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI], // BB
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::PR, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // B2
	[bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI], // ZW
	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // CM
	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::PR, bo::CI, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN], // WJ
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // H2
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // H3
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // JL
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // JV
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // JT
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI, bo::DI, bo::IN, bo::DI], // RI
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::DI], // EB
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::DI, bo::IN, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // EM
	[bo::IN, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::PR, bo::PR, bo::PR, bo::IN, bo::IN, bo::IN, bo::IN, bo::IN, bo::DI, bo::IN, bo::IN, bo::IN, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI], // ZWJ
	[bo::DI, bo::PR, bo::PR, bo::IN, bo::IN, bo::DI, bo::PR, bo::PR, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::PR, bo::CI, bo::PR, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::DI, bo::IN, bo::DI] // CB
];
|
||||
|
654
vendor/hare-unicode/unicode/ucd.ha
vendored
Normal file
654
vendor/hare-unicode/unicode/ucd.ha
vendored
Normal file
|
@ -0,0 +1,654 @@
|
|||
// Compact tuple encoding of a UCD record. The element order mirrors the
// fields of [[ucd_record]] one-to-one: (category, combining, bidirectional,
// mirrored, east_asian_width, script, line_break) — presumably produced by
// the table generator; TODO confirm against ucd_gen.ha.
type ucd_encodedrec = (u8, u8, u8, u8, u8, u16, u8);
|
||||
|
||||
// One decoded record of per-rune properties from the Unicode Character
// Database (UCD). Records are stored in the generated ucd_records table and
// looked up via [[get_ucdrecord]].
type ucd_record = struct {
	// General category; raw value of the [[gc]] enum (see [[rune_gc]]).
	category: u8,
	// Combining class — presumably the canonical combining class (ccc)
	// from the UCD; TODO confirm against the generator.
	combining: u8,
	// Bidirectional class; raw value of the [[bidi]] enum
	// (see [[rune_bidi]]).
	bidirectional: u8,
	// Mirrored flag — presumably Bidi_Mirrored; TODO confirm.
	mirrored: u8,
	// East Asian width class; encoding is defined by the generated
	// tables and is not decoded by any function in this file.
	east_asian_width: u8,
	// Script; raw value of the [[script]] enum (see [[rune_script]]).
	script: u16,
	// Line break class; raw value of the [[line_break]] enum
	// (see [[rune_line_break]]).
	line_break: u8,
};
|
||||
|
||||
// Looks up the UCD record for a rune via the generated two-level trie
// (index1/index2). Runes at or above 0x110000 (outside the Unicode code
// space) leave the index at 0 and therefore resolve to the first record —
// presumably the default/unassigned record; TODO confirm.
fn get_ucdrecord(rn: rune) *ucd_record = {
	const code = rn: u32;
	let index = 0u16;
	if (code < 0x110000) {
		// First level: the high bits of the code point select a
		// block number.
		index = index1[(code>>UCD_RECORD_SHIFT)];
		// Second level: the block number plus the low bits of the
		// code point select the final record index.
		index = index2[(index<<UCD_RECORD_SHIFT)+(code&((1<<UCD_RECORD_SHIFT)-1))];
	};
	return &ucd_records[index]: *ucd_record;
};
|
||||
|
||||
// Unicode character General_Category attribute
|
||||
// NOTE: member order fixes the numeric values — [[rune_gc]] casts the raw
// table byte directly to this type, so do not reorder or insert members.
export type gc = enum u8 {
	Cc, // Control
	Cf, // Format
	Cn, // Unassigned
	Co, // Private use
	Cs, // Surrogate
	Ll, // Lowercase letter
	Lm, // Modifier letter
	Lo, // Other letter
	Lt, // Titlecase letter
	Lu, // Uppercase letter
	Mc, // Spacing mark
	Me, // Enclosing mark
	Mn, // Non-spacing mark
	Nd, // Decimal number
	Nl, // Letter number
	No, // Other number
	Pc, // Connect punctuation
	Pd, // Dash punctuation
	Pe, // Close punctuation
	Pf, // Final punctuation
	Pi, // Initial punctuation
	Po, // Other punctuation
	Ps, // Open punctuation
	Sc, // Currency symbol
	Sk, // Modifier symbol
	Sm, // Math symbol
	So, // Other symbol
	Zl, // Line separator
	Zp, // Paragraph separator
	Zs, // Space separator
};
|
||||
|
||||
// Returns the [[gc]] (General_Category) value corresponding to this rune.
|
||||
export fn rune_gc(rn: rune) gc = {
	const record = get_ucdrecord(rn);
	return record.category: gc;
};
|
||||
|
||||
// Returns the name associated with a [[gc]] value.
|
||||
export fn gc_name(cat: gc) const str = {
	switch (cat) {
	case gc::Cc =>
		return "Control";
	case gc::Cf =>
		return "Format";
	case gc::Cn =>
		return "Unassigned";
	case gc::Co =>
		return "Private use";
	case gc::Cs =>
		return "Surrogate";
	case gc::Ll =>
		return "Lowercase letter";
	case gc::Lm =>
		return "Modifier letter";
	case gc::Lo =>
		return "Other letter";
	case gc::Lt =>
		return "Titlecase letter";
	case gc::Lu =>
		return "Uppercase letter";
	case gc::Mc =>
		return "Spacing mark";
	case gc::Me =>
		return "Enclosing mark";
	case gc::Mn =>
		return "Non-spacing mark";
	case gc::Nd =>
		return "Decimal number";
	case gc::Nl =>
		return "Letter number";
	case gc::No =>
		return "Other number";
	case gc::Pc =>
		return "Connect punctuation";
	case gc::Pd =>
		return "Dash punctuation";
	case gc::Pe =>
		return "Close punctuation";
	case gc::Pf =>
		return "Final punctuation";
	case gc::Pi =>
		return "Initial punctuation";
	case gc::Po =>
		return "Other punctuation";
	case gc::Ps =>
		return "Open punctuation";
	case gc::Sc =>
		return "Currency symbol";
	case gc::Sk =>
		return "Modifier symbol";
	case gc::Sm =>
		return "Math symbol";
	case gc::So =>
		return "Other symbol";
	case gc::Zl =>
		return "Line separator";
	case gc::Zp =>
		return "Paragraph separator";
	case gc::Zs =>
		return "Space separator";
	};
};
|
||||
|
||||
// Returns the two-character code associated with a [[gc]] value.
|
||||
export fn gc_code(cat: gc) const str = {
	switch (cat) {
	case gc::Cc =>
		return "Cc";
	case gc::Cf =>
		return "Cf";
	case gc::Cn =>
		return "Cn";
	case gc::Co =>
		return "Co";
	case gc::Cs =>
		return "Cs";
	case gc::Ll =>
		return "Ll";
	case gc::Lm =>
		return "Lm";
	case gc::Lo =>
		return "Lo";
	case gc::Lt =>
		return "Lt";
	case gc::Lu =>
		return "Lu";
	case gc::Mc =>
		return "Mc";
	case gc::Me =>
		return "Me";
	case gc::Mn =>
		return "Mn";
	case gc::Nd =>
		return "Nd";
	case gc::Nl =>
		return "Nl";
	case gc::No =>
		return "No";
	case gc::Pc =>
		return "Pc";
	case gc::Pd =>
		return "Pd";
	case gc::Pe =>
		return "Pe";
	case gc::Pf =>
		return "Pf";
	case gc::Pi =>
		return "Pi";
	case gc::Po =>
		return "Po";
	case gc::Ps =>
		return "Ps";
	case gc::Sc =>
		return "Sc";
	case gc::Sk =>
		return "Sk";
	case gc::Sm =>
		return "Sm";
	case gc::So =>
		return "So";
	case gc::Zl =>
		return "Zl";
	case gc::Zp =>
		return "Zp";
	case gc::Zs =>
		return "Zs";
	};
};
|
||||
|
||||
// Bidirectional classification.
|
||||
// NOTE: values are UAX #9 Bidi_Class codes; member order fixes the numeric
// values — [[rune_bidi]] casts the raw table byte directly to this type, so
// do not reorder or insert members.
export type bidi = enum u8 {
	UNKNOWN,
	L, // Left-to-Right
	LRE, // Left-to-Right Embedding
	LRO, // Left-to-Right Override
	R, // Right-to-Left
	AL, // Arabic Letter
	RLE, // Right-to-Left Embedding
	RLO, // Right-to-Left Override
	PDF, // Pop Directional Format
	EN, // European Number
	ES, // European Separator
	ET, // European Terminator
	AN, // Arabic Number
	CS, // Common Separator
	NSM, // Nonspacing Mark
	BN, // Boundary Neutral
	B, // Paragraph Separator
	S, // Segment Separator
	WS, // White Space
	ON, // Other Neutral
	LRI, // Left-to-Right Isolate
	RLI, // Right-to-Left Isolate
	FSI, // First Strong Isolate
	PDI, // Pop Directional Isolate
};
|
||||
|
||||
// Returns the [[bidi]] classification corresponding to this rune.
|
||||
export fn rune_bidi(rn: rune) bidi = {
	const record = get_ucdrecord(rn);
	return record.bidirectional: bidi;
};
|
||||
|
||||
// Unicode character Script attribute.
|
||||
// NOTE: member order fixes the numeric values — [[rune_script]] casts the
// raw table value directly to this type, so do not reorder or insert
// members. The trailing comment on each member is its four-letter
// ISO 15924 script code (see [[script_code]]).
export type script = enum u16 {
	COMMON, // Zyyy
	INHERITED, // Zinh
	UNKNOWN, // Zzzz
	ADLAM, // Adlm
	CAUCASIAN_ALBANIAN, // Aghb
	AHOM, // Ahom
	ARABIC, // Arab
	IMPERIAL_ARAMAIC, // Armi
	ARMENIAN, // Armn
	AVESTAN, // Avst
	BALINESE, // Bali
	BAMUM, // Bamu
	BASSA_VAH, // Bass
	BATAK, // Batk
	BENGALI, // Beng
	BHAIKSUKI, // Bhks
	BOPOMOFO, // Bopo
	BRAHMI, // Brah
	BRAILLE, // Brai
	BUGINESE, // Bugi
	BUHID, // Buhd
	CHAKMA, // Cakm
	CANADIAN_SYLLABICS, // Cans
	CARIAN, // Cari
	CHAM, // Cham
	CHEROKEE, // Cher
	CHORASMIAN, // Chrs
	COPTIC, // Copt
	CYPRO_MINOAN, // Cpmn
	CYPRIOT, // Cprt
	CYRILLIC, // Cyrl
	DEVANAGARI, // Deva
	DIVES_AKURU, // Diak
	DOGRA, // Dogr
	DESERET, // Dsrt
	DUPLOYAN, // Dupl
	EGYPTIAN_HIEROGLYPHS, // Egyp
	ELBASAN, // Elba
	ELYMAIC, // Elym
	ETHIOPIC, // Ethi
	GEORGIAN, // Geor
	GLAGOLITIC, // Glag
	GUNJALA_GONDI, // Gong
	MASARAM_GONDI, // Gonm
	GOTHIC, // Goth
	GRANTHA, // Gran
	GREEK, // Grek
	GUJARATI, // Gujr
	GURMUKHI, // Guru
	HANGUL, // Hang
	HAN, // Hani
	HANUNOO, // Hano
	HATRAN, // Hatr
	HEBREW, // Hebr
	HIRAGANA, // Hira
	ANATOLIAN_HIEROGLYPHS, // Hluw
	PAHAWH_HMONG, // Hmng
	NYIAKENG_PUACHUE_HMONG, // Hmnp
	OLD_HUNGARIAN, // Hung
	OLD_ITALIC, // Ital
	JAVANESE, // Java
	KAYAH_LI, // Kali
	KATAKANA, // Kana
	KAWI, // Kawi
	KHAROSHTHI, // Khar
	KHMER, // Khmr
	KHOJKI, // Khoj
	KHITAN_SMALL_SCRIPT, // Kits
	KANNADA, // Knda
	KAITHI, // Kthi
	TAI_THAM, // Lana
	LAO, // Laoo
	LATIN, // Latn
	LEPCHA, // Lepc
	LIMBU, // Limb
	LINEAR_A, // Lina
	LINEAR_B, // Linb
	LISU, // Lisu
	LYCIAN, // Lyci
	LYDIAN, // Lydi
	MAHAJANI, // Mahj
	MAKASAR, // Maka
	MANDAIC, // Mand
	MANICHAEAN, // Mani
	MARCHEN, // Marc
	MEDEFAIDRIN, // Medf
	MENDE_KIKAKUI, // Mend
	MEROITIC_CURSIVE, // Merc
	MEROITIC_HIEROGLYPHS, // Mero
	MALAYALAM, // Mlym
	MODI, // Modi
	MONGOLIAN, // Mong
	MRO, // Mroo
	MEETEI_MAYEK, // Mtei
	MULTANI, // Mult
	MYANMAR, // Mymr
	NAG_MUNDARI, // Nagm
	NANDINAGARI, // Nand
	OLD_NORTH_ARABIAN, // Narb
	NABATAEAN, // Nbat
	NEWA, // Newa
	NKO, // Nkoo
	NUSHU, // Nshu
	OGHAM, // Ogam
	OL_CHIKI, // Olck
	OLD_TURKIC, // Orkh
	ORIYA, // Orya
	OSAGE, // Osge
	OSMANYA, // Osma
	OLD_UYGHUR, // Ougr
	PALMYRENE, // Palm
	PAU_CIN_HAU, // Pauc
	OLD_PERMIC, // Perm
	PHAGS_PA, // Phag
	INSCRIPTIONAL_PAHLAVI, // Phli
	PSALTER_PAHLAVI, // Phlp
	PHOENICIAN, // Phnx
	MIAO, // Plrd
	INSCRIPTIONAL_PARTHIAN, // Prti
	REJANG, // Rjng
	HANIFI_ROHINGYA, // Rohg
	RUNIC, // Runr
	SAMARITAN, // Samr
	OLD_SOUTH_ARABIAN, // Sarb
	SAURASHTRA, // Saur
	SIGNWRITING, // Sgnw
	SHAVIAN, // Shaw
	SHARADA, // Shrd
	SIDDHAM, // Sidd
	KHUDAWADI, // Sind
	SINHALA, // Sinh
	SOGDIAN, // Sogd
	OLD_SOGDIAN, // Sogo
	SORA_SOMPENG, // Sora
	SOYOMBO, // Soyo
	SUNDANESE, // Sund
	SYLOTI_NAGRI, // Sylo
	SYRIAC, // Syrc
	TAGBANWA, // Tagb
	TAKRI, // Takr
	TAI_LE, // Tale
	NEW_TAI_LUE, // Talu
	TAMIL, // Taml
	TANGUT, // Tang
	TAI_VIET, // Tavt
	TELUGU, // Telu
	TIFINAGH, // Tfng
	TAGALOG, // Tglg
	THAANA, // Thaa
	THAI, // Thai
	TIBETAN, // Tibt
	TIRHUTA, // Tirh
	TANGSA, // Tnsa
	TOTO, // Toto
	UGARITIC, // Ugar
	VAI, // Vaii
	VITHKUQI, // Vith
	WARANG_CITI, // Wara
	WANCHO, // Wcho
	OLD_PERSIAN, // Xpeo
	CUNEIFORM, // Xsux
	YEZIDI, // Yezi
	YI, // Yiii
	ZANABAZAR_SQUARE, // Zanb
	MATH, // Zmth
};
|
||||
|
||||
// Returns the [[script]] corresponding to this rune.
|
||||
export fn rune_script(rn: rune) script = {
	const record = get_ucdrecord(rn);
	return record.script: script;
};
|
||||
|
||||
// Returns the four-character code associated with a [[script]] value.
|
||||
export fn script_code(sc: script) const str = {
	// Exhaustive over [[script]]. Cases are not sorted alphabetically;
	// they mirror the order of the upstream property data, so keep this
	// switch in sync with the [[script]] enum when updating.
	switch (sc) {
	case script::COMMON => return "Zyyy";
	case script::INHERITED => return "Zinh";
	case script::UNKNOWN => return "Zzzz";
	case script::ARABIC => return "Arab";
	case script::ARMENIAN => return "Armn";
	case script::BENGALI => return "Beng";
	case script::CYRILLIC => return "Cyrl";
	case script::DEVANAGARI => return "Deva";
	case script::GEORGIAN => return "Geor";
	case script::GREEK => return "Grek";
	case script::GUJARATI => return "Gujr";
	case script::GURMUKHI => return "Guru";
	case script::HANGUL => return "Hang";
	case script::HAN => return "Hani";
	case script::HEBREW => return "Hebr";
	case script::HIRAGANA => return "Hira";
	case script::KANNADA => return "Knda";
	case script::KATAKANA => return "Kana";
	case script::LAO => return "Laoo";
	case script::LATIN => return "Latn";
	case script::MALAYALAM => return "Mlym";
	case script::ORIYA => return "Orya";
	case script::TAMIL => return "Taml";
	case script::TELUGU => return "Telu";
	case script::THAI => return "Thai";
	case script::TIBETAN => return "Tibt";
	case script::BOPOMOFO => return "Bopo";
	case script::BRAILLE => return "Brai";
	case script::CANADIAN_SYLLABICS => return "Cans";
	case script::CHEROKEE => return "Cher";
	case script::ETHIOPIC => return "Ethi";
	case script::KHMER => return "Khmr";
	case script::MONGOLIAN => return "Mong";
	case script::MYANMAR => return "Mymr";
	case script::OGHAM => return "Ogam";
	case script::RUNIC => return "Runr";
	case script::SINHALA => return "Sinh";
	case script::SYRIAC => return "Syrc";
	case script::THAANA => return "Thaa";
	case script::YI => return "Yiii";
	case script::DESERET => return "Dsrt";
	case script::GOTHIC => return "Goth";
	case script::OLD_ITALIC => return "Ital";
	case script::BUHID => return "Buhd";
	case script::HANUNOO => return "Hano";
	case script::TAGALOG => return "Tglg";
	case script::TAGBANWA => return "Tagb";
	case script::CYPRIOT => return "Cprt";
	case script::LIMBU => return "Limb";
	case script::LINEAR_B => return "Linb";
	case script::OSMANYA => return "Osma";
	case script::SHAVIAN => return "Shaw";
	case script::TAI_LE => return "Tale";
	case script::UGARITIC => return "Ugar";
	case script::BUGINESE => return "Bugi";
	case script::COPTIC => return "Copt";
	case script::GLAGOLITIC => return "Glag";
	case script::KHAROSHTHI => return "Khar";
	case script::NEW_TAI_LUE => return "Talu";
	case script::OLD_PERSIAN => return "Xpeo";
	case script::SYLOTI_NAGRI => return "Sylo";
	case script::TIFINAGH => return "Tfng";
	case script::BALINESE => return "Bali";
	case script::CUNEIFORM => return "Xsux";
	case script::NKO => return "Nkoo";
	case script::PHAGS_PA => return "Phag";
	case script::PHOENICIAN => return "Phnx";
	case script::CARIAN => return "Cari";
	case script::CHAM => return "Cham";
	case script::KAYAH_LI => return "Kali";
	case script::LEPCHA => return "Lepc";
	case script::LYCIAN => return "Lyci";
	case script::LYDIAN => return "Lydi";
	case script::OL_CHIKI => return "Olck";
	case script::REJANG => return "Rjng";
	case script::SAURASHTRA => return "Saur";
	case script::SUNDANESE => return "Sund";
	case script::VAI => return "Vaii";
	case script::AVESTAN => return "Avst";
	case script::BAMUM => return "Bamu";
	case script::EGYPTIAN_HIEROGLYPHS => return "Egyp";
	case script::IMPERIAL_ARAMAIC => return "Armi";
	case script::INSCRIPTIONAL_PAHLAVI => return "Phli";
	case script::INSCRIPTIONAL_PARTHIAN => return "Prti";
	case script::JAVANESE => return "Java";
	case script::KAITHI => return "Kthi";
	case script::LISU => return "Lisu";
	case script::MEETEI_MAYEK => return "Mtei";
	case script::OLD_SOUTH_ARABIAN => return "Sarb";
	case script::OLD_TURKIC => return "Orkh";
	case script::SAMARITAN => return "Samr";
	case script::TAI_THAM => return "Lana";
	case script::TAI_VIET => return "Tavt";
	case script::BATAK => return "Batk";
	case script::BRAHMI => return "Brah";
	case script::MANDAIC => return "Mand";
	case script::CHAKMA => return "Cakm";
	case script::MEROITIC_CURSIVE => return "Merc";
	case script::MEROITIC_HIEROGLYPHS => return "Mero";
	case script::MIAO => return "Plrd";
	case script::SHARADA => return "Shrd";
	case script::SORA_SOMPENG => return "Sora";
	case script::TAKRI => return "Takr";
	case script::BASSA_VAH => return "Bass";
	case script::CAUCASIAN_ALBANIAN => return "Aghb";
	case script::DUPLOYAN => return "Dupl";
	case script::ELBASAN => return "Elba";
	case script::GRANTHA => return "Gran";
	case script::KHOJKI => return "Khoj";
	case script::KHUDAWADI => return "Sind";
	case script::LINEAR_A => return "Lina";
	case script::MAHAJANI => return "Mahj";
	case script::MANICHAEAN => return "Mani";
	case script::MENDE_KIKAKUI => return "Mend";
	case script::MODI => return "Modi";
	case script::MRO => return "Mroo";
	case script::NABATAEAN => return "Nbat";
	case script::OLD_NORTH_ARABIAN => return "Narb";
	case script::OLD_PERMIC => return "Perm";
	case script::PAHAWH_HMONG => return "Hmng";
	case script::PALMYRENE => return "Palm";
	case script::PAU_CIN_HAU => return "Pauc";
	case script::PSALTER_PAHLAVI => return "Phlp";
	case script::SIDDHAM => return "Sidd";
	case script::TIRHUTA => return "Tirh";
	case script::WARANG_CITI => return "Wara";
	case script::AHOM => return "Ahom";
	case script::ANATOLIAN_HIEROGLYPHS => return "Hluw";
	case script::HATRAN => return "Hatr";
	case script::MULTANI => return "Mult";
	case script::OLD_HUNGARIAN => return "Hung";
	case script::SIGNWRITING => return "Sgnw";
	case script::ADLAM => return "Adlm";
	case script::BHAIKSUKI => return "Bhks";
	case script::MARCHEN => return "Marc";
	case script::OSAGE => return "Osge";
	case script::TANGUT => return "Tang";
	case script::NEWA => return "Newa";
	case script::MASARAM_GONDI => return "Gonm";
	case script::NUSHU => return "Nshu";
	case script::SOYOMBO => return "Soyo";
	case script::ZANABAZAR_SQUARE => return "Zanb";
	case script::DOGRA => return "Dogr";
	case script::GUNJALA_GONDI => return "Gong";
	case script::HANIFI_ROHINGYA => return "Rohg";
	case script::MAKASAR => return "Maka";
	case script::MEDEFAIDRIN => return "Medf";
	case script::OLD_SOGDIAN => return "Sogo";
	case script::SOGDIAN => return "Sogd";
	case script::ELYMAIC => return "Elym";
	case script::NANDINAGARI => return "Nand";
	case script::NYIAKENG_PUACHUE_HMONG => return "Hmnp";
	case script::WANCHO => return "Wcho";
	case script::CHORASMIAN => return "Chrs";
	case script::DIVES_AKURU => return "Diak";
	case script::KHITAN_SMALL_SCRIPT => return "Kits";
	case script::YEZIDI => return "Yezi";
	case script::CYPRO_MINOAN => return "Cpmn";
	case script::OLD_UYGHUR => return "Ougr";
	case script::TANGSA => return "Tnsa";
	case script::TOTO => return "Toto";
	case script::VITHKUQI => return "Vith";
	case script::MATH => return "Zmth";
	case script::KAWI => return "Kawi";
	case script::NAG_MUNDARI => return "Nagm";
	};
};
|
||||
|
||||
// Line break classification.
|
||||
// NOTE: values are UAX #14 line breaking class codes; member order fixes
// the numeric values — [[rune_line_break]] casts the raw table byte
// directly to this type, so do not reorder or insert members.
export type line_break = enum u8 {
	XX, // Unknown
	AI, // Ambiguous (Alphabetic or Ideograph)
	BK, // Mandatory Break
	CJ, // Conditional Japanese Starter
	CR, // Carriage Return
	LF, // Line Feed
	NL, // Next Line
	SA, // Complex-Context Dependent (South East Asian)
	SG, // Surrogate
	SP, // Space
	OP, // Open Punctuation
	CL, // Close Punctuation
	CP, // Close Parenthesis
	QU, // Quotation
	GL, // Non-breaking ("Glue")
	NS, // Nonstarter
	EX, // Exclamation/Interrogation
	SY, // Symbols Allowing Break After
	IS, // Infix Numeric Separator
	PR, // Prefix Numeric
	PO, // Postfix Numeric
	NU, // Numeric
	AL, // Alphabetic
	HL, // Hebrew Letter
	ID, // Ideographic
	IN, // Inseparable
	HY, // Hyphen
	BA, // Break After
	BB, // Break Before
	B2, // Break Opportunity Before and After
	ZW, // Zero Width Space
	CM, // Combining Mark
	WJ, // Word Joiner
	H2, // Hangul LV Syllable
	H3, // Hangul LVT Syllable
	JL, // Hangul L Jamo
	JV, // Hangul V Jamo
	JT, // Hangul T Jamo
	RI, // Regional Indicator
	EB, // Emoji Base
	EM, // Emoji Modifier
	ZWJ, // Zero Width Joiner
	CB, // Contingent Break Opportunity
};
|
||||
|
||||
// Returns the [[line_break]] classification corresponding to this rune.
|
||||
export fn rune_line_break(rn: rune) line_break = {
	const record = get_ucdrecord(rn);
	return record.line_break: line_break;
};
|
||||
|
||||
// Returns the two-character code associated with a [[line_break]] value.
|
||||
export fn line_break_code(cls: line_break) const str = {
	switch (cls) {
	case line_break::XX => return "XX";
	case line_break::AI => return "AI";
	case line_break::AL => return "AL";
	case line_break::B2 => return "B2";
	case line_break::BA => return "BA";
	case line_break::BB => return "BB";
	case line_break::BK => return "BK";
	case line_break::CB => return "CB";
	case line_break::CJ => return "CJ";
	case line_break::CL => return "CL";
	case line_break::CM => return "CM";
	case line_break::CP => return "CP";
	case line_break::CR => return "CR";
	case line_break::EB => return "EB";
	case line_break::EM => return "EM";
	case line_break::EX => return "EX";
	case line_break::GL => return "GL";
	case line_break::H2 => return "H2";
	case line_break::H3 => return "H3";
	case line_break::HL => return "HL";
	case line_break::HY => return "HY";
	case line_break::ID => return "ID";
	case line_break::IN => return "IN";
	case line_break::IS => return "IS";
	case line_break::JL => return "JL";
	case line_break::JT => return "JT";
	case line_break::JV => return "JV";
	case line_break::LF => return "LF";
	case line_break::NL => return "NL";
	case line_break::NS => return "NS";
	case line_break::NU => return "NU";
	case line_break::OP => return "OP";
	case line_break::PO => return "PO";
	case line_break::PR => return "PR";
	case line_break::QU => return "QU";
	case line_break::RI => return "RI";
	case line_break::SA => return "SA";
	case line_break::SG => return "SG";
	case line_break::SP => return "SP";
	case line_break::SY => return "SY";
	case line_break::WJ => return "WJ";
	case line_break::ZW => return "ZW";
	case line_break::ZWJ => return "ZWJ";
	};
};
|
4122
vendor/hare-unicode/unicode/ucd_gen.ha
vendored
Normal file
4122
vendor/hare-unicode/unicode/ucd_gen.ha
vendored
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue