use strings;

// State of the line breaking algorithm for a single input string. Create one
// with [[new_line_breaker]] and advance it with [[next_line_break]].
export type line_breaker = struct {
	input: str,
	iter: strings::iterator,
	// Current position
	pos: uint,
	// Previous position
	ppos: uint,
	// Current line break class
	cur: line_break,
	// Next line break class
	next: line_break,
	// State for specific rules
	lb8a: bool,
	lb21a: bool,
	lb30a: uint,
};

// Creates a new line breaking algorithm state machine. See [[next_line_break]]
// to enumerate the line break opportunities in the input string.
export fn new_line_breaker(input: str) line_breaker = {
	return line_breaker {
		input = input,
		...
	};
};

// Returns the next line break opportunity as a tuple of the rune-wise index of
// the opportunity in the input string and a boolean indicating if the line
// break is mandatory at this location. The line break opportunity directly
// precedes the index returned from this function. Returns done once the input
// has been exhausted (or immediately, if the input is empty).
//
// 	Hello world!
// 	      ^ Line break opportunity at index 6
export fn next_line_break(lb: *line_breaker) ((uint, bool) | done) = {
	if (lb.pos == 0) {
		if (len(lb.input) == 0) {
			return done; // special case
		};

		// First call: prime the state machine with the class of the
		// first rune, resolved with the start-of-text rules.
		lb.iter = strings::iter(lb.input);
		let class = next_lb1_class(lb) as line_break;
		class = resolve_lb2_class(class);
		lb.cur = class;
		lb.next = class;
		lb.lb8a = class == line_break::ZWJ;
	};

	for (const next => next_lb1_class(lb)) {
		const prev = lb.next;
		lb.next = next;
		lb.ppos = lb.pos;
		// pos trails the rune being examined by one; it is advanced
		// when this iteration ends, including on the returns below.
		defer lb.pos += 1;

		// A hard break follows BK, and follows CR unless it is the
		// first half of a CR LF pair.
		const mandatory = lb.cur == line_break::BK
			|| (lb.cur == line_break::CR
				&& lb.next != line_break::LF);
		if (mandatory) {
			lb.cur = resolve_lb2_class(next);
			return (lb.pos + 1, true);
		};

		const can_break = match (lb_simple_case(lb)) {
		case let simple: bool =>
			yield simple;
		case void =>
			yield lb_complex_case(lb, prev);
		};

		// Rule LB8a forbids a break *after* ZWJ. The flag must
		// therefore describe the rune preceding the pair under
		// evaluation, so it is only updated once this pair has been
		// decided; the next iteration then sees whether the rune it
		// follows was a ZWJ. Updating it before the lookup would
		// suppress breaks before ZWJ instead, and would discard the
		// value recorded for the first rune above.
		lb.lb8a = next == line_break::ZWJ;

		if (can_break) {
			return (lb.pos + 1, false);
		};
	};

	return done;
};

// Applies LB1 suggested rules for resolving context-dependent classes.
// Consumes one rune from the breaker's iterator and returns its resolved
// class, or done when no runes remain.
fn next_lb1_class(lb: *line_breaker) (line_break | done) = {
	const r = match (strings::next(&lb.iter)) {
	case done =>
		return done;
	case let r: rune =>
		yield r;
	};

	const raw = rune_line_break(r);
	return switch (raw) {
	case line_break::AI, line_break::SG, line_break::XX =>
		// Ambiguous, surrogate and unknown runes become AL.
		yield line_break::AL;
	case line_break::SA =>
		// SA resolves by general category: marks become CM, anything
		// else becomes AL.
		yield switch (rune_gc(r)) {
		case gc::Mn, gc::Mc =>
			yield line_break::CM;
		case =>
			yield line_break::AL;
		};
	case line_break::CJ =>
		yield line_break::NS;
	case =>
		yield raw;
	};
};

// Applies LB2 suggested rules for resolving the start-of-text line-break class.
fn resolve_lb2_class(lb: line_break) line_break = {
	if (lb == line_break::LF || lb == line_break::NL) {
		return line_break::BK;
	};
	if (lb == line_break::SP) {
		return line_break::WJ;
	};
	return lb;
};

// If this is a simple case, return whether or not this is a break opportunity
// as a boolean. Returns void for special cases. Line-ending classes in lb.next
// also overwrite lb.cur, as a side effect, with BK or CR.
fn lb_simple_case(lb: *line_breaker) (bool | void) = {
	const upcoming = lb.next;

	if (upcoming == line_break::SP) {
		return false;
	};

	if (upcoming == line_break::BK
			|| upcoming == line_break::LF
			|| upcoming == line_break::NL) {
		lb.cur = line_break::BK;
		return false;
	};

	if (upcoming == line_break::CR) {
		lb.cur = line_break::CR;
		return false;
	};

	// Not a simple case; the caller must consult the pair table.
	return;
};

// Handles more complex rules, including pair table lookups via
// linebreak_table.ha.
// The prev argument is the class lb.next held before the caller advanced it.
// Returns true if a break opportunity is to be reported for the lb.cur/lb.next
// pair. Unless the function returns early from one of the combining-mark
// cases, it also updates the LB21a/LB30a state and advances lb.cur to lb.next.
fn lb_complex_case(lb: *line_breaker, prev: line_break) bool = {
	const after_space = prev == line_break::SP;

	// The pair table is indexed relative to OP. Classes that fall outside
	// of the table skip the lookup and start from "no break".
	const base = line_break::OP: uint;
	const row = (lb.cur: uint) - base;
	const col = (lb.next: uint) - base;

	let allowed = false;
	if (row < len(lb_pairs) && col < len(lb_pairs[0])) {
		switch (lb_pairs[row][col]) {
		case bo::DI =>
			// Direct break
			allowed = true;
		case bo::IN =>
			// Indirect break opportunity
			allowed = after_space;
		case bo::CI =>
			// Indirect opportunity for combining marks. Without a
			// preceding space, bail out and leave the rule state
			// and lb.cur untouched.
			if (!after_space) {
				return false;
			};
			allowed = true;
		case bo::CP =>
			// Prohibited for combining marks. Same early exit as
			// above when there is no preceding space.
			if (!after_space) {
				return false;
			};
		case bo::PR => void;
		};
	};

	// Rule LB8a: the flag is maintained by the caller and vetoes the break.
	if (lb.lb8a) {
		allowed = false;
	};

	// Rule LB21a: veto the break after HY or BA when the flag was armed by
	// an HL on the previous pass; otherwise re-arm it if lb.cur is HL.
	const hyphen_like = lb.cur == line_break::HY
		|| lb.cur == line_break::BA;
	if (lb.lb21a && hyphen_like) {
		allowed = false;
		lb.lb21a = false;
	} else {
		lb.lb21a = lb.cur == line_break::HL;
	};

	// Rule LB30a: count consecutive RI classes seen as lb.cur and force a
	// break once two have been counted and another RI follows.
	if (lb.cur != line_break::RI) {
		lb.lb30a = 0;
	} else {
		lb.lb30a += 1;
		if (lb.lb30a == 2 && lb.next == line_break::RI) {
			allowed = true;
			lb.lb30a = 0;
		};
	};

	lb.cur = lb.next;
	return allowed;
};