diff --git a/parse/lex.ha b/parse/lex.ha index ce8e808..603c2a8 100644 --- a/parse/lex.ha +++ b/parse/lex.ha @@ -247,15 +247,19 @@ fn scanchar(lex: *lexer) (rune | error) = { if (isspace(rnn)) { return rn; } else { - memio::appendrune(&namebuf, rn)!; - memio::concat(&namebuf, scanword(lex)?)!; - const name = memio::string(&namebuf)!; - for (let i = 0z; i < len(longcharnames); i += 1) { - if (name == longcharnames[i].0) { - return longcharnames[i].1; + if (rn == 'x') { + return scanescape2(lex); + } else { + memio::appendrune(&namebuf, rn)!; + memio::concat(&namebuf, scanword(lex)?)!; + const name = memio::string(&namebuf)!; + for (let i = 0z; i < len(longcharnames); i += 1) { + if (name == longcharnames[i].0) { + return longcharnames[i].1; + }; }; + return lex.loc: invalid; }; - return lex.loc: invalid; }; case io::EOF => return rn; @@ -282,6 +286,56 @@ fn scanescape(lex: *lexer) (rune | error) = { case 'v' => return '\v'; case '\\' => return '\\'; case '"' => return '"'; + case 'x' => return scanescape2(lex)?; + case => + return lex.loc: invalid; + }; +}; + +fn scanescape2(lex: *lexer) (rune | error) = { + // This handles the `\xhh...;` family of escapes. + // It's on a separate function since both [[scanescape]] and + // [[scanchar]] make use of it. Much like how [[scanescape]] assumes + // that the backslash has already been consumed, this one assumes that + // the leading character has been consumed prior to entering this + // function. + + const rn = match (nextrune(lex)?) { + case let rn: rune => + yield rn; + case io::EOF => + return ("escape sequence", lex.loc.0, lex.loc.1): unterminated; + }; + + const buf: [6]u8 = [0...]; + let buf = memio::fixed(buf); + memio::appendrune(&buf, rn)!; + + let count = 1z; + for (true) { + const rn = match (nextrune(lex)?) { + case let rn: rune => + yield rn; + case io::EOF => + return ("escape sequence", lex.loc.0, lex.loc.1): unterminated; + }; + + count += 1; + + if (count > 6) { + return lex.loc: invalid; + } else if (rn == ';') { + break; + } else { + memio::appendrune(&buf, rn)!; + }; + }; + + const buf = memio::string(&buf)!; + + return match (strconv::stou32(buf, strconv::base::HEX)) { + case let codepoint: u32 => + return codepoint: rune; case => return lex.loc: invalid; };