add unicode escape sequences

This commit is contained in:
Lobo Torres 2024-12-05 13:43:12 -03:00
parent b767928130
commit a9a72e8f1f

View file

@ -246,6 +246,9 @@ fn scanchar(lex: *lexer) (rune | error) = {
unget(lex, rnn);
if (isspace(rnn)) {
return rn;
} else {
if (rn == 'x') {
return scanescape2(lex);
} else {
memio::appendrune(&namebuf, rn)!;
memio::concat(&namebuf, scanword(lex)?)!;
@ -257,6 +260,7 @@ fn scanchar(lex: *lexer) (rune | error) = {
};
return lex.loc: invalid;
};
};
case io::EOF =>
return rn;
};
@ -282,6 +286,56 @@ fn scanescape(lex: *lexer) (rune | error) = {
case 'v' => return '\v';
case '\\' => return '\\';
case '"' => return '"';
case 'x' => return scanescape2(lex)?;
case =>
return lex.loc: invalid;
};
};
fn scanescape2(lex: *lexer) (rune | error) = {
// This handles the `\xhh...;` family of escapes.
// It lives in a separate function because both [[scanescape]] and
// [[scanchar]] make use of it. Just as [[scanescape]] assumes that the
// backslash has already been consumed, this function assumes that the
// leading `x` has already been consumed before it is called.
const rn = match (nextrune(lex)?) {
case let rn: rune =>
yield rn;
case io::EOF =>
return ("escape sequence", lex.loc.0, lex.loc.1): unterminated;
};
const buf: [6]u8 = [0...];
let buf = memio::fixed(buf);
memio::appendrune(&buf, rn)!;
let count = 1z;
for (true) {
const rn = match (nextrune(lex)?) {
case let rn: rune =>
yield rn;
case io::EOF =>
return ("escape sequence", lex.loc.0, lex.loc.1): unterminated;
};
count += 1;
if (count > 6) {
return lex.loc: invalid;
} else if (rn == ';') {
break;
} else {
memio::appendrune(&buf, rn)!;
};
};
const buf = memio::string(&buf)!;
return match (strconv::stou32(buf, strconv::base::HEX)) {
case let codepoint: u32 =>
return codepoint: rune;
case =>
return lex.loc: invalid;
};