add unicode escape sequences

This commit is contained in:
Lobo Torres 2024-12-05 13:43:12 -03:00
parent b767928130
commit a9a72e8f1f

View file

@ -247,15 +247,19 @@ fn scanchar(lex: *lexer) (rune | error) = {
if (isspace(rnn)) { if (isspace(rnn)) {
return rn; return rn;
} else { } else {
memio::appendrune(&namebuf, rn)!; if (rn == 'x') {
memio::concat(&namebuf, scanword(lex)?)!; return scanescape2(lex);
const name = memio::string(&namebuf)!; } else {
for (let i = 0z; i < len(longcharnames); i += 1) { memio::appendrune(&namebuf, rn)!;
if (name == longcharnames[i].0) { memio::concat(&namebuf, scanword(lex)?)!;
return longcharnames[i].1; const name = memio::string(&namebuf)!;
for (let i = 0z; i < len(longcharnames); i += 1) {
if (name == longcharnames[i].0) {
return longcharnames[i].1;
};
}; };
return lex.loc: invalid;
}; };
return lex.loc: invalid;
}; };
case io::EOF => case io::EOF =>
return rn; return rn;
@ -282,6 +286,56 @@ fn scanescape(lex: *lexer) (rune | error) = {
case 'v' => return '\v'; case 'v' => return '\v';
case '\\' => return '\\'; case '\\' => return '\\';
case '"' => return '"'; case '"' => return '"';
case 'x' => return scanescape2(lex)?;
case =>
return lex.loc: invalid;
};
};
fn scanescape2(lex: *lexer) (rune | error) = {
// Handles the `\xhh...;` family of escapes.
// It lives in a separate function because both [[scanescape]] and
// [[scanchar]] use it. Just as [[scanescape]] assumes that the backslash
// has already been consumed, this function assumes that the leading `x`
// has already been consumed before it is called.
const rn = match (nextrune(lex)?) {
case let rn: rune =>
yield rn;
case io::EOF =>
return ("escape sequence", lex.loc.0, lex.loc.1): unterminated;
};
const buf: [6]u8 = [0...];
let buf = memio::fixed(buf);
memio::appendrune(&buf, rn)!;
let count = 1z;
for (true) {
const rn = match (nextrune(lex)?) {
case let rn: rune =>
yield rn;
case io::EOF =>
return ("escape sequence", lex.loc.0, lex.loc.1): unterminated;
};
count += 1;
if (count > 6) {
return lex.loc: invalid;
} else if (rn == ';') {
break;
} else {
memio::appendrune(&buf, rn)!;
};
};
const buf = memio::string(&buf)!;
return match (strconv::stou32(buf, strconv::base::HEX)) {
case let codepoint: u32 =>
return codepoint: rune;
case => case =>
return lex.loc: invalid; return lex.loc: invalid;
}; };