Compare commits

...

2 commits

Author SHA1 Message Date
1e5ed47497 parse: show error on test failure 2024-12-05 13:43:31 -03:00
a9a72e8f1f add unicode escape sequences 2024-12-05 13:43:12 -03:00
3 changed files with 75 additions and 10 deletions

View file

@ -13,6 +13,8 @@ use io;
mkword("def")]),
(`#t #f`, [true, false]),
(`#\a #\space #\nul`, ['a', ' ', '\0']),
(`"\x0a;" "\x2014;" "\x2f9f4;"`, ["\n", "", "嶲"]),
(`#\x #\x0a; #\x2014; #\x2f9f4;`, ['x', '\n', '—', '嶲']),
];
for (let i = 0z; i < len(cases); i += 1) {
@ -23,7 +25,16 @@ use io;
for (let j = 0z; j < len(cases[i].1); j += 1) {
const want = cases[i].1[j];
const have = lex(&lexer)! as token;
const have = match (lex(&lexer)) {
case let tok: token =>
yield tok;
case io::EOF =>
assert(false, "reached EOF");
return;
case let err: error =>
assert(false, strerror(err));
return;
};
if (!tokeq(want, have)) {
fmt::printfln("Case {}: {}", i, cases[i].0)!;

View file

@ -247,15 +247,19 @@ fn scanchar(lex: *lexer) (rune | error) = {
if (isspace(rnn)) {
return rn;
} else {
memio::appendrune(&namebuf, rn)!;
memio::concat(&namebuf, scanword(lex)?)!;
const name = memio::string(&namebuf)!;
for (let i = 0z; i < len(longcharnames); i += 1) {
if (name == longcharnames[i].0) {
return longcharnames[i].1;
if (rn == 'x') {
return scanescape2(lex);
} else {
memio::appendrune(&namebuf, rn)!;
memio::concat(&namebuf, scanword(lex)?)!;
const name = memio::string(&namebuf)!;
for (let i = 0z; i < len(longcharnames); i += 1) {
if (name == longcharnames[i].0) {
return longcharnames[i].1;
};
};
return lex.loc: invalid;
};
return lex.loc: invalid;
};
case io::EOF =>
return rn;
@ -282,6 +286,56 @@ fn scanescape(lex: *lexer) (rune | error) = {
case 'v' => return '\v';
case '\\' => return '\\';
case '"' => return '"';
case 'x' => return scanescape2(lex)?;
case =>
return lex.loc: invalid;
};
};
// Scans the remainder of a `\x<hex digits>;` escape and returns the rune it
// denotes. It lives in its own function because both [[scanescape]] and
// [[scanchar]] use it. Much like [[scanescape]] expects the backslash to have
// been consumed already, this function expects the leading 'x' to have been
// consumed by the caller.
//
// Returns [[unterminated]] if the input ends before the closing ';', and
// [[invalid]] if the escape is empty, contains a non-hexadecimal character,
// has more than six digits, or does not name a Unicode scalar value.
fn scanescape2(lex: *lexer) (rune | error) = {
	// U+10FFFF, the largest code point, needs six hexadecimal digits.
	const maxdigits = 6z;

	let codepoint = 0u32;
	let ndigits = 0z;
	for (true) {
		const rn = match (nextrune(lex)?) {
		case let rn: rune =>
			yield rn;
		case io::EOF =>
			return ("escape sequence", lex.loc.0, lex.loc.1): unterminated;
		};
		// Check for the terminator before enforcing the digit limit so
		// that a full six-digit escape such as `\x10ffff;` is accepted.
		if (rn == ';') {
			break;
		};

		// Decode the digit by hand: rejecting anything that is not an
		// ASCII hexadecimal digit right away means arbitrary (possibly
		// multi-byte) input can never overflow an intermediate buffer.
		const c = rn: u32;
		let digit = 0u32;
		if (c >= ('0': u32) && c <= ('9': u32)) {
			digit = c - ('0': u32);
		} else if (c >= ('a': u32) && c <= ('f': u32)) {
			digit = c - ('a': u32) + 10;
		} else if (c >= ('A': u32) && c <= ('F': u32)) {
			digit = c - ('A': u32) + 10;
		} else {
			return lex.loc: invalid;
		};

		ndigits += 1;
		if (ndigits > maxdigits) {
			return lex.loc: invalid;
		};
		codepoint = codepoint * 16 + digit;
	};

	// `\x;` carries no digits at all.
	if (ndigits == 0) {
		return lex.loc: invalid;
	};
	// Only Unicode scalar values may be turned into a rune: reject
	// anything past U+10FFFF as well as the surrogate range.
	if (codepoint > 0x10FFFF) {
		return lex.loc: invalid;
	};
	if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
		return lex.loc: invalid;
	};
	return codepoint: rune;

View file

@ -22,10 +22,10 @@ export fn strerror(err: error) const str = {
match (err) {
case let err: invalid =>
return fmt::bsprintf(buf,
"{}:{}: Invalid token found", err.0, err.1);
"Invalid token found at {}:{}", err.0, err.1);
case let err: unterminated =>
return fmt::bsprintf(buf,
"{}:{}: Unterminated {} found", err.1, err.2, err.0);
"Unterminated {} found at {}:{}", err.0, err.1, err.2);
case let err: io::error =>
return io::strerror(err);
};