lex: tokstr now prints escaped strings

This should be the final change needed to make [[tokstr]] print
syntactically valid tokens in all cases.
This commit is contained in:
Lobo Torres 2024-12-06 13:56:26 -03:00
parent b09e0b37fb
commit 7506679f54
4 changed files with 88 additions and 15 deletions

View file

@ -7,7 +7,8 @@ HARESRCDIR=${SRCDIR}/hare
HARE=hare HARE=hare
HAREPATH=vendor/hare-unicode:${HARESRCDIR}/stdlib:${HARESRCDIR}/third-party HAREPATH=vendor/hare-unicode:${HARESRCDIR}/stdlib:${HARESRCDIR}/third-party
.PHONY: check build clean .PHONY: check build clean print-harepath
check: check:
@env HAREPATH=${HAREPATH} hare test -T +test @env HAREPATH=${HAREPATH} hare test -T +test
@ -17,3 +18,6 @@ build:
clean: clean:
rm -f kj rm -f kj
print-harepath:
@echo ${HAREPATH}

View file

@ -28,11 +28,12 @@ type dummytoken = (ty, value);
(ty::BOOLEAN, true), (ty::BOOLEAN, true),
(ty::BOOLEAN, false), (ty::BOOLEAN, false),
]), ]),
(`#\a #\space #\nul`, (`#\a #\space #\nul #\嶲`,
[ [
(ty::CHAR, 'a'), (ty::CHAR, 'a'),
(ty::CHAR, ' '), (ty::CHAR, ' '),
(ty::CHAR, '\0'), (ty::CHAR, '\0'),
(ty::CHAR, '嶲'),
]), ]),
(`"\x0a;\x2014;\x2f9f4;"`, (`"\x0a;\x2014;\x2f9f4;"`,
[ [

View file

@ -27,8 +27,7 @@ export fn tokstr(tok: token) str = {
case ty::KEYWORD => case ty::KEYWORD =>
return fmt::bsprintf(buf, ":{}", tok.1 as str); return fmt::bsprintf(buf, ":{}", tok.1 as str);
case ty::STRING => case ty::STRING =>
// TODO: escape string before printing return fmt::bsprintf(buf, "\"{}\"", escape(tok.1 as str));
return fmt::bsprintf(buf, "\"{}\"", tok.1 as str);
case ty::CHAR => case ty::CHAR =>
let rn = tok.1 as rune; let rn = tok.1 as rune;
for (let i = 0z; i < len(longcharnames); i += 1) { for (let i = 0z; i < len(longcharnames); i += 1) {
@ -37,10 +36,10 @@ export fn tokstr(tok: token) str = {
longcharnames[i].0); longcharnames[i].0);
}; };
}; };
if (ascii::isgraph(rn)) { if (isgraph(rn)) {
return fmt::bsprintf(buf, "#\\{}", rn); return fmt::bsprintf(buf, "#\\{}", rn);
} else { } else {
return fmt::bsprintf(buf, "#\\x{x};", rn: u32); return fmt::bsprintf(buf, "#\\x{:x};", rn: u32);
}; };
case ty::NUMBER => case ty::NUMBER =>
return tok.1 as str; return tok.1 as str;

69
kojote/lex/util.ha Normal file
View file

@ -0,0 +1,69 @@
use ascii;
use fmt;
use memio;
use strings;
use io;
use sort;
use unicode;
// Maps each Unicode general category to whether runes of that category are
// considered graphical. The table is indexed by the [[unicode::gc]] value
// returned from [[unicode::rune_gc]]; each row below covers one major class.
//
// Unicode defines 30 general categories: 5 C, 5 L, 3 M, 3 N, 7 P, 4 S and 3 Z.
// The table therefore needs one slot per category, including Zs itself, so
// that every possible index is in bounds.
//
// NOTE(review): the length assumes unicode::gc::Zs is the last (highest) enum
// value and that the enum is ordered alphabetically by category name, as the
// row comments imply -- confirm against the vendored hare-unicode.
def graphtable: [unicode::gc::Zs: size + 1]bool = [
	false, false, false, false, false, // C
	true, true, true, true, true, // L
	true, true, true, // M
	true, true, true, // N
	true, true, true, true, true, true, true, // P
	true, true, true, true, // S
	false, false, false, // Z
];
// Runes that have a single-letter backslash escape. Each entry is a pair of
// (rune to escape, letter written after the backslash).
//
// Sorted by ascending code point of the first field so that it can be used
// with [[sort::search]]; keep it sorted when adding entries.
def escapetable: [_](rune, rune) = [
('\0', '0'),
('\a', 'a'),
('\b', 'b'),
('\t', 't'),
('\n', 'n'),
('\v', 'v'),
('\f', 'f'),
('\r', 'r'),
('\x1b', 'e'),
('"', '"'),
('\\', '\\'),
];
// Comparison callback for searching [[escapetable]]: orders two entries by the
// code point of their first field. Returns a negative value, zero or a
// positive value when a sorts before, equal to or after b.
//
// Only the first tuple field of either argument is ever read.
fn escapetable_cmpfunc(a: const *opaque, b: const *opaque) int = {
	const lhs = (a: *(rune, rune)).0: int;
	const rhs = (b: *(rune, rune)).0: int;
	return lhs - rhs;
};
// Reports whether a rune should be treated as a graphical character. The
// ASCII space is always accepted; every other rune is classified by looking up
// its Unicode general category in [[graphtable]].
fn isgraph(r: rune) bool = {
	if (r == ' ') {
		return true;
	};
	return graphtable[unicode::rune_gc(r)];
};
// Escapes a string so that it can be printed between double quotes as a
// syntactically valid string token.
//
// Each rune is handled as follows:
// - runes listed in [[escapetable]] become a backslash followed by their
//   escape letter (this includes '"' and '\\');
// - any other rune for which [[isgraph]] holds is copied verbatim;
// - everything else is written as a `\x<hex>;` escape of its code point.
//
// The result is built in a static 2048-byte buffer, so the returned string is
// borrowed and is overwritten by the next call. All write errors are asserted
// with `!`; presumably an input whose escaped form does not fit in the buffer
// aborts the program.
fn escape(s: str) str = {
	static let buf: [2048]u8 = [0...];
	let out = memio::fixed(buf);
	let iter = strings::iter(s);
	for (let ch => strings::next(&iter)) {
		// The escape table must be consulted before isgraph: '"' and
		// '\\' are graphical, yet copying them verbatim would end or
		// corrupt the string literal being printed.
		match (sort::search(
			escapetable: []opaque,
			size((rune, rune)),
			&ch: const *opaque,
			&escapetable_cmpfunc)) {
		case let idx: size =>
			memio::appendrune(&out, '\\')!;
			memio::appendrune(&out, escapetable[idx].1)!;
		case void =>
			if (isgraph(ch)) {
				memio::appendrune(&out, ch)!;
			} else {
				// Format straight into the stream rather than
				// through a small scratch buffer: the longest
				// escape, `\x10ffff;`, is nine bytes long.
				fmt::fprintf(&out, `\x{:x};`, ch: u32)!;
			};
		};
	};
	return memio::string(&out)!;
};