lex: tokstr now prints escaped strings
This should be the final change needed to make [[tokstr]] print syntactically valid tokens in all cases.
This commit is contained in:
parent
b09e0b37fb
commit
7506679f54
4 changed files with 88 additions and 15 deletions
6
Makefile
6
Makefile
|
@ -7,7 +7,8 @@ HARESRCDIR=${SRCDIR}/hare
|
|||
HARE=hare
|
||||
HAREPATH=vendor/hare-unicode:${HARESRCDIR}/stdlib:${HARESRCDIR}/third-party
|
||||
|
||||
.PHONY: check build clean
|
||||
.PHONY: check build clean print-harepath
|
||||
|
||||
|
||||
check:
|
||||
@env HAREPATH=${HAREPATH} hare test -T +test
|
||||
|
@ -17,3 +18,6 @@ build:
|
|||
|
||||
clean:
|
||||
rm -f kj
|
||||
|
||||
print-harepath:
|
||||
@echo ${HAREPATH}
|
||||
|
|
|
@ -28,11 +28,12 @@ type dummytoken = (ty, value);
|
|||
(ty::BOOLEAN, true),
|
||||
(ty::BOOLEAN, false),
|
||||
]),
|
||||
(`#\a #\space #\nul`,
|
||||
(`#\a #\space #\nul #\嶲`,
|
||||
[
|
||||
(ty::CHAR, 'a'),
|
||||
(ty::CHAR, ' '),
|
||||
(ty::CHAR, '\0'),
|
||||
(ty::CHAR, '嶲'),
|
||||
]),
|
||||
(`"\x0a;\x2014;\x2f9f4;"`,
|
||||
[
|
||||
|
|
|
@ -27,8 +27,7 @@ export fn tokstr(tok: token) str = {
|
|||
case ty::KEYWORD =>
|
||||
return fmt::bsprintf(buf, ":{}", tok.1 as str);
|
||||
case ty::STRING =>
|
||||
// TODO: escape string before printing
|
||||
return fmt::bsprintf(buf, "\"{}\"", tok.1 as str);
|
||||
return fmt::bsprintf(buf, "\"{}\"", escape(tok.1 as str));
|
||||
case ty::CHAR =>
|
||||
let rn = tok.1 as rune;
|
||||
for (let i = 0z; i < len(longcharnames); i += 1) {
|
||||
|
@ -37,10 +36,10 @@ export fn tokstr(tok: token) str = {
|
|||
longcharnames[i].0);
|
||||
};
|
||||
};
|
||||
if (ascii::isgraph(rn)) {
|
||||
if (isgraph(rn)) {
|
||||
return fmt::bsprintf(buf, "#\\{}", rn);
|
||||
} else {
|
||||
return fmt::bsprintf(buf, "#\\x{x};", rn: u32);
|
||||
return fmt::bsprintf(buf, "#\\x{:x};", rn: u32);
|
||||
};
|
||||
case ty::NUMBER =>
|
||||
return tok.1 as str;
|
||||
|
|
69
kojote/lex/util.ha
Normal file
69
kojote/lex/util.ha
Normal file
|
@ -0,0 +1,69 @@
|
|||
use ascii;
|
||||
use fmt;
|
||||
use memio;
|
||||
use strings;
|
||||
use io;
|
||||
use sort;
|
||||
|
||||
use unicode;
|
||||
|
||||
// Lookup table indexed by a [[unicode::gc]] value, telling whether runes of
// that general category are treated as graphical. Rows are grouped by major
// category; within a row the entries follow the alphabetical order of the
// two-letter category names.
//
// The table needs one entry for every category up to and including Zs, hence
// the "+ 1" in the length: with a length of exactly unicode::gc::Zs, looking
// up a Zs rune would index one past the end. Unicode defines seven punctuation
// categories (Pc, Pd, Pe, Pf, Pi, Po, Ps), so the P row has seven entries;
// with only six, every following category is shifted by one slot.
//
// NOTE(review): assumes unicode::gc enumerates the 30 general categories
// alphabetically (Cc first, Zs last) -- confirm against hare-unicode.
def graphtable: [unicode::gc::Zs: size + 1]bool = [
	false, false, false, false, false, // C: Cc Cf Cn Co Cs
	true, true, true, true, true, // L: Ll Lm Lo Lt Lu
	true, true, true, // M: Mc Me Mn
	true, true, true, // N: Nd Nl No
	true, true, true, true, true, true, true, // P: Pc Pd Pe Pf Pi Po Ps
	true, true, true, true, // S: Sc Sk Sm So
	false, false, false, // Z: Zl Zp Zs
];
|
||||
|
||||
// Table of runes that have a dedicated backslash escape. The first element of
// each pair is the rune as it appears in a string; the second is the character
// written after the backslash when that rune is escaped.
//
// The entries are kept in ascending order of the first element so that the
// table can be binary-searched with [[sort::search]]; keep it sorted when
// adding new entries.
|
||||
def escapetable: [_](rune, rune) = [
|
||||
('\0', '0'),
|
||||
('\a', 'a'),
|
||||
('\b', 'b'),
|
||||
('\t', 't'),
|
||||
('\n', 'n'),
|
||||
('\v', 'v'),
|
||||
('\f', 'f'),
|
||||
('\r', 'r'),
|
||||
('\x1b', 'e'),
|
||||
('"', '"'),
|
||||
('\\', '\\'),
|
||||
|
||||
];
|
||||
|
||||
// Comparison callback for searching escapetable. Both pointers are
// reinterpreted as (rune, rune) pairs, but only the first element of each is
// read. The result is the difference of those two runes converted to int:
// negative when a sorts first, zero when equal, positive otherwise.
fn escapetable_cmpfunc(a: const *opaque, b: const *opaque) int = {
	const lhs = a: *(rune, rune);
	const rhs = b: *(rune, rune);
	return lhs.0: int - rhs.0: int;
};
|
||||
|
||||
// Reports whether r is treated as a graphical rune. A plain space (' ') always
// counts as graphical; for every other rune the answer is the graphtable entry
// for the general category returned by [[unicode::rune_gc]].
fn isgraph(r: rune) bool = {
	if (r == ' ') {
		return true;
	};
	return graphtable[unicode::rune_gc(r)];
};
|
||||
|
||||
// Escapes a string so that it can be printed between double quotes as a
// syntactically valid string token.
//
// Each rune is handled as follows:
// - a rune listed in escapetable (this includes '"' and '\\') is written as a
//   backslash followed by its escape character;
// - any other rune for which isgraph is true is copied verbatim;
// - everything else is written as a hexadecimal escape of the form \x<hex>;
//
// The returned string is borrowed from a static 2048-byte buffer, so it is
// overwritten by the next call. Every write is asserted with "!", so a failed
// write aborts the program (presumably this is what happens when the escaped
// text does not fit in the buffer -- confirm against memio::fixed).
fn escape(s: str) str = {
	static let storage: [2048]u8 = [0...];
	let out = memio::fixed(storage);
	let iter = strings::iter(s);

	for (let ch => strings::next(&iter)) {
		// The escape table must be consulted before isgraph: '"' and
		// '\\' are graphical, so testing isgraph first would copy them
		// through unescaped and produce an invalid string token.
		match (sort::search(
			escapetable: []opaque,
			size((rune, rune)),
			&ch: const *opaque,
			&escapetable_cmpfunc)) {
		case let idx: size =>
			memio::appendrune(&out, '\\')!;
			memio::appendrune(&out, escapetable[idx].1)!;
		case void =>
			if (isgraph(ch)) {
				memio::appendrune(&out, ch)!;
			} else {
				// Format straight into the output stream. An
				// 8-byte scratch buffer is too small for code
				// points >= U+100000: "\x" + 6 hex digits + ";"
				// is 9 bytes.
				fmt::fprintf(&out, `\x{:x};`, ch: u32)!;
			};
		};
	};

	return memio::string(&out)!;
};
|
Loading…
Reference in a new issue