From 7506679f543d0a85286e07a63b79ef7e13450a5d Mon Sep 17 00:00:00 2001 From: Lobo Torres Date: Fri, 6 Dec 2024 13:56:26 -0300 Subject: [PATCH] lex: tokstr now prints escaped strings this should be the final thing to make [[tokstr]] print syntactically-valid tokens in all cases --- Makefile | 6 +++- kojote/lex/+test.ha | 21 +++++++------- kojote/lex/token.ha | 7 ++--- kojote/lex/util.ha | 69 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 15 deletions(-) create mode 100644 kojote/lex/util.ha diff --git a/Makefile b/Makefile index 94ce8fa..384330c 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,8 @@ HARESRCDIR=${SRCDIR}/hare HARE=hare HAREPATH=vendor/hare-unicode:${HARESRCDIR}/stdlib:${HARESRCDIR}/third-party -.PHONY: check build clean +.PHONY: check build clean print-harepath + check: @env HAREPATH=${HAREPATH} hare test -T +test @@ -17,3 +18,6 @@ build: clean: rm -f kj + +print-harepath: + @echo ${HAREPATH} diff --git a/kojote/lex/+test.ha b/kojote/lex/+test.ha index b534f71..773ad12 100644 --- a/kojote/lex/+test.ha +++ b/kojote/lex/+test.ha @@ -28,11 +28,12 @@ type dummytoken = (ty, value); (ty::BOOLEAN, true), (ty::BOOLEAN, false), ]), - (`#\a #\space #\nul`, + (`#\a #\space #\nul #\嶲`, [ (ty::CHAR, 'a'), (ty::CHAR, ' '), (ty::CHAR, '\0'), + (ty::CHAR, '嶲'), ]), (`"\x0a;\x2014;\x2f9f4;"`, [ @@ -81,13 +82,13 @@ type dummytoken = (ty, value); fn tokeq(have: token, want: dummytoken) bool = have.0 == want.0 && match (have.1) { - case void => - yield true; - case let s: str => - yield want.1 is str && (want.1 as str) == s; - case let r: rune => - yield want.1 is rune && (want.1 as rune) == r; - case let b: bool => - yield want.1 is bool && (want.1 as bool) == b; - }; + case void => + yield true; + case let s: str => + yield want.1 is str && (want.1 as str) == s; + case let r: rune => + yield want.1 is rune && (want.1 as rune) == r; + case let b: bool => + yield want.1 is bool && (want.1 as bool) == b; + }; diff --git 
a/kojote/lex/token.ha b/kojote/lex/token.ha
index 8297fdb..44da63e 100644
--- a/kojote/lex/token.ha
+++ b/kojote/lex/token.ha
@@ -27,8 +27,7 @@ export fn tokstr(tok: token) str = {
 	case ty::KEYWORD =>
 		return fmt::bsprintf(buf, ":{}", tok.1 as str);
 	case ty::STRING =>
-		// TODO: escape string before printing
-		return fmt::bsprintf(buf, "\"{}\"", tok.1 as str);
+		return fmt::bsprintf(buf, "\"{}\"", escape(tok.1 as str));
 	case ty::CHAR =>
 		let rn = tok.1 as rune;
 		for (let i = 0z; i < len(longcharnames); i += 1) {
@@ -37,10 +36,10 @@ export fn tokstr(tok: token) str = {
 					longcharnames[i].0);
 			};
 		};
-		if (ascii::isgraph(rn)) {
+		if (isgraph(rn)) {
 			return fmt::bsprintf(buf, "#\\{}", rn);
 		} else {
-			return fmt::bsprintf(buf, "#\\x{x};", rn: u32);
+			return fmt::bsprintf(buf, "#\\x{:x};", rn: u32);
 		};
 	case ty::NUMBER =>
 		return tok.1 as str;
diff --git a/kojote/lex/util.ha b/kojote/lex/util.ha
new file mode 100644
index 0000000..fb5eacf
--- /dev/null
+++ b/kojote/lex/util.ha
@@ -0,0 +1,69 @@
+use ascii;
+use fmt;
+use memio;
+use strings;
+use io;
+use sort;
+
+use unicode;
+
+def graphtable: [unicode::gc::Zs: size + 1]bool = [
+	false, false, false, false, false, // C
+	true, true, true, true, true, // L
+	true, true, true, // M
+	true, true, true, // N
+	true, true, true, true, true, true, true, // P (Pc Pd Pe Pf Pi Po Ps)
+	true, true, true, true, // S
+	false, false, false, // Z (Zs is the highest index, hence Zs + 1)
+];
+
+// Sorted by first rune so [[sort::search]] can use escapetable_cmpfunc.
+def escapetable: [_](rune, rune) = [
+	('\0', '0'),
+	('\a', 'a'),
+	('\b', 'b'),
+	('\t', 't'),
+	('\n', 'n'),
+	('\v', 'v'),
+	('\f', 'f'),
+	('\r', 'r'),
+	('\x1b', 'e'),
+	('"', '"'),
+	('\\', '\\'),
+];
+
+fn escapetable_cmpfunc(a: const *opaque, b: const *opaque) int =
+	(a: *(rune, rune)).0: int - (b: *(rune, rune)).0: int;
+
+// Returns whether a rune is graphical; ' ' is special-cased as graphical.
+fn isgraph(r: rune) bool =
+	if (r == ' ') true else graphtable[unicode::rune_gc(r)];
+
+// Escapes a string.
+// Runes found in escapetable become a backslash followed by their mapped
+// rune; other graphical runes are copied as-is; the rest become `\x<hex>;`,
+// formatted directly into the stream so no scratch buffer limits the size.
+// The result borrows a static buffer that the next call overwrites.
+fn escape(s: str) str = {
+	static let buf: [2048]u8 = [0...];
+	let buf = memio::fixed(buf);
+	let iter = strings::iter(s);
+
+	for (let ch => strings::next(&iter)) {
+		// Table first: '"' and '\\' are graphical yet need escaping.
+		match (sort::search(escapetable: []opaque,
+			size((rune, rune)), &ch: const *opaque,
+			&escapetable_cmpfunc)) {
+		case let sz: size =>
+			memio::appendrune(&buf, '\\')!;
+			memio::appendrune(&buf, escapetable[sz].1)!;
+		case void =>
+			if (isgraph(ch)) {
+				memio::appendrune(&buf, ch)!;
+			} else {
+				fmt::fprintf(&buf, `\x{:x};`, ch: u32)!;
+			};
+		};
+	};
+	return memio::string(&buf)!;
+};