Compare commits
No commits in common. "c372d85de297f2e0829312f6ae55a56cf0dd1afa" and "b09e0b37fb1e5cda4ba545c244c64421b292a243" have entirely different histories.
c372d85de2
...
b09e0b37fb
6 changed files with 78 additions and 200 deletions
6
Makefile
6
Makefile
|
@ -7,8 +7,7 @@ HARESRCDIR=${SRCDIR}/hare
|
||||||
HARE=hare
|
HARE=hare
|
||||||
HAREPATH=vendor/hare-unicode:${HARESRCDIR}/stdlib:${HARESRCDIR}/third-party
|
HAREPATH=vendor/hare-unicode:${HARESRCDIR}/stdlib:${HARESRCDIR}/third-party
|
||||||
|
|
||||||
.PHONY: check build clean print-harepath
|
.PHONY: check build clean
|
||||||
|
|
||||||
|
|
||||||
check:
|
check:
|
||||||
@env HAREPATH=${HAREPATH} hare test -T +test
|
@env HAREPATH=${HAREPATH} hare test -T +test
|
||||||
|
@ -18,6 +17,3 @@ build:
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f kj
|
rm -f kj
|
||||||
|
|
||||||
print-harepath:
|
|
||||||
@echo ${HAREPATH}
|
|
||||||
|
|
|
@ -2,10 +2,11 @@ use memio;
|
||||||
use fmt;
|
use fmt;
|
||||||
use strings;
|
use strings;
|
||||||
use io;
|
use io;
|
||||||
use os;
|
|
||||||
|
type dummytoken = (ty, value);
|
||||||
|
|
||||||
@test fn next() void = {
|
@test fn next() void = {
|
||||||
const cases: [_](str, [](ty, value)) = [
|
const cases: [_](str, []dummytoken) = [
|
||||||
(`"hello" \greeting def`,
|
(`"hello" \greeting def`,
|
||||||
[
|
[
|
||||||
(ty::STRING, "hello"),
|
(ty::STRING, "hello"),
|
||||||
|
@ -27,12 +28,11 @@ use os;
|
||||||
(ty::BOOLEAN, true),
|
(ty::BOOLEAN, true),
|
||||||
(ty::BOOLEAN, false),
|
(ty::BOOLEAN, false),
|
||||||
]),
|
]),
|
||||||
(`#\a #\space #\nul #\嶲`,
|
(`#\a #\space #\nul`,
|
||||||
[
|
[
|
||||||
(ty::CHAR, 'a'),
|
(ty::CHAR, 'a'),
|
||||||
(ty::CHAR, ' '),
|
(ty::CHAR, ' '),
|
||||||
(ty::CHAR, '\0'),
|
(ty::CHAR, '\0'),
|
||||||
(ty::CHAR, '嶲'),
|
|
||||||
]),
|
]),
|
||||||
(`"\x0a;\x2014;\x2f9f4;"`,
|
(`"\x0a;\x2014;\x2f9f4;"`,
|
||||||
[
|
[
|
||||||
|
@ -47,17 +47,16 @@ use os;
|
||||||
]),
|
]),
|
||||||
];
|
];
|
||||||
|
|
||||||
for (let tcase .. cases) {
|
for (let i = 0z; i < len(cases); i += 1) {
|
||||||
const src = strings::toutf8(tcase.0);
|
const src = strings::toutf8(cases[i].0);
|
||||||
const src = memio::fixed(src);
|
const src = memio::fixed(src);
|
||||||
const lexer = newlexer(&src, "<string>");
|
const lexer = newlexer(&src, "<string>");
|
||||||
defer close(&lexer);
|
defer close(&lexer);
|
||||||
|
|
||||||
for (let want .. tcase.1) {
|
for (let j = 0z; j < len(cases[i].1); j += 1) {
|
||||||
|
const want = cases[i].1[j];
|
||||||
const have = match (next(&lexer)) {
|
const have = match (next(&lexer)) {
|
||||||
case let tok: token =>
|
case let tok: token =>
|
||||||
tokrepr(os::stdout, tok)!;
|
|
||||||
fmt::print(' ')!;
|
|
||||||
yield tok;
|
yield tok;
|
||||||
case io::EOF =>
|
case io::EOF =>
|
||||||
assert(false, "reached EOF");
|
assert(false, "reached EOF");
|
||||||
|
@ -68,28 +67,27 @@ use os;
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!tokeq(have, want)) {
|
if (!tokeq(have, want)) {
|
||||||
fmt::errorf("Expected:\n\t")!;
|
fmt::printf("Expected:\n\t")!;
|
||||||
tokrepr(os::stderr,
|
fmt::println(tokstr((want.0, want.1, location{ ... })))!;
|
||||||
(want.0, want.1, location{ ... }))!;
|
fmt::printf("Got:\n\t")!;
|
||||||
fmt::errorf("\nGot:\n\t")!;
|
fmt::println(tokstr(have))!;
|
||||||
tokrepr(os::stderr, have)!;
|
assert(false);
|
||||||
assert(false, "test case doesn't match expectation");
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
assert(next(&lexer) is io::EOF, "didn't reach EOF at the end of test case");
|
assert(next(&lexer) is io::EOF);
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
fn tokeq(have: token, want: (ty, value)) bool =
|
fn tokeq(have: token, want: dummytoken) bool =
|
||||||
have.0 == want.0 && match (have.1) {
|
have.0 == want.0 && match (have.1) {
|
||||||
case void =>
|
case void =>
|
||||||
yield true;
|
yield true;
|
||||||
case let s: str =>
|
case let s: str =>
|
||||||
yield want.1 is str && (want.1 as str) == s;
|
yield want.1 is str && (want.1 as str) == s;
|
||||||
case let r: rune =>
|
case let r: rune =>
|
||||||
yield want.1 is rune && (want.1 as rune) == r;
|
yield want.1 is rune && (want.1 as rune) == r;
|
||||||
case let b: bool =>
|
case let b: bool =>
|
||||||
yield want.1 is bool && (want.1 as bool) == b;
|
yield want.1 is bool && (want.1 as bool) == b;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1,31 +1,29 @@
|
||||||
use ascii;
|
use ascii;
|
||||||
use strings;
|
|
||||||
use bufio;
|
use bufio;
|
||||||
use encoding::utf8;
|
use encoding::utf8;
|
||||||
use io;
|
use io;
|
||||||
use memio;
|
use memio;
|
||||||
use unicode;
|
use unicode;
|
||||||
use strconv;
|
use strconv;
|
||||||
use sort;
|
|
||||||
|
use fmt;
|
||||||
|
use strings;
|
||||||
|
|
||||||
def longcharnames: [_](str, rune) = [
|
def longcharnames: [_](str, rune) = [
|
||||||
("alarm", '\x07'),
|
("nul", '\u0000'),
|
||||||
("backspace", '\x08'),
|
("alarm", '\u0007'),
|
||||||
("delete", '\x7f'),
|
("backspace", '\u0008'),
|
||||||
("esc", '\x1b'),
|
("tab", '\u0009'),
|
||||||
("linefeed", '\x0a'),
|
("newline", '\u000a'),
|
||||||
("newline", '\x0a'),
|
("linefeed", '\u000a'),
|
||||||
("nul", '\0'),
|
("vtab", '\u000b'),
|
||||||
("page", '\x0c'),
|
("page", '\u000c'),
|
||||||
("return", '\x0d'),
|
("return", '\u000d'),
|
||||||
("space", ' '),
|
("esc", '\u001b'),
|
||||||
("tab", '\x09'),
|
("space", '\u0020'),
|
||||||
("vtab", '\x0b'),
|
("delete", '\u007f'),
|
||||||
];
|
];
|
||||||
|
|
||||||
fn longcharnames_namecmp(a: const *opaque, b: const *opaque) int =
|
|
||||||
strings::compare((a: *(str, rune)).0, (b: *(str, rune)).0);
|
|
||||||
|
|
||||||
export type lexer = struct {
|
export type lexer = struct {
|
||||||
in: io::handle,
|
in: io::handle,
|
||||||
strbuf: memio::stream,
|
strbuf: memio::stream,
|
||||||
|
@ -266,17 +264,13 @@ fn scanchar(lex: *lexer) (token | error) = {
|
||||||
memio::appendrune(&namebuf, rn)!;
|
memio::appendrune(&namebuf, rn)!;
|
||||||
memio::concat(&namebuf, scanword(lex)?)!;
|
memio::concat(&namebuf, scanword(lex)?)!;
|
||||||
const name = memio::string(&namebuf)!;
|
const name = memio::string(&namebuf)!;
|
||||||
|
for (let i = 0z; i < len(longcharnames); i += 1) {
|
||||||
match (sort::search(longcharnames: []opaque,
|
if (name == longcharnames[i].0) {
|
||||||
size((str, rune)),
|
return mktoken(lex, ty::CHAR,
|
||||||
&name: const *opaque,
|
longcharnames[i].1);
|
||||||
&longcharnames_namecmp)) {
|
};
|
||||||
case let ix: size =>
|
|
||||||
return mktoken(lex, ty::CHAR,
|
|
||||||
longcharnames[ix].1);
|
|
||||||
case void =>
|
|
||||||
return mkerror(lex, "invalid named character literal");
|
|
||||||
};
|
};
|
||||||
|
return mkerror(lex, "invalid named character literal");
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,71 +0,0 @@
|
||||||
use ascii;
|
|
||||||
use encoding::utf8;
|
|
||||||
use fmt;
|
|
||||||
use io;
|
|
||||||
use memio;
|
|
||||||
use sort;
|
|
||||||
use strings;
|
|
||||||
|
|
||||||
use unicode;
|
|
||||||
|
|
||||||
// Sorted to use with [[sort::search]].
|
|
||||||
def escapetable: [_](rune, rune) = [
|
|
||||||
('\0', '0'),
|
|
||||||
('\a', 'a'),
|
|
||||||
('\b', 'b'),
|
|
||||||
('\t', 't'),
|
|
||||||
('\n', 'n'),
|
|
||||||
('\v', 'v'),
|
|
||||||
('\f', 'f'),
|
|
||||||
('\r', 'r'),
|
|
||||||
('\x1b', 'e'),
|
|
||||||
('"', '"'),
|
|
||||||
('\\', '\\'),
|
|
||||||
];
|
|
||||||
|
|
||||||
fn escapetable_cmpfunc(a: const *opaque, b: const *opaque) int =
|
|
||||||
(a: *(rune, rune)).0: int - (b: *(rune, rune)).0: int;
|
|
||||||
|
|
||||||
// Quotes a Kojote string and writes it to the provided I/O handle.
|
|
||||||
export fn quote(sink: io::handle, s: str) (size | io::error) = {
|
|
||||||
if (len(s) == 0) {
|
|
||||||
return io::writeall(sink, ['\"', '\"'])?;
|
|
||||||
};
|
|
||||||
|
|
||||||
let z = io::writeall(sink, ['\"'])?;
|
|
||||||
|
|
||||||
const iter = strings::iter(s);
|
|
||||||
for (let rn => strings::next(&iter)) {
|
|
||||||
if (isgraph(rn)) {
|
|
||||||
z += io::writeall(sink, utf8::encoderune(rn))?;
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
|
|
||||||
match (sort::search(
|
|
||||||
escapetable: []opaque,
|
|
||||||
size((rune, rune)),
|
|
||||||
&rn: const *opaque,
|
|
||||||
&escapetable_cmpfunc)) {
|
|
||||||
case let ix: size =>
|
|
||||||
z += io::writeall(sink, ['\\'])?;
|
|
||||||
z += io::writeall(sink,
|
|
||||||
utf8::encoderune(escapetable[ix].1))?;
|
|
||||||
case void =>
|
|
||||||
let buf: [16]u8 = [0...];
|
|
||||||
const esc = fmt::bsprintf(buf, `\x{:x};`, rn: u32);
|
|
||||||
z += io::writeall(sink, strings::toutf8(esc))?;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
z += io::writeall(sink, ['\"'])?;
|
|
||||||
return z;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Quotes a Kojote string and returns a new string. The caller must free the
|
|
||||||
// return value.
|
|
||||||
export fn quotestr(s: str) str = {
|
|
||||||
const sink = memio::dynamic();
|
|
||||||
quote(&sink, s)!;
|
|
||||||
return memio::string(&sink)!;
|
|
||||||
};
|
|
||||||
|
|
|
@ -1,76 +1,51 @@
|
||||||
use ascii;
|
use ascii;
|
||||||
use fmt;
|
use fmt;
|
||||||
use strings;
|
|
||||||
use encoding::utf8;
|
|
||||||
use memio;
|
|
||||||
use io;
|
|
||||||
use sort;
|
|
||||||
|
|
||||||
def punctrepr: [_]u8 = ['[', ']', '{', '}'];
|
// Returns a string representation of the token.
|
||||||
|
//
|
||||||
// Writes the human representation of a token to the provided I/O handle.
|
// This string representation may not correspond one to one with the source,
|
||||||
export fn tokrepr(sink: io::handle, tok: token) (size | io::error) = {
|
// but it is guaranteed to be a syntactically-valid construct, such that
|
||||||
let z = 0z;
|
// parsing it results in the same token.
|
||||||
|
export fn tokstr(tok: token) str = {
|
||||||
if (tok.0: size < len(punctrepr)) {
|
static let buf: [128]u8 = [0...];
|
||||||
return io::writeall(sink, [punctrepr[tok.0]])?;
|
|
||||||
};
|
|
||||||
|
|
||||||
switch (tok.0) {
|
switch (tok.0) {
|
||||||
|
case ty::QUOT_START =>
|
||||||
|
return "[";
|
||||||
|
case ty::QUOT_END =>
|
||||||
|
return "]";
|
||||||
|
case ty::MAP_START =>
|
||||||
|
return "{";
|
||||||
|
case ty::MAP_END =>
|
||||||
|
return "}";
|
||||||
case ty::COMMENT =>
|
case ty::COMMENT =>
|
||||||
z += io::writeall(sink, ['('])?;
|
return fmt::bsprintf(buf, "({})", tok.1 as str);
|
||||||
z += io::writeall(sink, strings::toutf8(tok.1 as str))?;
|
|
||||||
z += io::writeall(sink, [')'])?;
|
|
||||||
case ty::WORD =>
|
case ty::WORD =>
|
||||||
z += io::writeall(sink, strings::toutf8(tok.1 as str))?;
|
return tok.1 as str;
|
||||||
case ty::SYMBOL =>
|
case ty::SYMBOL =>
|
||||||
z += io::writeall(sink, ['\\'])?;
|
return fmt::bsprintf(buf, "\\{}", tok.1 as str);
|
||||||
z += io::writeall(sink, strings::toutf8(tok.1 as str))?;
|
|
||||||
case ty::KEYWORD =>
|
case ty::KEYWORD =>
|
||||||
z += io::writeall(sink, [':'])?;
|
return fmt::bsprintf(buf, ":{}", tok.1 as str);
|
||||||
z += io::writeall(sink, strings::toutf8(tok.1 as str))?;
|
|
||||||
case ty::STRING =>
|
case ty::STRING =>
|
||||||
z += quote(sink, tok.1 as str)?;
|
// TODO: escape string before printing
|
||||||
|
return fmt::bsprintf(buf, "\"{}\"", tok.1 as str);
|
||||||
case ty::CHAR =>
|
case ty::CHAR =>
|
||||||
let rn = tok.1 as rune;
|
let rn = tok.1 as rune;
|
||||||
let named = false;
|
for (let i = 0z; i < len(longcharnames); i += 1) {
|
||||||
|
if (longcharnames[i].1 == rn) {
|
||||||
z += io::writeall(sink, ['#', '\\'])?;
|
return fmt::bsprintf(buf, "#\\{}",
|
||||||
|
longcharnames[i].0);
|
||||||
for (let ch .. longcharnames) {
|
|
||||||
if (ch.1 == rn) {
|
|
||||||
z += io::writeall(sink, strings::toutf8(ch.0))?;
|
|
||||||
named = true;
|
|
||||||
break;
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
if (ascii::isgraph(rn)) {
|
||||||
if (!named) {
|
return fmt::bsprintf(buf, "#\\{}", rn);
|
||||||
if (isgraph(rn)) {
|
} else {
|
||||||
z += io::writeall(sink, utf8::encoderune(rn))?;
|
return fmt::bsprintf(buf, "#\\x{x};", rn: u32);
|
||||||
} else {
|
|
||||||
z += fmt::fprintf(sink, "x{:x};", rn: u32)?;
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
case ty::NUMBER =>
|
case ty::NUMBER =>
|
||||||
z += io::writeall(sink, strings::toutf8(tok.1 as str))?;
|
return tok.1 as str;
|
||||||
case ty::BOOLEAN =>
|
case ty::BOOLEAN =>
|
||||||
z += io::writeall(sink, ['#'])?;
|
return fmt::bsprintf(buf, "#{}",
|
||||||
z += io::writeall(sink,
|
if (tok.1 as bool) 't' else 'f');
|
||||||
if (tok.1 as bool) ['t'] else ['f'])?;
|
|
||||||
case =>
|
|
||||||
// unreachable
|
|
||||||
abort();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return z;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Returns the human representation of a token as a new string. The caller must
|
|
||||||
// free the return value.
|
|
||||||
export fn tokstr(tok: token) str = {
|
|
||||||
const sink = memio::dynamic();
|
|
||||||
tokrepr(&sink, tok)!;
|
|
||||||
return memio::string(&sink)!;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
|
@ -1,14 +0,0 @@
|
||||||
use unicode;
|
|
||||||
|
|
||||||
def graphtable: [unicode::gc::Zs]bool = [
|
|
||||||
false, false, false, false, false, // C
|
|
||||||
true, true, true, true, true, // L
|
|
||||||
true, true, true, // M
|
|
||||||
true, true, true, // N
|
|
||||||
true, true, true, true, true, true, // P
|
|
||||||
true, true, true, true, // S
|
|
||||||
false, false, false, // Z
|
|
||||||
];
|
|
||||||
|
|
||||||
fn isgraph(r: rune) bool =
|
|
||||||
if (r == ' ') true else graphtable[unicode::rune_gc(r)];
|
|
Loading…
Reference in a new issue