Compare commits
No commits in common. "b09e0b37fb1e5cda4ba545c244c64421b292a243" and "bdfaa4f1abf8a992b08d2966656ae737d31633f1" have entirely different histories.
b09e0b37fb
...
bdfaa4f1ab
7 changed files with 224 additions and 286 deletions
|
@ -1,93 +0,0 @@
|
||||||
use memio;
|
|
||||||
use fmt;
|
|
||||||
use strings;
|
|
||||||
use io;
|
|
||||||
|
|
||||||
type dummytoken = (ty, value);
|
|
||||||
|
|
||||||
// Table-driven test for next(): every case pairs a source snippet with the
// (type, value) pairs the lexer is expected to produce for it, in order. After
// the last expected token the lexer must report io::EOF.
//
// NOTE(review): this test function shares its name with the next() function
// it calls; confirm the two declarations do not clash when tests are built.
@test fn next() void = {
	const cases: [_](str, []dummytoken) = [
		(`"hello" \greeting def`, [
			(ty::STRING, "hello"),
			(ty::SYMBOL, "greeting"),
			(ty::WORD, "def")
		]),
		(`[dup *] (a -- a) \square def`, [
			(ty::QUOT_START, void),
			(ty::WORD, "dup"),
			(ty::WORD, "*"),
			(ty::QUOT_END, void),
			(ty::COMMENT, "a -- a"),
			(ty::SYMBOL, "square"),
			(ty::WORD, "def"),
		]),
		(`#t #f`, [
			(ty::BOOLEAN, true),
			(ty::BOOLEAN, false),
		]),
		(`#\a #\space #\nul`, [
			(ty::CHAR, 'a'),
			(ty::CHAR, ' '),
			(ty::CHAR, '\0'),
		]),
		(`"\x0a;\x2014;\x2f9f4;"`, [
			(ty::STRING, "\n—嶲"),
		]),
		(`#\x #\x0a; #\x2014; #\x2f9f4;`, [
			(ty::CHAR, 'x'),
			(ty::CHAR, '\n'),
			(ty::CHAR, '—'),
			(ty::CHAR, '嶲'),
		]),
	];

	for (let c = 0z; c < len(cases); c += 1) {
		const expected = cases[c].1;
		const bytes = strings::toutf8(cases[c].0);
		const stream = memio::fixed(bytes);
		const lexer = newlexer(&stream, "<string>");
		defer close(&lexer);

		for (let t = 0z; t < len(expected); t += 1) {
			const want = expected[t];
			const have = match (next(&lexer)) {
			case io::EOF =>
				assert(false, "reached EOF");
				return;
			case let err: error =>
				assert(false, strerror(err));
				return;
			case let tok: token =>
				yield tok;
			};

			if (tokeq(have, want)) {
				continue;
			};

			// Show both sides before failing; the expected pair gets
			// a zero-filled location so it can go through tokstr.
			fmt::printfln("Expected:\n\t{}",
				tokstr((want.0, want.1, location{ ... })))!;
			fmt::printfln("Got:\n\t{}", tokstr(have))!;
			assert(false);
		};

		assert(next(&lexer) is io::EOF);
	};
};
|
|
||||||
|
|
||||||
// Reports whether a lexed token agrees with an expected (type, value) pair.
// The types must match; a token without a value then always matches, and any
// other token must carry a value of the same kind and content as the expected
// one. The token's location is not compared.
fn tokeq(have: token, want: dummytoken) bool = {
	if (have.0 != want.0) {
		return false;
	};

	match (have.1) {
	case void =>
		return true;
	case let text: str =>
		return want.1 is str && (want.1 as str) == text;
	case let ch: rune =>
		return want.1 is rune && (want.1 as rune) == ch;
	case let flag: bool =>
		return want.1 is bool && (want.1 as bool) == flag;
	};
};
|
|
||||||
|
|
|
@ -1,51 +0,0 @@
|
||||||
use ascii;
|
|
||||||
use fmt;
|
|
||||||
|
|
||||||
// Renders a token as text in the lexer's source notation.
//
// Results that need formatting are written into a static 128-byte buffer, so
// the returned string may be overwritten by the next call. Word and number
// tokens return their own text unchanged.
//
// NOTE(review): string contents are emitted without escaping (see the TODO
// below), so the output for a string containing a double quote presumably
// does not lex back to the same token; confirm before relying on round trips.
export fn tokstr(tok: token) str = {
	static let scratch: [128]u8 = [0...];

	switch (tok.0) {
	case ty::QUOT_START =>
		return "[";
	case ty::QUOT_END =>
		return "]";
	case ty::MAP_START =>
		return "{";
	case ty::MAP_END =>
		return "}";
	case ty::WORD, ty::NUMBER =>
		// Words and numbers are returned exactly as stored.
		return tok.1 as str;
	case ty::COMMENT =>
		return fmt::bsprintf(scratch, "({})", tok.1 as str);
	case ty::SYMBOL =>
		return fmt::bsprintf(scratch, "\\{}", tok.1 as str);
	case ty::KEYWORD =>
		return fmt::bsprintf(scratch, ":{}", tok.1 as str);
	case ty::STRING =>
		// TODO: escape string before printing
		return fmt::bsprintf(scratch, "\"{}\"", tok.1 as str);
	case ty::BOOLEAN =>
		const flag = if (tok.1 as bool) 't' else 'f';
		return fmt::bsprintf(scratch, "#{}", flag);
	case ty::CHAR =>
		const ch = tok.1 as rune;

		// Prefer the long name when longcharnames has one for this rune.
		for (let n = 0z; n < len(longcharnames); n += 1) {
			if (ch == longcharnames[n].1) {
				return fmt::bsprintf(scratch, "#\\{}",
					longcharnames[n].0);
			};
		};

		// Anything ascii::isgraph rejects is written as a hex escape.
		if (!ascii::isgraph(ch)) {
			return fmt::bsprintf(scratch, "#\\x{x};", ch: u32);
		};
		return fmt::bsprintf(scratch, "#\\{}", ch);
	};
};
|
|
|
@ -1,51 +0,0 @@
|
||||||
use io;
|
|
||||||
use fmt;
|
|
||||||
|
|
||||||
// A syntax error.
|
|
||||||
export type syntax = !(location, str);
|
|
||||||
|
|
||||||
// All possible lexer errors
|
|
||||||
export type error = !(io::error | syntax);
|
|
||||||
|
|
||||||
// The kind of a lexical token. It is stored as the first element of a token
// tuple and decides which kind of value, if any, accompanies it.
|
|
||||||
export type ty = enum uint {
|
|
||||||
// Produced for '['; carries no value.
QUOT_START,
|
|
||||||
// Produced for ']'; carries no value.
QUOT_END,
|
|
||||||
// Produced for '{'; carries no value.
MAP_START,
|
|
||||||
// Produced for '}'; carries no value.
MAP_END,
|
|
||||||
// Produced for '('; the value is the comment text, printed by tokstr as "(text)".
COMMENT,
|
|
||||||
// Produced for input that starts with none of the special characters;
// the value is the text returned by scanword.
WORD,
|
|
||||||
// Produced for '\' followed by a non-empty word; the value is that word.
SYMBOL,
|
|
||||||
// Produced for ':' followed by a non-empty word; the value is that word.
KEYWORD,
|
|
||||||
// A string literal; the value is its contents, printed by tokstr in double quotes.
STRING,
|
|
||||||
// A character literal; the value is a rune, printed by tokstr with a "#\" prefix.
CHAR,
|
|
||||||
// NOTE(review): no visible lexer code produces this type; tokstr expects
// its value to be a str. Confirm where numbers are recognised.
NUMBER,
|
|
||||||
// Produced for "#t" (true) and "#f" (false); the value is a bool.
BOOLEAN,
|
|
||||||
};
|
|
||||||
|
|
||||||
// A token value, used for literal tokens and comments.
|
|
||||||
export type value = (str | rune | bool | void);
|
|
||||||
|
|
||||||
// A location within a source file, attached to every token and syntax error.
|
|
||||||
// The path is borrowed from the file name given to the lexer.
|
|
||||||
export type location = struct {
|
|
||||||
// File name as passed to newlexer; borrowed, not copied.
path: str,
|
|
||||||
// Copied from the first element of the lexer's loc pair, which newlexer
// initialises to 1.
line: uint,
|
|
||||||
// Copied from the second element of the lexer's loc pair, which newlexer
// initialises to 0.
column: uint,
|
|
||||||
};
|
|
||||||
|
|
||||||
// A single lexical token.
|
|
||||||
export type token = (ty, value, location);
|
|
||||||
|
|
||||||
// Formats a lexer error as a human-readable message. I/O errors are handed to
// io::strerror; syntax errors are rendered as
// "path:line:column: syntax error: message" into a static 512-byte buffer, so
// the returned string may be overwritten by the next call.
export fn strerror(err: error) const str = {
	static let msgbuf: [512]u8 = [0...];

	return match (err) {
	case let ioerr: io::error =>
		yield io::strerror(ioerr);
	case let syn: syntax =>
		const loc = syn.0;
		yield fmt::bsprintf(msgbuf, "{}:{}:{}: syntax error: {}",
			loc.path, loc.line, loc.column, syn.1);
	};
};
|
|
117
parse/+test/lexer.ha
Normal file
117
parse/+test/lexer.ha
Normal file
|
@ -0,0 +1,117 @@
|
||||||
|
use memio;
|
||||||
|
use fmt;
|
||||||
|
use strings;
|
||||||
|
use io;
|
||||||
|
|
||||||
|
// Table-driven test for lex(): each case pairs a source snippet with the
// tokens the lexer is expected to produce for it, in order. After the last
// expected token the lexer must report io::EOF.
//
// NOTE(review): this test function shares its name with the lex() function it
// calls; confirm the two declarations do not clash when tests are built.
@test fn lex() void = {
	const cases: [_](str, []token) = [
		(`"hello" \greeting def`,
			["hello", mksym("greeting"), mkword("def")]),
		(`[dup *] (a -- a) \square def`,
			[quotstart, mkword("dup"), mkword("*"), quotend,
				mkcomment("a -- a"), mksym("square"),
				mkword("def")]),
		(`#t #f`, [true, false]),
		(`#\a #\space #\nul`, ['a', ' ', '\0']),
		(`"\x0a;" "\x2014;" "\x2f9f4;"`, ["\n", "—", "嶲"]),
		(`#\x #\x0a; #\x2014; #\x2f9f4;`, ['x', '\n', '—', '嶲']),
	];

	for (let i = 0z; i < len(cases); i += 1) {
		const src = strings::toutf8(cases[i].0);
		const src = memio::fixed(src);
		const lexer = newlexer(&src, "<string>");
		defer close(&lexer);

		for (let j = 0z; j < len(cases[i].1); j += 1) {
			const want = cases[i].1[j];
			const have = match (lex(&lexer)) {
			case let tok: token =>
				yield tok;
			case io::EOF =>
				assert(false, "reached EOF");
				return;
			case let err: error =>
				assert(false, strerror(err));
				return;
			};

			// tokeq is declared as (have, want); pass the arguments
			// in that order.
			if (!tokeq(have, want)) {
				fmt::printfln("Case {}: {}", i, cases[i].0)!;
				fmt::print("\tExpected: ")!;
				tokpp(want);
				fmt::print("\tGot: ")!;
				tokpp(have);
				assert(false, "token mismatch");
			};
		};

		// Every expected token was consumed; nothing may follow.
		assert(lex(&lexer) is io::EOF, "expected EOF after last token");
	};
};
|
||||||
|
|
||||||
|
// Reports whether two tokens are equal: both must hold the same kind of token
// and, for kinds that carry a payload, the same payload. Symbols must also
// agree on their kw flag.
//
// Each payload comparison first checks the kind with `is`, so a kind mismatch
// returns false instead of aborting on a failed `as` assertion. This lets the
// caller print its own diagnostics for mismatched tokens.
fn tokeq(have: token, want: token) bool = {
	match (want) {
	case quotstart =>
		return have is quotstart;
	case quotend =>
		return have is quotend;
	case mapstart =>
		return have is mapstart;
	case mapend =>
		return have is mapend;
	case let w: word =>
		return have is word && (have as word).v == w.v;
	case let s: str =>
		return have is str && (have as str) == s;
	case let s: symbol =>
		if (!(have is symbol)) {
			return false;
		};
		const h = have as symbol;
		return h.v == s.v && h.kw == s.kw;
	case let c: comment =>
		return have is comment && (have as comment).v == c.v;
	case let r: rune =>
		return have is rune && (have as rune) == r;
	case let b: bool =>
		return have is bool && (have as bool) == b;
	};
};
|
||||||
|
|
||||||
|
// Prints a token in source-like notation, followed by a newline, using the
// fmt print functions. A rune listed in longcharnames is printed by its long
// name; any other rune is printed as-is after the "#\" prefix.
fn tokpp(tok: token) void = {
	match (tok) {
	case quotstart =>
		fmt::println("[")!;
	case quotend =>
		fmt::println("]")!;
	case mapstart =>
		fmt::println("{")!;
	case mapend =>
		fmt::println("}")!;
	case let wd: word =>
		fmt::println(wd.v)!;
	case let sym: symbol =>
		// Keywords are introduced by ':', plain symbols by '\'.
		const sigil = if (sym.kw) ":" else "\\";
		fmt::printfln("{}{}", sigil, sym.v)!;
	case let text: str =>
		fmt::printfln(`"{}"`, text)!;
	case let cm: comment =>
		fmt::printfln("({})", cm.v)!;
	case let ch: rune =>
		for (let n = 0z; n < len(longcharnames); n += 1) {
			if (longcharnames[n].1 == ch) {
				fmt::printfln("#\\{}", longcharnames[n].0)!;
				return;
			};
		};
		fmt::printfln("#\\{}", ch)!;
	case let flag: bool =>
		if (flag) {
			fmt::println("#t")!;
		} else {
			fmt::println("#f")!;
		};
	};
};
|
||||||
|
|
||||||
|
// Test helper: builds a word token holding the given text.
fn mkword(v: const str) word = {
	return word { v = v };
};
|
||||||
|
|
||||||
|
// Test helper: builds a comment token holding the given text.
fn mkcomment(v: const str) comment = {
	return comment { v = v };
};
|
||||||
|
|
||||||
|
// Test helper: builds a symbol token holding the given name. kw defaults to
// false and is stored unchanged in the token's kw field.
fn mksym(v: const str, kw: bool = false) symbol = {
	return symbol {
		v = v,
		kw = kw,
	};
};
|
||||||
|
|
|
@ -27,20 +27,16 @@ def longcharnames: [_](str, rune) = [
|
||||||
export type lexer = struct {
|
export type lexer = struct {
|
||||||
in: io::handle,
|
in: io::handle,
|
||||||
strbuf: memio::stream,
|
strbuf: memio::stream,
|
||||||
commentbuf: memio::stream,
|
|
||||||
path: str,
|
path: str,
|
||||||
loc: (uint, uint),
|
loc: (uint, uint),
|
||||||
prevloc: (uint, uint),
|
prevloc: (uint, uint),
|
||||||
unread: (rune | void),
|
unread: (rune | void),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Creates a new [[lexer]] for the given [[io::handle]].
|
|
||||||
// The path is borrowed.
|
|
||||||
export fn newlexer(in: io::handle, path: str) lexer = {
|
export fn newlexer(in: io::handle, path: str) lexer = {
|
||||||
return lexer {
|
return lexer {
|
||||||
in = in,
|
in = in,
|
||||||
strbuf = memio::dynamic(),
|
strbuf = memio::dynamic(),
|
||||||
commentbuf = memio::dynamic(),
|
|
||||||
path = path,
|
path = path,
|
||||||
loc = (1, 0),
|
loc = (1, 0),
|
||||||
unread = void,
|
unread = void,
|
||||||
|
@ -48,14 +44,11 @@ export fn newlexer(in: io::handle, path: str) lexer = {
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
// Frees resources associated with a [[lexer]].
|
|
||||||
export fn close(lex: *lexer) void = {
|
export fn close(lex: *lexer) void = {
|
||||||
io::close(&lex.strbuf)!;
|
io::close(&lex.strbuf)!;
|
||||||
io::close(&lex.commentbuf)!;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Returns the next token from the lexer.
|
export fn lex(lex: *lexer) (token | io::EOF | error) = {
|
||||||
export fn next(lex: *lexer) (token | io::EOF | error) = {
|
|
||||||
const rn = match (nextrunews(lex)?) {
|
const rn = match (nextrunews(lex)?) {
|
||||||
case io::EOF =>
|
case io::EOF =>
|
||||||
return io::EOF;
|
return io::EOF;
|
||||||
|
@ -65,30 +58,30 @@ export fn next(lex: *lexer) (token | io::EOF | error) = {
|
||||||
|
|
||||||
switch (rn) {
|
switch (rn) {
|
||||||
case '(' =>
|
case '(' =>
|
||||||
return mktoken(lex, ty::COMMENT, scancomment(lex)?);
|
return comment{ v = scancomment(lex)? };
|
||||||
case ')' =>
|
case ')' =>
|
||||||
return mkerror(lex, "invalid token");
|
return lex.loc: invalid;
|
||||||
case '[' =>
|
case '[' =>
|
||||||
return mktoken(lex, ty::QUOT_START, void);
|
return quotstart;
|
||||||
case ']' =>
|
case ']' =>
|
||||||
return mktoken(lex, ty::QUOT_END, void);
|
return quotend;
|
||||||
case '{' =>
|
case '{' =>
|
||||||
return mktoken(lex, ty::MAP_START, void);
|
return mapstart;
|
||||||
case '}' =>
|
case '}' =>
|
||||||
return mktoken(lex, ty::MAP_END, void);
|
return mapend;
|
||||||
case '\\' =>
|
case '\\' =>
|
||||||
let v = scanword(lex)?;
|
let v = scanword(lex)?;
|
||||||
if (len(v) == 0) {
|
if (len(v) == 0) {
|
||||||
return mkerror(lex, "invalid symbol literal");
|
return lex.loc: invalid;
|
||||||
} else {
|
} else {
|
||||||
return mktoken(lex, ty::SYMBOL, v);
|
return symbol{ v = v, kw = false };
|
||||||
};
|
};
|
||||||
case ':' =>
|
case ':' =>
|
||||||
let v = scanword(lex)?;
|
let v = scanword(lex)?;
|
||||||
if (len(v) == 0) {
|
if (len(v) == 0) {
|
||||||
return mkerror(lex, "invalid keyword");
|
return lex.loc: invalid;
|
||||||
} else {
|
} else {
|
||||||
return mktoken(lex, ty::KEYWORD, v);
|
return symbol{ v = v, kw = true };
|
||||||
};
|
};
|
||||||
case '#' =>
|
case '#' =>
|
||||||
return scanpound(lex)?;
|
return scanpound(lex)?;
|
||||||
|
@ -99,7 +92,7 @@ export fn next(lex: *lexer) (token | io::EOF | error) = {
|
||||||
};
|
};
|
||||||
|
|
||||||
unget(lex, rn);
|
unget(lex, rn);
|
||||||
return mktoken(lex, ty::WORD, scanword(lex)?);
|
return word{ v = scanword(lex)? };
|
||||||
};
|
};
|
||||||
|
|
||||||
fn nextrune(lex: *lexer) (rune | io::EOF | error) = {
|
fn nextrune(lex: *lexer) (rune | io::EOF | error) = {
|
||||||
|
@ -131,7 +124,7 @@ fn nextrune(lex: *lexer) (rune | io::EOF | error) = {
|
||||||
case let err: io::error =>
|
case let err: io::error =>
|
||||||
return err;
|
return err;
|
||||||
case utf8::invalid =>
|
case utf8::invalid =>
|
||||||
return mkerror(lex, "invalid UTF-8 sequence");
|
return lex.loc: invalid;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -139,7 +132,9 @@ fn nextrunews(lex: *lexer) (rune | io::EOF | error) = {
|
||||||
for (true) {
|
for (true) {
|
||||||
match (nextrune(lex)?) {
|
match (nextrune(lex)?) {
|
||||||
case let rn: rune =>
|
case let rn: rune =>
|
||||||
if (isspace(rn)) continue;
|
if (isspace(rn)) {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
return rn;
|
return rn;
|
||||||
case io::EOF =>
|
case io::EOF =>
|
||||||
return io::EOF;
|
return io::EOF;
|
||||||
|
@ -172,36 +167,36 @@ fn scanword(lex: *lexer) (str | error) = {
|
||||||
};
|
};
|
||||||
|
|
||||||
fn scancomment(lex: *lexer) (str | error) = {
|
fn scancomment(lex: *lexer) (str | error) = {
|
||||||
memio::reset(&lex.commentbuf);
|
|
||||||
for (true) {
|
|
||||||
const rn = match (nextrune(lex)?) {
|
|
||||||
case let rn: rune =>
|
|
||||||
yield rn;
|
|
||||||
case io::EOF =>
|
|
||||||
return mkerror(lex, "unterminated comment");
|
|
||||||
};
|
|
||||||
|
|
||||||
switch (rn) {
|
|
||||||
case '(' =>
|
|
||||||
return mkerror(lex, "nested comments are not allowed");
|
|
||||||
case ')' =>
|
|
||||||
break;
|
|
||||||
case =>
|
|
||||||
memio::appendrune(&lex.commentbuf, rn)!;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
return memio::string(&lex.commentbuf)!;
|
|
||||||
};
|
|
||||||
|
|
||||||
fn scanstr(lex: *lexer) (token | error) = {
|
|
||||||
memio::reset(&lex.strbuf);
|
memio::reset(&lex.strbuf);
|
||||||
for (true) {
|
for (true) {
|
||||||
const rn = match (nextrune(lex)?) {
|
const rn = match (nextrune(lex)?) {
|
||||||
case let rn: rune =>
|
case let rn: rune =>
|
||||||
yield rn;
|
yield rn;
|
||||||
case io::EOF =>
|
case io::EOF =>
|
||||||
return mkerror(lex, "unterminated string literal");
|
return ("comment", lex.loc.0, lex.loc.1): unterminated;
|
||||||
|
};
|
||||||
|
|
||||||
|
switch (rn) {
|
||||||
|
case '(' =>
|
||||||
|
return lex.loc: invalid;
|
||||||
|
case ')' =>
|
||||||
|
break;
|
||||||
|
case =>
|
||||||
|
memio::appendrune(&lex.strbuf, rn)!;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
return memio::string(&lex.strbuf)!;
|
||||||
|
};
|
||||||
|
|
||||||
|
fn scanstr(lex: *lexer) (str | error) = {
|
||||||
|
memio::reset(&lex.strbuf);
|
||||||
|
for (true) {
|
||||||
|
const rn = match (nextrune(lex)?) {
|
||||||
|
case let rn: rune =>
|
||||||
|
yield rn;
|
||||||
|
case io::EOF =>
|
||||||
|
return ("string literal", lex.loc.0, lex.loc.1): unterminated;
|
||||||
};
|
};
|
||||||
|
|
||||||
switch (rn) {
|
switch (rn) {
|
||||||
|
@ -212,7 +207,7 @@ fn scanstr(lex: *lexer) (token | error) = {
|
||||||
memio::appendrune(&lex.strbuf, rn)!;
|
memio::appendrune(&lex.strbuf, rn)!;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
return mktoken(lex, ty::STRING, memio::string(&lex.strbuf)!);
|
return memio::string(&lex.strbuf)!;
|
||||||
};
|
};
|
||||||
|
|
||||||
fn scanpound(lex: *lexer) (token | error) = {
|
fn scanpound(lex: *lexer) (token | error) = {
|
||||||
|
@ -220,22 +215,22 @@ fn scanpound(lex: *lexer) (token | error) = {
|
||||||
case let rn: rune =>
|
case let rn: rune =>
|
||||||
yield rn;
|
yield rn;
|
||||||
case io::EOF =>
|
case io::EOF =>
|
||||||
return mkerror(lex, "unterminated pound literal");
|
return ("pound literal", lex.loc.0, lex.loc.1): unterminated;
|
||||||
};
|
};
|
||||||
|
|
||||||
switch (rn) {
|
switch (rn) {
|
||||||
case 't' =>
|
case 't' =>
|
||||||
return mktoken(lex, ty::BOOLEAN, true);
|
return true;
|
||||||
case 'f' =>
|
case 'f' =>
|
||||||
return mktoken(lex, ty::BOOLEAN, false);
|
return false;
|
||||||
case '\\' =>
|
case '\\' =>
|
||||||
return scanchar(lex);
|
return scanchar(lex)?;
|
||||||
case =>
|
case =>
|
||||||
return mkerror(lex, "invalid pound literal");
|
return lex.loc: invalid;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
fn scanchar(lex: *lexer) (token | error) = {
|
fn scanchar(lex: *lexer) (rune | error) = {
|
||||||
static let namebuf: [16]u8 = [0...];
|
static let namebuf: [16]u8 = [0...];
|
||||||
let namebuf = memio::fixed(namebuf);
|
let namebuf = memio::fixed(namebuf);
|
||||||
|
|
||||||
|
@ -243,35 +238,31 @@ fn scanchar(lex: *lexer) (token | error) = {
|
||||||
case let rn: rune =>
|
case let rn: rune =>
|
||||||
yield rn;
|
yield rn;
|
||||||
case io::EOF =>
|
case io::EOF =>
|
||||||
return mkerror(lex, "unterminated character literal");
|
return ("character literal", lex.loc.0, lex.loc.1): unterminated;
|
||||||
};
|
};
|
||||||
|
|
||||||
let ret: rune = '\0';
|
|
||||||
|
|
||||||
match (nextrune(lex)?) {
|
match (nextrune(lex)?) {
|
||||||
case io::EOF =>
|
case let rnn: rune =>
|
||||||
return mktoken(lex, ty::CHAR, rn);
|
unget(lex, rnn);
|
||||||
case let next: rune =>
|
if (isspace(rnn)) {
|
||||||
unget(lex, next);
|
return rn;
|
||||||
|
|
||||||
if (isspace(next)) {
|
|
||||||
return mktoken(lex, ty::CHAR, rn);
|
|
||||||
};
|
|
||||||
|
|
||||||
if (rn == 'x') {
|
|
||||||
return mktoken(lex, ty::CHAR, scanescape2(lex)?);
|
|
||||||
} else {
|
} else {
|
||||||
memio::appendrune(&namebuf, rn)!;
|
if (rn == 'x') {
|
||||||
memio::concat(&namebuf, scanword(lex)?)!;
|
return scanescape2(lex);
|
||||||
const name = memio::string(&namebuf)!;
|
} else {
|
||||||
for (let i = 0z; i < len(longcharnames); i += 1) {
|
memio::appendrune(&namebuf, rn)!;
|
||||||
if (name == longcharnames[i].0) {
|
memio::concat(&namebuf, scanword(lex)?)!;
|
||||||
return mktoken(lex, ty::CHAR,
|
const name = memio::string(&namebuf)!;
|
||||||
longcharnames[i].1);
|
for (let i = 0z; i < len(longcharnames); i += 1) {
|
||||||
|
if (name == longcharnames[i].0) {
|
||||||
|
return longcharnames[i].1;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
return lex.loc: invalid;
|
||||||
};
|
};
|
||||||
return mkerror(lex, "invalid named character literal");
|
|
||||||
};
|
};
|
||||||
|
case io::EOF =>
|
||||||
|
return rn;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -280,7 +271,7 @@ fn scanescape(lex: *lexer) (rune | error) = {
|
||||||
case let rn: rune =>
|
case let rn: rune =>
|
||||||
yield rn;
|
yield rn;
|
||||||
case io::EOF =>
|
case io::EOF =>
|
||||||
return mkerror(lex, "unterminated character escape");
|
return ("escape sequence", lex.loc.0, lex.loc.1): unterminated;
|
||||||
};
|
};
|
||||||
|
|
||||||
switch (rn) {
|
switch (rn) {
|
||||||
|
@ -297,7 +288,7 @@ fn scanescape(lex: *lexer) (rune | error) = {
|
||||||
case '"' => return '"';
|
case '"' => return '"';
|
||||||
case 'x' => return scanescape2(lex)?;
|
case 'x' => return scanescape2(lex)?;
|
||||||
case =>
|
case =>
|
||||||
return mkerror(lex, "invalid character escape");
|
return lex.loc: invalid;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -313,7 +304,7 @@ fn scanescape2(lex: *lexer) (rune | error) = {
|
||||||
case let rn: rune =>
|
case let rn: rune =>
|
||||||
yield rn;
|
yield rn;
|
||||||
case io::EOF =>
|
case io::EOF =>
|
||||||
return mkerror(lex, "unterminated character escape");
|
return ("escape sequence", lex.loc.0, lex.loc.1): unterminated;
|
||||||
};
|
};
|
||||||
|
|
||||||
const buf: [6]u8 = [0...];
|
const buf: [6]u8 = [0...];
|
||||||
|
@ -326,11 +317,11 @@ fn scanescape2(lex: *lexer) (rune | error) = {
|
||||||
case let rn: rune =>
|
case let rn: rune =>
|
||||||
yield rn;
|
yield rn;
|
||||||
case io::EOF =>
|
case io::EOF =>
|
||||||
return mkerror(lex, "unterminated escape sequence");
|
return ("escape sequence", lex.loc.0, lex.loc.1): unterminated;
|
||||||
};
|
};
|
||||||
|
|
||||||
if (count > 6) {
|
if (count > 6) {
|
||||||
return mkerror(lex, "invalid escape sequence");
|
return lex.loc: invalid;
|
||||||
} else if (rn == ';') {
|
} else if (rn == ';') {
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
|
@ -346,16 +337,10 @@ fn scanescape2(lex: *lexer) (rune | error) = {
|
||||||
case let codepoint: u32 =>
|
case let codepoint: u32 =>
|
||||||
return codepoint: rune;
|
return codepoint: rune;
|
||||||
case =>
|
case =>
|
||||||
return mkerror(lex, "invalid escape sequence");
|
return lex.loc: invalid;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
fn mktoken(lex: *lexer, ty: ty, value: value) token =
|
|
||||||
(ty, value, location{ path = lex.path, line = lex.loc.0, column = lex.loc.1 });
|
|
||||||
|
|
||||||
fn mkerror(lex: *lexer, msg: const str) syntax =
|
|
||||||
(location{ path = lex.path, line = lex.loc.0, column = lex.loc.1 }, msg);
|
|
||||||
|
|
||||||
fn isspace(rn: rune) bool = {
|
fn isspace(rn: rune) bool = {
|
||||||
if (ascii::isspace(rn)) {
|
if (ascii::isspace(rn)) {
|
||||||
return true;
|
return true;
|
||||||
|
@ -370,7 +355,7 @@ fn isspace(rn: rune) bool = {
|
||||||
};
|
};
|
||||||
|
|
||||||
fn isdelimiter(rn: rune) bool = {
|
fn isdelimiter(rn: rune) bool = {
|
||||||
match (strings::index(`()[]{}`, rn)) {
|
match (strings::index(`()[]{}\:#`, rn)) {
|
||||||
case size =>
|
case size =>
|
||||||
return true;
|
return true;
|
||||||
case =>
|
case =>
|
32
parse/types.ha
Normal file
32
parse/types.ha
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
use io;
|
||||||
|
use fmt;
|
||||||
|
|
||||||
|
// Error for input that does not form a valid token. It carries the pair of
// numbers the lexer held in its loc field when the error was raised; strerror
// prints them as "first:second".
export type invalid = !(uint, uint);
|
||||||
|
// Error for a construct that was still open when the input ended. The first
// element names the construct (for example "string literal"); the other two
// are the elements of the lexer's loc pair when the error was raised.
export type unterminated = !(const str, uint, uint);
|
||||||
|
// Any error the lexer can return: one of its own two error types or an
// underlying I/O error. Use strerror to turn it into a message.
export type error = !(invalid | unterminated | io::error);
|
||||||
|
|
||||||
|
// Token returned by the lexer for '['. Carries no data.
export type quotstart = void;
|
||||||
|
// Token returned by the lexer for ']'. Carries no data.
export type quotend = void;
|
||||||
|
// Token returned by the lexer for '{'. Carries no data.
export type mapstart = void;
|
||||||
|
// Token returned by the lexer for '}'. Carries no data.
export type mapend = void;
|
||||||
|
|
||||||
|
// Token returned by the lexer for '('; v is the text returned by scancomment.
export type comment = struct { v: str };
|
||||||
|
// Token returned by the lexer for input that starts with none of the special
// characters; v is the text returned by scanword.
export type word = struct { v: str };
|
||||||
|
// Token returned by the lexer for '\' or ':' followed by a non-empty word.
// v is that word; kw is true for the ':' form and false for the '\' form.
export type symbol = struct { v: str, kw: bool };
|
||||||
|
|
||||||
|
// A single lexical token. Besides the dedicated token types, three plain
// types are used directly: bool for "#t" and "#f", and rune for the result of
// scanchar.
// NOTE(review): str is presumably a string literal as returned by scanstr;
// the call site is not visible here, so confirm.
export type token = (quotstart | quotend | mapstart | mapend |
|
||||||
|
word | symbol | comment | str | rune | bool);
|
||||||
|
|
||||||
|
// Converts a lexer error into a human-readable message. I/O errors are handed
// to io::strerror; the lexer's own errors are formatted into a static 64-byte
// buffer, so the returned string may be overwritten by the next call.
export fn strerror(err: error) const str = {
	static let msgbuf: [64]u8 = [0...];

	return match (err) {
	case let ioerr: io::error =>
		yield io::strerror(ioerr);
	case let inv: invalid =>
		yield fmt::bsprintf(msgbuf,
			"Invalid token found at {}:{}", inv.0, inv.1);
	case let unt: unterminated =>
		yield fmt::bsprintf(msgbuf,
			"Unterminated {} found at {}:{}", unt.0, unt.1, unt.2);
	};
};
|
7
test.kj
7
test.kj
|
@ -1,6 +1,5 @@
|
||||||
( hello world! )
|
3.14159 \pi def
|
||||||
\pi 3.14159 def
|
[dup *] \square def
|
||||||
\square [dup *] def
|
|
||||||
\circarea [square pi *] def
|
|
||||||
|
|
||||||
|
[square pi *] \circarea def
|
||||||
20 circarea . ( => 1256.636 )
|
20 circarea . ( => 1256.636 )
|
||||||
|
|
Loading…
Reference in a new issue