From 04efdcac8ed822bd580c7649de703bf755463fcf Mon Sep 17 00:00:00 2001 From: Lobo Torres Date: Wed, 4 Dec 2024 21:40:14 -0300 Subject: [PATCH] add pound literals for booleans and characters replaces the old character literal syntax that i really didn't like and feels more scheme-ish --- Makefile | 2 +- lex.ha => parse/lex.ha | 144 ++++++++++++++++++++++++++++--------- types.ha => parse/types.ha | 2 +- 3 files changed, 112 insertions(+), 36 deletions(-) rename lex.ha => parse/lex.ha (68%) rename types.ha => parse/types.ha (94%) diff --git a/Makefile b/Makefile index 22f3d18..871dda7 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,6 @@ HAREPATH=vendor/hare-unicode:${HARESRCDIR}/stdlib:${HARESRCDIR}/third-party .PHONY: check build check: - @env HAREPATH=${HAREPATH} hare test + @env HAREPATH=${HAREPATH} hare test -T +test build: @env HAREPATH=${HAREPATH} hare build diff --git a/lex.ha b/parse/lex.ha similarity index 68% rename from lex.ha rename to parse/lex.ha index 7c5d279..6fd4c7d 100644 --- a/lex.ha +++ b/parse/lex.ha @@ -4,13 +4,28 @@ use encoding::utf8; use io; use memio; use unicode; +use strconv; -// Testing dependency use fmt; use strings; // my cod prob sux :( +def longcharnames: [_](str, rune) = [ + ("nul", '\u0000'), + ("alarm", '\u0007'), + ("backspace", '\u0008'), + ("newline", '\u000a'), + ("tab", '\u0009'), + ("linefeed", '\u000a'), + ("vtab", '\u000b'), + ("page", '\u000c'), + ("return", '\u000d'), + ("esc", '\u001b'), + ("space", '\u0020'), + ("delete", '\u007f'), +]; + export type lexer = struct { in: io::handle, strbuf: memio::stream, @@ -70,8 +85,8 @@ export fn lex(lex: *lexer) (token | io::EOF | error) = { } else { return symbol{ v = v, kw = true }; }; - case '\'' => - return scanchar(lex)?; + case '#' => + return scanhash(lex)?; case '"' => return scanstr(lex)?; case => @@ -197,7 +212,30 @@ fn scanstr(lex: *lexer) (str | error) = { return memio::string(&lex.strbuf)!; }; +fn scanhash(lex: *lexer) (token | error) = { + const rn = match (nextrune(lex)?) { + case let rn: rune => + yield rn; + case io::EOF => + return ("hash literal", lex.loc.0, lex.loc.1): unterminated; + }; + + switch (rn) { + case 't' => + return true; + case 'f' => + return false; + case '\\' => + return scanchar(lex)?; + case => + return lex.loc: invalid; + }; +}; + fn scanchar(lex: *lexer) (rune | error) = { + static let namebuf: [16]u8 = [0...]; + let namebuf = memio::fixed(namebuf); + const rn = match (nextrune(lex)?) { case let rn: rune => yield rn; @@ -205,10 +243,23 @@ fn scanchar(lex: *lexer) (rune | error) = { return ("character literal", lex.loc.0, lex.loc.1): unterminated; }; - switch (rn) { - case '\\' => - return scanescape(lex)?; - case => + match (nextrune(lex)?) { + case let rnn: rune => + unget(lex, rnn); + if (isspace(rnn)) { + return rn; + } else { + memio::appendrune(&namebuf, rn)!; + memio::concat(&namebuf, scanword(lex)?)!; + const name = memio::string(&namebuf)!; + for (let i = 0z; i < len(longcharnames); i += 1) { + if (name == longcharnames[i].0) { + return longcharnames[i].1; + }; + }; + return lex.loc: invalid; + }; + case io::EOF => return rn; }; }; @@ -218,7 +269,7 @@ fn scanescape(lex: *lexer) (rune | error) = { case let rn: rune => yield rn; case io::EOF => - return lex.loc: invalid; + return ("escape sequence", lex.loc.0, lex.loc.1): unterminated; }; switch (rn) { @@ -230,8 +281,6 @@ fn scanescape(lex: *lexer) (rune | error) = { return '\n'; case 't' => return '\t'; - case 's' => - return ' '; case => return lex.loc: invalid; }; @@ -250,9 +299,8 @@ fn isspace(rn: rune) bool = { }; }; -def delimiters = `()[]{}\:'`; fn isdelimiter(rn: rune) bool = { - match (strings::index(delimiters, rn)) { + match (strings::index(`()[]{}\:#`, rn)) { case size => return true; case => @@ -262,27 +310,14 @@ fn isdelimiter(rn: rune) bool = { @test fn lex() void = { const cases: [_](str, []token) = [ - ( - `"hello" \greeting def`, - [ - "hello", - mksym("greeting"), - mkword("def"), - ] - ), - ( - `[dup *] (a -- a) \square def`, - [ - quotstart, - mkword("dup"), - mkword("*"), - quotend, - mkcomment("a -- a"), - mksym("square"), - mkword("def"), - ] - ), - (`'\s`, [' ']) + (`"hello" \greeting def`, + ["hello", mksym("greeting"), mkword("def")]), + (`[dup *] (a -- a) \square def`, + [quotstart, mkword("dup"), mkword("*"), quotend, + mkcomment("a -- a"), mksym("square"), + mkword("def")]), + (`#t #f`, [true, false]), + (`#\a #\space #\nul`, ['a', ' ', '\0']), ]; for (let i = 0z; i < len(cases); i += 1) { @@ -294,7 +329,15 @@ fn isdelimiter(rn: rune) bool = { for (let j = 0z; j < len(cases[i].1); j += 1) { const want = cases[i].1[j]; const have = lex(&lexer)! as token; - assert(tokeq(want, have)); + + if (!tokeq(want, have)) { + fmt::printfln("Case {}: {}", i, cases[i].0)!; + fmt::print("\tExpected: ")!; + tokpprint(want); + fmt::print("\tGot: ")!; + tokpprint(have); + assert(false); + }; }; assert(lex(&lexer) is io::EOF); @@ -321,6 +364,39 @@ fn tokeq(have: token, want: token) bool = { return (have as comment).v == c.v; case let r: rune => return have as rune == r; + case let b: bool => + return have as bool == b; + }; +}; + +fn tokpprint(tok: token) void = { + match (tok) { + case quotstart => + fmt::println("[")!; + case quotend => + fmt::println("]")!; + case mapstart => + fmt::println("{")!; + case mapend => + fmt::println("}")!; + case let w: word => + fmt::println(w.v)!; + case let s: symbol => + fmt::printfln("{}{}", if (s.kw) ":" else "\\", s.v)!; + case let s: str => + fmt::printfln(`"{}"`, s)!; + case let c: comment => + fmt::printfln("({})", c.v)!; + case let r: rune => + for (let i = 0z; i < len(longcharnames); i += 1) { + if (r == longcharnames[i].1) { + fmt::printfln("#\\{}", longcharnames[i].0)!; + return; + }; + }; + fmt::printfln("#\\{}", r)!; + case let b: bool => + fmt::println(if (b) "#t" else "#f")!; }; }; diff --git a/types.ha b/parse/types.ha similarity index 94% rename from types.ha rename to parse/types.ha index 2217e03..b94c5c6 100644 --- a/types.ha +++ b/parse/types.ha @@ -15,7 +15,7 @@ export type word = struct { v: str }; export type symbol = struct { v: str, kw: bool }; export type token = (quotstart | quotend | mapstart | mapend | - word | symbol | comment | str | rune); + word | symbol | comment | str | rune | bool); export fn strerror(err: error) const str = { static let buf: [64]u8 = [0...];