From 85f78756316a74f5eefd07629b7f363cd7bd826f Mon Sep 17 00:00:00 2001
From: Lobo Torres
Date: Tue, 3 Dec 2024 23:41:23 -0300
Subject: [PATCH] initial work on lexer

---
 lex.ha   | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 test.bnm |   5 +++
 types.ha |   7 ++++
 3 files changed, 118 insertions(+)
 create mode 100644 lex.ha
 create mode 100644 test.bnm
 create mode 100644 types.ha

diff --git a/lex.ha b/lex.ha
new file mode 100644
index 0000000..ade3d2b
--- /dev/null
+++ b/lex.ha
@@ -0,0 +1,106 @@
+use bufio;
+use fmt;
+use io;
+use memio;
+use os;
+use encoding::utf8;
+use strings;
+use ascii;
+
+// my cod prob sux :(
+
+export type lexer = struct { // state for reading runes one at a time from in
+	in: io::handle, // source that nextrune() reads from
+	strbuf: memio::stream, // created by newlexer(), closed by close(); otherwise unused so far
+	path: str, // kept as passed to newlexer(); nothing in this patch reads it
+	loc: (uint, uint), // .0 starts at 1 and grows per '\n'; .1 counts runes since then
+	prevloc: (uint, uint), // copy of loc taken just before each rune is consumed
+	unread: (rune | void), // rune handed back via unget(), or void when there is none
+};
+// Builds a lexer that reads from in, with loc at (1, 0) and no pushed-back rune.
+export fn newlexer(in: io::handle, path: str) lexer = {
+	return lexer {
+		in = in,
+		strbuf = memio::dynamic(),
+		path = path,
+		loc = (1, 0),
+		unread = void,
+		... // prevloc is not listed; it is left to the `...` default
+	};
+};
+// Closes the lexer's strbuf. lex.in is not closed here.
+export fn close(lex: *lexer) void = {
+	io::close(&lex.strbuf)!; // `!`: a failed close is treated as fatal
+};
+// Moves lex.loc past rn: '\n' bumps .0 and resets .1 to 0, any other rune bumps .1.
+fn updateloc(lex: *lexer, rn: rune) void = {
+	if (rn == '\n') {
+		lex.loc = (lex.loc.0 + 1, 0);
+	} else {
+		lex.loc.1 += 1;
+	};
+};
+// Returns the pushed-back rune if there is one, otherwise the next rune of lex.in.
+fn nextrune(lex: *lexer) (rune | io::error | io::EOF | utf8::invalid) = {
+	match (lex.unread) {
+	case let rn: rune =>
+		lex.prevloc = lex.loc; // saved so that unget() can restore it
+		lex.unread = void;
+		updateloc(lex, rn);
+		return rn;
+	case void =>
+		yield; // nothing pushed back: continue with the read below
+	};
+
+	match (bufio::read_rune(lex.in)?) { // `?` passes read errors on to the caller
+	case let rn: rune =>
+		lex.prevloc = lex.loc;
+		updateloc(lex, rn);
+		return rn;
+	case io::EOF =>
+		return io::EOF; // loc and prevloc are left as they were
+	};
+};
+// Hands rn back so the next nextrune() returns it, and restores loc from prevloc.
+fn unget(lex: *lexer, rn: rune) void = {
+	assert(lex.unread is void); // only one rune can be pending at a time
+	lex.unread = rn;
+	lex.loc = lex.prevloc;
+};
+// Consumes runes while ascii::isspace() is true; the first other rune is ungot.
+fn skipws(lex: *lexer) (void | io::EOF | io::error | utf8::invalid) = {
+	for (true) {
+		match (nextrune(lex)?) {
+		case io::EOF => return io::EOF; // input ended before a non-space rune
+		case let rn: rune =>
+			if (!ascii::isspace(rn)) {
+				unget(lex, rn); // keep it for the caller's next nextrune()
+				return;
+			};
+		};
+	};
+};
+
+// Tests! :)
+
+// Reads "a\nb" rune by rune and checks the resulting loc.
+@test fn test_nextrune() void = {
+	let lex = newlexer(&memio::fixed(strings::toutf8("a\nb")),
+		"");
+	defer close(&lex);
+
+	assert(nextrune(&lex)! == 'a');
+	assert(nextrune(&lex)! == '\n');
+	assert(nextrune(&lex)! == 'b');
+	assert(lex.loc.0 == 2u && lex.loc.1 == 1u); // one '\n' seen, then one rune
+};
+// Checks that skipws() stops in front of 'a' and that loc survives the unget().
+@test fn test_skipws() void = {
+	let lex = newlexer(&memio::fixed(strings::toutf8("\n a")),
+		"");
+	defer close(&lex);
+
+	skipws(&lex)!;
+	assert(nextrune(&lex)! == 'a');
+	assert(lex.loc.0 == 2u && lex.loc.1 == 2u); // after '\n': ' ' and 'a' consumed
+};
diff --git a/test.bnm b/test.bnm
new file mode 100644
index 0000000..0a65d59
--- /dev/null
+++ b/test.bnm
@@ -0,0 +1,5 @@
+3.14159 \pi def
+[dup *] \square def
+
+[square pi *] \circarea def
+20 circarea . ( => 1256.636 )
diff --git a/types.ha b/types.ha
new file mode 100644
index 0000000..ee64ac6
--- /dev/null
+++ b/types.ha
@@ -0,0 +1,7 @@
+export type punct = enum uint { // punctuation kinds; no explicit values are assigned
+	LEFT_PAREN, RIGHT_PAREN,
+	LEFT_SQUARE_BRACKET, RIGHT_SQUARE_BRACKET,
+	LEFT_CURLY_BRACKET, RIGHT_CURLY_BRACKET,
+	BACKSLASH, COLON,
+};
+export type token = (punct | str | f64 | bool); // presumably the lexer's output; nothing in this patch produces one yet