use ascii; // TODO: maybe use unicode?
use bufio;
use encoding::utf8;
use fmt;
use io;
use memio;
use os;
use strings;

// Lexer: turns a stream of runes into punctuation, string and word tokens.
// The token, word, punctuation, error and invalid types used below are
// declared elsewhere in this module.

// Lexer state.
//
// - in: handle the runes are read from.
// - strbuf: scratch buffer that [[scanword]] and [[scanstr]] reset and refill
//   on every call.
// - path: stored by [[newlexer]]; not read anywhere in this file.
// - loc: (line, column) after the most recently consumed rune. Starts at
//   (1, 0); a '\n' bumps the line and resets the column to 0, any other rune
//   bumps the column.
// - prevloc: value of loc before the most recently consumed rune; restored by
//   [[unget]].
// - unread: at most one rune pushed back by [[unget]].
export type lexer = struct {
	in: io::handle,
	strbuf: memio::stream,
	path: str,
	loc: (uint, uint),
	prevloc: (uint, uint),
	unread: (rune | void),
};

// Creates a lexer reading from "in". The location starts at line 1, column 0,
// and no rune is pushed back. Pass the result to [[close]] when done with it.
export fn newlexer(in: io::handle, path: str) lexer = {
	return lexer {
		in = in,
		strbuf = memio::dynamic(),
		path = path,
		loc = (1, 0),
		unread = void,
		...
	};
};

// Releases the lexer's internal string buffer. The input handle is not closed.
export fn close(lex: *lexer) void = {
	io::close(&lex.strbuf)!;
};

// Returns the next token, skipping leading ASCII whitespace, or io::EOF once
// the input is exhausted.
//
// The runes ( ) [ ] { } \ and : each produce a punctuation token, a '"' starts
// a string literal (see [[scanstr]]), and anything else starts a word (see
// [[scanword]]).
//
// NOTE(review): string and word tokens are backed by the lexer's reusable
// buffer, so they presumably stay valid only until the next string or word is
// scanned -- confirm against memio::string and duplicate them if they must
// outlive that.
export fn next(lex: *lexer) (token | io::EOF | error) = {
	const rn = match (nextrunews(lex)?) {
	case io::EOF =>
		return io::EOF;
	case let rn: rune =>
		yield rn;
	};

	switch (rn) {
	case '(' =>
		return punctuation::LEFT_PAREN: token;
	case ')' =>
		return punctuation::RIGHT_PAREN: token;
	case '[' =>
		return punctuation::LEFT_SQUARE_BRACKET: token;
	case ']' =>
		return punctuation::RIGHT_SQUARE_BRACKET: token;
	case '{' =>
		return punctuation::LEFT_CURLY_BRACKET: token;
	case '}' =>
		return punctuation::RIGHT_CURLY_BRACKET: token;
	case '\\' =>
		return punctuation::BACKSLASH: token;
	case ':' =>
		return punctuation::COLON: token;
	case '"' =>
		match (scanstr(lex)?) {
		case let s: str =>
			return s;
		case io::EOF =>
			return io::EOF;
		};
	case =>
		yield;
	};

	// Not punctuation and not a string: the rune is the first rune of a
	// word, so hand it back for scanword to consume.
	unget(lex, rn);
	return word{ v = scanword(lex)? };
};

// Consumes and returns one rune, preferring a rune pushed back with [[unget]]
// over reading from the input, and updates the location bookkeeping. Returns
// io::EOF at end of input and an "invalid" error carrying the current location
// if the input is not valid UTF-8.
fn nextrune(lex: *lexer) (rune | io::EOF | error) = {
	const rn = match (lex.unread) {
	case let pending: rune =>
		lex.unread = void;
		yield pending;
	case void =>
		yield match (bufio::read_rune(lex.in)) {
		case let fresh: rune =>
			yield fresh;
		case io::EOF =>
			return io::EOF;
		case let err: io::error =>
			return err;
		case utf8::invalid =>
			return lex.loc: invalid;
		};
	};

	// Single place where the location is advanced, shared by the pushback
	// and the read path. prevloc is saved first so that unget can undo
	// exactly this step.
	lex.prevloc = lex.loc;
	if (rn == '\n') {
		lex.loc = (lex.loc.0 + 1, 0);
	} else {
		lex.loc.1 += 1;
	};
	return rn;
};

// Like [[nextrune]], but skips runes for which ascii::isspace is true.
fn nextrunews(lex: *lexer) (rune | io::EOF | error) = {
	for (true) {
		match (nextrune(lex)?) {
		case let rn: rune =>
			if (ascii::isspace(rn)) {
				continue;
			};
			return rn;
		case io::EOF =>
			return io::EOF;
		};
	};
};

// Pushes "rn" back so that the next [[nextrune]] call returns it again, and
// rewinds the location to what it was before the rune was consumed. Only one
// rune can be pending at a time; a second unget without an intervening read
// trips the assertion.
fn unget(lex: *lexer, rn: rune) void = {
	assert(lex.unread is void);
	lex.unread = rn;
	lex.loc = lex.prevloc;
};

// Scans a word: every rune up to, but not including, the next ASCII whitespace
// rune or the end of input. The terminating whitespace is pushed back. The
// result is read out of the lexer's scratch buffer, which is reset on entry.
//
// NOTE(review): punctuation does not end a word, so "foo)" scans as the single
// word "foo)" -- confirm this is intended.
fn scanword(lex: *lexer) (str | error) = {
	memio::reset(&lex.strbuf);

	for (true) {
		const rn = match (nextrune(lex)?) {
		case let rn: rune =>
			yield rn;
		case io::EOF =>
			break;
		};

		if (ascii::isspace(rn)) {
			unget(lex, rn);
			break;
		};
		memio::appendrune(&lex.strbuf, rn)!;
	};

	return memio::string(&lex.strbuf)!;
};

// Scans the remainder of a string literal whose opening '"' has already been
// consumed, up to and including the closing '"', resolving backslash escapes
// through [[scanescape]]. Reaching the end of input before the closing quote
// is reported as an "invalid" error at the current location; no path in this
// function returns io::EOF. The result is read out of the lexer's scratch
// buffer, which is reset on entry.
fn scanstr(lex: *lexer) (str | io::EOF | error) = {
	memio::reset(&lex.strbuf);

	for (true) {
		const rn = match (nextrune(lex)?) {
		case let rn: rune =>
			yield rn;
		case io::EOF =>
			return lex.loc: invalid;
		};

		switch (rn) {
		case '"' =>
			break;
		case '\\' =>
			memio::appendrune(&lex.strbuf, scanescape(lex)?)!;
		case =>
			memio::appendrune(&lex.strbuf, rn)!;
		};
	};

	return memio::string(&lex.strbuf)!;
};

// Resolves the rune following a backslash inside a string literal. Accepted
// are '"' and '\\', which stand for themselves, and a literal newline, which
// yields a newline. Any other rune, or the end of input, is an "invalid" error
// at the current location.
//
// NOTE(review): the two-character sequence backslash + 'n' is rejected; only
// backslash + an actual line feed is accepted (test_scanstr relies on this).
fn scanescape(lex: *lexer) (rune | error) = {
	const rn = match (nextrune(lex)?) {
	case let rn: rune =>
		yield rn;
	case io::EOF =>
		return lex.loc: invalid;
	};

	switch (rn) {
	case '"' =>
		return '"';
	case '\\' =>
		return '\\';
	case '\n' =>
		return '\n';
	case =>
		return lex.loc: invalid;
	};
};

// Tests! :)

// Test helper: returns the next token and aborts the test on an error or on
// io::EOF.
fn tnext(lex: *lexer) token = {
	match (next(lex)!) {
	case let t: token =>
		return t;
	case =>
		abort("expected a token");
	};
};

@test fn test_next() void = {
	let lex = newlexer(&memio::fixed(
		strings::toutf8("\"hello\" \\greeting def")), "");
	defer close(&lex);

	let tk = tnext(&lex);
	assert(tk is str && tk: str == "hello");

	let tk = tnext(&lex);
	assert(tk is punctuation
		&& tk: punctuation == punctuation::BACKSLASH);

	let tk = tnext(&lex);
	assert(tk is word && (tk: word).v == "greeting");

	let tk = tnext(&lex);
	assert(tk is word && (tk: word).v == "def");
};

@test fn test_nextrune() void = {
	let lex = newlexer(&memio::fixed(strings::toutf8("a\nb")), "");
	defer close(&lex);

	assert(nextrune(&lex)! == 'a');
	assert(nextrune(&lex)! == '\n');
	assert(nextrune(&lex)! == 'b');
	// 'b' is the first rune on the second line.
	assert(lex.loc.0 == 2u && lex.loc.1 == 1u);
};

@test fn test_nextrunews() void = {
	let lex = newlexer(&memio::fixed(strings::toutf8("\n a")), "");
	defer close(&lex);

	assert(nextrunews(&lex)! == 'a');
	// Skipped whitespace still advances the location.
	assert(lex.loc.0 == 2u && lex.loc.1 == 2u);
};

@test fn test_scanword() void = {
	let lex = newlexer(&memio::fixed(strings::toutf8("string->number .")), "");
	defer close(&lex);

	assert(scanword(&lex)! == "string->number");
};

@test fn test_scanstr() void = {
	let lex = newlexer(&memio::fixed(strings::toutf8("\"\\\\hello\\\"world!\\\n\"")), "");
	defer close(&lex);

	// scanstr expects the opening quote to have been consumed already.
	assert(nextrune(&lex)! == '"');
	match (scanstr(&lex)!) {
	case io::EOF =>
		assert(false);
	case let s: str =>
		assert(s == "\\hello\"world!\n");
	};
};