parse: show error on test failure

add unicode escape sequences
2024-12-05 13:43:31 -03:00 · 2024-12-05 13:43:12 -03:00
3 changed files with 75 additions and 10 deletions
--- a/parse/+test/lexer.ha
+++ b/parse/+test/lexer.ha
@ -13,6 +13,8 @@ use io;
 				mkword("def")]),
 		(`#t #f`, [true, false]),
 		(`#\a #\space #\nul`, ['a', ' ', '\0']),
+		(`"\x0a;" "\x2014;" "\x2f9f4;"`, ["\n", "—", "嶲"]),
+		(`#\x #\x0a; #\x2014; #\x2f9f4;`, ['x', '\n', '—', '嶲']),
 	];

 	for (let i = 0z; i < len(cases); i += 1) {
@ -23,7 +25,16 @@ use io;

 		for (let j = 0z; j < len(cases[i].1); j += 1) {
 			const want = cases[i].1[j];
-			const have = lex(&lexer)! as token;
+			const have = match (lex(&lexer)) {
+			case let tok: token =>
+				yield tok;
+			case io::EOF =>
+				assert(false, "reached EOF");
+				return;
+			case let err: error =>
+				assert(false, strerror(err));
+				return;
+			};

 			if (!tokeq(want, have)) {
 				fmt::printfln("Case {}: {}", i, cases[i].0)!;
--- a/parse/lex.ha
+++ b/parse/lex.ha
@ -247,15 +247,19 @@ fn scanchar(lex: *lexer) (rune | error) = {
 		if (isspace(rnn)) {
 			return rn;
 		} else {
-			memio::appendrune(&namebuf, rn)!;
-			memio::concat(&namebuf, scanword(lex)?)!;
-			const name = memio::string(&namebuf)!;
-				for (let i = 0z; i < len(longcharnames); i += 1) {
-				if (name == longcharnames[i].0) {
-					return longcharnames[i].1;
+			if (rn == 'x') {
+				return scanescape2(lex);
+			} else {
+				memio::appendrune(&namebuf, rn)!;
+				memio::concat(&namebuf, scanword(lex)?)!;
+				const name = memio::string(&namebuf)!;
+					for (let i = 0z; i < len(longcharnames); i += 1) {
+					if (name == longcharnames[i].0) {
+						return longcharnames[i].1;
+					};
 				};
+				return lex.loc: invalid;
 			};
-			return lex.loc: invalid;
 		};
 	case io::EOF =>
 		return rn;
@ -282,6 +286,56 @@ fn scanescape(lex: *lexer) (rune | error) = {
 	case 'v' => return '\v';
 	case '\\' => return '\\';
 	case '"' => return '"';
+	case 'x' => return scanescape2(lex)?;
+	case =>
+		return lex.loc: invalid;
+	};
+};
+
+fn scanescape2(lex: *lexer) (rune | error) = {
+	// Scans the `hh...;` tail of a `\xhh...;` escape and returns the rune
+	// it denotes. It's in a separate function since both [[scanescape]]
+	// and [[scanchar]] make use of it. Much like how [[scanescape]]
+	// assumes that the backslash has already been consumed, this one
+	// assumes that the leading 'x' has been consumed prior to entering
+	// this function.
+	//
+	// Returns [[unterminated]] if EOF is reached before the closing ';'.
+	// Returns [[invalid]] if more than six characters precede the ';', if
+	// they do not parse as a hexadecimal number, or if the number is not
+	// a Unicode scalar value.
+
+	// U+10FFFF, the largest code point, takes six hexadecimal digits.
+	const storage: [6]u8 = [0...];
+	let hexbuf = memio::fixed(storage);
+
+	for (let ndigits = 0z; true; ndigits += 1) {
+		const rn = match (nextrune(lex)?) {
+		case let rn: rune =>
+			yield rn;
+		case io::EOF =>
+			return ("escape sequence", lex.loc.0, lex.loc.1): unterminated;
+		};
+
+		// Test for the terminator before the length limit, so that a
+		// full six-digit escape followed by ';' is accepted.
+		if (rn == ';') {
+			break;
+		};
+
+		// Non-ASCII runes can never be hex digits. Rejecting them here
+		// also guarantees one byte per rune, so the append below
+		// cannot overflow the six-byte buffer.
+		if (ndigits == len(storage) || rn: u32 > 0x7F) {
+			return lex.loc: invalid;
+		};
+		memio::appendrune(&hexbuf, rn)!;
+	};
+
+	const hex = memio::string(&hexbuf)!;
+
+	return match (strconv::stou32(hex, strconv::base::HEX)) {
+	case let codepoint: u32 =>
+		// Surrogates and values past U+10FFFF are not valid runes.
+		if (codepoint > 0x10FFFF
+				|| (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
+			return lex.loc: invalid;
+		};
+		return codepoint: rune;
 	case =>
 		return lex.loc: invalid;
 	};
--- a/parse/types.ha
+++ b/parse/types.ha
@ -22,10 +22,10 @@ export fn strerror(err: error) const str = {
 	match (err) {
 	case let err: invalid =>
 		return fmt::bsprintf(buf,
-			"{}:{}: Invalid token found", err.0, err.1);
+			"Invalid token found at {}:{}", err.0, err.1);
 	case let err: unterminated =>
 		return fmt::bsprintf(buf,
-			"{}:{}: Unterminated {} found", err.1, err.2, err.0);
+			"Unterminated {} found at {}:{}", err.0, err.1, err.2);
 	case let err: io::error =>
 		return io::strerror(err);
 	};
Author	SHA1	Message	Date
Lobo Torres	1e5ed47497	parse: show error on test failure	2024-12-05 13:43:31 -03:00
Lobo Torres	a9a72e8f1f	add unicode escape sequences	2024-12-05 13:43:12 -03:00