feat: add support for fullwidth Unicode, multiline arrays, and ASCII blocks

- Add support for fullwidth brackets ［ ］ (U+FF3B, U+FF3D)
- Add support for the fullwidth quotation mark ＂ (U+FF02)
- Fix multiline arrays with newlines between elements
- Fix line continuation with CRLF (^)
- Enable ASCII block syntax (ascii...asciiend and [ascii...asciiend])
- Update the grammar's conflict declarations to resolve parsing ambiguities

Fixed 51 parsing errors (253 -> 202 errors)
This commit is contained in:
2025-11-26 23:04:03 +01:00
parent b746fcec44
commit 99dadd9ca7
5 changed files with 16668 additions and 12483 deletions

View File

@@ -6,7 +6,7 @@ enum TokenType {
NEWLINE,
INDENT,
DEDENT,
// ASCII_CONTENT,
ASCII_CONTENT,
};
// ... (skipping to logic)
@@ -101,64 +101,64 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload;
// if (valid_symbols[ASCII_CONTENT]) {
// bool has_content = false;
//
// for (;;) {
// if (lexer->eof(lexer)) {
// break;
// }
//
// // Check if we're at the start of a line with 'asciiend'
// if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
// lexer->advance(lexer, false);
// if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
// lexer->advance(lexer, false);
// }
// lexer->mark_end(lexer);
// has_content = true;
//
// // Skip whitespace at the start of the line
// while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
// lexer->advance(lexer, false);
// }
//
// // Check if this line starts with 'asciiend'
// if (lexer->lookahead == 'a') {
// const char *keyword = "asciiend";
// bool match = true;
//
// for (int k = 0; k < 8; k++) {
// if (lexer->lookahead == keyword[k]) {
// lexer->advance(lexer, false);
// } else {
// match = false;
// break;
// }
// }
//
// // Check that asciiend is followed by whitespace or EOL
// if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
// lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
// lexer->lookahead == ',' ||
// lexer->eof(lexer))) {
// lexer->result_symbol = ASCII_CONTENT;
// return has_content;
// }
//
// // Failed to match asciiend, mark the current position
// lexer->mark_end(lexer);
// }
// } else {
// lexer->advance(lexer, false);
// lexer->mark_end(lexer);
// has_content = true;
// }
// }
//
// lexer->result_symbol = ASCII_CONTENT;
// return has_content;
// }
if (valid_symbols[ASCII_CONTENT]) {
bool has_content = false;
for (;;) {
if (lexer->eof(lexer)) {
break;
}
// Check if we're at the start of a line with 'asciiend'
if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
lexer->advance(lexer, false);
if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
lexer->advance(lexer, false);
}
lexer->mark_end(lexer);
has_content = true;
// Skip whitespace at the start of the line
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
lexer->advance(lexer, false);
}
// Check if this line starts with 'asciiend'
if (lexer->lookahead == 'a') {
const char *keyword = "asciiend";
bool match = true;
for (int k = 0; k < 8; k++) {
if (lexer->lookahead == keyword[k]) {
lexer->advance(lexer, false);
} else {
match = false;
break;
}
}
// Check that asciiend is followed by whitespace or EOL
if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
lexer->lookahead == ',' ||
lexer->eof(lexer))) {
lexer->result_symbol = ASCII_CONTENT;
return has_content;
}
// Failed to match asciiend, mark the current position
lexer->mark_end(lexer);
}
} else {
lexer->advance(lexer, false);
lexer->mark_end(lexer);
has_content = true;
}
}
lexer->result_symbol = ASCII_CONTENT;
return has_content;
}
if (scanner->queued_tokens_size > 0) {
enum TokenType token = scanner->queued_tokens[0];