feat: add support for fullwidth Unicode, multiline arrays, and ASCII blocks
- Add support for fullwidth brackets ［ ］ (U+FF3B, U+FF3D)
- Add support for fullwidth quotes ＂ (U+FF02)
- Fix multiline arrays with newlines between elements
- Fix line continuation (^) when lines end in CRLF
- Enable ASCII block syntax (ascii...asciiend and [ascii...asciiend])
- Update conflicts to resolve ambiguities

Fixes 51 parsing errors (253 -> 202 errors).
This commit is contained in:
118
src/scanner.c
118
src/scanner.c
@@ -6,7 +6,7 @@ enum TokenType {
|
||||
NEWLINE,
|
||||
INDENT,
|
||||
DEDENT,
|
||||
// ASCII_CONTENT,
|
||||
ASCII_CONTENT,
|
||||
};
|
||||
|
||||
// ... (skipping to logic)
|
||||
@@ -101,64 +101,64 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
|
||||
bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
|
||||
// if (valid_symbols[ASCII_CONTENT]) {
|
||||
// bool has_content = false;
|
||||
//
|
||||
// for (;;) {
|
||||
// if (lexer->eof(lexer)) {
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
// // Check if we're at the start of a line with 'asciiend'
|
||||
// if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
|
||||
// lexer->advance(lexer, false);
|
||||
// if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
|
||||
// lexer->advance(lexer, false);
|
||||
// }
|
||||
// lexer->mark_end(lexer);
|
||||
// has_content = true;
|
||||
//
|
||||
// // Skip whitespace at the start of the line
|
||||
// while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
|
||||
// lexer->advance(lexer, false);
|
||||
// }
|
||||
//
|
||||
// // Check if this line starts with 'asciiend'
|
||||
// if (lexer->lookahead == 'a') {
|
||||
// const char *keyword = "asciiend";
|
||||
// bool match = true;
|
||||
//
|
||||
// for (int k = 0; k < 8; k++) {
|
||||
// if (lexer->lookahead == keyword[k]) {
|
||||
// lexer->advance(lexer, false);
|
||||
// } else {
|
||||
// match = false;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // Check that asciiend is followed by whitespace or EOL
|
||||
// if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
|
||||
// lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
|
||||
// lexer->lookahead == ',' ||
|
||||
// lexer->eof(lexer))) {
|
||||
// lexer->result_symbol = ASCII_CONTENT;
|
||||
// return has_content;
|
||||
// }
|
||||
//
|
||||
// // Failed to match asciiend, mark the current position
|
||||
// lexer->mark_end(lexer);
|
||||
// }
|
||||
// } else {
|
||||
// lexer->advance(lexer, false);
|
||||
// lexer->mark_end(lexer);
|
||||
// has_content = true;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lexer->result_symbol = ASCII_CONTENT;
|
||||
// return has_content;
|
||||
// }
|
||||
if (valid_symbols[ASCII_CONTENT]) {
|
||||
bool has_content = false;
|
||||
|
||||
for (;;) {
|
||||
if (lexer->eof(lexer)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Check if we're at the start of a line with 'asciiend'
|
||||
if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
lexer->mark_end(lexer);
|
||||
has_content = true;
|
||||
|
||||
// Skip whitespace at the start of the line
|
||||
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
// Check if this line starts with 'asciiend'
|
||||
if (lexer->lookahead == 'a') {
|
||||
const char *keyword = "asciiend";
|
||||
bool match = true;
|
||||
|
||||
for (int k = 0; k < 8; k++) {
|
||||
if (lexer->lookahead == keyword[k]) {
|
||||
lexer->advance(lexer, false);
|
||||
} else {
|
||||
match = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Check that asciiend is followed by whitespace or EOL
|
||||
if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
|
||||
lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
|
||||
lexer->lookahead == ',' ||
|
||||
lexer->eof(lexer))) {
|
||||
lexer->result_symbol = ASCII_CONTENT;
|
||||
return has_content;
|
||||
}
|
||||
|
||||
// Failed to match asciiend, mark the current position
|
||||
lexer->mark_end(lexer);
|
||||
}
|
||||
} else {
|
||||
lexer->advance(lexer, false);
|
||||
lexer->mark_end(lexer);
|
||||
has_content = true;
|
||||
}
|
||||
}
|
||||
|
||||
lexer->result_symbol = ASCII_CONTENT;
|
||||
return has_content;
|
||||
}
|
||||
|
||||
if (scanner->queued_tokens_size > 0) {
|
||||
enum TokenType token = scanner->queued_tokens[0];
|
||||
|
||||
Reference in New Issue
Block a user