fix: improve parser - fix ASCII strings and comment indentation handling

- Remove 'asciiend' from ascii_string grammar rule (handled by scanner)
- Add scanner logic to skip comment-only lines when measuring indentation
- Update scanner to include 'asciiend' in ASCII_CONTENT token
- Implement external scanner for BLOCK_COMMENT (partial fix)

Results: Reduced parse errors from 156 to 119 (23% improvement)
This commit is contained in:
2025-11-27 11:09:32 +01:00
parent 06e6e3b098
commit 36d6c3947a
7 changed files with 15566 additions and 15434 deletions

View File

@@ -7,6 +7,7 @@ enum TokenType {
INDENT,
DEDENT,
ASCII_CONTENT,
BLOCK_COMMENT,
};
// ... (skipping to logic)
@@ -101,6 +102,33 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload;
// Try to handle block comments whenever we see /*
// This needs to run early before other checks
if (lexer->lookahead == '/') {
lexer->mark_end(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == '*') {
lexer->advance(lexer, false);
// Consume everything until */
while (!lexer->eof(lexer)) {
if (lexer->lookahead == '*') {
lexer->advance(lexer, false);
if (lexer->lookahead == '/') {
lexer->advance(lexer, false);
lexer->mark_end(lexer);
lexer->result_symbol = BLOCK_COMMENT;
return true;
}
} else {
lexer->advance(lexer, false);
}
}
// Reached EOF without closing */
return false;
}
}
if (valid_symbols[ASCII_CONTENT]) {
bool has_content = false;
@@ -140,6 +168,7 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
lexer->lookahead == ',' || lexer->lookahead == ')' ||
lexer->lookahead == ']' || lexer->lookahead == 0xFF3D || // full-width
lexer->eof(lexer))) {
lexer->mark_end(lexer);
lexer->result_symbol = ASCII_CONTENT;
return has_content;
}
@@ -212,10 +241,47 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
}
lexer->advance(lexer, false);
}
// Skip comment-only lines when measuring indentation
while (lexer->lookahead == '/' && !lexer->eof(lexer)) {
lexer->mark_end(lexer);
lexer->advance(lexer, false);
// Check if this is a comment
if (lexer->lookahead == '/') {
// Skip the rest of the comment line
while (lexer->lookahead != '\n' && lexer->lookahead != '\r' && !lexer->eof(lexer)) {
lexer->advance(lexer, false);
}
// Skip newline
if (lexer->lookahead == '\r') {
lexer->advance(lexer, false);
}
if (lexer->lookahead == '\n') {
lexer->advance(lexer, false);
}
// Measure indentation of next line
indent_length = 0;
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
if (lexer->lookahead == ' ') {
indent_length++;
} else {
indent_length += 8;
}
lexer->advance(lexer, false);
}
} else {
// Not a comment, break
break;
}
}
}
if (found_end_of_line) {
uint16_t current_indent = scanner->indent_stack[scanner->indent_stack_size - 1];