fix: improve parser - fix ASCII strings and comment indentation handling
- Remove 'asciiend' from the ascii_string grammar rule (it is now handled by the scanner)
- Add scanner logic to skip comment-only lines when measuring indentation
- Update the scanner to include 'asciiend' in the ASCII_CONTENT token
- Implement an external scanner for BLOCK_COMMENT (partial fix)

Results: reduced parse errors from 156 to 119 (23% improvement)
This commit is contained in:
@@ -7,6 +7,7 @@ enum TokenType {
|
||||
INDENT,
|
||||
DEDENT,
|
||||
ASCII_CONTENT,
|
||||
BLOCK_COMMENT,
|
||||
};
|
||||
|
||||
// ... (skipping to logic)
|
||||
@@ -101,6 +102,33 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
|
||||
bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
|
||||
// Try to handle block comments whenever we see /*
|
||||
// This needs to run early before other checks
|
||||
if (lexer->lookahead == '/') {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == '*') {
|
||||
lexer->advance(lexer, false);
|
||||
|
||||
// Consume everything until */
|
||||
while (!lexer->eof(lexer)) {
|
||||
if (lexer->lookahead == '*') {
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == '/') {
|
||||
lexer->advance(lexer, false);
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = BLOCK_COMMENT;
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
}
|
||||
// Reached EOF without closing */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_symbols[ASCII_CONTENT]) {
|
||||
bool has_content = false;
|
||||
|
||||
@@ -140,6 +168,7 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
lexer->lookahead == ',' || lexer->lookahead == ')' ||
|
||||
lexer->lookahead == ']' || lexer->lookahead == 0xFF3D || // ] full-width
|
||||
lexer->eof(lexer))) {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = ASCII_CONTENT;
|
||||
return has_content;
|
||||
}
|
||||
@@ -212,10 +241,47 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
}
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
// Skip comment-only lines when measuring indentation
|
||||
while (lexer->lookahead == '/' && !lexer->eof(lexer)) {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->advance(lexer, false);
|
||||
|
||||
// Check if this is a comment
|
||||
if (lexer->lookahead == '/') {
|
||||
// Skip the rest of the comment line
|
||||
while (lexer->lookahead != '\n' && lexer->lookahead != '\r' && !lexer->eof(lexer)) {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
// Skip newline
|
||||
if (lexer->lookahead == '\r') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
if (lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
// Measure indentation of next line
|
||||
indent_length = 0;
|
||||
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
|
||||
if (lexer->lookahead == ' ') {
|
||||
indent_length++;
|
||||
} else {
|
||||
indent_length += 8;
|
||||
}
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
} else {
|
||||
// Not a comment, break
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
if (found_end_of_line) {
|
||||
uint16_t current_indent = scanner->indent_stack[scanner->indent_stack_size - 1];
|
||||
|
||||
|
||||
Reference in New Issue
Block a user