Fix CRLF handling in external scanner

- Consume \r as part of token instead of skipping it
- Break after consuming \n to avoid processing multiple lines
- Consume leading whitespace separately for indent calculation
- Fix ASCII_CONTENT to return false when EOF is reached without finding asciiend

This fixes ERROR tokens produced for input with CRLF line endings,
especially when the input ends with trailing blank lines.
This commit is contained in:
2025-11-27 01:25:06 +01:00
parent 0b78c43138
commit eaf0963459
5 changed files with 12315 additions and 12389 deletions

View File

@@ -110,11 +110,8 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
}
// Check if we're at the start of a line with 'asciiend'
if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
if (lexer->lookahead == '\n') {
lexer->advance(lexer, false);
if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
lexer->advance(lexer, false);
}
lexer->mark_end(lexer);
has_content = true;
@@ -156,8 +153,8 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
}
}
lexer->result_symbol = ASCII_CONTENT;
return has_content;
// If we reached EOF without finding asciiend, this is not valid ASCII content
return false;
}
if (scanner->queued_tokens_size > 0) {
@@ -180,10 +177,17 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
found_end_of_line = true;
indent_length = 0;
lexer->advance(lexer, false);
// After consuming \n, only consume whitespace on the SAME logical line
// Don't continue to next line
break;
} else if (lexer->lookahead == '\r') {
// Consume \r as part of line ending (for CRLF), don't skip it
lexer->advance(lexer, false);
// Continue to potentially consume \n that follows \r
} else if (lexer->lookahead == ' ') {
indent_length++;
lexer->advance(lexer, false);
} else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') {
} else if (lexer->lookahead == '\f') {
indent_length = 0;
lexer->advance(lexer, false);
} else if (lexer->lookahead == '\t') {
@@ -196,6 +200,18 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
break;
}
}
// After breaking from newline, consume leading whitespace/indentation
if (found_end_of_line && !lexer->eof(lexer)) {
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
if (lexer->lookahead == ' ') {
indent_length++;
} else {
indent_length += 8;
}
lexer->advance(lexer, false);
}
}