Fix CRLF handling in external scanner
- Consume \r as part of the token instead of skipping it
- Break after consuming \n to avoid processing multiple lines
- Consume leading whitespace separately for indent calculation
- Fix ASCII_CONTENT to return false at EOF without asciiend

This fixes ERROR tokens with CRLF line endings, especially with trailing blank lines.
This commit is contained in:
@@ -110,11 +110,8 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
}
|
||||
|
||||
// Check if we're at the start of a line with 'asciiend'
|
||||
if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
|
||||
if (lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
lexer->mark_end(lexer);
|
||||
has_content = true;
|
||||
|
||||
@@ -156,8 +153,8 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
}
|
||||
}
|
||||
|
||||
lexer->result_symbol = ASCII_CONTENT;
|
||||
return has_content;
|
||||
// If we reached EOF without finding asciiend, this is not valid ASCII content
|
||||
return false;
|
||||
}
|
||||
|
||||
if (scanner->queued_tokens_size > 0) {
|
||||
@@ -180,10 +177,17 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
found_end_of_line = true;
|
||||
indent_length = 0;
|
||||
lexer->advance(lexer, false);
|
||||
// After consuming \n, only consume whitespace on the SAME logical line
|
||||
// Don't continue to next line
|
||||
break;
|
||||
} else if (lexer->lookahead == '\r') {
|
||||
// Consume \r as part of line ending (for CRLF), don't skip it
|
||||
lexer->advance(lexer, false);
|
||||
// Continue to potentially consume \n that follows \r
|
||||
} else if (lexer->lookahead == ' ') {
|
||||
indent_length++;
|
||||
lexer->advance(lexer, false);
|
||||
} else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') {
|
||||
} else if (lexer->lookahead == '\f') {
|
||||
indent_length = 0;
|
||||
lexer->advance(lexer, false);
|
||||
} else if (lexer->lookahead == '\t') {
|
||||
@@ -196,6 +200,18 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// After breaking from newline, consume leading whitespace/indentation
|
||||
if (found_end_of_line && !lexer->eof(lexer)) {
|
||||
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
|
||||
if (lexer->lookahead == ' ') {
|
||||
indent_length++;
|
||||
} else {
|
||||
indent_length += 8;
|
||||
}
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user