Fix CRLF handling in external scanner
- Consume \r as part of the token instead of skipping it
- Break after consuming \n to avoid processing multiple lines
- Consume leading whitespace separately for indent calculation
- Fix ASCII_CONTENT to return false at EOF without asciiend

This fixes ERROR tokens with CRLF line endings, especially with trailing blank lines.
This commit is contained in:
5
src/grammar.json
generated
5
src/grammar.json
generated
@@ -1,5 +1,4 @@
|
|||||||
{
|
{
|
||||||
"$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json",
|
|
||||||
"name": "stonescript",
|
"name": "stonescript",
|
||||||
"word": "identifier",
|
"word": "identifier",
|
||||||
"rules": {
|
"rules": {
|
||||||
@@ -1503,6 +1502,6 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"inline": [],
|
"inline": [],
|
||||||
"supertypes": [],
|
"supertypes": []
|
||||||
"reserved": {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
7
src/node-types.json
generated
7
src/node-types.json
generated
@@ -1543,7 +1543,6 @@
|
|||||||
{
|
{
|
||||||
"type": "source_file",
|
"type": "source_file",
|
||||||
"named": true,
|
"named": true,
|
||||||
"root": true,
|
|
||||||
"fields": {},
|
"fields": {},
|
||||||
"children": {
|
"children": {
|
||||||
"multiple": true,
|
"multiple": true,
|
||||||
@@ -2054,8 +2053,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "block_comment",
|
"type": "block_comment",
|
||||||
"named": true,
|
"named": true
|
||||||
"extra": true
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "break_statement",
|
"type": "break_statement",
|
||||||
@@ -2067,8 +2065,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "comment",
|
"type": "comment",
|
||||||
"named": true,
|
"named": true
|
||||||
"extra": true
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "continue_statement",
|
"type": "continue_statement",
|
||||||
|
|||||||
24563
src/parser.c
generated
24563
src/parser.c
generated
File diff suppressed because it is too large
Load Diff
@@ -110,11 +110,8 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check if we're at the start of a line with 'asciiend'
|
// Check if we're at the start of a line with 'asciiend'
|
||||||
if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
|
if (lexer->lookahead == '\n') {
|
||||||
lexer->advance(lexer, false);
|
lexer->advance(lexer, false);
|
||||||
if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
|
|
||||||
lexer->advance(lexer, false);
|
|
||||||
}
|
|
||||||
lexer->mark_end(lexer);
|
lexer->mark_end(lexer);
|
||||||
has_content = true;
|
has_content = true;
|
||||||
|
|
||||||
@@ -156,8 +153,8 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lexer->result_symbol = ASCII_CONTENT;
|
// If we reached EOF without finding asciiend, this is not valid ASCII content
|
||||||
return has_content;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scanner->queued_tokens_size > 0) {
|
if (scanner->queued_tokens_size > 0) {
|
||||||
@@ -180,10 +177,17 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
|||||||
found_end_of_line = true;
|
found_end_of_line = true;
|
||||||
indent_length = 0;
|
indent_length = 0;
|
||||||
lexer->advance(lexer, false);
|
lexer->advance(lexer, false);
|
||||||
|
// After consuming \n, only consume whitespace on the SAME logical line
|
||||||
|
// Don't continue to next line
|
||||||
|
break;
|
||||||
|
} else if (lexer->lookahead == '\r') {
|
||||||
|
// Consume \r as part of line ending (for CRLF), don't skip it
|
||||||
|
lexer->advance(lexer, false);
|
||||||
|
// Continue to potentially consume \n that follows \r
|
||||||
} else if (lexer->lookahead == ' ') {
|
} else if (lexer->lookahead == ' ') {
|
||||||
indent_length++;
|
indent_length++;
|
||||||
lexer->advance(lexer, false);
|
lexer->advance(lexer, false);
|
||||||
} else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') {
|
} else if (lexer->lookahead == '\f') {
|
||||||
indent_length = 0;
|
indent_length = 0;
|
||||||
lexer->advance(lexer, false);
|
lexer->advance(lexer, false);
|
||||||
} else if (lexer->lookahead == '\t') {
|
} else if (lexer->lookahead == '\t') {
|
||||||
@@ -197,6 +201,18 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// After breaking from newline, consume leading whitespace/indentation
|
||||||
|
if (found_end_of_line && !lexer->eof(lexer)) {
|
||||||
|
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
|
||||||
|
if (lexer->lookahead == ' ') {
|
||||||
|
indent_length++;
|
||||||
|
} else {
|
||||||
|
indent_length += 8;
|
||||||
|
}
|
||||||
|
lexer->advance(lexer, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (found_end_of_line) {
|
if (found_end_of_line) {
|
||||||
|
|||||||
@@ -13,17 +13,12 @@ extern "C" {
|
|||||||
#define ts_builtin_sym_end 0
|
#define ts_builtin_sym_end 0
|
||||||
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
|
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
|
||||||
|
|
||||||
#ifndef TREE_SITTER_API_H_
|
|
||||||
typedef uint16_t TSStateId;
|
typedef uint16_t TSStateId;
|
||||||
|
|
||||||
|
#ifndef TREE_SITTER_API_H_
|
||||||
typedef uint16_t TSSymbol;
|
typedef uint16_t TSSymbol;
|
||||||
typedef uint16_t TSFieldId;
|
typedef uint16_t TSFieldId;
|
||||||
typedef struct TSLanguage TSLanguage;
|
typedef struct TSLanguage TSLanguage;
|
||||||
typedef struct TSLanguageMetadata TSLanguageMetadata;
|
|
||||||
typedef struct TSLanguageMetadata {
|
|
||||||
uint8_t major_version;
|
|
||||||
uint8_t minor_version;
|
|
||||||
uint8_t patch_version;
|
|
||||||
} TSLanguageMetadata;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -32,11 +27,10 @@ typedef struct {
|
|||||||
bool inherited;
|
bool inherited;
|
||||||
} TSFieldMapEntry;
|
} TSFieldMapEntry;
|
||||||
|
|
||||||
// Used to index the field and supertype maps.
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint16_t index;
|
uint16_t index;
|
||||||
uint16_t length;
|
uint16_t length;
|
||||||
} TSMapSlice;
|
} TSFieldMapSlice;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
bool visible;
|
bool visible;
|
||||||
@@ -54,7 +48,6 @@ struct TSLexer {
|
|||||||
uint32_t (*get_column)(TSLexer *);
|
uint32_t (*get_column)(TSLexer *);
|
||||||
bool (*is_at_included_range_start)(const TSLexer *);
|
bool (*is_at_included_range_start)(const TSLexer *);
|
||||||
bool (*eof)(const TSLexer *);
|
bool (*eof)(const TSLexer *);
|
||||||
void (*log)(const TSLexer *, const char *, ...);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
@@ -86,12 +79,6 @@ typedef struct {
|
|||||||
uint16_t external_lex_state;
|
uint16_t external_lex_state;
|
||||||
} TSLexMode;
|
} TSLexMode;
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
uint16_t lex_state;
|
|
||||||
uint16_t external_lex_state;
|
|
||||||
uint16_t reserved_word_set_id;
|
|
||||||
} TSLexerMode;
|
|
||||||
|
|
||||||
typedef union {
|
typedef union {
|
||||||
TSParseAction action;
|
TSParseAction action;
|
||||||
struct {
|
struct {
|
||||||
@@ -100,13 +87,8 @@ typedef union {
|
|||||||
} entry;
|
} entry;
|
||||||
} TSParseActionEntry;
|
} TSParseActionEntry;
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int32_t start;
|
|
||||||
int32_t end;
|
|
||||||
} TSCharacterRange;
|
|
||||||
|
|
||||||
struct TSLanguage {
|
struct TSLanguage {
|
||||||
uint32_t abi_version;
|
uint32_t version;
|
||||||
uint32_t symbol_count;
|
uint32_t symbol_count;
|
||||||
uint32_t alias_count;
|
uint32_t alias_count;
|
||||||
uint32_t token_count;
|
uint32_t token_count;
|
||||||
@@ -122,13 +104,13 @@ struct TSLanguage {
|
|||||||
const TSParseActionEntry *parse_actions;
|
const TSParseActionEntry *parse_actions;
|
||||||
const char * const *symbol_names;
|
const char * const *symbol_names;
|
||||||
const char * const *field_names;
|
const char * const *field_names;
|
||||||
const TSMapSlice *field_map_slices;
|
const TSFieldMapSlice *field_map_slices;
|
||||||
const TSFieldMapEntry *field_map_entries;
|
const TSFieldMapEntry *field_map_entries;
|
||||||
const TSSymbolMetadata *symbol_metadata;
|
const TSSymbolMetadata *symbol_metadata;
|
||||||
const TSSymbol *public_symbol_map;
|
const TSSymbol *public_symbol_map;
|
||||||
const uint16_t *alias_map;
|
const uint16_t *alias_map;
|
||||||
const TSSymbol *alias_sequences;
|
const TSSymbol *alias_sequences;
|
||||||
const TSLexerMode *lex_modes;
|
const TSLexMode *lex_modes;
|
||||||
bool (*lex_fn)(TSLexer *, TSStateId);
|
bool (*lex_fn)(TSLexer *, TSStateId);
|
||||||
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
|
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
|
||||||
TSSymbol keyword_capture_token;
|
TSSymbol keyword_capture_token;
|
||||||
@@ -142,48 +124,15 @@ struct TSLanguage {
|
|||||||
void (*deserialize)(void *, const char *, unsigned);
|
void (*deserialize)(void *, const char *, unsigned);
|
||||||
} external_scanner;
|
} external_scanner;
|
||||||
const TSStateId *primary_state_ids;
|
const TSStateId *primary_state_ids;
|
||||||
const char *name;
|
|
||||||
const TSSymbol *reserved_words;
|
|
||||||
uint16_t max_reserved_word_set_size;
|
|
||||||
uint32_t supertype_count;
|
|
||||||
const TSSymbol *supertype_symbols;
|
|
||||||
const TSMapSlice *supertype_map_slices;
|
|
||||||
const TSSymbol *supertype_map_entries;
|
|
||||||
TSLanguageMetadata metadata;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
|
|
||||||
uint32_t index = 0;
|
|
||||||
uint32_t size = len - index;
|
|
||||||
while (size > 1) {
|
|
||||||
uint32_t half_size = size / 2;
|
|
||||||
uint32_t mid_index = index + half_size;
|
|
||||||
const TSCharacterRange *range = &ranges[mid_index];
|
|
||||||
if (lookahead >= range->start && lookahead <= range->end) {
|
|
||||||
return true;
|
|
||||||
} else if (lookahead > range->end) {
|
|
||||||
index = mid_index;
|
|
||||||
}
|
|
||||||
size -= half_size;
|
|
||||||
}
|
|
||||||
const TSCharacterRange *range = &ranges[index];
|
|
||||||
return (lookahead >= range->start && lookahead <= range->end);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Lexer Macros
|
* Lexer Macros
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
#define UNUSED __pragma(warning(suppress : 4101))
|
|
||||||
#else
|
|
||||||
#define UNUSED __attribute__((unused))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define START_LEXER() \
|
#define START_LEXER() \
|
||||||
bool result = false; \
|
bool result = false; \
|
||||||
bool skip = false; \
|
bool skip = false; \
|
||||||
UNUSED \
|
|
||||||
bool eof = false; \
|
bool eof = false; \
|
||||||
int32_t lookahead; \
|
int32_t lookahead; \
|
||||||
goto start; \
|
goto start; \
|
||||||
@@ -199,17 +148,6 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
|
|||||||
goto next_state; \
|
goto next_state; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ADVANCE_MAP(...) \
|
|
||||||
{ \
|
|
||||||
static const uint16_t map[] = { __VA_ARGS__ }; \
|
|
||||||
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
|
|
||||||
if (map[i] == lookahead) { \
|
|
||||||
state = map[i + 1]; \
|
|
||||||
goto next_state; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SKIP(state_value) \
|
#define SKIP(state_value) \
|
||||||
{ \
|
{ \
|
||||||
skip = true; \
|
skip = true; \
|
||||||
@@ -228,7 +166,7 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
|
|||||||
* Parse Table Macros
|
* Parse Table Macros
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
|
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
|
||||||
|
|
||||||
#define STATE(id) id
|
#define STATE(id) id
|
||||||
|
|
||||||
@@ -238,7 +176,7 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
|
|||||||
{{ \
|
{{ \
|
||||||
.shift = { \
|
.shift = { \
|
||||||
.type = TSParseActionTypeShift, \
|
.type = TSParseActionTypeShift, \
|
||||||
.state = (state_value) \
|
.state = state_value \
|
||||||
} \
|
} \
|
||||||
}}
|
}}
|
||||||
|
|
||||||
@@ -246,7 +184,7 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
|
|||||||
{{ \
|
{{ \
|
||||||
.shift = { \
|
.shift = { \
|
||||||
.type = TSParseActionTypeShift, \
|
.type = TSParseActionTypeShift, \
|
||||||
.state = (state_value), \
|
.state = state_value, \
|
||||||
.repetition = true \
|
.repetition = true \
|
||||||
} \
|
} \
|
||||||
}}
|
}}
|
||||||
@@ -259,15 +197,14 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
|
|||||||
} \
|
} \
|
||||||
}}
|
}}
|
||||||
|
|
||||||
#define REDUCE(symbol_name, children, precedence, prod_id) \
|
#define REDUCE(symbol_val, child_count_val, ...) \
|
||||||
{{ \
|
{{ \
|
||||||
.reduce = { \
|
.reduce = { \
|
||||||
.type = TSParseActionTypeReduce, \
|
.type = TSParseActionTypeReduce, \
|
||||||
.symbol = symbol_name, \
|
.symbol = symbol_val, \
|
||||||
.child_count = children, \
|
.child_count = child_count_val, \
|
||||||
.dynamic_precedence = precedence, \
|
__VA_ARGS__ \
|
||||||
.production_id = prod_id \
|
}, \
|
||||||
}, \
|
|
||||||
}}
|
}}
|
||||||
|
|
||||||
#define RECOVER() \
|
#define RECOVER() \
|
||||||
|
|||||||
Reference in New Issue
Block a user