feat: major grammar improvements

- Enable ASCII blocks in print commands
- Add import as expression (not just statement)
- Fix operator precedence (& and | now lower than comparisons)
- Allow comments and newlines as top-level statements
- Fix source_file to handle leading comments and empty lines

Progress: 253 → 98 errors (155 files fixed, 55% success)
This commit is contained in:
2025-11-26 23:32:44 +01:00
parent 99dadd9ca7
commit 0b78c43138
5 changed files with 15079 additions and 15017 deletions

View File

@@ -6,7 +6,7 @@ module.exports = grammar({
_statement: $ => prec.right(seq( _statement: $ => prec.right(seq(
choice( choice(
// Comments first $._newline,
$.comment, $.comment,
$.block_comment, $.block_comment,
// Keyword-based statements (must come before generic command) // Keyword-based statements (must come before generic command)
@@ -16,7 +16,6 @@ module.exports = grammar({
$.return_statement, // 'return' $.return_statement, // 'return'
$.break_statement, // 'break' $.break_statement, // 'break'
$.continue_statement, // 'continue' $.continue_statement, // 'continue'
$.import_statement, // 'import'
// Control flow // Control flow
$.conditional, // '?' $.conditional, // '?'
$.else_clause, // ':' $.else_clause, // ':'
@@ -81,7 +80,7 @@ module.exports = grammar({
), ),
// Import // Import
import_statement: $ => seq( import_expression: $ => seq(
'import', 'import',
$.module_path $.module_path
), ),
@@ -151,7 +150,7 @@ module.exports = grammar({
print_argument: $ => prec.left(repeat1(choice( print_argument: $ => prec.left(repeat1(choice(
$.interpolation, $.interpolation,
$.string, $.string,
// $.ascii_string, $.ascii_string,
$.color_code, $.color_code,
$.print_text $.print_text
))), ))),
@@ -186,6 +185,7 @@ module.exports = grammar({
$.assignment_expression, $.assignment_expression,
$.parenthesized_expression, $.parenthesized_expression,
$.new_statement, $.new_statement,
$.import_expression,
$.ascii_string, $.ascii_string,
$.color_code $.color_code
), ),
@@ -221,19 +221,11 @@ module.exports = grammar({
// Binary operators with proper precedence // Binary operators with proper precedence
binary_expression: $ => choice( binary_expression: $ => choice(
prec.left(4, seq($._expression, '|', $._expression)), prec.left(6, seq($._expression, choice('*', '/', '%'), $._expression)),
prec.left(5, seq($._expression, '&', $._expression)), prec.left(5, seq($._expression, choice('+', '-'), $._expression)),
prec.left(7, seq($._expression, '!', $._expression)), prec.left(4, seq($._expression, choice('=', '!=', '!', '<', '>', '<=', '>='), $._expression)),
prec.left(7, seq($._expression, '=', $._expression)), prec.left(3, seq($._expression, '&', $._expression)),
prec.left(8, seq($._expression, '<', $._expression)), prec.left(2, seq($._expression, '|', $._expression))
prec.left(8, seq($._expression, '>', $._expression)),
prec.left(8, seq($._expression, '<=', $._expression)),
prec.left(8, seq($._expression, '>=', $._expression)),
prec.left(9, seq($._expression, '+', $._expression)),
prec.left(9, seq($._expression, '-', $._expression)),
prec.left(10, seq($._expression, '*', $._expression)),
prec.left(10, seq($._expression, '/', $._expression)),
prec.left(11, seq($._expression, '%', $._expression))
), ),
update_expression: $ => choice( update_expression: $ => choice(

331
src/grammar.json generated
View File

@@ -19,6 +19,10 @@
{ {
"type": "CHOICE", "type": "CHOICE",
"members": [ "members": [
{
"type": "SYMBOL",
"name": "_newline"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "comment" "name": "comment"
@@ -51,10 +55,6 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "continue_statement" "name": "continue_statement"
}, },
{
"type": "SYMBOL",
"name": "import_statement"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "conditional" "name": "conditional"
@@ -303,7 +303,7 @@
} }
] ]
}, },
"import_statement": { "import_expression": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
{ {
@@ -604,6 +604,10 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "string" "name": "string"
}, },
{
"type": "SYMBOL",
"name": "ascii_string"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "color_code" "name": "color_code"
@@ -712,6 +716,10 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "new_statement" "name": "new_statement"
}, },
{
"type": "SYMBOL",
"name": "import_expression"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "ascii_string" "name": "ascii_string"
@@ -891,7 +899,7 @@
"members": [ "members": [
{ {
"type": "PREC_LEFT", "type": "PREC_LEFT",
"value": 4, "value": 6,
"content": { "content": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
@@ -900,8 +908,21 @@
"name": "_expression" "name": "_expression"
}, },
{ {
"type": "STRING", "type": "CHOICE",
"value": "|" "members": [
{
"type": "STRING",
"value": "*"
},
{
"type": "STRING",
"value": "/"
},
{
"type": "STRING",
"value": "%"
}
]
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@@ -913,6 +934,86 @@
{ {
"type": "PREC_LEFT", "type": "PREC_LEFT",
"value": 5, "value": 5,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "+"
},
{
"type": "STRING",
"value": "-"
}
]
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 4,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "="
},
{
"type": "STRING",
"value": "!="
},
{
"type": "STRING",
"value": "!"
},
{
"type": "STRING",
"value": "<"
},
{
"type": "STRING",
"value": ">"
},
{
"type": "STRING",
"value": "<="
},
{
"type": "STRING",
"value": ">="
}
]
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 3,
"content": { "content": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
@@ -933,7 +1034,7 @@
}, },
{ {
"type": "PREC_LEFT", "type": "PREC_LEFT",
"value": 7, "value": 2,
"content": { "content": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
@@ -943,217 +1044,7 @@
}, },
{ {
"type": "STRING", "type": "STRING",
"value": "!" "value": "|"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 7,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "="
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 8,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "<"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 8,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": ">"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 8,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "<="
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 8,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": ">="
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 9,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "+"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 9,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "-"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 10,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "*"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 10,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "/"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 11,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "%"
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",

88
src/node-types.json generated
View File

@@ -47,6 +47,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -145,6 +149,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -243,6 +251,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -326,6 +338,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -371,7 +387,7 @@
"fields": {}, "fields": {},
"children": { "children": {
"multiple": true, "multiple": true,
"required": true, "required": false,
"types": [ "types": [
{ {
"type": "block_comment", "type": "block_comment",
@@ -413,10 +429,6 @@
"type": "function_declaration", "type": "function_declaration",
"named": true "named": true
}, },
{
"type": "import_statement",
"named": true
},
{ {
"type": "print_command", "type": "print_command",
"named": true "named": true
@@ -481,6 +493,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -615,6 +631,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -702,6 +722,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -800,6 +824,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -887,6 +915,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -957,7 +989,7 @@
} }
}, },
{ {
"type": "import_statement", "type": "import_expression",
"named": true, "named": true,
"fields": {}, "fields": {},
"children": { "children": {
@@ -1015,6 +1047,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1098,6 +1134,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1181,6 +1221,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1305,6 +1349,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1367,6 +1415,10 @@
"multiple": true, "multiple": true,
"required": true, "required": true,
"types": [ "types": [
{
"type": "ascii_string",
"named": true
},
{ {
"type": "color_code", "type": "color_code",
"named": true "named": true
@@ -1445,6 +1497,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1533,10 +1589,6 @@
"type": "function_declaration", "type": "function_declaration",
"named": true "named": true
}, },
{
"type": "import_statement",
"named": true
},
{ {
"type": "print_command", "type": "print_command",
"named": true "named": true
@@ -1616,6 +1668,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1699,6 +1755,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1792,6 +1852,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1836,6 +1900,10 @@
"type": "!", "type": "!",
"named": false "named": false
}, },
{
"type": "!=",
"named": false
},
{ {
"type": "\"", "type": "\"",
"named": false "named": false

29554
src/parser.c generated

File diff suppressed because it is too large Load Diff

View File

@@ -13,12 +13,17 @@ extern "C" {
#define ts_builtin_sym_end 0 #define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_ #ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol; typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId; typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage; typedef struct TSLanguage TSLanguage;
typedef struct TSLanguageMetadata TSLanguageMetadata;
/*
 * Three-part version number (major, minor, patch), one byte per component.
 * TSLanguage embeds one of these by value in its `metadata` member.
 * NOTE(review): presumably the grammar's own semantic version -- confirm
 * against the tree-sitter ABI documentation.
 */
typedef struct TSLanguageMetadata {
uint8_t major_version;
uint8_t minor_version;
uint8_t patch_version;
} TSLanguageMetadata;
#endif #endif
typedef struct { typedef struct {
@@ -27,10 +32,11 @@ typedef struct {
bool inherited; bool inherited;
} TSFieldMapEntry; } TSFieldMapEntry;
// Used to index the field and supertype maps.
typedef struct { typedef struct {
uint16_t index; uint16_t index;
uint16_t length; uint16_t length;
} TSFieldMapSlice; } TSMapSlice;
typedef struct { typedef struct {
bool visible; bool visible;
@@ -48,6 +54,7 @@ struct TSLexer {
uint32_t (*get_column)(TSLexer *); uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *); bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *); bool (*eof)(const TSLexer *);
void (*log)(const TSLexer *, const char *, ...);
}; };
typedef enum { typedef enum {
@@ -79,6 +86,12 @@ typedef struct {
uint16_t external_lex_state; uint16_t external_lex_state;
} TSLexMode; } TSLexMode;
/*
 * Lexing mode record; TSLanguage's `lex_modes` member points at these.
 * Carries the same `lex_state` and `external_lex_state` fields as TSLexMode
 * plus a `reserved_word_set_id`.
 * NOTE(review): presumably one entry per parse state, with the id selecting
 * a set inside TSLanguage's `reserved_words` table -- confirm against the
 * generated parser.
 */
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
uint16_t reserved_word_set_id;
} TSLexerMode;
typedef union { typedef union {
TSParseAction action; TSParseAction action;
struct { struct {
@@ -87,8 +100,13 @@ typedef union {
} entry; } entry;
} TSParseActionEntry; } TSParseActionEntry;
/*
 * A contiguous range of character codes. set_contains() treats both `start`
 * and `end` as inclusive bounds when testing a lookahead value.
 */
typedef struct {
int32_t start;
int32_t end;
} TSCharacterRange;
struct TSLanguage { struct TSLanguage {
uint32_t version; uint32_t abi_version;
uint32_t symbol_count; uint32_t symbol_count;
uint32_t alias_count; uint32_t alias_count;
uint32_t token_count; uint32_t token_count;
@@ -104,13 +122,13 @@ struct TSLanguage {
const TSParseActionEntry *parse_actions; const TSParseActionEntry *parse_actions;
const char * const *symbol_names; const char * const *symbol_names;
const char * const *field_names; const char * const *field_names;
const TSFieldMapSlice *field_map_slices; const TSMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries; const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata; const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map; const TSSymbol *public_symbol_map;
const uint16_t *alias_map; const uint16_t *alias_map;
const TSSymbol *alias_sequences; const TSSymbol *alias_sequences;
const TSLexMode *lex_modes; const TSLexerMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId); bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId); bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token; TSSymbol keyword_capture_token;
@@ -124,15 +142,48 @@ struct TSLanguage {
void (*deserialize)(void *, const char *, unsigned); void (*deserialize)(void *, const char *, unsigned);
} external_scanner; } external_scanner;
const TSStateId *primary_state_ids; const TSStateId *primary_state_ids;
const char *name;
const TSSymbol *reserved_words;
uint16_t max_reserved_word_set_size;
uint32_t supertype_count;
const TSSymbol *supertype_symbols;
const TSMapSlice *supertype_map_slices;
const TSSymbol *supertype_map_entries;
TSLanguageMetadata metadata;
}; };
/*
 * Report whether `lookahead` lies inside any of the `len` ranges in `ranges`.
 *
 * The lookup is a binary search that halves the candidate window on every
 * step, so it only gives correct answers when the ranges are sorted in
 * ascending order and do not overlap (NOTE(review): the tables are produced
 * outside this header -- confirm the generator guarantees that ordering).
 * Both `start` and `end` of a range are inclusive.
 *
 * ranges     table of character ranges to search
 * len        number of entries in `ranges`
 * lookahead  character code to look up
 *
 * Returns true when a range contains `lookahead`, false otherwise. An empty
 * table (`len == 0`) yields false without touching `ranges`; previously that
 * case read `ranges[0]`, which is out of bounds.
 */
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  if (len == 0) {
    return false;
  }

  uint32_t index = 0;
  uint32_t size = len;
  while (size > 1) {
    uint32_t half_size = size / 2;
    uint32_t mid_index = index + half_size;
    const TSCharacterRange *range = &ranges[mid_index];
    if (lookahead >= range->start && lookahead <= range->end) {
      return true;
    } else if (lookahead > range->end) {
      /* Target is above the midpoint range: keep the upper half. */
      index = mid_index;
    }
    size -= half_size;
  }

  /* One candidate left; it either contains the value or nothing does. */
  const TSCharacterRange *range = &ranges[index];
  return (lookahead >= range->start && lookahead <= range->end);
}
/* /*
* Lexer Macros * Lexer Macros
*/ */
/*
 * Marks the declaration that follows as intentionally unused so the compiler
 * does not warn about it. MSVC gets a one-shot suppression of warning C4101
 * (unreferenced local variable); every other compiler gets the GNU-style
 * `unused` attribute. START_LEXER places it in front of its `eof` local.
 */
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
#define START_LEXER() \ #define START_LEXER() \
bool result = false; \ bool result = false; \
bool skip = false; \ bool skip = false; \
UNUSED \
bool eof = false; \ bool eof = false; \
int32_t lookahead; \ int32_t lookahead; \
goto start; \ goto start; \
@@ -148,6 +199,17 @@ struct TSLanguage {
goto next_state; \ goto next_state; \
} }
/*
 * Table-driven state transition. The arguments form a flat list of
 * (character, next state) pairs stored in a static uint16_t array. The pairs
 * are scanned in order; the first whose character equals `lookahead` assigns
 * its paired value to `state` and jumps to the `next_state` label. If no
 * pair matches, execution continues after the macro.
 *
 * Requires `lookahead`, `state` and a `next_state` label at the expansion
 * site. Because entries are uint16_t, characters and states passed here
 * need to fit in 16 bits.
 */
#define ADVANCE_MAP(...) \
{ \
static const uint16_t map[] = { __VA_ARGS__ }; \
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
if (map[i] == lookahead) { \
state = map[i + 1]; \
goto next_state; \
} \
} \
}
#define SKIP(state_value) \ #define SKIP(state_value) \
{ \ { \
skip = true; \ skip = true; \
@@ -166,7 +228,7 @@ struct TSLanguage {
* Parse Table Macros * Parse Table Macros
*/ */
#define SMALL_STATE(id) id - LARGE_STATE_COUNT #define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) id #define STATE(id) id
@@ -176,7 +238,7 @@ struct TSLanguage {
{{ \ {{ \
.shift = { \ .shift = { \
.type = TSParseActionTypeShift, \ .type = TSParseActionTypeShift, \
.state = state_value \ .state = (state_value) \
} \ } \
}} }}
@@ -184,7 +246,7 @@ struct TSLanguage {
{{ \ {{ \
.shift = { \ .shift = { \
.type = TSParseActionTypeShift, \ .type = TSParseActionTypeShift, \
.state = state_value, \ .state = (state_value), \
.repetition = true \ .repetition = true \
} \ } \
}} }}
@@ -197,14 +259,15 @@ struct TSLanguage {
} \ } \
}} }}
#define REDUCE(symbol_val, child_count_val, ...) \ #define REDUCE(symbol_name, children, precedence, prod_id) \
{{ \ {{ \
.reduce = { \ .reduce = { \
.type = TSParseActionTypeReduce, \ .type = TSParseActionTypeReduce, \
.symbol = symbol_val, \ .symbol = symbol_name, \
.child_count = child_count_val, \ .child_count = children, \
__VA_ARGS__ \ .dynamic_precedence = precedence, \
}, \ .production_id = prod_id \
}, \
}} }}
#define RECOVER() \ #define RECOVER() \