feat: major grammar improvements

- Enable ASCII blocks in print commands
- Add import as expression (not just statement)
- Fix operator precedence (& and | now lower than comparisons)
- Allow comments and newlines as top-level statements
- Fix source_file to handle leading comments and empty lines

Progress: 253 → 98 errors (155 files fixed, 55% success)
This commit is contained in:
2025-11-26 23:32:44 +01:00
parent 99dadd9ca7
commit 0b78c43138
5 changed files with 15079 additions and 15017 deletions

View File

@@ -6,7 +6,7 @@ module.exports = grammar({
_statement: $ => prec.right(seq(
choice(
// Comments first
$._newline,
$.comment,
$.block_comment,
// Keyword-based statements (must come before generic command)
@@ -16,7 +16,6 @@ module.exports = grammar({
$.return_statement, // 'return'
$.break_statement, // 'break'
$.continue_statement, // 'continue'
$.import_statement, // 'import'
// Control flow
$.conditional, // '?'
$.else_clause, // ':'
@@ -81,7 +80,7 @@ module.exports = grammar({
),
// Import
import_statement: $ => seq(
import_expression: $ => seq(
'import',
$.module_path
),
@@ -151,7 +150,7 @@ module.exports = grammar({
print_argument: $ => prec.left(repeat1(choice(
$.interpolation,
$.string,
// $.ascii_string,
$.ascii_string,
$.color_code,
$.print_text
))),
@@ -186,6 +185,7 @@ module.exports = grammar({
$.assignment_expression,
$.parenthesized_expression,
$.new_statement,
$.import_expression,
$.ascii_string,
$.color_code
),
@@ -221,19 +221,11 @@ module.exports = grammar({
// Binary operators with proper precedence
binary_expression: $ => choice(
prec.left(4, seq($._expression, '|', $._expression)),
prec.left(5, seq($._expression, '&', $._expression)),
prec.left(7, seq($._expression, '!', $._expression)),
prec.left(7, seq($._expression, '=', $._expression)),
prec.left(8, seq($._expression, '<', $._expression)),
prec.left(8, seq($._expression, '>', $._expression)),
prec.left(8, seq($._expression, '<=', $._expression)),
prec.left(8, seq($._expression, '>=', $._expression)),
prec.left(9, seq($._expression, '+', $._expression)),
prec.left(9, seq($._expression, '-', $._expression)),
prec.left(10, seq($._expression, '*', $._expression)),
prec.left(10, seq($._expression, '/', $._expression)),
prec.left(11, seq($._expression, '%', $._expression))
prec.left(6, seq($._expression, choice('*', '/', '%'), $._expression)),
prec.left(5, seq($._expression, choice('+', '-'), $._expression)),
prec.left(4, seq($._expression, choice('=', '!=', '!', '<', '>', '<=', '>='), $._expression)),
prec.left(3, seq($._expression, '&', $._expression)),
prec.left(2, seq($._expression, '|', $._expression))
),
update_expression: $ => choice(

329
src/grammar.json generated
View File

@@ -19,6 +19,10 @@
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newline"
},
{
"type": "SYMBOL",
"name": "comment"
@@ -51,10 +55,6 @@
"type": "SYMBOL",
"name": "continue_statement"
},
{
"type": "SYMBOL",
"name": "import_statement"
},
{
"type": "SYMBOL",
"name": "conditional"
@@ -303,7 +303,7 @@
}
]
},
"import_statement": {
"import_expression": {
"type": "SEQ",
"members": [
{
@@ -604,6 +604,10 @@
"type": "SYMBOL",
"name": "string"
},
{
"type": "SYMBOL",
"name": "ascii_string"
},
{
"type": "SYMBOL",
"name": "color_code"
@@ -712,6 +716,10 @@
"type": "SYMBOL",
"name": "new_statement"
},
{
"type": "SYMBOL",
"name": "import_expression"
},
{
"type": "SYMBOL",
"name": "ascii_string"
@@ -891,7 +899,7 @@
"members": [
{
"type": "PREC_LEFT",
"value": 4,
"value": 6,
"content": {
"type": "SEQ",
"members": [
@@ -899,9 +907,22 @@
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "|"
"value": "*"
},
{
"type": "STRING",
"value": "/"
},
{
"type": "STRING",
"value": "%"
}
]
},
{
"type": "SYMBOL",
@@ -913,6 +934,86 @@
{
"type": "PREC_LEFT",
"value": 5,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "+"
},
{
"type": "STRING",
"value": "-"
}
]
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 4,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "="
},
{
"type": "STRING",
"value": "!="
},
{
"type": "STRING",
"value": "!"
},
{
"type": "STRING",
"value": "<"
},
{
"type": "STRING",
"value": ">"
},
{
"type": "STRING",
"value": "<="
},
{
"type": "STRING",
"value": ">="
}
]
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 3,
"content": {
"type": "SEQ",
"members": [
@@ -933,7 +1034,7 @@
},
{
"type": "PREC_LEFT",
"value": 7,
"value": 2,
"content": {
"type": "SEQ",
"members": [
@@ -943,217 +1044,7 @@
},
{
"type": "STRING",
"value": "!"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 7,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "="
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 8,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "<"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 8,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": ">"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 8,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "<="
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 8,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": ">="
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 9,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "+"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 9,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "-"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 10,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "*"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 10,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "/"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 11,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "%"
"value": "|"
},
{
"type": "SYMBOL",

88
src/node-types.json generated
View File

@@ -47,6 +47,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -145,6 +149,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -243,6 +251,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -326,6 +338,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -371,7 +387,7 @@
"fields": {},
"children": {
"multiple": true,
"required": true,
"required": false,
"types": [
{
"type": "block_comment",
@@ -413,10 +429,6 @@
"type": "function_declaration",
"named": true
},
{
"type": "import_statement",
"named": true
},
{
"type": "print_command",
"named": true
@@ -481,6 +493,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -615,6 +631,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -702,6 +722,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -800,6 +824,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -887,6 +915,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -957,7 +989,7 @@
}
},
{
"type": "import_statement",
"type": "import_expression",
"named": true,
"fields": {},
"children": {
@@ -1015,6 +1047,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -1098,6 +1134,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -1181,6 +1221,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -1305,6 +1349,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -1367,6 +1415,10 @@
"multiple": true,
"required": true,
"types": [
{
"type": "ascii_string",
"named": true
},
{
"type": "color_code",
"named": true
@@ -1445,6 +1497,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -1533,10 +1589,6 @@
"type": "function_declaration",
"named": true
},
{
"type": "import_statement",
"named": true
},
{
"type": "print_command",
"named": true
@@ -1616,6 +1668,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -1699,6 +1755,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -1792,6 +1852,10 @@
"type": "identifier",
"named": true
},
{
"type": "import_expression",
"named": true
},
{
"type": "index_expression",
"named": true
@@ -1836,6 +1900,10 @@
"type": "!",
"named": false
},
{
"type": "!=",
"named": false
},
{
"type": "\"",
"named": false

29554
src/parser.c generated

File diff suppressed because it is too large Load Diff

View File

@@ -13,12 +13,17 @@ extern "C" {
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
typedef struct TSLanguageMetadata TSLanguageMetadata;
typedef struct TSLanguageMetadata {
uint8_t major_version;
uint8_t minor_version;
uint8_t patch_version;
} TSLanguageMetadata;
#endif
typedef struct {
@@ -27,10 +32,11 @@ typedef struct {
bool inherited;
} TSFieldMapEntry;
// Used to index the field and supertype maps.
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
} TSMapSlice;
typedef struct {
bool visible;
@@ -48,6 +54,7 @@ struct TSLexer {
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
void (*log)(const TSLexer *, const char *, ...);
};
typedef enum {
@@ -79,6 +86,12 @@ typedef struct {
uint16_t external_lex_state;
} TSLexMode;
// Lexer mode entry; the element type of `TSLanguage.lex_modes`.
// NOTE(review): presumably one entry per parse state, with
// `reserved_word_set_id` selecting a set within `TSLanguage.reserved_words`
// -- confirm against the generated parser tables.
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
uint16_t reserved_word_set_id;
} TSLexerMode;
typedef union {
TSParseAction action;
struct {
@@ -87,8 +100,13 @@ typedef union {
} entry;
} TSParseActionEntry;
// A range of lookahead values. set_contains() treats it as a closed
// interval: a value equal to `start` or to `end` is inside the range.
typedef struct {
int32_t start;
int32_t end;
} TSCharacterRange;
struct TSLanguage {
uint32_t version;
uint32_t abi_version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
@@ -104,13 +122,13 @@ struct TSLanguage {
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
const TSLexerMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
@@ -124,15 +142,48 @@ struct TSLanguage {
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
const char *name;
const TSSymbol *reserved_words;
uint16_t max_reserved_word_set_size;
uint32_t supertype_count;
const TSSymbol *supertype_symbols;
const TSMapSlice *supertype_map_slices;
const TSSymbol *supertype_map_entries;
TSLanguageMetadata metadata;
};
/*
 * Report whether `lookahead` lies inside any of the `len` ranges in `ranges`.
 *
 * Each range is a closed interval: both `start` and `end` count as inside.
 * The lookup is a binary search, so it assumes the ranges are sorted in
 * ascending order and do not overlap.
 *
 * An empty set (`len == 0`) contains nothing. The early return also keeps
 * the final probe from reading `ranges[0]` when no such element exists.
 */
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  if (len == 0) {
    return false;
  }

  uint32_t index = 0;
  uint32_t size = len - index;

  // Narrow the window [index, index + size) until one candidate remains.
  while (size > 1) {
    uint32_t half_size = size / 2;
    uint32_t mid_index = index + half_size;
    const TSCharacterRange *range = &ranges[mid_index];
    if (lookahead >= range->start && lookahead <= range->end) {
      return true;
    } else if (lookahead > range->end) {
      // The target is above the probed range: drop the lower half.
      index = mid_index;
    }
    size -= half_size;
  }

  // Exactly one candidate range is left; test it directly.
  const TSCharacterRange *range = &ranges[index];
  return (lookahead >= range->start && lookahead <= range->end);
}
/*
* Lexer Macros
*/
// UNUSED is placed directly before a declaration (START_LEXER puts it before
// `eof`) so that a lexer which never reads that variable does not trigger an
// unused-variable warning. MSVC gets a one-shot suppression of warning 4101;
// other compilers get the `unused` attribute.
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
@@ -148,6 +199,17 @@ struct TSLanguage {
goto next_state; \
}
// Table-driven state transition. The arguments form a flat list of
// (lookahead value, next state) pairs, stored in a static uint16_t array.
// The pairs are scanned in order; on the first pair whose key equals
// `lookahead`, `state` is set to the paired value and control jumps to the
// `next_state` label. If no key matches, execution falls through to the code
// after the macro. The expansion site must provide `state`, `lookahead` and
// the `next_state` label.
#define ADVANCE_MAP(...) \
{ \
static const uint16_t map[] = { __VA_ARGS__ }; \
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
if (map[i] == lookahead) { \
state = map[i + 1]; \
goto next_state; \
} \
} \
}
#define SKIP(state_value) \
{ \
skip = true; \
@@ -166,7 +228,7 @@ struct TSLanguage {
* Parse Table Macros
*/
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) id
@@ -176,7 +238,7 @@ struct TSLanguage {
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
.state = (state_value) \
} \
}}
@@ -184,7 +246,7 @@ struct TSLanguage {
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.state = (state_value), \
.repetition = true \
} \
}}
@@ -197,13 +259,14 @@ struct TSLanguage {
} \
}}
#define REDUCE(symbol_val, child_count_val, ...) \
#define REDUCE(symbol_name, children, precedence, prod_id) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
.symbol = symbol_name, \
.child_count = children, \
.dynamic_precedence = precedence, \
.production_id = prod_id \
}, \
}}