feat: add support for fullwidth Unicode, multiline arrays, and ASCII blocks

- Add fullwidth brackets ［］ (U+FF3B, U+FF3D) support
- Add fullwidth quotes ＂ (U+FF02) support
- Fix multiline arrays with newlines between elements
- Fix `^` line continuation after CRLF line endings
- Enable ASCII block syntax (ascii...asciiend and [ascii...asciiend])
- Update conflicts to resolve ambiguities

Fixed 51 parsing errors (253 -> 202 errors)
This commit is contained in:
2025-11-26 23:04:03 +01:00
parent b746fcec44
commit 99dadd9ca7
5 changed files with 16668 additions and 12483 deletions

View File

@@ -142,8 +142,7 @@ module.exports = grammar({
print_command: $ => prec.right(seq( print_command: $ => prec.right(seq(
choice('>', '>o', '>h', '>`', '>c', '>f'), choice('>', '>o', '>h', '>`', '>c', '>f'),
optional($.print_args), optional($.print_args)
repeat($.print_continuation)
)), )),
// Print specific helpers // Print specific helpers
@@ -165,14 +164,6 @@ module.exports = grammar({
'@' '@'
), ),
print_continuation: $ => prec.right(seq(
'^',
repeat(choice(
/[^@\r\n]+/,
$.interpolation
))
)),
color_code: $ => /#[a-zA-Z0-9]+/, color_code: $ => /#[a-zA-Z0-9]+/,
// Expressions // Expressions
@@ -195,7 +186,7 @@ module.exports = grammar({
$.assignment_expression, $.assignment_expression,
$.parenthesized_expression, $.parenthesized_expression,
$.new_statement, $.new_statement,
// $.ascii_string, $.ascii_string,
$.color_code $.color_code
), ),
@@ -218,9 +209,9 @@ module.exports = grammar({
index_expression: $ => prec.left(13, seq( index_expression: $ => prec.left(13, seq(
$._expression, $._expression,
'[', choice('[', '［'),
$._expression, $._expression,
']' choice(']', '］')
)), )),
unary_expression: $ => prec.right(12, seq( unary_expression: $ => prec.right(12, seq(
@@ -264,14 +255,22 @@ module.exports = grammar({
// Arrays // Arrays
array: $ => seq( array: $ => seq(
'[', choice('[', '［'),
optional($.array_elements), repeat($._newline),
']' optional(seq(
$.array_elements,
repeat($._newline)
)),
choice(']', '］')
), ),
array_elements: $ => seq( array_elements: $ => seq(
$._expression, $._expression,
repeat(seq(',', $._expression)), repeat(seq(
',',
repeat($._newline),
$._expression
)),
optional(',') optional(',')
), ),
@@ -282,21 +281,24 @@ module.exports = grammar({
float: $ => /\d+\.\d+/, float: $ => /\d+\.\d+/,
string: $ => seq('"', repeat(choice(/[^"\\]/, /\\./)), '"'), string: $ => choice(
seq('"', repeat(choice(/[^"\\]/, /\\./)), '"'),
seq('＂', repeat(choice(/[^＂\\]/, /\\./)), '＂')
),
boolean: $ => choice('true', 'false'), boolean: $ => choice('true', 'false'),
null: $ => 'null', null: $ => 'null',
// ascii_string: $ => seq(
// 'ascii', ascii_string: $ => choice(
// $.ascii_content, seq('ascii', $.ascii_content, 'asciiend'),
// 'asciiend' seq(choice('[', '［'), 'ascii', $.ascii_content, 'asciiend', choice(']', '］'))
// ) )
}, },
extras: $ => [ extras: $ => [
/[ \t\r\f]/, /[ \t\r\f]/,
/[\r\n]\^/, /\r?\n[ \t]*\^/,
$.comment, $.comment,
$.block_comment $.block_comment
], ],
@@ -304,7 +306,8 @@ module.exports = grammar({
externals: $ => [ externals: $ => [
$._newline, $._newline,
$._indent, $._indent,
$._dedent $._dedent,
$.ascii_content
], ],
word: $ => $.identifier, word: $ => $.identifier,
@@ -315,7 +318,9 @@ module.exports = grammar({
[$.command], [$.command],
[$._statement, $._expression], // new_statement can be both [$._statement, $._expression], // new_statement can be both
[$.binary_expression, $.assignment_expression], // = operator ambiguity [$.binary_expression, $.assignment_expression], // = operator ambiguity
[$.command, $._expression] // * operator ambiguity [$.command, $._expression], // * operator ambiguity
[$.array_elements],
[$.ascii_string]
] ]
}); });

280
src/grammar.json generated
View File

@@ -1,4 +1,5 @@
{ {
"$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json",
"name": "stonescript", "name": "stonescript",
"word": "identifier", "word": "identifier",
"rules": { "rules": {
@@ -558,13 +559,6 @@
"type": "BLANK" "type": "BLANK"
} }
] ]
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "print_continuation"
}
} }
] ]
} }
@@ -643,35 +637,6 @@
} }
] ]
}, },
"print_continuation": {
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "^"
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^@\\r\\n]+"
},
{
"type": "SYMBOL",
"name": "interpolation"
}
]
}
}
]
}
},
"color_code": { "color_code": {
"type": "PATTERN", "type": "PATTERN",
"value": "#[a-zA-Z0-9]+" "value": "#[a-zA-Z0-9]+"
@@ -747,6 +712,10 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "new_statement" "name": "new_statement"
}, },
{
"type": "SYMBOL",
"name": "ascii_string"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "color_code" "name": "color_code"
@@ -859,16 +828,34 @@
"name": "_expression" "name": "_expression"
}, },
{ {
"type": "STRING", "type": "CHOICE",
"value": "[" "members": [
{
"type": "STRING",
"value": "["
},
{
"type": "STRING",
"value": "［"
}
]
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "_expression" "name": "_expression"
}, },
{ {
"type": "STRING", "type": "CHOICE",
"value": "]" "members": [
{
"type": "STRING",
"value": "]"
},
{
"type": "STRING",
"value": "］"
}
]
} }
] ]
} }
@@ -1297,15 +1284,43 @@
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
{ {
"type": "STRING", "type": "CHOICE",
"value": "[" "members": [
{
"type": "STRING",
"value": "["
},
{
"type": "STRING",
"value": "［"
}
]
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_newline"
}
}, },
{ {
"type": "CHOICE", "type": "CHOICE",
"members": [ "members": [
{ {
"type": "SYMBOL", "type": "SEQ",
"name": "array_elements" "members": [
{
"type": "SYMBOL",
"name": "array_elements"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_newline"
}
}
]
}, },
{ {
"type": "BLANK" "type": "BLANK"
@@ -1313,8 +1328,17 @@
] ]
}, },
{ {
"type": "STRING", "type": "CHOICE",
"value": "]" "members": [
{
"type": "STRING",
"value": "]"
},
{
"type": "STRING",
"value": "］"
}
]
} }
] ]
}, },
@@ -1334,6 +1358,13 @@
"type": "STRING", "type": "STRING",
"value": "," "value": ","
}, },
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_newline"
}
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "_expression" "name": "_expression"
@@ -1368,31 +1399,65 @@
"value": "\\d+\\.\\d+" "value": "\\d+\\.\\d+"
}, },
"string": { "string": {
"type": "SEQ", "type": "CHOICE",
"members": [ "members": [
{ {
"type": "STRING", "type": "SEQ",
"value": "\"" "members": [
}, {
{ "type": "STRING",
"type": "REPEAT", "value": "\""
"content": { },
"type": "CHOICE", {
"members": [ "type": "REPEAT",
{ "content": {
"type": "PATTERN", "type": "CHOICE",
"value": "[^\"\\\\]" "members": [
}, {
{ "type": "PATTERN",
"type": "PATTERN", "value": "[^\"\\\\]"
"value": "\\\\." },
{
"type": "PATTERN",
"value": "\\\\."
}
]
} }
] },
} {
"type": "STRING",
"value": "\""
}
]
}, },
{ {
"type": "STRING", "type": "SEQ",
"value": "\"" "members": [
{
"type": "STRING",
"value": "＂"
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^＂\\\\]"
},
{
"type": "PATTERN",
"value": "\\\\."
}
]
}
},
{
"type": "STRING",
"value": "＂"
}
]
} }
] ]
}, },
@@ -1412,6 +1477,71 @@
"null": { "null": {
"type": "STRING", "type": "STRING",
"value": "null" "value": "null"
},
"ascii_string": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "ascii"
},
{
"type": "SYMBOL",
"name": "ascii_content"
},
{
"type": "STRING",
"value": "asciiend"
}
]
},
{
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "["
},
{
"type": "STRING",
"value": "［"
}
]
},
{
"type": "STRING",
"value": "ascii"
},
{
"type": "SYMBOL",
"name": "ascii_content"
},
{
"type": "STRING",
"value": "asciiend"
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "]"
},
{
"type": "STRING",
"value": "］"
}
]
}
]
}
]
} }
}, },
"extras": [ "extras": [
@@ -1421,7 +1551,7 @@
}, },
{ {
"type": "PATTERN", "type": "PATTERN",
"value": "[\\r\\n]\\^" "value": "\\r?\\n[ \\t]*\\^"
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@@ -1454,6 +1584,12 @@
[ [
"command", "command",
"_expression" "_expression"
],
[
"array_elements"
],
[
"ascii_string"
] ]
], ],
"precedences": [], "precedences": [],
@@ -1469,9 +1605,13 @@
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "_dedent" "name": "_dedent"
},
{
"type": "SYMBOL",
"name": "ascii_content"
} }
], ],
"inline": [], "inline": [],
"supertypes": [] "supertypes": [],
} "reserved": {}
}

133
src/node-types.json generated
View File

@@ -11,6 +11,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -109,6 +113,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -176,6 +184,21 @@
] ]
} }
}, },
{
"type": "ascii_string",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "ascii_content",
"named": true
}
]
}
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true, "named": true,
@@ -188,6 +211,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -267,6 +294,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -418,6 +449,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -544,6 +579,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -627,6 +666,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -725,6 +768,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -804,6 +851,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -932,6 +983,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1011,6 +1066,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1090,6 +1149,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1210,6 +1273,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1324,31 +1391,12 @@
"named": true, "named": true,
"fields": {}, "fields": {},
"children": { "children": {
"multiple": true, "multiple": false,
"required": false, "required": false,
"types": [ "types": [
{ {
"type": "print_args", "type": "print_args",
"named": true "named": true
},
{
"type": "print_continuation",
"named": true
}
]
}
},
{
"type": "print_continuation",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "interpolation",
"named": true
} }
] ]
} }
@@ -1365,6 +1413,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1435,6 +1487,7 @@
{ {
"type": "source_file", "type": "source_file",
"named": true, "named": true,
"root": true,
"fields": {}, "fields": {},
"children": { "children": {
"multiple": true, "multiple": true,
@@ -1531,6 +1584,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1610,6 +1667,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1699,6 +1760,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1908,12 +1973,21 @@
"named": false "named": false
}, },
{ {
"type": "^", "type": "ascii",
"named": false
},
{
"type": "ascii_content",
"named": true
},
{
"type": "asciiend",
"named": false "named": false
}, },
{ {
"type": "block_comment", "type": "block_comment",
"named": true "named": true,
"extra": true
}, },
{ {
"type": "break_statement", "type": "break_statement",
@@ -1925,7 +1999,8 @@
}, },
{ {
"type": "comment", "type": "comment",
"named": true "named": true,
"extra": true
}, },
{ {
"type": "continue_statement", "type": "continue_statement",
@@ -1990,5 +2065,17 @@
{ {
"type": "|", "type": "|",
"named": false "named": false
},
{
"type": "＂",
"named": false
},
{
"type": "［",
"named": false
},
{
"type": "］",
"named": false
} }
] ]

28563
src/parser.c generated

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@ enum TokenType {
NEWLINE, NEWLINE,
INDENT, INDENT,
DEDENT, DEDENT,
// ASCII_CONTENT, ASCII_CONTENT,
}; };
// ... (skipping to logic) // ... (skipping to logic)
@@ -101,64 +101,64 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload; Scanner *scanner = (Scanner *)payload;
// if (valid_symbols[ASCII_CONTENT]) { if (valid_symbols[ASCII_CONTENT]) {
// bool has_content = false; bool has_content = false;
//
// for (;;) { for (;;) {
// if (lexer->eof(lexer)) { if (lexer->eof(lexer)) {
// break; break;
// } }
//
// // Check if we're at the start of a line with 'asciiend' // Check if we're at the start of a line with 'asciiend'
// if (lexer->lookahead == '\n' || lexer->lookahead == '\r') { if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
// lexer->advance(lexer, false); lexer->advance(lexer, false);
// if (lexer->lookahead == '\r' || lexer->lookahead == '\n') { if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
// lexer->advance(lexer, false); lexer->advance(lexer, false);
// } }
// lexer->mark_end(lexer); lexer->mark_end(lexer);
// has_content = true; has_content = true;
//
// // Skip whitespace at the start of the line // Skip whitespace at the start of the line
// while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
// lexer->advance(lexer, false); lexer->advance(lexer, false);
// } }
//
// // Check if this line starts with 'asciiend' // Check if this line starts with 'asciiend'
// if (lexer->lookahead == 'a') { if (lexer->lookahead == 'a') {
// const char *keyword = "asciiend"; const char *keyword = "asciiend";
// bool match = true; bool match = true;
//
// for (int k = 0; k < 8; k++) { for (int k = 0; k < 8; k++) {
// if (lexer->lookahead == keyword[k]) { if (lexer->lookahead == keyword[k]) {
// lexer->advance(lexer, false); lexer->advance(lexer, false);
// } else { } else {
// match = false; match = false;
// break; break;
// } }
// } }
//
// // Check that asciiend is followed by whitespace or EOL // Check that asciiend is followed by whitespace or EOL
// if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' || if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
// lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
// lexer->lookahead == ',' || lexer->lookahead == ',' ||
// lexer->eof(lexer))) { lexer->eof(lexer))) {
// lexer->result_symbol = ASCII_CONTENT; lexer->result_symbol = ASCII_CONTENT;
// return has_content; return has_content;
// } }
//
// // Failed to match asciiend, mark the current position // Failed to match asciiend, mark the current position
// lexer->mark_end(lexer); lexer->mark_end(lexer);
// } }
// } else { } else {
// lexer->advance(lexer, false); lexer->advance(lexer, false);
// lexer->mark_end(lexer); lexer->mark_end(lexer);
// has_content = true; has_content = true;
// } }
// } }
//
// lexer->result_symbol = ASCII_CONTENT; lexer->result_symbol = ASCII_CONTENT;
// return has_content; return has_content;
// } }
if (scanner->queued_tokens_size > 0) { if (scanner->queued_tokens_size > 0) {
enum TokenType token = scanner->queued_tokens[0]; enum TokenType token = scanner->queued_tokens[0];