7 Commits
v0.1.0 ... main

Author SHA1 Message Date
3b8318940a update 2025-11-27 12:11:52 +01:00
36d6c3947a fix: improve parser - fix ASCII strings and comment indentation handling
- Remove 'asciiend' from ascii_string grammar rule (handled by scanner)
- Add scanner logic to skip comment-only lines when measuring indentation
- Update scanner to include 'asciiend' in ASCII_CONTENT token
- Implement external scanner for BLOCK_COMMENT (partial fix)

Results: Reduced parse errors from 156 to 119 (23% improvement)
2025-11-27 11:09:32 +01:00
06e6e3b098 Add full-width closing bracket support after asciiend
Allow ］ (U+FF3D) after asciiend in ASCII content scanner.
This enables ASCII strings inside arrays with full-width brackets:
  var x = ［ascii...asciiend］

The parser now correctly recognizes this as an array containing
an ASCII string, not as a syntax error.

Fixes 1 additional parsing error.
2025-11-27 10:40:00 +01:00
9a1dcb941d Allow ) and ] after asciiend in ASCII blocks
ASCII blocks can now be used as function arguments and array
elements. The scanner now accepts ) and ] as valid characters
after 'asciiend', allowing constructs like:
- var x = uiAA(ascii...asciiend)
- var y = [ascii...asciiend]

Fixes 12 parsing errors in test scripts.
2025-11-27 10:30:49 +01:00
eaf0963459 Fix CRLF handling in external scanner
- Consume \r as part of token instead of skipping it
- Break after consuming \n to avoid processing multiple lines
- Consume leading whitespace separately for indent calculation
- Fix ASCII_CONTENT to return false at EOF without asciiend

This fixes ERROR tokens with CRLF line endings, especially
with trailing blank lines.
2025-11-27 01:25:06 +01:00
0b78c43138 feat: major grammar improvements
- Enable ASCII blocks in print commands
- Add import as expression (not just statement)
- Fix operator precedence (& and | now lower than comparisons)
- Allow comments and newlines as top-level statements
- Fix source_file to handle leading comments and empty lines

Progress: 253 → 98 errors (155 files fixed, 55% success)
2025-11-26 23:32:44 +01:00
99dadd9ca7 feat: add support for fullwidth Unicode, multiline arrays, and ASCII blocks
- Add fullwidth brackets ［ ］ (U+FF3B, U+FF3D) support
- Add fullwidth quotes ＂ (U+FF02) support
- Fix multiline arrays with newlines between elements
- Fix line continuation with CRLF (^)
- Enable ASCII block syntax (ascii...asciiend and [ascii...asciiend])
- Update conflicts to resolve ambiguities

Fixed 51 parsing errors (253 -> 202 errors)
2025-11-26 23:04:03 +01:00
7 changed files with 19468 additions and 16113 deletions

2
Cargo.lock generated
View File

@@ -80,7 +80,7 @@ dependencies = [
[[package]] [[package]]
name = "tree-sitter-stonescript" name = "tree-sitter-stonescript"
version = "0.0.1" version = "0.1.0"
dependencies = [ dependencies = [
"cc", "cc",
"tree-sitter", "tree-sitter",

22
corpus/test_ascii.txt Normal file
View File

@@ -0,0 +1,22 @@
==================
ASCII Array
==================
var x = [ascii
foo
asciiend
, ascii
bar
asciiend
]
------------------
(source_file
(variable_declaration
(identifier)
(array
(array_elements
(ascii_string (ascii_content))
(ascii_string (ascii_content))
)
)
)
)

View File

@@ -6,7 +6,7 @@ module.exports = grammar({
_statement: $ => prec.right(seq( _statement: $ => prec.right(seq(
choice( choice(
// Comments first $._newline,
$.comment, $.comment,
$.block_comment, $.block_comment,
// Keyword-based statements (must come before generic command) // Keyword-based statements (must come before generic command)
@@ -16,7 +16,6 @@ module.exports = grammar({
$.return_statement, // 'return' $.return_statement, // 'return'
$.break_statement, // 'break' $.break_statement, // 'break'
$.continue_statement, // 'continue' $.continue_statement, // 'continue'
$.import_statement, // 'import'
// Control flow // Control flow
$.conditional, // '?' $.conditional, // '?'
$.else_clause, // ':' $.else_clause, // ':'
@@ -32,12 +31,6 @@ module.exports = grammar({
// Comments // Comments
comment: $ => token(seq('//', /.*/)), comment: $ => token(seq('//', /.*/)),
block_comment: $ => token(seq(
'/*',
/[^*]*\*+(?:[^/*][^*]*\*+)*/,
'/'
)),
// Variable declaration // Variable declaration
variable_declaration: $ => seq( variable_declaration: $ => seq(
'var', 'var',
@@ -81,7 +74,7 @@ module.exports = grammar({
), ),
// Import // Import
import_statement: $ => seq( import_expression: $ => seq(
'import', 'import',
$.module_path $.module_path
), ),
@@ -142,8 +135,7 @@ module.exports = grammar({
print_command: $ => prec.right(seq( print_command: $ => prec.right(seq(
choice('>', '>o', '>h', '>`', '>c', '>f'), choice('>', '>o', '>h', '>`', '>c', '>f'),
optional($.print_args), optional($.print_args)
repeat($.print_continuation)
)), )),
// Print specific helpers // Print specific helpers
@@ -152,7 +144,7 @@ module.exports = grammar({
print_argument: $ => prec.left(repeat1(choice( print_argument: $ => prec.left(repeat1(choice(
$.interpolation, $.interpolation,
$.string, $.string,
// $.ascii_string, $.ascii_string,
$.color_code, $.color_code,
$.print_text $.print_text
))), ))),
@@ -165,14 +157,6 @@ module.exports = grammar({
'@' '@'
), ),
print_continuation: $ => prec.right(seq(
'^',
repeat(choice(
/[^@\r\n]+/,
$.interpolation
))
)),
color_code: $ => /#[a-zA-Z0-9]+/, color_code: $ => /#[a-zA-Z0-9]+/,
// Expressions // Expressions
@@ -185,6 +169,7 @@ module.exports = grammar({
$.string, $.string,
$.boolean, $.boolean,
$.null, $.null,
$.ascii_string,
$.array, $.array,
$.member_expression, $.member_expression,
$.call_expression, $.call_expression,
@@ -195,7 +180,7 @@ module.exports = grammar({
$.assignment_expression, $.assignment_expression,
$.parenthesized_expression, $.parenthesized_expression,
$.new_statement, $.new_statement,
// $.ascii_string, $.import_expression,
$.color_code $.color_code
), ),
@@ -218,9 +203,9 @@ module.exports = grammar({
index_expression: $ => prec.left(13, seq( index_expression: $ => prec.left(13, seq(
$._expression, $._expression,
'[', choice('[', '［'),
$._expression, $._expression,
']' choice(']', '］')
)), )),
unary_expression: $ => prec.right(12, seq( unary_expression: $ => prec.right(12, seq(
@@ -230,19 +215,11 @@ module.exports = grammar({
// Binary operators with proper precedence // Binary operators with proper precedence
binary_expression: $ => choice( binary_expression: $ => choice(
prec.left(4, seq($._expression, '|', $._expression)), prec.left(6, seq($._expression, choice('*', '/', '%'), $._expression)),
prec.left(5, seq($._expression, '&', $._expression)), prec.left(5, seq($._expression, choice('+', '-'), $._expression)),
prec.left(7, seq($._expression, '!', $._expression)), prec.left(4, seq($._expression, choice('=', '!=', '!', '<', '>', '<=', '>='), $._expression)),
prec.left(7, seq($._expression, '=', $._expression)), prec.left(3, seq($._expression, '&', $._expression)),
prec.left(8, seq($._expression, '<', $._expression)), prec.left(2, seq($._expression, '|', $._expression))
prec.left(8, seq($._expression, '>', $._expression)),
prec.left(8, seq($._expression, '<=', $._expression)),
prec.left(8, seq($._expression, '>=', $._expression)),
prec.left(9, seq($._expression, '+', $._expression)),
prec.left(9, seq($._expression, '-', $._expression)),
prec.left(10, seq($._expression, '*', $._expression)),
prec.left(10, seq($._expression, '/', $._expression)),
prec.left(11, seq($._expression, '%', $._expression))
), ),
update_expression: $ => choice( update_expression: $ => choice(
@@ -264,14 +241,22 @@ module.exports = grammar({
// Arrays // Arrays
array: $ => seq( array: $ => seq(
'[', choice('[', '［'),
optional($.array_elements), repeat($._newline),
']' optional(seq(
$.array_elements,
repeat($._newline)
)),
choice(']', '］')
), ),
array_elements: $ => seq( array_elements: $ => seq(
$._expression, $._expression,
repeat(seq(',', $._expression)), repeat(seq(
',',
repeat($._newline),
$._expression
)),
optional(',') optional(',')
), ),
@@ -282,21 +267,21 @@ module.exports = grammar({
float: $ => /\d+\.\d+/, float: $ => /\d+\.\d+/,
string: $ => seq('"', repeat(choice(/[^"\\]/, /\\./)), '"'), string: $ => choice(
seq('"', repeat(choice(/[^"\\]/, /\\./)), '"'),
seq('＂', repeat(choice(/[^＂\\]/, /\\./)), '＂')
),
boolean: $ => choice('true', 'false'), boolean: $ => choice('true', 'false'),
null: $ => 'null', null: $ => 'null',
// ascii_string: $ => seq(
// 'ascii', ascii_string: $ => seq('ascii', $.ascii_content)
// $.ascii_content,
// 'asciiend'
// )
}, },
extras: $ => [ extras: $ => [
/[ \t\r\f]/, /[ \t\r\f]/,
/[\r\n]\^/, /\r?\n[ \t]*\^/,
$.comment, $.comment,
$.block_comment $.block_comment
], ],
@@ -304,7 +289,9 @@ module.exports = grammar({
externals: $ => [ externals: $ => [
$._newline, $._newline,
$._indent, $._indent,
$._dedent $._dedent,
$.ascii_content,
$.block_comment
], ],
word: $ => $.identifier, word: $ => $.identifier,
@@ -315,7 +302,9 @@ module.exports = grammar({
[$.command], [$.command],
[$._statement, $._expression], // new_statement can be both [$._statement, $._expression], // new_statement can be both
[$.binary_expression, $.assignment_expression], // = operator ambiguity [$.binary_expression, $.assignment_expression], // = operator ambiguity
[$.command, $._expression] // * operator ambiguity [$.command, $._expression], // * operator ambiguity
[$.array_elements],
[$.ascii_string]
] ]
}); });

576
src/grammar.json generated
View File

@@ -18,6 +18,10 @@
{ {
"type": "CHOICE", "type": "CHOICE",
"members": [ "members": [
{
"type": "SYMBOL",
"name": "_newline"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "comment" "name": "comment"
@@ -50,10 +54,6 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "continue_statement" "name": "continue_statement"
}, },
{
"type": "SYMBOL",
"name": "import_statement"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "conditional" "name": "conditional"
@@ -111,26 +111,6 @@
] ]
} }
}, },
"block_comment": {
"type": "TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "/*"
},
{
"type": "PATTERN",
"value": "[^*]*\\*+(?:[^/*][^*]*\\*+)*"
},
{
"type": "STRING",
"value": "/"
}
]
}
},
"variable_declaration": { "variable_declaration": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
@@ -302,7 +282,7 @@
} }
] ]
}, },
"import_statement": { "import_expression": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
{ {
@@ -558,13 +538,6 @@
"type": "BLANK" "type": "BLANK"
} }
] ]
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "print_continuation"
}
} }
] ]
} }
@@ -610,6 +583,10 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "string" "name": "string"
}, },
{
"type": "SYMBOL",
"name": "ascii_string"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "color_code" "name": "color_code"
@@ -643,35 +620,6 @@
} }
] ]
}, },
"print_continuation": {
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "^"
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^@\\r\\n]+"
},
{
"type": "SYMBOL",
"name": "interpolation"
}
]
}
}
]
}
},
"color_code": { "color_code": {
"type": "PATTERN", "type": "PATTERN",
"value": "#[a-zA-Z0-9]+" "value": "#[a-zA-Z0-9]+"
@@ -707,6 +655,10 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "null" "name": "null"
}, },
{
"type": "SYMBOL",
"name": "ascii_string"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "array" "name": "array"
@@ -747,6 +699,10 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "new_statement" "name": "new_statement"
}, },
{
"type": "SYMBOL",
"name": "import_expression"
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "color_code" "name": "color_code"
@@ -859,16 +815,34 @@
"name": "_expression" "name": "_expression"
}, },
{ {
"type": "STRING", "type": "CHOICE",
"value": "[" "members": [
{
"type": "STRING",
"value": "["
},
{
"type": "STRING",
"value": ""
}
]
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "_expression" "name": "_expression"
}, },
{ {
"type": "STRING", "type": "CHOICE",
"value": "]" "members": [
{
"type": "STRING",
"value": "]"
},
{
"type": "STRING",
"value": ""
}
]
} }
] ]
} }
@@ -904,7 +878,7 @@
"members": [ "members": [
{ {
"type": "PREC_LEFT", "type": "PREC_LEFT",
"value": 4, "value": 6,
"content": { "content": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
@@ -913,8 +887,21 @@
"name": "_expression" "name": "_expression"
}, },
{ {
"type": "STRING", "type": "CHOICE",
"value": "|" "members": [
{
"type": "STRING",
"value": "*"
},
{
"type": "STRING",
"value": "/"
},
{
"type": "STRING",
"value": "%"
}
]
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@@ -926,6 +913,86 @@
{ {
"type": "PREC_LEFT", "type": "PREC_LEFT",
"value": 5, "value": 5,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "+"
},
{
"type": "STRING",
"value": "-"
}
]
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 4,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "="
},
{
"type": "STRING",
"value": "!="
},
{
"type": "STRING",
"value": "!"
},
{
"type": "STRING",
"value": "<"
},
{
"type": "STRING",
"value": ">"
},
{
"type": "STRING",
"value": "<="
},
{
"type": "STRING",
"value": ">="
}
]
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 3,
"content": { "content": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
@@ -946,7 +1013,7 @@
}, },
{ {
"type": "PREC_LEFT", "type": "PREC_LEFT",
"value": 7, "value": 2,
"content": { "content": {
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
@@ -956,217 +1023,7 @@
}, },
{ {
"type": "STRING", "type": "STRING",
"value": "!" "value": "|"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 7,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "="
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 8,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "<"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 8,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": ">"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 8,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "<="
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 8,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": ">="
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 9,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "+"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 9,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "-"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 10,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "*"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 10,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "/"
},
{
"type": "SYMBOL",
"name": "_expression"
}
]
}
},
{
"type": "PREC_LEFT",
"value": 11,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": "%"
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@@ -1297,15 +1154,43 @@
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
{ {
"type": "STRING", "type": "CHOICE",
"value": "[" "members": [
{
"type": "STRING",
"value": "["
},
{
"type": "STRING",
"value": ""
}
]
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_newline"
}
}, },
{ {
"type": "CHOICE", "type": "CHOICE",
"members": [ "members": [
{ {
"type": "SYMBOL", "type": "SEQ",
"name": "array_elements" "members": [
{
"type": "SYMBOL",
"name": "array_elements"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_newline"
}
}
]
}, },
{ {
"type": "BLANK" "type": "BLANK"
@@ -1313,8 +1198,17 @@
] ]
}, },
{ {
"type": "STRING", "type": "CHOICE",
"value": "]" "members": [
{
"type": "STRING",
"value": "]"
},
{
"type": "STRING",
"value": ""
}
]
} }
] ]
}, },
@@ -1334,6 +1228,13 @@
"type": "STRING", "type": "STRING",
"value": "," "value": ","
}, },
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_newline"
}
},
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "_expression" "name": "_expression"
@@ -1368,31 +1269,65 @@
"value": "\\d+\\.\\d+" "value": "\\d+\\.\\d+"
}, },
"string": { "string": {
"type": "SEQ", "type": "CHOICE",
"members": [ "members": [
{ {
"type": "STRING", "type": "SEQ",
"value": "\"" "members": [
}, {
{ "type": "STRING",
"type": "REPEAT", "value": "\""
"content": { },
"type": "CHOICE", {
"members": [ "type": "REPEAT",
{ "content": {
"type": "PATTERN", "type": "CHOICE",
"value": "[^\"\\\\]" "members": [
}, {
{ "type": "PATTERN",
"type": "PATTERN", "value": "[^\"\\\\]"
"value": "\\\\." },
{
"type": "PATTERN",
"value": "\\\\."
}
]
} }
] },
} {
"type": "STRING",
"value": "\""
}
]
}, },
{ {
"type": "STRING", "type": "SEQ",
"value": "\"" "members": [
{
"type": "STRING",
"value": ""
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^\\\\]"
},
{
"type": "PATTERN",
"value": "\\\\."
}
]
}
},
{
"type": "STRING",
"value": ""
}
]
} }
] ]
}, },
@@ -1412,6 +1347,19 @@
"null": { "null": {
"type": "STRING", "type": "STRING",
"value": "null" "value": "null"
},
"ascii_string": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "ascii"
},
{
"type": "SYMBOL",
"name": "ascii_content"
}
]
} }
}, },
"extras": [ "extras": [
@@ -1421,7 +1369,7 @@
}, },
{ {
"type": "PATTERN", "type": "PATTERN",
"value": "[\\r\\n]\\^" "value": "\\r?\\n[ \\t]*\\^"
}, },
{ {
"type": "SYMBOL", "type": "SYMBOL",
@@ -1454,6 +1402,12 @@
[ [
"command", "command",
"_expression" "_expression"
],
[
"array_elements"
],
[
"ascii_string"
] ]
], ],
"precedences": [], "precedences": [],
@@ -1469,6 +1423,14 @@
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "_dedent" "name": "_dedent"
},
{
"type": "SYMBOL",
"name": "ascii_content"
},
{
"type": "SYMBOL",
"name": "block_comment"
} }
], ],
"inline": [], "inline": [],

210
src/node-types.json generated
View File

@@ -11,6 +11,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -43,6 +47,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -109,6 +117,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -137,6 +149,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -176,6 +192,21 @@
] ]
} }
}, },
{
"type": "ascii_string",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "ascii_content",
"named": true
}
]
}
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true, "named": true,
@@ -188,6 +219,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -216,6 +251,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -267,6 +306,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -295,6 +338,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -340,7 +387,7 @@
"fields": {}, "fields": {},
"children": { "children": {
"multiple": true, "multiple": true,
"required": true, "required": false,
"types": [ "types": [
{ {
"type": "block_comment", "type": "block_comment",
@@ -382,10 +429,6 @@
"type": "function_declaration", "type": "function_declaration",
"named": true "named": true
}, },
{
"type": "import_statement",
"named": true
},
{ {
"type": "print_command", "type": "print_command",
"named": true "named": true
@@ -418,6 +461,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -446,6 +493,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -544,6 +595,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -576,6 +631,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -627,6 +686,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -659,6 +722,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -725,6 +792,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -753,6 +824,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -804,6 +879,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -836,6 +915,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -906,7 +989,7 @@
} }
}, },
{ {
"type": "import_statement", "type": "import_expression",
"named": true, "named": true,
"fields": {}, "fields": {},
"children": { "children": {
@@ -932,6 +1015,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -960,6 +1047,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1011,6 +1102,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1039,6 +1134,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1090,6 +1189,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1118,6 +1221,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1210,6 +1317,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1238,6 +1349,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1300,6 +1415,10 @@
"multiple": true, "multiple": true,
"required": true, "required": true,
"types": [ "types": [
{
"type": "ascii_string",
"named": true
},
{ {
"type": "color_code", "type": "color_code",
"named": true "named": true
@@ -1324,31 +1443,12 @@
"named": true, "named": true,
"fields": {}, "fields": {},
"children": { "children": {
"multiple": true, "multiple": false,
"required": false, "required": false,
"types": [ "types": [
{ {
"type": "print_args", "type": "print_args",
"named": true "named": true
},
{
"type": "print_continuation",
"named": true
}
]
}
},
{
"type": "print_continuation",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "interpolation",
"named": true
} }
] ]
} }
@@ -1365,6 +1465,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1393,6 +1497,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1480,10 +1588,6 @@
"type": "function_declaration", "type": "function_declaration",
"named": true "named": true
}, },
{
"type": "import_statement",
"named": true
},
{ {
"type": "print_command", "type": "print_command",
"named": true "named": true
@@ -1531,6 +1635,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1559,6 +1667,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1610,6 +1722,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1638,6 +1754,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1699,6 +1819,10 @@
"type": "array", "type": "array",
"named": true "named": true
}, },
{
"type": "ascii_string",
"named": true
},
{ {
"type": "assignment_expression", "type": "assignment_expression",
"named": true "named": true
@@ -1727,6 +1851,10 @@
"type": "identifier", "type": "identifier",
"named": true "named": true
}, },
{
"type": "import_expression",
"named": true
},
{ {
"type": "index_expression", "type": "index_expression",
"named": true "named": true
@@ -1771,6 +1899,10 @@
"type": "!", "type": "!",
"named": false "named": false
}, },
{
"type": "!=",
"named": false
},
{ {
"type": "\"", "type": "\"",
"named": false "named": false
@@ -1908,9 +2040,13 @@
"named": false "named": false
}, },
{ {
"type": "^", "type": "ascii",
"named": false "named": false
}, },
{
"type": "ascii_content",
"named": true
},
{ {
"type": "block_comment", "type": "block_comment",
"named": true "named": true
@@ -1990,5 +2126,17 @@
{ {
"type": "|", "type": "|",
"named": false "named": false
},
{
"type": "",
"named": false
},
{
"type": "",
"named": false
},
{
"type": "",
"named": false
} }
] ]

34482
src/parser.c generated

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,8 @@ enum TokenType {
NEWLINE, NEWLINE,
INDENT, INDENT,
DEDENT, DEDENT,
// ASCII_CONTENT, ASCII_CONTENT,
BLOCK_COMMENT,
}; };
// ... (skipping to logic) // ... (skipping to logic)
@@ -101,64 +102,89 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload; Scanner *scanner = (Scanner *)payload;
// if (valid_symbols[ASCII_CONTENT]) { // Try to handle block comments when parser expects them
// bool has_content = false; // Only check if valid_symbols allows BLOCK_COMMENT
// if (valid_symbols[BLOCK_COMMENT] && lexer->lookahead == '/') {
// for (;;) { lexer->advance(lexer, false);
// if (lexer->eof(lexer)) { if (lexer->lookahead == '*') {
// break; lexer->advance(lexer, false);
// }
// // Consume everything until */
// // Check if we're at the start of a line with 'asciiend' while (!lexer->eof(lexer)) {
// if (lexer->lookahead == '\n' || lexer->lookahead == '\r') { if (lexer->lookahead == '*') {
// lexer->advance(lexer, false); lexer->advance(lexer, false);
// if (lexer->lookahead == '\r' || lexer->lookahead == '\n') { if (lexer->lookahead == '/') {
// lexer->advance(lexer, false); lexer->advance(lexer, false);
// } lexer->mark_end(lexer);
// lexer->mark_end(lexer); lexer->result_symbol = BLOCK_COMMENT;
// has_content = true; return true;
// }
// // Skip whitespace at the start of the line } else {
// while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { lexer->advance(lexer, false);
// lexer->advance(lexer, false); }
// } }
// // Reached EOF without closing */
// // Check if this line starts with 'asciiend' return false;
// if (lexer->lookahead == 'a') { }
// const char *keyword = "asciiend"; }
// bool match = true;
// if (valid_symbols[ASCII_CONTENT]) {
// for (int k = 0; k < 8; k++) { bool has_content = false;
// if (lexer->lookahead == keyword[k]) {
// lexer->advance(lexer, false); for (;;) {
// } else { if (lexer->eof(lexer)) {
// match = false; break;
// break; }
// }
// } // Check if we're at the start of a line with 'asciiend'
// if (lexer->lookahead == '\n') {
// // Check that asciiend is followed by whitespace or EOL lexer->advance(lexer, false);
// if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' || lexer->mark_end(lexer);
// lexer->lookahead == ' ' || lexer->lookahead == '\t' || has_content = true;
// lexer->lookahead == ',' ||
// lexer->eof(lexer))) { // Skip whitespace at the start of the line
// lexer->result_symbol = ASCII_CONTENT; while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
// return has_content; lexer->advance(lexer, false);
// } }
//
// // Failed to match asciiend, mark the current position // Check if this line starts with 'asciiend'
// lexer->mark_end(lexer); if (lexer->lookahead == 'a') {
// } const char *keyword = "asciiend";
// } else { bool match = true;
// lexer->advance(lexer, false);
// lexer->mark_end(lexer); for (int k = 0; k < 8; k++) {
// has_content = true; if (lexer->lookahead == keyword[k]) {
// } lexer->advance(lexer, false);
// } } else {
// match = false;
// lexer->result_symbol = ASCII_CONTENT; break;
// return has_content; }
// } }
// Check that asciiend is followed by whitespace or EOL or closing delimiters
if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
lexer->lookahead == ',' || lexer->lookahead == ')' ||
lexer->lookahead == ']' || lexer->lookahead == 0xFF3D || // full-width
lexer->eof(lexer))) {
lexer->mark_end(lexer);
lexer->result_symbol = ASCII_CONTENT;
return has_content;
}
// Failed to match asciiend, mark the current position
lexer->mark_end(lexer);
}
} else {
lexer->advance(lexer, false);
lexer->mark_end(lexer);
has_content = true;
}
}
// If we reached EOF without finding asciiend, this is not valid ASCII content
return false;
}
if (scanner->queued_tokens_size > 0) { if (scanner->queued_tokens_size > 0) {
enum TokenType token = scanner->queued_tokens[0]; enum TokenType token = scanner->queued_tokens[0];
@@ -180,10 +206,17 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
found_end_of_line = true; found_end_of_line = true;
indent_length = 0; indent_length = 0;
lexer->advance(lexer, false); lexer->advance(lexer, false);
// After consuming \n, only consume whitespace on the SAME logical line
// Don't continue to next line
break;
} else if (lexer->lookahead == '\r') {
// Consume \r as part of line ending (for CRLF), don't skip it
lexer->advance(lexer, false);
// Continue to potentially consume \n that follows \r
} else if (lexer->lookahead == ' ') { } else if (lexer->lookahead == ' ') {
indent_length++; indent_length++;
lexer->advance(lexer, false); lexer->advance(lexer, false);
} else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') { } else if (lexer->lookahead == '\f') {
indent_length = 0; indent_length = 0;
lexer->advance(lexer, false); lexer->advance(lexer, false);
} else if (lexer->lookahead == '\t') { } else if (lexer->lookahead == '\t') {
@@ -197,6 +230,55 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
} }
} }
// After breaking from newline, consume leading whitespace/indentation
if (found_end_of_line && !lexer->eof(lexer)) {
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
if (lexer->lookahead == ' ') {
indent_length++;
} else {
indent_length += 8;
}
lexer->advance(lexer, false);
}
// Skip comment-only lines when measuring indentation
while (lexer->lookahead == '/' && !lexer->eof(lexer)) {
lexer->mark_end(lexer);
lexer->advance(lexer, false);
// Check if this is a comment
if (lexer->lookahead == '/') {
// Skip the rest of the comment line
while (lexer->lookahead != '\n' && lexer->lookahead != '\r' && !lexer->eof(lexer)) {
lexer->advance(lexer, false);
}
// Skip newline
if (lexer->lookahead == '\r') {
lexer->advance(lexer, false);
}
if (lexer->lookahead == '\n') {
lexer->advance(lexer, false);
}
// Measure indentation of next line
indent_length = 0;
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
if (lexer->lookahead == ' ') {
indent_length++;
} else {
indent_length += 8;
}
lexer->advance(lexer, false);
}
} else {
// Not a comment, break
break;
}
}
}
if (found_end_of_line) { if (found_end_of_line) {