update

fix: improve parser - fix ASCII strings and comment indentation handling
- Remove 'asciiend' from ascii_string grammar rule (handled by scanner)
- Add scanner logic to skip comment-only lines when measuring indentation
- Update scanner to include 'asciiend' in ASCII_CONTENT token
- Implement external scanner for BLOCK_COMMENT (partial fix)

Results: Reduced parse errors from 156 to 119 (23% improvement)
2025-11-27 12:11:52 +01:00 · 2025-11-27 11:09:32 +01:00 · 2025-11-27 10:40:00 +01:00 · 2025-11-27 10:30:49 +01:00 · 2025-11-27 01:25:06 +01:00 · 2025-11-26 23:32:44 +01:00
7 changed files with 19468 additions and 16113 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -80,7 +80,7 @@ dependencies = [

 [[package]]
 name = "tree-sitter-stonescript"
-version = "0.0.1"
+version = "0.1.0"
 dependencies = [
 "cc",
 "tree-sitter",
--- a/corpus/test_ascii.txt
+++ b/corpus/test_ascii.txt
@@ -0,0 +1,22 @@
+==================
+ASCII Array
+==================
+var x = [ascii
+foo
+asciiend
+, ascii
+bar
+asciiend
+]
+------------------
+(source_file
+  (variable_declaration
+    (identifier)
+    (array
+      (array_elements
+        (ascii_string (ascii_content))
+        (ascii_string (ascii_content))
+      )
+    )
+  )
+)
--- a/grammar.js
+++ b/grammar.js
@@ -6,7 +6,7 @@ module.exports = grammar({

        _statement: $ => prec.right(seq(
            choice(
-                // Comments first
+                $._newline,
                $.comment,
                $.block_comment,
                // Keyword-based statements (must come before generic command)
@@ -16,7 +16,6 @@ module.exports = grammar({
                $.return_statement,          // 'return'
                $.break_statement,           // 'break'
                $.continue_statement,        // 'continue'
-                $.import_statement,          // 'import'
                // Control flow
                $.conditional,               // '?'
                $.else_clause,               // ':'
@@ -32,12 +31,6 @@ module.exports = grammar({
        // Comments
        comment: $ => token(seq('//', /.*/)),

-        block_comment: $ => token(seq(
-            '/*',
-            /[^*]*\*+(?:[^/*][^*]*\*+)*/,
-            '/'
-        )),
-
        // Variable declaration
        variable_declaration: $ => seq(
            'var',
@@ -81,7 +74,7 @@ module.exports = grammar({
        ),

        // Import
-        import_statement: $ => seq(
+        import_expression: $ => seq(
            'import',
            $.module_path
        ),
@@ -142,8 +135,7 @@ module.exports = grammar({

        print_command: $ => prec.right(seq(
            choice('>', '>o', '>h', '>`', '>c', '>f'),
-            optional($.print_args),
-            repeat($.print_continuation)
+            optional($.print_args)
        )),

        // Print specific helpers
@@ -152,7 +144,7 @@ module.exports = grammar({
        print_argument: $ => prec.left(repeat1(choice(
            $.interpolation,
            $.string,
-            // $.ascii_string,
+            $.ascii_string,
            $.color_code,
            $.print_text
        ))),
@@ -165,14 +157,6 @@ module.exports = grammar({
            '@'
        ),

-        print_continuation: $ => prec.right(seq(
-            '^',
-            repeat(choice(
-                /[^@\r\n]+/,
-                $.interpolation
-            ))
-        )),
-
        color_code: $ => /#[a-zA-Z0-9]+/,

        // Expressions
@@ -185,6 +169,7 @@ module.exports = grammar({
            $.string,
            $.boolean,
            $.null,
+            $.ascii_string,
            $.array,
            $.member_expression,
            $.call_expression,
@@ -195,7 +180,7 @@ module.exports = grammar({
            $.assignment_expression,
            $.parenthesized_expression,
            $.new_statement,
-            // $.ascii_string,
+            $.import_expression,
            $.color_code
        ),

@@ -218,9 +203,9 @@ module.exports = grammar({

        index_expression: $ => prec.left(13, seq(
            $._expression,
-            '[',
+            choice('[', '［'),
            $._expression,
-            ']'
+            choice(']', '］')
        )),

        unary_expression: $ => prec.right(12, seq(
@@ -230,19 +215,11 @@ module.exports = grammar({

        // Binary operators with proper precedence
        binary_expression: $ => choice(
-            prec.left(4, seq($._expression, '|', $._expression)),
-            prec.left(5, seq($._expression, '&', $._expression)),
-            prec.left(7, seq($._expression, '!', $._expression)),
-            prec.left(7, seq($._expression, '=', $._expression)),
-            prec.left(8, seq($._expression, '<', $._expression)),
-            prec.left(8, seq($._expression, '>', $._expression)),
-            prec.left(8, seq($._expression, '<=', $._expression)),
-            prec.left(8, seq($._expression, '>=', $._expression)),
-            prec.left(9, seq($._expression, '+', $._expression)),
-            prec.left(9, seq($._expression, '-', $._expression)),
-            prec.left(10, seq($._expression, '*', $._expression)),
-            prec.left(10, seq($._expression, '/', $._expression)),
-            prec.left(11, seq($._expression, '%', $._expression))
+            prec.left(6, seq($._expression, choice('*', '/', '%'), $._expression)),
+            prec.left(5, seq($._expression, choice('+', '-'), $._expression)),
+            prec.left(4, seq($._expression, choice('=', '!=', '!', '<', '>', '<=', '>='), $._expression)),
+            prec.left(3, seq($._expression, '&', $._expression)),
+            prec.left(2, seq($._expression, '|', $._expression))
        ),

        update_expression: $ => choice(
@@ -264,14 +241,22 @@ module.exports = grammar({

        // Arrays
        array: $ => seq(
-            '[',
-            optional($.array_elements),
-            ']'
+            choice('[', '［'),
+            repeat($._newline),
+            optional(seq(
+                $.array_elements,
+                repeat($._newline)
+            )),
+            choice(']', '］')
        ),

        array_elements: $ => seq(
            $._expression,
-            repeat(seq(',', $._expression)),
+            repeat(seq(
+                ',',
+                repeat($._newline),
+                $._expression
+            )),
            optional(',')
        ),

@@ -282,21 +267,21 @@ module.exports = grammar({

        float: $ => /\d+\.\d+/,

-        string: $ => seq('"', repeat(choice(/[^"\\]/, /\\./)), '"'),
+        string: $ => choice(
+            seq('"', repeat(choice(/[^"\\]/, /\\./)), '"'),
+            seq('＂', repeat(choice(/[^＂\\]/, /\\./)), '＂')
+        ),

        boolean: $ => choice('true', 'false'),

        null: $ => 'null',
-        // ascii_string: $ => seq(
-        //     'ascii',
-        //     $.ascii_content,
-        //     'asciiend'
-        // )
+
+        ascii_string: $ => seq('ascii', $.ascii_content)
    },

    extras: $ => [
        /[ \t\r\f]/,
-        /[\r\n]\^/,
+        /\r?\n[ \t]*\^/,
        $.comment,
        $.block_comment
    ],
@@ -304,7 +289,9 @@ module.exports = grammar({
    externals: $ => [
        $._newline,
        $._indent,
-        $._dedent
+        $._dedent,
+        $.ascii_content,
+        $.block_comment
    ],

    word: $ => $.identifier,
@@ -315,7 +302,9 @@ module.exports = grammar({
        [$.command],
        [$._statement, $._expression],  // new_statement can be both
        [$.binary_expression, $.assignment_expression],  // = operator ambiguity
-        [$.command, $._expression] // * operator ambiguity
+        [$.command, $._expression], // * operator ambiguity
+        [$.array_elements],
+        [$.ascii_string]
    ]
 });

--- a/src/grammar.json
+++ b/src/grammar.json
@@ -18,6 +18,10 @@
          {
            "type": "CHOICE",
            "members": [
+              {
+                "type": "SYMBOL",
+                "name": "_newline"
+              },
              {
                "type": "SYMBOL",
                "name": "comment"
@@ -50,10 +54,6 @@
                "type": "SYMBOL",
                "name": "continue_statement"
              },
-              {
-                "type": "SYMBOL",
-                "name": "import_statement"
-              },
              {
                "type": "SYMBOL",
                "name": "conditional"
@@ -111,26 +111,6 @@
        ]
      }
    },
-    "block_comment": {
-      "type": "TOKEN",
-      "content": {
-        "type": "SEQ",
-        "members": [
-          {
-            "type": "STRING",
-            "value": "/*"
-          },
-          {
-            "type": "PATTERN",
-            "value": "[^*]*\\*+(?:[^/*][^*]*\\*+)*"
-          },
-          {
-            "type": "STRING",
-            "value": "/"
-          }
-        ]
-      }
-    },
    "variable_declaration": {
      "type": "SEQ",
      "members": [
@@ -302,7 +282,7 @@
        }
      ]
    },
-    "import_statement": {
+    "import_expression": {
      "type": "SEQ",
      "members": [
        {
@@ -558,13 +538,6 @@
                "type": "BLANK"
              }
            ]
-          },
-          {
-            "type": "REPEAT",
-            "content": {
-              "type": "SYMBOL",
-              "name": "print_continuation"
-            }
          }
        ]
      }
@@ -610,6 +583,10 @@
              "type": "SYMBOL",
              "name": "string"
            },
+            {
+              "type": "SYMBOL",
+              "name": "ascii_string"
+            },
            {
              "type": "SYMBOL",
              "name": "color_code"
@@ -643,35 +620,6 @@
        }
      ]
    },
-    "print_continuation": {
-      "type": "PREC_RIGHT",
-      "value": 0,
-      "content": {
-        "type": "SEQ",
-        "members": [
-          {
-            "type": "STRING",
-            "value": "^"
-          },
-          {
-            "type": "REPEAT",
-            "content": {
-              "type": "CHOICE",
-              "members": [
-                {
-                  "type": "PATTERN",
-                  "value": "[^@\\r\\n]+"
-                },
-                {
-                  "type": "SYMBOL",
-                  "name": "interpolation"
-                }
-              ]
-            }
-          }
-        ]
-      }
-    },
    "color_code": {
      "type": "PATTERN",
      "value": "#[a-zA-Z0-9]+"
@@ -707,6 +655,10 @@
          "type": "SYMBOL",
          "name": "null"
        },
+        {
+          "type": "SYMBOL",
+          "name": "ascii_string"
+        },
        {
          "type": "SYMBOL",
          "name": "array"
@@ -747,6 +699,10 @@
          "type": "SYMBOL",
          "name": "new_statement"
        },
+        {
+          "type": "SYMBOL",
+          "name": "import_expression"
+        },
        {
          "type": "SYMBOL",
          "name": "color_code"
@@ -859,16 +815,34 @@
            "name": "_expression"
          },
          {
-            "type": "STRING",
-            "value": "["
+            "type": "CHOICE",
+            "members": [
+              {
+                "type": "STRING",
+                "value": "["
+              },
+              {
+                "type": "STRING",
+                "value": "［"
+              }
+            ]
          },
          {
            "type": "SYMBOL",
            "name": "_expression"
          },
          {
-            "type": "STRING",
-            "value": "]"
+            "type": "CHOICE",
+            "members": [
+              {
+                "type": "STRING",
+                "value": "]"
+              },
+              {
+                "type": "STRING",
+                "value": "］"
+              }
+            ]
          }
        ]
      }
@@ -904,7 +878,7 @@
      "members": [
        {
          "type": "PREC_LEFT",
-          "value": 4,
+          "value": 6,
          "content": {
            "type": "SEQ",
            "members": [
@@ -913,8 +887,21 @@
                "name": "_expression"
              },
              {
-                "type": "STRING",
-                "value": "|"
+                "type": "CHOICE",
+                "members": [
+                  {
+                    "type": "STRING",
+                    "value": "*"
+                  },
+                  {
+                    "type": "STRING",
+                    "value": "/"
+                  },
+                  {
+                    "type": "STRING",
+                    "value": "%"
+                  }
+                ]
              },
              {
                "type": "SYMBOL",
@@ -926,6 +913,86 @@
        {
          "type": "PREC_LEFT",
          "value": 5,
+          "content": {
+            "type": "SEQ",
+            "members": [
+              {
+                "type": "SYMBOL",
+                "name": "_expression"
+              },
+              {
+                "type": "CHOICE",
+                "members": [
+                  {
+                    "type": "STRING",
+                    "value": "+"
+                  },
+                  {
+                    "type": "STRING",
+                    "value": "-"
+                  }
+                ]
+              },
+              {
+                "type": "SYMBOL",
+                "name": "_expression"
+              }
+            ]
+          }
+        },
+        {
+          "type": "PREC_LEFT",
+          "value": 4,
+          "content": {
+            "type": "SEQ",
+            "members": [
+              {
+                "type": "SYMBOL",
+                "name": "_expression"
+              },
+              {
+                "type": "CHOICE",
+                "members": [
+                  {
+                    "type": "STRING",
+                    "value": "="
+                  },
+                  {
+                    "type": "STRING",
+                    "value": "!="
+                  },
+                  {
+                    "type": "STRING",
+                    "value": "!"
+                  },
+                  {
+                    "type": "STRING",
+                    "value": "<"
+                  },
+                  {
+                    "type": "STRING",
+                    "value": ">"
+                  },
+                  {
+                    "type": "STRING",
+                    "value": "<="
+                  },
+                  {
+                    "type": "STRING",
+                    "value": ">="
+                  }
+                ]
+              },
+              {
+                "type": "SYMBOL",
+                "name": "_expression"
+              }
+            ]
+          }
+        },
+        {
+          "type": "PREC_LEFT",
+          "value": 3,
          "content": {
            "type": "SEQ",
            "members": [
@@ -946,7 +1013,7 @@
        },
        {
          "type": "PREC_LEFT",
-          "value": 7,
+          "value": 2,
          "content": {
            "type": "SEQ",
            "members": [
@@ -956,217 +1023,7 @@
              },
              {
                "type": "STRING",
-                "value": "!"
-              },
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              }
-            ]
-          }
-        },
-        {
-          "type": "PREC_LEFT",
-          "value": 7,
-          "content": {
-            "type": "SEQ",
-            "members": [
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              },
-              {
-                "type": "STRING",
-                "value": "="
-              },
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              }
-            ]
-          }
-        },
-        {
-          "type": "PREC_LEFT",
-          "value": 8,
-          "content": {
-            "type": "SEQ",
-            "members": [
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              },
-              {
-                "type": "STRING",
-                "value": "<"
-              },
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              }
-            ]
-          }
-        },
-        {
-          "type": "PREC_LEFT",
-          "value": 8,
-          "content": {
-            "type": "SEQ",
-            "members": [
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              },
-              {
-                "type": "STRING",
-                "value": ">"
-              },
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              }
-            ]
-          }
-        },
-        {
-          "type": "PREC_LEFT",
-          "value": 8,
-          "content": {
-            "type": "SEQ",
-            "members": [
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              },
-              {
-                "type": "STRING",
-                "value": "<="
-              },
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              }
-            ]
-          }
-        },
-        {
-          "type": "PREC_LEFT",
-          "value": 8,
-          "content": {
-            "type": "SEQ",
-            "members": [
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              },
-              {
-                "type": "STRING",
-                "value": ">="
-              },
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              }
-            ]
-          }
-        },
-        {
-          "type": "PREC_LEFT",
-          "value": 9,
-          "content": {
-            "type": "SEQ",
-            "members": [
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              },
-              {
-                "type": "STRING",
-                "value": "+"
-              },
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              }
-            ]
-          }
-        },
-        {
-          "type": "PREC_LEFT",
-          "value": 9,
-          "content": {
-            "type": "SEQ",
-            "members": [
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              },
-              {
-                "type": "STRING",
-                "value": "-"
-              },
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              }
-            ]
-          }
-        },
-        {
-          "type": "PREC_LEFT",
-          "value": 10,
-          "content": {
-            "type": "SEQ",
-            "members": [
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              },
-              {
-                "type": "STRING",
-                "value": "*"
-              },
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              }
-            ]
-          }
-        },
-        {
-          "type": "PREC_LEFT",
-          "value": 10,
-          "content": {
-            "type": "SEQ",
-            "members": [
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              },
-              {
-                "type": "STRING",
-                "value": "/"
-              },
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              }
-            ]
-          }
-        },
-        {
-          "type": "PREC_LEFT",
-          "value": 11,
-          "content": {
-            "type": "SEQ",
-            "members": [
-              {
-                "type": "SYMBOL",
-                "name": "_expression"
-              },
-              {
-                "type": "STRING",
-                "value": "%"
+                "value": "|"
              },
              {
                "type": "SYMBOL",
@@ -1297,15 +1154,43 @@
      "type": "SEQ",
      "members": [
        {
-          "type": "STRING",
-          "value": "["
+          "type": "CHOICE",
+          "members": [
+            {
+              "type": "STRING",
+              "value": "["
+            },
+            {
+              "type": "STRING",
+              "value": "［"
+            }
+          ]
+        },
+        {
+          "type": "REPEAT",
+          "content": {
+            "type": "SYMBOL",
+            "name": "_newline"
+          }
        },
        {
          "type": "CHOICE",
          "members": [
            {
-              "type": "SYMBOL",
-              "name": "array_elements"
+              "type": "SEQ",
+              "members": [
+                {
+                  "type": "SYMBOL",
+                  "name": "array_elements"
+                },
+                {
+                  "type": "REPEAT",
+                  "content": {
+                    "type": "SYMBOL",
+                    "name": "_newline"
+                  }
+                }
+              ]
            },
            {
              "type": "BLANK"
@@ -1313,8 +1198,17 @@
          ]
        },
        {
-          "type": "STRING",
-          "value": "]"
+          "type": "CHOICE",
+          "members": [
+            {
+              "type": "STRING",
+              "value": "]"
+            },
+            {
+              "type": "STRING",
+              "value": "］"
+            }
+          ]
        }
      ]
    },
@@ -1334,6 +1228,13 @@
                "type": "STRING",
                "value": ","
              },
+              {
+                "type": "REPEAT",
+                "content": {
+                  "type": "SYMBOL",
+                  "name": "_newline"
+                }
+              },
              {
                "type": "SYMBOL",
                "name": "_expression"
@@ -1368,31 +1269,65 @@
      "value": "\\d+\\.\\d+"
    },
    "string": {
-      "type": "SEQ",
+      "type": "CHOICE",
      "members": [
        {
-          "type": "STRING",
-          "value": "\""
-        },
-        {
-          "type": "REPEAT",
-          "content": {
-            "type": "CHOICE",
-            "members": [
-              {
-                "type": "PATTERN",
-                "value": "[^\"\\\\]"
-              },
-              {
-                "type": "PATTERN",
-                "value": "\\\\."
+          "type": "SEQ",
+          "members": [
+            {
+              "type": "STRING",
+              "value": "\""
+            },
+            {
+              "type": "REPEAT",
+              "content": {
+                "type": "CHOICE",
+                "members": [
+                  {
+                    "type": "PATTERN",
+                    "value": "[^\"\\\\]"
+                  },
+                  {
+                    "type": "PATTERN",
+                    "value": "\\\\."
+                  }
+                ]
              }
-            ]
-          }
+            },
+            {
+              "type": "STRING",
+              "value": "\""
+            }
+          ]
        },
        {
-          "type": "STRING",
-          "value": "\""
+          "type": "SEQ",
+          "members": [
+            {
+              "type": "STRING",
+              "value": "＂"
+            },
+            {
+              "type": "REPEAT",
+              "content": {
+                "type": "CHOICE",
+                "members": [
+                  {
+                    "type": "PATTERN",
+                    "value": "[^＂\\\\]"
+                  },
+                  {
+                    "type": "PATTERN",
+                    "value": "\\\\."
+                  }
+                ]
+              }
+            },
+            {
+              "type": "STRING",
+              "value": "＂"
+            }
+          ]
        }
      ]
    },
@@ -1412,6 +1347,19 @@
    "null": {
      "type": "STRING",
      "value": "null"
+    },
+    "ascii_string": {
+      "type": "SEQ",
+      "members": [
+        {
+          "type": "STRING",
+          "value": "ascii"
+        },
+        {
+          "type": "SYMBOL",
+          "name": "ascii_content"
+        }
+      ]
    }
  },
  "extras": [
@@ -1421,7 +1369,7 @@
    },
    {
      "type": "PATTERN",
-      "value": "[\\r\\n]\\^"
+      "value": "\\r?\\n[ \\t]*\\^"
    },
    {
      "type": "SYMBOL",
@@ -1454,6 +1402,12 @@
    [
      "command",
      "_expression"
+    ],
+    [
+      "array_elements"
+    ],
+    [
+      "ascii_string"
    ]
  ],
  "precedences": [],
@@ -1469,6 +1423,14 @@
    {
      "type": "SYMBOL",
      "name": "_dedent"
+    },
+    {
+      "type": "SYMBOL",
+      "name": "ascii_content"
+    },
+    {
+      "type": "SYMBOL",
+      "name": "block_comment"
    }
  ],
  "inline": [],
--- a/src/node-types.json
+++ b/src/node-types.json
@@ -11,6 +11,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -43,6 +47,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -109,6 +117,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -137,6 +149,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -176,6 +192,21 @@
      ]
    }
  },
+  {
+    "type": "ascii_string",
+    "named": true,
+    "fields": {},
+    "children": {
+      "multiple": false,
+      "required": true,
+      "types": [
+        {
+          "type": "ascii_content",
+          "named": true
+        }
+      ]
+    }
+  },
  {
    "type": "assignment_expression",
    "named": true,
@@ -188,6 +219,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -216,6 +251,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -267,6 +306,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -295,6 +338,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -340,7 +387,7 @@
    "fields": {},
    "children": {
      "multiple": true,
-      "required": true,
+      "required": false,
      "types": [
        {
          "type": "block_comment",
@@ -382,10 +429,6 @@
          "type": "function_declaration",
          "named": true
        },
-        {
-          "type": "import_statement",
-          "named": true
-        },
        {
          "type": "print_command",
          "named": true
@@ -418,6 +461,10 @@
            "type": "array",
            "named": true
          },
+          {
+            "type": "ascii_string",
+            "named": true
+          },
          {
            "type": "assignment_expression",
            "named": true
@@ -446,6 +493,10 @@
            "type": "identifier",
            "named": true
          },
+          {
+            "type": "import_expression",
+            "named": true
+          },
          {
            "type": "index_expression",
            "named": true
@@ -544,6 +595,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -576,6 +631,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -627,6 +686,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -659,6 +722,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -725,6 +792,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -753,6 +824,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -804,6 +879,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -836,6 +915,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -906,7 +989,7 @@
    }
  },
  {
-    "type": "import_statement",
+    "type": "import_expression",
    "named": true,
    "fields": {},
    "children": {
@@ -932,6 +1015,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -960,6 +1047,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -1011,6 +1102,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -1039,6 +1134,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -1090,6 +1189,10 @@
            "type": "array",
            "named": true
          },
+          {
+            "type": "ascii_string",
+            "named": true
+          },
          {
            "type": "assignment_expression",
            "named": true
@@ -1118,6 +1221,10 @@
            "type": "identifier",
            "named": true
          },
+          {
+            "type": "import_expression",
+            "named": true
+          },
          {
            "type": "index_expression",
            "named": true
@@ -1210,6 +1317,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -1238,6 +1349,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -1300,6 +1415,10 @@
      "multiple": true,
      "required": true,
      "types": [
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "color_code",
          "named": true
@@ -1324,31 +1443,12 @@
    "named": true,
    "fields": {},
    "children": {
-      "multiple": true,
+      "multiple": false,
      "required": false,
      "types": [
        {
          "type": "print_args",
          "named": true
-        },
-        {
-          "type": "print_continuation",
-          "named": true
-        }
-      ]
-    }
-  },
-  {
-    "type": "print_continuation",
-    "named": true,
-    "fields": {},
-    "children": {
-      "multiple": true,
-      "required": false,
-      "types": [
-        {
-          "type": "interpolation",
-          "named": true
        }
      ]
    }
@@ -1365,6 +1465,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -1393,6 +1497,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -1480,10 +1588,6 @@
          "type": "function_declaration",
          "named": true
        },
-        {
-          "type": "import_statement",
-          "named": true
-        },
        {
          "type": "print_command",
          "named": true
@@ -1531,6 +1635,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -1559,6 +1667,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -1610,6 +1722,10 @@
          "type": "array",
          "named": true
        },
+        {
+          "type": "ascii_string",
+          "named": true
+        },
        {
          "type": "assignment_expression",
          "named": true
@@ -1638,6 +1754,10 @@
          "type": "identifier",
          "named": true
        },
+        {
+          "type": "import_expression",
+          "named": true
+        },
        {
          "type": "index_expression",
          "named": true
@@ -1699,6 +1819,10 @@
            "type": "array",
            "named": true
          },
+          {
+            "type": "ascii_string",
+            "named": true
+          },
          {
            "type": "assignment_expression",
            "named": true
@@ -1727,6 +1851,10 @@
            "type": "identifier",
            "named": true
          },
+          {
+            "type": "import_expression",
+            "named": true
+          },
          {
            "type": "index_expression",
            "named": true
@@ -1771,6 +1899,10 @@
    "type": "!",
    "named": false
  },
+  {
+    "type": "!=",
+    "named": false
+  },
  {
    "type": "\"",
    "named": false
@@ -1908,9 +2040,13 @@
    "named": false
  },
  {
-    "type": "^",
+    "type": "ascii",
    "named": false
  },
+  {
+    "type": "ascii_content",
+    "named": true
+  },
  {
    "type": "block_comment",
    "named": true
@@ -1990,5 +2126,17 @@
  {
    "type": "|",
    "named": false
+  },
+  {
+    "type": "＂",
+    "named": false
+  },
+  {
+    "type": "［",
+    "named": false
+  },
+  {
+    "type": "］",
+    "named": false
  }
 ]
--- a/src/parser.c
+++ b/src/parser.c
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -6,7 +6,8 @@ enum TokenType {
  NEWLINE,
  INDENT,
  DEDENT,
-//   ASCII_CONTENT,
+  ASCII_CONTENT,
+  BLOCK_COMMENT,
 };

 // ... (skipping to logic)
@@ -101,64 +102,89 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
 bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
  Scanner *scanner = (Scanner *)payload;

-//   if (valid_symbols[ASCII_CONTENT]) {
-//     bool has_content = false;
-//     
-//     for (;;) {
-//       if (lexer->eof(lexer)) {
-//         break;
-//       }
-//       
-//       // Check if we're at the start of a line with 'asciiend'
-//       if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
-//         lexer->advance(lexer, false);
-//         if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
-//           lexer->advance(lexer, false);
-//         }
-//         lexer->mark_end(lexer);
-//         has_content = true;
-//         
-//         // Skip whitespace at the start of the line
-//         while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
-//           lexer->advance(lexer, false);
-//         }
-//         
-//         // Check if this line starts with 'asciiend'
-//         if (lexer->lookahead == 'a') {
-//           const char *keyword = "asciiend";
-//           bool match = true;
-//           
-//           for (int k = 0; k < 8; k++) {
-//             if (lexer->lookahead == keyword[k]) {
-//               lexer->advance(lexer, false);
-//             } else {
-//               match = false;
-//               break;
-//             }
-//           }
-//           
-//           // Check that asciiend is followed by whitespace or EOL
-//           if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' || 
-//                        lexer->lookahead == ' ' || lexer->lookahead == '\t' || 
-//                        lexer->lookahead == ',' || 
-//                        lexer->eof(lexer))) {
-//             lexer->result_symbol = ASCII_CONTENT;
-//             return has_content;
-//           }
-//           
-//           // Failed to match asciiend, mark the current position
-//           lexer->mark_end(lexer);
-//         }
-//       } else {
-//         lexer->advance(lexer, false);
-//         lexer->mark_end(lexer);
-//         has_content = true;
-//       }
-//     }
-//     
-//     lexer->result_symbol = ASCII_CONTENT;
-//     return has_content;
-//   }
+  // Try to handle block comments when parser expects them
+  // Only check if valid_symbols allows BLOCK_COMMENT
+  if (valid_symbols[BLOCK_COMMENT] && lexer->lookahead == '/') {
+    lexer->advance(lexer, false);
+    if (lexer->lookahead == '*') {
+      lexer->advance(lexer, false);
+      
+      // Consume everything until */
+      while (!lexer->eof(lexer)) {
+        if (lexer->lookahead == '*') {
+          lexer->advance(lexer, false);
+          if (lexer->lookahead == '/') {
+            lexer->advance(lexer, false);
+            lexer->mark_end(lexer);
+            lexer->result_symbol = BLOCK_COMMENT;
+            return true;
+          }
+        } else {
+          lexer->advance(lexer, false);
+        }
+      }
+      // Reached EOF without closing */
+      return false;
+    }
+  }
+
+  if (valid_symbols[ASCII_CONTENT]) {
+    bool has_content = false;
+    
+    for (;;) {
+      if (lexer->eof(lexer)) {
+        break;
+      }
+      
+      // Check if we're at the start of a line with 'asciiend'
+      if (lexer->lookahead == '\n') {
+        lexer->advance(lexer, false);
+        lexer->mark_end(lexer);
+        has_content = true;
+        
+        // Skip whitespace at the start of the line
+        while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+          lexer->advance(lexer, false);
+        }
+        
+        // Check if this line starts with 'asciiend'
+        if (lexer->lookahead == 'a') {
+          const char *keyword = "asciiend";
+          bool match = true;
+          
+          for (int k = 0; k < 8; k++) {
+            if (lexer->lookahead == keyword[k]) {
+              lexer->advance(lexer, false);
+            } else {
+              match = false;
+              break;
+            }
+          }
+          
+          // Check that asciiend is followed by whitespace, a line ending, end of input, a comma, or a closing delimiter
+          if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' || 
+                       lexer->lookahead == ' ' || lexer->lookahead == '\t' || 
+                       lexer->lookahead == ',' || lexer->lookahead == ')' ||
+                       lexer->lookahead == ']' || lexer->lookahead == 0xFF3D ||  // ］ full-width
+                       lexer->eof(lexer))) {
+            lexer->mark_end(lexer);
+            lexer->result_symbol = ASCII_CONTENT;
+            return has_content;
+          }
+          
+          // Failed to match asciiend, mark the current position
+          lexer->mark_end(lexer);
+        }
+      } else {
+        lexer->advance(lexer, false);
+        lexer->mark_end(lexer);
+        has_content = true;
+      }
+    }
+    
+    // If we reached EOF without finding asciiend, this is not valid ASCII content
+    return false;
+  }

  if (scanner->queued_tokens_size > 0) {
    enum TokenType token = scanner->queued_tokens[0];
@@ -180,10 +206,17 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
      found_end_of_line = true;
      indent_length = 0;
      lexer->advance(lexer, false);
+      // Stop this loop after the first \n instead of continuing across further lines;
+      // the following line's leading whitespace is measured after the loop.
+      break;
+    } else if (lexer->lookahead == '\r') {
+      // Consume \r as part of line ending (for CRLF), don't skip it
+      lexer->advance(lexer, false);
+      // Continue to potentially consume \n that follows \r
    } else if (lexer->lookahead == ' ') {
      indent_length++;
      lexer->advance(lexer, false);
-    } else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') {
+    } else if (lexer->lookahead == '\f') {
      indent_length = 0;
      lexer->advance(lexer, false);
    } else if (lexer->lookahead == '\t') {
@@ -196,6 +229,55 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
      break;
    }
  }
+  
+  // After breaking from newline, consume leading whitespace/indentation
+  if (found_end_of_line && !lexer->eof(lexer)) {
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+      if (lexer->lookahead == ' ') {
+        indent_length++;
+      } else {
+        indent_length += 8;
+      }
+      lexer->advance(lexer, false);
+    }
+    
+    // Skip comment-only lines when measuring indentation
+    while (lexer->lookahead == '/' && !lexer->eof(lexer)) {
+      lexer->mark_end(lexer);
+      lexer->advance(lexer, false);
+      
+      // Check if this is a comment
+      if (lexer->lookahead == '/') {
+        // Skip the rest of the comment line
+        while (lexer->lookahead != '\n' && lexer->lookahead != '\r' && !lexer->eof(lexer)) {
+          lexer->advance(lexer, false);
+        }
+        
+        // Skip newline
+        if (lexer->lookahead == '\r') {
+          lexer->advance(lexer, false);
+        }
+        if (lexer->lookahead == '\n') {
+          lexer->advance(lexer, false);
+        }
+        
+        // Measure indentation of next line
+        indent_length = 0;
+        while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+          if (lexer->lookahead == ' ') {
+            indent_length++;
+          } else {
+            indent_length += 8;
+          }
+          lexer->advance(lexer, false);
+        }
+      } else {
+        // Not a comment, break
+        break;
+      }
+    }
+  }
+
Author	SHA1	Message	Date
Bulat Kurbanov	3b8318940a	update	2025-11-27 12:11:52 +01:00
Bulat Kurbanov	36d6c3947a	fix: improve parser - fix ASCII strings and comment indentation handling - Remove 'asciiend' from ascii_string grammar rule (handled by scanner) - Add scanner logic to skip comment-only lines when measuring indentation - Update scanner to include 'asciiend' in ASCII_CONTENT token - Implement external scanner for BLOCK_COMMENT (partial fix) Results: Reduced parse errors from 156 to 119 (23% improvement)	2025-11-27 11:09:32 +01:00
Bulat Kurbanov	06e6e3b098	Add full-width closing bracket support after asciiend Allow ］ (U+FF3D) after asciiend in ASCII content scanner. This enables ASCII strings inside arrays with full-width brackets: var x = ［ascii...asciiend］ The parser now correctly recognizes this as an array containing an ASCII string, not as a syntax error. Fixes 1 additional parsing error.	2025-11-27 10:40:00 +01:00
Bulat Kurbanov	9a1dcb941d	Allow ) and ] after asciiend in ASCII blocks ASCII blocks can now be used as function arguments and array elements. The scanner now accepts ) and ] as valid characters after 'asciiend', allowing constructs like: - var x = uiAA(ascii...asciiend) - var y = [ascii...asciiend] Fixes 12 parsing errors in test scripts.	2025-11-27 10:30:49 +01:00
Bulat Kurbanov	eaf0963459	Fix CRLF handling in external scanner - Consume \r as part of token instead of skipping it - Break after consuming \n to avoid processing multiple lines - Consume leading whitespace separately for indent calculation - Fix ASCII_CONTENT to return false at EOF without asciiend This fixes ERROR tokens with CRLF line endings, especially with trailing blank lines.	2025-11-27 01:25:06 +01:00
Bulat Kurbanov	0b78c43138	feat: major grammar improvements - Enable ASCII blocks in print commands - Add import as expression (not just statement) - Fix operator precedence (& and \| now lower than comparisons) - Allow comments and newlines as top-level statements - Fix source_file to handle leading comments and empty lines Progress: 253 → 98 errors (155 files fixed, 55% success)	2025-11-26 23:32:44 +01:00
Bulat Kurbanov	99dadd9ca7	feat: add support for fullwidth Unicode, multiline arrays, and ASCII blocks - Add fullwidth brackets ［］ (U+FF3B, U+FF3D) support - Add fullwidth quotes ＂ (U+FF02) support - Fix multiline arrays with newlines between elements - Fix line continuation with CRLF (^) - Enable ASCII block syntax (ascii...asciiend and ［ascii...asciiend］) - Update conflicts to resolve ambiguities Fixed 51 parsing errors (253 -> 202 errors)	2025-11-26 23:04:03 +01:00