update

fix: improve parser - fix ASCII strings and comment indentation handling
- Remove 'asciiend' from ascii_string grammar rule (handled by scanner)
- Add scanner logic to skip comment-only lines when measuring indentation
- Update scanner to include 'asciiend' in ASCII_CONTENT token
- Implement external scanner for BLOCK_COMMENT (partial fix)

Results: Reduced parse errors from 156 to 119 (23% improvement)
2025-11-27 12:11:52 +01:00 · 2025-11-27 11:09:32 +01:00 · 2025-11-27 10:40:00 +01:00 · 2025-11-27 10:30:49 +01:00 · 2025-11-27 01:25:06 +01:00 · 2025-11-26 23:32:44 +01:00
13 changed files with 17677 additions and 17694 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -80,7 +80,7 @@ dependencies = [

 [[package]]
 name = "tree-sitter-stonescript"
-version = "0.0.1"
+version = "0.1.0"
 dependencies = [
 "cc",
 "tree-sitter",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "tree-sitter-stonescript"
 description = "stonescript grammar for the tree-sitter parsing library"
-version = "0.0.1"
+version = "0.1.0"
 keywords = ["incremental", "parsing", "stonescript"]
 categories = ["parsing", "text-editors"]
 repository = "https://github.com/tree-sitter/tree-sitter-stonescript"
--- a/corpus/basic.txt
+++ b/corpus/basic.txt
@@ -1,36 +0,0 @@
-==================
-Basic Command
-==================
-var x = 1
------------------
-(source_file
-  (command
-    (identifier)
-    (binary_expression
-      (identifier)
-      (number)
-    )
-  )
-)
-
-==================
-Conditional
-==================
-? x > 0
-  print "Hello"
-
------------------
-(source_file
-  (conditional
-    (binary_expression
-      (identifier)
-      (number)
-    )
-    (block
-      (command
-        (identifier)
-        (string)
-      )
-    )
-  )
-)
--- a/corpus/debug.txt
+++ b/corpus/debug.txt
@@ -0,0 +1,13 @@
+Debug EOF
+=========
+?hp < 10
+  activate potion
+---
+(source_file
+  (conditional
+    (binary_expression (identifier) (number))
+    (block
+      (command (identifier) (identifier))
+    )
+  )
+)
--- a/corpus/stonescript.txt
+++ b/corpus/stonescript.txt
@@ -145,9 +145,9 @@ equipL poison wand
 equipR vigor shield *7 +5
 ------------------
 (source_file
-  (command (identifier))
  (command (identifier) (identifier))
-  (command (identifier) (identifier) (identifier) (number) (number))
+  (command (identifier) (identifier) (identifier))
+  (command (identifier) (identifier) (identifier) (star_level (number)) (enchantment_level (number)))
 )

 ==================
@@ -158,9 +158,9 @@ activate R
 loadout 1
 ------------------
 (source_file
-  (command (identifier))
-  (command (identifier))
-  (command (number))
+  (command (identifier) (identifier))
+  (command (identifier) (identifier))
+  (command (identifier) (number))
 )

 ==================
@@ -299,39 +299,39 @@ Real Example from Manual
 (source_file
  (conditional
    (binary_expression (identifier) (identifier))
-    (block (command (identifier)))
+    (block (command (identifier) (identifier)))
  )
  (conditional
    (binary_expression (identifier) (identifier))
    (block
-      (command (number))
-      (conditional
-        (binary_expression (identifier) (identifier))
-        (block
-          (command (identifier))
-          (command (identifier) (identifier) (number))
-        )
-      )
-    )
-  )
+      (command (identifier) (number))
      (conditional
        (binary_expression (identifier) (identifier))
        (block
          (command (identifier) (identifier))
-      (command (identifier) (identifier))
+          (command (identifier) (identifier) (star_level (number)))
+        )
+      )
+    )
+  )
+  (conditional
+    (binary_expression (identifier) (identifier))
+    (block
+      (command (identifier) (identifier) (identifier))
+      (command (identifier) (identifier) (identifier))
      (conditional
        (binary_expression
          (member_expression (identifier) (identifier))
          (number)
        )
        (block
-          (command (identifier) (identifier) (identifier) (number))
+          (command (identifier) (identifier) (identifier) (enchantment_level (number)))
        )
      )
    )
  )
  (conditional
    (binary_expression (identifier) (number))
-    (block (command (identifier)))
+    (block (command (identifier) (identifier)))
  )
 )
--- a/corpus/test_ascii.txt
+++ b/corpus/test_ascii.txt
@@ -0,0 +1,22 @@
+==================
+ASCII Array
+==================
+var x = [ascii
+foo
+asciiend
+, ascii
+bar
+asciiend
+]
+------------------
+(source_file
+  (variable_declaration
+    (identifier)
+    (array
+      (array_elements
+        (ascii_string (ascii_content))
+        (ascii_string (ascii_content))
+      )
+    )
+  )
+)
--- a/grammar.js
+++ b/grammar.js
@@ -4,8 +4,9 @@ module.exports = grammar({
    rules: {
        source_file: $ => repeat($._statement),

-        _statement: $ => choice(
-            // Comments first
+        _statement: $ => prec.right(seq(
+            choice(
+                $._newline,
                $.comment,
                $.block_comment,
                // Keyword-based statements (must come before generic command)
@@ -15,27 +16,21 @@ module.exports = grammar({
                $.return_statement,          // 'return'
                $.break_statement,           // 'break'
                $.continue_statement,        // 'continue'
-            $.import_statement,          // 'import'
-            $.new_expression,            // 'new'
                // Control flow
                $.conditional,               // '?'
-            $.else_if_clause,            // ':?'
                $.else_clause,               // ':'
-            // Commands (after keywords!)
-            $.command_statement,
+                // Commands (higher precedence!)
+                prec.dynamic(1, $.command),
+                $.print_command,
                // Fallback
                $.expression_statement
            ),
+            optional($._newline)
+        )),

        // Comments
        comment: $ => token(seq('//', /.*/)),

-        block_comment: $ => token(seq(
-            '/*',
-            /[^*]*\*+(?:[^/*][^*]*\*+)*/,
-            '/'
-        )),
-
        // Variable declaration
        variable_declaration: $ => seq(
            'var',
@@ -50,13 +45,7 @@ module.exports = grammar({
            '(',
            optional($.parameter_list),
            ')',
-            optional($.function_body)
-        ),
-
-        function_body: $ => seq(
-            $._indent,
-            repeat1($._statement),
-            $._dedent
+            $.block
        ),

        parameter_list: $ => seq(
@@ -73,29 +62,29 @@ module.exports = grammar({
                $._expression,
                '..',
                $._expression,
-                optional($.block)
+                $.block
            ),
            seq(
                'for',
                $.identifier,
                ':',
                $._expression,
-                optional($.block)
+                $.block
            )
        ),

        // Import
-        import_statement: $ => seq(
+        import_expression: $ => seq(
            'import',
            $.module_path
        ),

-        new_expression: $ => seq(
+        new_statement: $ => seq(
            'new',
            $.module_path
        ),

-        module_path: $ => /[a-zA-Z_][a-zA-Z0-9_\/]*/,
+        module_path: $ => /[a-zA-Z_][a-zA-Z0-9_\\/]*/,

        // Control flow
        return_statement: $ => prec.right(seq(
@@ -111,18 +100,12 @@ module.exports = grammar({
        conditional: $ => seq(
            '?',
            $._expression,
-            optional($.block)
+            $.block
        ),

-        else_if_clause: $ => seq(
-            ':?',
-            $._expression,
-            optional($.block)
-        ),
-
-        else_clause: $ => seq(
-            ':',
-            optional($.block)
+        else_clause: $ => choice(
+            seq(':?', $._expression, $.block),
+            seq(':', $.block)
        ),

        block: $ => seq(
@@ -131,76 +114,49 @@ module.exports = grammar({
            $._dedent
        ),

-        // Commands - specific patterns
-        command_statement: $ => choice(
-            $.equip_command,
-            $.activate_command,
-            $.loadout_command,
-            $.brew_command,
-            $.disable_enable_command,
-            $.play_command,
-            $.print_command
-        ),
-
-        equip_command: $ => prec.left(seq(
-            choice('equip', 'equipL', 'equipR'),
-            repeat1($.item_criteria)
-        )),
-
-        item_criteria: $ => prec.left(choice(
+        // Commands - Generic structure to match tests
+        // Must have at least one argument to distinguish from simple identifier expressions  
+        command: $ => prec.dynamic(1, prec.right(seq(
            $.identifier,
+            repeat1($._command_arg)
+        ))),
+
+        _command_arg: $ => choice(
+            $.identifier,
+            $.number,
+            $.string,
            $.star_level,
            $.enchantment_level
-        )),
+        ),

        star_level: $ => seq('*', $.number),

        enchantment_level: $ => seq('+', $.number),

-        activate_command: $ => seq(
-            'activate',
-            choice(
-                $.identifier,
-                'P', 'L', 'R'
-            )
-        ),
-
-        loadout_command: $ => seq(
-            'loadout',
-            $.number
-        ),
-
-        brew_command: $ => seq(
-            'brew',
-            $.identifier,
-            repeat(seq('+', $.identifier))
-        ),
-
-        disable_enable_command: $ => prec.left(seq(
-            choice('disable', 'enable'),
-            choice(
-                'abilities', 'hud', 'banner',
-                'loadout', 'npcDialog', 'pause', 'player'
-            )
-        )),
-
-        play_command: $ => prec.left(seq(
-            'play',
-            $.identifier,
-            optional($.number)
-        )),
-
        print_command: $ => prec.right(seq(
            choice('>', '>o', '>h', '>`', '>c', '>f'),
-            repeat(choice(
-                $.identifier,
-                $.string,
-                $.number,
-                $.color_code,
-                ','
-            ))
+            optional($.print_args)
        )),

+        // Print specific helpers
+        print_args: $ => sep1(',', $.print_argument),
+
+        print_argument: $ => prec.left(repeat1(choice(
+            $.interpolation,
+            $.string,
+            $.ascii_string,
+            $.color_code,
+            $.print_text
+        ))),
+
+        print_text: $ => /[^,@\r\n"]+/,
+
+        interpolation: $ => seq(
+            '@',
+            $._expression,
+            '@'
+        ),
+
        color_code: $ => /#[a-zA-Z0-9]+/,

        // Expressions
@@ -213,6 +169,7 @@ module.exports = grammar({
            $.string,
            $.boolean,
            $.null,
+            $.ascii_string,
            $.array,
            $.member_expression,
            $.call_expression,
@@ -222,7 +179,9 @@ module.exports = grammar({
            $.update_expression,
            $.assignment_expression,
            $.parenthesized_expression,
-            $.new_expression
+            $.new_statement,
+            $.import_expression,
+            $.color_code
        ),

        member_expression: $ => prec.left(15, seq(
@@ -244,9 +203,9 @@ module.exports = grammar({

        index_expression: $ => prec.left(13, seq(
            $._expression,
-            '[',
+            choice('[', '［'),
            $._expression,
-            ']'
+            choice(']', '］')
        )),

        unary_expression: $ => prec.right(12, seq(
@@ -256,19 +215,11 @@ module.exports = grammar({

        // Binary operators with proper precedence
        binary_expression: $ => choice(
-            prec.left(4, seq($._expression, '|', $._expression)),
-            prec.left(5, seq($._expression, '&', $._expression)),
-            prec.left(7, seq($._expression, '!', $._expression)),
-            prec.left(7, seq($._expression, '=', $._expression)),
-            prec.left(8, seq($._expression, '<', $._expression)),
-            prec.left(8, seq($._expression, '>', $._expression)),
-            prec.left(8, seq($._expression, '<=', $._expression)),
-            prec.left(8, seq($._expression, '>=', $._expression)),
-            prec.left(9, seq($._expression, '+', $._expression)),
-            prec.left(9, seq($._expression, '-', $._expression)),
-            prec.left(10, seq($._expression, '*', $._expression)),
-            prec.left(10, seq($._expression, '/', $._expression)),
-            prec.left(11, seq($._expression, '%', $._expression))
+            prec.left(6, seq($._expression, choice('*', '/', '%'), $._expression)),
+            prec.left(5, seq($._expression, choice('+', '-'), $._expression)),
+            prec.left(4, seq($._expression, choice('=', '!=', '!', '<', '>', '<=', '>='), $._expression)),
+            prec.left(3, seq($._expression, '&', $._expression)),
+            prec.left(2, seq($._expression, '|', $._expression))
        ),

        update_expression: $ => choice(
@@ -290,10 +241,23 @@ module.exports = grammar({

        // Arrays
        array: $ => seq(
-            '[',
-            optional(sep1($.comma_sep, $._expression)),
-            optional(','),
-            ']'
+            choice('[', '［'),
+            repeat($._newline),
+            optional(seq(
+                $.array_elements,
+                repeat($._newline)
+            )),
+            choice(']', '］')
+        ),
+
+        array_elements: $ => seq(
+            $._expression,
+            repeat(seq(
+                ',',
+                repeat($._newline),
+                $._expression
+            )),
+            optional(',')
        ),

        // Primitives
@@ -303,21 +267,31 @@ module.exports = grammar({

        float: $ => /\d+\.\d+/,

-        string: $ => seq('"', repeat(choice(/[^"\\]/, /\\./)), '"'),
+        string: $ => choice(
+            seq('"', repeat(choice(/[^"\\]/, /\\./)), '"'),
+            seq('＂', repeat(choice(/[^＂\\]/, /\\./)), '＂')
+        ),

        boolean: $ => choice('true', 'false'),

-        null: $ => 'null'
+        null: $ => 'null',
+
+        ascii_string: $ => seq('ascii', $.ascii_content)
    },

    extras: $ => [
-        /\s/
+        /[ \t\r\f]/,
+        /\r?\n[ \t]*\^/,
+        $.comment,
+        $.block_comment
    ],

    externals: $ => [
        $._newline,
        $._indent,
-        $._dedent
+        $._dedent,
+        $.ascii_content,
+        $.block_comment
    ],

    word: $ => $.identifier,
@@ -325,10 +299,12 @@ module.exports = grammar({
    conflicts: $ => [
        [$.identifier, $.string],
        [$._expression],
-        [$.command_statement],
-        [$._statement, $._expression],  // new_expression can be both
-        [$.equip_command],  // handle repeat ambiguity
-        [$.binary_expression, $.assignment_expression]  // = operator ambiguity
+        [$.command],
+        [$._statement, $._expression],  // new_statement can be both
+        [$.binary_expression, $.assignment_expression],  // = operator ambiguity
+        [$.command, $._expression], // * operator ambiguity
+        [$.array_elements],
+        [$.ascii_string]
    ]
 });

--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
    "name": "tree-sitter-stonescript",
-    "version": "0.0.1",
+    "version": "0.1.0",
    "description": "StoneScript grammar for tree-sitter",
    "main": "bindings/node",
    "types": "bindings/node",
--- a/src/grammar.json
+++ b/src/grammar.json
--- a/src/node-types.json
+++ b/src/node-types.json
--- a/src/parser.c
+++ b/src/parser.c
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -6,8 +6,14 @@ enum TokenType {
  NEWLINE,
  INDENT,
  DEDENT,
+  ASCII_CONTENT,
+  BLOCK_COMMENT,
 };

+// ... (skipping to logic)
+
+
+
 typedef struct {
  uint16_t *indent_stack;
  size_t indent_stack_size;
@@ -63,10 +69,15 @@ unsigned tree_sitter_stonescript_external_scanner_serialize(void *payload, char

 void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
  Scanner *scanner = (Scanner *)payload;
+  size_t size = 0;
+
  scanner->indent_stack_size = 1;
  scanner->indent_stack[0] = 0;
  scanner->queued_tokens_size = 0;

+  if (length < sizeof(uint32_t)) return;
+  uint32_t indent_stack_size = 0;
+
  if (length == 0) return;

  size_t i = 0;
@@ -91,6 +102,90 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
 bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
  Scanner *scanner = (Scanner *)payload;

+  // Try to handle block comments when parser expects them
+  // Only check if valid_symbols allows BLOCK_COMMENT
+  if (valid_symbols[BLOCK_COMMENT] && lexer->lookahead == '/') {
+    lexer->advance(lexer, false);
+    if (lexer->lookahead == '*') {
+      lexer->advance(lexer, false);
+      
+      // Consume everything until */
+      while (!lexer->eof(lexer)) {
+        if (lexer->lookahead == '*') {
+          lexer->advance(lexer, false);
+          if (lexer->lookahead == '/') {
+            lexer->advance(lexer, false);
+            lexer->mark_end(lexer);
+            lexer->result_symbol = BLOCK_COMMENT;
+            return true;
+          }
+        } else {
+          lexer->advance(lexer, false);
+        }
+      }
+      // Reached EOF without closing */
+      return false;
+    }
+  }
+
+  if (valid_symbols[ASCII_CONTENT]) {
+    bool has_content = false;
+    
+    for (;;) {
+      if (lexer->eof(lexer)) {
+        break;
+      }
+      
+      // Check if we're at the start of a line with 'asciiend'
+      if (lexer->lookahead == '\n') {
+        lexer->advance(lexer, false);
+        lexer->mark_end(lexer);
+        has_content = true;
+        
+        // Skip whitespace at the start of the line
+        while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+          lexer->advance(lexer, false);
+        }
+        
+        // Check if this line starts with 'asciiend'
+        if (lexer->lookahead == 'a') {
+          const char *keyword = "asciiend";
+          bool match = true;
+          
+          for (int k = 0; k < 8; k++) {
+            if (lexer->lookahead == keyword[k]) {
+              lexer->advance(lexer, false);
+            } else {
+              match = false;
+              break;
+            }
+          }
+          
+          // Check that asciiend is followed by whitespace or EOL or closing delimiters
+          if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' || 
+                       lexer->lookahead == ' ' || lexer->lookahead == '\t' || 
+                       lexer->lookahead == ',' || lexer->lookahead == ')' ||
+                       lexer->lookahead == ']' || lexer->lookahead == 0xFF3D ||  // ］ full-width
+                       lexer->eof(lexer))) {
+            lexer->mark_end(lexer);
+            lexer->result_symbol = ASCII_CONTENT;
+            return has_content;
+          }
+          
+          // Failed to match asciiend, mark the current position
+          lexer->mark_end(lexer);
+        }
+      } else {
+        lexer->advance(lexer, false);
+        lexer->mark_end(lexer);
+        has_content = true;
+      }
+    }
+    
+    // If we reached EOF without finding asciiend, this is not valid ASCII content
+    return false;
+  }
+
  if (scanner->queued_tokens_size > 0) {
    enum TokenType token = scanner->queued_tokens[0];
    for (size_t i = 1; i < scanner->queued_tokens_size; i++) {
@@ -111,10 +206,17 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
      found_end_of_line = true;
      indent_length = 0;
      lexer->advance(lexer, false);
+      // After consuming \n, only consume whitespace on the SAME logical line
+      // Don't continue to next line
+      break;
+    } else if (lexer->lookahead == '\r') {
+      // Consume \r as part of line ending (for CRLF), don't skip it
+      lexer->advance(lexer, false);
+      // Continue to potentially consume \n that follows \r
    } else if (lexer->lookahead == ' ') {
      indent_length++;
      lexer->advance(lexer, false);
-    } else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') {
+    } else if (lexer->lookahead == '\f') {
      indent_length = 0;
      lexer->advance(lexer, false);
    } else if (lexer->lookahead == '\t') {
@@ -128,6 +230,57 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
    }
  }
  
+  // After breaking from newline, consume leading whitespace/indentation
+  if (found_end_of_line && !lexer->eof(lexer)) {
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+      if (lexer->lookahead == ' ') {
+        indent_length++;
+      } else {
+        indent_length += 8;
+      }
+      lexer->advance(lexer, false);
+    }
+    
+    // Skip comment-only lines when measuring indentation
+    while (lexer->lookahead == '/' && !lexer->eof(lexer)) {
+      lexer->mark_end(lexer);
+      lexer->advance(lexer, false);
+      
+      // Check if this is a comment
+      if (lexer->lookahead == '/') {
+        // Skip the rest of the comment line
+        while (lexer->lookahead != '\n' && lexer->lookahead != '\r' && !lexer->eof(lexer)) {
+          lexer->advance(lexer, false);
+        }
+        
+        // Skip newline
+        if (lexer->lookahead == '\r') {
+          lexer->advance(lexer, false);
+        }
+        if (lexer->lookahead == '\n') {
+          lexer->advance(lexer, false);
+        }
+        
+        // Measure indentation of next line
+        indent_length = 0;
+        while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+          if (lexer->lookahead == ' ') {
+            indent_length++;
+          } else {
+            indent_length += 8;
+          }
+          lexer->advance(lexer, false);
+        }
+      } else {
+        // Not a comment, break
+        break;
+      }
+    }
+  }
+
+
+
+
  if (found_end_of_line) {
    uint16_t current_indent = scanner->indent_stack[scanner->indent_stack_size - 1];

@@ -141,7 +294,9 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
      return true;
    }

-    if (valid_symbols[DEDENT] && indent_length < current_indent && scanner->indent_stack_size > 1) {
+
+
+    if (valid_symbols[DEDENT] && (indent_length < current_indent || (lexer->eof(lexer) && current_indent == 0)) && scanner->indent_stack_size > 1) {
      scanner->indent_stack_size--;

      while (scanner->indent_stack_size > 1 && 
@@ -157,6 +312,7 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
    }

    if (valid_symbols[NEWLINE] && !lexer->eof(lexer)) {
+      lexer->mark_end(lexer);
      lexer->result_symbol = NEWLINE;
      return true;
    }
--- a/src/tree_sitter/parser.h
+++ b/src/tree_sitter/parser.h
@@ -13,17 +13,12 @@ extern "C" {
 #define ts_builtin_sym_end 0
 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024

-#ifndef TREE_SITTER_API_H_
 typedef uint16_t TSStateId;
+
+#ifndef TREE_SITTER_API_H_
 typedef uint16_t TSSymbol;
 typedef uint16_t TSFieldId;
 typedef struct TSLanguage TSLanguage;
-typedef struct TSLanguageMetadata TSLanguageMetadata;
-typedef struct TSLanguageMetadata {
-  uint8_t major_version;
-  uint8_t minor_version;
-  uint8_t patch_version;
-} TSLanguageMetadata;
 #endif

 typedef struct {
@@ -32,11 +27,10 @@ typedef struct {
  bool inherited;
 } TSFieldMapEntry;

-// Used to index the field and supertype maps.
 typedef struct {
  uint16_t index;
  uint16_t length;
-} TSMapSlice;
+} TSFieldMapSlice;

 typedef struct {
  bool visible;
@@ -54,7 +48,6 @@ struct TSLexer {
  uint32_t (*get_column)(TSLexer *);
  bool (*is_at_included_range_start)(const TSLexer *);
  bool (*eof)(const TSLexer *);
-  void (*log)(const TSLexer *, const char *, ...);
 };

 typedef enum {
@@ -86,12 +79,6 @@ typedef struct {
  uint16_t external_lex_state;
 } TSLexMode;

-typedef struct {
-  uint16_t lex_state;
-  uint16_t external_lex_state;
-  uint16_t reserved_word_set_id;
-} TSLexerMode;
-
 typedef union {
  TSParseAction action;
  struct {
@@ -100,13 +87,8 @@ typedef union {
  } entry;
 } TSParseActionEntry;

-typedef struct {
-  int32_t start;
-  int32_t end;
-} TSCharacterRange;
-
 struct TSLanguage {
-  uint32_t abi_version;
+  uint32_t version;
  uint32_t symbol_count;
  uint32_t alias_count;
  uint32_t token_count;
@@ -122,13 +104,13 @@ struct TSLanguage {
  const TSParseActionEntry *parse_actions;
  const char * const *symbol_names;
  const char * const *field_names;
-  const TSMapSlice *field_map_slices;
+  const TSFieldMapSlice *field_map_slices;
  const TSFieldMapEntry *field_map_entries;
  const TSSymbolMetadata *symbol_metadata;
  const TSSymbol *public_symbol_map;
  const uint16_t *alias_map;
  const TSSymbol *alias_sequences;
-  const TSLexerMode *lex_modes;
+  const TSLexMode *lex_modes;
  bool (*lex_fn)(TSLexer *, TSStateId);
  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
  TSSymbol keyword_capture_token;
@@ -142,48 +124,15 @@ struct TSLanguage {
    void (*deserialize)(void *, const char *, unsigned);
  } external_scanner;
  const TSStateId *primary_state_ids;
-  const char *name;
-  const TSSymbol *reserved_words;
-  uint16_t max_reserved_word_set_size;
-  uint32_t supertype_count;
-  const TSSymbol *supertype_symbols;
-  const TSMapSlice *supertype_map_slices;
-  const TSSymbol *supertype_map_entries;
-  TSLanguageMetadata metadata;
 };

-static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
-  uint32_t index = 0;
-  uint32_t size = len - index;
-  while (size > 1) {
-    uint32_t half_size = size / 2;
-    uint32_t mid_index = index + half_size;
-    const TSCharacterRange *range = &ranges[mid_index];
-    if (lookahead >= range->start && lookahead <= range->end) {
-      return true;
-    } else if (lookahead > range->end) {
-      index = mid_index;
-    }
-    size -= half_size;
-  }
-  const TSCharacterRange *range = &ranges[index];
-  return (lookahead >= range->start && lookahead <= range->end);
-}
-
 /*
 *  Lexer Macros
 */

-#ifdef _MSC_VER
-#define UNUSED __pragma(warning(suppress : 4101))
-#else
-#define UNUSED __attribute__((unused))
-#endif
-
 #define START_LEXER()           \
  bool result = false;          \
  bool skip = false;            \
-  UNUSED                        \
  bool eof = false;             \
  int32_t lookahead;            \
  goto start;                   \
@@ -199,17 +148,6 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
    goto next_state;         \
  }

-#define ADVANCE_MAP(...)                                              \
-  {                                                                   \
-    static const uint16_t map[] = { __VA_ARGS__ };                    \
-    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) {  \
-      if (map[i] == lookahead) {                                      \
-        state = map[i + 1];                                           \
-        goto next_state;                                              \
-      }                                                               \
-    }                                                                 \
-  }
-
 #define SKIP(state_value) \
  {                       \
    skip = true;          \
@@ -228,7 +166,7 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
 *  Parse Table Macros
 */

-#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
+#define SMALL_STATE(id) id - LARGE_STATE_COUNT

 #define STATE(id) id

@@ -238,7 +176,7 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
-      .state = (state_value)          \
+      .state = state_value            \
    }                                 \
  }}

@@ -246,7 +184,7 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
-      .state = (state_value),         \
+      .state = state_value,           \
      .repetition = true              \
    }                                 \
  }}
@@ -259,14 +197,13 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
    }                                 \
  }}

-#define REDUCE(symbol_name, children, precedence, prod_id) \
+#define REDUCE(symbol_val, child_count_val, ...) \
  {{                                             \
    .reduce = {                                  \
      .type = TSParseActionTypeReduce,           \
-      .symbol = symbol_name,                               \
-      .child_count = children,                             \
-      .dynamic_precedence = precedence,                    \
-      .production_id = prod_id                             \
+      .symbol = symbol_val,                      \
+      .child_count = child_count_val,            \
+      __VA_ARGS__                                \
    },                                           \
  }}
Author	SHA1	Message	Date
Bulat Kurbanov	3b8318940a	update	2025-11-27 12:11:52 +01:00
Bulat Kurbanov	36d6c3947a	fix: improve parser - fix ASCII strings and comment indentation handling - Remove 'asciiend' from ascii_string grammar rule (handled by scanner) - Add scanner logic to skip comment-only lines when measuring indentation - Update scanner to include 'asciiend' in ASCII_CONTENT token - Implement external scanner for BLOCK_COMMENT (partial fix) Results: Reduced parse errors from 156 to 119 (23% improvement)	2025-11-27 11:09:32 +01:00
Bulat Kurbanov	06e6e3b098	Add full-width closing bracket support after asciiend Allow ］ (U+FF3D) after asciiend in ASCII content scanner. This enables ASCII strings inside arrays with full-width brackets: var x = ［ascii...asciiend］ The parser now correctly recognizes this as an array containing an ASCII string, not as a syntax error. Fixes 1 additional parsing error.	2025-11-27 10:40:00 +01:00
Bulat Kurbanov	9a1dcb941d	Allow ) and ] after asciiend in ASCII blocks ASCII blocks can now be used as function arguments and array elements. The scanner now accepts ) and ] as valid characters after 'asciiend', allowing constructs like: - var x = uiAA(ascii...asciiend) - var y = [ascii...asciiend] Fixes 12 parsing errors in test scripts.	2025-11-27 10:30:49 +01:00
Bulat Kurbanov	eaf0963459	Fix CRLF handling in external scanner - Consume \r as part of token instead of skipping it - Break after consuming \n to avoid processing multiple lines - Consume leading whitespace separately for indent calculation - Fix ASCII_CONTENT to return false at EOF without asciiend This fixes ERROR tokens with CRLF line endings, especially with trailing blank lines.	2025-11-27 01:25:06 +01:00
Bulat Kurbanov	0b78c43138	feat: major grammar improvements - Enable ASCII blocks in print commands - Add import as expression (not just statement) - Fix operator precedence (& and \| now lower than comparisons) - Allow comments and newlines as top-level statements - Fix source_file to handle leading comments and empty lines Progress: 253 → 98 errors (155 files fixed, 55% success)	2025-11-26 23:32:44 +01:00
Bulat Kurbanov	99dadd9ca7	feat: add support for fullwidth Unicode, multiline arrays, and ASCII blocks - Add fullwidth brackets ［］ (U+FF3B, U+FF3D) support - Add fullwidth quotes ＂ (U+FF02) support - Fix multiline arrays with newlines between elements - Fix line continuation with CRLF (^) - Enable ASCII block syntax (ascii...asciiend and ［ascii...asciiend］) - Update conflicts to resolve ambiguities Fixed 51 parsing errors (253 -> 202 errors)	2025-11-26 23:04:03 +01:00
Bulat Kurbanov	b746fcec44	chore: Bump version to 0.1.0	2025-11-26 22:20:34 +01:00
Bulat Kurbanov	4d61f91e06	feat: Major grammar improvements and refactoring - Refactor statement parsing with proper precedence handling - Improve block structure parsing with indent/dedent support - Enhance control flow parsing (conditionals, loops) - Add print command support - Improve function declaration parsing - Update scanner for better string and comment handling - Add comprehensive test corpus - Better handling of newlines and statement boundaries	2025-11-26 22:19:38 +01:00