feat: add support for fullwidth Unicode, multiline arrays, and ASCII blocks

- Add support for fullwidth brackets ［ ］ (U+FF3B, U+FF3D)
- Add support for fullwidth quotes ＂ (U+FF02)
- Fix multiline arrays with newlines between elements
- Fix line continuation (^) with CRLF line endings
- Enable ASCII block syntax (ascii...asciiend and ［ascii...asciiend］)
- Update conflicts to resolve ambiguities

Fixed 51 parsing errors (253 -> 202 errors).
2025-11-26 23:04:03 +01:00
parent b746fcec44
commit 99dadd9ca7
5 changed files with 16668 additions and 12483 deletions
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -6,7 +6,7 @@ enum TokenType {
  NEWLINE,
  INDENT,
  DEDENT,
-//   ASCII_CONTENT,
+  ASCII_CONTENT,
 };

 // ... (skipping to logic)
@@ -101,64 +101,64 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
 bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
  Scanner *scanner = (Scanner *)payload;

-//   if (valid_symbols[ASCII_CONTENT]) {
-//     bool has_content = false;
-//     
-//     for (;;) {
-//       if (lexer->eof(lexer)) {
-//         break;
-//       }
-//       
-//       // Check if we're at the start of a line with 'asciiend'
-//       if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
-//         lexer->advance(lexer, false);
-//         if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
-//           lexer->advance(lexer, false);
-//         }
-//         lexer->mark_end(lexer);
-//         has_content = true;
-//         
-//         // Skip whitespace at the start of the line
-//         while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
-//           lexer->advance(lexer, false);
-//         }
-//         
-//         // Check if this line starts with 'asciiend'
-//         if (lexer->lookahead == 'a') {
-//           const char *keyword = "asciiend";
-//           bool match = true;
-//           
-//           for (int k = 0; k < 8; k++) {
-//             if (lexer->lookahead == keyword[k]) {
-//               lexer->advance(lexer, false);
-//             } else {
-//               match = false;
-//               break;
-//             }
-//           }
-//           
-//           // Check that asciiend is followed by whitespace or EOL
-//           if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' || 
-//                        lexer->lookahead == ' ' || lexer->lookahead == '\t' || 
-//                        lexer->lookahead == ',' || 
-//                        lexer->eof(lexer))) {
-//             lexer->result_symbol = ASCII_CONTENT;
-//             return has_content;
-//           }
-//           
-//           // Failed to match asciiend, mark the current position
-//           lexer->mark_end(lexer);
-//         }
-//       } else {
-//         lexer->advance(lexer, false);
-//         lexer->mark_end(lexer);
-//         has_content = true;
-//       }
-//     }
-//     
-//     lexer->result_symbol = ASCII_CONTENT;
-//     return has_content;
-//   }
+  if (valid_symbols[ASCII_CONTENT]) {
+    bool has_content = false;
+    
+    for (;;) {
+      if (lexer->eof(lexer)) {
+        break;
+      }
+      
+      // Check if we're at the start of a line with 'asciiend'
+      if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
+        lexer->advance(lexer, false);
+        if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
+          lexer->advance(lexer, false);
+        }
+        lexer->mark_end(lexer);
+        has_content = true;
+        
+        // Skip whitespace at the start of the line
+        while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
+          lexer->advance(lexer, false);
+        }
+        
+        // Check if this line starts with 'asciiend'
+        if (lexer->lookahead == 'a') {
+          const char *keyword = "asciiend";
+          bool match = true;
+          
+          for (int k = 0; k < 8; k++) {
+            if (lexer->lookahead == keyword[k]) {
+              lexer->advance(lexer, false);
+            } else {
+              match = false;
+              break;
+            }
+          }
+          
+          // Check that asciiend is followed by whitespace or EOL
+          if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' || 
+                       lexer->lookahead == ' ' || lexer->lookahead == '\t' || 
+                       lexer->lookahead == ',' || 
+                       lexer->eof(lexer))) {
+            lexer->result_symbol = ASCII_CONTENT;
+            return has_content;
+          }
+          
+          // Failed to match asciiend, mark the current position
+          lexer->mark_end(lexer);
+        }
+      } else {
+        lexer->advance(lexer, false);
+        lexer->mark_end(lexer);
+        has_content = true;
+      }
+    }
+    
+    lexer->result_symbol = ASCII_CONTENT;
+    return has_content;
+  }

  if (scanner->queued_tokens_size > 0) {
    enum TokenType token = scanner->queued_tokens[0];