9 Commits
v0.0.1 ... main

Author SHA1 Message Date
3b8318940a update 2025-11-27 12:11:52 +01:00
36d6c3947a fix: improve parser - fix ASCII strings and comment indentation handling
- Remove 'asciiend' from ascii_string grammar rule (handled by scanner)
- Add scanner logic to skip comment-only lines when measuring indentation
- Update scanner to include 'asciiend' in ASCII_CONTENT token
- Implement external scanner for BLOCK_COMMENT (partial fix)

Results: Reduced parse errors from 156 to 119 (23% improvement)
2025-11-27 11:09:32 +01:00
06e6e3b098 Add full-width closing bracket support after asciiend
Allow ］ (U+FF3D) after asciiend in ASCII content scanner.
This enables ASCII strings inside arrays with full-width brackets:
  var x = ［ascii...asciiend］

The parser now correctly recognizes this as an array containing
an ASCII string, not as a syntax error.

Fixes 1 additional parsing error.
2025-11-27 10:40:00 +01:00
9a1dcb941d Allow ) and ] after asciiend in ASCII blocks
ASCII blocks can now be used as function arguments and array
elements. The scanner now accepts ) and ] as valid characters
after 'asciiend', allowing constructs like:
- var x = uiAA(ascii...asciiend)
- var y = [ascii...asciiend]

Fixes 12 parsing errors in test scripts.
2025-11-27 10:30:49 +01:00
eaf0963459 Fix CRLF handling in external scanner
- Consume \r as part of token instead of skipping it
- Break after consuming \n to avoid processing multiple lines
- Consume leading whitespace separately for indent calculation
- Fix ASCII_CONTENT to return false at EOF without asciiend

This fixes ERROR tokens with CRLF line endings, especially
with trailing blank lines.
2025-11-27 01:25:06 +01:00
0b78c43138 feat: major grammar improvements
- Enable ASCII blocks in print commands
- Add import as expression (not just statement)
- Fix operator precedence (& and | now lower than comparisons)
- Allow comments and newlines as top-level statements
- Fix source_file to handle leading comments and empty lines

Progress: 253 → 98 errors (155 files fixed, 55% success)
2025-11-26 23:32:44 +01:00
99dadd9ca7 feat: add support for fullwidth Unicode, multiline arrays, and ASCII blocks
- Add fullwidth brackets ［］ (U+FF3B, U+FF3D) support
- Add fullwidth quotes ＂ (U+FF02) support
- Fix multiline arrays with newlines between elements
- Fix line continuation with CRLF (^)
- Enable ASCII block syntax (ascii...asciiend and [ascii...asciiend])
- Update conflicts to resolve ambiguities

Fixed 51 parsing errors (253 -> 202 errors)
2025-11-26 23:04:03 +01:00
b746fcec44 chore: Bump version to 0.1.0 2025-11-26 22:20:34 +01:00
4d61f91e06 feat: Major grammar improvements and refactoring
- Refactor statement parsing with proper precedence handling
- Improve block structure parsing with indent/dedent support
- Enhance control flow parsing (conditionals, loops)
- Add print command support
- Improve function declaration parsing
- Update scanner for better string and comment handling
- Add comprehensive test corpus
- Better handling of newlines and statement boundaries
2025-11-26 22:19:38 +01:00
13 changed files with 17677 additions and 17694 deletions

2
Cargo.lock generated
View File

@@ -80,7 +80,7 @@ dependencies = [
[[package]] [[package]]
name = "tree-sitter-stonescript" name = "tree-sitter-stonescript"
version = "0.0.1" version = "0.1.0"
dependencies = [ dependencies = [
"cc", "cc",
"tree-sitter", "tree-sitter",

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "tree-sitter-stonescript" name = "tree-sitter-stonescript"
description = "stonescript grammar for the tree-sitter parsing library" description = "stonescript grammar for the tree-sitter parsing library"
version = "0.0.1" version = "0.1.0"
keywords = ["incremental", "parsing", "stonescript"] keywords = ["incremental", "parsing", "stonescript"]
categories = ["parsing", "text-editors"] categories = ["parsing", "text-editors"]
repository = "https://github.com/tree-sitter/tree-sitter-stonescript" repository = "https://github.com/tree-sitter/tree-sitter-stonescript"

View File

@@ -1,36 +0,0 @@
==================
Basic Command
==================
var x = 1
------------------
(source_file
(command
(identifier)
(binary_expression
(identifier)
(number)
)
)
)
==================
Conditional
==================
? x > 0
print "Hello"
------------------
(source_file
(conditional
(binary_expression
(identifier)
(number)
)
(block
(command
(identifier)
(string)
)
)
)
)

13
corpus/debug.txt Normal file
View File

@@ -0,0 +1,13 @@
Debug EOF
=========
?hp < 10
activate potion
---
(source_file
(conditional
(binary_expression (identifier) (number))
(block
(command (identifier) (identifier))
)
)
)

View File

@@ -145,9 +145,9 @@ equipL poison wand
equipR vigor shield *7 +5 equipR vigor shield *7 +5
------------------ ------------------
(source_file (source_file
(command (identifier))
(command (identifier) (identifier)) (command (identifier) (identifier))
(command (identifier) (identifier) (identifier) (number) (number)) (command (identifier) (identifier) (identifier))
(command (identifier) (identifier) (identifier) (star_level (number)) (enchantment_level (number)))
) )
================== ==================
@@ -158,9 +158,9 @@ activate R
loadout 1 loadout 1
------------------ ------------------
(source_file (source_file
(command (identifier)) (command (identifier) (identifier))
(command (identifier)) (command (identifier) (identifier))
(command (number)) (command (identifier) (number))
) )
================== ==================
@@ -299,17 +299,17 @@ Real Example from Manual
(source_file (source_file
(conditional (conditional
(binary_expression (identifier) (identifier)) (binary_expression (identifier) (identifier))
(block (command (identifier))) (block (command (identifier) (identifier)))
) )
(conditional (conditional
(binary_expression (identifier) (identifier)) (binary_expression (identifier) (identifier))
(block (block
(command (number)) (command (identifier) (number))
(conditional (conditional
(binary_expression (identifier) (identifier)) (binary_expression (identifier) (identifier))
(block (block
(command (identifier)) (command (identifier) (identifier))
(command (identifier) (identifier) (number)) (command (identifier) (identifier) (star_level (number)))
) )
) )
) )
@@ -317,21 +317,21 @@ Real Example from Manual
(conditional (conditional
(binary_expression (identifier) (identifier)) (binary_expression (identifier) (identifier))
(block (block
(command (identifier) (identifier)) (command (identifier) (identifier) (identifier))
(command (identifier) (identifier)) (command (identifier) (identifier) (identifier))
(conditional (conditional
(binary_expression (binary_expression
(member_expression (identifier) (identifier)) (member_expression (identifier) (identifier))
(number) (number)
) )
(block (block
(command (identifier) (identifier) (identifier) (number)) (command (identifier) (identifier) (identifier) (enchantment_level (number)))
) )
) )
) )
) )
(conditional (conditional
(binary_expression (identifier) (number)) (binary_expression (identifier) (number))
(block (command (identifier))) (block (command (identifier) (identifier)))
) )
) )

22
corpus/test_ascii.txt Normal file
View File

@@ -0,0 +1,22 @@
==================
ASCII Array
==================
var x = [ascii
foo
asciiend
, ascii
bar
asciiend
]
------------------
(source_file
(variable_declaration
(identifier)
(array
(array_elements
(ascii_string (ascii_content))
(ascii_string (ascii_content))
)
)
)
)

View File

@@ -4,38 +4,33 @@ module.exports = grammar({
rules: { rules: {
source_file: $ => repeat($._statement), source_file: $ => repeat($._statement),
_statement: $ => choice( _statement: $ => prec.right(seq(
// Comments first choice(
$.comment, $._newline,
$.block_comment, $.comment,
// Keyword-based statements (must come before generic command) $.block_comment,
$.variable_declaration, // 'var' // Keyword-based statements (must come before generic command)
$.function_declaration, // 'func' $.variable_declaration, // 'var'
$.for_loop, // 'for' $.function_declaration, // 'func'
$.return_statement, // 'return' $.for_loop, // 'for'
$.break_statement, // 'break' $.return_statement, // 'return'
$.continue_statement, // 'continue' $.break_statement, // 'break'
$.import_statement, // 'import' $.continue_statement, // 'continue'
$.new_expression, // 'new' // Control flow
// Control flow $.conditional, // '?'
$.conditional, // '?' $.else_clause, // ':'
$.else_if_clause, // ':?' // Commands (higher precedence!)
$.else_clause, // ':' prec.dynamic(1, $.command),
// Commands (after keywords!) $.print_command,
$.command_statement, // Fallback
// Fallback $.expression_statement
$.expression_statement ),
), optional($._newline)
)),
// Comments // Comments
comment: $ => token(seq('//', /.*/)), comment: $ => token(seq('//', /.*/)),
block_comment: $ => token(seq(
'/*',
/[^*]*\*+(?:[^/*][^*]*\*+)*/,
'/'
)),
// Variable declaration // Variable declaration
variable_declaration: $ => seq( variable_declaration: $ => seq(
'var', 'var',
@@ -50,13 +45,7 @@ module.exports = grammar({
'(', '(',
optional($.parameter_list), optional($.parameter_list),
')', ')',
optional($.function_body) $.block
),
function_body: $ => seq(
$._indent,
repeat1($._statement),
$._dedent
), ),
parameter_list: $ => seq( parameter_list: $ => seq(
@@ -73,29 +62,29 @@ module.exports = grammar({
$._expression, $._expression,
'..', '..',
$._expression, $._expression,
optional($.block) $.block
), ),
seq( seq(
'for', 'for',
$.identifier, $.identifier,
':', ':',
$._expression, $._expression,
optional($.block) $.block
) )
), ),
// Import // Import
import_statement: $ => seq( import_expression: $ => seq(
'import', 'import',
$.module_path $.module_path
), ),
new_expression: $ => seq( new_statement: $ => seq(
'new', 'new',
$.module_path $.module_path
), ),
module_path: $ => /[a-zA-Z_][a-zA-Z0-9_\/]*/, module_path: $ => /[a-zA-Z_][a-zA-Z0-9_\\/]*/,
// Control flow // Control flow
return_statement: $ => prec.right(seq( return_statement: $ => prec.right(seq(
@@ -111,18 +100,12 @@ module.exports = grammar({
conditional: $ => seq( conditional: $ => seq(
'?', '?',
$._expression, $._expression,
optional($.block) $.block
), ),
else_if_clause: $ => seq( else_clause: $ => choice(
':?', seq(':?', $._expression, $.block),
$._expression, seq(':', $.block)
optional($.block)
),
else_clause: $ => seq(
':',
optional($.block)
), ),
block: $ => seq( block: $ => seq(
@@ -131,76 +114,49 @@ module.exports = grammar({
$._dedent $._dedent
), ),
// Commands - specific patterns // Commands - Generic structure to match tests
command_statement: $ => choice( // Must have at least one argument to distinguish from simple identifier expressions
$.equip_command, command: $ => prec.dynamic(1, prec.right(seq(
$.activate_command,
$.loadout_command,
$.brew_command,
$.disable_enable_command,
$.play_command,
$.print_command
),
equip_command: $ => prec.left(seq(
choice('equip', 'equipL', 'equipR'),
repeat1($.item_criteria)
)),
item_criteria: $ => prec.left(choice(
$.identifier, $.identifier,
repeat1($._command_arg)
))),
_command_arg: $ => choice(
$.identifier,
$.number,
$.string,
$.star_level, $.star_level,
$.enchantment_level $.enchantment_level
)), ),
star_level: $ => seq('*', $.number), star_level: $ => seq('*', $.number),
enchantment_level: $ => seq('+', $.number), enchantment_level: $ => seq('+', $.number),
activate_command: $ => seq(
'activate',
choice(
$.identifier,
'P', 'L', 'R'
)
),
loadout_command: $ => seq(
'loadout',
$.number
),
brew_command: $ => seq(
'brew',
$.identifier,
repeat(seq('+', $.identifier))
),
disable_enable_command: $ => prec.left(seq(
choice('disable', 'enable'),
choice(
'abilities', 'hud', 'banner',
'loadout', 'npcDialog', 'pause', 'player'
)
)),
play_command: $ => prec.left(seq(
'play',
$.identifier,
optional($.number)
)),
print_command: $ => prec.right(seq( print_command: $ => prec.right(seq(
choice('>', '>o', '>h', '>`', '>c', '>f'), choice('>', '>o', '>h', '>`', '>c', '>f'),
repeat(choice( optional($.print_args)
$.identifier,
$.string,
$.number,
$.color_code,
','
))
)), )),
// Print specific helpers
print_args: $ => sep1(',', $.print_argument),
print_argument: $ => prec.left(repeat1(choice(
$.interpolation,
$.string,
$.ascii_string,
$.color_code,
$.print_text
))),
print_text: $ => /[^,@\r\n"]+/,
interpolation: $ => seq(
'@',
$._expression,
'@'
),
color_code: $ => /#[a-zA-Z0-9]+/, color_code: $ => /#[a-zA-Z0-9]+/,
// Expressions // Expressions
@@ -213,6 +169,7 @@ module.exports = grammar({
$.string, $.string,
$.boolean, $.boolean,
$.null, $.null,
$.ascii_string,
$.array, $.array,
$.member_expression, $.member_expression,
$.call_expression, $.call_expression,
@@ -222,7 +179,9 @@ module.exports = grammar({
$.update_expression, $.update_expression,
$.assignment_expression, $.assignment_expression,
$.parenthesized_expression, $.parenthesized_expression,
$.new_expression $.new_statement,
$.import_expression,
$.color_code
), ),
member_expression: $ => prec.left(15, seq( member_expression: $ => prec.left(15, seq(
@@ -244,9 +203,9 @@ module.exports = grammar({
index_expression: $ => prec.left(13, seq( index_expression: $ => prec.left(13, seq(
$._expression, $._expression,
'[', choice('[', '［'),
$._expression, $._expression,
']' choice(']', '］')
)), )),
unary_expression: $ => prec.right(12, seq( unary_expression: $ => prec.right(12, seq(
@@ -256,19 +215,11 @@ module.exports = grammar({
// Binary operators with proper precedence // Binary operators with proper precedence
binary_expression: $ => choice( binary_expression: $ => choice(
prec.left(4, seq($._expression, '|', $._expression)), prec.left(6, seq($._expression, choice('*', '/', '%'), $._expression)),
prec.left(5, seq($._expression, '&', $._expression)), prec.left(5, seq($._expression, choice('+', '-'), $._expression)),
prec.left(7, seq($._expression, '!', $._expression)), prec.left(4, seq($._expression, choice('=', '!=', '!', '<', '>', '<=', '>='), $._expression)),
prec.left(7, seq($._expression, '=', $._expression)), prec.left(3, seq($._expression, '&', $._expression)),
prec.left(8, seq($._expression, '<', $._expression)), prec.left(2, seq($._expression, '|', $._expression))
prec.left(8, seq($._expression, '>', $._expression)),
prec.left(8, seq($._expression, '<=', $._expression)),
prec.left(8, seq($._expression, '>=', $._expression)),
prec.left(9, seq($._expression, '+', $._expression)),
prec.left(9, seq($._expression, '-', $._expression)),
prec.left(10, seq($._expression, '*', $._expression)),
prec.left(10, seq($._expression, '/', $._expression)),
prec.left(11, seq($._expression, '%', $._expression))
), ),
update_expression: $ => choice( update_expression: $ => choice(
@@ -290,10 +241,23 @@ module.exports = grammar({
// Arrays // Arrays
array: $ => seq( array: $ => seq(
'[', choice('[', '［'),
optional(sep1($.comma_sep, $._expression)), repeat($._newline),
optional(','), optional(seq(
']' $.array_elements,
repeat($._newline)
)),
choice(']', '］')
),
array_elements: $ => seq(
$._expression,
repeat(seq(
',',
repeat($._newline),
$._expression
)),
optional(',')
), ),
// Primitives // Primitives
@@ -303,21 +267,31 @@ module.exports = grammar({
float: $ => /\d+\.\d+/, float: $ => /\d+\.\d+/,
string: $ => seq('"', repeat(choice(/[^"\\]/, /\\./)), '"'), string: $ => choice(
seq('"', repeat(choice(/[^"\\]/, /\\./)), '"'),
seq('＂', repeat(choice(/[^＂\\]/, /\\./)), '＂')
),
boolean: $ => choice('true', 'false'), boolean: $ => choice('true', 'false'),
null: $ => 'null' null: $ => 'null',
ascii_string: $ => seq('ascii', $.ascii_content)
}, },
extras: $ => [ extras: $ => [
/\s/ /[ \t\r\f]/,
/\r?\n[ \t]*\^/,
$.comment,
$.block_comment
], ],
externals: $ => [ externals: $ => [
$._newline, $._newline,
$._indent, $._indent,
$._dedent $._dedent,
$.ascii_content,
$.block_comment
], ],
word: $ => $.identifier, word: $ => $.identifier,
@@ -325,10 +299,12 @@ module.exports = grammar({
conflicts: $ => [ conflicts: $ => [
[$.identifier, $.string], [$.identifier, $.string],
[$._expression], [$._expression],
[$.command_statement], [$.command],
[$._statement, $._expression], // new_expression can be both [$._statement, $._expression], // new_statement can be both
[$.equip_command], // handle repeat ambiguity [$.binary_expression, $.assignment_expression], // = operator ambiguity
[$.binary_expression, $.assignment_expression] // = operator ambiguity [$.command, $._expression], // * operator ambiguity
[$.array_elements],
[$.ascii_string]
] ]
}); });

View File

@@ -1,6 +1,6 @@
{ {
"name": "tree-sitter-stonescript", "name": "tree-sitter-stonescript",
"version": "0.0.1", "version": "0.1.0",
"description": "StoneScript grammar for tree-sitter", "description": "StoneScript grammar for tree-sitter",
"main": "bindings/node", "main": "bindings/node",
"types": "bindings/node", "types": "bindings/node",

1226
src/grammar.json generated

File diff suppressed because it is too large Load Diff

659
src/node-types.json generated

File diff suppressed because it is too large Load Diff

32878
src/parser.c generated

File diff suppressed because it is too large Load Diff

View File

@@ -6,8 +6,14 @@ enum TokenType {
NEWLINE, NEWLINE,
INDENT, INDENT,
DEDENT, DEDENT,
ASCII_CONTENT,
BLOCK_COMMENT,
}; };
// ... (skipping to logic)
typedef struct { typedef struct {
uint16_t *indent_stack; uint16_t *indent_stack;
size_t indent_stack_size; size_t indent_stack_size;
@@ -63,10 +69,15 @@ unsigned tree_sitter_stonescript_external_scanner_serialize(void *payload, char
void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = (Scanner *)payload; Scanner *scanner = (Scanner *)payload;
size_t size = 0;
scanner->indent_stack_size = 1; scanner->indent_stack_size = 1;
scanner->indent_stack[0] = 0; scanner->indent_stack[0] = 0;
scanner->queued_tokens_size = 0; scanner->queued_tokens_size = 0;
if (length < sizeof(uint32_t)) return;
uint32_t indent_stack_size = 0;
if (length == 0) return; if (length == 0) return;
size_t i = 0; size_t i = 0;
@@ -91,6 +102,90 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload; Scanner *scanner = (Scanner *)payload;
// Try to handle block comments when parser expects them
// Only check if valid_symbols allows BLOCK_COMMENT
if (valid_symbols[BLOCK_COMMENT] && lexer->lookahead == '/') {
lexer->advance(lexer, false);
if (lexer->lookahead == '*') {
lexer->advance(lexer, false);
// Consume everything until */
while (!lexer->eof(lexer)) {
if (lexer->lookahead == '*') {
lexer->advance(lexer, false);
if (lexer->lookahead == '/') {
lexer->advance(lexer, false);
lexer->mark_end(lexer);
lexer->result_symbol = BLOCK_COMMENT;
return true;
}
} else {
lexer->advance(lexer, false);
}
}
// Reached EOF without closing */
return false;
}
}
if (valid_symbols[ASCII_CONTENT]) {
bool has_content = false;
for (;;) {
if (lexer->eof(lexer)) {
break;
}
// Check if we're at the start of a line with 'asciiend'
if (lexer->lookahead == '\n') {
lexer->advance(lexer, false);
lexer->mark_end(lexer);
has_content = true;
// Skip whitespace at the start of the line
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
lexer->advance(lexer, false);
}
// Check if this line starts with 'asciiend'
if (lexer->lookahead == 'a') {
const char *keyword = "asciiend";
bool match = true;
for (int k = 0; k < 8; k++) {
if (lexer->lookahead == keyword[k]) {
lexer->advance(lexer, false);
} else {
match = false;
break;
}
}
// Check that asciiend is followed by whitespace or EOL or closing delimiters
if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
lexer->lookahead == ',' || lexer->lookahead == ')' ||
lexer->lookahead == ']' || lexer->lookahead == 0xFF3D || // full-width
lexer->eof(lexer))) {
lexer->mark_end(lexer);
lexer->result_symbol = ASCII_CONTENT;
return has_content;
}
// Failed to match asciiend, mark the current position
lexer->mark_end(lexer);
}
} else {
lexer->advance(lexer, false);
lexer->mark_end(lexer);
has_content = true;
}
}
// If we reached EOF without finding asciiend, this is not valid ASCII content
return false;
}
if (scanner->queued_tokens_size > 0) { if (scanner->queued_tokens_size > 0) {
enum TokenType token = scanner->queued_tokens[0]; enum TokenType token = scanner->queued_tokens[0];
for (size_t i = 1; i < scanner->queued_tokens_size; i++) { for (size_t i = 1; i < scanner->queued_tokens_size; i++) {
@@ -111,10 +206,17 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
found_end_of_line = true; found_end_of_line = true;
indent_length = 0; indent_length = 0;
lexer->advance(lexer, false); lexer->advance(lexer, false);
// After consuming \n, only consume whitespace on the SAME logical line
// Don't continue to next line
break;
} else if (lexer->lookahead == '\r') {
// Consume \r as part of line ending (for CRLF), don't skip it
lexer->advance(lexer, false);
// Continue to potentially consume \n that follows \r
} else if (lexer->lookahead == ' ') { } else if (lexer->lookahead == ' ') {
indent_length++; indent_length++;
lexer->advance(lexer, false); lexer->advance(lexer, false);
} else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') { } else if (lexer->lookahead == '\f') {
indent_length = 0; indent_length = 0;
lexer->advance(lexer, false); lexer->advance(lexer, false);
} else if (lexer->lookahead == '\t') { } else if (lexer->lookahead == '\t') {
@@ -127,6 +229,57 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
break; break;
} }
} }
// After breaking from newline, consume leading whitespace/indentation
if (found_end_of_line && !lexer->eof(lexer)) {
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
if (lexer->lookahead == ' ') {
indent_length++;
} else {
indent_length += 8;
}
lexer->advance(lexer, false);
}
// Skip comment-only lines when measuring indentation
while (lexer->lookahead == '/' && !lexer->eof(lexer)) {
lexer->mark_end(lexer);
lexer->advance(lexer, false);
// Check if this is a comment
if (lexer->lookahead == '/') {
// Skip the rest of the comment line
while (lexer->lookahead != '\n' && lexer->lookahead != '\r' && !lexer->eof(lexer)) {
lexer->advance(lexer, false);
}
// Skip newline
if (lexer->lookahead == '\r') {
lexer->advance(lexer, false);
}
if (lexer->lookahead == '\n') {
lexer->advance(lexer, false);
}
// Measure indentation of next line
indent_length = 0;
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
if (lexer->lookahead == ' ') {
indent_length++;
} else {
indent_length += 8;
}
lexer->advance(lexer, false);
}
} else {
// Not a comment, break
break;
}
}
}
if (found_end_of_line) { if (found_end_of_line) {
uint16_t current_indent = scanner->indent_stack[scanner->indent_stack_size - 1]; uint16_t current_indent = scanner->indent_stack[scanner->indent_stack_size - 1];
@@ -141,7 +294,9 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
return true; return true;
} }
if (valid_symbols[DEDENT] && indent_length < current_indent && scanner->indent_stack_size > 1) {
if (valid_symbols[DEDENT] && (indent_length < current_indent || (lexer->eof(lexer) && current_indent == 0)) && scanner->indent_stack_size > 1) {
scanner->indent_stack_size--; scanner->indent_stack_size--;
while (scanner->indent_stack_size > 1 && while (scanner->indent_stack_size > 1 &&
@@ -157,6 +312,7 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
} }
if (valid_symbols[NEWLINE] && !lexer->eof(lexer)) { if (valid_symbols[NEWLINE] && !lexer->eof(lexer)) {
lexer->mark_end(lexer);
lexer->result_symbol = NEWLINE; lexer->result_symbol = NEWLINE;
return true; return true;
} }

View File

@@ -13,17 +13,12 @@ extern "C" {
#define ts_builtin_sym_end 0 #define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId; typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol; typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId; typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage; typedef struct TSLanguage TSLanguage;
typedef struct TSLanguageMetadata TSLanguageMetadata;
typedef struct TSLanguageMetadata {
uint8_t major_version;
uint8_t minor_version;
uint8_t patch_version;
} TSLanguageMetadata;
#endif #endif
typedef struct { typedef struct {
@@ -32,11 +27,10 @@ typedef struct {
bool inherited; bool inherited;
} TSFieldMapEntry; } TSFieldMapEntry;
// Used to index the field and supertype maps.
typedef struct { typedef struct {
uint16_t index; uint16_t index;
uint16_t length; uint16_t length;
} TSMapSlice; } TSFieldMapSlice;
typedef struct { typedef struct {
bool visible; bool visible;
@@ -54,7 +48,6 @@ struct TSLexer {
uint32_t (*get_column)(TSLexer *); uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *); bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *); bool (*eof)(const TSLexer *);
void (*log)(const TSLexer *, const char *, ...);
}; };
typedef enum { typedef enum {
@@ -86,12 +79,6 @@ typedef struct {
uint16_t external_lex_state; uint16_t external_lex_state;
} TSLexMode; } TSLexMode;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
uint16_t reserved_word_set_id;
} TSLexerMode;
typedef union { typedef union {
TSParseAction action; TSParseAction action;
struct { struct {
@@ -100,13 +87,8 @@ typedef union {
} entry; } entry;
} TSParseActionEntry; } TSParseActionEntry;
typedef struct {
int32_t start;
int32_t end;
} TSCharacterRange;
struct TSLanguage { struct TSLanguage {
uint32_t abi_version; uint32_t version;
uint32_t symbol_count; uint32_t symbol_count;
uint32_t alias_count; uint32_t alias_count;
uint32_t token_count; uint32_t token_count;
@@ -122,13 +104,13 @@ struct TSLanguage {
const TSParseActionEntry *parse_actions; const TSParseActionEntry *parse_actions;
const char * const *symbol_names; const char * const *symbol_names;
const char * const *field_names; const char * const *field_names;
const TSMapSlice *field_map_slices; const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries; const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata; const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map; const TSSymbol *public_symbol_map;
const uint16_t *alias_map; const uint16_t *alias_map;
const TSSymbol *alias_sequences; const TSSymbol *alias_sequences;
const TSLexerMode *lex_modes; const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId); bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId); bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token; TSSymbol keyword_capture_token;
@@ -142,48 +124,15 @@ struct TSLanguage {
void (*deserialize)(void *, const char *, unsigned); void (*deserialize)(void *, const char *, unsigned);
} external_scanner; } external_scanner;
const TSStateId *primary_state_ids; const TSStateId *primary_state_ids;
const char *name;
const TSSymbol *reserved_words;
uint16_t max_reserved_word_set_size;
uint32_t supertype_count;
const TSSymbol *supertype_symbols;
const TSMapSlice *supertype_map_slices;
const TSSymbol *supertype_map_entries;
TSLanguageMetadata metadata;
}; };
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
uint32_t index = 0;
uint32_t size = len - index;
while (size > 1) {
uint32_t half_size = size / 2;
uint32_t mid_index = index + half_size;
const TSCharacterRange *range = &ranges[mid_index];
if (lookahead >= range->start && lookahead <= range->end) {
return true;
} else if (lookahead > range->end) {
index = mid_index;
}
size -= half_size;
}
const TSCharacterRange *range = &ranges[index];
return (lookahead >= range->start && lookahead <= range->end);
}
/* /*
* Lexer Macros * Lexer Macros
*/ */
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
#define START_LEXER() \ #define START_LEXER() \
bool result = false; \ bool result = false; \
bool skip = false; \ bool skip = false; \
UNUSED \
bool eof = false; \ bool eof = false; \
int32_t lookahead; \ int32_t lookahead; \
goto start; \ goto start; \
@@ -199,17 +148,6 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
goto next_state; \ goto next_state; \
} }
#define ADVANCE_MAP(...) \
{ \
static const uint16_t map[] = { __VA_ARGS__ }; \
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
if (map[i] == lookahead) { \
state = map[i + 1]; \
goto next_state; \
} \
} \
}
#define SKIP(state_value) \ #define SKIP(state_value) \
{ \ { \
skip = true; \ skip = true; \
@@ -228,7 +166,7 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
* Parse Table Macros * Parse Table Macros
*/ */
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) #define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define STATE(id) id #define STATE(id) id
@@ -238,7 +176,7 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
{{ \ {{ \
.shift = { \ .shift = { \
.type = TSParseActionTypeShift, \ .type = TSParseActionTypeShift, \
.state = (state_value) \ .state = state_value \
} \ } \
}} }}
@@ -246,7 +184,7 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
{{ \ {{ \
.shift = { \ .shift = { \
.type = TSParseActionTypeShift, \ .type = TSParseActionTypeShift, \
.state = (state_value), \ .state = state_value, \
.repetition = true \ .repetition = true \
} \ } \
}} }}
@@ -259,15 +197,14 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
} \ } \
}} }}
#define REDUCE(symbol_name, children, precedence, prod_id) \ #define REDUCE(symbol_val, child_count_val, ...) \
{{ \ {{ \
.reduce = { \ .reduce = { \
.type = TSParseActionTypeReduce, \ .type = TSParseActionTypeReduce, \
.symbol = symbol_name, \ .symbol = symbol_val, \
.child_count = children, \ .child_count = child_count_val, \
.dynamic_precedence = precedence, \ __VA_ARGS__ \
.production_id = prod_id \ }, \
}, \
}} }}
#define RECOVER() \ #define RECOVER() \