Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3b8318940a | |||
| 36d6c3947a | |||
| 06e6e3b098 | |||
| 9a1dcb941d | |||
| eaf0963459 | |||
| 0b78c43138 | |||
| 99dadd9ca7 | |||
| b746fcec44 | |||
| 4d61f91e06 |
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -80,7 +80,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-stonescript"
|
||||
version = "0.0.1"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "tree-sitter-stonescript"
|
||||
description = "stonescript grammar for the tree-sitter parsing library"
|
||||
version = "0.0.1"
|
||||
version = "0.1.0"
|
||||
keywords = ["incremental", "parsing", "stonescript"]
|
||||
categories = ["parsing", "text-editors"]
|
||||
repository = "https://github.com/tree-sitter/tree-sitter-stonescript"
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
==================
|
||||
Basic Command
|
||||
==================
|
||||
var x = 1
|
||||
------------------
|
||||
(source_file
|
||||
(command
|
||||
(identifier)
|
||||
(binary_expression
|
||||
(identifier)
|
||||
(number)
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
==================
|
||||
Conditional
|
||||
==================
|
||||
? x > 0
|
||||
print "Hello"
|
||||
|
||||
------------------
|
||||
(source_file
|
||||
(conditional
|
||||
(binary_expression
|
||||
(identifier)
|
||||
(number)
|
||||
)
|
||||
(block
|
||||
(command
|
||||
(identifier)
|
||||
(string)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
13
corpus/debug.txt
Normal file
13
corpus/debug.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
Debug EOF
|
||||
=========
|
||||
?hp < 10
|
||||
activate potion
|
||||
---
|
||||
(source_file
|
||||
(conditional
|
||||
(binary_expression (identifier) (number))
|
||||
(block
|
||||
(command (identifier) (identifier))
|
||||
)
|
||||
)
|
||||
)
|
||||
@@ -145,9 +145,9 @@ equipL poison wand
|
||||
equipR vigor shield *7 +5
|
||||
------------------
|
||||
(source_file
|
||||
(command (identifier))
|
||||
(command (identifier) (identifier))
|
||||
(command (identifier) (identifier) (identifier) (number) (number))
|
||||
(command (identifier) (identifier) (identifier))
|
||||
(command (identifier) (identifier) (identifier) (star_level (number)) (enchantment_level (number)))
|
||||
)
|
||||
|
||||
==================
|
||||
@@ -158,9 +158,9 @@ activate R
|
||||
loadout 1
|
||||
------------------
|
||||
(source_file
|
||||
(command (identifier))
|
||||
(command (identifier))
|
||||
(command (number))
|
||||
(command (identifier) (identifier))
|
||||
(command (identifier) (identifier))
|
||||
(command (identifier) (number))
|
||||
)
|
||||
|
||||
==================
|
||||
@@ -299,39 +299,39 @@ Real Example from Manual
|
||||
(source_file
|
||||
(conditional
|
||||
(binary_expression (identifier) (identifier))
|
||||
(block (command (identifier)))
|
||||
(block (command (identifier) (identifier)))
|
||||
)
|
||||
(conditional
|
||||
(binary_expression (identifier) (identifier))
|
||||
(block
|
||||
(command (number))
|
||||
(conditional
|
||||
(binary_expression (identifier) (identifier))
|
||||
(block
|
||||
(command (identifier))
|
||||
(command (identifier) (identifier) (number))
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
(command (identifier) (number))
|
||||
(conditional
|
||||
(binary_expression (identifier) (identifier))
|
||||
(block
|
||||
(command (identifier) (identifier))
|
||||
(command (identifier) (identifier))
|
||||
(command (identifier) (identifier) (star_level (number)))
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
(conditional
|
||||
(binary_expression (identifier) (identifier))
|
||||
(block
|
||||
(command (identifier) (identifier) (identifier))
|
||||
(command (identifier) (identifier) (identifier))
|
||||
(conditional
|
||||
(binary_expression
|
||||
(member_expression (identifier) (identifier))
|
||||
(number)
|
||||
)
|
||||
(block
|
||||
(command (identifier) (identifier) (identifier) (number))
|
||||
(command (identifier) (identifier) (identifier) (enchantment_level (number)))
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
(conditional
|
||||
(binary_expression (identifier) (number))
|
||||
(block (command (identifier)))
|
||||
(block (command (identifier) (identifier)))
|
||||
)
|
||||
)
|
||||
|
||||
22
corpus/test_ascii.txt
Normal file
22
corpus/test_ascii.txt
Normal file
@@ -0,0 +1,22 @@
|
||||
==================
|
||||
ASCII Array
|
||||
==================
|
||||
var x = [ascii
|
||||
foo
|
||||
asciiend
|
||||
, ascii
|
||||
bar
|
||||
asciiend
|
||||
]
|
||||
------------------
|
||||
(source_file
|
||||
(variable_declaration
|
||||
(identifier)
|
||||
(array
|
||||
(array_elements
|
||||
(ascii_string (ascii_content))
|
||||
(ascii_string (ascii_content))
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
218
grammar.js
218
grammar.js
@@ -4,8 +4,9 @@ module.exports = grammar({
|
||||
rules: {
|
||||
source_file: $ => repeat($._statement),
|
||||
|
||||
_statement: $ => choice(
|
||||
// Comments first
|
||||
_statement: $ => prec.right(seq(
|
||||
choice(
|
||||
$._newline,
|
||||
$.comment,
|
||||
$.block_comment,
|
||||
// Keyword-based statements (must come before generic command)
|
||||
@@ -15,27 +16,21 @@ module.exports = grammar({
|
||||
$.return_statement, // 'return'
|
||||
$.break_statement, // 'break'
|
||||
$.continue_statement, // 'continue'
|
||||
$.import_statement, // 'import'
|
||||
$.new_expression, // 'new'
|
||||
// Control flow
|
||||
$.conditional, // '?'
|
||||
$.else_if_clause, // ':?'
|
||||
$.else_clause, // ':'
|
||||
// Commands (after keywords!)
|
||||
$.command_statement,
|
||||
// Commands (higher precedence!)
|
||||
prec.dynamic(1, $.command),
|
||||
$.print_command,
|
||||
// Fallback
|
||||
$.expression_statement
|
||||
),
|
||||
optional($._newline)
|
||||
)),
|
||||
|
||||
// Comments
|
||||
comment: $ => token(seq('//', /.*/)),
|
||||
|
||||
block_comment: $ => token(seq(
|
||||
'/*',
|
||||
/[^*]*\*+(?:[^/*][^*]*\*+)*/,
|
||||
'/'
|
||||
)),
|
||||
|
||||
// Variable declaration
|
||||
variable_declaration: $ => seq(
|
||||
'var',
|
||||
@@ -50,13 +45,7 @@ module.exports = grammar({
|
||||
'(',
|
||||
optional($.parameter_list),
|
||||
')',
|
||||
optional($.function_body)
|
||||
),
|
||||
|
||||
function_body: $ => seq(
|
||||
$._indent,
|
||||
repeat1($._statement),
|
||||
$._dedent
|
||||
$.block
|
||||
),
|
||||
|
||||
parameter_list: $ => seq(
|
||||
@@ -73,29 +62,29 @@ module.exports = grammar({
|
||||
$._expression,
|
||||
'..',
|
||||
$._expression,
|
||||
optional($.block)
|
||||
$.block
|
||||
),
|
||||
seq(
|
||||
'for',
|
||||
$.identifier,
|
||||
':',
|
||||
$._expression,
|
||||
optional($.block)
|
||||
$.block
|
||||
)
|
||||
),
|
||||
|
||||
// Import
|
||||
import_statement: $ => seq(
|
||||
import_expression: $ => seq(
|
||||
'import',
|
||||
$.module_path
|
||||
),
|
||||
|
||||
new_expression: $ => seq(
|
||||
new_statement: $ => seq(
|
||||
'new',
|
||||
$.module_path
|
||||
),
|
||||
|
||||
module_path: $ => /[a-zA-Z_][a-zA-Z0-9_\/]*/,
|
||||
module_path: $ => /[a-zA-Z_][a-zA-Z0-9_\\/]*/,
|
||||
|
||||
// Control flow
|
||||
return_statement: $ => prec.right(seq(
|
||||
@@ -111,18 +100,12 @@ module.exports = grammar({
|
||||
conditional: $ => seq(
|
||||
'?',
|
||||
$._expression,
|
||||
optional($.block)
|
||||
$.block
|
||||
),
|
||||
|
||||
else_if_clause: $ => seq(
|
||||
':?',
|
||||
$._expression,
|
||||
optional($.block)
|
||||
),
|
||||
|
||||
else_clause: $ => seq(
|
||||
':',
|
||||
optional($.block)
|
||||
else_clause: $ => choice(
|
||||
seq(':?', $._expression, $.block),
|
||||
seq(':', $.block)
|
||||
),
|
||||
|
||||
block: $ => seq(
|
||||
@@ -131,76 +114,49 @@ module.exports = grammar({
|
||||
$._dedent
|
||||
),
|
||||
|
||||
// Commands - specific patterns
|
||||
command_statement: $ => choice(
|
||||
$.equip_command,
|
||||
$.activate_command,
|
||||
$.loadout_command,
|
||||
$.brew_command,
|
||||
$.disable_enable_command,
|
||||
$.play_command,
|
||||
$.print_command
|
||||
),
|
||||
|
||||
equip_command: $ => prec.left(seq(
|
||||
choice('equip', 'equipL', 'equipR'),
|
||||
repeat1($.item_criteria)
|
||||
)),
|
||||
|
||||
item_criteria: $ => prec.left(choice(
|
||||
// Commands - Generic structure to match tests
|
||||
// Must have at least one argument to distinguish from simple identifier expressions
|
||||
command: $ => prec.dynamic(1, prec.right(seq(
|
||||
$.identifier,
|
||||
repeat1($._command_arg)
|
||||
))),
|
||||
|
||||
_command_arg: $ => choice(
|
||||
$.identifier,
|
||||
$.number,
|
||||
$.string,
|
||||
$.star_level,
|
||||
$.enchantment_level
|
||||
)),
|
||||
),
|
||||
|
||||
star_level: $ => seq('*', $.number),
|
||||
|
||||
enchantment_level: $ => seq('+', $.number),
|
||||
|
||||
activate_command: $ => seq(
|
||||
'activate',
|
||||
choice(
|
||||
$.identifier,
|
||||
'P', 'L', 'R'
|
||||
)
|
||||
),
|
||||
|
||||
loadout_command: $ => seq(
|
||||
'loadout',
|
||||
$.number
|
||||
),
|
||||
|
||||
brew_command: $ => seq(
|
||||
'brew',
|
||||
$.identifier,
|
||||
repeat(seq('+', $.identifier))
|
||||
),
|
||||
|
||||
disable_enable_command: $ => prec.left(seq(
|
||||
choice('disable', 'enable'),
|
||||
choice(
|
||||
'abilities', 'hud', 'banner',
|
||||
'loadout', 'npcDialog', 'pause', 'player'
|
||||
)
|
||||
)),
|
||||
|
||||
play_command: $ => prec.left(seq(
|
||||
'play',
|
||||
$.identifier,
|
||||
optional($.number)
|
||||
)),
|
||||
|
||||
print_command: $ => prec.right(seq(
|
||||
choice('>', '>o', '>h', '>`', '>c', '>f'),
|
||||
repeat(choice(
|
||||
$.identifier,
|
||||
$.string,
|
||||
$.number,
|
||||
$.color_code,
|
||||
','
|
||||
))
|
||||
optional($.print_args)
|
||||
)),
|
||||
|
||||
// Print specific helpers
|
||||
print_args: $ => sep1(',', $.print_argument),
|
||||
|
||||
print_argument: $ => prec.left(repeat1(choice(
|
||||
$.interpolation,
|
||||
$.string,
|
||||
$.ascii_string,
|
||||
$.color_code,
|
||||
$.print_text
|
||||
))),
|
||||
|
||||
print_text: $ => /[^,@\r\n"]+/,
|
||||
|
||||
interpolation: $ => seq(
|
||||
'@',
|
||||
$._expression,
|
||||
'@'
|
||||
),
|
||||
|
||||
color_code: $ => /#[a-zA-Z0-9]+/,
|
||||
|
||||
// Expressions
|
||||
@@ -213,6 +169,7 @@ module.exports = grammar({
|
||||
$.string,
|
||||
$.boolean,
|
||||
$.null,
|
||||
$.ascii_string,
|
||||
$.array,
|
||||
$.member_expression,
|
||||
$.call_expression,
|
||||
@@ -222,7 +179,9 @@ module.exports = grammar({
|
||||
$.update_expression,
|
||||
$.assignment_expression,
|
||||
$.parenthesized_expression,
|
||||
$.new_expression
|
||||
$.new_statement,
|
||||
$.import_expression,
|
||||
$.color_code
|
||||
),
|
||||
|
||||
member_expression: $ => prec.left(15, seq(
|
||||
@@ -244,9 +203,9 @@ module.exports = grammar({
|
||||
|
||||
index_expression: $ => prec.left(13, seq(
|
||||
$._expression,
|
||||
'[',
|
||||
choice('[', '['),
|
||||
$._expression,
|
||||
']'
|
||||
choice(']', ']')
|
||||
)),
|
||||
|
||||
unary_expression: $ => prec.right(12, seq(
|
||||
@@ -256,19 +215,11 @@ module.exports = grammar({
|
||||
|
||||
// Binary operators with proper precedence
|
||||
binary_expression: $ => choice(
|
||||
prec.left(4, seq($._expression, '|', $._expression)),
|
||||
prec.left(5, seq($._expression, '&', $._expression)),
|
||||
prec.left(7, seq($._expression, '!', $._expression)),
|
||||
prec.left(7, seq($._expression, '=', $._expression)),
|
||||
prec.left(8, seq($._expression, '<', $._expression)),
|
||||
prec.left(8, seq($._expression, '>', $._expression)),
|
||||
prec.left(8, seq($._expression, '<=', $._expression)),
|
||||
prec.left(8, seq($._expression, '>=', $._expression)),
|
||||
prec.left(9, seq($._expression, '+', $._expression)),
|
||||
prec.left(9, seq($._expression, '-', $._expression)),
|
||||
prec.left(10, seq($._expression, '*', $._expression)),
|
||||
prec.left(10, seq($._expression, '/', $._expression)),
|
||||
prec.left(11, seq($._expression, '%', $._expression))
|
||||
prec.left(6, seq($._expression, choice('*', '/', '%'), $._expression)),
|
||||
prec.left(5, seq($._expression, choice('+', '-'), $._expression)),
|
||||
prec.left(4, seq($._expression, choice('=', '!=', '!', '<', '>', '<=', '>='), $._expression)),
|
||||
prec.left(3, seq($._expression, '&', $._expression)),
|
||||
prec.left(2, seq($._expression, '|', $._expression))
|
||||
),
|
||||
|
||||
update_expression: $ => choice(
|
||||
@@ -290,10 +241,23 @@ module.exports = grammar({
|
||||
|
||||
// Arrays
|
||||
array: $ => seq(
|
||||
'[',
|
||||
optional(sep1($.comma_sep, $._expression)),
|
||||
optional(','),
|
||||
']'
|
||||
choice('[', '['),
|
||||
repeat($._newline),
|
||||
optional(seq(
|
||||
$.array_elements,
|
||||
repeat($._newline)
|
||||
)),
|
||||
choice(']', ']')
|
||||
),
|
||||
|
||||
array_elements: $ => seq(
|
||||
$._expression,
|
||||
repeat(seq(
|
||||
',',
|
||||
repeat($._newline),
|
||||
$._expression
|
||||
)),
|
||||
optional(',')
|
||||
),
|
||||
|
||||
// Primitives
|
||||
@@ -303,21 +267,31 @@ module.exports = grammar({
|
||||
|
||||
float: $ => /\d+\.\d+/,
|
||||
|
||||
string: $ => seq('"', repeat(choice(/[^"\\]/, /\\./)), '"'),
|
||||
string: $ => choice(
|
||||
seq('"', repeat(choice(/[^"\\]/, /\\./)), '"'),
|
||||
seq('"', repeat(choice(/[^"\\]/, /\\./)), '"')
|
||||
),
|
||||
|
||||
boolean: $ => choice('true', 'false'),
|
||||
|
||||
null: $ => 'null'
|
||||
null: $ => 'null',
|
||||
|
||||
ascii_string: $ => seq('ascii', $.ascii_content)
|
||||
},
|
||||
|
||||
extras: $ => [
|
||||
/\s/
|
||||
/[ \t\r\f]/,
|
||||
/\r?\n[ \t]*\^/,
|
||||
$.comment,
|
||||
$.block_comment
|
||||
],
|
||||
|
||||
externals: $ => [
|
||||
$._newline,
|
||||
$._indent,
|
||||
$._dedent
|
||||
$._dedent,
|
||||
$.ascii_content,
|
||||
$.block_comment
|
||||
],
|
||||
|
||||
word: $ => $.identifier,
|
||||
@@ -325,10 +299,12 @@ module.exports = grammar({
|
||||
conflicts: $ => [
|
||||
[$.identifier, $.string],
|
||||
[$._expression],
|
||||
[$.command_statement],
|
||||
[$._statement, $._expression], // new_expression can be both
|
||||
[$.equip_command], // handle repeat ambiguity
|
||||
[$.binary_expression, $.assignment_expression] // = operator ambiguity
|
||||
[$.command],
|
||||
[$._statement, $._expression], // new_statement can be both
|
||||
[$.binary_expression, $.assignment_expression], // = operator ambiguity
|
||||
[$.command, $._expression], // * operator ambiguity
|
||||
[$.array_elements],
|
||||
[$.ascii_string]
|
||||
]
|
||||
});
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "tree-sitter-stonescript",
|
||||
"version": "0.0.1",
|
||||
"version": "0.1.0",
|
||||
"description": "StoneScript grammar for tree-sitter",
|
||||
"main": "bindings/node",
|
||||
"types": "bindings/node",
|
||||
|
||||
964
src/grammar.json
generated
964
src/grammar.json
generated
File diff suppressed because it is too large
Load Diff
707
src/node-types.json
generated
707
src/node-types.json
generated
File diff suppressed because it is too large
Load Diff
32878
src/parser.c
generated
32878
src/parser.c
generated
File diff suppressed because it is too large
Load Diff
160
src/scanner.c
160
src/scanner.c
@@ -6,8 +6,14 @@ enum TokenType {
|
||||
NEWLINE,
|
||||
INDENT,
|
||||
DEDENT,
|
||||
ASCII_CONTENT,
|
||||
BLOCK_COMMENT,
|
||||
};
|
||||
|
||||
// ... (skipping to logic)
|
||||
|
||||
|
||||
|
||||
typedef struct {
|
||||
uint16_t *indent_stack;
|
||||
size_t indent_stack_size;
|
||||
@@ -63,10 +69,15 @@ unsigned tree_sitter_stonescript_external_scanner_serialize(void *payload, char
|
||||
|
||||
void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
size_t size = 0;
|
||||
|
||||
scanner->indent_stack_size = 1;
|
||||
scanner->indent_stack[0] = 0;
|
||||
scanner->queued_tokens_size = 0;
|
||||
|
||||
if (length < sizeof(uint32_t)) return;
|
||||
uint32_t indent_stack_size = 0;
|
||||
|
||||
if (length == 0) return;
|
||||
|
||||
size_t i = 0;
|
||||
@@ -91,6 +102,90 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
|
||||
bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
|
||||
// Try to handle block comments when parser expects them
|
||||
// Only check if valid_symbols allows BLOCK_COMMENT
|
||||
if (valid_symbols[BLOCK_COMMENT] && lexer->lookahead == '/') {
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == '*') {
|
||||
lexer->advance(lexer, false);
|
||||
|
||||
// Consume everything until */
|
||||
while (!lexer->eof(lexer)) {
|
||||
if (lexer->lookahead == '*') {
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == '/') {
|
||||
lexer->advance(lexer, false);
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = BLOCK_COMMENT;
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
}
|
||||
// Reached EOF without closing */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_symbols[ASCII_CONTENT]) {
|
||||
bool has_content = false;
|
||||
|
||||
for (;;) {
|
||||
if (lexer->eof(lexer)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Check if we're at the start of a line with 'asciiend'
|
||||
if (lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
lexer->mark_end(lexer);
|
||||
has_content = true;
|
||||
|
||||
// Skip whitespace at the start of the line
|
||||
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
// Check if this line starts with 'asciiend'
|
||||
if (lexer->lookahead == 'a') {
|
||||
const char *keyword = "asciiend";
|
||||
bool match = true;
|
||||
|
||||
for (int k = 0; k < 8; k++) {
|
||||
if (lexer->lookahead == keyword[k]) {
|
||||
lexer->advance(lexer, false);
|
||||
} else {
|
||||
match = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Check that asciiend is followed by whitespace or EOL or closing delimiters
|
||||
if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
|
||||
lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
|
||||
lexer->lookahead == ',' || lexer->lookahead == ')' ||
|
||||
lexer->lookahead == ']' || lexer->lookahead == 0xFF3D || // ] full-width
|
||||
lexer->eof(lexer))) {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = ASCII_CONTENT;
|
||||
return has_content;
|
||||
}
|
||||
|
||||
// Failed to match asciiend, mark the current position
|
||||
lexer->mark_end(lexer);
|
||||
}
|
||||
} else {
|
||||
lexer->advance(lexer, false);
|
||||
lexer->mark_end(lexer);
|
||||
has_content = true;
|
||||
}
|
||||
}
|
||||
|
||||
// If we reached EOF without finding asciiend, this is not valid ASCII content
|
||||
return false;
|
||||
}
|
||||
|
||||
if (scanner->queued_tokens_size > 0) {
|
||||
enum TokenType token = scanner->queued_tokens[0];
|
||||
for (size_t i = 1; i < scanner->queued_tokens_size; i++) {
|
||||
@@ -111,10 +206,17 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
found_end_of_line = true;
|
||||
indent_length = 0;
|
||||
lexer->advance(lexer, false);
|
||||
// After consuming \n, only consume whitespace on the SAME logical line
|
||||
// Don't continue to next line
|
||||
break;
|
||||
} else if (lexer->lookahead == '\r') {
|
||||
// Consume \r as part of line ending (for CRLF), don't skip it
|
||||
lexer->advance(lexer, false);
|
||||
// Continue to potentially consume \n that follows \r
|
||||
} else if (lexer->lookahead == ' ') {
|
||||
indent_length++;
|
||||
lexer->advance(lexer, false);
|
||||
} else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') {
|
||||
} else if (lexer->lookahead == '\f') {
|
||||
indent_length = 0;
|
||||
lexer->advance(lexer, false);
|
||||
} else if (lexer->lookahead == '\t') {
|
||||
@@ -128,6 +230,57 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
}
|
||||
}
|
||||
|
||||
// After breaking from newline, consume leading whitespace/indentation
|
||||
if (found_end_of_line && !lexer->eof(lexer)) {
|
||||
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
|
||||
if (lexer->lookahead == ' ') {
|
||||
indent_length++;
|
||||
} else {
|
||||
indent_length += 8;
|
||||
}
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
// Skip comment-only lines when measuring indentation
|
||||
while (lexer->lookahead == '/' && !lexer->eof(lexer)) {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->advance(lexer, false);
|
||||
|
||||
// Check if this is a comment
|
||||
if (lexer->lookahead == '/') {
|
||||
// Skip the rest of the comment line
|
||||
while (lexer->lookahead != '\n' && lexer->lookahead != '\r' && !lexer->eof(lexer)) {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
// Skip newline
|
||||
if (lexer->lookahead == '\r') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
if (lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
// Measure indentation of next line
|
||||
indent_length = 0;
|
||||
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
|
||||
if (lexer->lookahead == ' ') {
|
||||
indent_length++;
|
||||
} else {
|
||||
indent_length += 8;
|
||||
}
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
} else {
|
||||
// Not a comment, break
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
if (found_end_of_line) {
|
||||
uint16_t current_indent = scanner->indent_stack[scanner->indent_stack_size - 1];
|
||||
|
||||
@@ -141,7 +294,9 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
return true;
|
||||
}
|
||||
|
||||
if (valid_symbols[DEDENT] && indent_length < current_indent && scanner->indent_stack_size > 1) {
|
||||
|
||||
|
||||
if (valid_symbols[DEDENT] && (indent_length < current_indent || (lexer->eof(lexer) && current_indent == 0)) && scanner->indent_stack_size > 1) {
|
||||
scanner->indent_stack_size--;
|
||||
|
||||
while (scanner->indent_stack_size > 1 &&
|
||||
@@ -157,6 +312,7 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
}
|
||||
|
||||
if (valid_symbols[NEWLINE] && !lexer->eof(lexer)) {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = NEWLINE;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -13,17 +13,12 @@ extern "C" {
|
||||
#define ts_builtin_sym_end 0
|
||||
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
|
||||
|
||||
#ifndef TREE_SITTER_API_H_
|
||||
typedef uint16_t TSStateId;
|
||||
|
||||
#ifndef TREE_SITTER_API_H_
|
||||
typedef uint16_t TSSymbol;
|
||||
typedef uint16_t TSFieldId;
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
typedef struct TSLanguageMetadata TSLanguageMetadata;
|
||||
typedef struct TSLanguageMetadata {
|
||||
uint8_t major_version;
|
||||
uint8_t minor_version;
|
||||
uint8_t patch_version;
|
||||
} TSLanguageMetadata;
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
@@ -32,11 +27,10 @@ typedef struct {
|
||||
bool inherited;
|
||||
} TSFieldMapEntry;
|
||||
|
||||
// Used to index the field and supertype maps.
|
||||
typedef struct {
|
||||
uint16_t index;
|
||||
uint16_t length;
|
||||
} TSMapSlice;
|
||||
} TSFieldMapSlice;
|
||||
|
||||
typedef struct {
|
||||
bool visible;
|
||||
@@ -54,7 +48,6 @@ struct TSLexer {
|
||||
uint32_t (*get_column)(TSLexer *);
|
||||
bool (*is_at_included_range_start)(const TSLexer *);
|
||||
bool (*eof)(const TSLexer *);
|
||||
void (*log)(const TSLexer *, const char *, ...);
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
@@ -86,12 +79,6 @@ typedef struct {
|
||||
uint16_t external_lex_state;
|
||||
} TSLexMode;
|
||||
|
||||
typedef struct {
|
||||
uint16_t lex_state;
|
||||
uint16_t external_lex_state;
|
||||
uint16_t reserved_word_set_id;
|
||||
} TSLexerMode;
|
||||
|
||||
typedef union {
|
||||
TSParseAction action;
|
||||
struct {
|
||||
@@ -100,13 +87,8 @@ typedef union {
|
||||
} entry;
|
||||
} TSParseActionEntry;
|
||||
|
||||
typedef struct {
|
||||
int32_t start;
|
||||
int32_t end;
|
||||
} TSCharacterRange;
|
||||
|
||||
struct TSLanguage {
|
||||
uint32_t abi_version;
|
||||
uint32_t version;
|
||||
uint32_t symbol_count;
|
||||
uint32_t alias_count;
|
||||
uint32_t token_count;
|
||||
@@ -122,13 +104,13 @@ struct TSLanguage {
|
||||
const TSParseActionEntry *parse_actions;
|
||||
const char * const *symbol_names;
|
||||
const char * const *field_names;
|
||||
const TSMapSlice *field_map_slices;
|
||||
const TSFieldMapSlice *field_map_slices;
|
||||
const TSFieldMapEntry *field_map_entries;
|
||||
const TSSymbolMetadata *symbol_metadata;
|
||||
const TSSymbol *public_symbol_map;
|
||||
const uint16_t *alias_map;
|
||||
const TSSymbol *alias_sequences;
|
||||
const TSLexerMode *lex_modes;
|
||||
const TSLexMode *lex_modes;
|
||||
bool (*lex_fn)(TSLexer *, TSStateId);
|
||||
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
|
||||
TSSymbol keyword_capture_token;
|
||||
@@ -142,48 +124,15 @@ struct TSLanguage {
|
||||
void (*deserialize)(void *, const char *, unsigned);
|
||||
} external_scanner;
|
||||
const TSStateId *primary_state_ids;
|
||||
const char *name;
|
||||
const TSSymbol *reserved_words;
|
||||
uint16_t max_reserved_word_set_size;
|
||||
uint32_t supertype_count;
|
||||
const TSSymbol *supertype_symbols;
|
||||
const TSMapSlice *supertype_map_slices;
|
||||
const TSSymbol *supertype_map_entries;
|
||||
TSLanguageMetadata metadata;
|
||||
};
|
||||
|
||||
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
|
||||
uint32_t index = 0;
|
||||
uint32_t size = len - index;
|
||||
while (size > 1) {
|
||||
uint32_t half_size = size / 2;
|
||||
uint32_t mid_index = index + half_size;
|
||||
const TSCharacterRange *range = &ranges[mid_index];
|
||||
if (lookahead >= range->start && lookahead <= range->end) {
|
||||
return true;
|
||||
} else if (lookahead > range->end) {
|
||||
index = mid_index;
|
||||
}
|
||||
size -= half_size;
|
||||
}
|
||||
const TSCharacterRange *range = &ranges[index];
|
||||
return (lookahead >= range->start && lookahead <= range->end);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lexer Macros
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define UNUSED __pragma(warning(suppress : 4101))
|
||||
#else
|
||||
#define UNUSED __attribute__((unused))
|
||||
#endif
|
||||
|
||||
#define START_LEXER() \
|
||||
bool result = false; \
|
||||
bool skip = false; \
|
||||
UNUSED \
|
||||
bool eof = false; \
|
||||
int32_t lookahead; \
|
||||
goto start; \
|
||||
@@ -199,17 +148,6 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define ADVANCE_MAP(...) \
|
||||
{ \
|
||||
static const uint16_t map[] = { __VA_ARGS__ }; \
|
||||
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
|
||||
if (map[i] == lookahead) { \
|
||||
state = map[i + 1]; \
|
||||
goto next_state; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
#define SKIP(state_value) \
|
||||
{ \
|
||||
skip = true; \
|
||||
@@ -228,7 +166,7 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
|
||||
* Parse Table Macros
|
||||
*/
|
||||
|
||||
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
|
||||
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
|
||||
|
||||
#define STATE(id) id
|
||||
|
||||
@@ -238,7 +176,7 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
|
||||
{{ \
|
||||
.shift = { \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.state = (state_value) \
|
||||
.state = state_value \
|
||||
} \
|
||||
}}
|
||||
|
||||
@@ -246,7 +184,7 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
|
||||
{{ \
|
||||
.shift = { \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.state = (state_value), \
|
||||
.state = state_value, \
|
||||
.repetition = true \
|
||||
} \
|
||||
}}
|
||||
@@ -259,14 +197,13 @@ static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, in
|
||||
} \
|
||||
}}
|
||||
|
||||
#define REDUCE(symbol_name, children, precedence, prod_id) \
|
||||
#define REDUCE(symbol_val, child_count_val, ...) \
|
||||
{{ \
|
||||
.reduce = { \
|
||||
.type = TSParseActionTypeReduce, \
|
||||
.symbol = symbol_name, \
|
||||
.child_count = children, \
|
||||
.dynamic_precedence = precedence, \
|
||||
.production_id = prod_id \
|
||||
.symbol = symbol_val, \
|
||||
.child_count = child_count_val, \
|
||||
__VA_ARGS__ \
|
||||
}, \
|
||||
}}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user