feat: Major grammar improvements and refactoring

- Refactor statement parsing with proper precedence handling
- Improve block structure parsing with indent/dedent support
- Enhance control flow parsing (conditionals, loops)
- Add print command support
- Improve function declaration parsing
- Update scanner for better string and comment handling
- Add comprehensive test corpus
- Improve handling of newlines and statement boundaries
This commit is contained in:
2025-11-26 22:19:38 +01:00
parent b7942e9f79
commit 4d61f91e06
9 changed files with 15323 additions and 18695 deletions

View File

@@ -6,8 +6,13 @@ enum TokenType {
NEWLINE,
INDENT,
DEDENT,
// ASCII_CONTENT,
};
// ... (skipping to logic)
typedef struct {
uint16_t *indent_stack;
size_t indent_stack_size;
@@ -63,10 +68,15 @@ unsigned tree_sitter_stonescript_external_scanner_serialize(void *payload, char
void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = (Scanner *)payload;
size_t size = 0;
scanner->indent_stack_size = 1;
scanner->indent_stack[0] = 0;
scanner->queued_tokens_size = 0;
if (length < sizeof(uint32_t)) return;
uint32_t indent_stack_size = 0;
if (length == 0) return;
size_t i = 0;
@@ -91,6 +101,65 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload;
// if (valid_symbols[ASCII_CONTENT]) {
// bool has_content = false;
//
// for (;;) {
// if (lexer->eof(lexer)) {
// break;
// }
//
// // Check if we're at the start of a line with 'asciiend'
// if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
// lexer->advance(lexer, false);
// if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
// lexer->advance(lexer, false);
// }
// lexer->mark_end(lexer);
// has_content = true;
//
// // Skip whitespace at the start of the line
// while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
// lexer->advance(lexer, false);
// }
//
// // Check if this line starts with 'asciiend'
// if (lexer->lookahead == 'a') {
// const char *keyword = "asciiend";
// bool match = true;
//
// for (int k = 0; k < 8; k++) {
// if (lexer->lookahead == keyword[k]) {
// lexer->advance(lexer, false);
// } else {
// match = false;
// break;
// }
// }
//
// // Check that asciiend is followed by whitespace or EOL
// if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
// lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
// lexer->lookahead == ',' ||
// lexer->eof(lexer))) {
// lexer->result_symbol = ASCII_CONTENT;
// return has_content;
// }
//
// // Failed to match asciiend, mark the current position
// lexer->mark_end(lexer);
// }
// } else {
// lexer->advance(lexer, false);
// lexer->mark_end(lexer);
// has_content = true;
// }
// }
//
// lexer->result_symbol = ASCII_CONTENT;
// return has_content;
// }
if (scanner->queued_tokens_size > 0) {
enum TokenType token = scanner->queued_tokens[0];
for (size_t i = 1; i < scanner->queued_tokens_size; i++) {
@@ -128,6 +197,8 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
}
}
if (found_end_of_line) {
uint16_t current_indent = scanner->indent_stack[scanner->indent_stack_size - 1];
@@ -141,7 +212,9 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
return true;
}
if (valid_symbols[DEDENT] && indent_length < current_indent && scanner->indent_stack_size > 1) {
if (valid_symbols[DEDENT] && (indent_length < current_indent || (lexer->eof(lexer) && current_indent == 0)) && scanner->indent_stack_size > 1) {
scanner->indent_stack_size--;
while (scanner->indent_stack_size > 1 &&
@@ -157,6 +230,7 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
}
if (valid_symbols[NEWLINE] && !lexer->eof(lexer)) {
lexer->mark_end(lexer);
lexer->result_symbol = NEWLINE;
return true;
}