feat: Major grammar improvements and refactoring
- Refactor statement parsing with proper precedence handling
- Improve block structure parsing with indent/dedent support
- Enhance control flow parsing (conditionals, loops)
- Add print command support
- Improve function declaration parsing
- Update scanner for better string and comment handling
- Add comprehensive test corpus
- Better handling of newlines and statement boundaries
This commit is contained in:
@@ -6,8 +6,13 @@ enum TokenType {
|
||||
NEWLINE,
|
||||
INDENT,
|
||||
DEDENT,
|
||||
// ASCII_CONTENT,
|
||||
};
|
||||
|
||||
// ... (skipping to logic)
|
||||
|
||||
|
||||
|
||||
typedef struct {
|
||||
uint16_t *indent_stack;
|
||||
size_t indent_stack_size;
|
||||
@@ -63,10 +68,15 @@ unsigned tree_sitter_stonescript_external_scanner_serialize(void *payload, char
|
||||
|
||||
void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
size_t size = 0;
|
||||
|
||||
scanner->indent_stack_size = 1;
|
||||
scanner->indent_stack[0] = 0;
|
||||
scanner->queued_tokens_size = 0;
|
||||
|
||||
if (length < sizeof(uint32_t)) return;
|
||||
uint32_t indent_stack_size = 0;
|
||||
|
||||
if (length == 0) return;
|
||||
|
||||
size_t i = 0;
|
||||
@@ -91,6 +101,65 @@ void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const c
|
||||
bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
|
||||
// if (valid_symbols[ASCII_CONTENT]) {
|
||||
// bool has_content = false;
|
||||
//
|
||||
// for (;;) {
|
||||
// if (lexer->eof(lexer)) {
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
// // Check if we're at the start of a line with 'asciiend'
|
||||
// if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
|
||||
// lexer->advance(lexer, false);
|
||||
// if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
|
||||
// lexer->advance(lexer, false);
|
||||
// }
|
||||
// lexer->mark_end(lexer);
|
||||
// has_content = true;
|
||||
//
|
||||
// // Skip whitespace at the start of the line
|
||||
// while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
|
||||
// lexer->advance(lexer, false);
|
||||
// }
|
||||
//
|
||||
// // Check if this line starts with 'asciiend'
|
||||
// if (lexer->lookahead == 'a') {
|
||||
// const char *keyword = "asciiend";
|
||||
// bool match = true;
|
||||
//
|
||||
// for (int k = 0; k < 8; k++) {
|
||||
// if (lexer->lookahead == keyword[k]) {
|
||||
// lexer->advance(lexer, false);
|
||||
// } else {
|
||||
// match = false;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // Check that asciiend is followed by whitespace or EOL
|
||||
// if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
|
||||
// lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
|
||||
// lexer->lookahead == ',' ||
|
||||
// lexer->eof(lexer))) {
|
||||
// lexer->result_symbol = ASCII_CONTENT;
|
||||
// return has_content;
|
||||
// }
|
||||
//
|
||||
// // Failed to match asciiend, mark the current position
|
||||
// lexer->mark_end(lexer);
|
||||
// }
|
||||
// } else {
|
||||
// lexer->advance(lexer, false);
|
||||
// lexer->mark_end(lexer);
|
||||
// has_content = true;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lexer->result_symbol = ASCII_CONTENT;
|
||||
// return has_content;
|
||||
// }
|
||||
|
||||
if (scanner->queued_tokens_size > 0) {
|
||||
enum TokenType token = scanner->queued_tokens[0];
|
||||
for (size_t i = 1; i < scanner->queued_tokens_size; i++) {
|
||||
@@ -128,6 +197,8 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (found_end_of_line) {
|
||||
uint16_t current_indent = scanner->indent_stack[scanner->indent_stack_size - 1];
|
||||
|
||||
@@ -141,7 +212,9 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
return true;
|
||||
}
|
||||
|
||||
if (valid_symbols[DEDENT] && indent_length < current_indent && scanner->indent_stack_size > 1) {
|
||||
|
||||
|
||||
if (valid_symbols[DEDENT] && (indent_length < current_indent || (lexer->eof(lexer) && current_indent == 0)) && scanner->indent_stack_size > 1) {
|
||||
scanner->indent_stack_size--;
|
||||
|
||||
while (scanner->indent_stack_size > 1 &&
|
||||
@@ -157,6 +230,7 @@ bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer
|
||||
}
|
||||
|
||||
if (valid_symbols[NEWLINE] && !lexer->eof(lexer)) {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = NEWLINE;
|
||||
return true;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user