#include #include #include enum TokenType { NEWLINE, INDENT, DEDENT, ASCII_CONTENT, BLOCK_COMMENT, }; // ... (skipping to logic) typedef struct { uint16_t *indent_stack; size_t indent_stack_size; size_t indent_stack_capacity; enum TokenType *queued_tokens; size_t queued_tokens_size; size_t queued_tokens_capacity; } Scanner; static void scanner_init(Scanner *scanner) { scanner->indent_stack_capacity = 16; scanner->indent_stack_size = 1; scanner->indent_stack = calloc(scanner->indent_stack_capacity, sizeof(uint16_t)); scanner->indent_stack[0] = 0; scanner->queued_tokens_capacity = 16; scanner->queued_tokens_size = 0; scanner->queued_tokens = calloc(scanner->queued_tokens_capacity, sizeof(enum TokenType)); } void *tree_sitter_stonescript_external_scanner_create() { Scanner *scanner = calloc(1, sizeof(Scanner)); scanner_init(scanner); return scanner; } void tree_sitter_stonescript_external_scanner_destroy(void *payload) { Scanner *scanner = (Scanner *)payload; free(scanner->indent_stack); free(scanner->queued_tokens); free(scanner); } unsigned tree_sitter_stonescript_external_scanner_serialize(void *payload, char *buffer) { Scanner *scanner = (Scanner *)payload; size_t i = 0; if (i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { buffer[i++] = scanner->queued_tokens_size; } for (size_t j = 0; j < scanner->queued_tokens_size && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; j++) { buffer[i++] = scanner->queued_tokens[j]; } for (size_t j = 0; j < scanner->indent_stack_size && i + 2 < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; j++) { buffer[i++] = scanner->indent_stack[j] >> 8; buffer[i++] = scanner->indent_stack[j] & 0xFF; } return i; } void tree_sitter_stonescript_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { Scanner *scanner = (Scanner *)payload; size_t size = 0; scanner->indent_stack_size = 1; scanner->indent_stack[0] = 0; scanner->queued_tokens_size = 0; if (length < sizeof(uint32_t)) return; uint32_t indent_stack_size = 0; if (length == 0) return; size_t i = 0; if (i < length) { size_t queued_count = (uint8_t)buffer[i++]; for (size_t j = 0; j < queued_count && i < length; j++) { if (scanner->queued_tokens_size < scanner->queued_tokens_capacity) { scanner->queued_tokens[scanner->queued_tokens_size++] = (enum TokenType)buffer[i++]; } } } while (i + 1 < length) { uint16_t indent = ((uint8_t)buffer[i] << 8) | (uint8_t)buffer[i + 1]; if (scanner->indent_stack_size < scanner->indent_stack_capacity) { scanner->indent_stack[scanner->indent_stack_size++] = indent; } i += 2; } } bool tree_sitter_stonescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { Scanner *scanner = (Scanner *)payload; // Try to handle block comments whenever we see /* // This needs to run early before other checks if (lexer->lookahead == '/') { lexer->mark_end(lexer); lexer->advance(lexer, false); if (lexer->lookahead == '*') { lexer->advance(lexer, false); // Consume everything until */ while (!lexer->eof(lexer)) { if (lexer->lookahead == '*') { lexer->advance(lexer, false); if (lexer->lookahead == '/') { lexer->advance(lexer, false); lexer->mark_end(lexer); lexer->result_symbol = BLOCK_COMMENT; return true; } } else { lexer->advance(lexer, false); } } // Reached EOF without closing */ return false; } } if (valid_symbols[ASCII_CONTENT]) { bool has_content = false; for (;;) { if (lexer->eof(lexer)) { break; } // Check if we're at the start of a line with 'asciiend' if (lexer->lookahead == '\n') { lexer->advance(lexer, false); lexer->mark_end(lexer); has_content = true; // Skip whitespace at the start of the line while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { lexer->advance(lexer, false); } // Check if this line starts with 'asciiend' if (lexer->lookahead == 'a') { const char *keyword = "asciiend"; bool match = true; for (int k = 0; k < 8; k++) { if (lexer->lookahead == keyword[k]) { lexer->advance(lexer, false); } else { match = false; break; } } // Check that asciiend is followed by whitespace or EOL or closing delimiters if (match && (lexer->lookahead == '\n' || lexer->lookahead == '\r' || lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == ',' || lexer->lookahead == ')' || lexer->lookahead == ']' || lexer->lookahead == 0xFF3D || // ] full-width lexer->eof(lexer))) { lexer->mark_end(lexer); lexer->result_symbol = ASCII_CONTENT; return has_content; } // Failed to match asciiend, mark the current position lexer->mark_end(lexer); } } else { lexer->advance(lexer, false); lexer->mark_end(lexer); has_content = true; } } // If we reached EOF without finding asciiend, this is not valid ASCII content return false; } if (scanner->queued_tokens_size > 0) { enum TokenType token = scanner->queued_tokens[0]; for (size_t i = 1; i < scanner->queued_tokens_size; i++) { scanner->queued_tokens[i - 1] = scanner->queued_tokens[i]; } scanner->queued_tokens_size--; lexer->result_symbol = token; return true; } bool found_end_of_line = false; uint32_t indent_length = 0; int32_t first_comment_indent = -1; for (;;) { if (lexer->lookahead == '\n') { found_end_of_line = true; indent_length = 0; lexer->advance(lexer, false); // After consuming \n, only consume whitespace on the SAME logical line // Don't continue to next line break; } else if (lexer->lookahead == '\r') { // Consume \r as part of line ending (for CRLF), don't skip it lexer->advance(lexer, false); // Continue to potentially consume \n that follows \r } else if (lexer->lookahead == ' ') { indent_length++; lexer->advance(lexer, false); } else if (lexer->lookahead == '\f') { indent_length = 0; lexer->advance(lexer, false); } else if (lexer->lookahead == '\t') { indent_length += 8; lexer->advance(lexer, false); } else if (lexer->eof(lexer)) { found_end_of_line = true; break; } else { break; } } // After breaking from newline, consume leading whitespace/indentation if (found_end_of_line && !lexer->eof(lexer)) { while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { if (lexer->lookahead == ' ') { indent_length++; } else { indent_length += 8; } lexer->advance(lexer, false); } // Skip comment-only lines when measuring indentation while (lexer->lookahead == '/' && !lexer->eof(lexer)) { lexer->mark_end(lexer); lexer->advance(lexer, false); // Check if this is a comment if (lexer->lookahead == '/') { // Skip the rest of the comment line while (lexer->lookahead != '\n' && lexer->lookahead != '\r' && !lexer->eof(lexer)) { lexer->advance(lexer, false); } // Skip newline if (lexer->lookahead == '\r') { lexer->advance(lexer, false); } if (lexer->lookahead == '\n') { lexer->advance(lexer, false); } // Measure indentation of next line indent_length = 0; while (lexer->lookahead == ' ' || lexer->lookahead == '\t') { if (lexer->lookahead == ' ') { indent_length++; } else { indent_length += 8; } lexer->advance(lexer, false); } } else { // Not a comment, break break; } } } if (found_end_of_line) { uint16_t current_indent = scanner->indent_stack[scanner->indent_stack_size - 1]; if (valid_symbols[INDENT] && indent_length > current_indent) { if (scanner->indent_stack_size >= scanner->indent_stack_capacity) { scanner->indent_stack_capacity *= 2; scanner->indent_stack = realloc(scanner->indent_stack, scanner->indent_stack_capacity * sizeof(uint16_t)); } scanner->indent_stack[scanner->indent_stack_size++] = indent_length; lexer->result_symbol = INDENT; return true; } if (valid_symbols[DEDENT] && (indent_length < current_indent || (lexer->eof(lexer) && current_indent == 0)) && scanner->indent_stack_size > 1) { scanner->indent_stack_size--; while (scanner->indent_stack_size > 1 && indent_length < scanner->indent_stack[scanner->indent_stack_size - 1]) { scanner->indent_stack_size--; if (scanner->queued_tokens_size < scanner->queued_tokens_capacity) { scanner->queued_tokens[scanner->queued_tokens_size++] = DEDENT; } } lexer->result_symbol = DEDENT; return true; } if (valid_symbols[NEWLINE] && !lexer->eof(lexer)) { lexer->mark_end(lexer); lexer->result_symbol = NEWLINE; return true; } } return false; }