Word Unperfect
public
Read
Owner: themaster
Branch: main
Commits: 0
Git CLI clone URL
git clone https://www.xt-emporium.com/git/word-unperfect.git
Fullscreen desktop URL
Code
Commits
History
Branches
Bug Reports
Discussions
Compare
Settings
word-unperfect
/
rev
/
wp_document_model.c
File editor
#include "wp_document_model.h"
#include "wp_resource_manager.h"

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/*
 * Editable token model of a document body.
 *
 * The body is held as an ordered array of tokens; each token owns a private
 * copy of the raw bytes it stands for, and model->body_size is kept equal to
 * the sum of all token lengths.  Public mutators work on a deep clone and
 * only replace the caller's model once the whole edit has succeeded.
 */

/*
 * Byte codes written by this file.  The names describe how each code is used
 * here (which host character or role it is emitted for), nothing more.
 */
enum {
    WPDM_CODE_LINE_BREAK = 0x80,    /* emitted for '\r', '\n' and "\r\n" */
    WPDM_CODE_FORM_FEED = 0x82,     /* emitted for '\f' */
    WPDM_CODE_TAB = 0x84,           /* emitted for '\t' */
    WPDM_CODE_EXTENDED_CHAR = 0xC0, /* 4-byte record for a non-ASCII character */
    WPDM_CODE_SPAN_OPEN = 0xC3,     /* 3-byte packet placed before an attribute span */
    WPDM_CODE_SPAN_CLOSE = 0xC4     /* 3-byte packet placed after an attribute span */
};

/** Release the bytes owned by a token and reset it to all-zero. NULL is ignored. */
static void wp_document_token_clear(WpDocumentToken *token)
{
    if (token == NULL) {
        return;
    }
    free(token->bytes);
    memset(token, 0, sizeof(*token));
}

/**
 * Make sure the token array can hold at least `capacity` entries.
 *
 * Capacity starts at 16 and doubles.  Newly added slots are zero-filled.
 * Returns false on NULL model, arithmetic overflow or allocation failure; the
 * model is unchanged in that case.
 */
static bool wp_document_model_reserve(WpDocumentModel *model, size_t capacity)
{
    WpDocumentToken *grown;
    size_t new_capacity;

    if (model == NULL) {
        return false;
    }
    if (capacity <= model->token_capacity) {
        return true;
    }

    new_capacity = model->token_capacity == 0U ? 16U : model->token_capacity;
    while (new_capacity < capacity) {
        if (new_capacity > (SIZE_MAX / 2U)) {
            return false;
        }
        new_capacity *= 2U;
    }
    /* The byte count handed to realloc must not wrap either. */
    if (new_capacity > SIZE_MAX / sizeof(*grown)) {
        return false;
    }

    grown = realloc(model->tokens, new_capacity * sizeof(*grown));
    if (grown == NULL) {
        return false;
    }
    memset(grown + model->token_capacity, 0,
           (new_capacity - model->token_capacity) * sizeof(*grown));
    model->tokens = grown;
    model->token_capacity = new_capacity;
    return true;
}

/**
 * Initialise `token` with the given metadata and a private copy of `bytes`.
 *
 * The previous contents of `token` are overwritten without being freed, so
 * the caller must pass a cleared (or never-used) token.  On failure the token
 * is left untouched.
 */
static bool wp_document_token_set_bytes(WpDocumentToken *token,
                                        WpDocumentTokenType type,
                                        WpCodeType record_type,
                                        uint8_t code,
                                        uint8_t sub_code,
                                        const uint8_t *bytes,
                                        size_t length,
                                        bool complete,
                                        bool trailer_matches)
{
    uint8_t *copy = NULL;

    if (token == NULL || (length != 0U && bytes == NULL)) {
        return false;
    }
    if (length != 0U) {
        copy = malloc(length);
        if (copy == NULL) {
            return false;
        }
        memcpy(copy, bytes, length);
    }

    memset(token, 0, sizeof(*token));
    token->type = type;
    token->record_type = record_type;
    token->code = code;
    token->sub_code = sub_code;
    token->bytes = copy;
    token->length = length;
    token->complete = complete;
    token->trailer_matches = trailer_matches;
    return true;
}

/**
 * Append a token at the end of the model.
 *
 * A text token that follows another text token is merged into it instead of
 * creating a new entry.  body_size grows by `length` either way.
 */
static bool wp_document_model_append_token(WpDocumentModel *model,
                                           WpDocumentTokenType type,
                                           WpCodeType record_type,
                                           uint8_t code,
                                           uint8_t sub_code,
                                           const uint8_t *bytes,
                                           size_t length,
                                           bool complete,
                                           bool trailer_matches)
{
    if (model == NULL || (length != 0U && bytes == NULL)) {
        return false;
    }

    if (type == WP_DOCUMENT_TOKEN_TEXT && model->token_count != 0U) {
        WpDocumentToken *last = &model->tokens[model->token_count - 1U];

        if (last->type == WP_DOCUMENT_TOKEN_TEXT) {
            uint8_t *merged;

            if (length == 0U) {
                /* Nothing to add; avoids realloc(p, 0) and memcpy from NULL. */
                return true;
            }
            if (length > SIZE_MAX - last->length) {
                return false;
            }
            merged = realloc(last->bytes, last->length + length);
            if (merged == NULL) {
                return false;
            }
            memcpy(merged + last->length, bytes, length);
            last->bytes = merged;
            last->length += length;
            model->body_size += length;
            return true;
        }
    }

    if (!wp_document_model_reserve(model, model->token_count + 1U)) {
        return false;
    }
    if (!wp_document_token_set_bytes(&model->tokens[model->token_count], type,
                                     record_type, code, sub_code, bytes, length,
                                     complete, trailer_matches)) {
        return false;
    }
    model->token_count++;
    model->body_size += length;
    return true;
}

/**
 * Remove the token at `index`, freeing its bytes and closing the gap.
 * body_size is NOT adjusted here; callers account for it themselves.
 */
static void wp_document_model_remove_token(WpDocumentModel *model, size_t index)
{
    if (model == NULL || index >= model->token_count) {
        return;
    }
    wp_document_token_clear(&model->tokens[index]);
    if (index + 1U < model->token_count) {
        memmove(&model->tokens[index], &model->tokens[index + 1U],
                (model->token_count - index - 1U) * sizeof(model->tokens[0]));
    }
    model->token_count--;
    /* The vacated last slot still holds a stale copy; wipe it. */
    memset(&model->tokens[model->token_count], 0, sizeof(model->tokens[0]));
}

/**
 * Translate a body byte offset into (token index, offset inside that token).
 *
 * An offset that falls exactly on a token boundary is reported as the start
 * of the following token (inner == 0); the end of the body is reported as
 * index == token_count.  Consequently `*out_inner` is always strictly less
 * than the located token's length.
 */
static bool wp_document_model_locate(const WpDocumentModel *model,
                                     size_t byte_offset,
                                     size_t *out_index,
                                     size_t *out_inner)
{
    size_t i;
    size_t pos = 0U;

    if (model == NULL || out_index == NULL || out_inner == NULL ||
        byte_offset > model->body_size) {
        return false;
    }

    for (i = 0U; i < model->token_count; ++i) {
        size_t next;

        if (model->tokens[i].length > SIZE_MAX - pos) {
            return false;
        }
        next = pos + model->tokens[i].length;
        if (byte_offset < next) {
            *out_index = i;
            *out_inner = byte_offset - pos;
            return true;
        }
        if (byte_offset == next) {
            *out_index = i + 1U;
            *out_inner = 0U;
            return true;
        }
        pos = next;
    }

    *out_index = model->token_count;
    *out_inner = 0U;
    return true;
}

/** True when every byte of `text` is in 0x01..0x7F (no NUL, no high bit). */
static bool wp_document_ascii_is_valid(const char *text, size_t text_len)
{
    size_t i;

    if (text_len != 0U && text == NULL) {
        return false;
    }
    for (i = 0U; i < text_len; ++i) {
        unsigned char ch = (unsigned char)text[i];

        if (ch == 0U || ch >= 0x80U) {
            return false;
        }
    }
    return true;
}

/**
 * Decode one UTF-8 code point starting at `*offset`.
 *
 * Rejects truncated sequences, bad continuation bytes, overlong encodings,
 * surrogates and values above U+10FFFF.  On success `*offset` is advanced
 * past the sequence; on failure it is left unchanged.
 */
static bool wp_document_utf8_next(const char *text,
                                  size_t text_len,
                                  size_t *offset,
                                  uint32_t *out_codepoint)
{
    const unsigned char *bytes;
    unsigned char lead;
    uint32_t cp;
    size_t need; /* number of continuation bytes */
    size_t i;

    if (text == NULL || offset == NULL || out_codepoint == NULL ||
        *offset >= text_len) {
        return false;
    }

    bytes = (const unsigned char *)text;
    lead = bytes[*offset];
    if (lead < 0x80U) {
        *out_codepoint = (uint32_t)lead;
        (*offset)++;
        return true;
    }

    /* Lead bytes 0xC0/0xC1 would be overlong and are rejected with the rest. */
    if (lead >= 0xC2U && lead <= 0xDFU) {
        cp = (uint32_t)(lead & 0x1FU);
        need = 1U;
    } else if (lead >= 0xE0U && lead <= 0xEFU) {
        cp = (uint32_t)(lead & 0x0FU);
        need = 2U;
    } else if (lead >= 0xF0U && lead <= 0xF4U) {
        cp = (uint32_t)(lead & 0x07U);
        need = 3U;
    } else {
        return false;
    }

    if (*offset + need >= text_len) {
        return false;
    }
    for (i = 1U; i <= need; ++i) {
        unsigned char cont = bytes[*offset + i];

        if ((cont & 0xC0U) != 0x80U) {
            return false;
        }
        cp = (cp << 6U) | (uint32_t)(cont & 0x3FU);
    }

    if ((need == 2U && cp < 0x800U) || (need == 3U && cp < 0x10000U) ||
        (cp >= 0xD800U && cp <= 0xDFFFU) || cp > 0x10FFFFU) {
        return false;
    }

    *offset += need + 1U;
    *out_codepoint = cp;
    return true;
}

/**
 * Number of body bytes a single host code point is encoded as.
 *
 * Tab, LF, CR, FF and printable ASCII take one byte; any other code point
 * takes four bytes provided wp_unicode_to_char() maps it to a non-zero set.
 * Returns false for code points that cannot be represented.
 */
static bool wp_document_codepoint_stream_size(uint32_t cp, size_t *out_bytes)
{
    uint8_t set = 0U;
    uint8_t index = 0U;

    if (cp == '\t' || cp == '\n' || cp == '\r' || cp == '\f' ||
        (cp >= 0x20U && cp < 0x80U)) {
        *out_bytes = 1U;
        return true;
    }
    if (wp_unicode_to_char(cp, &set, &index) && set != 0U) {
        *out_bytes = 4U;
        return true;
    }
    return false;
}

/** True when `text` is well-formed UTF-8 and every code point is representable. */
static bool wp_document_host_text_is_valid(const char *text, size_t text_len)
{
    size_t offset = 0U;

    if (text_len != 0U && text == NULL) {
        return false;
    }
    while (offset < text_len) {
        uint32_t cp;
        size_t unused;

        if (!wp_document_utf8_next(text, text_len, &offset, &cp) ||
            !wp_document_codepoint_stream_size(cp, &unused)) {
            return false;
        }
    }
    return true;
}

/**
 * Compute how many body bytes `text` occupies once inserted by
 * wp_document_model_insert_host_text_inplace().  A "\r\n" pair counts as a
 * single one-byte code, matching the inserter.
 */
static bool wp_document_host_text_stream_size(const char *text,
                                              size_t text_len,
                                              size_t *out_size)
{
    size_t offset = 0U;
    size_t total = 0U;

    if (out_size == NULL || !wp_document_host_text_is_valid(text, text_len)) {
        return false;
    }
    while (offset < text_len) {
        uint32_t cp;
        size_t bytes;

        if (!wp_document_utf8_next(text, text_len, &offset, &cp)) {
            return false;
        }
        if (cp == '\r' && offset < text_len && text[offset] == '\n') {
            offset++;
        }
        if (!wp_document_codepoint_stream_size(cp, &bytes)) {
            return false;
        }
        if (bytes > SIZE_MAX - total) {
            return false;
        }
        total += bytes;
    }
    *out_size = total;
    return true;
}

/** Single-byte codes that count as one unit of logical text. */
static bool wp_document_code_is_logical_text(uint8_t code)
{
    switch (code) {
    case 0x80U:
    case 0x81U:
    case 0x82U:
    case 0x83U:
    case 0x84U:
    case 0x8DU:
        return true;
    default:
        return false;
    }
}

/**
 * Number of logical text units a token contributes: its byte length for a
 * text token, 1 for a logical-text single-byte code or a complete 0xC0
 * fixed-length record, and 0 for everything else.
 */
static size_t wp_document_token_text_units(const WpDocumentToken *token)
{
    if (token == NULL) {
        return 0U;
    }
    if (token->type == WP_DOCUMENT_TOKEN_TEXT) {
        return token->length;
    }
    if (token->type == WP_DOCUMENT_TOKEN_SINGLE_BYTE && token->length == 1U &&
        wp_document_code_is_logical_text(token->code)) {
        return 1U;
    }
    if (token->type == WP_DOCUMENT_TOKEN_FIXED_RECORD &&
        token->record_type == WP_CODE_FIXED_LENGTH &&
        token->code == WPDM_CODE_EXTENDED_CHAR && token->complete) {
        return 1U;
    }
    return 0U;
}

/** A token may be removed by a text-unit delete if it is text or carries text units. */
static bool wp_document_token_can_delete_semantic(const WpDocumentToken *token)
{
    if (token == NULL) {
        return false;
    }
    if (token->type == WP_DOCUMENT_TOKEN_TEXT) {
        return true;
    }
    return wp_document_token_text_units(token) != 0U;
}

/**
 * Insert a new, complete token before position `index` (index == token_count
 * appends).
 *
 * The token is fully built before the array is touched, so any failure leaves
 * the model exactly as it was.
 */
static bool wp_document_insert_token(WpDocumentModel *model,
                                     size_t index,
                                     WpDocumentTokenType type,
                                     WpCodeType record_type,
                                     uint8_t code,
                                     const uint8_t *bytes,
                                     size_t length,
                                     bool trailer_matches)
{
    WpDocumentToken fresh;

    if (model == NULL || index > model->token_count || length == 0U ||
        length > SIZE_MAX - model->body_size) {
        return false;
    }

    memset(&fresh, 0, sizeof(fresh));
    if (!wp_document_token_set_bytes(&fresh, type, record_type, code, 0U, bytes,
                                     length, true, trailer_matches)) {
        return false;
    }
    if (!wp_document_model_reserve(model, model->token_count + 1U)) {
        wp_document_token_clear(&fresh);
        return false;
    }

    if (index < model->token_count) {
        memmove(&model->tokens[index + 1U], &model->tokens[index],
                (model->token_count - index) * sizeof(model->tokens[0]));
    }
    model->tokens[index] = fresh; /* ownership of fresh.bytes moves to the array */
    model->token_count++;
    model->body_size += length;
    return true;
}

/**
 * Split the text token at `index` at byte `inner` and place a new token
 * between the two halves.
 *
 * Requires a text token and 0 < inner < length.  Both new tokens (the
 * inserted one and the right-hand half) are allocated before the array is
 * modified, so a failed allocation cannot leave the model half-edited.
 * The left half keeps its original buffer with a shortened length.
 */
static bool wp_document_split_and_insert_token(WpDocumentModel *model,
                                               size_t index,
                                               size_t inner,
                                               WpDocumentTokenType type,
                                               WpCodeType record_type,
                                               uint8_t code,
                                               const uint8_t *bytes,
                                               size_t length,
                                               bool trailer_matches)
{
    WpDocumentToken middle;
    WpDocumentToken right;
    WpDocumentToken *left;
    size_t tail_count;

    if (model == NULL || index >= model->token_count || length == 0U ||
        length > SIZE_MAX - model->body_size) {
        return false;
    }
    left = &model->tokens[index];
    if (left->type != WP_DOCUMENT_TOKEN_TEXT || inner == 0U ||
        inner >= left->length) {
        return false;
    }

    memset(&middle, 0, sizeof(middle));
    memset(&right, 0, sizeof(right));
    if (!wp_document_token_set_bytes(&middle, type, record_type, code, 0U,
                                     bytes, length, true, trailer_matches)) {
        return false;
    }
    if (!wp_document_token_set_bytes(&right, WP_DOCUMENT_TOKEN_TEXT,
                                     WP_CODE_CHAR, 0U, 0U, left->bytes + inner,
                                     left->length - inner, true, false)) {
        wp_document_token_clear(&middle);
        return false;
    }
    if (!wp_document_model_reserve(model, model->token_count + 2U)) {
        wp_document_token_clear(&middle);
        wp_document_token_clear(&right);
        return false;
    }

    /* reserve() may have moved the array; re-read the left token. */
    left = &model->tokens[index];
    tail_count = model->token_count - index - 1U;
    if (tail_count != 0U) {
        memmove(&model->tokens[index + 3U], &model->tokens[index + 1U],
                tail_count * sizeof(model->tokens[0]));
    }
    left->length = inner;
    model->tokens[index + 1U] = middle;
    model->tokens[index + 2U] = right;
    model->token_count += 2U;
    model->body_size += length;
    return true;
}

/**
 * Insert a new token so that it starts at body offset `byte_offset`.
 *
 * An offset inside a text token splits that token; an offset on a token
 * boundary inserts between tokens; an offset inside any other kind of token
 * is refused because records are never split.
 */
static bool wp_document_model_insert_bytes_at(WpDocumentModel *model,
                                              size_t byte_offset,
                                              WpDocumentTokenType type,
                                              WpCodeType record_type,
                                              uint8_t code,
                                              const uint8_t *bytes,
                                              size_t length,
                                              bool trailer_matches)
{
    const WpDocumentToken *token;
    size_t index;
    size_t inner;

    if (model == NULL || byte_offset > model->body_size) {
        return false;
    }
    if (!wp_document_model_locate(model, byte_offset, &index, &inner)) {
        return false;
    }
    if (index >= model->token_count) {
        return wp_document_insert_token(model, model->token_count, type,
                                        record_type, code, bytes, length,
                                        trailer_matches);
    }

    token = &model->tokens[index];
    if (token->type == WP_DOCUMENT_TOKEN_TEXT && inner > 0U &&
        inner < token->length) {
        return wp_document_split_and_insert_token(model, index, inner, type,
                                                  record_type, code, bytes,
                                                  length, trailer_matches);
    }
    if (inner == 0U) {
        return wp_document_insert_token(model, index, type, record_type, code,
                                        bytes, length, trailer_matches);
    }
    if (inner == token->length) {
        return wp_document_insert_token(model, index + 1U, type, record_type,
                                        code, bytes, length, trailer_matches);
    }
    return false;
}

/** Insert a one-byte code (0x80..0xBF) at `byte_offset`. */
static bool wp_document_model_insert_single_byte_code(WpDocumentModel *model,
                                                      size_t byte_offset,
                                                      uint8_t code)
{
    if (code < 0x80U || code >= 0xC0U) {
        return false;
    }
    return wp_document_model_insert_bytes_at(model, byte_offset,
                                             WP_DOCUMENT_TOKEN_SINGLE_BYTE,
                                             WP_CODE_SINGLE_BYTE, code, &code,
                                             1U, false);
}

/**
 * Insert a three-byte fixed packet `code, payload, code` at `byte_offset`.
 * `code` must lie in 0xC0..0xCF.
 */
static bool wp_document_model_insert_fixed_packet(WpDocumentModel *model,
                                                  size_t byte_offset,
                                                  uint8_t code,
                                                  uint8_t payload)
{
    uint8_t packet[3];

    if (code < 0xC0U || code >= 0xD0U) {
        return false;
    }
    packet[0] = code;
    packet[1] = payload;
    packet[2] = code;
    return wp_document_model_insert_bytes_at(model, byte_offset,
                                             WP_DOCUMENT_TOKEN_FIXED_RECORD,
                                             WP_CODE_FIXED_LENGTH, code, packet,
                                             sizeof(packet), true);
}

/**
 * Insert a four-byte extended-character record at `byte_offset`.
 * `set` must be non-zero.
 *
 * NOTE(review): the record is written as 0xC0, set, index, 0xC0 - confirm
 * this byte order against the parser and the file-format specification.
 */
static bool wp_document_model_insert_extended_char(WpDocumentModel *model,
                                                   size_t byte_offset,
                                                   uint8_t set,
                                                   uint8_t char_index)
{
    uint8_t record[4];

    if (set == 0U) {
        return false;
    }
    record[0] = WPDM_CODE_EXTENDED_CHAR;
    record[1] = set;
    record[2] = char_index;
    record[3] = WPDM_CODE_EXTENDED_CHAR;
    return wp_document_model_insert_bytes_at(model, byte_offset,
                                             WP_DOCUMENT_TOKEN_FIXED_RECORD,
                                             WP_CODE_FIXED_LENGTH,
                                             WPDM_CODE_EXTENDED_CHAR, record,
                                             sizeof(record), true);
}

/**
 * Remove up to `limit` bytes from the text token at `index`, starting at
 * `inner` (which must be < token length).  Drops the token if it becomes
 * empty, adjusts body_size, and returns the number of bytes removed.
 */
static size_t wp_document_model_cut_text(WpDocumentModel *model,
                                         size_t index,
                                         size_t inner,
                                         size_t limit)
{
    WpDocumentToken *token = &model->tokens[index];
    size_t available = token->length - inner;
    size_t take = available < limit ? available : limit;

    if (take < available) {
        memmove(token->bytes + inner, token->bytes + inner + take,
                available - take);
    }
    token->length -= take;
    model->body_size -= take;
    if (token->length == 0U) {
        wp_document_model_remove_token(model, index);
    }
    return take;
}

/** True when every byte of [byte_offset, byte_offset + byte_count) lies in a text token. */
static bool wp_document_model_range_is_text(const WpDocumentModel *model,
                                            size_t byte_offset,
                                            size_t byte_count)
{
    size_t remaining;
    size_t pos;

    if (model == NULL || byte_offset > model->body_size ||
        byte_count > model->body_size - byte_offset) {
        return false;
    }

    remaining = byte_count;
    pos = byte_offset;
    while (remaining != 0U) {
        const WpDocumentToken *token;
        size_t index;
        size_t inner;
        size_t available;
        size_t take;

        if (!wp_document_model_locate(model, pos, &index, &inner) ||
            index >= model->token_count) {
            return false;
        }
        token = &model->tokens[index];
        if (token->type != WP_DOCUMENT_TOKEN_TEXT || inner >= token->length) {
            return false;
        }
        available = token->length - inner;
        take = available < remaining ? available : remaining;
        remaining -= take;
        pos += take;
    }
    return true;
}

/**
 * True when the byte range can be removed by a text-unit delete: every token
 * it touches must be semantically deletable, and non-text tokens must be
 * covered in full (never partially).
 */
static bool wp_document_model_range_is_semantic_deletable(const WpDocumentModel *model,
                                                          size_t byte_offset,
                                                          size_t byte_count)
{
    size_t i;
    size_t pos = 0U;
    size_t end;

    if (model == NULL || byte_offset > model->body_size ||
        byte_count > model->body_size - byte_offset) {
        return false;
    }

    end = byte_offset + byte_count;
    for (i = 0U; i < model->token_count; ++i) {
        const WpDocumentToken *token = &model->tokens[i];
        size_t token_end;
        size_t overlap_start;
        size_t overlap_end;

        if (token->length > SIZE_MAX - pos) {
            return false;
        }
        token_end = pos + token->length;
        if (token_end <= byte_offset) {
            pos = token_end;
            continue;
        }
        if (pos >= end) {
            break;
        }

        overlap_start = pos > byte_offset ? pos : byte_offset;
        overlap_end = token_end < end ? token_end : end;
        if (overlap_start < overlap_end) {
            if (!wp_document_token_can_delete_semantic(token)) {
                return false;
            }
            if (token->type != WP_DOCUMENT_TOKEN_TEXT &&
                (overlap_start != pos || overlap_end != token_end)) {
                return false;
            }
        }
        pos = token_end;
    }
    return true;
}

/**
 * Delete a byte range that has been vetted by
 * wp_document_model_range_is_semantic_deletable(): text is trimmed, whole
 * single-unit records are removed.
 */
static bool wp_document_model_delete_semantic_body_range(WpDocumentModel *model,
                                                         size_t byte_offset,
                                                         size_t byte_count)
{
    size_t remaining;

    if (!wp_document_model_range_is_semantic_deletable(model, byte_offset,
                                                       byte_count)) {
        return false;
    }

    remaining = byte_count;
    while (remaining != 0U) {
        WpDocumentToken *token;
        size_t index;
        size_t inner;

        /* Content shifts left as it is removed, so the offset stays fixed. */
        if (!wp_document_model_locate(model, byte_offset, &index, &inner) ||
            index >= model->token_count) {
            return false;
        }
        token = &model->tokens[index];
        if (token->type == WP_DOCUMENT_TOKEN_TEXT) {
            remaining -= wp_document_model_cut_text(model, index, inner,
                                                    remaining);
        } else {
            if (inner != 0U || token->length > remaining) {
                return false;
            }
            remaining -= token->length;
            model->body_size -= token->length;
            wp_document_model_remove_token(model, index);
        }
    }
    return true;
}

/** Reset a model to the empty state. Does not free anything; NULL is ignored. */
void wp_document_model_init(WpDocumentModel *model)
{
    if (model != NULL) {
        memset(model, 0, sizeof(*model));
    }
}

/** Free every token and the token array, then reset the model to empty. */
void wp_document_model_free(WpDocumentModel *model)
{
    size_t i;

    if (model == NULL) {
        return;
    }
    for (i = 0U; i < model->token_count; ++i) {
        wp_document_token_clear(&model->tokens[i]);
    }
    free(model->tokens);
    memset(model, 0, sizeof(*model));
}

/**
 * Deep-copy `source` into `out_clone` (which is initialised here and must not
 * own anything on entry).  On failure `out_clone` is left empty and nothing
 * is leaked.
 */
static bool wp_document_model_clone(const WpDocumentModel *source,
                                    WpDocumentModel *out_clone)
{
    size_t i;

    if (source == NULL || out_clone == NULL) {
        return false;
    }
    wp_document_model_init(out_clone);
    if (!wp_document_model_reserve(out_clone, source->token_count)) {
        return false;
    }

    for (i = 0U; i < source->token_count; ++i) {
        const WpDocumentToken *token = &source->tokens[i];

        if (!wp_document_token_set_bytes(&out_clone->tokens[i], token->type,
                                         token->record_type, token->code,
                                         token->sub_code, token->bytes,
                                         token->length, token->complete,
                                         token->trailer_matches)) {
            wp_document_model_free(out_clone);
            return false;
        }
        /* Keep the count current so a later failure frees the copies made so far. */
        out_clone->token_count = i + 1U;
    }
    out_clone->body_size = source->body_size;
    return true;
}

/**
 * Replace `model` with the contents of `clone`.  Ownership of the clone's
 * storage moves to `model`; `clone` is left empty.
 */
static void wp_document_model_commit_clone(WpDocumentModel *model,
                                           WpDocumentModel *clone)
{
    WpDocumentModel committed = *clone;

    memset(clone, 0, sizeof(*clone));
    wp_document_model_free(model);
    *model = committed;
}

/**
 * Rebuild `model` from the logical bytes of `file`.
 *
 * Records are pulled from the parser one at a time and appended as tokens;
 * incomplete records become raw tokens, and any bytes left over after the
 * parser stops are kept as one trailing raw token.  `model` is freed and
 * replaced only on success, so it must already be initialised.
 */
bool wp_document_model_load(WpDocumentModel *model, WpLoadedFile *file)
{
    WpDocumentModel temp;
    WpLayoutGlobals wl;
    size_t offset = 0U;

    if (model == NULL || file == NULL) {
        return false;
    }
    wp_document_model_init(&temp);
    memset(&wl, 0, sizeof(wl));
    if (!wp_file_bind_primary_stream(file, &wl, 4096U)) {
        return false;
    }

    while (wl.record_used_bytes > 0 && offset < file->logical_size) {
        WpRecord rec;
        WpDocumentTokenType type;
        size_t rec_len;
        bool appended;

        wp_parser_consume_record(&wl, &rec);
        if (rec.length == 0U) {
            wp_record_free(&rec);
            break;
        }
        rec_len = (size_t)rec.length;
        if (rec_len > file->logical_size - offset) {
            wp_record_free(&rec);
            wp_document_model_free(&temp);
            return false;
        }

        if (!rec.is_complete) {
            type = WP_DOCUMENT_TOKEN_RAW_RECORD;
        } else if (rec.type == WP_CODE_CHAR) {
            type = WP_DOCUMENT_TOKEN_TEXT;
        } else if (rec.type == WP_CODE_SINGLE_BYTE) {
            type = WP_DOCUMENT_TOKEN_SINGLE_BYTE;
        } else if (rec.type == WP_CODE_FIXED_LENGTH) {
            type = WP_DOCUMENT_TOKEN_FIXED_RECORD;
        } else {
            type = WP_DOCUMENT_TOKEN_VARIABLE_RECORD;
        }

        /* Token bytes are copied from the file buffer, not from the record. */
        appended = wp_document_model_append_token(&temp, type, rec.type,
                                                  rec.code, rec.sub_code,
                                                  file->logical_bytes + offset,
                                                  rec_len, rec.is_complete,
                                                  rec.trailer_matches);
        wp_record_free(&rec);
        if (!appended) {
            wp_document_model_free(&temp);
            return false;
        }
        offset += rec_len;
    }

    if (offset < file->logical_size) {
        if (!wp_document_model_append_token(&temp, WP_DOCUMENT_TOKEN_RAW_RECORD,
                                            WP_CODE_VARIABLE_LENGTH, 0U, 0U,
                                            file->logical_bytes + offset,
                                            file->logical_size - offset, false,
                                            false)) {
            wp_document_model_free(&temp);
            return false;
        }
    }

    wp_document_model_free(model);
    *model = temp;
    return true;
}

/**
 * Serialise the model into one newly allocated buffer.
 *
 * On success the caller owns `*out_body` and must free() it; an empty model
 * yields `*out_body == NULL` with `*out_size == 0`.  Fails if the token
 * lengths do not add up to body_size.
 */
bool wp_document_model_to_body(const WpDocumentModel *model,
                               uint8_t **out_body,
                               size_t *out_size)
{
    uint8_t *body = NULL;
    size_t offset = 0U;
    size_t i;

    if (model == NULL || out_body == NULL || out_size == NULL) {
        return false;
    }
    if (model->body_size != 0U) {
        body = malloc(model->body_size);
        if (body == NULL) {
            return false;
        }
    }

    for (i = 0U; i < model->token_count; ++i) {
        const WpDocumentToken *token = &model->tokens[i];

        if (offset > model->body_size ||
            token->length > model->body_size - offset) {
            free(body);
            return false;
        }
        if (token->length != 0U) {
            memcpy(body + offset, token->bytes, token->length);
        }
        offset += token->length;
    }
    if (offset != model->body_size) {
        free(body);
        return false;
    }

    *out_body = body;
    *out_size = model->body_size;
    return true;
}

/** Serialise the model and hand the bytes to wp_file_replace_body(). */
bool wp_document_model_apply_to_file(const WpDocumentModel *model,
                                     WpLoadedFile *file)
{
    uint8_t *body = NULL;
    size_t body_size = 0U;
    bool ok;

    if (model == NULL || file == NULL) {
        return false;
    }
    if (!wp_document_model_to_body(model, &body, &body_size)) {
        return false;
    }
    ok = wp_file_replace_body(file, body, body_size);
    free(body); /* the temporary buffer is released here regardless of outcome */
    return ok;
}

/**
 * Insert 7-bit ASCII text (no NUL bytes) as a text token at `byte_offset`.
 * Empty text is accepted and changes nothing.
 */
static bool wp_document_model_insert_ascii_inplace(WpDocumentModel *model,
                                                   size_t byte_offset,
                                                   const char *text,
                                                   size_t text_len)
{
    if (model == NULL || byte_offset > model->body_size ||
        !wp_document_ascii_is_valid(text, text_len)) {
        return false;
    }
    if (text_len == 0U) {
        return true;
    }
    if (text_len > SIZE_MAX - model->body_size) {
        return false;
    }
    return wp_document_model_insert_bytes_at(model, byte_offset,
                                             WP_DOCUMENT_TOKEN_TEXT,
                                             WP_CODE_CHAR, 0U,
                                             (const uint8_t *)text, text_len,
                                             false);
}

/** Delete `byte_count` bytes at `byte_offset`; the whole range must be plain text. */
static bool wp_document_model_delete_text_inplace(WpDocumentModel *model,
                                                  size_t byte_offset,
                                                  size_t byte_count)
{
    size_t remaining;

    if (model == NULL || byte_offset > model->body_size ||
        byte_count > model->body_size - byte_offset) {
        return false;
    }
    if (byte_count == 0U) {
        return true;
    }
    if (!wp_document_model_range_is_text(model, byte_offset, byte_count)) {
        return false;
    }

    remaining = byte_count;
    while (remaining != 0U) {
        size_t index;
        size_t inner;

        if (!wp_document_model_locate(model, byte_offset, &index, &inner) ||
            index >= model->token_count) {
            return false;
        }
        remaining -= wp_document_model_cut_text(model, index, inner, remaining);
    }
    return true;
}

/** Sum the logical text units of every token into `*out_text_units`. */
bool wp_document_model_text_length(const WpDocumentModel *model,
                                   size_t *out_text_units)
{
    size_t i;
    size_t total = 0U;

    if (model == NULL || out_text_units == NULL) {
        return false;
    }
    for (i = 0U; i < model->token_count; ++i) {
        size_t units = wp_document_token_text_units(&model->tokens[i]);

        if (units > SIZE_MAX - total) {
            return false;
        }
        total += units;
    }
    *out_text_units = total;
    return true;
}

/**
 * Convert a logical text offset into a body byte offset.
 *
 * Tokens that carry no text units are skipped.  Inside a text token the
 * mapping is byte-for-byte; a one-unit record maps to its start or its end.
 * A text offset equal to the total text length maps to the position after
 * the last text-bearing token, or to body_size if no token matched.
 */
bool wp_document_model_body_offset_for_text_offset(const WpDocumentModel *model,
                                                   size_t text_offset,
                                                   size_t *out_body_offset)
{
    size_t i;
    size_t body_pos = 0U;
    size_t text_pos = 0U;

    if (model == NULL || out_body_offset == NULL) {
        return false;
    }

    for (i = 0U; i < model->token_count; ++i) {
        const WpDocumentToken *token = &model->tokens[i];
        size_t units = wp_document_token_text_units(token);

        /* text_pos never exceeds text_offset here, so the subtraction cannot wrap. */
        if (units != 0U && text_offset - text_pos <= units) {
            if (token->type == WP_DOCUMENT_TOKEN_TEXT) {
                *out_body_offset = body_pos + (text_offset - text_pos);
            } else if (text_offset == text_pos) {
                *out_body_offset = body_pos;
            } else {
                *out_body_offset = body_pos + token->length;
            }
            return true;
        }
        if (token->length > SIZE_MAX - body_pos ||
            units > SIZE_MAX - text_pos) {
            return false;
        }
        body_pos += token->length;
        text_pos += units;
    }

    if (text_offset == text_pos) {
        *out_body_offset = model->body_size;
        return true;
    }
    return false;
}

/**
 * Insert UTF-8 host text at `byte_offset`, translating it code point by code
 * point: CR, LF and CRLF become one line-break code, tab and form feed get
 * their own codes, printable ASCII is stored as text, and everything else is
 * stored as an extended-character record.
 */
static bool wp_document_model_insert_host_text_inplace(WpDocumentModel *model,
                                                       size_t byte_offset,
                                                       const char *text,
                                                       size_t text_len)
{
    size_t input_offset = 0U;
    size_t offset;

    if (model == NULL || byte_offset > model->body_size ||
        !wp_document_host_text_is_valid(text, text_len)) {
        return false;
    }

    offset = byte_offset;
    while (input_offset < text_len) {
        uint32_t cp;

        if (!wp_document_utf8_next(text, text_len, &input_offset, &cp)) {
            return false;
        }

        if (cp == '\r' || cp == '\n') {
            if (!wp_document_model_insert_single_byte_code(model, offset,
                                                           WPDM_CODE_LINE_BREAK)) {
                return false;
            }
            offset++;
            /* Swallow the LF of a CRLF pair so it yields a single break. */
            if (cp == '\r' && input_offset < text_len &&
                text[input_offset] == '\n') {
                input_offset++;
            }
        } else if (cp == '\t') {
            if (!wp_document_model_insert_single_byte_code(model, offset,
                                                           WPDM_CODE_TAB)) {
                return false;
            }
            offset++;
        } else if (cp == '\f') {
            if (!wp_document_model_insert_single_byte_code(model, offset,
                                                           WPDM_CODE_FORM_FEED)) {
                return false;
            }
            offset++;
        } else if (cp >= 0x20U && cp < 0x80U) {
            char one = (char)cp;

            if (!wp_document_model_insert_ascii_inplace(model, offset, &one,
                                                        1U)) {
                return false;
            }
            offset++;
        } else {
            uint8_t set = 0U;
            uint8_t index = 0U;

            if (!wp_unicode_to_char(cp, &set, &index) || set == 0U) {
                return false;
            }
            if (!wp_document_model_insert_extended_char(model, offset, set,
                                                        index)) {
                return false;
            }
            offset += 4U;
        }
    }
    return true;
}

/** Insert host text at a position given in logical text units. */
static bool wp_document_model_insert_host_text_at_text_offset_inplace(WpDocumentModel *model,
                                                                      size_t text_offset,
                                                                      const char *text,
                                                                      size_t text_len)
{
    size_t body_offset;

    if (!wp_document_model_body_offset_for_text_offset(model, text_offset,
                                                       &body_offset)) {
        return false;
    }
    return wp_document_model_insert_host_text_inplace(model, body_offset, text,
                                                      text_len);
}

/** Delete `text_units` logical text units starting at `text_offset`. */
static bool wp_document_model_delete_text_units_inplace(WpDocumentModel *model,
                                                        size_t text_offset,
                                                        size_t text_units)
{
    size_t start_body;
    size_t end_body;

    if (model == NULL) {
        return false;
    }
    if (text_units == 0U) {
        return true;
    }
    if (text_units > SIZE_MAX - text_offset) {
        return false;
    }
    if (!wp_document_model_body_offset_for_text_offset(model, text_offset,
                                                       &start_body) ||
        !wp_document_model_body_offset_for_text_offset(model,
                                                       text_offset + text_units,
                                                       &end_body) ||
        end_body < start_body) {
        return false;
    }
    return wp_document_model_delete_semantic_body_range(model, start_body,
                                                        end_body - start_body);
}

/**
 * Insert host text wrapped in an opening 0xC3 packet and a closing 0xC4
 * packet, both carrying `attribute_code`, at a logical text offset.
 */
static bool wp_document_model_insert_attribute_span_at_text_offset_inplace(WpDocumentModel *model,
                                                                           size_t text_offset,
                                                                           uint8_t attribute_code,
                                                                           const char *text,
                                                                           size_t text_len)
{
    size_t body_offset;
    size_t encoded_size;

    if (model == NULL ||
        !wp_document_host_text_stream_size(text, text_len, &encoded_size)) {
        return false;
    }
    if (!wp_document_model_body_offset_for_text_offset(model, text_offset,
                                                       &body_offset)) {
        return false;
    }

    if (!wp_document_model_insert_fixed_packet(model, body_offset,
                                               WPDM_CODE_SPAN_OPEN,
                                               attribute_code)) {
        return false;
    }
    body_offset += 3U; /* step over the opening packet */
    if (!wp_document_model_insert_host_text_inplace(model, body_offset, text,
                                                    text_len)) {
        return false;
    }
    body_offset += encoded_size;
    return wp_document_model_insert_fixed_packet(model, body_offset,
                                                 WPDM_CODE_SPAN_CLOSE,
                                                 attribute_code);
}

/*
 * Public mutators.  Each one edits a deep clone and commits it only when the
 * whole operation succeeded, so a failed call leaves `model` untouched.
 */

/** Insert 7-bit ASCII text at a body byte offset. */
bool wp_document_model_insert_ascii(WpDocumentModel *model,
                                    size_t byte_offset,
                                    const char *text,
                                    size_t text_len)
{
    WpDocumentModel clone;

    if (!wp_document_model_clone(model, &clone)) {
        return false;
    }
    if (!wp_document_model_insert_ascii_inplace(&clone, byte_offset, text,
                                                text_len)) {
        wp_document_model_free(&clone);
        return false;
    }
    wp_document_model_commit_clone(model, &clone);
    return true;
}

/** Delete a byte range that consists solely of plain text. */
bool wp_document_model_delete_text(WpDocumentModel *model,
                                   size_t byte_offset,
                                   size_t byte_count)
{
    WpDocumentModel clone;

    if (!wp_document_model_clone(model, &clone)) {
        return false;
    }
    if (!wp_document_model_delete_text_inplace(&clone, byte_offset,
                                               byte_count)) {
        wp_document_model_free(&clone);
        return false;
    }
    wp_document_model_commit_clone(model, &clone);
    return true;
}

/** Insert UTF-8 host text at a body byte offset. */
bool wp_document_model_insert_host_text(WpDocumentModel *model,
                                        size_t byte_offset,
                                        const char *text,
                                        size_t text_len)
{
    WpDocumentModel clone;

    if (!wp_document_model_clone(model, &clone)) {
        return false;
    }
    if (!wp_document_model_insert_host_text_inplace(&clone, byte_offset, text,
                                                    text_len)) {
        wp_document_model_free(&clone);
        return false;
    }
    wp_document_model_commit_clone(model, &clone);
    return true;
}

/** Insert UTF-8 host text at a logical text offset. */
bool wp_document_model_insert_host_text_at_text_offset(WpDocumentModel *model,
                                                       size_t text_offset,
                                                       const char *text,
                                                       size_t text_len)
{
    WpDocumentModel clone;

    if (!wp_document_model_clone(model, &clone)) {
        return false;
    }
    if (!wp_document_model_insert_host_text_at_text_offset_inplace(&clone,
                                                                   text_offset,
                                                                   text,
                                                                   text_len)) {
        wp_document_model_free(&clone);
        return false;
    }
    wp_document_model_commit_clone(model, &clone);
    return true;
}

/** Delete a run of logical text units. */
bool wp_document_model_delete_text_units(WpDocumentModel *model,
                                         size_t text_offset,
                                         size_t text_units)
{
    WpDocumentModel clone;

    if (!wp_document_model_clone(model, &clone)) {
        return false;
    }
    if (!wp_document_model_delete_text_units_inplace(&clone, text_offset,
                                                     text_units)) {
        wp_document_model_free(&clone);
        return false;
    }
    wp_document_model_commit_clone(model, &clone);
    return true;
}

/** Insert host text wrapped in an attribute span at a logical text offset. */
bool wp_document_model_insert_attribute_span_at_text_offset(WpDocumentModel *model,
                                                            size_t text_offset,
                                                            uint8_t attribute_code,
                                                            const char *text,
                                                            size_t text_len)
{
    WpDocumentModel clone;

    if (!wp_document_model_clone(model, &clone)) {
        return false;
    }
    if (!wp_document_model_insert_attribute_span_at_text_offset_inplace(&clone,
                                                                        text_offset,
                                                                        attribute_code,
                                                                        text,
                                                                        text_len)) {
        wp_document_model_free(&clone);
        return false;
    }
    wp_document_model_commit_clone(model, &clone);
    return true;
}
Commit message
This repository is read-only for this account.
Repository snapshot
Current branch
main
Visibility
public
Your access
Read
Remote
None
File activity
View file history