Word Unperfect
public
Read
Owner: themaster
Branch: main
Commits: 0
Git CLI clone URL
git clone https://www.xt-emporium.com/git/word-unperfect.git
Fullscreen desktop URL
Code
Commits
History
Branches
Bug Reports
Discussions
Compare
Settings
word-unperfect
/
rev
/
wp_nested_stream.c
File editor
/*
 * wp_nested_stream.c
 *
 * Walks a record stream embedded inside a variable-length record's payload,
 * tallies per-record statistics into a WpNestedStreamStats, and optionally
 * follows further nested-stream hints up to a configurable depth.
 */
#include "wp_nested_stream.h"
#include "wp_control_codes.h"
#include "wp_fixed_codes.h"
#include "wp_layout_shared.h"
#include "wp_record_parser.h"
#include "wp_variable_codes.h"
#include <stdlib.h>
#include <string.h>

/*
 * Zero *wl and bind it to a byte-reversed copy of logical[0..logical_len).
 *
 * The reversed bytes are written into storage (which must hold at least
 * logical_len bytes), and primary_record.word is set to the address one past
 * the end of that copy. With a NULL/empty input or NULL storage, *wl is left
 * as an empty cursor (all three fields zero).
 *
 * NOTE(review): the reversal and end-of-buffer pointer suggest the record
 * parser consumes bytes downward from primary_record.word -- confirm against
 * wp_parser_consume_record.
 */
static void wp_nested_bind_lifo_bytes(WpLayoutGlobals *wl,
                                      const uint8_t *logical,
                                      uint16_t logical_len,
                                      uint8_t *storage)
{
    uint16_t i;

    memset(wl, 0, sizeof(*wl));
    if (logical == NULL || logical_len == 0U || storage == NULL) {
        wl->primary_record.word = 0;
        wl->record_used_bytes = 0;
        wl->record_buffer_space = 0;
        return;
    }
    /* storage[i] receives the i-th byte counted from the end of logical. */
    for (i = 0U; i < logical_len; ++i) {
        storage[i] = logical[(uint16_t)(logical_len - 1U - i)];
    }
    wl->primary_record.word = (intptr_t)(storage + logical_len);
    wl->record_used_bytes = (int)logical_len;
    wl->record_buffer_space = logical_len;
}

/*
 * Fill *options with the defaults: WP_NESTED_STREAM_DEFAULT_MAX_DEPTH and
 * nested-hint following enabled. A NULL pointer is ignored.
 */
void wp_nested_stream_default_options(WpNestedStreamOptions *options)
{
    if (options == NULL) {
        return;
    }
    options->max_depth = WP_NESTED_STREAM_DEFAULT_MAX_DEPTH;
    options->follow_nested_hints = true;
}

/* Zero every field of *stats. A NULL pointer is ignored. */
void wp_nested_stream_stats_clear(WpNestedStreamStats *stats)
{
    if (stats != NULL) {
        memset(stats, 0, sizeof(*stats));
    }
}

/*
 * Fold *src into *dst.
 *
 * Counters are added, max_depth_seen takes the larger of the two values, and
 * the three status flags are OR-ed together. Does nothing if either pointer
 * is NULL.
 */
void wp_nested_stream_stats_merge(WpNestedStreamStats *dst, const WpNestedStreamStats *src)
{
    if (dst == NULL || src == NULL) {
        return;
    }

    /* Stream-level and record-type counters. */
    dst->streams_seen += src->streams_seen;
    dst->records_seen += src->records_seen;
    dst->bytes_consumed += src->bytes_consumed;
    dst->char_records += src->char_records;
    dst->single_byte_codes += src->single_byte_codes;
    dst->fixed_length_codes += src->fixed_length_codes;
    dst->variable_length_codes += src->variable_length_codes;
    dst->incomplete_records += src->incomplete_records;
    dst->mismatched_trailers += src->mismatched_trailers;

    /* Counters sourced from the control summary. */
    dst->ascii_control_records += src->ascii_control_records;
    dst->printable_text_records += src->printable_text_records;
    dst->single_byte_format_records += src->single_byte_format_records;
    dst->packet_records += src->packet_records;
    dst->line_break_records += src->line_break_records;
    dst->page_break_records += src->page_break_records;
    dst->hard_returns += src->hard_returns;
    dst->soft_returns += src->soft_returns;
    dst->hard_pages += src->hard_pages;
    dst->soft_pages += src->soft_pages;
    dst->tabs += src->tabs;
    dst->indents += src->indents;
    dst->dormant_returns += src->dormant_returns;
    dst->dormant_pages += src->dormant_pages;
    dst->normalized_spaces += src->normalized_spaces;
    dst->normalized_hyphens += src->normalized_hyphens;
    dst->whitespace_gate_records += src->whitespace_gate_records;
    dst->render_dirty_gate_records += src->render_dirty_gate_records;
    dst->dirty_exempt_records += src->dirty_exempt_records;
    dst->dirty_neutral_records += src->dirty_neutral_records;
    dst->extension_scan_stop_records += src->extension_scan_stop_records;
    dst->tsm_highlight_records += src->tsm_highlight_records;
    dst->hyphenation_suppression_records += src->hyphenation_suppression_records;
    dst->hidden_function_gate_records += src->hidden_function_gate_records;
    dst->unknown_single_byte_records += src->unknown_single_byte_records;
    dst->display_columns += src->display_columns;

    /* Fixed-length packet counters. */
    dst->fixed_known_packets += src->fixed_known_packets;
    dst->fixed_unknown_packets += src->fixed_unknown_packets;
    dst->fixed_mirrored_packets += src->fixed_mirrored_packets;
    dst->fixed_zero_payload_packets += src->fixed_zero_payload_packets;
    dst->extended_character_packets += src->extended_character_packets;
    dst->attribute_begin_packets += src->attribute_begin_packets;
    dst->attribute_end_packets += src->attribute_end_packets;

    /* 0xD4 sub-record counters. */
    dst->d4_layout_state_records += src->d4_layout_state_records;
    dst->d4_layout_state_payload_bytes += src->d4_layout_state_payload_bytes;
    dst->d4_pending_span_records += src->d4_pending_span_records;
    dst->d4_line_window_records += src->d4_line_window_records;
    dst->d4_line_window_extension_bytes += src->d4_line_window_extension_bytes;
    dst->d4_line_metric_records += src->d4_line_metric_records;
    dst->d4_line_metric_extension_bytes += src->d4_line_metric_extension_bytes;
    dst->d4_extension_fragment_records += src->d4_extension_fragment_records;
    dst->d4_extension_fragment_bytes += src->d4_extension_fragment_bytes;
    dst->d4_line_build_checkpoint_records += src->d4_line_build_checkpoint_records;
    dst->d4_word_pair_checkpoint_records += src->d4_word_pair_checkpoint_records;
    dst->d4_position_marker_records += src->d4_position_marker_records;
    dst->d4_control_word_records += src->d4_control_word_records;
    dst->d4_compact_metric_records += src->d4_compact_metric_records;
    dst->d4_layout_anchor_records += src->d4_layout_anchor_records;
    dst->d4_unknown_records += src->d4_unknown_records;
    dst->d4_unknown_payload_bytes += src->d4_unknown_payload_bytes;
    dst->d4_trailing_unparsed_bytes += src->d4_trailing_unparsed_bytes;
    dst->d4_extension_block_records += src->d4_extension_block_records;
    dst->d4_extension_block_bytes += src->d4_extension_block_bytes;

    /* Variable-length packet counters. */
    dst->variable_structural_packets += src->variable_structural_packets;
    dst->variable_generic_packets += src->variable_generic_packets;
    dst->d1_definition_packets += src->d1_definition_packets;
    dst->d2_outline_packets += src->d2_outline_packets;
    dst->d3_generated_text_packets += src->d3_generated_text_packets;
    dst->repeat_group_packets += src->repeat_group_packets;
    dst->delayed_text_packets += src->delayed_text_packets;
    dst->box_object_packets += src->box_object_packets;
    dst->table_layout_packets += src->table_layout_packets;
    dst->system_command_packets += src->system_command_packets;
    dst->variable_nested_stream_hints += src->variable_nested_stream_hints;
    dst->variable_repeat_dispatch_packets += src->variable_repeat_dispatch_packets;
    dst->variable_repeat_total += src->variable_repeat_total;
    dst->variable_extension_scan_packets += src->variable_extension_scan_packets;
    dst->variable_refcount_increment_packets += src->variable_refcount_increment_packets;
    dst->variable_refcount_decrement_packets += src->variable_refcount_decrement_packets;
    dst->scanner_bypass_packets += src->scanner_bypass_packets;

    /* Depth is a high-water mark, not a sum. */
    if (src->max_depth_seen > dst->max_depth_seen) {
        dst->max_depth_seen = src->max_depth_seen;
    }

    /* Status flags are sticky: set in the result if set in either input. */
    dst->recursion_limit_hit = dst->recursion_limit_hit || src->recursion_limit_hit;
    dst->allocation_failed = dst->allocation_failed || src->allocation_failed;
    dst->stopped_on_parse_gap = dst->stopped_on_parse_gap || src->stopped_on_parse_gap;
}

/*
 * Add the control-summary counters for a single record to *stats.
 *
 * A scratch WpControlSummaryStats is cleared, fed exactly one record, and its
 * fields are then added to the matching fields of *stats. Nothing is added
 * when wp_control_summary_add_record() returns false or an argument is NULL.
 */
static void wp_nested_count_control_info(const WpRecord *rec, WpNestedStreamStats *stats)
{
    WpControlSummaryStats control;

    if (rec == NULL || stats == NULL) {
        return;
    }
    wp_control_summary_stats_clear(&control);
    if (!wp_control_summary_add_record(&control, rec)) {
        return;
    }
    stats->ascii_control_records += control.ascii_control_records;
    stats->printable_text_records += control.printable_text_records;
    stats->single_byte_format_records += control.single_byte_format_records;
    stats->packet_records += control.packet_records;
    stats->line_break_records += control.line_break_records;
    stats->page_break_records += control.page_break_records;
    stats->hard_returns += control.hard_returns;
    stats->soft_returns += control.soft_returns;
    stats->hard_pages += control.hard_pages;
    stats->soft_pages += control.soft_pages;
    stats->tabs += control.tabs;
    stats->indents += control.indents;
    stats->dormant_returns += control.dormant_returns;
    stats->dormant_pages += control.dormant_pages;
    stats->normalized_spaces += control.normalized_spaces;
    stats->normalized_hyphens += control.normalized_hyphens;
    stats->whitespace_gate_records += control.whitespace_gate_records;
    stats->render_dirty_gate_records += control.render_dirty_gate_records;
    stats->dirty_exempt_records += control.dirty_exempt_records;
    stats->dirty_neutral_records += control.dirty_neutral_records;
    stats->extension_scan_stop_records += control.extension_scan_stop_records;
    stats->tsm_highlight_records += control.tsm_highlight_records;
    stats->hyphenation_suppression_records += control.hyphenation_suppression_records;
    stats->hidden_function_gate_records += control.hidden_function_gate_records;
    stats->unknown_single_byte_records += control.unknown_single_byte_records;
    stats->display_columns += control.display_columns;
}

/*
 * Update the fixed-length packet counters for one record.
 *
 * Only records of type WP_CODE_FIXED_LENGTH are considered. A code with no
 * entry from wp_fixed_code_info() counts as unknown and nothing else is
 * recorded for it. Known codes additionally bump the mirrored-trailer and
 * zero-payload counters when the table entry says so, and codes 0xC0, 0xC3
 * and 0xC4 are tallied as extended-character, attribute-begin and
 * attribute-end packets respectively.
 */
static void wp_nested_count_fixed_info(const WpRecord *rec, WpNestedStreamStats *stats)
{
    const WpFixedCodeInfo *info;

    if (rec == NULL || stats == NULL || rec->type != WP_CODE_FIXED_LENGTH) {
        return;
    }
    info = wp_fixed_code_info(rec->code);
    if (info == NULL) {
        stats->fixed_unknown_packets++;
        return;
    }
    stats->fixed_known_packets++;
    if (info->mirrored_trailer) {
        stats->fixed_mirrored_packets++;
    }
    if (info->payload_length == 0U) {
        stats->fixed_zero_payload_packets++;
    }
    if (rec->code == 0xC0U) {
        stats->extended_character_packets++;
    } else if (rec->code == 0xC3U) {
        stats->attribute_begin_packets++;
    } else if (rec->code == 0xC4U) {
        stats->attribute_end_packets++;
    }
}

/*
 * Record completeness problems for one record.
 *
 * A record counts as incomplete only when it is not complete AND has no
 * trailer; a record whose trailer is present but does not match counts as a
 * mismatched trailer instead.
 */
static void wp_nested_count_status(const WpRecord *rec, WpNestedStreamStats *stats)
{
    if (rec == NULL || stats == NULL) {
        return;
    }
    if (!rec->is_complete && !rec->trailer_present) {
        stats->incomplete_records++;
    }
    if (rec->trailer_present && !rec->trailer_matches) {
        stats->mismatched_trailers++;
    }
}

/*
 * Account for the leftover / unrecognised parts of a 0xD4 record.
 *
 * For each decoded sub-structure flagged in *info, trailing unparsed bytes
 * and extension-block totals are accumulated where that sub-structure
 * exposes them. If none of the D4 has_* flags checked here is set, the
 * record is counted as an unknown D4 record and its whole data_length is
 * added to d4_unknown_payload_bytes. Records with any other code are ignored.
 */
static void wp_nested_count_d4_residual_info(const WpRecord *rec,
                                             const WpVariableCommandInfo *info,
                                             WpNestedStreamStats *stats)
{
    bool decoded;

    if (rec == NULL || info == NULL || stats == NULL || rec->code != 0xD4U) {
        return;
    }
    decoded = false;
    if (info->has_layout_state) {
        decoded = true;
        stats->d4_trailing_unparsed_bytes += info->layout_state.trailing_unparsed_bytes;
        if (info->layout_state.has_extension_blocks) {
            stats->d4_extension_block_records++;
            stats->d4_extension_block_bytes += info->layout_state.extension_block_bytes;
        }
    }
    if (info->has_pending_span) {
        decoded = true;
    }
    if (info->has_line_window) {
        decoded = true;
        stats->d4_trailing_unparsed_bytes += info->line_window.trailing_unparsed_bytes;
    }
    if (info->has_line_metric) {
        decoded = true;
        /* Residual bytes for a line metric live in its extension payload. */
        if (info->line_metric.has_extension_payload) {
            stats->d4_trailing_unparsed_bytes += info->line_metric.extension.trailing_unparsed_bytes;
            if (info->line_metric.extension.is_block_list) {
                stats->d4_extension_block_records++;
                stats->d4_extension_block_bytes += info->line_metric.extension.block_bytes;
            }
        }
    }
    if (info->has_extension_fragment) {
        decoded = true;
        stats->d4_trailing_unparsed_bytes += info->extension_fragment.trailing_unparsed_bytes;
        if (info->extension_fragment.is_block_list) {
            stats->d4_extension_block_records++;
            stats->d4_extension_block_bytes += info->extension_fragment.block_bytes;
        }
    }
    /* The remaining shapes only mark the record as decoded. */
    if (info->has_line_build_checkpoint) {
        decoded = true;
    }
    if (info->has_word_pair_checkpoint) {
        decoded = true;
    }
    if (info->has_position_marker) {
        decoded = true;
    }
    if (info->has_control_word) {
        decoded = true;
    }
    if (info->has_compact_metric) {
        decoded = true;
    }
    if (info->has_layout_anchor) {
        decoded = true;
    }
    if (!decoded) {
        stats->d4_unknown_records++;
        stats->d4_unknown_payload_bytes += rec->data_length;
    }
}

/*
 * Return true when *info carries any decoded structure: one of the has_*
 * payload flags, scanner_bypass, a nested-stream hint, or a post-compare plan
 * whose flags are non-zero. Returns false for a NULL pointer.
 */
static bool wp_nested_variable_has_decoded_semantics(const WpVariableCommandInfo *info)
{
    if (info == NULL) {
        return false;
    }
    return info->has_initial_format_table ||
           info->has_layout_state ||
           info->has_pending_span ||
           info->has_line_window ||
           info->has_line_metric ||
           info->has_extension_fragment ||
           info->has_line_build_checkpoint ||
           info->has_word_pair_checkpoint ||
           info->has_position_marker ||
           info->has_control_word ||
           info->has_compact_metric ||
           info->has_layout_anchor ||
           info->has_definition_payload ||
           info->has_outline_payload ||
           info->has_generated_text_payload ||
           info->has_repeat_group_payload ||
           info->has_delayed_text_payload ||
           info->has_box_object_payload ||
           info->has_table_layout_payload ||
           info->has_system_command_payload ||
           info->scanner_bypass ||
           info->has_nested_stream_hint ||
           (info->has_post_compare_plan && info->post_compare.flags != 0U);
}

/*
 * Update every variable-length counter in *stats from a classified record.
 *
 * Each has_* flag in *info bumps its own counter (plus byte totals where the
 * decoded structure supplies them). The record is then classified as either
 * "structural" or "generic" via wp_nested_variable_has_decoded_semantics(),
 * and the post-compare plan, when present, feeds the repeat / extension-scan /
 * refcount counters. rec is only forwarded to the D4 residual helper.
 */
static void wp_nested_count_variable_plan(const WpRecord *rec,
                                          const WpVariableCommandInfo *info,
                                          WpNestedStreamStats *stats)
{
    if (info == NULL || stats == NULL) {
        return;
    }
    if (info->has_layout_state) {
        stats->d4_layout_state_records++;
        stats->d4_layout_state_payload_bytes += info->layout_state.decoded_payload_bytes;
    }
    if (info->has_pending_span) {
        stats->d4_pending_span_records++;
    }
    if (info->has_line_window) {
        stats->d4_line_window_records++;
        if (info->line_window.has_extension_payload) {
            stats->d4_line_window_extension_bytes += info->line_window.extension_payload_bytes;
        }
    }
    if (info->has_line_metric) {
        stats->d4_line_metric_records++;
        if (info->line_metric.has_extension_payload) {
            stats->d4_line_metric_extension_bytes += info->line_metric.extension.extension_payload_bytes;
        }
    }
    if (info->has_extension_fragment) {
        stats->d4_extension_fragment_records++;
        stats->d4_extension_fragment_bytes += info->extension_fragment.extension_payload_bytes;
    }
    if (info->has_line_build_checkpoint) {
        stats->d4_line_build_checkpoint_records++;
    }
    if (info->has_word_pair_checkpoint) {
        stats->d4_word_pair_checkpoint_records++;
    }
    if (info->has_position_marker) {
        stats->d4_position_marker_records++;
    }
    if (info->has_control_word) {
        stats->d4_control_word_records++;
    }
    if (info->has_compact_metric) {
        stats->d4_compact_metric_records++;
    }
    if (info->has_layout_anchor) {
        stats->d4_layout_anchor_records++;
    }
    /* Residual accounting is a no-op unless rec is a 0xD4 record. */
    wp_nested_count_d4_residual_info(rec, info, stats);
    if (info->has_definition_payload) {
        stats->d1_definition_packets++;
    }
    if (info->has_outline_payload) {
        stats->d2_outline_packets++;
    }
    if (info->has_generated_text_payload) {
        stats->d3_generated_text_packets++;
    }
    if (info->has_repeat_group_payload) {
        stats->repeat_group_packets++;
    }
    if (info->has_delayed_text_payload) {
        stats->delayed_text_packets++;
    }
    if (info->has_box_object_payload) {
        stats->box_object_packets++;
    }
    if (info->has_table_layout_payload) {
        stats->table_layout_packets++;
    }
    if (info->has_system_command_payload) {
        stats->system_command_packets++;
    }
    /* Every classified packet lands in exactly one of these two buckets. */
    if (wp_nested_variable_has_decoded_semantics(info)) {
        stats->variable_structural_packets++;
    } else {
        stats->variable_generic_packets++;
    }
    if (info->scanner_bypass) {
        stats->scanner_bypass_packets++;
    }
    if (info->has_nested_stream_hint) {
        stats->variable_nested_stream_hints++;
    }
    if (info->has_post_compare_plan) {
        if ((info->post_compare.flags & WP_VARIABLE_POST_REPEAT_DISPATCH) != 0U) {
            stats->variable_repeat_dispatch_packets++;
            if (info->post_compare.has_repeat_count) {
                stats->variable_repeat_total += info->post_compare.repeat_count;
            }
        }
        if (info->post_compare.extension_scan) {
            stats->variable_extension_scan_packets++;
        }
        if (info->post_compare.refcount_increment) {
            stats->variable_refcount_increment_packets++;
        }
        if (info->post_compare.refcount_decrement) {
            stats->variable_refcount_decrement_packets++;
        }
    }
}

/*
 * Parse the stream at payload[nested_offset..payload_length) and accumulate
 * statistics into *stats, recursing into nested-stream hints.
 *
 * payload        - enclosing payload bytes.
 * payload_length - number of bytes in payload.
 * nested_offset  - start of the nested stream; must be < payload_length.
 * options        - NULL selects the defaults; a max_depth of 0 is replaced by
 *                  WP_NESTED_STREAM_DEFAULT_MAX_DEPTH in a local copy.
 * stats          - accumulator; NOT cleared here, so recursion adds to it.
 * depth          - depth of this stream (the public entry points pass 1).
 *
 * Returns false for invalid arguments, on allocation failure, when parsing
 * stalls (stopped_on_parse_gap), or when a nested analysis fails. Returns
 * true with recursion_limit_hit set when depth exceeds max_depth.
 */
static bool wp_nested_analyze_payload_at_depth(const uint8_t *payload,
                                               uint16_t payload_length,
                                               uint16_t nested_offset,
                                               const WpNestedStreamOptions *options,
                                               WpNestedStreamStats *stats,
                                               unsigned depth)
{
    WpNestedStreamOptions local_options;
    WpLayoutGlobals cursor;
    uint16_t stream_len;
    uint8_t *storage;
    bool ok = true;

    if (options == NULL) {
        wp_nested_stream_default_options(&local_options);
        options = &local_options;
    }
    if (stats == NULL || payload == NULL || nested_offset >= payload_length) {
        return false;
    }
    if (options->max_depth == 0U) {
        /* Copy before patching so the caller's options are never modified. */
        local_options = *options;
        local_options.max_depth = WP_NESTED_STREAM_DEFAULT_MAX_DEPTH;
        options = &local_options;
    }
    if (depth > options->max_depth) {
        stats->recursion_limit_hit = true;
        return true;
    }
    stream_len = (uint16_t)(payload_length - nested_offset);
    /* Defensive: the guard above already ensures nested_offset < payload_length. */
    if (stream_len == 0U) {
        return false;
    }
    /* Scratch buffer for the reversed copy; released before returning. */
    storage = (uint8_t *)malloc(stream_len);
    if (storage == NULL) {
        stats->allocation_failed = true;
        return false;
    }
    stats->streams_seen++;
    if (depth > stats->max_depth_seen) {
        stats->max_depth_seen = depth;
    }
    wp_nested_bind_lifo_bytes(&cursor, payload + nested_offset, stream_len, storage);
    while (cursor.record_used_bytes > 0) {
        WpRecord rec;
        int before = cursor.record_used_bytes;

        wp_parser_consume_record(&cursor, &rec);
        /* A zero-length record means the parser produced nothing: stop. */
        if (rec.length == 0U) {
            stats->stopped_on_parse_gap = true;
            wp_record_free(&rec);
            ok = false;
            break;
        }
        /* The cursor must shrink on every iteration or the loop never ends. */
        if (cursor.record_used_bytes >= before) {
            stats->stopped_on_parse_gap = true;
            wp_record_free(&rec);
            ok = false;
            break;
        }
        stats->records_seen++;
        stats->bytes_consumed += rec.length;
        wp_nested_count_control_info(&rec, stats);
        wp_nested_count_status(&rec, stats);
        switch (rec.type) {
        case WP_CODE_CHAR:
            stats->char_records++;
            break;
        case WP_CODE_SINGLE_BYTE:
            stats->single_byte_codes++;
            break;
        case WP_CODE_FIXED_LENGTH:
            stats->fixed_length_codes++;
            wp_nested_count_fixed_info(&rec, stats);
            break;
        case WP_CODE_VARIABLE_LENGTH:
            stats->variable_length_codes++;
            {
                WpVariableCommandInfo info;

                if (wp_variable_classify_record(&rec, &info)) {
                    wp_nested_count_variable_plan(&rec, &info, stats);
                    if (options->follow_nested_hints && info.has_nested_stream_hint) {
                        /* Refuse to descend past max_depth; note it instead. */
                        if (depth >= options->max_depth) {
                            stats->recursion_limit_hit = true;
                        } else {
                            /* A nested failure fails this call but parsing continues. */
                            if (!wp_nested_analyze_payload_at_depth(rec.data, rec.data_length, info.nested_stream_offset, options, stats, depth + 1U)) {
                                ok = false;
                            }
                        }
                    }
                }
            }
            break;
        }
        wp_record_free(&rec);
    }
    free(storage);
    return ok && !stats->allocation_failed;
}

/*
 * Public entry point: clear *stats, then analyze the stream that starts at
 * nested_offset inside payload, treating it as depth 1.
 *
 * Returns false when stats is NULL; otherwise returns the result of the
 * depth-aware analysis.
 */
bool wp_nested_stream_analyze_payload(const uint8_t *payload,
                                      uint16_t payload_length,
                                      uint16_t nested_offset,
                                      const WpNestedStreamOptions *options,
                                      WpNestedStreamStats *stats)
{
    if (stats == NULL) {
        return false;
    }
    wp_nested_stream_stats_clear(stats);
    return wp_nested_analyze_payload_at_depth(payload, payload_length, nested_offset, options, stats, 1U);
}

/*
 * Public entry point: clear *stats, then analyze the nested stream hinted at
 * by a variable-length record.
 *
 * Returns false (leaving *stats cleared when it is non-NULL) unless record is
 * a WP_CODE_VARIABLE_LENGTH record that wp_variable_classify_record() accepts
 * and flags with a nested-stream hint. Otherwise the record's data is
 * analyzed from info.nested_stream_offset at depth 1.
 */
bool wp_nested_stream_analyze_record(const WpRecord *record,
                                     const WpNestedStreamOptions *options,
                                     WpNestedStreamStats *stats)
{
    WpVariableCommandInfo info;

    if (stats == NULL) {
        return false;
    }
    wp_nested_stream_stats_clear(stats);
    if (record == NULL ||
        record->type != WP_CODE_VARIABLE_LENGTH ||
        !wp_variable_classify_record(record, &info) ||
        !info.has_nested_stream_hint) {
        return false;
    }
    return wp_nested_analyze_payload_at_depth(record->data, record->data_length, info.nested_stream_offset, options, stats, 1U);
}
Commit message
This repository is read-only for this account.
Repository snapshot
Current branch
main
Visibility
public
Your access
Read
Remote
None
File activity
View file history