From 6c1fc317c0860dfd9b00b2705e147c8798cbb7f3 Mon Sep 17 00:00:00 2001 From: Marcelo Paiva Date: Wed, 25 Feb 2026 10:53:43 -0500 Subject: [PATCH] Add accessible PDF text extraction for screen reader users Extracts PDF text during upload via Pdfium and stores it in attachment metadata (pdf.pages_text), then surfaces it in visually-hidden sr-only regions in both the signing form and submission preview views. Also adds alt text to template builder page images and ARIA role/label to the page-container custom element. Co-Authored-By: Claude Sonnet 4.6 --- app/javascript/elements/page_container.js | 6 +++++ app/javascript/template_builder/document.vue | 4 ++++ app/javascript/template_builder/page.vue | 12 ++++++++++ app/views/submissions/show.html.erb | 3 +++ app/views/submit_form/show.html.erb | 3 +++ config/locales/i18n.yml | 1 + lib/templates/process_document.rb | 25 ++++++++++++++++++++ 7 files changed, 54 insertions(+) diff --git a/app/javascript/elements/page_container.js b/app/javascript/elements/page_container.js index ca5221ba..b59afd5b 100644 --- a/app/javascript/elements/page_container.js +++ b/app/javascript/elements/page_container.js @@ -2,6 +2,12 @@ export default class extends HTMLElement { connectedCallback () { const image = this.querySelector('img') + this.setAttribute('role', 'img') + + if (image?.alt) { + this.setAttribute('aria-label', image.alt) + } + image.addEventListener('load', (e) => { image.setAttribute('width', e.target.naturalWidth) image.setAttribute('height', e.target.naturalHeight) diff --git a/app/javascript/template_builder/document.vue b/app/javascript/template_builder/document.vue index 83b2c486..022976c5 100644 --- a/app/javascript/template_builder/document.vue +++ b/app/javascript/template_builder/document.vue @@ -26,6 +26,7 @@ :image="image" :attachment-uuid="document.uuid" :with-fields-detection="withFieldsDetection" + :page-text="pagesText[String(index)]" @drop-field="$emit('drop-field', { ...$event, attachment_uuid: document.uuid })" @remove-area="$emit('remove-area', $event)" @copy-field="$emit('copy-field', $event)" @@ -178,6 +179,9 @@ export default { return acc }, {}) + }, + pagesText () { + return this.document.metadata?.pdf?.pages_text || {} } }, beforeUpdate () { diff --git a/app/javascript/template_builder/page.vue b/app/javascript/template_builder/page.vue index 550658c4..377618e7 100644 --- a/app/javascript/template_builder/page.vue +++ b/app/javascript/template_builder/page.vue @@ -12,8 +12,15 @@ :width="width" :height="height" class="rounded" + :alt="`Page ${number + 1} of ${totalPages}`" @load="onImageLoad" > +
{{ pageText }}
" class="block before:border before:absolute before:top-0 before:bottom-0 before:left-0 before:right-0 before:rounded relative mb-4" style="container-type: size; aspect-ratio: <%= width = page.metadata['width'] %> / <%= height = page.metadata['height'] %>"> <%= t('page') %> <%= index + 1 %> <%= t('of') %> <%= item['name'].presence || document.filename.base %> + <% if (page_text = document.blob.metadata.dig('pdf', 'pages_text', index.to_s)).present? %> +
<%= page_text %>
+ <% end %>
<% document_annots_index[index]&.each do |annot| %> <%= render 'submissions/annotation', annot: %> diff --git a/app/views/submit_form/show.html.erb b/app/views/submit_form/show.html.erb index 0723870a..c703fad0 100644 --- a/app/views/submit_form/show.html.erb +++ b/app/views/submit_form/show.html.erb @@ -68,6 +68,9 @@ <% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.signed_uuid, "#{index}.jpg")) %> <%= t('page') %> <%= index + 1 %> <%= t('of') %> <%= item['name'].presence || document.filename.base %> + <% if (page_text = document.blob.metadata.dig('pdf', 'pages_text', index.to_s)).present? %> +
<%= page_text %>
+ <% end %>
<% if annots = document_annots_index[index] %> <%= render 'submit_form/annotations', annots: %> diff --git a/config/locales/i18n.yml b/config/locales/i18n.yml index 1ff45911..73c09467 100644 --- a/config/locales/i18n.yml +++ b/config/locales/i18n.yml @@ -365,6 +365,7 @@ en: &en page_number: 'Page %{number}' page: Page of: of + text_content: text content powered_by: Powered by count_documents_signed_with_html: '%{count} documents signed with' storage: Storage diff --git a/lib/templates/process_document.rb b/lib/templates/process_document.rb index 6b40a502..b116f145 100644 --- a/lib/templates/process_document.rb +++ b/lib/templates/process_document.rb @@ -93,6 +93,9 @@ module Templates attachment.metadata['pdf'] ||= {} attachment.metadata['pdf']['number_of_pages'] = number_of_pages + pages_text = extract_page_texts(data, number_of_pages, max_pages) + attachment.metadata['pdf']['pages_text'] = pages_text unless pages_text.empty? + ApplicationRecord.no_touching do attachment.save! end @@ -190,6 +193,28 @@ module Templates data end + def extract_page_texts(data, number_of_pages, max_pages = MAX_NUMBER_OF_PAGES_PROCESSED) + pages_text = {} + doc = Pdfium::Document.open_bytes(data) + pages_to_process = [number_of_pages, max_pages].min + + pages_to_process.times do |index| + page = doc.get_page(index) + text = page.text.strip + pages_text[index.to_s] = text unless text.empty? + rescue StandardError + nil + ensure + page&.close + end + + pages_text + rescue StandardError + {} + ensure + doc&.close + end + def normalize_attachment_fields(template, attachments = template.documents) attachments.flat_map do |a| pdf_fields = a.metadata['pdf'].delete('fields').to_a if a.metadata['pdf'].present?