Add accessible PDF text extraction for screen reader users

Extracts PDF text during upload via Pdfium and stores it in attachment
metadata (pdf.pages_text), then surfaces it in visually-hidden sr-only
regions in both the signing form and submission preview views. Also adds
alt text to template builder page images and ARIA role/label to the
page-container custom element.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
pull/599/head
Marcelo Paiva 3 weeks ago
parent 4602f0fb63
commit 6c1fc317c0

@ -2,6 +2,12 @@ export default class extends HTMLElement {
connectedCallback () { connectedCallback () {
const image = this.querySelector('img') const image = this.querySelector('img')
this.setAttribute('role', 'img')
if (image?.alt) {
this.setAttribute('aria-label', image.alt)
}
image.addEventListener('load', (e) => { image.addEventListener('load', (e) => {
image.setAttribute('width', e.target.naturalWidth) image.setAttribute('width', e.target.naturalWidth)
image.setAttribute('height', e.target.naturalHeight) image.setAttribute('height', e.target.naturalHeight)

@ -26,6 +26,7 @@
:image="image" :image="image"
:attachment-uuid="document.uuid" :attachment-uuid="document.uuid"
:with-fields-detection="withFieldsDetection" :with-fields-detection="withFieldsDetection"
:page-text="pagesText[String(index)]"
@drop-field="$emit('drop-field', { ...$event, attachment_uuid: document.uuid })" @drop-field="$emit('drop-field', { ...$event, attachment_uuid: document.uuid })"
@remove-area="$emit('remove-area', $event)" @remove-area="$emit('remove-area', $event)"
@copy-field="$emit('copy-field', $event)" @copy-field="$emit('copy-field', $event)"
@ -178,6 +179,9 @@ export default {
return acc return acc
}, {}) }, {})
},
pagesText () {
return this.document.metadata?.pdf?.pages_text || {}
} }
}, },
beforeUpdate () { beforeUpdate () {

@ -12,8 +12,15 @@
:width="width" :width="width"
:height="height" :height="height"
class="rounded" class="rounded"
:alt="`Page ${number + 1} of ${totalPages}`"
@load="onImageLoad" @load="onImageLoad"
> >
<div
v-if="pageText"
class="sr-only"
role="region"
:aria-label="`Page ${number + 1} text content`"
>{{ pageText }}</div>
<div <div
class="top-0 bottom-0 left-0 right-0 absolute" class="top-0 bottom-0 left-0 right-0 absolute"
@pointerdown="onStartDraw" @pointerdown="onStartDraw"
@ -246,6 +253,11 @@ export default {
type: Boolean, type: Boolean,
required: false, required: false,
default: false default: false
},
pageText: {
type: String,
required: false,
default: ''
} }
}, },
emits: ['draw', 'drop-field', 'remove-area', 'copy-field', 'paste-field', 'scroll-to', 'copy-selected-areas', 'delete-selected-areas', 'autodetect-fields', 'add-custom-field', 'set-draw'], emits: ['draw', 'drop-field', 'remove-area', 'copy-field', 'paste-field', 'scroll-to', 'copy-selected-areas', 'delete-selected-areas', 'autodetect-fields', 'add-custom-field', 'set-draw'],

@ -106,6 +106,9 @@
<% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.signed_uuid, "#{index}.jpg")) %> <% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.signed_uuid, "#{index}.jpg")) %>
<page-container id="<%= "page-#{document.uuid}-#{index}" %>" class="block before:border before:absolute before:top-0 before:bottom-0 before:left-0 before:right-0 before:rounded relative mb-4" style="container-type: size; aspect-ratio: <%= width = page.metadata['width'] %> / <%= height = page.metadata['height'] %>"> <page-container id="<%= "page-#{document.uuid}-#{index}" %>" class="block before:border before:absolute before:top-0 before:bottom-0 before:left-0 before:right-0 before:rounded relative mb-4" style="container-type: size; aspect-ratio: <%= width = page.metadata['width'] %> / <%= height = page.metadata['height'] %>">
<img loading="lazy" src="<%= page.url %>" width="<%= width %>" class="rounded" height="<%= height %>" alt="<%= t('page') %> <%= index + 1 %> <%= t('of') %> <%= item['name'].presence || document.filename.base %>"> <img loading="lazy" src="<%= page.url %>" width="<%= width %>" class="rounded" height="<%= height %>" alt="<%= t('page') %> <%= index + 1 %> <%= t('of') %> <%= item['name'].presence || document.filename.base %>">
<% if (page_text = document.blob.metadata.dig('pdf', 'pages_text', index.to_s)).present? %>
<div class="sr-only" role="region" aria-label="<%= t('page') %> <%= index + 1 %> <%= t('text_content') %>"><%= page_text %></div>
<% end %>
<div class="top-0 bottom-0 left-0 right-0 absolute"> <div class="top-0 bottom-0 left-0 right-0 absolute">
<% document_annots_index[index]&.each do |annot| %> <% document_annots_index[index]&.each do |annot| %>
<%= render 'submissions/annotation', annot: %> <%= render 'submissions/annotation', annot: %>

@ -68,6 +68,9 @@
<% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.signed_uuid, "#{index}.jpg")) %> <% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.signed_uuid, "#{index}.jpg")) %>
<page-container class="block relative my-4 shadow-md" style="container-type: size; aspect-ratio: <%= width = page.metadata['width'] %> / <%= height = page.metadata['height'] %>"> <page-container class="block relative my-4 shadow-md" style="container-type: size; aspect-ratio: <%= width = page.metadata['width'] %> / <%= height = page.metadata['height'] %>">
<img loading="lazy" src="<%= page.url %>" width="<%= width %>" height="<%= height %>" alt="<%= t('page') %> <%= index + 1 %> <%= t('of') %> <%= item['name'].presence || document.filename.base %>"> <img loading="lazy" src="<%= page.url %>" width="<%= width %>" height="<%= height %>" alt="<%= t('page') %> <%= index + 1 %> <%= t('of') %> <%= item['name'].presence || document.filename.base %>">
<% if (page_text = document.blob.metadata.dig('pdf', 'pages_text', index.to_s)).present? %>
<div class="sr-only" role="region" aria-label="<%= t('page') %> <%= index + 1 %> <%= t('text_content') %>"><%= page_text %></div>
<% end %>
<div id="page-<%= [document.uuid, index].join('-') %>" class="top-0 bottom-0 left-0 right-0 absolute"> <div id="page-<%= [document.uuid, index].join('-') %>" class="top-0 bottom-0 left-0 right-0 absolute">
<% if annots = document_annots_index[index] %> <% if annots = document_annots_index[index] %>
<%= render 'submit_form/annotations', annots: %> <%= render 'submit_form/annotations', annots: %>

@ -365,6 +365,7 @@ en: &en
page_number: 'Page %{number}' page_number: 'Page %{number}'
page: Page page: Page
of: of of: of
text_content: text content
powered_by: Powered by powered_by: Powered by
count_documents_signed_with_html: '<b>%{count}</b> documents signed with' count_documents_signed_with_html: '<b>%{count}</b> documents signed with'
storage: Storage storage: Storage

@ -93,6 +93,9 @@ module Templates
attachment.metadata['pdf'] ||= {} attachment.metadata['pdf'] ||= {}
attachment.metadata['pdf']['number_of_pages'] = number_of_pages attachment.metadata['pdf']['number_of_pages'] = number_of_pages
pages_text = extract_page_texts(data, number_of_pages, max_pages)
attachment.metadata['pdf']['pages_text'] = pages_text unless pages_text.empty?
ApplicationRecord.no_touching do ApplicationRecord.no_touching do
attachment.save! attachment.save!
end end
@ -190,6 +193,28 @@ module Templates
data data
end end
# Collects the text of each PDF page, keyed by the zero-based page index as a string.
#
# Extraction is best-effort and never raises for a failed page: a page that
# cannot be opened or read is skipped, and a failure while releasing a page
# handle no longer discards the text already collected from other pages.
#
# data            - PDF content, passed as-is to Pdfium::Document.open_bytes.
# number_of_pages - page count of the document.
# max_pages       - upper bound on how many pages are read; the smaller of
#                   number_of_pages and max_pages is used.
#
# Returns a Hash of { "0" => "text", ... }. Pages whose stripped text is empty,
# or that failed to load, are omitted. Returns {} when the document itself
# cannot be opened.
def extract_page_texts(data, number_of_pages, max_pages = MAX_NUMBER_OF_PAGES_PROCESSED)
  pages_text = {}
  doc = Pdfium::Document.open_bytes(data)

  [number_of_pages, max_pages].min.times do |index|
    page = doc.get_page(index)
    text = page.text.strip
    pages_text[index.to_s] = text unless text.empty?
  rescue StandardError
    # One unreadable page must not stop the remaining pages from being read.
    nil
  ensure
    begin
      page&.close
    rescue StandardError
      # Previously a close failure fell through to the method-level rescue and
      # wiped every page's text; contain it so the loop carries on.
      nil
    end
  end

  pages_text
rescue StandardError
  # Keep whatever was collected before the failure; pages_text is nil only if
  # the failure happened before it was assigned.
  pages_text || {}
ensure
  doc&.close
end
def normalize_attachment_fields(template, attachments = template.documents) def normalize_attachment_fields(template, attachments = template.documents)
attachments.flat_map do |a| attachments.flat_map do |a|
pdf_fields = a.metadata['pdf'].delete('fields').to_a if a.metadata['pdf'].present? pdf_fields = a.metadata['pdf'].delete('fields').to_a if a.metadata['pdf'].present?

Loading…
Cancel
Save