diff --git a/app/javascript/elements/page_container.js b/app/javascript/elements/page_container.js
index ca5221ba..b59afd5b 100644
--- a/app/javascript/elements/page_container.js
+++ b/app/javascript/elements/page_container.js
@@ -2,6 +2,12 @@ export default class extends HTMLElement {
connectedCallback () {
const image = this.querySelector('img')
+ this.setAttribute('role', 'img')
+
+ if (image?.alt) {
+ this.setAttribute('aria-label', image.alt)
+ }
+
image.addEventListener('load', (e) => {
image.setAttribute('width', e.target.naturalWidth)
image.setAttribute('height', e.target.naturalHeight)
diff --git a/app/javascript/template_builder/document.vue b/app/javascript/template_builder/document.vue
index 83b2c486..022976c5 100644
--- a/app/javascript/template_builder/document.vue
+++ b/app/javascript/template_builder/document.vue
@@ -26,6 +26,7 @@
:image="image"
:attachment-uuid="document.uuid"
:with-fields-detection="withFieldsDetection"
+ :page-text="pagesText[String(index)]"
@drop-field="$emit('drop-field', { ...$event, attachment_uuid: document.uuid })"
@remove-area="$emit('remove-area', $event)"
@copy-field="$emit('copy-field', $event)"
@@ -178,6 +179,9 @@ export default {
return acc
}, {})
+ },
+ pagesText () { // Computed: per-page text taken from the document's PDF metadata; the template looks entries up with String(index)
+ return this.document.metadata?.pdf?.pages_text || {} // empty object when metadata/pdf is absent or pages_text is falsy, so lookups yield undefined instead of throwing
}
},
beforeUpdate () {
diff --git a/app/javascript/template_builder/page.vue b/app/javascript/template_builder/page.vue
index 550658c4..377618e7 100644
--- a/app/javascript/template_builder/page.vue
+++ b/app/javascript/template_builder/page.vue
@@ -12,8 +12,15 @@
:width="width"
:height="height"
class="rounded"
+ :alt="`Page ${number + 1} of ${totalPages}`"
@load="onImageLoad"
>
+
{{ pageText }}
" class="block before:border before:absolute before:top-0 before:bottom-0 before:left-0 before:right-0 before:rounded relative mb-4" style="container-type: size; aspect-ratio: <%= width = page.metadata['width'] %> / <%= height = page.metadata['height'] %>">
+ <% if (page_text = document.blob.metadata.dig('pdf', 'pages_text', index.to_s)).present? %>
+ <%= page_text %>
+ <% end %>
<% document_annots_index[index]&.each do |annot| %>
<%= render 'submissions/annotation', annot: %>
diff --git a/app/views/submit_form/show.html.erb b/app/views/submit_form/show.html.erb
index 0723870a..c703fad0 100644
--- a/app/views/submit_form/show.html.erb
+++ b/app/views/submit_form/show.html.erb
@@ -68,6 +68,9 @@
<% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.signed_uuid, "#{index}.jpg")) %>
+ <% if (page_text = document.blob.metadata.dig('pdf', 'pages_text', index.to_s)).present? %>
+ <%= page_text %>
+ <% end %>
<% if annots = document_annots_index[index] %>
<%= render 'submit_form/annotations', annots: %>
diff --git a/config/locales/i18n.yml b/config/locales/i18n.yml
index 1ff45911..73c09467 100644
--- a/config/locales/i18n.yml
+++ b/config/locales/i18n.yml
@@ -365,6 +365,7 @@ en: &en
page_number: 'Page %{number}'
page: Page
of: of
+ text_content: text content
powered_by: Powered by
count_documents_signed_with_html: '%{count} documents signed with'
storage: Storage
diff --git a/lib/templates/process_document.rb b/lib/templates/process_document.rb
index 6b40a502..b116f145 100644
--- a/lib/templates/process_document.rb
+++ b/lib/templates/process_document.rb
@@ -93,6 +93,9 @@ module Templates
attachment.metadata['pdf'] ||= {}
attachment.metadata['pdf']['number_of_pages'] = number_of_pages
+ pages_text = extract_page_texts(data, number_of_pages, max_pages)
+ attachment.metadata['pdf']['pages_text'] = pages_text unless pages_text.empty?
+
ApplicationRecord.no_touching do
attachment.save!
end
@@ -190,6 +193,28 @@ module Templates
data
end
+ def extract_page_texts(data, number_of_pages, max_pages = MAX_NUMBER_OF_PAGES_PROCESSED) # Returns a hash of stripped page text keyed by the page index as a string; best-effort, yields {} if anything outside the per-page loop raises
+ pages_text = {}
+ doc = Pdfium::Document.open_bytes(data) # opens the PDF from the in-memory bytes passed as data
+ pages_to_process = [number_of_pages, max_pages].min # never read more than max_pages pages
+
+ pages_to_process.times do |index|
+ page = doc.get_page(index)
+ text = page.text.strip
+ pages_text[index.to_s] = text unless text.empty? # pages whose stripped text is empty get no entry
+ rescue StandardError # block-level rescue: a failure on one page is swallowed and the loop continues with the next index
+ nil
+ ensure
+ page&.close # page is nil when get_page itself raised, hence the safe navigation
+ end
+
+ pages_text
+ rescue StandardError # method-level rescue: any other failure (e.g. while opening the document) discards partial results
+ {}
+ ensure
+ doc&.close # doc is nil when open_bytes raised
+ end
+
def normalize_attachment_fields(template, attachments = template.documents)
attachments.flat_map do |a|
pdf_fields = a.metadata['pdf'].delete('fields').to_a if a.metadata['pdf'].present?