optimize for large files

pull/133/head 1.2.4
DocuSeal 2 years ago
parent c639a3f733
commit ce39494f84

@ -67,6 +67,9 @@ RSpec/MultipleMemoizedHelpers:
Rails/I18nLocaleTexts: Rails/I18nLocaleTexts:
Enabled: false Enabled: false
Rails/FindEach:
Enabled: false
Rails/SkipsModelValidations: Rails/SkipsModelValidations:
Enabled: false Enabled: false

@ -16,6 +16,7 @@ module Api
render json: { render json: {
schema:, schema:,
documents: documents.as_json( documents: documents.as_json(
methods: [:metadata],
include: { include: {
preview_images: { methods: %i[url metadata filename] } preview_images: { methods: %i[url metadata filename] }
} }

@ -0,0 +1,52 @@
# frozen_string_literal: true
# Serves one rendered page of a document attachment (routed under
# /preview/:attachment_uuid/:id). When a stored preview image for the page
# exists the client is redirected to it; otherwise the page is rendered from
# the source file on demand and the image bytes are returned directly.
class PreviewDocumentPageController < ActionController::API
  include ActiveStorage::SetCurrent

  FORMAT = Templates::ProcessDocument::FORMAT

  # Only a plain non-negative integer is accepted as a page id.
  PAGE_ID_REGEXP = /\A\d+\z/

  # Responds with:
  # * 404 when running multitenant, when the page id is not a plain
  #   non-negative integer, or when the attachment is unknown;
  # * a redirect to the stored preview image when one exists for the page;
  # * the freshly generated image otherwise.
  def show
    if Docuseal.multitenant?
      Rollbar.warning('load page')

      return head :not_found
    end

    # `to_i` silently maps junk such as "abc" to 0 and "007" to 7. The image
    # would then be stored under a different filename than the one looked up
    # below, so every repeat request would regenerate it and add a duplicate
    # attachment. Reject non-numeric ids and normalise the rest up front.
    return head :not_found unless params[:id].to_s.match?(PAGE_ID_REGEXP)

    page_number = params[:id].to_i

    attachment = ActiveStorage::Attachment.find_by(uuid: params[:attachment_uuid])

    return head :not_found unless attachment

    preview_image =
      attachment.preview_images.joins(:blob).find_by(blob: { filename: "#{page_number}#{FORMAT}" })

    return redirect_to preview_image.url, allow_other_host: true if preview_image

    file_path =
      if attachment.service.name == :disk
        # Disk service: read the stored file in place, no copy needed.
        ActiveStorage::Blob.service.path_for(attachment.key)
      else
        find_or_create_document_tempfile_path(attachment)
      end

    io = Templates::ProcessDocument.generate_pdf_preview_from_file(attachment, file_path, page_number)

    # Binary image data must not be sent as text/plain; derive the MIME type
    # from the same extension used to encode the image.
    send_data io.tap(&:rewind).read, type: Rack::Mime.mime_type(FORMAT), disposition: 'inline'
  end

  private

  # Returns the path of a local copy of the attachment's file under
  # Dir.tmpdir, downloading it first if the copy is still empty.
  #
  # The file is opened (and created if missing) under an exclusive flock so
  # that concurrent callers for the same attachment do not write it at the
  # same time; whoever gets the lock first fills the file, later callers see
  # a non-empty file and skip the download.
  def find_or_create_document_tempfile_path(attachment)
    file_path = "#{Dir.tmpdir}/#{attachment.uuid}"

    File.open(file_path, File::RDWR | File::CREAT, 0o644) do |f|
      f.flock(File::LOCK_EX)

      # rubocop:disable Style/ZeroLengthPredicate
      if f.size.zero?
        f.binmode

        f.write(attachment.download)
      end
      # rubocop:enable Style/ZeroLengthPredicate
    end

    file_path
  end
end

@ -6,11 +6,23 @@ class SubmissionsController < ApplicationController
load_and_authorize_resource :submission, only: %i[show destroy] load_and_authorize_resource :submission, only: %i[show destroy]
PRELOAD_ALL_PAGES_AMOUNT = 200
def show def show
ActiveRecord::Associations::Preloader.new( ActiveRecord::Associations::Preloader.new(
records: [@submission], records: [@submission],
associations: [:template, { template_schema_documents: [:blob, { preview_images_attachments: :blob }] }] associations: [:template, { template_schema_documents: :blob }]
).call
total_pages =
@submission.template_schema_documents.sum { |e| e.metadata.dig('pdf', 'number_of_pages').to_i }
if total_pages < PRELOAD_ALL_PAGES_AMOUNT
ActiveRecord::Associations::Preloader.new(
records: @submission.template_schema_documents,
associations: [:blob, { preview_images_attachments: :blob }]
).call ).call
end
render :show, layout: 'plain' render :show, layout: 'plain'
end end

@ -6,15 +6,28 @@ class SubmitFormController < ApplicationController
skip_before_action :authenticate_user! skip_before_action :authenticate_user!
skip_authorization_check skip_authorization_check
PRELOAD_ALL_PAGES_AMOUNT = 200
def show def show
@submitter = @submitter = Submitter.find_by!(slug: params[:slug])
Submitter.preload(submission: [
:template, { template_schema_documents: [:blob, { preview_images_attachments: :blob }] }
])
.find_by!(slug: params[:slug])
return redirect_to submit_form_completed_path(@submitter.slug) if @submitter.completed_at? return redirect_to submit_form_completed_path(@submitter.slug) if @submitter.completed_at?
ActiveRecord::Associations::Preloader.new(
records: [@submitter],
associations: [submission: [:template, { template_schema_documents: :blob }]]
).call
total_pages =
@submitter.submission.template_schema_documents.sum { |e| e.metadata.dig('pdf', 'number_of_pages').to_i }
if total_pages < PRELOAD_ALL_PAGES_AMOUNT
ActiveRecord::Associations::Preloader.new(
records: @submitter.submission.template_schema_documents,
associations: [:blob, { preview_images_attachments: :blob }]
).call
end
Submitters::MaybeUpdateDefaultValues.call(@submitter, current_user) Submitters::MaybeUpdateDefaultValues.call(@submitter, current_user)
cookies[:submitter_sid] = @submitter.signed_id cookies[:submitter_sid] = @submitter.signed_id

@ -30,6 +30,14 @@ class TemplatesController < ApplicationController
associations: [schema_documents: { preview_images_attachments: :blob }] associations: [schema_documents: { preview_images_attachments: :blob }]
).call ).call
@template_data =
@template.as_json.merge(
documents: @template.schema_documents.as_json(
methods: [:metadata],
include: { preview_images: { methods: %i[url metadata filename] } }
)
).to_json
render :edit, layout: 'plain' render :edit, layout: 'plain'
end end

@ -56,8 +56,26 @@ export default {
} }
}, },
computed: { computed: {
numberOfPages () {
return this.document.metadata?.pdf?.number_of_pages || this.document.preview_images.length
},
sortedPreviewImages () { sortedPreviewImages () {
return [...this.document.preview_images].sort((a, b) => parseInt(a.filename) - parseInt(b.filename)) const lazyloadMetadata = this.document.preview_images[this.document.preview_images.length - 1].metadata
return [...Array(this.numberOfPages).keys()].map((i) => {
return this.previewImagesIndex[i] || {
metadata: lazyloadMetadata,
id: Math.random().toString(),
url: `/preview/${this.document.uuid}/${i}.jpg`
}
})
},
previewImagesIndex () {
return this.document.preview_images.reduce((acc, e) => {
acc[parseInt(e.filename)] = e
return acc
}, {})
} }
}, },
beforeUpdate () { beforeUpdate () {

@ -5,11 +5,12 @@
> >
<img <img
ref="image" ref="image"
loading="lazy"
:src="image.url" :src="image.url"
:width="width" :width="width"
class="border rounded mb-4"
:height="height" :height="height"
loading="lazy" class="border rounded mb-4"
@load="onImageLoad"
> >
<div <div
class="top-0 bottom-0 left-0 right-0 absolute" class="top-0 bottom-0 left-0 right-0 absolute"
@ -117,6 +118,10 @@ export default {
this.areaRefs = [] this.areaRefs = []
}, },
methods: { methods: {
onImageLoad (e) {
e.target.setAttribute('width', e.target.naturalWidth)
e.target.setAttribute('height', e.target.naturalHeight)
},
setAreaRefs (el) { setAreaRefs (el) {
if (el) { if (el) {
this.areaRefs.push(el) this.areaRefs.push(el)

@ -114,7 +114,7 @@ export default {
emits: ['scroll-to', 'change', 'remove', 'up', 'down', 'replace'], emits: ['scroll-to', 'change', 'remove', 'up', 'down', 'replace'],
computed: { computed: {
previewImage () { previewImage () {
return this.document.preview_images[0] return [...this.document.preview_images].sort((a, b) => parseInt(a.filename) - parseInt(b.filename))[0]
} }
}, },
mounted () { mounted () {

@ -1,6 +1,8 @@
# frozen_string_literal: true # frozen_string_literal: true
class SubmitterMailer < ApplicationMailer class SubmitterMailer < ApplicationMailer
MAX_ATTACHMENTS_SIZE = 10.megabytes
def invitation_email(submitter, body: nil, subject: nil) def invitation_email(submitter, body: nil, subject: nil)
@current_account = submitter.submission.template.account @current_account = submitter.submission.template.account
@submitter = submitter @submitter = submitter
@ -75,14 +77,26 @@ class SubmitterMailer < ApplicationMailer
def add_completed_email_attachments!(submitter) def add_completed_email_attachments!(submitter)
documents = Submitters.select_attachments_for_download(submitter) documents = Submitters.select_attachments_for_download(submitter)
documents.each do |attachment| total_size = 0
attachments[attachment.filename.to_s] = attachment.download audit_trail_data = nil
end
if submitter.submission.audit_trail.present? if submitter.submission.audit_trail.present?
attachments[submitter.submission.audit_trail.filename.to_s] = submitter.submission.audit_trail.download audit_trail_data = submitter.submission.audit_trail.download
total_size = audit_trail_data.size
end
documents.each do |attachment|
data = attachment.download
total_size += data.size
break if total_size >= MAX_ATTACHMENTS_SIZE
attachments[attachment.filename.to_s] = data
end end
attachments[submitter.submission.audit_trail.filename.to_s] = audit_trail_data if audit_trail_data
documents documents
end end

@ -24,7 +24,7 @@
</div> </div>
</div> </div>
<% unless @template.deleted_at? %> <% unless @template.deleted_at? %>
<%= form_for @submitter, url: start_form_path(@template.slug), data: { turbo_frame: :_top }, method: :put, html: { class: 'space-y-4' } do |f| %> <%= form_for @submitter, url: start_form_path(@template.slug), data: { turbo_frame: :_top }, method: :put, html: { class: 'space-y-4', onsubmit: 'event.submitter.disabled = true' } do |f| %>
<div class="form-control !mt-0"> <div class="form-control !mt-0">
<%= f.label :email, class: 'label' %> <%= f.label :email, class: 'label' %>
<%= f.email_field :email, value: current_user&.email, required: true, class: 'base-input', placeholder: 'Provide your email to start' %> <%= f.email_field :email, value: current_user&.email, required: true, class: 'base-input', placeholder: 'Provide your email to start' %>

@ -44,12 +44,16 @@
<% fields_index = Templates.build_field_areas_index(@submission.template_fields || @submission.template.fields) %> <% fields_index = Templates.build_field_areas_index(@submission.template_fields || @submission.template.fields) %>
<% values = @submission.submitters.reduce({}) { |acc, sub| acc.merge(sub.values) } %> <% values = @submission.submitters.reduce({}) { |acc, sub| acc.merge(sub.values) } %>
<% attachments_index = ActiveStorage::Attachment.where(record: @submission.submitters, name: :attachments).preload(:blob).index_by(&:uuid) %> <% attachments_index = ActiveStorage::Attachment.where(record: @submission.submitters, name: :attachments).preload(:blob).index_by(&:uuid) %>
<% page_blob_struct = Struct.new(:url, :metadata, keyword_init: true) %>
<% (@submission.template_schema || @submission.template.schema).each do |item| %> <% (@submission.template_schema || @submission.template.schema).each do |item| %>
<% document = @submission.template_schema_documents.find { |e| e.uuid == item['attachment_uuid'] } %> <% document = @submission.template_schema_documents.find { |e| e.uuid == item['attachment_uuid'] } %>
<% document_annots_index = document.metadata.dig('pdf', 'annotations')&.group_by { |e| e['page'] } || {} %> <% document_annots_index = document.metadata.dig('pdf', 'annotations')&.group_by { |e| e['page'] } || {} %>
<% document.preview_images.sort_by { |a| a.filename.base.to_i }.each_with_index do |page, index| %> <% preview_images_index = document.preview_images.loaded? ? document.preview_images.index_by { |e| e.filename.base.to_i } : {} %>
<% lazyload_metadata = document.preview_images.first.metadata %>
<% (document.metadata.dig('pdf', 'number_of_pages') || (document.preview_images.loaded? ? preview_images_index.size : document.preview_images.size)).times do |index| %>
<% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.uuid, "#{index}.jpg")) %>
<div id="<%= "page-#{document.uuid}-#{index}" %>" class="relative"> <div id="<%= "page-#{document.uuid}-#{index}" %>" class="relative">
<img src="<%= page.url %>" width="<%= page.metadata['width'] %>" class="shadow-md mb-4" height="<%= page.metadata['height'] %>" loading="lazy"> <img loading="lazy" src="<%= page.url %>" width="<%= page.metadata['width'] %>" class="shadow-md mb-4" height="<%= page.metadata['height'] %>">
<div class="top-0 bottom-0 left-0 right-0 absolute"> <div class="top-0 bottom-0 left-0 right-0 absolute">
<% document_annots_index[index]&.each do |annot| %> <% document_annots_index[index]&.each do |annot| %>
<%= render 'submissions/annotation', annot: %> <%= render 'submissions/annotation', annot: %>

@ -1,6 +1,7 @@
<% fields_index = Templates.build_field_areas_index(@submitter.submission.template_fields || @submitter.submission.template.fields) %> <% fields_index = Templates.build_field_areas_index(@submitter.submission.template_fields || @submitter.submission.template.fields) %>
<% values = @submitter.submission.submitters.reduce({}) { |acc, sub| acc.merge(sub.values) } %> <% values = @submitter.submission.submitters.reduce({}) { |acc, sub| acc.merge(sub.values) } %>
<% attachments_index = ActiveStorage::Attachment.where(record: @submitter.submission.submitters, name: :attachments).preload(:blob).index_by(&:uuid) %> <% attachments_index = ActiveStorage::Attachment.where(record: @submitter.submission.submitters, name: :attachments).preload(:blob).index_by(&:uuid) %>
<% page_blob_struct = Struct.new(:url, :metadata, keyword_init: true) %>
<div style="max-height: -webkit-fill-available;"> <div style="max-height: -webkit-fill-available;">
<div id="scrollbox"> <div id="scrollbox">
<div class="mx-auto block pb-72" style="max-width: 1000px"> <div class="mx-auto block pb-72" style="max-width: 1000px">
@ -10,9 +11,12 @@
<% (@submitter.submission.template_schema || @submitter.submission.template.schema).each do |item| %> <% (@submitter.submission.template_schema || @submitter.submission.template.schema).each do |item| %>
<% document = @submitter.submission.template_schema_documents.find { |a| a.uuid == item['attachment_uuid'] } %> <% document = @submitter.submission.template_schema_documents.find { |a| a.uuid == item['attachment_uuid'] } %>
<% document_annots_index = document.metadata.dig('pdf', 'annotations')&.group_by { |e| e['page'] } || {} %> <% document_annots_index = document.metadata.dig('pdf', 'annotations')&.group_by { |e| e['page'] } || {} %>
<% document.preview_images.sort_by { |a| a.filename.base.to_i }.each_with_index do |page, index| %> <% preview_images_index = document.preview_images.loaded? ? document.preview_images.index_by { |e| e.filename.base.to_i } : {} %>
<% lazyload_metadata = document.preview_images.last.metadata %>
<% (document.metadata.dig('pdf', 'number_of_pages') || (document.preview_images.loaded? ? preview_images_index.size : document.preview_images.size)).times do |index| %>
<% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.uuid, "#{index}.jpg")) %>
<div class="relative my-4 shadow-md"> <div class="relative my-4 shadow-md">
<img src="<%= page.url %>" width="<%= page.metadata['width'] %>" height="<%= page.metadata['height'] %>" loading="lazy"> <img loading="lazy" src="<%= page.url %>" width="<%= page.metadata['width'] %>" height="<%= page.metadata['height'] %>">
<div id="page-<%= [document.uuid, index].join('-') %>" class="top-0 bottom-0 left-0 right-0 absolute"> <div id="page-<%= [document.uuid, index].join('-') %>" class="top-0 bottom-0 left-0 right-0 absolute">
<% document_annots_index[index]&.each do |annot| %> <% document_annots_index[index]&.each do |annot| %>
<%= render 'submissions/annotation', annot: %> <%= render 'submissions/annotation', annot: %>

@ -1 +1 @@
<template-builder class="grid" data-is-direct-upload="<%= Docuseal.active_storage_public? %>" data-template="<%= @template.as_json.merge(documents: @template.schema_documents.as_json(include: { preview_images: { methods: %i[url metadata filename] } })).to_json %>"></template-builder> <template-builder class="grid" data-is-direct-upload="<%= Docuseal.active_storage_public? %>" data-template="<%= @template_data %>"></template-builder>

@ -67,6 +67,7 @@ Rails.application.routes.draw do
resource :folder, only: %i[edit update], controller: 'templates_folders' resource :folder, only: %i[edit update], controller: 'templates_folders'
resources :submissions_export, only: %i[index new] resources :submissions_export, only: %i[index new]
end end
resources :preview_document_page, only: %i[show], path: '/preview/:attachment_uuid'
resources :start_form, only: %i[show update], path: 'd', param: 'slug' do resources :start_form, only: %i[show update], path: 'd', param: 'slug' do
get :completed get :completed

@ -4,7 +4,7 @@ module Submissions
module EnsureResultGenerated module EnsureResultGenerated
WAIT_FOR_RETRY = 2.seconds WAIT_FOR_RETRY = 2.seconds
CHECK_EVENT_INTERVAL = 1.second CHECK_EVENT_INTERVAL = 1.second
CHECK_COMPLETE_TIMEOUT = 20.seconds CHECK_COMPLETE_TIMEOUT = 90.seconds
WaitForCompleteTimeout = Class.new(StandardError) WaitForCompleteTimeout = Class.new(StandardError)

@ -13,6 +13,7 @@ module Templates
record: template record: template
) )
ApplicationRecord.no_touching do
document.preview_images_attachments.each do |preview_image| document.preview_images_attachments.each do |preview_image|
ActiveStorage::Attachment.create!( ActiveStorage::Attachment.create!(
uuid: preview_image.uuid, uuid: preview_image.uuid,
@ -25,3 +26,4 @@ module Templates
end end
end end
end end
end

@ -3,6 +3,7 @@
module Templates module Templates
module CreateAttachments module CreateAttachments
PDF_CONTENT_TYPE = 'application/pdf' PDF_CONTENT_TYPE = 'application/pdf'
ANNOTATIONS_SIZE_LIMIT = 6.megabytes
InvalidFileType = Class.new(StandardError) InvalidFileType = Class.new(StandardError)
module_function module_function
@ -18,7 +19,10 @@ module Templates
document = template.documents.create!(blob:) document = template.documents.create!(blob:)
if blob.content_type == PDF_CONTENT_TYPE && blob.metadata['pdf'].nil? if blob.content_type == PDF_CONTENT_TYPE && blob.metadata['pdf'].nil?
blob.metadata['pdf'] = { 'annotations' => Templates::BuildAnnotations.call(document_data) } annotations =
document_data.size > ANNOTATIONS_SIZE_LIMIT ? [] : Templates::BuildAnnotations.call(document_data)
blob.metadata['pdf'] = { 'annotations' => annotations }
blob.metadata['sha256'] = Base64.urlsafe_encode64(Digest::SHA256.digest(document_data)) blob.metadata['sha256'] = Base64.urlsafe_encode64(Digest::SHA256.digest(document_data))
end end
@ -37,9 +41,10 @@ module Templates
data = file.read data = file.read
if file.content_type == PDF_CONTENT_TYPE if file.content_type == PDF_CONTENT_TYPE
annotations = data.size > ANNOTATIONS_SIZE_LIMIT ? [] : Templates::BuildAnnotations.call(data)
metadata = { 'identified' => true, 'analyzed' => true, metadata = { 'identified' => true, 'analyzed' => true,
'sha256' => Base64.urlsafe_encode64(Digest::SHA256.digest(data)), 'sha256' => Base64.urlsafe_encode64(Digest::SHA256.digest(data)),
'pdf' => { 'annotations' => Templates::BuildAnnotations.call(data) } } 'pdf' => { 'annotations' => annotations } }
end end
ActiveStorage::Blob.create_and_upload!( ActiveStorage::Blob.create_and_upload!(

@ -9,6 +9,12 @@ module Templates
PDF_CONTENT_TYPE = 'application/pdf' PDF_CONTENT_TYPE = 'application/pdf'
Q = 35 Q = 35
MAX_WIDTH = 1400 MAX_WIDTH = 1400
# Upper bound applied to the page range rendered up front by
# generate_pdf_preview_images; the value depends on the deployment mode.
MAX_NUMBER_OF_PAGES_PROCESSED = Docuseal.multitenant? ? 70 : 40
module_function module_function
@ -42,14 +48,38 @@ module Templates
def generate_pdf_preview_images(attachment, data) def generate_pdf_preview_images(attachment, data)
ActiveStorage::Attachment.where(name: ATTACHMENT_NAME, record: attachment).destroy_all ActiveStorage::Attachment.where(name: ATTACHMENT_NAME, record: attachment).destroy_all
number_of_pages = HexaPDF::Document.new(io: StringIO.new(data)).pages.size - 1 number_of_pages = HexaPDF::Document.new(io: StringIO.new(data)).pages.size
(attachment.metadata['pdf'] ||= {})[:number_of_pages] = number_of_pages
attachment.save!
(0..number_of_pages).each do |page_number| (0..[number_of_pages - 1, MAX_NUMBER_OF_PAGES_PROCESSED].min).each do |page_number|
page = Vips::Image.new_from_buffer(data, '', dpi: DPI, page: page_number) page = Vips::Image.new_from_buffer(data, '', dpi: DPI, page: page_number)
page = page.resize(MAX_WIDTH / page.width.to_f) page = page.resize(MAX_WIDTH / page.width.to_f)
io = StringIO.new(page.write_to_buffer(FORMAT, Q: Q, interlace: true)) io = StringIO.new(page.write_to_buffer(FORMAT, Q: Q, interlace: true))
ApplicationRecord.no_touching do
ActiveStorage::Attachment.create!(
blob: ActiveStorage::Blob.create_and_upload!(
io:, filename: "#{page_number}#{FORMAT}",
metadata: { analyzed: true, identified: true, width: page.width, height: page.height }
),
name: ATTACHMENT_NAME,
record: attachment
)
end
end
end
def generate_pdf_preview_from_file(attachment, file_path, page_number)
page = Vips::Image.new_from_file(file_path, dpi: DPI, page: page_number)
page = page.resize(MAX_WIDTH / page.width.to_f)
io = StringIO.new(page.write_to_buffer(FORMAT, Q: Q, interlace: true))
ApplicationRecord.no_touching do
ActiveStorage::Attachment.create!( ActiveStorage::Attachment.create!(
blob: ActiveStorage::Blob.create_and_upload!( blob: ActiveStorage::Blob.create_and_upload!(
io:, filename: "#{page_number}#{FORMAT}", io:, filename: "#{page_number}#{FORMAT}",
@ -59,6 +89,8 @@ module Templates
record: attachment record: attachment
) )
end end
io
end end
end end
end end

Loading…
Cancel
Save