optimize for large files

pull/133/head 1.2.4
DocuSeal 2 years ago
parent c639a3f733
commit ce39494f84

@ -67,6 +67,9 @@ RSpec/MultipleMemoizedHelpers:
Rails/I18nLocaleTexts:
Enabled: false
Rails/FindEach:
Enabled: false
Rails/SkipsModelValidations:
Enabled: false

@ -16,6 +16,7 @@ module Api
render json: {
schema:,
documents: documents.as_json(
methods: [:metadata],
include: {
preview_images: { methods: %i[url metadata filename] }
}

@ -0,0 +1,52 @@
# frozen_string_literal: true
# Serves a single page preview for a stored document attachment.
#
# When a preview image for the requested page is already attached, the client
# is redirected to its URL; otherwise the page is rendered on demand from the
# source document and the resulting image bytes are returned directly.
class PreviewDocumentPageController < ActionController::API
  include ActiveStorage::SetCurrent

  FORMAT = Templates::ProcessDocument::FORMAT

  # Responds with the preview of page `params[:id]` of the attachment
  # identified by `params[:attachment_uuid]`.
  #
  # - 404 (after a Rollbar warning) when running multitenant.
  # - 404 when no attachment has the given uuid.
  # - Redirect to the existing preview image when one whose blob filename is
  #   "<id><FORMAT>" is found.
  # - Otherwise the page image is generated from the document file and its
  #   bytes are rendered in the response body.
  def show
    if Docuseal.multitenant?
      Rollbar.warning('load page')

      return head :not_found
    end

    attachment = ActiveStorage::Attachment.find_by(uuid: params[:attachment_uuid])

    return head :not_found unless attachment

    preview_image = attachment.preview_images.joins(:blob).find_by(blob: { filename: "#{params[:id]}#{FORMAT}" })

    return redirect_to preview_image.url, allow_other_host: true if preview_image

    # The disk service already has the document on the local filesystem, so it
    # can be read in place; other services need a local copy first.
    file_path =
      if attachment.service.name == :disk
        ActiveStorage::Blob.service.path_for(attachment.key)
      else
        find_or_create_document_tempfile_path(attachment)
      end

    io = Templates::ProcessDocument.generate_pdf_preview_from_file(attachment, file_path, params[:id].to_i)

    # NOTE(review): the image bytes are sent via `render plain:`, i.e. with a
    # text/plain content type - confirm this is intended for <img> consumers.
    render plain: io.tap(&:rewind).read
  end

  private

  # Returns the path of a local copy of the attachment's file, downloading it
  # into the system temp directory on first use.
  #
  # The file is opened (and created if missing) under an exclusive lock so
  # that concurrent requests for the same attachment do not download it twice:
  # whoever obtains the lock while the file is still empty writes the content,
  # later holders see a non-empty file and skip the download.
  #
  # @param attachment [ActiveStorage::Attachment]
  # @return [String] path to the local file
  def find_or_create_document_tempfile_path(attachment)
    file_path = File.join(Dir.tmpdir, attachment.uuid)

    File.open(file_path, File::RDWR | File::CREAT, 0o644) do |f|
      f.flock(File::LOCK_EX)

      # rubocop:disable Style/ZeroLengthPredicate
      if f.size.zero?
        f.binmode

        f.write(attachment.download)
      end
      # rubocop:enable Style/ZeroLengthPredicate
    end

    file_path
  end
end

@ -6,12 +6,24 @@ class SubmissionsController < ApplicationController
load_and_authorize_resource :submission, only: %i[show destroy]
PRELOAD_ALL_PAGES_AMOUNT = 200
def show
ActiveRecord::Associations::Preloader.new(
records: [@submission],
associations: [:template, { template_schema_documents: [:blob, { preview_images_attachments: :blob }] }]
associations: [:template, { template_schema_documents: :blob }]
).call
total_pages =
@submission.template_schema_documents.sum { |e| e.metadata.dig('pdf', 'number_of_pages').to_i }
if total_pages < PRELOAD_ALL_PAGES_AMOUNT
ActiveRecord::Associations::Preloader.new(
records: @submission.template_schema_documents,
associations: [:blob, { preview_images_attachments: :blob }]
).call
end
render :show, layout: 'plain'
end

@ -6,15 +6,28 @@ class SubmitFormController < ApplicationController
skip_before_action :authenticate_user!
skip_authorization_check
PRELOAD_ALL_PAGES_AMOUNT = 200
def show
@submitter =
Submitter.preload(submission: [
:template, { template_schema_documents: [:blob, { preview_images_attachments: :blob }] }
])
.find_by!(slug: params[:slug])
@submitter = Submitter.find_by!(slug: params[:slug])
return redirect_to submit_form_completed_path(@submitter.slug) if @submitter.completed_at?
ActiveRecord::Associations::Preloader.new(
records: [@submitter],
associations: [submission: [:template, { template_schema_documents: :blob }]]
).call
total_pages =
@submitter.submission.template_schema_documents.sum { |e| e.metadata.dig('pdf', 'number_of_pages').to_i }
if total_pages < PRELOAD_ALL_PAGES_AMOUNT
ActiveRecord::Associations::Preloader.new(
records: @submitter.submission.template_schema_documents,
associations: [:blob, { preview_images_attachments: :blob }]
).call
end
Submitters::MaybeUpdateDefaultValues.call(@submitter, current_user)
cookies[:submitter_sid] = @submitter.signed_id

@ -30,6 +30,14 @@ class TemplatesController < ApplicationController
associations: [schema_documents: { preview_images_attachments: :blob }]
).call
@template_data =
@template.as_json.merge(
documents: @template.schema_documents.as_json(
methods: [:metadata],
include: { preview_images: { methods: %i[url metadata filename] } }
)
).to_json
render :edit, layout: 'plain'
end

@ -56,8 +56,26 @@ export default {
}
},
computed: {
numberOfPages () {
return this.document.metadata?.pdf?.number_of_pages || this.document.preview_images.length
},
sortedPreviewImages () {
return [...this.document.preview_images].sort((a, b) => parseInt(a.filename) - parseInt(b.filename))
const lazyloadMetadata = this.document.preview_images[this.document.preview_images.length - 1].metadata
return [...Array(this.numberOfPages).keys()].map((i) => {
return this.previewImagesIndex[i] || {
metadata: lazyloadMetadata,
id: Math.random().toString(),
url: `/preview/${this.document.uuid}/${i}.jpg`
}
})
},
previewImagesIndex () {
return this.document.preview_images.reduce((acc, e) => {
acc[parseInt(e.filename)] = e
return acc
}, {})
}
},
beforeUpdate () {

@ -5,11 +5,12 @@
>
<img
ref="image"
loading="lazy"
:src="image.url"
:width="width"
class="border rounded mb-4"
:height="height"
loading="lazy"
class="border rounded mb-4"
@load="onImageLoad"
>
<div
class="top-0 bottom-0 left-0 right-0 absolute"
@ -117,6 +118,10 @@ export default {
this.areaRefs = []
},
methods: {
onImageLoad (e) {
e.target.setAttribute('width', e.target.naturalWidth)
e.target.setAttribute('height', e.target.naturalHeight)
},
setAreaRefs (el) {
if (el) {
this.areaRefs.push(el)

@ -114,7 +114,7 @@ export default {
emits: ['scroll-to', 'change', 'remove', 'up', 'down', 'replace'],
computed: {
previewImage () {
return this.document.preview_images[0]
return [...this.document.preview_images].sort((a, b) => parseInt(a.filename) - parseInt(b.filename))[0]
}
},
mounted () {

@ -1,6 +1,8 @@
# frozen_string_literal: true
class SubmitterMailer < ApplicationMailer
MAX_ATTACHMENTS_SIZE = 10.megabytes
def invitation_email(submitter, body: nil, subject: nil)
@current_account = submitter.submission.template.account
@submitter = submitter
@ -75,14 +77,26 @@ class SubmitterMailer < ApplicationMailer
def add_completed_email_attachments!(submitter)
documents = Submitters.select_attachments_for_download(submitter)
documents.each do |attachment|
attachments[attachment.filename.to_s] = attachment.download
end
total_size = 0
audit_trail_data = nil
if submitter.submission.audit_trail.present?
attachments[submitter.submission.audit_trail.filename.to_s] = submitter.submission.audit_trail.download
audit_trail_data = submitter.submission.audit_trail.download
total_size = audit_trail_data.size
end
documents.each do |attachment|
data = attachment.download
total_size += data.size
break if total_size >= MAX_ATTACHMENTS_SIZE
attachments[attachment.filename.to_s] = data
end
attachments[submitter.submission.audit_trail.filename.to_s] = audit_trail_data if audit_trail_data
documents
end

@ -24,7 +24,7 @@
</div>
</div>
<% unless @template.deleted_at? %>
<%= form_for @submitter, url: start_form_path(@template.slug), data: { turbo_frame: :_top }, method: :put, html: { class: 'space-y-4' } do |f| %>
<%= form_for @submitter, url: start_form_path(@template.slug), data: { turbo_frame: :_top }, method: :put, html: { class: 'space-y-4', onsubmit: 'event.submitter.disabled = true' } do |f| %>
<div class="form-control !mt-0">
<%= f.label :email, class: 'label' %>
<%= f.email_field :email, value: current_user&.email, required: true, class: 'base-input', placeholder: 'Provide your email to start' %>

@ -44,12 +44,16 @@
<% fields_index = Templates.build_field_areas_index(@submission.template_fields || @submission.template.fields) %>
<% values = @submission.submitters.reduce({}) { |acc, sub| acc.merge(sub.values) } %>
<% attachments_index = ActiveStorage::Attachment.where(record: @submission.submitters, name: :attachments).preload(:blob).index_by(&:uuid) %>
<% page_blob_struct = Struct.new(:url, :metadata, keyword_init: true) %>
<% (@submission.template_schema || @submission.template.schema).each do |item| %>
<% document = @submission.template_schema_documents.find { |e| e.uuid == item['attachment_uuid'] } %>
<% document_annots_index = document.metadata.dig('pdf', 'annotations')&.group_by { |e| e['page'] } || {} %>
<% document.preview_images.sort_by { |a| a.filename.base.to_i }.each_with_index do |page, index| %>
<% preview_images_index = document.preview_images.loaded? ? document.preview_images.index_by { |e| e.filename.base.to_i } : {} %>
<% lazyload_metadata = document.preview_images.first.metadata %>
<% (document.metadata.dig('pdf', 'number_of_pages') || (document.preview_images.loaded? ? preview_images_index.size : document.preview_images.size)).times do |index| %>
<% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.uuid, "#{index}.jpg")) %>
<div id="<%= "page-#{document.uuid}-#{index}" %>" class="relative">
<img src="<%= page.url %>" width="<%= page.metadata['width'] %>" class="shadow-md mb-4" height="<%= page.metadata['height'] %>" loading="lazy">
<img loading="lazy" src="<%= page.url %>" width="<%= page.metadata['width'] %>" class="shadow-md mb-4" height="<%= page.metadata['height'] %>">
<div class="top-0 bottom-0 left-0 right-0 absolute">
<% document_annots_index[index]&.each do |annot| %>
<%= render 'submissions/annotation', annot: %>

@ -1,6 +1,7 @@
<% fields_index = Templates.build_field_areas_index(@submitter.submission.template_fields || @submitter.submission.template.fields) %>
<% values = @submitter.submission.submitters.reduce({}) { |acc, sub| acc.merge(sub.values) } %>
<% attachments_index = ActiveStorage::Attachment.where(record: @submitter.submission.submitters, name: :attachments).preload(:blob).index_by(&:uuid) %>
<% page_blob_struct = Struct.new(:url, :metadata, keyword_init: true) %>
<div style="max-height: -webkit-fill-available;">
<div id="scrollbox">
<div class="mx-auto block pb-72" style="max-width: 1000px">
@ -10,9 +11,12 @@
<% (@submitter.submission.template_schema || @submitter.submission.template.schema).each do |item| %>
<% document = @submitter.submission.template_schema_documents.find { |a| a.uuid == item['attachment_uuid'] } %>
<% document_annots_index = document.metadata.dig('pdf', 'annotations')&.group_by { |e| e['page'] } || {} %>
<% document.preview_images.sort_by { |a| a.filename.base.to_i }.each_with_index do |page, index| %>
<% preview_images_index = document.preview_images.loaded? ? document.preview_images.index_by { |e| e.filename.base.to_i } : {} %>
<% lazyload_metadata = document.preview_images.last.metadata %>
<% (document.metadata.dig('pdf', 'number_of_pages') || (document.preview_images.loaded? ? preview_images_index.size : document.preview_images.size)).times do |index| %>
<% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.uuid, "#{index}.jpg")) %>
<div class="relative my-4 shadow-md">
<img src="<%= page.url %>" width="<%= page.metadata['width'] %>" height="<%= page.metadata['height'] %>" loading="lazy">
<img loading="lazy" src="<%= page.url %>" width="<%= page.metadata['width'] %>" height="<%= page.metadata['height'] %>">
<div id="page-<%= [document.uuid, index].join('-') %>" class="top-0 bottom-0 left-0 right-0 absolute">
<% document_annots_index[index]&.each do |annot| %>
<%= render 'submissions/annotation', annot: %>

@ -1 +1 @@
<template-builder class="grid" data-is-direct-upload="<%= Docuseal.active_storage_public? %>" data-template="<%= @template.as_json.merge(documents: @template.schema_documents.as_json(include: { preview_images: { methods: %i[url metadata filename] } })).to_json %>"></template-builder>
<template-builder class="grid" data-is-direct-upload="<%= Docuseal.active_storage_public? %>" data-template="<%= @template_data %>"></template-builder>

@ -67,6 +67,7 @@ Rails.application.routes.draw do
resource :folder, only: %i[edit update], controller: 'templates_folders'
resources :submissions_export, only: %i[index new]
end
resources :preview_document_page, only: %i[show], path: '/preview/:attachment_uuid'
resources :start_form, only: %i[show update], path: 'd', param: 'slug' do
get :completed

@ -4,7 +4,7 @@ module Submissions
module EnsureResultGenerated
WAIT_FOR_RETRY = 2.seconds
CHECK_EVENT_INTERVAL = 1.second
CHECK_COMPLETE_TIMEOUT = 20.seconds
CHECK_COMPLETE_TIMEOUT = 90.seconds
WaitForCompleteTimeout = Class.new(StandardError)

@ -13,13 +13,15 @@ module Templates
record: template
)
document.preview_images_attachments.each do |preview_image|
ActiveStorage::Attachment.create!(
uuid: preview_image.uuid,
blob_id: preview_image.blob_id,
name: 'preview_images',
record: new_document
)
ApplicationRecord.no_touching do
document.preview_images_attachments.each do |preview_image|
ActiveStorage::Attachment.create!(
uuid: preview_image.uuid,
blob_id: preview_image.blob_id,
name: 'preview_images',
record: new_document
)
end
end
end
end

@ -3,6 +3,7 @@
module Templates
module CreateAttachments
PDF_CONTENT_TYPE = 'application/pdf'
ANNOTATIONS_SIZE_LIMIT = 6.megabytes
InvalidFileType = Class.new(StandardError)
module_function
@ -18,7 +19,10 @@ module Templates
document = template.documents.create!(blob:)
if blob.content_type == PDF_CONTENT_TYPE && blob.metadata['pdf'].nil?
blob.metadata['pdf'] = { 'annotations' => Templates::BuildAnnotations.call(document_data) }
annotations =
document_data.size > ANNOTATIONS_SIZE_LIMIT ? [] : Templates::BuildAnnotations.call(document_data)
blob.metadata['pdf'] = { 'annotations' => annotations }
blob.metadata['sha256'] = Base64.urlsafe_encode64(Digest::SHA256.digest(document_data))
end
@ -37,9 +41,10 @@ module Templates
data = file.read
if file.content_type == PDF_CONTENT_TYPE
annotations = data.size > ANNOTATIONS_SIZE_LIMIT ? [] : Templates::BuildAnnotations.call(data)
metadata = { 'identified' => true, 'analyzed' => true,
'sha256' => Base64.urlsafe_encode64(Digest::SHA256.digest(data)),
'pdf' => { 'annotations' => Templates::BuildAnnotations.call(data) } }
'pdf' => { 'annotations' => annotations } }
end
ActiveStorage::Blob.create_and_upload!(

@ -9,6 +9,12 @@ module Templates
PDF_CONTENT_TYPE = 'application/pdf'
Q = 35
MAX_WIDTH = 1400
# Page-count cap read by the preview generation loop: 70 when running
# multitenant, 40 otherwise.
MAX_NUMBER_OF_PAGES_PROCESSED = Docuseal.multitenant? ? 70 : 40
module_function
@ -42,14 +48,38 @@ module Templates
def generate_pdf_preview_images(attachment, data)
ActiveStorage::Attachment.where(name: ATTACHMENT_NAME, record: attachment).destroy_all
number_of_pages = HexaPDF::Document.new(io: StringIO.new(data)).pages.size - 1
number_of_pages = HexaPDF::Document.new(io: StringIO.new(data)).pages.size
(attachment.metadata['pdf'] ||= {})[:number_of_pages] = number_of_pages
attachment.save!
(0..number_of_pages).each do |page_number|
(0..[number_of_pages - 1, MAX_NUMBER_OF_PAGES_PROCESSED].min).each do |page_number|
page = Vips::Image.new_from_buffer(data, '', dpi: DPI, page: page_number)
page = page.resize(MAX_WIDTH / page.width.to_f)
io = StringIO.new(page.write_to_buffer(FORMAT, Q: Q, interlace: true))
ApplicationRecord.no_touching do
ActiveStorage::Attachment.create!(
blob: ActiveStorage::Blob.create_and_upload!(
io:, filename: "#{page_number}#{FORMAT}",
metadata: { analyzed: true, identified: true, width: page.width, height: page.height }
),
name: ATTACHMENT_NAME,
record: attachment
)
end
end
end
def generate_pdf_preview_from_file(attachment, file_path, page_number)
page = Vips::Image.new_from_file(file_path, dpi: DPI, page: page_number)
page = page.resize(MAX_WIDTH / page.width.to_f)
io = StringIO.new(page.write_to_buffer(FORMAT, Q: Q, interlace: true))
ApplicationRecord.no_touching do
ActiveStorage::Attachment.create!(
blob: ActiveStorage::Blob.create_and_upload!(
io:, filename: "#{page_number}#{FORMAT}",
@ -59,6 +89,8 @@ module Templates
record: attachment
)
end
io
end
end
end

Loading…
Cancel
Save