mirror of https://github.com/docusealco/docuseal
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
222 lines
6.6 KiB
222 lines
6.6 KiB
# frozen_string_literal: true
|
|
|
|
module Templates
|
|
module ProcessDocument
|
|
# Not referenced by the methods visible in this module; presumably consumed
# elsewhere - verify before removing.
DPI = 200
# File extension passed to libvips as the output format of default previews.
FORMAT = '.png'
# ActiveStorage attachment name under which generated previews are stored.
ATTACHMENT_NAME = 'preview_images'

# Content types that are decoded with LoadBmp instead of the libvips buffer loader.
BMP_REGEXP = %r{\Aimage/(?:bmp|x-bmp|x-ms-bmp)\z}
PDF_CONTENT_TYPE = 'application/pdf'
# Default size of the thread pool used to render PDF pages.
CONCURRENCY = 2
# Q option passed to the PNG writer.
Q = 95
# Q option for previews written in a non-default format (JPEG), configurable
# through PAGE_QUALITY. The value is parsed strictly in base 10; an empty or
# non-numeric setting falls back to 35 instead of silently becoming 0, and the
# result is clamped to 1..100.
JPEG_Q = (Integer(ENV.fetch('PAGE_QUALITY', '35'), 10, exception: false) || 35).clamp(1, 100)
# Width, in pixels, that previews are rendered or resized to.
MAX_WIDTH = 1400
# Default for the `max_pages` keyword of the preview generators.
MAX_NUMBER_OF_PAGES_PROCESSED = 15
# Field extraction requires data smaller than this; flattening is skipped for
# data larger than this.
MAX_FLATTEN_FILE_SIZE = 20.megabytes
# PDFs at or above this size get their page limit reduced to 1.
GENERATE_PREVIEW_SIZE_LIMIT = 50.megabytes
# Not referenced by the methods visible in this module; presumably consumed
# elsewhere - verify before removing.
US_LETTER_SIZE = { 'width' => MAX_WIDTH, 'height' => 1812 }.freeze
|
|
|
|
module_function
|
|
|
|
# Generates preview images for an uploaded document and, for PDFs, optionally
# records the fields returned by Templates::FindAcroFields.
#
# @param attachment [Object] record responding to `content_type`, `image?` and `metadata`
# @param data [String] raw file contents
# @param extract_fields [Boolean] run field extraction (only for PDFs smaller
#   than MAX_FLATTEN_FILE_SIZE)
# @param max_pages [Integer] forwarded to generate_pdf_preview_images
# @return [Object] the attachment that was passed in
def call(attachment, data, extract_fields: false, max_pages: MAX_NUMBER_OF_PAGES_PROCESSED)
  unless attachment.content_type == PDF_CONTENT_TYPE
    # Non-PDF uploads only get a preview when they are images; anything else is a no-op.
    generate_preview_image(attachment, data) if attachment.image?

    return attachment
  end

  parsed_pdf = nil
  acro_fields = nil

  if extract_fields && data.size < MAX_FLATTEN_FILE_SIZE
    parsed_pdf = HexaPDF::Document.new(io: StringIO.new(data))
    acro_fields = Templates::FindAcroFields.call(parsed_pdf, attachment, data)
  end

  # Passing the already parsed document (or nil) lets the generator skip a second parse.
  generate_pdf_preview_images(attachment, data, parsed_pdf, max_pages:)

  # metadata['pdf'] is initialised by generate_pdf_preview_images above.
  attachment.metadata['pdf']['fields'] = acro_fields if acro_fields

  attachment
end
|
|
|
|
# Records PDF metadata (page count and, optionally, extracted fields) on the
# attachment without generating any preview images.
#
# @param attachment [Object] record responding to `content_type` and `metadata`
# @param data [String] raw PDF contents
# @param extract_fields [Boolean] run Templates::FindAcroFields when the
#   attachment is a PDF smaller than MAX_FLATTEN_FILE_SIZE
# @return [Object] the attachment that was passed in
def process(attachment, data, extract_fields: false)
  scan_fields = attachment.content_type == PDF_CONTENT_TYPE &&
                extract_fields &&
                data.size < MAX_FLATTEN_FILE_SIZE

  # The document is parsed exactly once regardless of whether fields are extracted.
  document = HexaPDF::Document.new(io: StringIO.new(data))
  form_fields = Templates::FindAcroFields.call(document, attachment, data) if scan_fields

  page_count = document.pages.size

  attachment.metadata['pdf'] ||= {}
  pdf_metadata = attachment.metadata['pdf']
  pdf_metadata['number_of_pages'] = page_count
  pdf_metadata['fields'] = form_fields if form_fields

  attachment
end
|
|
|
|
# Replaces the attachment's existing previews with a single PNG preview that
# is MAX_WIDTH pixels wide.
#
# @param attachment [Object] record responding to `content_type`; becomes the
#   `record` of the created preview attachment
# @param data [String] raw image contents
# @return [ActiveStorage::Attachment] the newly created preview attachment
def generate_preview_image(attachment, data)
  ActiveStorage::Attachment.where(name: ATTACHMENT_NAME, record: attachment).destroy_all

  source =
    if BMP_REGEXP.match?(attachment.content_type)
      LoadBmp.call(data)
    else
      Vips::Image.new_from_buffer(data, '')
    end

  # Rotate before measuring: autorot can swap width and height, so the scale
  # factor has to come from the upright image for the result to be MAX_WIDTH wide.
  upright = source.autorot
  image = upright.resize(MAX_WIDTH / upright.width.to_f)

  # Palette depth is 2 ** (number of distinct entries in column 2 of stats rows 1..3).
  # NOTE(review): presumably those are per-band sums, so images whose bands are
  # identical get a smaller palette - confirm against the libvips stats layout.
  bitdepth = 2**image.stats.to_a[1..3].pluck(2).uniq.size

  io = StringIO.new(
    image.write_to_buffer(FORMAT, compression: 7, filter: 0, bitdepth:, palette: true, Q: Q, dither: 0)
  )

  ActiveStorage::Attachment.create!(
    blob: ActiveStorage::Blob.create_and_upload!(
      io:, filename: "0#{FORMAT}",
      metadata: { analyzed: true, identified: true, width: image.width, height: image.height }
    ),
    name: ATTACHMENT_NAME,
    record: attachment
  )
end
|
|
|
|
# Rebuilds the preview images of a PDF attachment: removes the old previews,
# stores the page count in the attachment metadata and renders the leading pages.
#
# @param attachment [Object] record whose previews are regenerated
# @param data [String] raw PDF contents
# @param pdf [HexaPDF::Document, nil] already parsed document; parsed from
#   `data` when nil
# @param max_pages [Integer] page limit used while `data` is smaller than
#   GENERATE_PREVIEW_SIZE_LIMIT
# @return [Object] whatever generate_document_preview_images returns
def generate_pdf_preview_images(attachment, data, pdf = nil, max_pages: MAX_NUMBER_OF_PAGES_PROCESSED)
  ActiveStorage::Attachment.where(name: ATTACHMENT_NAME, record: attachment).destroy_all

  pdf ||= HexaPDF::Document.new(io: StringIO.new(data))
  page_count = pdf.pages.size

  # Rendering works on the flattened bytes when flattening applies.
  render_data = maybe_flatten_form(data, pdf)

  attachment.metadata['pdf'] ||= {}
  attachment.metadata['pdf']['number_of_pages'] = page_count

  ApplicationRecord.no_touching { attachment.save! }

  page_limit =
    if render_data.size < GENERATE_PREVIEW_SIZE_LIMIT
      max_pages
    else
      1
    end

  # NOTE(review): the range below is inclusive and zero-based, so up to
  # `page_limit + 1` pages are rendered - confirm whether that is intended.
  last_page_index = [page_count - 1, page_limit].min

  generate_document_preview_images(attachment, render_data, 0..last_page_index)
end
|
|
|
|
# Renders the given pages on a fixed-size thread pool and attaches every
# successfully built blob to the attachment as a preview image.
#
# @param attachment [Object] record the previews are attached to
# @param data [String] PDF bytes opened with Pdfium
# @param range [Enumerable<Integer>] zero-based page indexes to render
# @param concurrency [Integer] number of threads in the pool
# @return [Array] results of the page promises (nil for pages that failed)
def generate_document_preview_images(attachment, data, range, concurrency: CONCURRENCY)
  doc = Pdfium::Document.open_bytes(data)
  pool = Concurrent::FixedThreadPool.new(concurrency)

  page_jobs = range.map do |index|
    Concurrent::Promise.execute(executor: pool) { build_and_upload_blob(doc, index) }
  end

  # value! re-raises the failure of any promise that was rejected.
  uploaded_blobs = Concurrent::Promise.zip(*page_jobs).value!

  uploaded_blobs.each do |uploaded|
    # build_and_upload_blob yields nil for pages it could not render.
    next unless uploaded

    ApplicationRecord.no_touching do
      ActiveStorage::Attachment.create!(blob: uploaded, name: ATTACHMENT_NAME, record: attachment)
    end
  end
ensure
  # Both may still be nil when opening the document or creating the pool failed.
  doc&.close
  pool&.kill
end
|
|
|
|
# Renders one page of an open Pdfium document at MAX_WIDTH, encodes it and
# uploads the result as an ActiveStorage blob (the blob is not saved here).
#
# @param doc [Pdfium::Document] open document
# @param page_number [Integer] page index passed to `doc.get_page`
# @param format [String] output format; FORMAT selects the palette PNG
#   encoder, anything else is written interlaced with JPEG_Q
# @return [ActiveStorage::Blob, nil] the uploaded blob, or nil when Vips or
#   Pdfium raised (the error is reported to Rollbar when it is defined)
def build_and_upload_blob(doc, page_number, format = FORMAT)
  doc_page = doc.get_page(page_number)

  bitmap, bitmap_width, bitmap_height = doc_page.render_to_bitmap(width: MAX_WIDTH)

  # The bitmap is loaded as 4 bands of unsigned chars and tagged as sRGB.
  image = Vips::Image.new_from_memory(bitmap, bitmap_width, bitmap_height, 4, :uchar)
                     .copy(interpretation: :srgb)

  encoded =
    if format != FORMAT
      image.write_to_buffer(format, interlace: true, Q: JPEG_Q)
    else
      # Palette depth is 2 ** (number of distinct entries in column 2 of stats rows 1..3).
      bitdepth = 2**image.stats.to_a[1..3].pluck(2).uniq.size

      image.write_to_buffer(format, compression: 7, filter: 0, bitdepth:, palette: true, Q: Q, dither: 0)
    end

  ActiveStorage::Blob.new(
    filename: "#{page_number}#{format}",
    metadata: { analyzed: true, identified: true, width: image.width, height: image.height }
  ).tap { |new_blob| new_blob.upload(StringIO.new(encoded)) }
rescue Vips::Error, Pdfium::PdfiumError => e
  Rollbar.warning(e) if defined?(Rollbar)

  nil
ensure
  # doc_page is nil when get_page itself failed.
  doc_page&.close
end
|
|
|
|
# Returns the PDF bytes with its AcroForm flattened, or the original bytes
# when flattening is skipped or fails.
#
# Flattening is skipped for data larger than MAX_FLATTEN_FILE_SIZE and for
# documents whose `acro_form` is blank. Note that `pdf` is modified in place.
#
# @param data [String] original PDF bytes
# @param pdf [HexaPDF::Document] parsed document for `data`
# @return [String] flattened PDF bytes, or `data` unchanged
# @raise [StandardError] errors are re-raised only in the development environment
def maybe_flatten_form(data, pdf)
  return data if data.size > MAX_FLATTEN_FILE_SIZE || pdf.acro_form.blank?

  form = pdf.acro_form

  form.each_field do |field|
    # Only combo boxes with options whose current value matches the
    # placeholder pattern get their value cleared.
    placeholder_combo =
      field.field_type == :Ch &&
      field[:Opt].present? &&
      %i[combo_box editable_combo_box].include?(field.concrete_field_type) &&
      field.field_value.to_s.match?(FindAcroFields::SELECT_PLACEHOLDER_REGEXP)

    field[:V] = '' if placeholder_combo
  end

  form.create_appearances(force: true) if form[:NeedAppearances]
  form.flatten

  output = StringIO.new
  pdf.write(output, incremental: false, validate: false)

  output.string
rescue StandardError
  # Best effort outside development: fall back to the untouched bytes.
  return data unless Rails.env.development?

  raise
end
|
|
|
|
# Collects the fields stored under metadata['pdf']['fields'] of each
# attachment, removing them from the metadata, and assigns every field to the
# template's first submitter.
#
# @param template [Object] responds to `submitters` (and `documents` for the default)
# @param attachments [Enumerable] attachments to read; defaults to `template.documents`
# @return [Array<Hash>] all collected fields, each with 'submitter_uuid' set
def normalize_attachment_fields(template, attachments = template.documents)
  attachments.flat_map do |document|
    next [] unless document.metadata['pdf'].present?

    # delete both reads the fields and strips them from the metadata.
    extracted = document.metadata['pdf'].delete('fields').to_a

    next [] if extracted.blank?

    extracted.each { |field| field['submitter_uuid'] = template.submitters.first['uuid'] }
  end
end
|
|
|
|
# Renders a single page of the PDF at `file_path` as a '.jpeg' preview and
# attaches it to the attachment.
#
# @param attachment [Object] record the preview is attached to
# @param file_path [String] path opened with Pdfium
# @param page_number [Integer] page index to render
# @return [ActiveStorage::Attachment] the created preview attachment
def generate_pdf_preview_from_file(attachment, file_path, page_number)
  doc = Pdfium::Document.open_file(file_path)

  # NOTE(review): build_and_upload_blob returns nil when rendering fails, and
  # unlike generate_document_preview_images there is no nil check here, so
  # create! is then called with `blob: nil` - confirm this is intended.
  jpeg_blob = build_and_upload_blob(doc, page_number, '.jpeg')

  ApplicationRecord.no_touching do
    ActiveStorage::Attachment.create!(blob: jpeg_blob, name: ATTACHMENT_NAME, record: attachment)
  end
ensure
  # doc is nil when open_file itself failed.
  doc&.close
end
|
|
end
|
|
end
|