You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
docuseal/lib/templates/modify_documents.rb

506 lines
17 KiB

# frozen_string_literal: true
module Templates
module ModifyDocuments
# Raised by the validate_* helpers when the requested documents layout is not acceptable.
InvalidLayout = Class.new(StandardError)
# Candidate page sizes as [width, height] pairs in portrait orientation
# (presumably PDF points - confirm against the units Pdfium reports for page width/height).
A4_SIZE = [595, 842].freeze
LETTER_SIZE = [612, 792].freeze
# Largest per-dimension difference allowed when matching a page to one of the sizes above.
PAGE_SIZE_TOLERANCE = 6
# An image is treated as a scanned page when at least SCAN_WHITE_FRACTION of its
# black-and-white histogram counts sit at or above bin SCAN_WHITE_THRESHOLD.
SCAN_WHITE_THRESHOLD = 220
SCAN_WHITE_FRACTION = 0.6
# Generated PDFs of this size or larger are stored without extracted annotations.
ANNOTATIONS_SIZE_LIMIT = 6.megabytes
# Accepted values for a page reference's 'rotate' key.
ROTATIONS = [0, 90, 180, 270].freeze
# Keys every redaction rectangle must provide.
RECT_KEYS = %w[x y w h].freeze
module_function
# Applies +documents_layout+ to +template+: validates the layout, rebuilds the
# schema (creating new documents where pages changed), remaps field areas onto
# the new pages, drops conditions that point at removed fields and saves.
#
# @param template the template whose documents are being rearranged
# @param documents_layout [Array] entries carrying 'attachment_uuid' and 'pages'
# @return the saved template
# @raise [InvalidLayout] when validate_layout! rejects the layout
def call(template, documents_layout)
  layout_attachment_uuids = documents_layout.flat_map do |entry|
    [entry['attachment_uuid'], entry['pages'].to_a.pluck('attachment_uuid')]
  end
  layout_attachment_uuids = layout_attachment_uuids.flatten.uniq

  attachments_index = template.documents_attachments
                              .preload(:blob)
                              .where(uuid: layout_attachment_uuids)
                              .index_by(&:uuid)

  validate_layout!(template, documents_layout, attachments_index)

  page_mapping = {}
  rebuilt_schema = build_new_schema(template, documents_layout, attachments_index, page_mapping)

  # Dynamic schema items are not part of the layout; put them back at their
  # previous positions (or at the end when the new schema is shorter).
  template.schema.each_with_index do |item, position|
    next unless item['dynamic']

    rebuilt_schema.insert([position, rebuilt_schema.size].min, item)
  end

  removed_field_uuids = remap_fields(template, page_mapping)
  template.schema = rebuilt_schema

  remove_conditions(template.fields, removed_field_uuids)
  remove_conditions(template.schema, removed_field_uuids)

  template.save!
  template
end
# Builds the schema items for the non-dynamic documents described by
# +documents_layout+ and records in +mapping+ where every referenced page ends up.
#
# Entries without pages are dropped. Entries that still match their source
# attachment page-for-page keep their schema item; every other entry gets a
# newly built document whose uuid replaces 'attachment_uuid' in the schema item.
#
# +mapping+ is filled through add_page_mapping; targets for rebuilt documents
# carry the applied rotation as a third element.
def build_new_schema(template, documents_layout, attachments_index, mapping)
# Opened source documents, shared between entries and closed in the ensure below.
sources = {}
Pdfium.with_instance do
documents_layout.filter_map do |entry|
# Reuse the existing schema item, or start a new one named after the attachment file.
schema_item =
template.schema.find { |item| item['attachment_uuid'] == entry['attachment_uuid'] } ||
{ 'attachment_uuid' => entry['attachment_uuid'],
'name' => attachments_index[entry['attachment_uuid']].filename.base }
# A document left without pages is removed from the schema.
next if entry['pages'].blank?
if unchanged_entry?(entry, attachments_index)
# Nothing to rebuild: pages keep their attachment and position, no rotation recorded.
entry['pages'].each_with_index do |ref, index|
add_page_mapping(mapping, ref, [ref['attachment_uuid'], index])
end
schema_item
else
# A single image page stays an image; anything else is assembled into a PDF.
document = if standalone_image_entry?(entry, attachments_index)
build_image_document(template, entry, attachments_index)
else
build_document(template, schema_item, entry['pages'], attachments_index, sources)
end
entry['pages'].each_with_index do |ref, index|
add_page_mapping(mapping, ref, [document.uuid, index, ref['rotate'].to_i % 360])
end
# NOTE(review): 'google_drive_file_id' is dropped for rebuilt documents, presumably
# because the content no longer matches the linked file - confirm.
schema_item.except('google_drive_file_id').merge('attachment_uuid' => document.uuid)
end
end
ensure
# Runs inside the with_instance block, so sources are closed before it returns.
sources.each_value(&:close)
end
end
# Records +target+ in +mapping+ under the [attachment_uuid, page] key of +ref+
# and, when +ref+ carries a 'replaced_page', under that page's key as well.
def add_page_mapping(mapping, ref, target)
  own_key = [ref['attachment_uuid'], ref['page']]
  mapping[own_key] = target

  previous = ref['replaced_page']
  return unless previous

  previous_key = [previous['attachment_uuid'], previous['page']]
  mapping[previous_key] = target
end
# Checks that +documents_layout+ is a usable description of the template's
# non-dynamic documents.
#
# The layout must be non-empty with at least one page overall, list every
# non-dynamic schema document exactly once, not mention dynamic documents,
# reference only known attachments, and use each source page at most once.
#
# @raise [InvalidLayout] when any of the checks fails
def validate_layout!(template, documents_layout, attachments_index)
  raise InvalidLayout if documents_layout.blank?
  raise InvalidLayout if documents_layout.none? { |entry| entry['pages'].present? }

  dynamic_uuids = template.schema.select { |item| item['dynamic'] }.pluck('attachment_uuid')
  non_dynamic_uuids = template.schema.pluck('attachment_uuid') - dynamic_uuids
  layout_uuids = documents_layout.pluck('attachment_uuid')

  documents_valid =
    layout_uuids.uniq.size == layout_uuids.size &&
    (non_dynamic_uuids - layout_uuids).empty? &&
    !layout_uuids.intersect?(dynamic_uuids) &&
    layout_uuids.none? { |uuid| attachments_index[uuid].nil? }
  raise InvalidLayout unless documents_valid

  page_refs = documents_layout.flat_map { |entry| entry['pages'].to_a }
  page_refs.each { |ref| validate_ref!(ref, attachments_index) }

  distinct_refs = page_refs.uniq { |ref| [ref['attachment_uuid'], ref['page']] }
  raise InvalidLayout if distinct_refs.size != page_refs.size
end
# Validates one page reference: its attachment must be known, 'page' must be an
# Integer index inside that attachment, 'rotate' must be absent or one of
# ROTATIONS, and 'redact' must pass validate_redact!.
#
# @raise [InvalidLayout] when the reference is not valid
def validate_ref!(ref, attachments_index)
  attachment = attachments_index[ref['attachment_uuid']]
  raise InvalidLayout if attachment.nil?

  page = ref['page']
  page_in_range = page.is_a?(Integer) && (0...page_count(attachment)).cover?(page)
  raise InvalidLayout unless page_in_range

  rotation = ref['rotate']
  raise InvalidLayout unless rotation.nil? || ROTATIONS.include?(rotation)

  validate_redact!(ref['redact'])
end
# Validates the optional list of redaction rectangles of a page reference.
#
# +redact+ may be nil. Otherwise it must be an Array whose elements are
# hash-like objects providing a Numeric value between -1 and 2 for every key in
# RECT_KEYS.
#
# @raise [InvalidLayout] when +redact+ or any rectangle is malformed
def validate_redact!(redact)
  return if redact.nil?
  raise InvalidLayout unless redact.is_a?(Array)

  redact.each do |rect|
    # Reject non-hash elements (nil, numbers, strings, arrays) up front so that
    # malformed input surfaces as InvalidLayout instead of NoMethodError/TypeError.
    # respond_to?(:key?) also admits hash-like wrappers, not only plain Hash.
    valid = rect.respond_to?(:key?) &&
            RECT_KEYS.all? { |key| rect[key].is_a?(Numeric) && rect[key].to_f.between?(-1, 2) }
    raise InvalidLayout unless valid
  end
end
# Number of pages +attachment+ contributes: the 'number_of_pages' value stored
# under the 'pdf' metadata key for PDFs (0 when missing), 1 for anything else.
def page_count(attachment)
  return 1 unless attachment.content_type == Templates::ProcessDocument::PDF_CONTENT_TYPE

  attachment.metadata.dig('pdf', 'number_of_pages').to_i
end
# Opens +attachment+ as a PDF and returns the text and image nodes found on
# page +page_number+ after the page has been flattened, had its form objects
# unwrapped and been rotated.
#
# @return [Hash] 'text_nodes' (text plus x/y/w/h) and 'image_nodes' (x/y/w/h)
def page_objects(attachment, page_number)
  box_of = ->(node) { { 'x' => node.x, 'y' => node.y, 'w' => node.w, 'h' => node.h } }

  Pdfium::Document.open_bytes(attachment.download) do |doc|
    page = doc.get_page(page_number)
    # Page preparation steps, kept in this order.
    page.flatten
    page.unwrap_form_objects
    page.rotate

    texts = page.text_nodes.map { |node| { 'text' => node.content }.merge(box_of.call(node)) }
    images = page.image_nodes.map { |node| box_of.call(node) }

    { 'text_nodes' => texts, 'image_nodes' => images }
  end
end
# True when +entry+ lists exactly the pages of its own attachment, in their
# original order, without rotation or redaction.
def unchanged_entry?(entry, attachments_index)
  own_uuid = entry['attachment_uuid']
  refs = entry['pages']
  return false unless refs.size == page_count(attachments_index[own_uuid])

  refs.each_with_index.all? do |ref, position|
    ref['attachment_uuid'] == own_uuid &&
      ref['page'] == position &&
      ref['rotate'].to_i.zero? &&
      ref['redact'].blank?
  end
end
# Assembles a new PDF from +page_refs+ and stores it through save_document,
# using the attachment referenced by +schema_item+ for the file name.
#
# +sources+ is a cache of opened source documents shared across calls; this
# method adds to it but does not close anything.
def build_document(template, schema_item, page_refs, attachments_index, sources)
# Page-size hints are only needed when image pages have to be converted to PDF pages.
with_images = page_refs.any? { |ref| attachments_index[ref['attachment_uuid']].image? }
pdf_size = entry_pdf_page_size(page_refs, attachments_index, sources) if with_images
default_size = default_page_size(template.account) if with_images
io =
Pdfium::Document.create do |dest|
# Position in +dest+ where the next run of pages is inserted.
insert_index = 0
build_page_runs(page_refs, attachments_index).each do |uuid, pages_range, length, image_ops|
redact, rotate = image_ops
attachment = attachments_index[uuid]
# A PDF built from an image depends on the operations and page sizes applied to it,
# so those are part of its cache key; for PDFs image_ops is nil and the key is [uuid, nil].
key = attachment.image? ? [uuid, image_ops, pdf_size, default_size] : [uuid, image_ops]
source = sources[key] ||= open_or_build_pdf(attachment, redact:, rotate:, pdf_size:, default_size:)
dest.import_pages(source, pages: pages_range, index: insert_index)
insert_index += length
end
# Rotation and redaction of PDF pages are applied after import, on the destination pages.
apply_pdf_page_ops(dest, page_refs, attachments_index)
# NOTE(review): relies on Pdfium::Document.create returning the block's value (the saved IO) - confirm.
dest.save(StringIO.new)
end
save_document(template, attachments_index[schema_item['attachment_uuid']], io.string)
end
# Applies the requested redaction and rotation to the pages of +dest+ that came
# from PDF attachments; pages that came from images are skipped.
# +page_refs+ is indexed in the same order as the pages of +dest+.
def apply_pdf_page_ops(dest, page_refs, attachments_index)
  page_refs.each_with_index do |ref, page_index|
    next if attachments_index[ref['attachment_uuid']].image?

    angle = ref['rotate'].to_i % 360
    rects = ref['redact'].to_a
    next if angle.zero? && rects.blank?

    page = dest.get_page(page_index)

    if rects.present?
      page.redact(rects) { |bitmap, pixel_rects| encode_redacted_image_jpeg(bitmap, pixel_rects) }
    end

    unless angle.zero?
      # Page rotation is stored in quarter turns.
      page.rotation = (page.rotation + (angle / 90)) % 4
      page.rotate
    end
  end
end
# Groups consecutive page references that share an attachment (and, for images,
# the same redact/rotate operations) into runs.
#
# @return [Array<Array>] one [uuid, pages, length, image_ops] per run, where
#   pages is a comma-separated list of 1-based page numbers and image_ops is
#   nil for non-image attachments
def build_page_runs(page_refs, attachments_index)
  grouped = page_refs.each_with_object([]) do |ref, runs|
    uuid = ref['attachment_uuid']
    image_ops = nil
    if attachments_index[uuid].image?
      image_ops = [ref['redact'].presence, ref['rotate'].to_i % 360].presence
    end

    current = runs.last
    if current && current[0] == uuid && current[2] == image_ops
      current[1] << ref['page']
    else
      runs << [uuid, [ref['page']], image_ops]
    end
  end

  grouped.map do |uuid, pages, image_ops|
    one_based = pages.map { |page| page + 1 }
    [uuid, one_based.join(','), pages.size, image_ops]
  end
end
# True when +entry+ consists of exactly one page and that page comes from an
# image attachment.
def standalone_image_entry?(entry, attachments_index)
  pages = entry['pages']
  return false unless pages.size == 1

  attachments_index[pages.first['attachment_uuid']].image?
end
# Builds the document for a layout entry made of a single image page.
#
# When the page needs neither redaction nor rotation the source attachment is
# returned unchanged. Otherwise the image is redacted and/or rotated, re-encoded
# (JPEG stays JPEG, everything else becomes PNG), uploaded as a new blob and
# processed as a new template document.
#
# @return the source attachment, or the result of Templates::ProcessDocument.call
def build_image_document(template, entry, attachments_index)
  ref = entry['pages'].first
  attachment = attachments_index[ref['attachment_uuid']]
  rotate = ref['rotate'].to_i % 360
  return attachment if ref['redact'].blank? && rotate.zero?

  image = ImageUtils.load_vips(attachment.download, content_type: attachment.content_type, autorot: true)
  image = draw_image_redaction(image, ref['redact']) if ref['redact'].present?
  image = rotate_vips_image(image, rotate)

  extension, format_args, content_type =
    if attachment.content_type == 'image/jpeg'
      ['.jpg', { Q: 90 }, 'image/jpeg']
    else
      ['.png', {}, 'image/png']
    end
  data = image.write_to_buffer(extension, **format_args)

  # The blob is stored as already identified, so its declared type is never
  # corrected later. Label it with the format that was actually written: other
  # source formats are re-encoded as PNG and must not keep their old type/name.
  filename =
    if content_type == attachment.content_type
      attachment.filename.to_s
    else
      "#{attachment.filename.base}#{extension}"
    end

  blob = ActiveStorage::Blob.create_and_upload!(
    io: StringIO.new(data),
    filename:,
    metadata: { identified: true, analyzed: true },
    content_type:
  )
  document = template.documents.create!(blob:)
  Templates::ProcessDocument.call(document, data)
end
# Returns +image+ rotated by +rotate+ degrees (90, 180 or 270); any other value
# returns +image+ unchanged.
def rotate_vips_image(image, rotate)
  return image.rot90 if rotate == 90
  return image.rot180 if rotate == 180
  return image.rot270 if rotate == 270

  image
end
# Turns a raw bitmap (a hash with :data, :width, :height, :bands and :format)
# into JPEG data with every rectangle of +pixel_rects+ filled in.
#
# BGR-ordered formats are reordered to RGB first; :bgra is additionally
# flattened onto a white background. Each pixel rect is
# [left, top, width, height].
def encode_redacted_image_jpeg(bitmap, pixel_rects)
  raw = Vips::Image.new_from_memory_copy(bitmap[:data], bitmap[:width], bitmap[:height], bitmap[:bands], :uchar)
  pixel_format = bitmap[:format]

  canvas =
    if %i[bgr bgrx].include?(pixel_format)
      raw[2].bandjoin([raw[1], raw[0]])
    elsif pixel_format == :bgra
      raw[2].bandjoin([raw[1], raw[0], raw[3]]).flatten(background: 255)
    else
      raw
    end

  fill_ink = Array.new(canvas.bands, 0.0)
  pixel_rects.each do |left, top, rect_width, rect_height|
    canvas = canvas.draw_rect(fill_ink, left, top, rect_width, rect_height, fill: true)
  end

  canvas.write_to_buffer('.jpg', Q: 50, strip: true)
end
# Fills every rectangle of +rects+ on +image+ and returns the resulting image.
#
# Rectangles are hashes with 'x', 'y', 'w', 'h' given as fractions of the image
# width/height. They may reach outside the image; only the part that overlaps
# the image is painted, and rectangles with no overlap are skipped.
#
# @param image an image responding to width, height, bands and draw_rect
# @param rects [Array<Hash>] relative rectangles
# @return the image with all rectangles drawn (the input itself when nothing is drawn)
def draw_image_redaction(image, rects)
  # Zero ink for colour bands; band index 3 (a fourth band, if present) gets 255.
  ink = Array.new(image.bands) { |band| band == 3 ? 255.0 : 0.0 }

  # Converts a relative offset/length into a [first, last) pixel span clipped to
  # 0..limit. Rounding goes outwards so the requested area is always covered.
  pixel_span = lambda do |offset, length, limit|
    first = (offset.to_f * limit).floor.clamp(0, limit)
    last = ((offset.to_f + length.to_f) * limit).ceil.clamp(0, limit)
    [first, last]
  end

  rects.each do |rect|
    left, right = pixel_span.call(rect['x'], rect['w'], image.width)
    top, bottom = pixel_span.call(rect['y'], rect['h'], image.height)
    # Clip instead of shifting: a rectangle hanging off an edge must not grow
    # into the image, and one that lies fully outside must not paint anything.
    next if right <= left || bottom <= top

    image = image.draw_rect(ink, left, top, right - left, bottom - top, fill: true)
  end

  image
end
# Opens +attachment+ as a Pdfium document. Image attachments are first turned
# into PDF data via build_pdf_data_from_image using the given options; other
# attachments are opened from their downloaded bytes, ignoring the options.
def open_or_build_pdf(attachment, redact: nil, rotate: nil, pdf_size: nil, default_size: nil)
  return Pdfium::Document.open_bytes(attachment.download) unless attachment.image?

  pdf_data = build_pdf_data_from_image(attachment, pdf_size, default_size, redact:, rotate:)
  Pdfium::Document.open_bytes(pdf_data)
end
# Derives a standard page size from the last non-image page of +page_refs+.
#
# The page's dimensions are swapped when its requested rotation is 90 or 270
# and then matched against the known standard sizes. The opened source
# document is cached in +sources+ under [uuid, nil].
#
# @return [Array, nil] the matching size oriented like the page, or nil when
#   there is no PDF page or its size is not a standard one
def entry_pdf_page_size(page_refs, attachments_index, sources)
  pdf_ref = page_refs.reverse_each.find { |ref| !attachments_index[ref['attachment_uuid']].image? }
  return if pdf_ref.nil?

  uuid = pdf_ref['attachment_uuid']
  cache_key = [uuid, nil]
  sources[cache_key] ||= open_or_build_pdf(attachments_index[uuid])
  page = sources[cache_key].get_page(pdf_ref['page'])

  dimensions = [page.width, page.height]
  dimensions = dimensions.reverse unless (pdf_ref['rotate'].to_i % 180).zero?
  width, height = dimensions

  standard = standard_page_size(width, height)
  return if standard.nil?

  width > height ? standard.reverse : standard
end
# Returns LETTER_SIZE or A4_SIZE (in that order of preference) when +width+ and
# +height+ are within PAGE_SIZE_TOLERANCE of it in either orientation, nil otherwise.
def standard_page_size(width, height)
  close = ->(actual, expected) { (actual - expected).abs <= PAGE_SIZE_TOLERANCE }

  [LETTER_SIZE, A4_SIZE].find do |size|
    base_width, base_height = size
    upright = close.call(width, base_width) && close.call(height, base_height)
    upright || (close.call(width, base_height) && close.call(height, base_width))
  end
end
# Picks the fallback page size for +account+: LETTER_SIZE when the abbreviation
# of the account's timezone at the beginning of the current year is listed in
# TimeUtils::US_TIMEZONES, A4_SIZE otherwise.
def default_page_size(account)
  timezone = account.timezone
  abbr = TimeUtils.timezone_abbr(timezone, Time.current.beginning_of_year)
  return LETTER_SIZE if abbr.in?(TimeUtils::US_TIMEZONES)

  A4_SIZE
end
# True when +size+ ([width, height]) and +image+ are both wider than tall or
# both not; false when +size+ is nil.
def orientation_match?(size, image)
  return false if size.nil?

  size_is_wide = size[0] > size[1]
  image_is_wide = image.width > image.height
  size_is_wide == image_is_wide
end
# Returns LETTER_SIZE or A4_SIZE (checked in that order) when the image's
# short:long side ratio matches the page's, nil otherwise. The ratios are
# compared by cross-multiplication with a slack of one page short side.
def aspect_page_size(image)
  short_side, long_side = [image.width, image.height].minmax

  [LETTER_SIZE, A4_SIZE].find do |page|
    page_short, page_long = page
    cross_difference = (short_side * page_long) - (long_side * page_short)
    cross_difference.abs <= page_short
  end
end
# True when at least SCAN_WHITE_FRACTION of the image's black-and-white
# histogram counts fall into bins SCAN_WHITE_THRESHOLD and above.
def scanned_page_image?(image)
  histogram = image.colourspace('b-w').hist_find.to_a[0].flatten
  bright_count = histogram[SCAN_WHITE_THRESHOLD..].sum
  total_count = histogram.sum

  bright_count >= total_count * SCAN_WHITE_FRACTION
end
# Renders an image attachment as single-page PDF data.
#
# Works from the attachment's first preview image: converts it to sRGB,
# flattens transparency onto white, applies the optional redaction and
# rotation, encodes it as a palette image and hands it to build_image_page_pdf.
def build_pdf_data_from_image(attachment, pdf_size, default_size, redact: nil, rotate: nil)
  image = ImageUtils.load_vips(attachment.preview_images.first.download)
  image = image.colourspace(:srgb) unless image.interpretation == :srgb
  image = image.flatten(background: 255) if image.has_alpha?
  image = draw_image_redaction(image, redact) if redact.present?
  image = rotate_vips_image(image, rotate.to_i)

  # Palette bit depth is 2 to the number of distinct values in column 2 of stats
  # rows 1..3. NOTE(review): presumably the per-band sums - confirm against libvips stats layout.
  distinct_stat_values = image.stats.to_a[1..3].pluck(2).uniq.size
  bitdepth = 2**distinct_stat_values

  png_data = image.write_to_buffer(
    Templates::ProcessDocument::FORMAT,
    compression: 6, filter: 0, bitdepth:, palette: true,
    Q: Templates::ProcessDocument::Q, dither: 0, strip: true
  )

  build_image_page_pdf(image, png_data, pdf_size, default_size)
end
# Wraps encoded image data into a one-page PDF.
#
# Page size preference: +pdf_size+ when its orientation matches the image, else
# a standard size matching the image's aspect ratio, else +default_size+ (the
# latter two turned to landscape for wide images). When none of the first two
# applies and the image does not look like a scanned page, the page is sized to
# the scaled image itself; otherwise the image is centred on the chosen page.
def build_image_page_pdf(image, png_data, pdf_size, default_size)
  pdf_size = nil unless orientation_match?(pdf_size, image)
  aspect_size = pdf_size.nil? ? aspect_page_size(image) : nil

  target_size =
    if pdf_size
      pdf_size
    else
      base_size = aspect_size || default_size
      image.width > image.height ? base_size.reverse : base_size
    end
  page_width, page_height = target_size

  # Largest scale at which the image still fits on the page.
  scale = [page_width / image.width.to_f, page_height / image.height.to_f].min

  if pdf_size.nil? && aspect_size.nil? && !scanned_page_image?(image)
    return Templates::BuildImagePagePdf.call(png_data, page_width: image.width * scale,
                                                       page_height: image.height * scale)
  end

  image_width = image.width * scale
  image_height = image.height * scale
  offset_x = (page_width - image_width) / 2.0
  offset_y = (page_height - image_height) / 2.0

  Templates::BuildImagePagePdf.call(png_data, page_width:, page_height:,
                                              image_box: [offset_x, offset_y, image_width, image_height])
end
# Uploads +data+ as a new PDF blob named after +old_attachment+, attaches it to
# the template as a document and runs Templates::ProcessDocument on it.
#
# Annotations are extracted only for data smaller than ANNOTATIONS_SIZE_LIMIT;
# blank metadata entries are omitted.
#
# @return the result of Templates::ProcessDocument.call
def save_document(template, old_attachment, data)
  annotations = []
  annotations = Templates::BuildAnnotations.call(data) if data.size < ANNOTATIONS_SIZE_LIMIT

  sha256 = Base64.urlsafe_encode64(Digest::SHA256.digest(data))
  pdf_metadata = { annotations: }.compact_blank
  metadata = { identified: true, analyzed: true, pdf: pdf_metadata, sha256: }.compact_blank

  blob = ActiveStorage::Blob.create_and_upload!(
    io: StringIO.new(data),
    filename: "#{old_attachment.filename.base}.pdf",
    metadata:,
    content_type: Templates::ProcessDocument::PDF_CONTENT_TYPE
  )

  document = template.documents.create!(blob:)
  Templates::ProcessDocument.call(document, data)
end
# Rewrites the areas of the template's fields according to +mapping+.
#
# Areas on dynamic documents are left alone. Areas on non-dynamic documents are
# moved to their mapped attachment/page (and rotated), or dropped when their
# page has no mapping. Fields that had areas and lost all of them are removed
# from the template.
#
# @return [Array] uuids of the removed fields
def remap_fields(template, mapping)
  non_dynamic_uuids = template.schema.reject { |item| item['dynamic'] }.pluck('attachment_uuid')
  removed_field_uuids = []

  template.fields = template.fields.reject do |field|
    # Fields without areas are kept untouched.
    next false if field['areas'].blank?

    field['areas'] = field['areas'].filter_map do |area|
      next area unless non_dynamic_uuids.include?(area['attachment_uuid'])

      new_uuid, new_page, rotate = mapping[[area['attachment_uuid'], area['page']]]
      next if new_uuid.nil?

      moved_area = area.merge('attachment_uuid' => new_uuid, 'page' => new_page)
      rotate_area(moved_area, rotate.to_i)
    end

    orphaned = field['areas'].blank?
    removed_field_uuids << field['uuid'] if orphaned
    orphaned
  end

  removed_field_uuids
end
# Returns +area+ with its relative 'x', 'y', 'w', 'h' transformed for a page
# rotation of 90, 180 or 270 degrees. Any other +rotate+ value returns +area+
# itself.
def rotate_area(area, rotate)
  x, y, w, h = area.values_at('x', 'y', 'w', 'h')

  rotated_box =
    if rotate == 90
      { 'x' => 1 - y - h, 'y' => x, 'w' => h, 'h' => w }
    elsif rotate == 180
      { 'x' => 1 - x - w, 'y' => 1 - y - h }
    elsif rotate == 270
      { 'x' => y, 'y' => 1 - x - w, 'w' => h, 'h' => w }
    end

  rotated_box ? area.merge(rotated_box) : area
end
# Strips from every item's 'conditions' the entries whose 'field_uuid' is one
# of +removed_field_uuids+. Items are modified in place; nothing happens when
# no fields were removed.
def remove_conditions(items, removed_field_uuids)
  return if removed_field_uuids.blank?

  items.each do |item|
    conditions = item['conditions']
    next if conditions.blank?

    item['conditions'] = conditions.reject do |condition|
      removed_field_uuids.include?(condition['field_uuid'])
    end
  end
end
end
end