From 749043dfbbf8cb86fe577f08c80e70efc995a4b2 Mon Sep 17 00:00:00 2001 From: Alex Turchyn Date: Sat, 22 Jun 2024 13:16:22 +0300 Subject: [PATCH] process acro form fields --- .../template_documents_controller.rb | 2 +- app/controllers/templates_debug_controller.rb | 33 +++ .../templates_uploads_controller.rb | 9 +- app/javascript/template_builder/area.vue | 19 +- app/javascript/template_builder/builder.vue | 56 ++++- app/javascript/template_builder/i18n.js | 2 + config/routes.rb | 1 + lib/templates/create_attachments.rb | 8 +- lib/templates/find_acro_fields.rb | 231 ++++++++++++++++++ lib/templates/process_document.rb | 30 ++- 10 files changed, 374 insertions(+), 17 deletions(-) create mode 100644 app/controllers/templates_debug_controller.rb create mode 100644 lib/templates/find_acro_fields.rb diff --git a/app/controllers/template_documents_controller.rb b/app/controllers/template_documents_controller.rb index 3d89035d..c4af0e82 100644 --- a/app/controllers/template_documents_controller.rb +++ b/app/controllers/template_documents_controller.rb @@ -10,7 +10,7 @@ class TemplateDocumentsController < ApplicationController old_fields_hash = @template.fields.hash - documents = Templates::CreateAttachments.call(@template, params) + documents = Templates::CreateAttachments.call(@template, params, extract_fields: true) schema = documents.map do |doc| { attachment_uuid: doc.uuid, name: doc.filename.base } diff --git a/app/controllers/templates_debug_controller.rb b/app/controllers/templates_debug_controller.rb new file mode 100644 index 00000000..02d3d8b0 --- /dev/null +++ b/app/controllers/templates_debug_controller.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +class TemplatesDebugController < ApplicationController + load_and_authorize_resource :template + + def show + attachment = @template.documents.first + + pdf = HexaPDF::Document.new(io: StringIO.new(attachment.download)) + + fields = Templates::FindAcroFields.call(pdf, attachment) + + attachment.metadata['pdf'] ||= {} + attachment.metadata['pdf']['fields'] = fields + + @template.update!(fields: Templates::ProcessDocument.normalize_attachment_fields(@template, [attachment])) + + ActiveRecord::Associations::Preloader.new( + records: [@template], + associations: [schema_documents: { preview_images_attachments: :blob }] + ).call + + @template_data = + @template.as_json.merge( + documents: @template.schema_documents.as_json( + methods: %i[metadata signed_uuid], + include: { preview_images: { methods: %i[url metadata filename] } } + ) + ).to_json + + render 'templates/edit', layout: 'plain' + end +end diff --git a/app/controllers/templates_uploads_controller.rb b/app/controllers/templates_uploads_controller.rb index 4519eb78..db24dd80 100644 --- a/app/controllers/templates_uploads_controller.rb +++ b/app/controllers/templates_uploads_controller.rb @@ -12,11 +12,14 @@ class TemplatesUploadsController < ApplicationController save_template!(@template, url_params) - documents = Templates::CreateAttachments.call(@template, url_params || params) - + documents = Templates::CreateAttachments.call(@template, url_params || params, extract_fields: true) schema = documents.map { |doc| { attachment_uuid: doc.uuid, name: doc.filename.base } } - @template.update!(schema:) + fields = Templates::ProcessDocument.normalize_attachment_fields(@template, documents) + + schema.each { |item| item['pending_fields'] = true } if fields.present? + + @template.update!(schema:, fields:) SendTemplateCreatedWebhookRequestJob.perform_async('template_id' => @template.id) diff --git a/app/javascript/template_builder/area.vue b/app/javascript/template_builder/area.vue index 9a9aa08c..b1eade11 100644 --- a/app/javascript/template_builder/area.vue +++ b/app/javascript/template_builder/area.vue @@ -164,16 +164,16 @@
{{ t('signing_date') }} +
+
+ {{ char }} +
+
+
{ + if (item.pending_fields) { + this.pendingFieldAttachmentUuids.push(item.attachment_uuid) + } + }) }, unmounted () { document.removeEventListener('keyup', this.onKeyUp) @@ -608,6 +641,13 @@ export default { t (key) { return this.i18n[key] || i18nEn[key] || key }, + removePendingFields () { + this.template.fields = this.template.fields.filter((f) => { + return this.template.schema.find((item) => item.attachment_uuid === f.attachment_uuid && item.pending_fields) + }) + + this.save() + }, addField (type, area = null) { const field = { name: '', @@ -1066,6 +1106,18 @@ export default { } this.save() + + data.documents.forEach((attachment) => { + if (attachment.metadata?.pdf?.fields?.length) { + this.pendingFieldAttachmentUuids.push(attachment.uuid) + + attachment.metadata.pdf.fields.forEach((field) => { + field.submitter_uuid = this.selectedSubmitter.uuid + + this.template.fields.push(field) + }) + } + }) }, updateName (value) { this.template.name = value @@ -1232,6 +1284,8 @@ export default { }) }, save ({ force } = { force: false }) { + this.pendingFieldAttachmentUuids = [] + if (this.onChange) { this.onChange(this.template) } diff --git a/app/javascript/template_builder/i18n.js b/app/javascript/template_builder/i18n.js index 56fe5088..cbe6f69e 100644 --- a/app/javascript/template_builder/i18n.js +++ b/app/javascript/template_builder/i18n.js @@ -5,6 +5,8 @@ const en = { clear: 'Clear', align: 'Align', add_all_required_fields_to_continue: 'Add all required fields to continue', + uploaded_pdf_contains_form_fields_keep_or_remove_them: 'Uploaded PDF contains form fields. Keep or remove them?', + keep: 'Keep', left: 'Left', validation: 'Validation', right: 'Right', diff --git a/config/routes.rb b/config/routes.rb index ad4c7d15..e58baedb 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -76,6 +76,7 @@ Rails.application.routes.draw do resources :template_sharings_testing, only: %i[create] resources :templates, only: %i[index], controller: 'templates_dashboard' resources :templates, only: %i[new create edit update show destroy] do + resource :debug, only: %i[show], controller: 'templates_debug' if Rails.env.development? resources :documents, only: %i[create], controller: 'template_documents' resources :restore, only: %i[create], controller: 'templates_restore' resources :archived, only: %i[index], controller: 'templates_archived_submissions' diff --git a/lib/templates/create_attachments.rb b/lib/templates/create_attachments.rb index 7b479ca2..7418c1dd 100644 --- a/lib/templates/create_attachments.rb +++ b/lib/templates/create_attachments.rb @@ -9,17 +9,17 @@ module Templates module_function - def call(template, params) + def call(template, params, extract_fields: false) Array.wrap(params[:files].presence || params[:file]).map do |file| if file.content_type.exclude?('image') && file.content_type != PDF_CONTENT_TYPE next handle_file_types(template, file, params) end - handle_pdf_or_image(template, file, file.read, params) + handle_pdf_or_image(template, file, file.read, params, extract_fields:) end end - def handle_pdf_or_image(template, file, document_data = nil, params = {}) + def handle_pdf_or_image(template, file, document_data = nil, params = {}, extract_fields: false) document_data ||= file.read if file.content_type == PDF_CONTENT_TYPE @@ -44,7 +44,7 @@ module Templates document = template.documents.create!(blob:) - Templates::ProcessDocument.call(document, document_data) + Templates::ProcessDocument.call(document, document_data, extract_fields:) end def maybe_decrypt_pdf_or_raise(data, params) diff --git a/lib/templates/find_acro_fields.rb b/lib/templates/find_acro_fields.rb new file mode 100644 index 00000000..7b6dd90b --- /dev/null +++ b/lib/templates/find_acro_fields.rb @@ -0,0 +1,231 @@ +# frozen_string_literal: true + +module Templates + module FindAcroFields + PDF_CONTENT_TYPE = 'application/pdf' + + FIELD_NAME_REGEXP = /\A(?=.*\p{L})[\p{L}\d\s]+\z/ + + module_function + + # rubocop:disable Metrics + def call(pdf, attachment) + return [] unless pdf.acro_form + + fields, annots_index = build_fields_with_pages(pdf) + + fields.filter_map do |field| + areas = Array.wrap(field[:Kids] || field).filter_map do |child_field| + page = annots_index[child_field.hash] + + media_box = page[:MediaBox] + crop_box = page[:CropBox] || media_box + + media_box_start = [media_box[0], media_box[1]] + crop_shift = [crop_box[0] - media_box[0], crop_box[1] - media_box[1]] + + x0, y0, x1, y1 = child_field[:Rect] + + x0, y0 = correct_coordinates(x0, y0, crop_shift, media_box_start) + x1, y1 = correct_coordinates(x1, y1, crop_shift, media_box_start) + + page_width = media_box[2] - media_box[0] + page_height = media_box[3] - media_box[1] + + x = x0 + y = y0 + w = x1 - x0 + h = y1 - y0 + + transformed_y = page_height - y - h + + attrs = { + page: page.index, + x: x / page_width, + y: transformed_y / page_height, + w: w / page_width, + h: h / page_height, + attachment_uuid: attachment.uuid + } + + next if attrs[:w].zero? || attrs[:h].zero? + + if child_field[:MaxLen] && child_field.concrete_field_type == :comb_text_field + attrs[:cell_w] = w / page_width / child_field[:MaxLen].to_f + end + + attrs + end + + next if areas.blank? + + field_properties = build_field_properties(field) + + next if field_properties.blank? + next if field_properties[:default_value].present? + + if field_properties[:type].in?(%w[radio multiple]) + areas.each_with_index do |area, index| + area[:option_uuid] = field_properties[:options][index][:uuid] + end + end + + { + uuid: SecureRandom.uuid, + required: false, + readonly: false, + preferences: {}, + areas:, + **field_properties + } + end + rescue StandardError => e + raise if Rails.env.local? + + Rollbar.error(e) if defined?(Rollbar) + + [] + end + + def correct_coordinates(x_coord, y_coord, shift, media_box_start) + corrected_x = x_coord + shift[0] - media_box_start[0] + corrected_y = y_coord + shift[1] - media_box_start[1] + + [corrected_x, corrected_y] + end + + def build_field_properties(field) + field_name = field.full_field_name if field.full_field_name.to_s.match?(FIELD_NAME_REGEXP) + + field_name = field_name&.encode('utf-8', invalid: :replace, undef: :replace, replace: '') + + if field.field_type == :Btn && field.concrete_field_type == :radio_button && field[:Opt].present? + selected_option_index = (field.allowed_values || []).find_index(field.field_value) + selected_option = field[:Opt][selected_option_index] if selected_option_index + + { + name: field_name.to_s, + type: 'radio', + description: field[:TU], + options: build_options(field[:Opt], 'radio'), + default_value: selected_option + } + elsif field.field_type == :Btn && field.concrete_field_type == :check_box && + field[:Kids].present? && field[:Kids].size > 1 && field.allowed_values.present? + selected_option = (field.allowed_values || []).find { |v| v == field.field_value } + + return {} if field.allowed_values.include?(:BBox) + + { + name: field_name.to_s, + type: 'radio', + description: field[:TU], + options: build_options(field.allowed_values, 'radio'), + default_value: selected_option + } + elsif field.field_type == :Btn && field.concrete_field_type == :check_box + { + name: field_name.to_s, + type: 'checkbox', + description: field[:TU], + default_value: field.field_value.present? + } + elsif field.field_type == :Ch && + %i[combo_box editable_combo_box].include?(field.concrete_field_type) && field[:Opt].present? + { + name: field_name.to_s, + type: 'select', + description: field[:TU], + options: build_options(field[:Opt]), + default_value: field.field_value + } + elsif field.field_type == :Ch && field.concrete_field_type == :multi_select && field[:Opt].present? + { + name: field_name.to_s, + type: 'multiple', + description: field[:TU], + options: build_options(field[:Opt], 'multiple'), + default_value: field.field_value + } + elsif field.field_type == :Tx && field.concrete_field_type == :comb_text_field + { + name: field_name.to_s, + type: 'cells', + description: field[:TU], + default_value: field.field_value + } + elsif field.field_type == :Tx + { + name: field_name.to_s, + type: 'text', + description: field[:TU], + default_value: field.field_value + } + elsif field.field_type == :Sig + { + name: field_name.to_s, + type: 'signature', + description: field[:TU] + } + else + {} + end.compact + end + + def build_options(values, type = nil) + is_skip_single_value = type.in?(%w[radio multiple]) && values.uniq.size == 1 + + values.map do |option| + is_option_number = option.is_a?(Symbol) && option.to_s.match?(/\A\d+\z/) + + option = option.encode('utf-8', invalid: :replace, undef: :replace, replace: '') if option.is_a?(String) + + { + uuid: SecureRandom.uuid, + value: is_option_number || is_skip_single_value ? '' : option + } + end + end + + def build_fields_with_pages(pdf) + fields_index = {} + annots_index = {} + + pdf.pages.each do |page| + page.each_annotation do |annot| + annots_index[annot.hash] = page + + if !annot.key?(:Parent) && annot.key?(:FT) + fields_index[annot.hash] ||= HexaPDF::Type::AcroForm::Field.wrap(pdf, annot) + elsif annot.key?(:Parent) + field = annot[:Parent] + field = field[:Parent] while field[:Parent] + + fields_index[field.hash] ||= HexaPDF::Type::AcroForm::Field.wrap(pdf, field) + end + end + end + + [process_fields_array(pdf, fields_index.values), annots_index] + end + + def process_fields_array(pdf, array, acc = []) + array.each_with_index do |field, index| + next if field.nil? + + unless field.respond_to?(:type) && field.type == :XXAcroFormField + array[index] = field = HexaPDF::Type::AcroForm::Field.wrap(pdf, field) + end + + if field.terminal_field? + acc << field + else + process_fields_array(pdf, field[:Kids], acc) + end + end + + acc + end + # rubocop:enable Metrics + end +end diff --git a/lib/templates/process_document.rb b/lib/templates/process_document.rb index 06cd0738..be346e8f 100644 --- a/lib/templates/process_document.rb +++ b/lib/templates/process_document.rb @@ -10,14 +10,22 @@ module Templates Q = 35 MAX_WIDTH = 1400 MAX_NUMBER_OF_PAGES_PROCESSED = 15 - MAX_FLATTEN_FILE_SIZE = 15.megabytes + MAX_FLATTEN_FILE_SIZE = 20.megabytes GENERATE_PREVIEW_SIZE_LIMIT = 50.megabytes module_function - def call(attachment, data) + def call(attachment, data, extract_fields: false) if attachment.content_type == PDF_CONTENT_TYPE - generate_pdf_preview_images(attachment, data) + if extract_fields && data.size < MAX_FLATTEN_FILE_SIZE + pdf = HexaPDF::Document.new(io: StringIO.new(data)) + + fields = Templates::FindAcroFields.call(pdf, attachment) + end + + generate_pdf_preview_images(attachment, data, pdf) + + attachment.metadata['pdf']['fields'] = fields if fields elsif attachment.image? generate_preview_image(attachment, data) end @@ -43,10 +51,10 @@ module Templates ) end - def generate_pdf_preview_images(attachment, data) + def generate_pdf_preview_images(attachment, data, pdf = nil) ActiveStorage::Attachment.where(name: ATTACHMENT_NAME, record: attachment).destroy_all - pdf = HexaPDF::Document.new(io: StringIO.new(data)) + pdf ||= HexaPDF::Document.new(io: StringIO.new(data)) number_of_pages = pdf.pages.size data = maybe_flatten_form(data, pdf) @@ -97,6 +105,18 @@ module Templates data end + def normalize_attachment_fields(template, attachments = template.documents) + attachments.flat_map do |a| + pdf_fields = a.metadata['pdf'].delete('fields').to_a if a.metadata['pdf'].present? + + next [] if pdf_fields.blank? + + pdf_fields.each { |f| f['submitter_uuid'] = template.submitters.first['uuid'] } + + pdf_fields + end + end + def generate_pdf_preview_from_file(attachment, file_path, page_number) io = StringIO.new