From 747ffa8cf10fdfabbdfb55bb6a9408a2c4584850 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Wed, 15 Oct 2025 13:38:53 +0300 Subject: [PATCH 01/16] adjust signature ID size --- .../generate_result_attachments.rb | 48 ++++++++++++------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/lib/submissions/generate_result_attachments.rb b/lib/submissions/generate_result_attachments.rb index 3c584c10..9f59bb86 100644 --- a/lib/submissions/generate_result_attachments.rb +++ b/lib/submissions/generate_result_attachments.rb @@ -321,9 +321,9 @@ module Submissions end end - reason_text = HexaPDF::Layout::TextFragment.create(reason_string, - font:, - font_size: (font_size / 1.8).to_i) + base_font_size = (font_size / 1.8).to_i + + result = nil if area['h']&.positive? && (area['w'].to_f / area['h']) > 6 area_x = area['x'] * width @@ -344,12 +344,10 @@ module Submissions id_string = "ID: #{attachment.uuid}".upcase - while true - text = HexaPDF::Layout::TextFragment.create(id_string, - font:, - font_size: (font_size / 1.8).to_i) + loop do + text = HexaPDF::Layout::TextFragment.create(id_string, font:, font_size: base_font_size) - result = layouter.fit([text], half_width, (font_size / 1.8) / 0.65) + result = layouter.fit([text], half_width, base_font_size / 0.65) break if result.status == :success @@ -358,25 +356,39 @@ module Submissions break if id_string.length < 8 end - text_x = area_x + half_width - text_y = height - area_y + string = [id_string, reason_string].join("\n") + + loop do + text = HexaPDF::Layout::TextFragment.create(string, font:, font_size: base_font_size) - reason_result = layouter.fit([reason_text], half_width, height) + result = layouter.fit([text], half_width, area_h) - layouter.fit([text], half_width, (font_size / 1.8) / 0.65) - .draw(canvas, text_x + TEXT_LEFT_MARGIN, text_y) + break if result.status == :success - layouter.fit([reason_text], half_width, reason_result.lines.sum(&:height)) - .draw(canvas, text_x + TEXT_LEFT_MARGIN, text_y - 
TEXT_TOP_MARGIN - result.lines.sum(&:height)) + base_font_size *= 0.9 + + break if base_font_size < 2 + end + + text = HexaPDF::Layout::TextFragment.create(string, font:, font_size: base_font_size) + + text_x = area_x + half_width + text_y = height - area_y + + layouter.fit([text], half_width, area_h).draw(canvas, text_x + TEXT_LEFT_MARGIN, text_y) else + reason_text = HexaPDF::Layout::TextFragment.create(reason_string, + font:, + font_size: base_font_size) + id_string = "ID: #{attachment.uuid}".upcase loop do text = HexaPDF::Layout::TextFragment.create(id_string, font:, - font_size: (font_size / 1.8).to_i) + font_size: base_font_size) - result = layouter.fit([text], area['w'] * width, (font_size / 1.8) / 0.65) + result = layouter.fit([text], area['w'] * width, base_font_size / 0.65) break if result.status == :success @@ -395,7 +407,7 @@ module Submissions io = StringIO.new(image.resize([scale * 4, 1].select(&:positive?).min).write_to_buffer('.png')) - layouter.fit([text], area['w'] * width, (font_size / 1.8) / 0.65) + layouter.fit([text], area['w'] * width, base_font_size / 0.65) .draw(canvas, (area['x'] * width) + TEXT_LEFT_MARGIN, height - (area['y'] * height) - TEXT_TOP_MARGIN - image_height) From 23ecce51a3daa1be1d4a66aed957280671ab4d37 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Wed, 22 Oct 2025 20:37:08 +0300 Subject: [PATCH 02/16] hide archived completed page --- app/controllers/accounts_controller.rb | 1 + app/controllers/send_submission_email_controller.rb | 11 +++++++---- app/controllers/submissions_preview_controller.rb | 3 +++ app/controllers/submit_form_controller.rb | 4 +++- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/app/controllers/accounts_controller.rb b/app/controllers/accounts_controller.rb index 7a6587a5..0cd12d52 100644 --- a/app/controllers/accounts_controller.rb +++ b/app/controllers/accounts_controller.rb @@ -47,6 +47,7 @@ class AccountsController < ApplicationController authorize!(:manage, current_account) 
true_user.update!(locked_at: Time.current, email: true_user.email.sub('@', '+removed@')) + true_user.account.update!(archived_at: Time.current) # rubocop:disable Layout/LineLength render turbo_stream: turbo_stream.replace( diff --git a/app/controllers/send_submission_email_controller.rb b/app/controllers/send_submission_email_controller.rb index 45852360..c3a95158 100644 --- a/app/controllers/send_submission_email_controller.rb +++ b/app/controllers/send_submission_email_controller.rb @@ -29,7 +29,7 @@ class SendSubmissionEmailController < ApplicationController RateLimit.call("send-email-#{@submitter.id}", limit: 2, ttl: 5.minutes) - SubmitterMailer.documents_copy_email(@submitter, sig: true).deliver_later! unless already_sent?(@submitter) + SubmitterMailer.documents_copy_email(@submitter, sig: true).deliver_later! if can_send?(@submitter) respond_to do |f| f.html { render :success } @@ -39,8 +39,11 @@ class SendSubmissionEmailController < ApplicationController private - def already_sent?(submitter) - EmailEvent.exists?(tag: :submitter_documents_copy, email: submitter.email, emailable: submitter, - event_type: :send, created_at: SEND_DURATION.ago..Time.current) + def can_send?(submitter) + return false if submitter.account.archived_at? + return false if EmailEvent.exists?(tag: :submitter_documents_copy, email: submitter.email, emailable: submitter, + event_type: :send, created_at: SEND_DURATION.ago..Time.current) + + true end end diff --git a/app/controllers/submissions_preview_controller.rb b/app/controllers/submissions_preview_controller.rb index 40130e19..2ac30745 100644 --- a/app/controllers/submissions_preview_controller.rb +++ b/app/controllers/submissions_preview_controller.rb @@ -41,6 +41,9 @@ class SubmissionsPreviewController < ApplicationController def completed @submission = Submission.find_by!(slug: params[:submissions_preview_slug]) + + raise ActionController::RoutingError, I18n.t('not_found') if @submission.account.archived_at? 
+ @template = @submission.template render :completed, layout: 'form' diff --git a/app/controllers/submit_form_controller.rb b/app/controllers/submit_form_controller.rb index f7bf565e..963594fa 100644 --- a/app/controllers/submit_form_controller.rb +++ b/app/controllers/submit_form_controller.rb @@ -75,7 +75,9 @@ class SubmitFormController < ApplicationController render json: { error: e.message }, status: :unprocessable_content end - def completed; end + def completed + raise ActionController::RoutingError, I18n.t('not_found') if @submitter.account.archived_at? + end def success; end From 9163f744a300de7a4b8536d3e8b15ef56e53019f Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Mon, 27 Oct 2025 20:03:33 +0200 Subject: [PATCH 03/16] remove sign up button --- app/views/submissions_preview/completed.html.erb | 8 -------- 1 file changed, 8 deletions(-) diff --git a/app/views/submissions_preview/completed.html.erb b/app/views/submissions_preview/completed.html.erb index f8346117..60370a46 100644 --- a/app/views/submissions_preview/completed.html.erb +++ b/app/views/submissions_preview/completed.html.erb @@ -38,14 +38,6 @@ <%= f.button button_title(title: t('send_copy_to_email'), disabled_with: t('starting')), class: 'base-button' %> <% end %> - <% if Docuseal.multitenant? %> -
<%= t('or') %>
- <% end %> - <% end %> - <% if Docuseal.multitenant? %> -
- <%= link_to t('create_free_account'), registration_path, class: 'white-button w-full' %> -
<% end %> From 32691b0c0dce764df1cf0d8a9129ead1ed4cb736 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Wed, 29 Oct 2025 10:10:04 +0200 Subject: [PATCH 04/16] add background --- app/javascript/submission_form/area.vue | 4 ++++ app/views/submissions/_value.html.erb | 3 ++- lib/submissions/generate_result_attachments.rb | 8 ++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/app/javascript/submission_form/area.vue b/app/javascript/submission_form/area.vue index 2a2fd20e..d3a4eaf6 100644 --- a/app/javascript/submission_form/area.vue +++ b/app/javascript/submission_form/area.vue @@ -553,6 +553,10 @@ export default { style.color = this.field.preferences.color } + if (this.field.preferences?.background) { + style.background = this.field.preferences.background + } + return style }, isNarrow () { diff --git a/app/views/submissions/_value.html.erb b/app/views/submissions/_value.html.erb index 025053ef..f0487425 100644 --- a/app/views/submissions/_value.html.erb +++ b/app/views/submissions/_value.html.erb @@ -1,10 +1,11 @@ <% align = field.dig('preferences', 'align') %> <% valign = field.dig('preferences', 'valign') %> <% color = field.dig('preferences', 'color') %> +<% bg_color = field.dig('preferences', 'background') %> <% font = field.dig('preferences', 'font') %> <% font_type = field.dig('preferences', 'font_type') %> <% font_size_px = (field.dig('preferences', 'font_size').presence || Submissions::GenerateResultAttachments::FONT_SIZE).to_i * local_assigns.fetch(:font_scale) { 1000.0 / PdfUtils::US_LETTER_W } %> -width: <%= area['w'] * 100 %>%; height: <%= area['h'] * 100 %>%; left: <%= area['x'] * 100 %>%; top: <%= area['y'] * 100 %>%; font-size: <%= fs = "clamp(1pt, #{font_size_px / 10}vw, #{font_size_px}px)" %>; line-height: calc(<%= fs %> * 1.3); font-size: <%= fs = "#{font_size_px / 10}cqmin" %>; line-height: calc(<%= fs %> * 1.3)"> +<%= "background: #{bg_color}; " if bg_color.present? 
%>width: <%= area['w'] * 100 %>%; height: <%= area['h'] * 100 %>%; left: <%= area['x'] * 100 %>%; top: <%= area['y'] * 100 %>%; font-size: <%= fs = "clamp(1pt, #{font_size_px / 10}vw, #{font_size_px}px)" %>; line-height: calc(<%= fs %> * 1.3); font-size: <%= fs = "#{font_size_px / 10}cqmin" %>; line-height: calc(<%= fs %> * 1.3)"> <% if field['type'] == 'signature' %> <% is_narrow = area['h']&.positive? && (area['w'].to_f / area['h']) > 6 %>
diff --git a/lib/submissions/generate_result_attachments.rb b/lib/submissions/generate_result_attachments.rb index 9f59bb86..934c86fc 100644 --- a/lib/submissions/generate_result_attachments.rb +++ b/lib/submissions/generate_result_attachments.rb @@ -244,6 +244,7 @@ module Submissions font_size ||= (([page.box.width, page.box.height].min / A4_SIZE[0].to_f) * FONT_SIZE).to_i fill_color = field.dig('preferences', 'color').to_s.delete_prefix('#').presence + bg_color = field.dig('preferences', 'background').to_s.delete_prefix('#').presence font_name = field.dig('preferences', 'font') font_variant = (field.dig('preferences', 'font_type').presence || 'none').to_sym @@ -289,6 +290,13 @@ module Submissions with_signature_id = field['preferences']['with_signature_id'] end + if bg_color.present? + canvas.fill_color(bg_color) + .rectangle(area['x'] * width, height - (area['y'] * height) - (area['h'] * height), + area['w'] * width, area['h'] * height) + .fill + end + case field_type when ->(type) { type == 'signature' && (with_signature_id || field.dig('preferences', 'reason_field_uuid')) } attachment = submitter.attachments.find { |a| a.uuid == value } From b4d5a2cad4d2e2bb0d6ef5e3e56691561e1dccf3 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Thu, 30 Oct 2025 08:34:30 +0200 Subject: [PATCH 05/16] dangerous extension error --- app/controllers/api/attachments_controller.rb | 2 ++ app/javascript/submission_form/dropzone.vue | 14 +++++++++++--- lib/submitters.rb | 17 +++++++++++++++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/app/controllers/api/attachments_controller.rb b/app/controllers/api/attachments_controller.rb index 5051cfeb..dc99668b 100644 --- a/app/controllers/api/attachments_controller.rb +++ b/app/controllers/api/attachments_controller.rb @@ -34,6 +34,8 @@ module Api end render json: attachment.as_json(only: %i[uuid created_at], methods: %i[url filename content_type]) + rescue Submitters::MaliciousFileExtension => e + render json: { error: 
e.message }, status: :unprocessable_entity end def build_new_cookie_signatures_json(submitter, attachment) diff --git a/app/javascript/submission_form/dropzone.vue b/app/javascript/submission_form/dropzone.vue index 89e01b3b..275b0f30 100644 --- a/app/javascript/submission_form/dropzone.vue +++ b/app/javascript/submission_form/dropzone.vue @@ -163,12 +163,20 @@ export default { return fetch(this.baseUrl + '/api/attachments', { method: 'POST', body: formData - }).then(resp => resp.json()).then((data) => { - return data + }).then(async (resp) => { + const data = await resp.json() + + if (resp.status === 422) { + alert(data.error) + } else { + return data + } }) } })).then((result) => { - this.$emit('upload', result) + if (result && result[0]) { + this.$emit('upload', result) + } }).finally(() => { this.isLoading = false }) diff --git a/lib/submitters.rb b/lib/submitters.rb index 2c08a09f..2738f773 100644 --- a/lib/submitters.rb +++ b/lib/submitters.rb @@ -13,6 +13,17 @@ module Submitters UnableToSendCode = Class.new(StandardError) InvalidOtp = Class.new(StandardError) + MaliciousFileExtension = Class.new(StandardError) + + DANGEROUS_EXTENSIONS = Set.new(%w[ + exe com bat cmd scr pif vbs vbe js jse wsf wsh msi msp + hta cpl jar app deb rpm dmg pkg mpkg dll so dylib sys + inf reg ps1 psm1 psd1 ps1xml psc1 pssc bat cmd vb vba + sh bash zsh fish run out bin elf gadget workflow lnk scf + url desktop application action workflow apk ipa xap appx + appxbundle msix msixbundle diagcab diagpkg cpl msc ocx + drv scr ins isp mst paf prf shb shs slk ws wsc inf1 inf2 + ].freeze) module_function @@ -111,6 +122,12 @@ module Submitters def create_attachment!(submitter, params) blob = if (file = params[:file]) + extension = File.extname(file.original_filename).delete_prefix('.').downcase + + if DANGEROUS_EXTENSIONS.include?(extension) + raise MaliciousFileExtension, "File type '.#{extension}' is not allowed." 
+ end + ActiveStorage::Blob.create_and_upload!(io: file.open, filename: file.original_filename, content_type: file.content_type) From cb66eb12930d9ef1cf0fa3f2fed7ef335cef6a26 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Fri, 31 Oct 2025 10:10:19 +0200 Subject: [PATCH 06/16] fix redirect --- app/controllers/console_redirect_controller.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/controllers/console_redirect_controller.rb b/app/controllers/console_redirect_controller.rb index dd80e9fe..0da093a8 100644 --- a/app/controllers/console_redirect_controller.rb +++ b/app/controllers/console_redirect_controller.rb @@ -17,8 +17,10 @@ class ConsoleRedirectController < ApplicationController scope: :console, exp: 1.minute.from_now.to_i) - path = Addressable::URI.parse(params[:redir]).path if params[:redir].to_s.starts_with?(Docuseal::CONSOLE_URL) + redir_uri = Addressable::URI.parse(params[:redir]) + path = redir_uri.path if params[:redir].to_s.starts_with?(Docuseal::CONSOLE_URL) - redirect_to("#{Docuseal::CONSOLE_URL}#{path}?#{{ auth: }.to_query}", allow_other_host: true) + redirect_to "#{Docuseal::CONSOLE_URL}#{path}?#{{ **redir_uri.query_values, 'auth' => auth }.to_query}", + allow_other_host: true end end From 5ae479644eaf68b6012427359783bb9fc3e4b4dd Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Fri, 31 Oct 2025 10:18:39 +0200 Subject: [PATCH 07/16] focus on canvas click --- app/javascript/submission_form/initials_step.vue | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app/javascript/submission_form/initials_step.vue b/app/javascript/submission_form/initials_step.vue index 376fb7e9..dad1a944 100644 --- a/app/javascript/submission_form/initials_step.vue +++ b/app/javascript/submission_form/initials_step.vue @@ -123,6 +123,11 @@ v-if="!isDrawInitials" class="absolute top-0 right-0 left-0 bottom-0" /> +
+
+ +
{ + return acc + doc.metadata?.pdf?.number_of_pages || doc.preview_images.length + }, 0) + }, isShowFieldSearch () { if (this.withFieldsSearch === false) { return false @@ -389,6 +430,61 @@ export default { this.$emit('set-drag', field) }, + detectFields () { + const fields = [] + + this.fieldPagesLoaded = 0 + + this.baseFetch(`/templates/${this.template.id}/detect_fields`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + } + }).then(async (response) => { + const reader = response.body.getReader() + const decoder = new TextDecoder('utf-8') + let buffer = '' + + while (true) { + const { value, done } = await reader.read() + + if (done) break + + buffer += decoder.decode(value, { stream: true }) + + const lines = buffer.split('\n\n') + + buffer = lines.pop() + + for (const line of lines) { + if (line.startsWith('data: ')) { + const jsonStr = line.replace(/^data: /, '') + const data = JSON.parse(jsonStr) + + if (data.completed) { + this.fieldPagesLoaded = null + this.template.fields = fields + + break + } else if (data.fields) { + data.fields.forEach((f) => { + f.submitter_uuid = this.template.submitters[0].uuid + }) + + this.fieldPagesLoaded += 1 + + fields.push(...data.fields) + } + } + } + } + }).catch(error => { + console.error('Error in streaming message: ', error) + }).finally(() => { + this.fieldPagesLoaded = null + this.isFieldsLoading = false + }) + }, setDragPlaceholder (event) { this.$emit('set-drag-placeholder', { offsetX: event.offsetX, diff --git a/app/javascript/template_builder/i18n.js b/app/javascript/template_builder/i18n.js index 75b450fe..a23e5c8b 100644 --- a/app/javascript/template_builder/i18n.js +++ b/app/javascript/template_builder/i18n.js @@ -1,5 +1,6 @@ const en = { view: 'View', + autodetect_fields: 'Autodetect fields', payment_link: 'Payment link', strikeout: 'Strikeout', draw_strikethrough_the_document: 'Draw strikethrough the document', diff --git a/app/views/templates/edit.html.erb 
b/app/views/templates/edit.html.erb index 79a10118..9fbc8d39 100644 --- a/app/views/templates/edit.html.erb +++ b/app/views/templates/edit.html.erb @@ -6,4 +6,4 @@ <%= button_to nil, user_configs_path, method: :post, params: { user_config: { key: UserConfig::SHOW_APP_TOUR, value: true } }, class: 'hidden', id: 'start_tour_button' %> <% end %> <% end %> - + diff --git a/config/routes.rb b/config/routes.rb index e90bd2f2..43701da1 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -100,6 +100,9 @@ Rails.application.routes.draw do resource :debug, only: %i[show], controller: 'templates_debug' if Rails.env.development? resources :documents, only: %i[create], controller: 'template_documents' resources :clone_and_replace, only: %i[create], controller: 'templates_clone_and_replace' + if !Docuseal.multitenant? || Docuseal.demo? + resources :detect_fields, only: %i[create], controller: 'templates_detect_fields' + end resources :restore, only: %i[create], controller: 'templates_restore' resources :archived, only: %i[index], controller: 'templates_archived_submissions' resources :submissions, only: %i[new create] diff --git a/lib/pdfium.rb b/lib/pdfium.rb index 317dddf0..464f95e2 100644 --- a/lib/pdfium.rb +++ b/lib/pdfium.rb @@ -25,6 +25,8 @@ class Pdfium typedef :pointer, :FPDF_BITMAP typedef :pointer, :FPDF_FORMHANDLE typedef :pointer, :FPDF_TEXTPAGE + typedef :pointer, :FPDF_PAGEOBJECT + typedef :pointer, :FPDF_PATHSEGMENT MAX_SIZE = 32_767 @@ -37,6 +39,9 @@ class Pdfium FPDF_RENDER_FORCEHALFTONE = 0x400 FPDF_PRINTING = 0x800 + TextNode = Struct.new(:content, :x, :y, :w, :h, keyword_init: true) + LineNode = Struct.new(:x, :y, :w, :h, :tilt, keyword_init: true) + # rubocop:disable Naming/ClassAndModuleCamelCase class FPDF_LIBRARY_CONFIG < FFI::Struct layout :version, :int, @@ -77,6 +82,37 @@ class Pdfium attach_function :FPDFText_ClosePage, [:FPDF_TEXTPAGE], :void attach_function :FPDFText_CountChars, [:FPDF_TEXTPAGE], :int attach_function :FPDFText_GetText, 
%i[FPDF_TEXTPAGE int int pointer], :int + attach_function :FPDFText_GetUnicode, %i[FPDF_TEXTPAGE int], :uint + attach_function :FPDFText_GetCharBox, %i[FPDF_TEXTPAGE int pointer pointer pointer pointer], :int + attach_function :FPDFText_GetCharOrigin, %i[FPDF_TEXTPAGE int pointer pointer], :int + attach_function :FPDFText_GetCharIndexAtPos, %i[FPDF_TEXTPAGE double double double double], :int + attach_function :FPDFText_CountRects, %i[FPDF_TEXTPAGE int int], :int + attach_function :FPDFText_GetRect, %i[FPDF_TEXTPAGE int pointer pointer pointer pointer], :int + attach_function :FPDFText_GetFontSize, %i[FPDF_TEXTPAGE int], :double + + # Page object functions for extracting paths/lines + attach_function :FPDFPage_CountObjects, [:FPDF_PAGE], :int + attach_function :FPDFPage_GetObject, %i[FPDF_PAGE int], :FPDF_PAGEOBJECT + attach_function :FPDFPageObj_GetType, [:FPDF_PAGEOBJECT], :int + attach_function :FPDFPageObj_GetBounds, %i[FPDF_PAGEOBJECT pointer pointer pointer pointer], :int + attach_function :FPDFPath_CountSegments, [:FPDF_PAGEOBJECT], :int + attach_function :FPDFPath_GetPathSegment, %i[FPDF_PAGEOBJECT int], :FPDF_PATHSEGMENT + attach_function :FPDFPathSegment_GetType, [:FPDF_PATHSEGMENT], :int + attach_function :FPDFPathSegment_GetPoint, %i[FPDF_PATHSEGMENT pointer pointer], :int + + # Page object types + FPDF_PAGEOBJ_UNKNOWN = 0 + FPDF_PAGEOBJ_TEXT = 1 + FPDF_PAGEOBJ_PATH = 2 + FPDF_PAGEOBJ_IMAGE = 3 + FPDF_PAGEOBJ_SHADING = 4 + FPDF_PAGEOBJ_FORM = 5 + + # Path segment types + FPDF_SEGMENT_UNKNOWN = -1 + FPDF_SEGMENT_LINETO = 0 + FPDF_SEGMENT_BEZIERTO = 1 + FPDF_SEGMENT_MOVETO = 2 typedef :int, :FPDF_BOOL typedef :pointer, :IPDF_JSPLATFORM @@ -157,6 +193,7 @@ class Pdfium raise PdfiumError, "#{context_message}: #{error_message(error_code)} (Code: #{error_code})" end + # rubocop:disable Metrics class Document attr_reader :document_ptr, :form_handle @@ -386,6 +423,128 @@ class Pdfium Pdfium.FPDFText_ClosePage(text_page) if text_page && !text_page.null? 
end + def text_nodes + return @text_nodes if @text_nodes + + text_page = Pdfium.FPDFText_LoadPage(page_ptr) + char_count = Pdfium.FPDFText_CountChars(text_page) + + @text_nodes = [] + + return @text_nodes if char_count.zero? + + char_count.times do |i| + unicode = Pdfium.FPDFText_GetUnicode(text_page, i) + + char = [unicode].pack('U*') + + left_ptr = FFI::MemoryPointer.new(:double) + right_ptr = FFI::MemoryPointer.new(:double) + bottom_ptr = FFI::MemoryPointer.new(:double) + top_ptr = FFI::MemoryPointer.new(:double) + + result = Pdfium.FPDFText_GetCharBox(text_page, i, left_ptr, right_ptr, bottom_ptr, top_ptr) + + next if result.zero? + + left = left_ptr.read_double + right = right_ptr.read_double + + origin_x_ptr = FFI::MemoryPointer.new(:double) + origin_y_ptr = FFI::MemoryPointer.new(:double) + + Pdfium.FPDFText_GetCharOrigin(text_page, i, origin_x_ptr, origin_y_ptr) + + origin_y = origin_y_ptr.read_double + + font_size = Pdfium.FPDFText_GetFontSize(text_page, i) + font_size = 8 if font_size == 1 + + abs_x = left + abs_y = height - origin_y - (font_size * 0.8) + abs_width = right - left + abs_height = font_size + + x = abs_x / width + y = abs_y / height + node_width = abs_width / width + node_height = abs_height / height + + @text_nodes << TextNode.new(content: char, x: x, y: y, w: node_width, h: node_height) + end + + @text_nodes = @text_nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y } + ensure + Pdfium.FPDFText_ClosePage(text_page) if text_page && !text_page.null? + end + + def line_nodes + return @line_nodes if @line_nodes + + ensure_not_closed! + + @line_nodes = [] + + object_count = Pdfium.FPDFPage_CountObjects(page_ptr) + + return @line_nodes if object_count.zero? + + object_count.times do |i| + page_object = Pdfium.FPDFPage_GetObject(page_ptr, i) + + next if page_object.null? 
+ + obj_type = Pdfium.FPDFPageObj_GetType(page_object) + + next unless obj_type == Pdfium::FPDF_PAGEOBJ_PATH + + left_ptr = FFI::MemoryPointer.new(:float) + bottom_ptr = FFI::MemoryPointer.new(:float) + right_ptr = FFI::MemoryPointer.new(:float) + top_ptr = FFI::MemoryPointer.new(:float) + + Pdfium.FPDFPageObj_GetBounds(page_object, left_ptr, bottom_ptr, right_ptr, top_ptr) + + obj_left = left_ptr.read_float + obj_bottom = bottom_ptr.read_float + obj_right = right_ptr.read_float + obj_top = top_ptr.read_float + + obj_width = obj_right - obj_left + obj_height = obj_top - obj_bottom + + next if obj_width < 1 && obj_height < 1 + + segment_count = Pdfium.FPDFPath_CountSegments(page_object) + + next if segment_count < 2 + + next unless segment_count <= 10 && (obj_height < 10 || obj_width < 10) + + if obj_width > obj_height && obj_height < 10 + tilt = 0 + elsif obj_height > obj_width && obj_width < 10 + tilt = 90 + else + next + end + + x = obj_left + y = obj_bottom + w = obj_width + h = obj_height + + norm_x = x / width + norm_y = (height - y - h) / height + norm_w = w / width + norm_h = h / height + + @line_nodes << LineNode.new(x: norm_x, y: norm_y, w: norm_w, h: norm_h, tilt: tilt) + end + + @line_nodes = @line_nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y } + end + def close return if closed? @@ -445,4 +604,5 @@ class Pdfium at_exit do cleanup_library end + # rubocop:enable Metrics end diff --git a/lib/templates/detect_fields.rb b/lib/templates/detect_fields.rb new file mode 100755 index 00000000..20bb9e3f --- /dev/null +++ b/lib/templates/detect_fields.rb @@ -0,0 +1,264 @@ +# frozen_string_literal: true + +module Templates + module DetectFields + module_function + + TextFieldBox = Struct.new(:x, :y, :w, :h, keyword_init: true) + + # rubocop:disable Metrics + def call(io, attachment: nil, confidence: 0.3, temperature: 1, + nms: 0.1, split_page: false, aspect_ratio: true, padding: 20, &) + if attachment&.image? 
+ process_image_attachment(io, attachment:, confidence:, nms:, split_page:, + temperature:, aspect_ratio:, padding:, &) + else + process_pdf_attachment(io, attachment:, confidence:, nms:, split_page:, + temperature:, aspect_ratio:, padding:, &) + end + end + + def process_image_attachment(io, attachment:, confidence:, nms:, temperature: 1, + split_page: false, aspect_ratio: false, padding: nil) + image = Vips::Image.new_from_buffer(io.read, '') + + fields = Templates::ImageToFields.call(image, confidence:, nms:, split_page:, + temperature:, aspect_ratio:, padding:) + + fields = fields.map do |f| + { + uuid: SecureRandom.uuid, + type: f.type, + required: true, + preferences: {}, + areas: [{ + x: f.x, + y: f.y, + w: f.w, + h: f.h, + page: 0, + attachment_uuid: attachment&.uuid + }] + } + end + + yield [attachment&.uuid, 0, fields] if block_given? + + fields + end + + def process_pdf_attachment(io, attachment:, confidence:, nms:, temperature: 1, + split_page: false, aspect_ratio: false, padding: nil) + doc = Pdfium::Document.open_bytes(io.read) + + doc.page_count.times.flat_map do |page_number| + page = doc.get_page(page_number) + + data, width, height = page.render_to_bitmap(width: ImageToFields::RESOLUTION * 1.5) + + image = Vips::Image.new_from_memory(data, width, height, 4, :uchar) + + fields = Templates::ImageToFields.call(image, confidence: 0.05, nms:, split_page:, + temperature:, aspect_ratio:, padding:) + + text_fields = extract_text_fields_from_page(page) + line_fields = extract_line_fields_from_page(page) + + fields = increase_confidence_for_overlapping_fields(fields, text_fields) + fields = increase_confidence_for_overlapping_fields(fields, line_fields) + + fields = fields.filter_map do |f| + next if f.confidence < confidence + + { + uuid: SecureRandom.uuid, + type: f.type, + required: true, + preferences: {}, + areas: [{ + x: f.x, y: f.y, + w: f.w, h: f.h, + page: page_number, + attachment_uuid: attachment&.uuid + }] + } + end + + yield [attachment&.uuid, 
page_number, fields] if block_given? + + fields + end + end + + def extract_line_fields_from_page(page) + line_thickness = 5.0 / page.height + + vertical_lines, all_horizontal_lines = page.line_nodes.partition { |line| line.tilt == 90 } + + horizontal_lines = all_horizontal_lines.reject do |h_line| + next true if h_line.w > 0.7 && (h_line.h < 0.1 || h_line.h < 0.9) + + next false if vertical_lines.blank? + + h_x_min = h_line.x + h_x_max = h_line.x + h_line.w + h_y_avg = h_line.y + (h_line.h / 2) + + vertical_lines.any? do |v_line| + v_x_avg = v_line.x + (v_line.w / 2) + v_y_min = v_line.y + v_y_max = v_line.y + v_line.h + + h_x_min_expanded = h_x_min - line_thickness + h_x_max_expanded = h_x_max + line_thickness + h_y_min_expanded = h_y_avg - line_thickness + h_y_max_expanded = h_y_avg + line_thickness + + v_x_min_expanded = v_x_avg - line_thickness + v_x_max_expanded = v_x_avg + line_thickness + v_y_min_expanded = v_y_min - line_thickness + v_y_max_expanded = v_y_max + line_thickness + + x_overlap = v_x_min_expanded <= h_x_max_expanded && v_x_max_expanded >= h_x_min_expanded + y_overlap = h_y_min_expanded <= v_y_max_expanded && h_y_max_expanded >= v_y_min_expanded + + x_overlap && y_overlap + end + end + + node_index = 0 + + horizontal_lines = horizontal_lines.reject do |line| + nodes = [] + + loop do + node = page.text_nodes[node_index += 1] + + break unless node + + break if node.y > line.y + + next if node.x + node.w < line.x || line.x + line.w < node.x || + node.y + node.h < line.y - node.h || line.y < node.y + + nodes << node + + next if nodes.blank? + + next_node = page.text_nodes[node_index + 1] + + break if next_node.x + next_node.w < line.x || line.x + line.w < next_node.x || + next_node.y + next_node.h < line.y - next_node.h || line.y < next_node.y + end + + next if nodes.blank? 
+ + width = nodes.last.x + nodes.last.w - nodes.first.x + + next true if width > line.w / 2.0 + end + + horizontal_lines.each do |line| + line.h += 4 * line_thickness + line.y -= 4 * line_thickness + end + end + + def extract_text_fields_from_page(page) + text_nodes = page.text_nodes + + field_boxes = [] + + i = 0 + + while i < text_nodes.length + node = text_nodes[i] + + next i += 1 if node.content != '_' + + x1 = node.x + y1 = node.y + x2 = node.x + node.w + y2 = node.y + node.h + + underscore_count = 1 + + j = i + 1 + + while j < text_nodes.length + next_node = text_nodes[j] + + break unless next_node.content == '_' + + distance = next_node.x - x2 + height_diff = (next_node.y - y1).abs + + break if distance > 0.02 || height_diff > node.h * 0.5 + + underscore_count += 1 + next_x2 = next_node.x + next_node.w + next_y2 = next_node.y + next_node.h + + x2 = next_x2 + y2 = [y2, next_y2].max + y1 = [y1, next_node.y].min + + j += 1 + end + + field_boxes << TextFieldBox.new(x: x1, y: y1, w: x2 - x1, h: y2 - y1) if underscore_count >= 2 + + i = j + end + + field_boxes + end + + def calculate_iou(box1, box2) + x1 = [box1.x, box2.x].max + y1 = [box1.y, box2.y].max + x2 = [box1.x + box1.w, box2.x + box2.w].min + y2 = [box1.y + box1.h, box2.y + box2.h].min + + intersection_width = [0, x2 - x1].max + intersection_height = [0, y2 - y1].max + intersection_area = intersection_width * intersection_height + + return 0.0 if intersection_area.zero? + + box1_area = box1.w * box1.h + box2_area = box2.w * box2.h + union_area = box1_area + box2_area - intersection_area + + intersection_area / union_area + end + + def boxes_overlap?(box1, box2) + !(box1.x + box1.w < box2.x || box2.x + box2.w < box1.x || + box1.y + box1.h < box2.y || box2.y + box2.h < box1.y) + end + + def increase_confidence_for_overlapping_fields(image_fields, text_fields, by: 1.0) + return image_fields if text_fields.blank? 
# frozen_string_literal: true

module Templates
  # Detects form fields (text inputs and checkboxes) on a page image by running
  # an ONNX detection model and converting its raw outputs into Field structs
  # whose coordinates are normalized to the source image size.
  module ImageToFields
    module_function

    Field = Struct.new(:type, :x, :y, :w, :h, :confidence, keyword_init: true)

    MODEL_PATH = Rails.root.join('tmp/model_704_int8.onnx')

    # Side length (in pixels) of the square model input.
    RESOLUTION = 704

    # Maps the model's class index to a field type name.
    ID_TO_CLASS = %w[text checkbox].freeze

    # Per-channel normalization constants applied after scaling pixels to 0..1.
    MEAN = [0.485, 0.456, 0.406].freeze
    STD = [0.229, 0.224, 0.225].freeze

    CPU_THREADS = Etc.nprocessors

    # rubocop:disable Metrics

    # Runs field detection on an image.
    #
    # @param image [Vips::Image] page image; only its first three bands are used
    # @param confidence [Float] detections scoring at or below this are dropped
    # @param nms [Float] IoU threshold handed to {apply_nms}
    # @param temperature [Numeric] divisor applied to logits before the sigmoid
    # @param split_page [Boolean] when true and the image is taller than wide,
    #   the (trimmed) page is cut into a top and a bottom half that are run
    #   through the model separately
    # @param aspect_ratio [Boolean] keep the aspect ratio (pad with white)
    #   instead of stretching to the model resolution
    # @param padding [Integer, nil] nil disables whitespace trimming; otherwise
    #   the margin kept around the trimmed content
    # @return [Array<Field>] fields in reading order, with x/y/w/h expressed as
    #   fractions of the image width/height
    def call(image, confidence: 0.3, nms: 0.1, temperature: 1,
             split_page: false, aspect_ratio: true, padding: nil)
      base_image = image.extract_band(0, n: 3)

      trimmed_base, base_offset_x, base_offset_y = trim_image_with_padding(base_image, padding)

      if split_page && image.height > image.width
        half_h = trimmed_base.height / 2
        top_h = half_h
        bottom_h = trimmed_base.height - half_h

        regions = [
          { img: trimmed_base.crop(0, 0, trimmed_base.width, top_h), offset_y: 0 },
          { img: trimmed_base.crop(0, top_h, trimmed_base.width, bottom_h), offset_y: top_h }
        ]

        detections = regions.reduce(empty_detections) do |acc, r|
          # Skip degenerate regions but keep what earlier regions produced.
          next acc if r[:img].height <= 0 || r[:img].width <= 0

          input_tensor, transform_info = preprocess_image(r[:img], RESOLUTION, aspect_ratio:)

          transform_info[:trim_offset_x] = base_offset_x
          transform_info[:trim_offset_y] = base_offset_y + r[:offset_y]

          outputs = model.predict({ 'input' => input_tensor })

          postprocess_outputs(outputs, transform_info, acc, confidence:, temperature:)
        end
      else
        input_tensor, transform_info = preprocess_image(trimmed_base, RESOLUTION, aspect_ratio:)

        transform_info[:trim_offset_x] = base_offset_x
        transform_info[:trim_offset_y] = base_offset_y

        outputs = model.predict({ 'input' => input_tensor })

        detections = postprocess_outputs(outputs, transform_info, confidence:, temperature:)
      end

      detections = apply_nms(detections, nms)

      image_width = image.width.to_f
      image_height = image.height.to_f

      fields = Array.new(detections[:xyxy].shape[0]) do |i|
        x1 = detections[:xyxy][i, 0]
        y1 = detections[:xyxy][i, 1]
        x2 = detections[:xyxy][i, 2]
        y2 = detections[:xyxy][i, 3]

        class_id = detections[:class_id][i].to_i

        # Named `score` so the `confidence:` threshold argument is not clobbered.
        score = detections[:confidence][i]

        x0_norm = x1 / image_width
        y0_norm = y1 / image_height
        x1_norm = x2 / image_width
        y1_norm = y2 / image_height

        Field.new(
          type: ID_TO_CLASS[class_id],
          x: x0_norm,
          y: y0_norm,
          w: (x1_norm - x0_norm),
          h: (y1_norm - y0_norm),
          confidence: score
        )
      end

      sort_fields(fields, y_threshold: 10.0 / image.height)
    end

    # Builds the empty detections container used before any box was kept.
    #
    # @return [Hash] with :xyxy, :confidence and :class_id empty Numo arrays
    def empty_detections
      { xyxy: Numo::SFloat[], confidence: Numo::SFloat[], class_id: Numo::Int32[] }
    end

    # Crops away the white border around the page content, leaving `padding`
    # pixels of margin (clamped to the image bounds).
    #
    # @param image [Vips::Image]
    # @param padding [Integer, nil] nil skips trimming altogether
    # @return [Array(Vips::Image, Integer, Integer)] the cropped image and the
    #   left/top offset of the crop inside the original image
    def trim_image_with_padding(image, padding = 0)
      return [image, 0, 0] if padding.nil?

      left, top, trim_width, trim_height = image.find_trim(threshold: 10, background: [255, 255, 255])

      # A zero-sized trim box means nothing but background was found; cropping
      # to it would be invalid, so fall back to the untouched image.
      return [image, 0, 0] if trim_width.zero? || trim_height.zero?

      padded_left = [left - padding, 0].max
      padded_top = [top - padding, 0].max
      padded_right = [left + trim_width + padding, image.width].min
      padded_bottom = [top + trim_height + padding, image.height].min

      width = padded_right - padded_left
      height = padded_bottom - padded_top

      trimmed_image = image.crop(padded_left, padded_top, width, height)

      [trimmed_image, padded_left, padded_top]
    end

    # Resizes an image to resolution x resolution, normalizes it with MEAN/STD
    # and packs it into a [1, 3, resolution, resolution] float tensor.
    #
    # @param image [Vips::Image] three-band image
    # @param resolution [Integer] target side length
    # @param aspect_ratio [Boolean] when true the image is scaled uniformly and
    #   centered on a white canvas; otherwise it is stretched on both axes
    # @return [Array(Numo::SFloat, Hash)] the tensor and the transform info
    #   (:scale_x, :scale_y, :pad_x, :pad_y) needed to map boxes back
    def preprocess_image(image, resolution, aspect_ratio: false)
      scale_x = resolution.to_f / image.width
      scale_y = resolution.to_f / image.height

      if aspect_ratio
        scale = [scale_x, scale_y].min

        new_width = (image.width * scale).round
        new_height = (image.height * scale).round

        resized = image.resize(scale, vscale: scale, kernel: :lanczos3)

        pad_x = ((resolution - new_width) / 2.0).round
        pad_y = ((resolution - new_height) / 2.0).round

        image = resized.embed(pad_x, pad_y, resolution, resolution, background: [255, 255, 255])

        transform_info = { scale_x: scale, scale_y: scale, pad_x: pad_x, pad_y: pad_y }
      else
        image = image.resize(scale_x, vscale: scale_y, kernel: :lanczos3)

        transform_info = { scale_x: scale_x, scale_y: scale_y, pad_x: 0, pad_y: 0 }
      end

      image /= 255.0

      image = (image - MEAN) / STD

      pixel_data = image.write_to_memory

      # Pixel data is read as height x width x channel, then reordered to
      # channel-first for the model.
      img_array = Numo::SFloat.from_binary(pixel_data, [resolution, resolution, 3])

      img_array = img_array.transpose(2, 0, 1)

      [img_array.reshape(1, 3, resolution, resolution), transform_info]
    end

    # Greedy non-maximum suppression: repeatedly keeps the highest scoring box
    # and discards the remaining boxes whose IoU with it exceeds the threshold.
    #
    # @param boxes [Numo::SFloat] N x 4 array of x1, y1, x2, y2
    # @param scores [Numo::SFloat] N scores
    # @param iou_threshold [Float]
    # @return [Numo::Int32] indices of the boxes that were kept
    def nms(boxes, scores, iou_threshold = 0.5)
      return Numo::Int32[] if boxes.shape[0].zero?

      x1 = boxes[true, 0]
      y1 = boxes[true, 1]
      x2 = boxes[true, 2]
      y2 = boxes[true, 3]

      areas = (x2 - x1) * (y2 - y1)
      order = scores.sort_index.reverse

      keep = []

      while order.size.positive?
        i = order[0]
        keep << i

        break if order.size == 1

        xx1 = Numo::SFloat.maximum(x1[i], x1[order[1..]])
        yy1 = Numo::SFloat.maximum(y1[i], y1[order[1..]])
        xx2 = Numo::SFloat.minimum(x2[i], x2[order[1..]])
        yy2 = Numo::SFloat.minimum(y2[i], y2[order[1..]])

        w = Numo::SFloat.maximum(0.0, xx2 - xx1)
        h = Numo::SFloat.maximum(0.0, yy2 - yy1)

        intersection = w * h

        iou = intersection / (areas[i] + areas[order[1..]] - intersection)

        inds = iou.le(iou_threshold).where

        # `inds` is relative to order[1..], hence the +1 shift.
        order = order[inds + 1]
      end

      Numo::Int32.cast(keep)
    end

    # Converts raw model outputs into pixel-space boxes of the original image
    # and appends the ones above the confidence threshold to `detections`.
    #
    # Boxes are read as center-x, center-y, width, height, scaled by
    # RESOLUTION, then un-padded, un-scaled and shifted by the trim offsets.
    #
    # @param outputs [Hash] model outputs with 'dets' and 'labels' entries
    # @param transform_info [Hash] :pad_x, :pad_y, :scale_x, :scale_y and
    #   optionally :trim_offset_x / :trim_offset_y (treated as 0 when absent)
    # @param detections [Hash, nil] previously accumulated detections to extend
    # @param confidence [Float] minimum (exclusive) score to keep a box
    # @param temperature [Numeric] divisor applied to logits before the sigmoid
    # @return [Hash] :xyxy, :confidence and :class_id Numo arrays
    def postprocess_outputs(outputs, transform_info, detections = nil, confidence: 0.3, temperature: 1)
      boxes = Numo::SFloat.cast(outputs['dets'])
      logits = Numo::SFloat.cast(outputs['labels'])

      boxes = boxes[0, true, true] # [300, 4]
      logits = logits[0, true, true] # [300, num_classes]

      scaled_logits = logits / temperature

      probs = 1.0 / (1.0 + Numo::NMath.exp(-scaled_logits))

      scores = probs.max(axis: 1)
      labels = probs.argmax(axis: 1)

      cx = boxes[true, 0]
      cy = boxes[true, 1]
      w = boxes[true, 2]
      h = boxes[true, 3]

      boxes_xyxy = Numo::SFloat.zeros(boxes.shape[0], 4)
      boxes_xyxy[true, 0] = cx - (w / 2.0)
      boxes_xyxy[true, 1] = cy - (h / 2.0)
      boxes_xyxy[true, 2] = cx + (w / 2.0)
      boxes_xyxy[true, 3] = cy + (h / 2.0)

      boxes_xyxy *= RESOLUTION

      pad_x = transform_info[:pad_x]
      pad_y = transform_info[:pad_y]
      boxes_xyxy[true, 0] -= pad_x
      boxes_xyxy[true, 1] -= pad_y
      boxes_xyxy[true, 2] -= pad_x
      boxes_xyxy[true, 3] -= pad_y

      scale_x = transform_info[:scale_x]
      scale_y = transform_info[:scale_y]
      boxes_xyxy[true, 0] /= scale_x
      boxes_xyxy[true, 1] /= scale_y
      boxes_xyxy[true, 2] /= scale_x
      boxes_xyxy[true, 3] /= scale_y

      # Offsets are optional so the method also works on untrimmed input.
      trim_offset_x = transform_info.fetch(:trim_offset_x, 0)
      trim_offset_y = transform_info.fetch(:trim_offset_y, 0)
      boxes_xyxy[true, 0] += trim_offset_x
      boxes_xyxy[true, 1] += trim_offset_y
      boxes_xyxy[true, 2] += trim_offset_x
      boxes_xyxy[true, 3] += trim_offset_y

      keep_indices = scores.gt(confidence).where

      return detections || empty_detections if keep_indices.empty?

      scores = scores[keep_indices]
      labels = labels[keep_indices]
      boxes_xyxy = boxes_xyxy[keep_indices, true]

      unless detections
        return {
          xyxy: boxes_xyxy,
          confidence: scores,
          class_id: Numo::Int32.cast(labels)
        }
      end

      existing_n = detections[:xyxy].shape[0]
      new_n = boxes_xyxy.shape[0]
      total = existing_n + new_n

      xyxy = Numo::SFloat.zeros(total, 4)
      conf = Numo::SFloat.zeros(total)
      cls = Numo::Int32.zeros(total)

      # The empty container holds 1-D arrays, so only copy when there is data.
      if existing_n.positive?
        xyxy[0...existing_n, true] = detections[:xyxy]
        conf[0...existing_n] = detections[:confidence]
        cls[0...existing_n] = detections[:class_id]
      end

      xyxy[existing_n...total, true] = boxes_xyxy
      conf[existing_n...total] = scores
      cls[existing_n...total] = Numo::Int32.cast(labels)

      { xyxy: xyxy, confidence: conf, class_id: cls }
    end

    # Orders fields top-to-bottom, then left-to-right within a line. A field
    # joins the current line while its y is within `y_threshold` of the first
    # field of that line.
    #
    # @param fields [Array<Field>]
    # @param y_threshold [Float] maximum y distance to stay on the same line
    # @return [Array<Field>]
    def sort_fields(fields, y_threshold: 0.01)
      sorted_fields = fields.sort_by { |field| [field.y, field.x] }

      lines = []
      current_line = []

      sorted_fields.each do |field|
        if current_line.empty? || (field.y - current_line.first.y).abs < y_threshold
          current_line << field
        else
          lines << current_line.sort_by(&:x)

          current_line = [field]
        end
      end

      lines << current_line.sort_by(&:x) unless current_line.empty?

      lines.flatten
    end

    # Filters detections down to the boxes that survive {nms}.
    #
    # @param detections [Hash] :xyxy, :confidence and :class_id Numo arrays
    # @param threshold [Float] IoU threshold
    # @return [Hash] detections restricted to the kept indices
    def apply_nms(detections, threshold = 0.5)
      return detections if detections[:xyxy].shape[0].zero?

      keep_indices = nms(detections[:xyxy], detections[:confidence], threshold)

      {
        xyxy: detections[:xyxy][keep_indices, true],
        confidence: detections[:confidence][keep_indices],
        class_id: detections[:class_id][keep_indices]
      }
    end

    # Lazily loads and memoizes the ONNX model from MODEL_PATH.
    #
    # @return [OnnxRuntime::Model]
    def model
      @model ||= OnnxRuntime::Model.new(
        MODEL_PATH.to_s,
        inter_op_num_threads: CPU_THREADS,
        intra_op_num_threads: CPU_THREADS,
        enable_mem_pattern: false,
        enable_cpu_mem_arena: false,
        providers: ['CPUExecutionProvider']
      )
    end
    # rubocop:enable Metrics
  end
end
Struct.new(:type, :x, :y, :w, :h, :confidence, keyword_init: true) - MODEL_PATH = Rails.root.join('tmp/model_704_int8.onnx') + MODEL_PATH = Rails.root.join('tmp/model.onnx') RESOLUTION = 704 From a21f7d165301207296d6dec16080cc8f3e85b734 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Sun, 2 Nov 2025 08:13:40 +0200 Subject: [PATCH 12/16] fix padding --- app/javascript/template_builder/fields.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/javascript/template_builder/fields.vue b/app/javascript/template_builder/fields.vue index 7b789436..eb6f05b9 100644 --- a/app/javascript/template_builder/fields.vue +++ b/app/javascript/template_builder/fields.vue @@ -210,7 +210,7 @@