From 7ca290c080f96865560fd2bcb3c9bb5bfa772970 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Tue, 25 Nov 2025 13:43:35 +0200 Subject: [PATCH 01/15] adjust detection --- lib/templates/detect_fields.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/templates/detect_fields.rb b/lib/templates/detect_fields.rb index 1f3b5c0c..c790b034 100755 --- a/lib/templates/detect_fields.rb +++ b/lib/templates/detect_fields.rb @@ -472,7 +472,8 @@ module Templates next if text_field.y + text_field.h < image_field.y - next unless boxes_overlap?(image_field, text_field) && calculate_iou(image_field, text_field) > 0.5 + next unless boxes_overlap?(image_field, text_field) + next if calculate_iou(image_field, text_field) < 0.4 break image_field.confidence += by end From 453ebbf90a9a25c827a2cbb06bfc8ef9c6baf7f0 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Thu, 27 Nov 2025 12:23:45 +0200 Subject: [PATCH 02/15] adjust start form --- app/controllers/start_form_controller.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/controllers/start_form_controller.rb b/app/controllers/start_form_controller.rb index 08778a3e..98e5e0c3 100644 --- a/app/controllers/start_form_controller.rb +++ b/app/controllers/start_form_controller.rb @@ -81,7 +81,7 @@ class StartFormController < ApplicationController @submitter = Submitter.where(submission: @template.submissions) .where.not(completed_at: nil) - .find_by!(required_params) + .find_by!(required_params.except('name')) end private From 41f16e7d395d9fe3be94d20ea1b93fdd3a69a188 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Thu, 27 Nov 2025 17:00:26 +0200 Subject: [PATCH 03/15] adjust detection --- lib/templates/detect_fields.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/templates/detect_fields.rb b/lib/templates/detect_fields.rb index c790b034..d3e35b05 100755 --- a/lib/templates/detect_fields.rb +++ b/lib/templates/detect_fields.rb @@ -75,7 +75,7 @@ module Templates { uuid: SecureRandom.uuid, type: f.type, - required: f.type != 'checkbox', + required: f.type == 'signature', preferences: {}, areas: [{ x: f.x, @@ -128,7 +128,7 @@ module Templates { uuid: SecureRandom.uuid, type:, - required: type != 'checkbox', + required: type == 'signature', preferences: {}, areas: [{ x: field.x, y: field.y, From 68936e10c58e73bcb567bf764186f45f98760745 Mon Sep 17 00:00:00 2001 From: Alex Turchyn Date: Wed, 26 Nov 2025 14:12:08 +0200 Subject: [PATCH 04/15] add confirmation columns --- app/controllers/profile_controller.rb | 7 +- app/controllers/users_controller.rb | 7 +- app/models/user.rb | 6 +- app/views/profile/index.html.erb | 5 + app/views/users/_form.html.erb | 5 + app/views/users/index.html.erb | 8 +- config/initializers/devise.rb | 4 +- config/locales/i18n.yml | 93 ++++++++++++++++++- ...20251125194305_add_confirmable_to_users.rb | 10 ++ db/schema.rb | 6 +- 10 files changed, 144 insertions(+), 7 deletions(-) create mode 100644 db/migrate/20251125194305_add_confirmable_to_users.rb diff --git a/app/controllers/profile_controller.rb b/app/controllers/profile_controller.rb index a1ee71bf..878831d5 100644 --- a/app/controllers/profile_controller.rb +++ b/app/controllers/profile_controller.rb @@ -9,7 +9,12 @@ class ProfileController < ApplicationController def update_contact if current_user.update(contact_params) - redirect_to settings_profile_index_path, notice: I18n.t('contact_information_has_been_update') + if current_user.try(:pending_reconfirmation?) && current_user.previous_changes.key?(:unconfirmed_email) + redirect_to settings_profile_index_path, + notice: I18n.t('a_confirmation_email_has_been_sent_to_the_new_email_address') + else + redirect_to settings_profile_index_path, notice: I18n.t('contact_information_has_been_update') + end else render :index, status: :unprocessable_content end diff --git a/app/controllers/users_controller.rb b/app/controllers/users_controller.rb index 39555b59..9ab9ca68 100644 --- a/app/controllers/users_controller.rb +++ b/app/controllers/users_controller.rb @@ -65,7 +65,12 @@ class UsersController < ApplicationController end if @user.update(attrs.except(*(current_user == @user ? %i[password otp_required_for_login role] : %i[password]))) - redirect_back fallback_location: settings_users_path, notice: I18n.t('user_has_been_updated') + if @user.try(:pending_reconfirmation?) && @user.previous_changes.key?(:unconfirmed_email) + redirect_back fallback_location: settings_users_path, + notice: I18n.t('a_confirmation_email_has_been_sent_to_the_new_email_address') + else + redirect_back fallback_location: settings_users_path, notice: I18n.t('user_has_been_updated') + end else render turbo_stream: turbo_stream.replace(:modal, template: 'users/edit'), status: :unprocessable_content end diff --git a/app/models/user.rb b/app/models/user.rb index 83e88f08..7eabb059 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -6,6 +6,9 @@ # # id :bigint not null, primary key # archived_at :datetime +# confirmation_sent_at :datetime +# confirmation_token :string +# confirmed_at :datetime # consumed_timestep :integer # current_sign_in_at :datetime # current_sign_in_ip :string @@ -24,8 +27,9 @@ # reset_password_token :string # role :string not null # sign_in_count :integer default(0), not null +# unconfirmed_email :string # unlock_token :string -# uuid :text not null +# uuid :string not null # created_at :datetime not null # updated_at :datetime not null # account_id :bigint not null diff --git a/app/views/profile/index.html.erb b/app/views/profile/index.html.erb index c5422b00..9f1ec645 100644 --- a/app/views/profile/index.html.erb +++ b/app/views/profile/index.html.erb @@ -18,6 +18,11 @@
<%= f.label :email, t('email'), class: 'label' %> <%= f.email_field :email, autocomplete: 'off', class: 'base-input' %> + <% if current_user.try(:pending_reconfirmation?) %> + + <% end %>
<%= f.button button_title(title: t('update'), disabled_with: t('updating')), class: 'base-button' %> diff --git a/app/views/users/_form.html.erb b/app/views/users/_form.html.erb index 5ceaeabc..c7652916 100644 --- a/app/views/users/_form.html.erb +++ b/app/views/users/_form.html.erb @@ -13,6 +13,11 @@
<%= f.label :email, t('email'), class: 'label' %> <%= f.email_field :email, required: true, class: 'base-input' %> + <% if user.try(:pending_reconfirmation?) %> + + <% end %> <% if user.persisted? && Accounts.can_send_emails?(current_account) %> <%= t('click_here_to_send_a_reset_password_email_html') %> diff --git a/app/views/users/index.html.erb b/app/views/users/index.html.erb index a0849cec..e49a8563 100644 --- a/app/views/users/index.html.erb +++ b/app/views/users/index.html.erb @@ -52,7 +52,13 @@ <%= user.full_name %> - <%= user.email %> + <% if user.try(:pending_reconfirmation?) %> + <%= user.unconfirmed_email %> +
+ (<%= t('unconfirmed') %>) + <% else %> + <%= user.email %> + <% end %> diff --git a/config/initializers/devise.rb b/config/initializers/devise.rb index d7b80f1a..1329ce87 100644 --- a/config/initializers/devise.rb +++ b/config/initializers/devise.rb @@ -31,6 +31,7 @@ end # # Use this hook to configure devise mailer, warden hooks and so forth. # Many of these configuration options can be set straight in your model. +# rubocop:disable Metrics/BlockLength Devise.setup do |config| config.warden do |manager| manager.default_strategies(scope: :user).unshift(:two_factor_authenticatable) @@ -166,7 +167,7 @@ Devise.setup do |config| # without confirming their account. # Default is 0.days, meaning the user cannot access the website without # confirming their account. - # config.allow_unconfirmed_access_for = 2.days + config.allow_unconfirmed_access_for = nil # A period that the user is allowed to confirm their account before their # token becomes invalid. For example, if set to 3.days, the user can confirm @@ -332,3 +333,4 @@ Devise.setup do |config| ActiveSupport.run_load_hooks(:devise_config, config) end +# rubocop:enable Metrics/BlockLength diff --git a/config/locales/i18n.yml b/config/locales/i18n.yml index b9784c04..0d47623a 100644 --- a/config/locales/i18n.yml +++ b/config/locales/i18n.yml @@ -71,7 +71,7 @@ en: &en team_access: Team access document_download_filename_format: Document download filename format docuseal_trusted_signature: DocuSeal Trusted Signature - hello_name: Hello %{name} + hello_name: Hi %{name} you_are_invited_to_product_name: You are invited to %{product_name} you_have_been_invited_to_account_name_product_name_please_sign_up_using_the_link_below_: 'You have been invited to %{account_name} %{product_name}. Please sign up using the link below:' sent_using_product_name_in_testing_mode_html: 'Sent using %{product_name} in testing mode' @@ -861,6 +861,19 @@ en: &en reports: Reports completed_submissions: Completed submissions sms: SMS + a_confirmation_email_has_been_sent_to_the_new_email_address: A confirmation email has been sent to the new email address. + email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} email address is awaiting confirmation. Follow the link in the email to confirm." + please_confirm_your_email_address_using_the_link_below_: 'Please confirm your email address using the link below:' + confirm_email: Confirm email + unconfirmed: Unconfirmed + devise: + confirmations: + confirmed: Your email address has been successfully confirmed. + failure: + unconfirmed: You have to confirm your email address before continuing. + mailer: + confirmation_instructions: + subject: Confirm your email address submission_sources: api: API bulk: Bulk Send @@ -1808,6 +1821,19 @@ es: &es reports: Informes completed_submissions: Envíos completados sms: SMS + a_confirmation_email_has_been_sent_to_the_new_email_address: Se ha enviado un correo electrónico de confirmación a la nueva dirección de correo electrónico. + email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} está pendiente de confirmación. Sigue el enlace en el correo para confirmarla." + please_confirm_your_email_address_using_the_link_below_: 'Por favor, confirma tu dirección de correo electrónico utilizando el enlace a continuación:' + confirm_email: Confirmar correo + unconfirmed: No confirmado + devise: + confirmations: + confirmed: Tu dirección de correo electrónico ha sido confirmada correctamente. + failure: + unconfirmed: Debes confirmar tu dirección de correo electrónico antes de continuar. + mailer: + confirmation_instructions: + subject: Confirma tu dirección de correo electrónico submission_sources: api: API bulk: Envío masivo @@ -2756,6 +2782,19 @@ it: &it reports: Rapporti completed_submissions: Invii completati sms: SMS + a_confirmation_email_has_been_sent_to_the_new_email_address: È stata inviata un'email di conferma al nuovo indirizzo email. + email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} è in attesa di conferma. Segui il link nell'email per confermare." + please_confirm_your_email_address_using_the_link_below_: 'Conferma il tuo indirizzo email utilizzando il link qui sotto:' + confirm_email: Conferma email + unconfirmed: Non confermato + devise: + confirmations: + confirmed: Il tuo indirizzo email è stato confermato con successo. + failure: + unconfirmed: Devi confermare il tuo indirizzo email prima di continuare. + mailer: + confirmation_instructions: + subject: Conferma il tuo indirizzo email submission_sources: api: API bulk: Invio massivo @@ -3700,6 +3739,19 @@ fr: &fr reports: Rapports completed_submissions: Soumissions terminées sms: SMS + a_confirmation_email_has_been_sent_to_the_new_email_address: Un e-mail de confirmation a été envoyé à la nouvelle adresse e-mail. + email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} est en attente de confirmation. Suivez le lien dans l'e-mail pour la confirmer." + please_confirm_your_email_address_using_the_link_below_: 'Veuillez confirmer votre adresse e-mail en utilisant le lien ci-dessous :' + confirm_email: "Confirmer l'e-mail" + unconfirmed: Non confirmé + devise: + confirmations: + confirmed: Votre adresse e-mail a été confirmée avec succès. + failure: + unconfirmed: Vous devez confirmer votre adresse e-mail avant de continuer. + mailer: + confirmation_instructions: + subject: Confirmez votre adresse e-mail submission_sources: api: API bulk: Envoi en masse @@ -4647,6 +4699,19 @@ pt: &pt reports: Relatórios completed_submissions: Envios concluídos sms: SMS + a_confirmation_email_has_been_sent_to_the_new_email_address: Um e-mail de confirmação foi enviado para o novo endereço de e-mail. + email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} está aguardando confirmação. Siga o link enviado para esse endereço de e-mail para confirmar." + please_confirm_your_email_address_using_the_link_below_: 'Por favor, confirme seu endereço de e-mail usando o link abaixo:' + confirm_email: Confirmar e-mail + unconfirmed: Não confirmado + devise: + confirmations: + confirmed: Seu endereço de e-mail foi confirmado com sucesso. + failure: + unconfirmed: Você deve confirmar seu endereço de e-mail antes de continuar. + mailer: + confirmation_instructions: + subject: Confirme seu endereço de e-mail submission_sources: api: API bulk: Envio em massa @@ -5594,6 +5659,19 @@ de: &de reports: Berichte completed_submissions: Abgeschlossene Übermittlungen sms: SMS + a_confirmation_email_has_been_sent_to_the_new_email_address: Eine Bestätigungs-E-Mail wurde an die neue E-Mail-Adresse gesendet. + email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} wartet auf Bestätigung. Folgen Sie dem Link in der E-Mail, um sie zu bestätigen." + please_confirm_your_email_address_using_the_link_below_: 'Bitte bestätigen Sie Ihre E-Mail-Adresse über den folgenden Link:' + confirm_email: E-Mail bestätigen + unconfirmed: Unbestätigt + devise: + confirmations: + confirmed: Ihre E-Mail-Adresse wurde erfolgreich bestätigt. + failure: + unconfirmed: Sie müssen Ihre E-Mail-Adresse bestätigen, bevor Sie fortfahren. + mailer: + confirmation_instructions: + subject: Bestätigen Sie Ihre E-Mail-Adresse submission_sources: api: API bulk: Massenversand @@ -6902,6 +6980,19 @@ nl: &nl reports: Rapporten completed_submissions: Voltooide inzendingen sms: SMS + a_confirmation_email_has_been_sent_to_the_new_email_address: Er is een bevestigingsmail verzonden naar het nieuwe e-mailadres. + email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} wacht op bevestiging. Volg de link in de e-mail om te bevestigen." + please_confirm_your_email_address_using_the_link_below_: 'Bevestig je e-mailadres via de onderstaande link:' + confirm_email: E-mailadres bevestigen + unconfirmed: Onbevestigd + devise: + confirmations: + confirmed: Je e-mailadres is succesvol bevestigd. + failure: + unconfirmed: Je moet je e-mailadres bevestigen voordat je verdergaat. + mailer: + confirmation_instructions: + subject: Bevestig je e-mailadres submission_sources: api: API bulk: Bulkverzending diff --git a/db/migrate/20251125194305_add_confirmable_to_users.rb b/db/migrate/20251125194305_add_confirmable_to_users.rb new file mode 100644 index 00000000..ca946854 --- /dev/null +++ b/db/migrate/20251125194305_add_confirmable_to_users.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +class AddConfirmableToUsers < ActiveRecord::Migration[8.0] + def change + add_column :users, :confirmation_token, :string + add_column :users, :confirmed_at, :datetime + add_column :users, :confirmation_sent_at, :datetime + add_column :users, :unconfirmed_email, :string + end +end diff --git a/db/schema.rb b/db/schema.rb index 5ceb1f31..ea115d78 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[8.0].define(version: 2025_11_21_113910) do +ActiveRecord::Schema[8.0].define(version: 2025_11_25_194305) do # These are extensions that must be enabled in order to support this database enable_extension "btree_gin" enable_extension "plpgsql" @@ -451,6 +451,10 @@ ActiveRecord::Schema[8.0].define(version: 2025_11_21_113910) do t.string "otp_secret" t.integer "consumed_timestep" t.boolean "otp_required_for_login", default: false, null: false + t.string "confirmation_token" + t.datetime "confirmed_at" + t.datetime "confirmation_sent_at" + t.string "unconfirmed_email" t.index ["account_id"], name: "index_users_on_account_id" t.index ["email"], name: "index_users_on_email", unique: true t.index ["reset_password_token"], name: "index_users_on_reset_password_token", unique: true From dc178ef03f5b650a9bd0dfe2273c0386f766798b Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Fri, 28 Nov 2025 16:15:57 +0200 Subject: [PATCH 05/15] adjust router --- config/routes.rb | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/config/routes.rb b/config/routes.rb index 05abbfc4..0ccbfdfa 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -14,13 +14,8 @@ Rails.application.routes.draw do get 'up' => 'rails/health#show' get 'manifest' => 'pwa#manifest' - devise_for :users, - path: '/', only: %i[sessions passwords omniauth_callbacks], - controllers: begin - options = { sessions: 'sessions', passwords: 'passwords' } - options[:omniauth_callbacks] = 'omniauth_callbacks' if User.devise_modules.include?(:omniauthable) - options - end + devise_for :users, path: '/', only: %i[sessions passwords], + controllers: { sessions: 'sessions', passwords: 'passwords' } devise_scope :user do resource :invitation, only: %i[update] do From fd2ba57325e7a78a078278fd3ebbd03e0d1f0a1b Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Sat, 29 Nov 2025 10:34:13 +0200 Subject: [PATCH 06/15] adjust invitation --- app/controllers/invitations_controller.rb | 7 +++++++ app/views/invitations/edit.html.erb | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/app/controllers/invitations_controller.rb b/app/controllers/invitations_controller.rb index 7030e40f..c3b7e88e 100644 --- a/app/controllers/invitations_controller.rb +++ b/app/controllers/invitations_controller.rb @@ -1,4 +1,11 @@ # frozen_string_literal: true class InvitationsController < Devise::PasswordsController + def update + super do |resource| + resource.confirmed_at ||= Time.current if resource.errors.empty? + + PasswordsController::Current.user = resource + end + end end diff --git a/app/views/invitations/edit.html.erb b/app/views/invitations/edit.html.erb index 966b487f..1513d612 100644 --- a/app/views/invitations/edit.html.erb +++ b/app/views/invitations/edit.html.erb @@ -3,7 +3,7 @@ <%= svg_icon('waving_hand', class: 'h-10 w-10') %> <%= t('welcome_to_product_name', product_name: Docuseal.product_name) %> - <%= form_for(resource, as: resource_name, url: password_path(resource_name), html: { method: :put, class: 'space-y-6' }) do |f| %> + <%= form_for(resource, as: resource_name, url: invitation_path, html: { method: :put, class: 'space-y-6' }) do |f| %>
<%= render 'devise/shared/error_messages', resource: %> <%= f.hidden_field :reset_password_token %> From e6280e8f5a67f40f25831ca530dfcd8ce938034f Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Sat, 29 Nov 2025 18:28:10 +0200 Subject: [PATCH 07/15] optimize text nodes --- lib/pdfium.rb | 41 +++++++++++++++++++------------- lib/templates/image_to_fields.rb | 8 +++++-- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/lib/pdfium.rb b/lib/pdfium.rb index 464f95e2..b43c3b20 100644 --- a/lib/pdfium.rb +++ b/lib/pdfium.rb @@ -39,8 +39,17 @@ class Pdfium FPDF_RENDER_FORCEHALFTONE = 0x400 FPDF_PRINTING = 0x800 - TextNode = Struct.new(:content, :x, :y, :w, :h, keyword_init: true) - LineNode = Struct.new(:x, :y, :w, :h, :tilt, keyword_init: true) + TextNode = Struct.new(:content, :x, :y, :w, :h) do + def endx + @endx ||= x + w + end + + def endy + @endy ||= y + h + end + end + + LineNode = Struct.new(:x, :y, :w, :h, :tilt) # rubocop:disable Naming/ClassAndModuleCamelCase class FPDF_LIBRARY_CONFIG < FFI::Struct @@ -433,15 +442,15 @@ class Pdfium return @text_nodes if char_count.zero? - char_count.times do |i| - unicode = Pdfium.FPDFText_GetUnicode(text_page, i) - - char = [unicode].pack('U*') + left_ptr = FFI::MemoryPointer.new(:double) + right_ptr = FFI::MemoryPointer.new(:double) + bottom_ptr = FFI::MemoryPointer.new(:double) + top_ptr = FFI::MemoryPointer.new(:double) + origin_x_ptr = FFI::MemoryPointer.new(:double) + origin_y_ptr = FFI::MemoryPointer.new(:double) - left_ptr = FFI::MemoryPointer.new(:double) - right_ptr = FFI::MemoryPointer.new(:double) - bottom_ptr = FFI::MemoryPointer.new(:double) - top_ptr = FFI::MemoryPointer.new(:double) + char_count.times do |i| + char = Pdfium.FPDFText_GetUnicode(text_page, i).chr(Encoding::UTF_8) result = Pdfium.FPDFText_GetCharBox(text_page, i, left_ptr, right_ptr, bottom_ptr, top_ptr) @@ -450,12 +459,10 @@ class Pdfium left = left_ptr.read_double right = right_ptr.read_double - origin_x_ptr = FFI::MemoryPointer.new(:double) - origin_y_ptr = FFI::MemoryPointer.new(:double) - Pdfium.FPDFText_GetCharOrigin(text_page, i, origin_x_ptr, origin_y_ptr) origin_y = origin_y_ptr.read_double + origin_x = origin_x_ptr.read_double font_size = Pdfium.FPDFText_GetFontSize(text_page, i) font_size = 8 if font_size == 1 @@ -465,12 +472,12 @@ class Pdfium abs_width = right - left abs_height = font_size - x = abs_x / width + x = origin_x / width y = abs_y / height - node_width = abs_width / width + node_width = (abs_width + ((abs_x - origin_x).abs * 2)) / width node_height = abs_height / height - @text_nodes << TextNode.new(content: char, x: x, y: y, w: node_width, h: node_height) + @text_nodes << TextNode.new(char, x, y, node_width, node_height) end @text_nodes = @text_nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y } @@ -539,7 +546,7 @@ class Pdfium norm_w = w / width norm_h = h / height - @line_nodes << LineNode.new(x: norm_x, y: norm_y, w: norm_w, h: norm_h, tilt: tilt) + @line_nodes << LineNode.new(norm_x, norm_y, norm_w, norm_h, tilt) end @line_nodes = @line_nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y } diff --git a/lib/templates/image_to_fields.rb b/lib/templates/image_to_fields.rb index 786e9785..72c578a7 100755 --- a/lib/templates/image_to_fields.rb +++ b/lib/templates/image_to_fields.rb @@ -4,7 +4,11 @@ module Templates module ImageToFields module_function - Field = Struct.new(:type, :x, :y, :w, :h, :confidence, keyword_init: true) + Field = Struct.new(:type, :x, :y, :w, :h, :confidence, keyword_init: true) do + def endy + @endy ||= y + h + end + end MODEL_PATH = Rails.root.join('tmp/model.onnx') @@ -299,7 +303,7 @@ module Templates end def sort_fields(fields, y_threshold: 0.01) - sorted_fields = fields.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y } + sorted_fields = fields.sort { |a, b| a.endy == b.endy ? a.x <=> b.x : a.endy <=> b.endy } lines = [] current_line = [] From c1a5c91299b62299977d6e77be7f6a79cdf28cde Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Sat, 29 Nov 2025 20:13:00 +0200 Subject: [PATCH 08/15] adjust detection --- lib/pdfium.rb | 18 +++- lib/templates/detect_fields.rb | 179 +++++++++++++------------------ lib/templates/image_to_fields.rb | 29 +---- 3 files changed, 93 insertions(+), 133 deletions(-) diff --git a/lib/pdfium.rb b/lib/pdfium.rb index b43c3b20..6a5874e7 100644 --- a/lib/pdfium.rb +++ b/lib/pdfium.rb @@ -49,7 +49,15 @@ class Pdfium end end - LineNode = Struct.new(:x, :y, :w, :h, :tilt) + LineNode = Struct.new(:x, :y, :w, :h, :tilt) do + def endy + @endy ||= y + h + end + + def endx + @endx ||= x + w + end + end # rubocop:disable Naming/ClassAndModuleCamelCase class FPDF_LIBRARY_CONFIG < FFI::Struct @@ -480,7 +488,11 @@ class Pdfium @text_nodes << TextNode.new(char, x, y, node_width, node_height) end - @text_nodes = @text_nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y } + y_threshold = 4.0 / width + + @text_nodes = @text_nodes.sort do |a, b| + (a.endy - b.endy).abs < y_threshold ? a.x <=> b.x : a.endy <=> b.endy + end ensure Pdfium.FPDFText_ClosePage(text_page) if text_page && !text_page.null? end @@ -549,7 +561,7 @@ class Pdfium @line_nodes << LineNode.new(norm_x, norm_y, norm_w, norm_h, tilt) end - @line_nodes = @line_nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y } + @line_nodes = @line_nodes.sort { |a, b| a.endy == b.endy ? a.x <=> b.x : a.endy <=> b.endy } end def close diff --git a/lib/templates/detect_fields.rb b/lib/templates/detect_fields.rb index d3e35b05..46c613e7 100755 --- a/lib/templates/detect_fields.rb +++ b/lib/templates/detect_fields.rb @@ -4,7 +4,16 @@ module Templates module DetectFields module_function - TextFieldBox = Struct.new(:x, :y, :w, :h, keyword_init: true) + TextFieldBox = Struct.new(:x, :y, :w, :h, keyword_init: true) do + def endy + @endy ||= y + h + end + + def endx + @endx ||= x + w + end + end + PageNode = Struct.new(:prev, :next, :elem, :page, :attachment_uuid, keyword_init: true) DATE_REGEXP = / @@ -49,6 +58,9 @@ module Templates \s*[:-]?\s*\z /ix + LINEBREAK = ["\n", "\r"].freeze + CHECBOXES = ['☐', '□'].freeze + # rubocop:disable Metrics, Style def call(io, attachment: nil, confidence: 0.3, temperature: 1, inference: Templates::ImageToFields, nms: 0.1, split_page: false, aspect_ratio: true, padding: 20, regexp_type: true, &) @@ -71,6 +83,8 @@ module Templates fields = inference.call(image, confidence:, nms:, split_page:, temperature:, aspect_ratio:, padding:) + fields = sort_fields(fields, y_threshold: 10.0 / image.height) + fields = fields.map do |f| { uuid: SecureRandom.uuid, @@ -113,6 +127,8 @@ module Templates text_fields = extract_text_fields_from_page(page) line_fields = extract_line_fields_from_page(page) + fields = sort_fields(fields, y_threshold: 10.0 / page.height) + fields = increase_confidence_for_overlapping_fields(fields, text_fields) fields = increase_confidence_for_overlapping_fields(fields, line_fields) @@ -153,6 +169,12 @@ module Templates doc.close end + def sort_fields(fields, y_threshold: 0.01) + fields.sort do |a, b| + (a.endy - b.endy).abs < y_threshold ? a.x <=> b.x : a.endy <=> b.endy + end + end + def print_debug(head_node) current_node = head_node index = 0 @@ -189,121 +211,68 @@ module Templates def build_page_nodes(page, fields, tail_node, attachment_uuid: nil) field_nodes = [] - current_text = ''.b - - text_nodes = page.text_nodes - text_idx = 0 - field_idx = 0 + y_theshold = 10.0 / page.height - while text_idx < text_nodes.length || field_idx < fields.length - text_node = text_nodes[text_idx] - field = fields[field_idx] + text_nodes = page.text_nodes - process_text_node = false - process_field_node = false + current_field = fields.shift - if text_node && field - text_y_center = text_node.y + (text_node.h / 2.0) - field_y_center = field.y + (field.h / 2.0) - y_threshold = text_node.h / 2.0 - vertical_distance = (text_y_center - field_y_center).abs + index = 0 - if vertical_distance < y_threshold - is_underscore = text_node.content == '_' - is_left_of_field = text_node.x < field.x + prev_node = nil - if is_underscore && is_left_of_field - text_x_end = text_node.x + text_node.w + loop do + node = text_nodes[index] - distance = field.x - text_x_end - proximity_threshold = text_node.w * 3.0 + break unless node - if distance < proximity_threshold - process_field_node = true - else - process_text_node = true - end + loop do + break unless current_field - elsif is_left_of_field - process_text_node = true - else - process_field_node = true - end + if ((current_field.endy - node.endy).abs < y_theshold && + (current_field.x <= node.x || node.content.in?(LINEBREAK))) || + current_field.endy < node.y + field_node = PageNode.new(prev: tail_node, elem: current_field, page: page.page_index, attachment_uuid:) + tail_node.next = field_node + tail_node = field_node + field_nodes << tail_node - elsif text_node.y < field.y - process_text_node = true + current_field = fields.shift else - process_field_node = true + break end - - elsif text_node - process_text_node = true - elsif field - process_field_node = true end - if process_field_node - unless current_text.empty? - new_text_node = PageNode.new(prev: tail_node, elem: current_text, page: page.page_index, attachment_uuid:) - tail_node.next = new_text_node - tail_node = new_text_node - current_text = ''.b - end + if tail_node.elem.is_a?(Templates::ImageToFields::Field) + text_node = PageNode.new(prev: tail_node, elem: ''.b, page: page.page_index, attachment_uuid:) + tail_node.next = text_node - new_field_node = PageNode.new(prev: tail_node, elem: field, page: page.page_index, attachment_uuid:) - tail_node.next = new_field_node - tail_node = new_field_node - - field_nodes << tail_node - - while text_idx < text_nodes.length - text_node_to_check = text_nodes[text_idx] - - is_part_of_field = false - - if text_node_to_check.content == '_' - check_y_center = text_node_to_check.y + (text_node_to_check.h / 2.0) - check_y_dist = (check_y_center - field_y_center).abs - check_y_thresh = text_node_to_check.h / 2.0 - - if check_y_dist < check_y_thresh - padding = text_node_to_check.w * 3.0 - field_x_start = field.x - padding - field_x_end = field.x + field.w + padding - text_x_start = text_node_to_check.x - text_x_end = text_node_to_check.x + text_node_to_check.w - - is_part_of_field = true if text_x_start <= field_x_end && field_x_start <= text_x_end - end - end - - break unless is_part_of_field + tail_node = text_node + end - text_idx += 1 - end + if prev_node && (node.endy - prev_node.endy) > y_theshold && LINEBREAK.exclude?(prev_node.content) + tail_node.elem << "\n" + end - field_idx += 1 - elsif process_text_node - if text_idx > 0 - prev_text_node = text_nodes[text_idx - 1] + if node.content != '_' || !tail_node.elem.ends_with?('___') + tail_node.elem << node.content unless CHECBOXES.include?(node.content) + end - x_gap = text_node.x - (prev_text_node.x + prev_text_node.w) + prev_node = node - gap_w = text_node.w > prev_text_node.w ? text_node.w : prev_text_node.w + index += 1 + end - current_text << ' ' if x_gap > gap_w * 2 - end + loop do + break unless current_field - current_text << text_node.content - text_idx += 1 - end - end + field_node = PageNode.new(prev: tail_node, elem: current_field, page: page.page_index, attachment_uuid:) + tail_node.next = field_node + tail_node = field_node + field_nodes << tail_node - unless current_text.empty? - new_text_node = PageNode.new(prev: tail_node, elem: current_text, page: page.page_index, attachment_uuid:) - tail_node.next = new_text_node - tail_node = new_text_node + current_field = fields.shift end [field_nodes, tail_node] @@ -399,8 +368,8 @@ module Templates x1 = node.x y1 = node.y - x2 = node.x + node.w - y2 = node.y + node.h + x2 = node.endx + y2 = node.endy underscore_count = 1 @@ -417,8 +386,9 @@ module Templates break if distance > 0.02 || height_diff > node.h * 0.5 underscore_count += 1 - next_x2 = next_node.x + next_node.w - next_y2 = next_node.y + next_node.h + + next_x2 = next_node.endx + next_y2 = next_node.endy x2 = next_x2 y2 = [y2, next_y2].max @@ -438,8 +408,8 @@ module Templates def calculate_iou(box1, box2) x1 = [box1.x, box2.x].max y1 = [box1.y, box2.y].max - x2 = [box1.x + box1.w, box2.x + box2.w].min - y2 = [box1.y + box1.h, box2.y + box2.h].min + x2 = [box1.endx, box2.endx].min + y2 = [box1.endy, box2.endy].min intersection_width = [0, x2 - x1].max intersection_height = [0, y2 - y1].max @@ -455,8 +425,7 @@ module Templates end def boxes_overlap?(box1, box2) - !(box1.x + box1.w < box2.x || box2.x + box2.w < box1.x || - box1.y + box1.h < box2.y || box2.y + box2.h < box1.y) + !(box1.endx < box2.x || box2.endx < box1.x || box1.endy < box2.y || box2.endy < box1.y) end def increase_confidence_for_overlapping_fields(image_fields, text_fields, by: 1.0) @@ -465,12 +434,10 @@ module Templates image_fields.map do |image_field| next if image_field.type != 'text' - field_bottom = image_field.y + image_field.h - text_fields.each do |text_field| - break if text_field.y > field_bottom + break if text_field.y > image_field.endy - next if text_field.y + text_field.h < image_field.y + next if text_field.endy < image_field.y next unless boxes_overlap?(image_field, text_field) next if calculate_iou(image_field, text_field) < 0.4 diff --git a/lib/templates/image_to_fields.rb b/lib/templates/image_to_fields.rb index 72c578a7..d9c04221 100755 --- a/lib/templates/image_to_fields.rb +++ b/lib/templates/image_to_fields.rb @@ -8,6 +8,10 @@ module Templates def endy @endy ||= y + h end + + def endx + @endx ||= x + w + end end MODEL_PATH = Rails.root.join('tmp/model.onnx') @@ -64,9 +68,7 @@ module Templates detections = apply_nms(detections, nms) - fields = build_fields_from_detections(detections, image) - - sort_fields(fields, y_threshold: 10.0 / image.height) + build_fields_from_detections(detections, image) end def build_split_image_regions(image) @@ -302,27 +304,6 @@ module Templates end end - def sort_fields(fields, y_threshold: 0.01) - sorted_fields = fields.sort { |a, b| a.endy == b.endy ? a.x <=> b.x : a.endy <=> b.endy } - - lines = [] - current_line = [] - - sorted_fields.each do |field| - if current_line.blank? || (field.y - current_line.first.y).abs < y_threshold - current_line << field - else - lines << current_line.sort_by(&:x) - - current_line = [field] - end - end - - lines << current_line.sort_by(&:x) if current_line.present? - - lines.flatten - end - def apply_nms(detections, threshold = 0.5) return detections if detections[:xyxy].shape[0].zero? From fa1ef44b2234a7f0e6b5d2d2fb6dcc5e11f3024f Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Sun, 30 Nov 2025 11:42:36 +0200 Subject: [PATCH 09/15] add tabs --- lib/templates/detect_fields.rb | 60 +++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/lib/templates/detect_fields.rb b/lib/templates/detect_fields.rb index 46c613e7..2741e1e2 100755 --- a/lib/templates/detect_fields.rb +++ b/lib/templates/detect_fields.rb @@ -212,7 +212,8 @@ module Templates def build_page_nodes(page, fields, tail_node, attachment_uuid: nil) field_nodes = [] - y_theshold = 10.0 / page.height + y_theshold = 4.0 / page.height + x_theshold = 30.0 / page.width text_nodes = page.text_nodes @@ -227,13 +228,45 @@ module Templates break unless node + if node.content.in?(LINEBREAK) + next_node = text_nodes[index] + + if next_node && (next_node.endy - node.endy) < y_theshold + index += 1 + + next + end + end + loop do break unless current_field if ((current_field.endy - node.endy).abs < y_theshold && (current_field.x <= node.x || node.content.in?(LINEBREAK))) || current_field.endy < node.y + if tail_node.elem.is_a?(Templates::ImageToFields::Field) + divider = + if (tail_node.elem.endy - current_field.endy).abs > y_theshold + "\n".b + elsif tail_node.elem.endx - current_field.x > x_theshold + "\t".b + else + ' '.b + end + + text_node = PageNode.new(prev: tail_node, elem: divider, page: page.page_index, attachment_uuid:) + tail_node.next = text_node + + tail_node = text_node + elsif prev_node && (prev_node.endy - current_field.endy).abs > y_theshold + text_node = PageNode.new(prev: tail_node, elem: "\n".b, page: page.page_index, attachment_uuid:) + tail_node.next = text_node + + tail_node = text_node + end + field_node = PageNode.new(prev: tail_node, elem: current_field, page: page.page_index, attachment_uuid:) + tail_node.next = field_node tail_node = field_node field_nodes << tail_node @@ -245,14 +278,24 @@ module Templates end if tail_node.elem.is_a?(Templates::ImageToFields::Field) + prev_field = tail_node.elem + text_node = PageNode.new(prev: tail_node, elem: ''.b, page: page.page_index, attachment_uuid:) tail_node.next = text_node tail_node = text_node - end - if prev_node && (node.endy - prev_node.endy) > y_theshold && LINEBREAK.exclude?(prev_node.content) - tail_node.elem << "\n" + if (node.endy - prev_field.endy).abs > y_theshold + tail_node.elem << "\n" + elsif (node.x - prev_field.endx) > x_theshold + tail_node.elem << "\t" + end + elsif prev_node + if (node.endy - prev_node.endy) > y_theshold && LINEBREAK.exclude?(prev_node.content) + tail_node.elem << "\n" + elsif (node.x - prev_node.endx) > x_theshold && !tail_node.elem.ends_with?("\t") + tail_node.elem << "\t" + end end if node.content != '_' || !tail_node.elem.ends_with?('___') @@ -275,6 +318,15 @@ module Templates current_field = fields.shift end + if tail_node.elem.is_a?(Templates::ImageToFields::Field) + text_node = PageNode.new(prev: tail_node, elem: "\n".b, page: page.page_index, attachment_uuid:) + tail_node.next = text_node + + tail_node = text_node + else + tail_node.elem << "\n" + end + [field_nodes, tail_node] end From d2a0937038a6144082a5b4c9d228b29f5d9590ea Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Sun, 30 Nov 2025 14:37:05 +0200 Subject: [PATCH 10/15] fix pdf text emoji --- lib/pdfium.rb | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/lib/pdfium.rb b/lib/pdfium.rb index 6a5874e7..a20e41c7 100644 --- a/lib/pdfium.rb +++ b/lib/pdfium.rb @@ -457,22 +457,40 @@ class Pdfium origin_x_ptr = FFI::MemoryPointer.new(:double) origin_y_ptr = FFI::MemoryPointer.new(:double) - char_count.times do |i| - char = Pdfium.FPDFText_GetUnicode(text_page, i).chr(Encoding::UTF_8) + i = 0 - result = Pdfium.FPDFText_GetCharBox(text_page, i, left_ptr, right_ptr, bottom_ptr, top_ptr) + loop do + break unless i < char_count + + box_index = i + + codepoint = Pdfium.FPDFText_GetUnicode(text_page, i) + + if codepoint.between?(0xD800, 0xDBFF) && (i + 1 < char_count) + codepoint2 = Pdfium.FPDFText_GetUnicode(text_page, i + 1) + + if codepoint2.between?(0xDC00, 0xDFFF) + codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (codepoint2 - 0xDC00) + + i += 1 + end + end + + char = codepoint.chr(Encoding::UTF_8) + + result = Pdfium.FPDFText_GetCharBox(text_page, box_index, left_ptr, right_ptr, bottom_ptr, top_ptr) next if result.zero? left = left_ptr.read_double right = right_ptr.read_double - Pdfium.FPDFText_GetCharOrigin(text_page, i, origin_x_ptr, origin_y_ptr) + Pdfium.FPDFText_GetCharOrigin(text_page, box_index, origin_x_ptr, origin_y_ptr) origin_y = origin_y_ptr.read_double origin_x = origin_x_ptr.read_double - font_size = Pdfium.FPDFText_GetFontSize(text_page, i) + font_size = Pdfium.FPDFText_GetFontSize(text_page, box_index) font_size = 8 if font_size == 1 abs_x = left @@ -486,6 +504,8 @@ class Pdfium node_height = abs_height / height @text_nodes << TextNode.new(char, x, y, node_width, node_height) + ensure + i += 1 end y_threshold = 4.0 / width From dfab4c331cf0b2432283d45820725ac441cc00d9 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Sun, 30 Nov 2025 16:00:13 +0200 Subject: [PATCH 11/15] send confirmation --- app/controllers/profile_controller.rb | 2 ++ app/controllers/users_controller.rb | 2 ++ app/jobs/send_confirmation_instructions_job.rb | 11 +++++++++++ 3 files changed, 15 insertions(+) create mode 100644 app/jobs/send_confirmation_instructions_job.rb diff --git a/app/controllers/profile_controller.rb b/app/controllers/profile_controller.rb index 878831d5..1bbdb14a 100644 --- a/app/controllers/profile_controller.rb +++ b/app/controllers/profile_controller.rb @@ -10,6 +10,8 @@ class ProfileController < ApplicationController def update_contact if current_user.update(contact_params) if current_user.try(:pending_reconfirmation?) && current_user.previous_changes.key?(:unconfirmed_email) + SendConfirmationInstructionsJob.perform_async('user_id' => current_user.id) + redirect_to settings_profile_index_path, notice: I18n.t('a_confirmation_email_has_been_sent_to_the_new_email_address') else diff --git a/app/controllers/users_controller.rb b/app/controllers/users_controller.rb index 9ab9ca68..51025dd1 100644 --- a/app/controllers/users_controller.rb +++ b/app/controllers/users_controller.rb @@ -66,6 +66,8 @@ class UsersController < ApplicationController if @user.update(attrs.except(*(current_user == @user ? %i[password otp_required_for_login role] : %i[password]))) if @user.try(:pending_reconfirmation?) && @user.previous_changes.key?(:unconfirmed_email) + SendConfirmationInstructionsJob.perform_async('user_id' => @user.id) + redirect_back fallback_location: settings_users_path, notice: I18n.t('a_confirmation_email_has_been_sent_to_the_new_email_address') else diff --git a/app/jobs/send_confirmation_instructions_job.rb b/app/jobs/send_confirmation_instructions_job.rb new file mode 100644 index 00000000..7abae742 --- /dev/null +++ b/app/jobs/send_confirmation_instructions_job.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class SendConfirmationInstructionsJob + include Sidekiq::Job + + def perform(params = {}) + user = User.find(params['user_id']) + + user.send_confirmation_instructions + end +end From f4552dbaa8d32ee35bf718730e50c223b6989967 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Sun, 30 Nov 2025 16:30:56 +0200 Subject: [PATCH 12/15] skip reconfirmation --- app/controllers/accounts_controller.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/controllers/accounts_controller.rb b/app/controllers/accounts_controller.rb index 0cd12d52..b24c6b45 100644 --- a/app/controllers/accounts_controller.rb +++ b/app/controllers/accounts_controller.rb @@ -46,6 +46,7 @@ class AccountsController < ApplicationController def destroy authorize!(:manage, current_account) + true_user.skip_reconfirmation! true_user.update!(locked_at: Time.current, email: true_user.email.sub('@', '+removed@')) true_user.account.update!(archived_at: Time.current) From 53fae56c5180be595afa08f896b8e5a8339046ab Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Mon, 1 Dec 2025 10:09:27 +0200 Subject: [PATCH 13/15] fix regexp field types --- lib/templates/detect_fields.rb | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/templates/detect_fields.rb b/lib/templates/detect_fields.rb index 2741e1e2..e68f613d 100755 --- a/lib/templates/detect_fields.rb +++ b/lib/templates/detect_fields.rb @@ -21,8 +21,7 @@ module Templates date | signed\sat | datum - ) - \s*[:-]?\s*\z + )[:_\s-]*\z /ix NUMBER_REGEXP = / @@ -40,8 +39,7 @@ module Templates | menge | anzahl | stückzahl - ) - \s*[:-]?\s*\z + )[:_\s-]*\z /ix SIGNATURE_REGEXP = / @@ -54,8 +52,7 @@ module Templates | unterschrift | unterschreiben | unterzeichnen - ) - \s*[:-]?\s*\z + )[:_\s-]*\z /ix LINEBREAK = ["\n", "\r"].freeze From 3519b0e009692cf913aa7d179e3190db0e5608f7 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Mon, 1 Dec 2025 10:15:23 +0200 Subject: [PATCH 14/15] fix typo --- lib/templates/detect_fields.rb | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/templates/detect_fields.rb b/lib/templates/detect_fields.rb index e68f613d..c55358ad 100755 --- a/lib/templates/detect_fields.rb +++ b/lib/templates/detect_fields.rb @@ -56,7 +56,7 @@ module Templates /ix LINEBREAK = ["\n", "\r"].freeze - CHECBOXES = ['☐', '□'].freeze + CHECKBOXES = ['☐', '□'].freeze # rubocop:disable Metrics, Style def call(io, attachment: nil, confidence: 0.3, temperature: 1, inference: Templates::ImageToFields, @@ -209,8 +209,8 @@ module Templates def build_page_nodes(page, fields, tail_node, attachment_uuid: nil) field_nodes = [] - y_theshold = 4.0 / page.height - x_theshold = 30.0 / page.width + y_threshold = 4.0 / page.height + x_threshold = 30.0 / page.width text_nodes = page.text_nodes @@ -228,7 +228,7 @@ module Templates if node.content.in?(LINEBREAK) next_node = text_nodes[index] - if next_node && (next_node.endy - node.endy) < y_theshold + if next_node && (next_node.endy - node.endy) < y_threshold index += 1 next @@ -238,14 +238,14 @@ module Templates loop do break unless current_field - if ((current_field.endy - node.endy).abs < y_theshold && + if ((current_field.endy - node.endy).abs < y_threshold && (current_field.x <= node.x || node.content.in?(LINEBREAK))) || current_field.endy < node.y if tail_node.elem.is_a?(Templates::ImageToFields::Field) divider = - if (tail_node.elem.endy - current_field.endy).abs > y_theshold + if (tail_node.elem.endy - current_field.endy).abs > y_threshold "\n".b - elsif tail_node.elem.endx - current_field.x > x_theshold + elsif tail_node.elem.endx - current_field.x > x_threshold "\t".b else ' '.b @@ -255,7 +255,7 @@ module Templates tail_node.next = text_node tail_node = text_node - elsif prev_node && (prev_node.endy - current_field.endy).abs > y_theshold + elsif prev_node && (prev_node.endy - current_field.endy).abs > y_threshold text_node = PageNode.new(prev: tail_node, elem: "\n".b, page: page.page_index, attachment_uuid:) tail_node.next = text_node @@ -282,21 +282,21 @@ module Templates tail_node = text_node - if (node.endy - prev_field.endy).abs > y_theshold + if (node.endy - prev_field.endy).abs > y_threshold tail_node.elem << "\n" - elsif (node.x - prev_field.endx) > x_theshold + elsif (node.x - prev_field.endx) > x_threshold tail_node.elem << "\t" end elsif prev_node - if (node.endy - prev_node.endy) > y_theshold && LINEBREAK.exclude?(prev_node.content) + if (node.endy - prev_node.endy) > y_threshold && LINEBREAK.exclude?(prev_node.content) tail_node.elem << "\n" - elsif (node.x - prev_node.endx) > x_theshold && !tail_node.elem.ends_with?("\t") + elsif (node.x - prev_node.endx) > x_threshold && !tail_node.elem.ends_with?("\t") tail_node.elem << "\t" end end if node.content != '_' || !tail_node.elem.ends_with?('___') - tail_node.elem << node.content unless CHECBOXES.include?(node.content) + tail_node.elem << node.content unless CHECKBOXES.include?(node.content) end prev_node = node From 840162c9cab5515cca76089a83e983266e840e8f Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Mon, 1 Dec 2025 10:37:58 +0200 Subject: [PATCH 15/15] fix sort --- lib/templates/detect_fields.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/templates/detect_fields.rb b/lib/templates/detect_fields.rb index c55358ad..2ade0efd 100755 --- a/lib/templates/detect_fields.rb +++ b/lib/templates/detect_fields.rb @@ -124,7 +124,7 @@ module Templates text_fields = extract_text_fields_from_page(page) line_fields = extract_line_fields_from_page(page) - fields = sort_fields(fields, y_threshold: 10.0 / page.height) + fields = sort_fields(fields, y_threshold: 10.0 / image.height) fields = increase_confidence_for_overlapping_fields(fields, text_fields) fields = increase_confidence_for_overlapping_fields(fields, line_fields)