<%= f.button button_title(title: t('update'), disabled_with: t('updating')), class: 'base-button' %>
diff --git a/app/views/users/_form.html.erb b/app/views/users/_form.html.erb
index 5ceaeabc..c7652916 100644
--- a/app/views/users/_form.html.erb
+++ b/app/views/users/_form.html.erb
@@ -13,6 +13,11 @@
<%= f.label :email, t('email'), class: 'label' %>
<%= f.email_field :email, required: true, class: 'base-input' %>
+ <% if user.try(:pending_reconfirmation?) %>
+
+ <% end %>
<% if user.persisted? && Accounts.can_send_emails?(current_account) %>
<%= t('click_here_to_send_a_reset_password_email_html') %>
diff --git a/app/views/users/index.html.erb b/app/views/users/index.html.erb
index a0849cec..e49a8563 100644
--- a/app/views/users/index.html.erb
+++ b/app/views/users/index.html.erb
@@ -52,7 +52,13 @@
<%= user.full_name %>
- <%= user.email %>
+ <% if user.try(:pending_reconfirmation?) %>
+ <%= user.unconfirmed_email %>
+
+ (<%= t('unconfirmed') %>)
+ <% else %>
+ <%= user.email %>
+ <% end %>
|
diff --git a/config/initializers/devise.rb b/config/initializers/devise.rb
index d7b80f1a..1329ce87 100644
--- a/config/initializers/devise.rb
+++ b/config/initializers/devise.rb
@@ -31,6 +31,7 @@ end
#
# Use this hook to configure devise mailer, warden hooks and so forth.
# Many of these configuration options can be set straight in your model.
+# rubocop:disable Metrics/BlockLength
Devise.setup do |config|
config.warden do |manager|
manager.default_strategies(scope: :user).unshift(:two_factor_authenticatable)
@@ -166,7 +167,7 @@ Devise.setup do |config|
# without confirming their account.
# Default is 0.days, meaning the user cannot access the website without
# confirming their account.
- # config.allow_unconfirmed_access_for = 2.days
+ config.allow_unconfirmed_access_for = nil
# A period that the user is allowed to confirm their account before their
# token becomes invalid. For example, if set to 3.days, the user can confirm
@@ -332,3 +333,4 @@ Devise.setup do |config|
ActiveSupport.run_load_hooks(:devise_config, config)
end
+# rubocop:enable Metrics/BlockLength
diff --git a/config/locales/i18n.yml b/config/locales/i18n.yml
index b9784c04..0d47623a 100644
--- a/config/locales/i18n.yml
+++ b/config/locales/i18n.yml
@@ -71,7 +71,7 @@ en: &en
team_access: Team access
document_download_filename_format: Document download filename format
docuseal_trusted_signature: DocuSeal Trusted Signature
- hello_name: Hello %{name}
+ hello_name: Hi %{name}
you_are_invited_to_product_name: You are invited to %{product_name}
you_have_been_invited_to_account_name_product_name_please_sign_up_using_the_link_below_: 'You have been invited to %{account_name} %{product_name}. Please sign up using the link below:'
sent_using_product_name_in_testing_mode_html: 'Sent using %{product_name} in testing mode'
@@ -861,6 +861,19 @@ en: &en
reports: Reports
completed_submissions: Completed submissions
sms: SMS
+ a_confirmation_email_has_been_sent_to_the_new_email_address: A confirmation email has been sent to the new email address.
+ email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} email address is awaiting confirmation. Follow the link in the email to confirm."
+ please_confirm_your_email_address_using_the_link_below_: 'Please confirm your email address using the link below:'
+ confirm_email: Confirm email
+ unconfirmed: Unconfirmed
+ devise:
+ confirmations:
+ confirmed: Your email address has been successfully confirmed.
+ failure:
+ unconfirmed: You have to confirm your email address before continuing.
+ mailer:
+ confirmation_instructions:
+ subject: Confirm your email address
submission_sources:
api: API
bulk: Bulk Send
@@ -1808,6 +1821,19 @@ es: &es
reports: Informes
completed_submissions: Envíos completados
sms: SMS
+ a_confirmation_email_has_been_sent_to_the_new_email_address: Se ha enviado un correo electrónico de confirmación a la nueva dirección de correo electrónico.
+ email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} está pendiente de confirmación. Sigue el enlace en el correo para confirmarla."
+ please_confirm_your_email_address_using_the_link_below_: 'Por favor, confirma tu dirección de correo electrónico utilizando el enlace a continuación:'
+ confirm_email: Confirmar correo
+ unconfirmed: No confirmado
+ devise:
+ confirmations:
+ confirmed: Tu dirección de correo electrónico ha sido confirmada correctamente.
+ failure:
+ unconfirmed: Debes confirmar tu dirección de correo electrónico antes de continuar.
+ mailer:
+ confirmation_instructions:
+ subject: Confirma tu dirección de correo electrónico
submission_sources:
api: API
bulk: Envío masivo
@@ -2756,6 +2782,19 @@ it: &it
reports: Rapporti
completed_submissions: Invii completati
sms: SMS
+ a_confirmation_email_has_been_sent_to_the_new_email_address: È stata inviata un'email di conferma al nuovo indirizzo email.
+ email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} è in attesa di conferma. Segui il link nell'email per confermare."
+ please_confirm_your_email_address_using_the_link_below_: 'Conferma il tuo indirizzo email utilizzando il link qui sotto:'
+ confirm_email: Conferma email
+ unconfirmed: Non confermato
+ devise:
+ confirmations:
+ confirmed: Il tuo indirizzo email è stato confermato con successo.
+ failure:
+ unconfirmed: Devi confermare il tuo indirizzo email prima di continuare.
+ mailer:
+ confirmation_instructions:
+ subject: Conferma il tuo indirizzo email
submission_sources:
api: API
bulk: Invio massivo
@@ -3700,6 +3739,19 @@ fr: &fr
reports: Rapports
completed_submissions: Soumissions terminées
sms: SMS
+ a_confirmation_email_has_been_sent_to_the_new_email_address: Un e-mail de confirmation a été envoyé à la nouvelle adresse e-mail.
+ email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} est en attente de confirmation. Suivez le lien dans l'e-mail pour la confirmer."
+ please_confirm_your_email_address_using_the_link_below_: 'Veuillez confirmer votre adresse e-mail en utilisant le lien ci-dessous :'
+ confirm_email: "Confirmer l'e-mail"
+ unconfirmed: Non confirmé
+ devise:
+ confirmations:
+ confirmed: Votre adresse e-mail a été confirmée avec succès.
+ failure:
+ unconfirmed: Vous devez confirmer votre adresse e-mail avant de continuer.
+ mailer:
+ confirmation_instructions:
+ subject: Confirmez votre adresse e-mail
submission_sources:
api: API
bulk: Envoi en masse
@@ -4647,6 +4699,19 @@ pt: &pt
reports: Relatórios
completed_submissions: Envios concluídos
sms: SMS
+ a_confirmation_email_has_been_sent_to_the_new_email_address: Um e-mail de confirmação foi enviado para o novo endereço de e-mail.
+ email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} está aguardando confirmação. Siga o link enviado para esse endereço de e-mail para confirmar."
+ please_confirm_your_email_address_using_the_link_below_: 'Por favor, confirme seu endereço de e-mail usando o link abaixo:'
+ confirm_email: Confirmar e-mail
+ unconfirmed: Não confirmado
+ devise:
+ confirmations:
+ confirmed: Seu endereço de e-mail foi confirmado com sucesso.
+ failure:
+ unconfirmed: Você deve confirmar seu endereço de e-mail antes de continuar.
+ mailer:
+ confirmation_instructions:
+ subject: Confirme seu endereço de e-mail
submission_sources:
api: API
bulk: Envio em massa
@@ -5594,6 +5659,19 @@ de: &de
reports: Berichte
completed_submissions: Abgeschlossene Übermittlungen
sms: SMS
+ a_confirmation_email_has_been_sent_to_the_new_email_address: Eine Bestätigungs-E-Mail wurde an die neue E-Mail-Adresse gesendet.
+ email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} wartet auf Bestätigung. Folgen Sie dem Link in der E-Mail, um sie zu bestätigen."
+ please_confirm_your_email_address_using_the_link_below_: 'Bitte bestätigen Sie Ihre E-Mail-Adresse über den folgenden Link:'
+ confirm_email: E-Mail bestätigen
+ unconfirmed: Unbestätigt
+ devise:
+ confirmations:
+ confirmed: Ihre E-Mail-Adresse wurde erfolgreich bestätigt.
+ failure:
+ unconfirmed: Sie müssen Ihre E-Mail-Adresse bestätigen, bevor Sie fortfahren.
+ mailer:
+ confirmation_instructions:
+ subject: Bestätigen Sie Ihre E-Mail-Adresse
submission_sources:
api: API
bulk: Massenversand
@@ -6902,6 +6980,19 @@ nl: &nl
reports: Rapporten
completed_submissions: Voltooide inzendingen
sms: SMS
+ a_confirmation_email_has_been_sent_to_the_new_email_address: Er is een bevestigingsmail verzonden naar het nieuwe e-mailadres.
+ email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} wacht op bevestiging. Volg de link in de e-mail om te bevestigen."
+ please_confirm_your_email_address_using_the_link_below_: 'Bevestig je e-mailadres via de onderstaande link:'
+ confirm_email: E-mailadres bevestigen
+ unconfirmed: Onbevestigd
+ devise:
+ confirmations:
+ confirmed: Je e-mailadres is succesvol bevestigd.
+ failure:
+ unconfirmed: Je moet je e-mailadres bevestigen voordat je verdergaat.
+ mailer:
+ confirmation_instructions:
+ subject: Bevestig je e-mailadres
submission_sources:
api: API
bulk: Bulkverzending
diff --git a/config/routes.rb b/config/routes.rb
index 05abbfc4..0ccbfdfa 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -14,13 +14,8 @@ Rails.application.routes.draw do
get 'up' => 'rails/health#show'
get 'manifest' => 'pwa#manifest'
- devise_for :users,
- path: '/', only: %i[sessions passwords omniauth_callbacks],
- controllers: begin
- options = { sessions: 'sessions', passwords: 'passwords' }
- options[:omniauth_callbacks] = 'omniauth_callbacks' if User.devise_modules.include?(:omniauthable)
- options
- end
+ devise_for :users, path: '/', only: %i[sessions passwords],
+ controllers: { sessions: 'sessions', passwords: 'passwords' }
devise_scope :user do
resource :invitation, only: %i[update] do
diff --git a/db/migrate/20251125194305_add_confirmable_to_users.rb b/db/migrate/20251125194305_add_confirmable_to_users.rb
new file mode 100644
index 00000000..ca946854
--- /dev/null
+++ b/db/migrate/20251125194305_add_confirmable_to_users.rb
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+class AddConfirmableToUsers < ActiveRecord::Migration[8.0] # Adds the email-confirmation tracking columns to the users table.
+ def change
+ add_column :users, :confirmation_token, :string # NOTE(review): no index is created for this column here; confirm lookups by token do not need one
+ add_column :users, :confirmed_at, :datetime
+ add_column :users, :confirmation_sent_at, :datetime
+ add_column :users, :unconfirmed_email, :string # displayed in the users list while user.pending_reconfirmation? is true
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 5ceb1f31..ea115d78 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema[8.0].define(version: 2025_11_21_113910) do
+ActiveRecord::Schema[8.0].define(version: 2025_11_25_194305) do
# These are extensions that must be enabled in order to support this database
enable_extension "btree_gin"
enable_extension "plpgsql"
@@ -451,6 +451,10 @@ ActiveRecord::Schema[8.0].define(version: 2025_11_21_113910) do
t.string "otp_secret"
t.integer "consumed_timestep"
t.boolean "otp_required_for_login", default: false, null: false
+ t.string "confirmation_token"
+ t.datetime "confirmed_at"
+ t.datetime "confirmation_sent_at"
+ t.string "unconfirmed_email"
t.index ["account_id"], name: "index_users_on_account_id"
t.index ["email"], name: "index_users_on_email", unique: true
t.index ["reset_password_token"], name: "index_users_on_reset_password_token", unique: true
diff --git a/lib/pdfium.rb b/lib/pdfium.rb
index 464f95e2..a20e41c7 100644
--- a/lib/pdfium.rb
+++ b/lib/pdfium.rb
@@ -39,8 +39,25 @@ class Pdfium
FPDF_RENDER_FORCEHALFTONE = 0x400
FPDF_PRINTING = 0x800
- TextNode = Struct.new(:content, :x, :y, :w, :h, keyword_init: true)
- LineNode = Struct.new(:x, :y, :w, :h, :tilt, keyword_init: true)
+ TextNode = Struct.new(:content, :x, :y, :w, :h) do # Positional struct: one extracted character (content) and its box (x, y, w, h).
+ def endx # x + w, computed on first call and then cached on the instance
+ @endx ||= x + w # NOTE(review): the cached value is not refreshed if x or w is reassigned afterwards
+ end
+
+ def endy # y + h, computed on first call and then cached on the instance
+ @endy ||= y + h # NOTE(review): the cached value is not refreshed if y or h is reassigned afterwards
+ end
+ end
+
+ LineNode = Struct.new(:x, :y, :w, :h, :tilt) do # Positional struct: a line's box (x, y, w, h) plus its tilt value.
+ def endy # y + h, computed on first call and then cached on the instance
+ @endy ||= y + h # NOTE(review): the cached value is not refreshed if y or h is reassigned afterwards
+ end
+
+ def endx # x + w, computed on first call and then cached on the instance
+ @endx ||= x + w # NOTE(review): the cached value is not refreshed if x or w is reassigned afterwards
+ end
+ end
# rubocop:disable Naming/ClassAndModuleCamelCase
class FPDF_LIBRARY_CONFIG < FFI::Struct
@@ -433,31 +450,47 @@ class Pdfium
return @text_nodes if char_count.zero?
- char_count.times do |i|
- unicode = Pdfium.FPDFText_GetUnicode(text_page, i)
+ left_ptr = FFI::MemoryPointer.new(:double)
+ right_ptr = FFI::MemoryPointer.new(:double)
+ bottom_ptr = FFI::MemoryPointer.new(:double)
+ top_ptr = FFI::MemoryPointer.new(:double)
+ origin_x_ptr = FFI::MemoryPointer.new(:double)
+ origin_y_ptr = FFI::MemoryPointer.new(:double)
+
+ i = 0
- char = [unicode].pack('U*')
+ loop do
+ break unless i < char_count
- left_ptr = FFI::MemoryPointer.new(:double)
- right_ptr = FFI::MemoryPointer.new(:double)
- bottom_ptr = FFI::MemoryPointer.new(:double)
- top_ptr = FFI::MemoryPointer.new(:double)
+ box_index = i
- result = Pdfium.FPDFText_GetCharBox(text_page, i, left_ptr, right_ptr, bottom_ptr, top_ptr)
+ codepoint = Pdfium.FPDFText_GetUnicode(text_page, i)
+
+ if codepoint.between?(0xD800, 0xDBFF) && (i + 1 < char_count)
+ codepoint2 = Pdfium.FPDFText_GetUnicode(text_page, i + 1)
+
+ if codepoint2.between?(0xDC00, 0xDFFF)
+ codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (codepoint2 - 0xDC00)
+
+ i += 1
+ end
+ end
+
+ char = codepoint.chr(Encoding::UTF_8)
+
+ result = Pdfium.FPDFText_GetCharBox(text_page, box_index, left_ptr, right_ptr, bottom_ptr, top_ptr)
next if result.zero?
left = left_ptr.read_double
right = right_ptr.read_double
- origin_x_ptr = FFI::MemoryPointer.new(:double)
- origin_y_ptr = FFI::MemoryPointer.new(:double)
-
- Pdfium.FPDFText_GetCharOrigin(text_page, i, origin_x_ptr, origin_y_ptr)
+ Pdfium.FPDFText_GetCharOrigin(text_page, box_index, origin_x_ptr, origin_y_ptr)
origin_y = origin_y_ptr.read_double
+ origin_x = origin_x_ptr.read_double
- font_size = Pdfium.FPDFText_GetFontSize(text_page, i)
+ font_size = Pdfium.FPDFText_GetFontSize(text_page, box_index)
font_size = 8 if font_size == 1
abs_x = left
@@ -465,15 +498,21 @@ class Pdfium
abs_width = right - left
abs_height = font_size
- x = abs_x / width
+ x = origin_x / width
y = abs_y / height
- node_width = abs_width / width
+ node_width = (abs_width + ((abs_x - origin_x).abs * 2)) / width
node_height = abs_height / height
- @text_nodes << TextNode.new(content: char, x: x, y: y, w: node_width, h: node_height)
+ @text_nodes << TextNode.new(char, x, y, node_width, node_height)
+ ensure
+ i += 1
end
- @text_nodes = @text_nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y }
+ y_threshold = 4.0 / height # endy values are divided by page height above, so scale the row tolerance by height as well
+
+ @text_nodes = @text_nodes.sort do |a, b|
+ (a.endy - b.endy).abs < y_threshold ? a.x <=> b.x : a.endy <=> b.endy
+ end
ensure
Pdfium.FPDFText_ClosePage(text_page) if text_page && !text_page.null?
end
@@ -539,10 +578,10 @@ class Pdfium
norm_w = w / width
norm_h = h / height
- @line_nodes << LineNode.new(x: norm_x, y: norm_y, w: norm_w, h: norm_h, tilt: tilt)
+ @line_nodes << LineNode.new(norm_x, norm_y, norm_w, norm_h, tilt)
end
- @line_nodes = @line_nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y }
+ @line_nodes = @line_nodes.sort { |a, b| a.endy == b.endy ? a.x <=> b.x : a.endy <=> b.endy }
end
def close
diff --git a/lib/templates/detect_fields.rb b/lib/templates/detect_fields.rb
index 1f3b5c0c..2ade0efd 100755
--- a/lib/templates/detect_fields.rb
+++ b/lib/templates/detect_fields.rb
@@ -4,7 +4,16 @@ module Templates
module DetectFields
module_function
- TextFieldBox = Struct.new(:x, :y, :w, :h, keyword_init: true)
+ TextFieldBox = Struct.new(:x, :y, :w, :h, keyword_init: true) do # Keyword-initialized box (x, y, w, h) with derived far-edge accessors.
+ def endy # y + h, computed on first call and then cached on the instance
+ @endy ||= y + h # NOTE(review): the cached value is not refreshed if y or h is reassigned afterwards
+ end
+
+ def endx # x + w, computed on first call and then cached on the instance
+ @endx ||= x + w # NOTE(review): the cached value is not refreshed if x or w is reassigned afterwards
+ end
+ end
+
PageNode = Struct.new(:prev, :next, :elem, :page, :attachment_uuid, keyword_init: true)
DATE_REGEXP = /
@@ -12,8 +21,7 @@ module Templates
date
| signed\sat
| datum
- )
- \s*[:-]?\s*\z
+ )[:_\s-]*\z
/ix
NUMBER_REGEXP = /
@@ -31,8 +39,7 @@ module Templates
| menge
| anzahl
| stückzahl
- )
- \s*[:-]?\s*\z
+ )[:_\s-]*\z
/ix
SIGNATURE_REGEXP = /
@@ -45,10 +52,12 @@ module Templates
| unterschrift
| unterschreiben
| unterzeichnen
- )
- \s*[:-]?\s*\z
+ )[:_\s-]*\z
/ix
+ LINEBREAK = ["\n", "\r"].freeze
+ CHECKBOXES = ['☐', '□'].freeze
+
# rubocop:disable Metrics, Style
def call(io, attachment: nil, confidence: 0.3, temperature: 1, inference: Templates::ImageToFields,
nms: 0.1, split_page: false, aspect_ratio: true, padding: 20, regexp_type: true, &)
@@ -71,11 +80,13 @@ module Templates
fields = inference.call(image, confidence:, nms:, split_page:,
temperature:, aspect_ratio:, padding:)
+ fields = sort_fields(fields, y_threshold: 10.0 / image.height)
+
fields = fields.map do |f|
{
uuid: SecureRandom.uuid,
type: f.type,
- required: f.type != 'checkbox',
+ required: f.type == 'signature',
preferences: {},
areas: [{
x: f.x,
@@ -113,6 +124,8 @@ module Templates
text_fields = extract_text_fields_from_page(page)
line_fields = extract_line_fields_from_page(page)
+ fields = sort_fields(fields, y_threshold: 10.0 / image.height)
+
fields = increase_confidence_for_overlapping_fields(fields, text_fields)
fields = increase_confidence_for_overlapping_fields(fields, line_fields)
@@ -128,7 +141,7 @@ module Templates
{
uuid: SecureRandom.uuid,
type:,
- required: type != 'checkbox',
+ required: type == 'signature',
preferences: {},
areas: [{
x: field.x, y: field.y,
@@ -153,6 +166,12 @@ module Templates
doc.close
end
+ def sort_fields(fields, y_threshold: 0.01) # Returns fields sorted by endy (y + h); fields whose endy differ by less than y_threshold are ordered by x.
+ fields.sort do |a, b|
+ (a.endy - b.endy).abs < y_threshold ? a.x <=> b.x : a.endy <=> b.endy # near-equal endy is treated as the same row
+ end
+ end
+
def print_debug(head_node)
current_node = head_node
index = 0
@@ -189,121 +208,120 @@ module Templates
def build_page_nodes(page, fields, tail_node, attachment_uuid: nil)
field_nodes = []
- current_text = ''.b
-
- text_nodes = page.text_nodes
- text_idx = 0
- field_idx = 0
+ y_threshold = 4.0 / page.height
+ x_threshold = 30.0 / page.width
- while text_idx < text_nodes.length || field_idx < fields.length
- text_node = text_nodes[text_idx]
- field = fields[field_idx]
+ text_nodes = page.text_nodes
- process_text_node = false
- process_field_node = false
+ current_field = fields.shift
- if text_node && field
- text_y_center = text_node.y + (text_node.h / 2.0)
- field_y_center = field.y + (field.h / 2.0)
- y_threshold = text_node.h / 2.0
- vertical_distance = (text_y_center - field_y_center).abs
+ index = 0
- if vertical_distance < y_threshold
- is_underscore = text_node.content == '_'
- is_left_of_field = text_node.x < field.x
+ prev_node = nil
- if is_underscore && is_left_of_field
- text_x_end = text_node.x + text_node.w
+ loop do
+ node = text_nodes[index]
- distance = field.x - text_x_end
- proximity_threshold = text_node.w * 3.0
+ break unless node
- if distance < proximity_threshold
- process_field_node = true
- else
- process_text_node = true
- end
+ if node.content.in?(LINEBREAK)
+ next_node = text_nodes[index + 1] # look ahead to the node after this linebreak; text_nodes[index] is the linebreak itself
- elsif is_left_of_field
- process_text_node = true
- else
- process_field_node = true
- end
+ if next_node && (next_node.endy - node.endy) < y_threshold
+ index += 1
- elsif text_node.y < field.y
- process_text_node = true
- else
- process_field_node = true
+ next
end
-
- elsif text_node
- process_text_node = true
- elsif field
- process_field_node = true
end
- if process_field_node
- unless current_text.empty?
- new_text_node = PageNode.new(prev: tail_node, elem: current_text, page: page.page_index, attachment_uuid:)
- tail_node.next = new_text_node
- tail_node = new_text_node
- current_text = ''.b
- end
-
- new_field_node = PageNode.new(prev: tail_node, elem: field, page: page.page_index, attachment_uuid:)
- tail_node.next = new_field_node
- tail_node = new_field_node
-
- field_nodes << tail_node
+ loop do
+ break unless current_field
+
+ if ((current_field.endy - node.endy).abs < y_threshold &&
+ (current_field.x <= node.x || node.content.in?(LINEBREAK))) ||
+ current_field.endy < node.y
+ if tail_node.elem.is_a?(Templates::ImageToFields::Field)
+ divider =
+ if (tail_node.elem.endy - current_field.endy).abs > y_threshold
+ "\n".b
+ elsif tail_node.elem.endx - current_field.x > x_threshold
+ "\t".b
+ else
+ ' '.b
+ end
+
+ text_node = PageNode.new(prev: tail_node, elem: divider, page: page.page_index, attachment_uuid:)
+ tail_node.next = text_node
+
+ tail_node = text_node
+ elsif prev_node && (prev_node.endy - current_field.endy).abs > y_threshold
+ text_node = PageNode.new(prev: tail_node, elem: "\n".b, page: page.page_index, attachment_uuid:)
+ tail_node.next = text_node
+
+ tail_node = text_node
+ end
- while text_idx < text_nodes.length
- text_node_to_check = text_nodes[text_idx]
+ field_node = PageNode.new(prev: tail_node, elem: current_field, page: page.page_index, attachment_uuid:)
- is_part_of_field = false
+ tail_node.next = field_node
+ tail_node = field_node
+ field_nodes << tail_node
- if text_node_to_check.content == '_'
- check_y_center = text_node_to_check.y + (text_node_to_check.h / 2.0)
- check_y_dist = (check_y_center - field_y_center).abs
- check_y_thresh = text_node_to_check.h / 2.0
+ current_field = fields.shift
+ else
+ break
+ end
+ end
- if check_y_dist < check_y_thresh
- padding = text_node_to_check.w * 3.0
- field_x_start = field.x - padding
- field_x_end = field.x + field.w + padding
- text_x_start = text_node_to_check.x
- text_x_end = text_node_to_check.x + text_node_to_check.w
+ if tail_node.elem.is_a?(Templates::ImageToFields::Field)
+ prev_field = tail_node.elem
- is_part_of_field = true if text_x_start <= field_x_end && field_x_start <= text_x_end
- end
- end
+ text_node = PageNode.new(prev: tail_node, elem: ''.b, page: page.page_index, attachment_uuid:)
+ tail_node.next = text_node
- break unless is_part_of_field
+ tail_node = text_node
- text_idx += 1
+ if (node.endy - prev_field.endy).abs > y_threshold
+ tail_node.elem << "\n"
+ elsif (node.x - prev_field.endx) > x_threshold
+ tail_node.elem << "\t"
end
+ elsif prev_node
+ if (node.endy - prev_node.endy) > y_threshold && LINEBREAK.exclude?(prev_node.content)
+ tail_node.elem << "\n"
+ elsif (node.x - prev_node.endx) > x_threshold && !tail_node.elem.ends_with?("\t")
+ tail_node.elem << "\t"
+ end
+ end
- field_idx += 1
- elsif process_text_node
- if text_idx > 0
- prev_text_node = text_nodes[text_idx - 1]
+ if node.content != '_' || !tail_node.elem.ends_with?('___')
+ tail_node.elem << node.content unless CHECKBOXES.include?(node.content)
+ end
- x_gap = text_node.x - (prev_text_node.x + prev_text_node.w)
+ prev_node = node
- gap_w = text_node.w > prev_text_node.w ? text_node.w : prev_text_node.w
+ index += 1
+ end
- current_text << ' ' if x_gap > gap_w * 2
- end
+ loop do
+ break unless current_field
- current_text << text_node.content
- text_idx += 1
- end
+ field_node = PageNode.new(prev: tail_node, elem: current_field, page: page.page_index, attachment_uuid:)
+ tail_node.next = field_node
+ tail_node = field_node
+ field_nodes << tail_node
+
+ current_field = fields.shift
end
- unless current_text.empty?
- new_text_node = PageNode.new(prev: tail_node, elem: current_text, page: page.page_index, attachment_uuid:)
- tail_node.next = new_text_node
- tail_node = new_text_node
+ if tail_node.elem.is_a?(Templates::ImageToFields::Field)
+ text_node = PageNode.new(prev: tail_node, elem: "\n".b, page: page.page_index, attachment_uuid:)
+ tail_node.next = text_node
+
+ tail_node = text_node
+ else
+ tail_node.elem << "\n"
end
[field_nodes, tail_node]
@@ -399,8 +417,8 @@ module Templates
x1 = node.x
y1 = node.y
- x2 = node.x + node.w
- y2 = node.y + node.h
+ x2 = node.endx
+ y2 = node.endy
underscore_count = 1
@@ -417,8 +435,9 @@ module Templates
break if distance > 0.02 || height_diff > node.h * 0.5
underscore_count += 1
- next_x2 = next_node.x + next_node.w
- next_y2 = next_node.y + next_node.h
+
+ next_x2 = next_node.endx
+ next_y2 = next_node.endy
x2 = next_x2
y2 = [y2, next_y2].max
@@ -438,8 +457,8 @@ module Templates
def calculate_iou(box1, box2)
x1 = [box1.x, box2.x].max
y1 = [box1.y, box2.y].max
- x2 = [box1.x + box1.w, box2.x + box2.w].min
- y2 = [box1.y + box1.h, box2.y + box2.h].min
+ x2 = [box1.endx, box2.endx].min
+ y2 = [box1.endy, box2.endy].min
intersection_width = [0, x2 - x1].max
intersection_height = [0, y2 - y1].max
@@ -455,8 +474,7 @@ module Templates
end
def boxes_overlap?(box1, box2) # True when the two boxes intersect; boxes must respond to x, y, endx and endy.
- !(box1.x + box1.w < box2.x || box2.x + box2.w < box1.x ||
- box1.y + box1.h < box2.y || box2.y + box2.h < box1.y)
+ !(box1.endx < box2.x || box2.endx < box1.x || box1.endy < box2.y || box2.endy < box1.y) # strict <, so boxes that only touch at an edge still count as overlapping
end
def increase_confidence_for_overlapping_fields(image_fields, text_fields, by: 1.0)
@@ -465,14 +483,13 @@ module Templates
image_fields.map do |image_field|
next if image_field.type != 'text'
- field_bottom = image_field.y + image_field.h
-
text_fields.each do |text_field|
- break if text_field.y > field_bottom
+ break if text_field.y > image_field.endy
- next if text_field.y + text_field.h < image_field.y
+ next if text_field.endy < image_field.y
- next unless boxes_overlap?(image_field, text_field) && calculate_iou(image_field, text_field) > 0.5
+ next unless boxes_overlap?(image_field, text_field)
+ next if calculate_iou(image_field, text_field) < 0.4
break image_field.confidence += by
end
diff --git a/lib/templates/image_to_fields.rb b/lib/templates/image_to_fields.rb
index 786e9785..d9c04221 100755
--- a/lib/templates/image_to_fields.rb
+++ b/lib/templates/image_to_fields.rb
@@ -4,7 +4,15 @@ module Templates
module ImageToFields
module_function
- Field = Struct.new(:type, :x, :y, :w, :h, :confidence, keyword_init: true)
+ Field = Struct.new(:type, :x, :y, :w, :h, :confidence, keyword_init: true) do # Keyword-initialized detected field: type, box (x, y, w, h) and confidence.
+ def endy # y + h, computed on first call and then cached on the instance
+ @endy ||= y + h # NOTE(review): the cached value is not refreshed if y or h is reassigned afterwards
+ end
+
+ def endx # x + w, computed on first call and then cached on the instance
+ @endx ||= x + w # NOTE(review): the cached value is not refreshed if x or w is reassigned afterwards
+ end
+ end
MODEL_PATH = Rails.root.join('tmp/model.onnx')
@@ -60,9 +68,7 @@ module Templates
detections = apply_nms(detections, nms)
- fields = build_fields_from_detections(detections, image)
-
- sort_fields(fields, y_threshold: 10.0 / image.height)
+ build_fields_from_detections(detections, image)
end
def build_split_image_regions(image)
@@ -298,27 +304,6 @@ module Templates
end
end
- def sort_fields(fields, y_threshold: 0.01)
- sorted_fields = fields.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y }
-
- lines = []
- current_line = []
-
- sorted_fields.each do |field|
- if current_line.blank? || (field.y - current_line.first.y).abs < y_threshold
- current_line << field
- else
- lines << current_line.sort_by(&:x)
-
- current_line = [field]
- end
- end
-
- lines << current_line.sort_by(&:x) if current_line.present?
-
- lines.flatten
- end
-
def apply_nms(detections, threshold = 0.5)
return detections if detections[:xyxy].shape[0].zero?
|