Merge from docusealco/wip

pull/402/head 2.2.4
Alex Turchyn 2 weeks ago committed by GitHub
commit 1aacd98460
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -46,6 +46,7 @@ class AccountsController < ApplicationController
def destroy
authorize!(:manage, current_account)
true_user.skip_reconfirmation!
true_user.update!(locked_at: Time.current, email: true_user.email.sub('@', '+removed@'))
true_user.account.update!(archived_at: Time.current)

@ -1,4 +1,11 @@
# frozen_string_literal: true
# Accepts an invitation by letting the invitee set a password through the
# inherited Devise passwords controller.
class InvitationsController < Devise::PasswordsController
  # Runs the parent #update and, in the block it is given, finalises the user
  # record the parent passes in.
  def update
    super do |invited_user|
      # Only stamp a confirmation time when the update left no errors on the
      # record; an already-present confirmed_at is kept as is.
      if invited_user.errors.empty?
        invited_user.confirmed_at ||= Time.current
      end

      # NOTE(review): presumably makes the user available to code that runs
      # later in this request — confirm against PasswordsController::Current.
      PasswordsController::Current.user = invited_user
    end
  end
end

@ -9,7 +9,14 @@ class ProfileController < ApplicationController
def update_contact
if current_user.update(contact_params)
if current_user.try(:pending_reconfirmation?) && current_user.previous_changes.key?(:unconfirmed_email)
SendConfirmationInstructionsJob.perform_async('user_id' => current_user.id)
redirect_to settings_profile_index_path,
notice: I18n.t('a_confirmation_email_has_been_sent_to_the_new_email_address')
else
redirect_to settings_profile_index_path, notice: I18n.t('contact_information_has_been_update')
end
else
render :index, status: :unprocessable_content
end

@ -81,7 +81,7 @@ class StartFormController < ApplicationController
@submitter = Submitter.where(submission: @template.submissions)
.where.not(completed_at: nil)
.find_by!(required_params)
.find_by!(required_params.except('name'))
end
private

@ -65,7 +65,14 @@ class UsersController < ApplicationController
end
if @user.update(attrs.except(*(current_user == @user ? %i[password otp_required_for_login role] : %i[password])))
if @user.try(:pending_reconfirmation?) && @user.previous_changes.key?(:unconfirmed_email)
SendConfirmationInstructionsJob.perform_async('user_id' => @user.id)
redirect_back fallback_location: settings_users_path,
notice: I18n.t('a_confirmation_email_has_been_sent_to_the_new_email_address')
else
redirect_back fallback_location: settings_users_path, notice: I18n.t('user_has_been_updated')
end
else
render turbo_stream: turbo_stream.replace(:modal, template: 'users/edit'), status: :unprocessable_content
end

@ -0,0 +1,11 @@
# frozen_string_literal: true
# Background job that sends confirmation instructions to one user.
class SendConfirmationInstructionsJob
  include Sidekiq::Job

  # @param params [Hash] string-keyed job arguments; params['user_id'] is
  #   handed to User.find, so a missing or unknown id surfaces as whatever
  #   error User.find raises.
  # @return the result of User#send_confirmation_instructions
  def perform(params = {})
    User.find(params['user_id']).send_confirmation_instructions
  end
end

@ -6,6 +6,9 @@
#
# id :bigint not null, primary key
# archived_at :datetime
# confirmation_sent_at :datetime
# confirmation_token :string
# confirmed_at :datetime
# consumed_timestep :integer
# current_sign_in_at :datetime
# current_sign_in_ip :string
@ -24,8 +27,9 @@
# reset_password_token :string
# role :string not null
# sign_in_count :integer default(0), not null
# unconfirmed_email :string
# unlock_token :string
# uuid :text not null
# uuid :string not null
# created_at :datetime not null
# updated_at :datetime not null
# account_id :bigint not null

@ -3,7 +3,7 @@
<%= svg_icon('waving_hand', class: 'h-10 w-10') %>
<span><%= t('welcome_to_product_name', product_name: Docuseal.product_name) %></span>
</h1>
<%= form_for(resource, as: resource_name, url: password_path(resource_name), html: { method: :put, class: 'space-y-6' }) do |f| %>
<%= form_for(resource, as: resource_name, url: invitation_path, html: { method: :put, class: 'space-y-6' }) do |f| %>
<div class="space-y-2">
<%= render 'devise/shared/error_messages', resource: %>
<%= f.hidden_field :reset_password_token %>

@ -18,6 +18,11 @@
<div class="form-control">
<%= f.label :email, t('email'), class: 'label' %>
<%= f.email_field :email, autocomplete: 'off', class: 'base-input' %>
<% if current_user.try(:pending_reconfirmation?) %>
<label class="label">
<span class="label-text-alt"><%= t('email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm', email: f.object.unconfirmed_email) %></span>
</label>
<% end %>
</div>
<div class="form-control pt-2">
<%= f.button button_title(title: t('update'), disabled_with: t('updating')), class: 'base-button' %>

@ -13,6 +13,11 @@
<div class="form-control">
<%= f.label :email, t('email'), class: 'label' %>
<%= f.email_field :email, required: true, class: 'base-input' %>
<% if user.try(:pending_reconfirmation?) %>
<label class="label">
<span class="label-text-alt"><%= t('email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm', email: f.object.unconfirmed_email) %></span>
</label>
<% end %>
<% if user.persisted? && Accounts.can_send_emails?(current_account) %>
<span class="label-text-alt mt-2 mx-1">
<%= t('click_here_to_send_a_reset_password_email_html') %>

@ -52,7 +52,13 @@
<%= user.full_name %>
</td>
<td>
<% if user.try(:pending_reconfirmation?) %>
<%= user.unconfirmed_email %>
<br>
<span class="label-text-alt">(<%= t('unconfirmed') %>)</span>
<% else %>
<%= user.email %>
<% end %>
</td>
<td>
<span class="badge badge-info badge-outline whitespace-nowrap">

@ -31,6 +31,7 @@ end
#
# Use this hook to configure devise mailer, warden hooks and so forth.
# Many of these configuration options can be set straight in your model.
# rubocop:disable Metrics/BlockLength
Devise.setup do |config|
config.warden do |manager|
manager.default_strategies(scope: :user).unshift(:two_factor_authenticatable)
@ -166,7 +167,7 @@ Devise.setup do |config|
# without confirming their account.
# Default is 0.days, meaning the user cannot access the website without
# confirming their account.
# config.allow_unconfirmed_access_for = 2.days
config.allow_unconfirmed_access_for = nil
# A period that the user is allowed to confirm their account before their
# token becomes invalid. For example, if set to 3.days, the user can confirm
@ -332,3 +333,4 @@ Devise.setup do |config|
ActiveSupport.run_load_hooks(:devise_config, config)
end
# rubocop:enable Metrics/BlockLength

@ -71,7 +71,7 @@ en: &en
team_access: Team access
document_download_filename_format: Document download filename format
docuseal_trusted_signature: DocuSeal Trusted Signature
hello_name: Hello %{name}
hello_name: Hi %{name}
you_are_invited_to_product_name: You are invited to %{product_name}
you_have_been_invited_to_account_name_product_name_please_sign_up_using_the_link_below_: 'You have been invited to %{account_name} %{product_name}. Please sign up using the link below:'
sent_using_product_name_in_testing_mode_html: 'Sent using <a href="%{product_url}">%{product_name}</a> in testing mode'
@ -861,6 +861,19 @@ en: &en
reports: Reports
completed_submissions: Completed submissions
sms: SMS
a_confirmation_email_has_been_sent_to_the_new_email_address: A confirmation email has been sent to the new email address.
email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} email address is awaiting confirmation. Follow the link in the email to confirm."
please_confirm_your_email_address_using_the_link_below_: 'Please confirm your email address using the link below:'
confirm_email: Confirm email
unconfirmed: Unconfirmed
devise:
confirmations:
confirmed: Your email address has been successfully confirmed.
failure:
unconfirmed: You have to confirm your email address before continuing.
mailer:
confirmation_instructions:
subject: Confirm your email address
submission_sources:
api: API
bulk: Bulk Send
@ -1808,6 +1821,19 @@ es: &es
reports: Informes
completed_submissions: Envíos completados
sms: SMS
a_confirmation_email_has_been_sent_to_the_new_email_address: Se ha enviado un correo electrónico de confirmación a la nueva dirección de correo electrónico.
email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} está pendiente de confirmación. Sigue el enlace en el correo para confirmarla."
please_confirm_your_email_address_using_the_link_below_: 'Por favor, confirma tu dirección de correo electrónico utilizando el enlace a continuación:'
confirm_email: Confirmar correo
unconfirmed: No confirmado
devise:
confirmations:
confirmed: Tu dirección de correo electrónico ha sido confirmada correctamente.
failure:
unconfirmed: Debes confirmar tu dirección de correo electrónico antes de continuar.
mailer:
confirmation_instructions:
subject: Confirma tu dirección de correo electrónico
submission_sources:
api: API
bulk: Envío masivo
@ -2756,6 +2782,19 @@ it: &it
reports: Rapporti
completed_submissions: Invii completati
sms: SMS
a_confirmation_email_has_been_sent_to_the_new_email_address: È stata inviata un'email di conferma al nuovo indirizzo email.
email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} è in attesa di conferma. Segui il link nell'email per confermare."
please_confirm_your_email_address_using_the_link_below_: 'Conferma il tuo indirizzo email utilizzando il link qui sotto:'
confirm_email: Conferma email
unconfirmed: Non confermato
devise:
confirmations:
confirmed: Il tuo indirizzo email è stato confermato con successo.
failure:
unconfirmed: Devi confermare il tuo indirizzo email prima di continuare.
mailer:
confirmation_instructions:
subject: Conferma il tuo indirizzo email
submission_sources:
api: API
bulk: Invio massivo
@ -3700,6 +3739,19 @@ fr: &fr
reports: Rapports
completed_submissions: Soumissions terminées
sms: SMS
a_confirmation_email_has_been_sent_to_the_new_email_address: Un e-mail de confirmation a été envoyé à la nouvelle adresse e-mail.
email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} est en attente de confirmation. Suivez le lien dans l'e-mail pour la confirmer."
please_confirm_your_email_address_using_the_link_below_: 'Veuillez confirmer votre adresse e-mail en utilisant le lien ci-dessous :'
confirm_email: "Confirmer l'e-mail"
unconfirmed: Non confirmé
devise:
confirmations:
confirmed: Votre adresse e-mail a été confirmée avec succès.
failure:
unconfirmed: Vous devez confirmer votre adresse e-mail avant de continuer.
mailer:
confirmation_instructions:
subject: Confirmez votre adresse e-mail
submission_sources:
api: API
bulk: Envoi en masse
@ -4647,6 +4699,19 @@ pt: &pt
reports: Relatórios
completed_submissions: Envios concluídos
sms: SMS
a_confirmation_email_has_been_sent_to_the_new_email_address: Um e-mail de confirmação foi enviado para o novo endereço de e-mail.
email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} está aguardando confirmação. Siga o link enviado para esse endereço de e-mail para confirmar."
please_confirm_your_email_address_using_the_link_below_: 'Por favor, confirme seu endereço de e-mail usando o link abaixo:'
confirm_email: Confirmar e-mail
unconfirmed: Não confirmado
devise:
confirmations:
confirmed: Seu endereço de e-mail foi confirmado com sucesso.
failure:
unconfirmed: Você deve confirmar seu endereço de e-mail antes de continuar.
mailer:
confirmation_instructions:
subject: Confirme seu endereço de e-mail
submission_sources:
api: API
bulk: Envio em massa
@ -5594,6 +5659,19 @@ de: &de
reports: Berichte
completed_submissions: Abgeschlossene Übermittlungen
sms: SMS
a_confirmation_email_has_been_sent_to_the_new_email_address: Eine Bestätigungs-E-Mail wurde an die neue E-Mail-Adresse gesendet.
email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} wartet auf Bestätigung. Folgen Sie dem Link in der E-Mail, um sie zu bestätigen."
please_confirm_your_email_address_using_the_link_below_: 'Bitte bestätigen Sie Ihre E-Mail-Adresse über den folgenden Link:'
confirm_email: E-Mail bestätigen
unconfirmed: Unbestätigt
devise:
confirmations:
confirmed: Ihre E-Mail-Adresse wurde erfolgreich bestätigt.
failure:
unconfirmed: Sie müssen Ihre E-Mail-Adresse bestätigen, bevor Sie fortfahren.
mailer:
confirmation_instructions:
subject: Bestätigen Sie Ihre E-Mail-Adresse
submission_sources:
api: API
bulk: Massenversand
@ -6902,6 +6980,19 @@ nl: &nl
reports: Rapporten
completed_submissions: Voltooide inzendingen
sms: SMS
a_confirmation_email_has_been_sent_to_the_new_email_address: Er is een bevestigingsmail verzonden naar het nieuwe e-mailadres.
email_address_is_awaiting_confirmation_follow_the_link_in_the_email_to_confirm: "%{email} wacht op bevestiging. Volg de link in de e-mail om te bevestigen."
please_confirm_your_email_address_using_the_link_below_: 'Bevestig je e-mailadres via de onderstaande link:'
confirm_email: E-mailadres bevestigen
unconfirmed: Onbevestigd
devise:
confirmations:
confirmed: Je e-mailadres is succesvol bevestigd.
failure:
unconfirmed: Je moet je e-mailadres bevestigen voordat je verdergaat.
mailer:
confirmation_instructions:
subject: Bevestig je e-mailadres
submission_sources:
api: API
bulk: Bulkverzending

@ -14,13 +14,8 @@ Rails.application.routes.draw do
get 'up' => 'rails/health#show'
get 'manifest' => 'pwa#manifest'
devise_for :users,
path: '/', only: %i[sessions passwords omniauth_callbacks],
controllers: begin
options = { sessions: 'sessions', passwords: 'passwords' }
options[:omniauth_callbacks] = 'omniauth_callbacks' if User.devise_modules.include?(:omniauthable)
options
end
devise_for :users, path: '/', only: %i[sessions passwords],
controllers: { sessions: 'sessions', passwords: 'passwords' }
devise_scope :user do
resource :invitation, only: %i[update] do

@ -0,0 +1,10 @@
# frozen_string_literal: true
# Adds the email-confirmation columns to the users table.
# NOTE(review): column names match what Devise's confirmable module expects —
# confirm against the User model's devise declaration.
class AddConfirmableToUsers < ActiveRecord::Migration[8.0]
  # Reversible: every statement only adds a column.
  def change
    change_table :users do |t|
      t.string :confirmation_token
      t.datetime :confirmed_at
      t.datetime :confirmation_sent_at
      t.string :unconfirmed_email
    end
  end
end

@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[8.0].define(version: 2025_11_21_113910) do
ActiveRecord::Schema[8.0].define(version: 2025_11_25_194305) do
# These are extensions that must be enabled in order to support this database
enable_extension "btree_gin"
enable_extension "plpgsql"
@ -451,6 +451,10 @@ ActiveRecord::Schema[8.0].define(version: 2025_11_21_113910) do
t.string "otp_secret"
t.integer "consumed_timestep"
t.boolean "otp_required_for_login", default: false, null: false
t.string "confirmation_token"
t.datetime "confirmed_at"
t.datetime "confirmation_sent_at"
t.string "unconfirmed_email"
t.index ["account_id"], name: "index_users_on_account_id"
t.index ["email"], name: "index_users_on_email", unique: true
t.index ["reset_password_token"], name: "index_users_on_reset_password_token", unique: true

@ -39,8 +39,25 @@ class Pdfium
FPDF_RENDER_FORCEHALFTONE = 0x400
FPDF_PRINTING = 0x800
TextNode = Struct.new(:content, :x, :y, :w, :h, keyword_init: true)
LineNode = Struct.new(:x, :y, :w, :h, :tilt, keyword_init: true)
# A piece of extracted text (content) together with its box: x, y, w, h.
#
# The far edges are derived on every call instead of being cached in an
# instance variable: Struct members are writable, so a cached value would go
# stale after a write to x/y/w/h (or be carried over by #dup), and assigning
# the cache would raise FrozenError on a frozen node.
TextNode = Struct.new(:content, :x, :y, :w, :h) do
  # @return [Numeric] x + w
  def endx
    x + w
  end

  # @return [Numeric] y + h
  def endy
    y + h
  end
end
# A detected line segment described by its box (x, y, w, h) and a tilt value.
#
# The far edges are derived on every call instead of being cached in an
# instance variable: Struct members are writable, so a cached value would go
# stale after a write to x/y/w/h (or be carried over by #dup), and assigning
# the cache would raise FrozenError on a frozen node.
LineNode = Struct.new(:x, :y, :w, :h, :tilt) do
  # @return [Numeric] y + h
  def endy
    y + h
  end

  # @return [Numeric] x + w
  def endx
    x + w
  end
end
# rubocop:disable Naming/ClassAndModuleCamelCase
class FPDF_LIBRARY_CONFIG < FFI::Struct
@ -433,31 +450,47 @@ class Pdfium
return @text_nodes if char_count.zero?
char_count.times do |i|
unicode = Pdfium.FPDFText_GetUnicode(text_page, i)
char = [unicode].pack('U*')
left_ptr = FFI::MemoryPointer.new(:double)
right_ptr = FFI::MemoryPointer.new(:double)
bottom_ptr = FFI::MemoryPointer.new(:double)
top_ptr = FFI::MemoryPointer.new(:double)
origin_x_ptr = FFI::MemoryPointer.new(:double)
origin_y_ptr = FFI::MemoryPointer.new(:double)
i = 0
loop do
break unless i < char_count
result = Pdfium.FPDFText_GetCharBox(text_page, i, left_ptr, right_ptr, bottom_ptr, top_ptr)
box_index = i
codepoint = Pdfium.FPDFText_GetUnicode(text_page, i)
if codepoint.between?(0xD800, 0xDBFF) && (i + 1 < char_count)
codepoint2 = Pdfium.FPDFText_GetUnicode(text_page, i + 1)
if codepoint2.between?(0xDC00, 0xDFFF)
codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (codepoint2 - 0xDC00)
i += 1
end
end
char = codepoint.chr(Encoding::UTF_8)
result = Pdfium.FPDFText_GetCharBox(text_page, box_index, left_ptr, right_ptr, bottom_ptr, top_ptr)
next if result.zero?
left = left_ptr.read_double
right = right_ptr.read_double
origin_x_ptr = FFI::MemoryPointer.new(:double)
origin_y_ptr = FFI::MemoryPointer.new(:double)
Pdfium.FPDFText_GetCharOrigin(text_page, i, origin_x_ptr, origin_y_ptr)
Pdfium.FPDFText_GetCharOrigin(text_page, box_index, origin_x_ptr, origin_y_ptr)
origin_y = origin_y_ptr.read_double
origin_x = origin_x_ptr.read_double
font_size = Pdfium.FPDFText_GetFontSize(text_page, i)
font_size = Pdfium.FPDFText_GetFontSize(text_page, box_index)
font_size = 8 if font_size == 1
abs_x = left
@ -465,15 +498,21 @@ class Pdfium
abs_width = right - left
abs_height = font_size
x = abs_x / width
x = origin_x / width
y = abs_y / height
node_width = abs_width / width
node_width = (abs_width + ((abs_x - origin_x).abs * 2)) / width
node_height = abs_height / height
@text_nodes << TextNode.new(content: char, x: x, y: y, w: node_width, h: node_height)
@text_nodes << TextNode.new(char, x, y, node_width, node_height)
ensure
i += 1
end
@text_nodes = @text_nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y }
y_threshold = 4.0 / width
@text_nodes = @text_nodes.sort do |a, b|
(a.endy - b.endy).abs < y_threshold ? a.x <=> b.x : a.endy <=> b.endy
end
ensure
Pdfium.FPDFText_ClosePage(text_page) if text_page && !text_page.null?
end
@ -539,10 +578,10 @@ class Pdfium
norm_w = w / width
norm_h = h / height
@line_nodes << LineNode.new(x: norm_x, y: norm_y, w: norm_w, h: norm_h, tilt: tilt)
@line_nodes << LineNode.new(norm_x, norm_y, norm_w, norm_h, tilt)
end
@line_nodes = @line_nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y }
@line_nodes = @line_nodes.sort { |a, b| a.endy == b.endy ? a.x <=> b.x : a.endy <=> b.endy }
end
def close

@ -4,7 +4,16 @@ module Templates
module DetectFields
module_function
TextFieldBox = Struct.new(:x, :y, :w, :h, keyword_init: true)
# Keyword-initialised box (x, y, w, h) with helpers for its far edges.
#
# The edges are derived on every call instead of being cached in an instance
# variable: Struct members are writable, so a cached value would go stale
# after a write to x/y/w/h (or be carried over by #dup), and assigning the
# cache would raise FrozenError on a frozen box.
TextFieldBox = Struct.new(:x, :y, :w, :h, keyword_init: true) do
  # @return [Numeric] y + h
  def endy
    y + h
  end

  # @return [Numeric] x + w
  def endx
    x + w
  end
end
PageNode = Struct.new(:prev, :next, :elem, :page, :attachment_uuid, keyword_init: true)
DATE_REGEXP = /
@ -12,8 +21,7 @@ module Templates
date
| signed\sat
| datum
)
\s*[:-]?\s*\z
)[:_\s-]*\z
/ix
NUMBER_REGEXP = /
@ -31,8 +39,7 @@ module Templates
| menge
| anzahl
| stückzahl
)
\s*[:-]?\s*\z
)[:_\s-]*\z
/ix
SIGNATURE_REGEXP = /
@ -45,10 +52,12 @@ module Templates
| unterschrift
| unterschreiben
| unterzeichnen
)
\s*[:-]?\s*\z
)[:_\s-]*\z
/ix
LINEBREAK = ["\n", "\r"].freeze
CHECKBOXES = ['☐', '□'].freeze
# rubocop:disable Metrics, Style
def call(io, attachment: nil, confidence: 0.3, temperature: 1, inference: Templates::ImageToFields,
nms: 0.1, split_page: false, aspect_ratio: true, padding: 20, regexp_type: true, &)
@ -71,11 +80,13 @@ module Templates
fields = inference.call(image, confidence:, nms:, split_page:,
temperature:, aspect_ratio:, padding:)
fields = sort_fields(fields, y_threshold: 10.0 / image.height)
fields = fields.map do |f|
{
uuid: SecureRandom.uuid,
type: f.type,
required: f.type != 'checkbox',
required: f.type == 'signature',
preferences: {},
areas: [{
x: f.x,
@ -113,6 +124,8 @@ module Templates
text_fields = extract_text_fields_from_page(page)
line_fields = extract_line_fields_from_page(page)
fields = sort_fields(fields, y_threshold: 10.0 / image.height)
fields = increase_confidence_for_overlapping_fields(fields, text_fields)
fields = increase_confidence_for_overlapping_fields(fields, line_fields)
@ -128,7 +141,7 @@ module Templates
{
uuid: SecureRandom.uuid,
type:,
required: type != 'checkbox',
required: type == 'signature',
preferences: {},
areas: [{
x: field.x, y: field.y,
@ -153,6 +166,12 @@ module Templates
doc.close
end
# Orders fields row by row: two fields whose bottom edges (endy) differ by
# less than y_threshold are treated as being on the same row and compared by
# x; otherwise the one with the smaller endy comes first.
#
# @param fields [Array] objects responding to #x and #endy
# @param y_threshold [Numeric] maximum endy difference for "same row"
# @return [Array] a new, sorted array
def sort_fields(fields, y_threshold: 0.01)
  fields.sort do |left, right|
    same_row = (left.endy - right.endy).abs < y_threshold

    if same_row
      left.x <=> right.x
    else
      left.endy <=> right.endy
    end
  end
end
def print_debug(head_node)
current_node = head_node
index = 0
@ -189,121 +208,120 @@ module Templates
def build_page_nodes(page, fields, tail_node, attachment_uuid: nil)
field_nodes = []
current_text = ''.b
y_threshold = 4.0 / page.height
x_threshold = 30.0 / page.width
text_nodes = page.text_nodes
text_idx = 0
field_idx = 0
current_field = fields.shift
while text_idx < text_nodes.length || field_idx < fields.length
text_node = text_nodes[text_idx]
field = fields[field_idx]
index = 0
process_text_node = false
process_field_node = false
prev_node = nil
if text_node && field
text_y_center = text_node.y + (text_node.h / 2.0)
field_y_center = field.y + (field.h / 2.0)
y_threshold = text_node.h / 2.0
vertical_distance = (text_y_center - field_y_center).abs
loop do
node = text_nodes[index]
if vertical_distance < y_threshold
is_underscore = text_node.content == '_'
is_left_of_field = text_node.x < field.x
break unless node
if is_underscore && is_left_of_field
text_x_end = text_node.x + text_node.w
if node.content.in?(LINEBREAK)
next_node = text_nodes[index]
distance = field.x - text_x_end
proximity_threshold = text_node.w * 3.0
if next_node && (next_node.endy - node.endy) < y_threshold
index += 1
if distance < proximity_threshold
process_field_node = true
else
process_text_node = true
next
end
elsif is_left_of_field
process_text_node = true
else
process_field_node = true
end
elsif text_node.y < field.y
process_text_node = true
loop do
break unless current_field
if ((current_field.endy - node.endy).abs < y_threshold &&
(current_field.x <= node.x || node.content.in?(LINEBREAK))) ||
current_field.endy < node.y
if tail_node.elem.is_a?(Templates::ImageToFields::Field)
divider =
if (tail_node.elem.endy - current_field.endy).abs > y_threshold
"\n".b
elsif tail_node.elem.endx - current_field.x > x_threshold
"\t".b
else
process_field_node = true
' '.b
end
elsif text_node
process_text_node = true
elsif field
process_field_node = true
end
text_node = PageNode.new(prev: tail_node, elem: divider, page: page.page_index, attachment_uuid:)
tail_node.next = text_node
tail_node = text_node
elsif prev_node && (prev_node.endy - current_field.endy).abs > y_threshold
text_node = PageNode.new(prev: tail_node, elem: "\n".b, page: page.page_index, attachment_uuid:)
tail_node.next = text_node
if process_field_node
unless current_text.empty?
new_text_node = PageNode.new(prev: tail_node, elem: current_text, page: page.page_index, attachment_uuid:)
tail_node.next = new_text_node
tail_node = new_text_node
current_text = ''.b
tail_node = text_node
end
new_field_node = PageNode.new(prev: tail_node, elem: field, page: page.page_index, attachment_uuid:)
tail_node.next = new_field_node
tail_node = new_field_node
field_node = PageNode.new(prev: tail_node, elem: current_field, page: page.page_index, attachment_uuid:)
tail_node.next = field_node
tail_node = field_node
field_nodes << tail_node
while text_idx < text_nodes.length
text_node_to_check = text_nodes[text_idx]
current_field = fields.shift
else
break
end
end
is_part_of_field = false
if tail_node.elem.is_a?(Templates::ImageToFields::Field)
prev_field = tail_node.elem
if text_node_to_check.content == '_'
check_y_center = text_node_to_check.y + (text_node_to_check.h / 2.0)
check_y_dist = (check_y_center - field_y_center).abs
check_y_thresh = text_node_to_check.h / 2.0
text_node = PageNode.new(prev: tail_node, elem: ''.b, page: page.page_index, attachment_uuid:)
tail_node.next = text_node
if check_y_dist < check_y_thresh
padding = text_node_to_check.w * 3.0
field_x_start = field.x - padding
field_x_end = field.x + field.w + padding
text_x_start = text_node_to_check.x
text_x_end = text_node_to_check.x + text_node_to_check.w
tail_node = text_node
is_part_of_field = true if text_x_start <= field_x_end && field_x_start <= text_x_end
if (node.endy - prev_field.endy).abs > y_threshold
tail_node.elem << "\n"
elsif (node.x - prev_field.endx) > x_threshold
tail_node.elem << "\t"
end
elsif prev_node
if (node.endy - prev_node.endy) > y_threshold && LINEBREAK.exclude?(prev_node.content)
tail_node.elem << "\n"
elsif (node.x - prev_node.endx) > x_threshold && !tail_node.elem.ends_with?("\t")
tail_node.elem << "\t"
end
end
break unless is_part_of_field
text_idx += 1
if node.content != '_' || !tail_node.elem.ends_with?('___')
tail_node.elem << node.content unless CHECKBOXES.include?(node.content)
end
field_idx += 1
elsif process_text_node
if text_idx > 0
prev_text_node = text_nodes[text_idx - 1]
prev_node = node
x_gap = text_node.x - (prev_text_node.x + prev_text_node.w)
index += 1
end
gap_w = text_node.w > prev_text_node.w ? text_node.w : prev_text_node.w
loop do
break unless current_field
current_text << ' ' if x_gap > gap_w * 2
end
field_node = PageNode.new(prev: tail_node, elem: current_field, page: page.page_index, attachment_uuid:)
tail_node.next = field_node
tail_node = field_node
field_nodes << tail_node
current_text << text_node.content
text_idx += 1
end
current_field = fields.shift
end
unless current_text.empty?
new_text_node = PageNode.new(prev: tail_node, elem: current_text, page: page.page_index, attachment_uuid:)
tail_node.next = new_text_node
tail_node = new_text_node
if tail_node.elem.is_a?(Templates::ImageToFields::Field)
text_node = PageNode.new(prev: tail_node, elem: "\n".b, page: page.page_index, attachment_uuid:)
tail_node.next = text_node
tail_node = text_node
else
tail_node.elem << "\n"
end
[field_nodes, tail_node]
@ -399,8 +417,8 @@ module Templates
x1 = node.x
y1 = node.y
x2 = node.x + node.w
y2 = node.y + node.h
x2 = node.endx
y2 = node.endy
underscore_count = 1
@ -417,8 +435,9 @@ module Templates
break if distance > 0.02 || height_diff > node.h * 0.5
underscore_count += 1
next_x2 = next_node.x + next_node.w
next_y2 = next_node.y + next_node.h
next_x2 = next_node.endx
next_y2 = next_node.endy
x2 = next_x2
y2 = [y2, next_y2].max
@ -438,8 +457,8 @@ module Templates
def calculate_iou(box1, box2)
x1 = [box1.x, box2.x].max
y1 = [box1.y, box2.y].max
x2 = [box1.x + box1.w, box2.x + box2.w].min
y2 = [box1.y + box1.h, box2.y + box2.h].min
x2 = [box1.endx, box2.endx].min
y2 = [box1.endy, box2.endy].min
intersection_width = [0, x2 - x1].max
intersection_height = [0, y2 - y1].max
@ -455,8 +474,7 @@ module Templates
end
def boxes_overlap?(box1, box2)
!(box1.x + box1.w < box2.x || box2.x + box2.w < box1.x ||
box1.y + box1.h < box2.y || box2.y + box2.h < box1.y)
!(box1.endx < box2.x || box2.endx < box1.x || box1.endy < box2.y || box2.endy < box1.y)
end
def increase_confidence_for_overlapping_fields(image_fields, text_fields, by: 1.0)
@ -465,14 +483,13 @@ module Templates
image_fields.map do |image_field|
next if image_field.type != 'text'
field_bottom = image_field.y + image_field.h
text_fields.each do |text_field|
break if text_field.y > field_bottom
break if text_field.y > image_field.endy
next if text_field.y + text_field.h < image_field.y
next if text_field.endy < image_field.y
next unless boxes_overlap?(image_field, text_field) && calculate_iou(image_field, text_field) > 0.5
next unless boxes_overlap?(image_field, text_field)
next if calculate_iou(image_field, text_field) < 0.4
break image_field.confidence += by
end

@ -4,7 +4,15 @@ module Templates
module ImageToFields
module_function
Field = Struct.new(:type, :x, :y, :w, :h, :confidence, keyword_init: true)
# A detected field: its type, box (x, y, w, h) and confidence score.
#
# The far edges are derived on every call instead of being cached in an
# instance variable: Field instances are mutated after creation, so a cached
# value would go stale after a write to x/y/w/h (or be carried over by #dup),
# and assigning the cache would raise FrozenError on a frozen field.
Field = Struct.new(:type, :x, :y, :w, :h, :confidence, keyword_init: true) do
  # @return [Numeric] y + h
  def endy
    y + h
  end

  # @return [Numeric] x + w
  def endx
    x + w
  end
end
MODEL_PATH = Rails.root.join('tmp/model.onnx')
@ -60,9 +68,7 @@ module Templates
detections = apply_nms(detections, nms)
fields = build_fields_from_detections(detections, image)
sort_fields(fields, y_threshold: 10.0 / image.height)
build_fields_from_detections(detections, image)
end
def build_split_image_regions(image)
@ -298,27 +304,6 @@ module Templates
end
end
# Orders fields into reading order by grouping them into rows.
#
# Fields are first sorted by y (ties broken by x). Walking that order, a field
# joins the current row while its y is within y_threshold of the row's FIRST
# field; otherwise it starts a new row. Each row is then sorted by x and the
# rows are concatenated.
#
# @param fields [Array] objects responding to #x and #y
# @param y_threshold [Numeric] maximum y distance from a row's first field
# @return [Array] a new, flat, ordered array
def sort_fields(fields, y_threshold: 0.01)
  ordered = fields.sort do |a, b|
    a.y == b.y ? a.x <=> b.x : a.y <=> b.y
  end

  rows = ordered.each_with_object([]) do |field, acc|
    anchor = acc.last&.first

    if anchor && (field.y - anchor.y).abs < y_threshold
      acc.last << field
    else
      acc << [field]
    end
  end

  rows.flat_map { |row| row.sort_by(&:x) }
end
def apply_nms(detections, threshold = 0.5)
return detections if detections[:xyxy].shape[0].zero?

Loading…
Cancel
Save