detect fields

master^2
Pete Matsyburka 1 week ago
parent b46ced2b6f
commit 1c8a7b6a7c

1
.gitignore vendored

@ -37,3 +37,4 @@ yarn-debug.log*
/docuseal
/ee
dump.rdb
*.onnx

@ -9,6 +9,7 @@ RUN apk --no-cache add fontforge wget && \
wget https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSansSymbols2/hinted/ttf/NotoSansSymbols2-Regular.ttf && \
wget https://github.com/Maxattax97/gnu-freefont/raw/master/ttf/FreeSans.ttf && \
wget https://github.com/impallari/DancingScript/raw/master/OFL.txt && \
wget -O /model.onnx "https://github.com/docusealco/fields-detection/releases/download/1.0.0/model_704_int8.onnx" && \
wget -O pdfium-linux.tgz "https://github.com/docusealco/pdfium-binaries/releases/latest/download/pdfium-linux-$(uname -m | sed 's/x86_64/x64/;s/aarch64/arm64/').tgz" && \
mkdir -p /pdfium-linux && \
tar -xzf pdfium-linux.tgz -C /pdfium-linux
@ -50,7 +51,7 @@ ENV OPENSSL_CONF=/app/openssl_legacy.cnf
WORKDIR /app
RUN echo '@edge https://dl-cdn.alpinelinux.org/alpine/edge/community' >> /etc/apk/repositories && apk add --no-cache sqlite-dev libpq-dev mariadb-dev vips-dev@edge yaml-dev redis libheif@edge vips-heif@edge gcompat ttf-freefont && mkdir /fonts && rm /usr/share/fonts/freefont/FreeSans.otf
RUN apk add --no-cache sqlite-dev libpq-dev mariadb-dev vips-dev yaml-dev redis libheif vips-heif gcompat ttf-freefont && mkdir /fonts && rm /usr/share/fonts/freefont/FreeSans.otf
RUN echo $'.include = /etc/ssl/openssl.cnf\n\
\n\
@ -66,7 +67,9 @@ activate = 1' >> /app/openssl_legacy.cnf
COPY ./Gemfile ./Gemfile.lock ./
RUN apk add --no-cache build-base && bundle install && apk del --no-cache build-base && rm -rf ~/.bundle /usr/local/bundle/cache && ruby -e "puts Dir['/usr/local/bundle/**/{spec,rdoc,resources/shared,resources/collation,resources/locales}']" | xargs rm -rf
RUN apk add --no-cache build-base && bundle install && apk del --no-cache build-base && rm -rf ~/.bundle /usr/local/bundle/cache && ruby -e "puts Dir['/usr/local/bundle/**/{spec,rdoc,resources/shared,resources/collation,resources/locales}']" | xargs rm -rf && ln -sf /usr/lib/libonnxruntime.so.1 $(ruby -e "print Dir[Gem::Specification.find_by_name('onnxruntime').gem_dir + '/vendor/*.so'].first")
RUN echo 'https://dl-cdn.alpinelinux.org/alpine/edge/main' >> /etc/apk/repositories && echo 'https://dl-cdn.alpinelinux.org/alpine/edge/community' >> /etc/apk/repositories && apk add --no-cache onnxruntime
COPY ./bin ./bin
COPY ./app ./app
@ -83,6 +86,7 @@ COPY --from=download /fonts/GoNotoKurrent-Regular.ttf /fonts/GoNotoKurrent-Bold.
COPY --from=download /fonts/FreeSans.ttf /usr/share/fonts/freefont
COPY --from=download /pdfium-linux/lib/libpdfium.so /usr/lib/libpdfium.so
COPY --from=download /pdfium-linux/licenses/pdfium.txt /usr/lib/libpdfium-LICENSE.txt
COPY --from=download /model.onnx /app/tmp/model.onnx
COPY --from=webpack /app/public/packs ./public/packs
RUN ln -s /fonts /app/public/fonts

@ -24,7 +24,9 @@ gem 'image_processing'
gem 'jwt'
gem 'lograge'
gem 'mysql2', require: false
gem 'numo-narray'
gem 'oj'
gem 'onnxruntime'
gem 'pagy'
gem 'pg', require: false
gem 'premailer-rails'

@ -357,9 +357,18 @@ GEM
racc (~> 1.4)
nokogiri (1.18.9-x86_64-linux-musl)
racc (~> 1.4)
numo-narray (0.9.2.1)
oj (3.16.11)
bigdecimal (>= 3.0)
ostruct (>= 0.2)
onnxruntime (0.10.1)
ffi
onnxruntime (0.10.1-aarch64-linux)
ffi
onnxruntime (0.10.1-arm64-darwin)
ffi
onnxruntime (0.10.1-x86_64-linux)
ffi
openssl (3.3.0)
orm_adapter (0.5.0)
os (1.1.4)
@ -638,7 +647,9 @@ DEPENDENCIES
letter_opener_web
lograge
mysql2
numo-narray
oj
onnxruntime
pagy
pg
premailer-rails

@ -6,12 +6,18 @@ class TemplatesDebugController < ApplicationController
DEBUG_FILE = ''
def show
attachment = @template.documents.first
schema_uuids = @template.schema.index_by { |e| e['attachment_uuid'] }
attachment = @template.documents.find { |a| schema_uuids[a.uuid] }
data = attachment.download
pdf = HexaPDF::Document.new(io: StringIO.new(data))
fields = Templates::FindAcroFields.call(pdf, attachment, data)
unless attachment.image?
pdf = HexaPDF::Document.new(io: StringIO.new(data))
fields = Templates::FindAcroFields.call(pdf, attachment, data)
end
fields = Templates::DetectFields.call(StringIO.new(data), attachment:) if fields.blank?
attachment.metadata['pdf'] ||= {}
attachment.metadata['pdf']['fields'] = fields

@ -0,0 +1,27 @@
# frozen_string_literal: true
# Streams automatically detected template fields to the client as
# server-sent events: one event per result yielded by
# Templates::DetectFields, followed by a final `completed` event.
class TemplatesDetectFieldsController < ApplicationController
  include ActionController::Live

  load_and_authorize_resource :template

  # Runs the detection for every schema document of the template and writes
  # each yielded result to the event stream as soon as it is available.
  # The response stream is always closed, even when the detection raises.
  def create
    response.headers['Content-Type'] = 'text/event-stream'

    event_stream = SSE.new(response.stream)

    @template.schema_documents.preload(:blob).each do |document|
      stream_document_fields(document, event_stream)
    end

    event_stream.write({ completed: true })
  ensure
    response.stream.close
  end

  private

  # Downloads a single document, runs the detection on it and forwards every
  # yielded [attachment_uuid, page, fields] triple as one event.
  def stream_document_fields(document, event_stream)
    contents = StringIO.new(document.download)

    Templates::DetectFields.call(contents, attachment: document) do |(attachment_uuid, page, fields)|
      event_stream.write({ attachment_uuid:, page:, fields: })
    end
  end
end

@ -156,6 +156,7 @@ safeRegisterElement('template-builder', class extends HTMLElement {
withPhone: this.dataset.withPhone === 'true',
withVerification: ['true', 'false'].includes(this.dataset.withVerification) ? this.dataset.withVerification === 'true' : null,
withLogo: this.dataset.withLogo !== 'false',
withFieldsDetection: this.dataset.withFieldsDetection === 'true',
editable: this.dataset.editable !== 'false',
authenticityToken: document.querySelector('meta[name="csrf-token"]')?.content,
withPayment: this.dataset.withPayment === 'true',

@ -449,6 +449,7 @@
:default-required-fields="defaultRequiredFields"
:field-types="fieldTypes"
:with-sticky-submitters="withStickySubmitters"
:with-fields-detection="withFieldsDetection"
:with-signature-id="withSignatureId"
:with-prefillable="withPrefillable"
:only-defined-fields="onlyDefinedFields"
@ -618,6 +619,11 @@ export default {
required: false,
default: true
},
withFieldsDetection: {
type: Boolean,
required: false,
default: false
},
withAddPageButton: {
type: Boolean,
required: false,

@ -208,6 +208,34 @@
</li>
</ul>
</div>
<div
v-if="withFieldsDetection && editable && fields.length < 2"
class="mt-2"
>
<button
class="btn w-full"
:class="{ 'bg-base-300': fieldPagesLoaded !== null }"
@click="fieldPagesLoaded !== null ? null : detectFields()"
>
<template v-if="fieldPagesLoaded !== null">
<IconInnerShadowTop
width="22"
class="animate-spin"
/>
<span class="hidden md:inline">
{{ fieldPagesLoaded }} / {{ numberOfPages }} {{ t('processing_') }}
</span>
</template>
<template v-else>
<IconListSearch width="22" />
<span
class="hidden md:inline"
>
{{ t('autodetect_fields') }}
</span>
</template>
</button>
</div>
<div
v-show="fields.length < 4 && editable && withHelp && showTourStartForm"
class="rounded py-2 px-4 w-full border border-dashed border-base-300"
@ -231,7 +259,7 @@
import Field from './field'
import FieldType from './field_type'
import FieldSubmitter from './field_submitter'
import { IconLock, IconCirclePlus } from '@tabler/icons-vue'
import { IconLock, IconCirclePlus, IconInnerShadowTop, IconListSearch } from '@tabler/icons-vue'
import IconDrag from './icon_drag'
export default {
@ -240,11 +268,13 @@ export default {
Field,
FieldType,
IconCirclePlus,
IconListSearch,
IconInnerShadowTop,
FieldSubmitter,
IconDrag,
IconLock
},
inject: ['save', 'backgroundColor', 'withPhone', 'withVerification', 'withPayment', 't', 'fieldsDragFieldRef'],
inject: ['save', 'backgroundColor', 'withPhone', 'withVerification', 'withPayment', 't', 'fieldsDragFieldRef', 'baseFetch'],
props: {
fields: {
type: Array,
@ -255,6 +285,11 @@ export default {
required: false,
default: null
},
withFieldsDetection: {
type: Boolean,
required: false,
default: false
},
withSignatureId: {
type: Boolean,
required: false,
@ -331,12 +366,18 @@ export default {
emits: ['add-field', 'set-draw', 'set-draw-type', 'set-drag', 'drag-end', 'scroll-to-area', 'change-submitter', 'set-drag-placeholder'],
data () {
return {
fieldPagesLoaded: null,
defaultFieldsSearch: ''
}
},
computed: {
fieldNames: FieldType.computed.fieldNames,
fieldIcons: FieldType.computed.fieldIcons,
numberOfPages () {
return this.template.documents.reduce((acc, doc) => {
return acc + doc.metadata?.pdf?.number_of_pages || doc.preview_images.length
}, 0)
},
isShowFieldSearch () {
if (this.withFieldsSearch === false) {
return false
@ -389,6 +430,61 @@ export default {
this.$emit('set-drag', field)
},
detectFields () {
const fields = []
this.fieldPagesLoaded = 0
this.baseFetch(`/templates/${this.template.id}/detect_fields`, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
}
}).then(async (response) => {
const reader = response.body.getReader()
const decoder = new TextDecoder('utf-8')
let buffer = ''
while (true) {
const { value, done } = await reader.read()
if (done) break
buffer += decoder.decode(value, { stream: true })
const lines = buffer.split('\n\n')
buffer = lines.pop()
for (const line of lines) {
if (line.startsWith('data: ')) {
const jsonStr = line.replace(/^data: /, '')
const data = JSON.parse(jsonStr)
if (data.completed) {
this.fieldPagesLoaded = null
this.template.fields = fields
break
} else if (data.fields) {
data.fields.forEach((f) => {
f.submitter_uuid = this.template.submitters[0].uuid
})
this.fieldPagesLoaded += 1
fields.push(...data.fields)
}
}
}
}
}).catch(error => {
console.error('Error in streaming message: ', error)
}).finally(() => {
this.fieldPagesLoaded = null
this.isFieldsLoading = false
})
},
setDragPlaceholder (event) {
this.$emit('set-drag-placeholder', {
offsetX: event.offsetX,

@ -1,5 +1,6 @@
const en = {
view: 'View',
autodetect_fields: 'Autodetect fields',
payment_link: 'Payment link',
strikeout: 'Strikeout',
draw_strikethrough_the_document: 'Draw strikethrough the document',

@ -6,4 +6,4 @@
<%= button_to nil, user_configs_path, method: :post, params: { user_config: { key: UserConfig::SHOW_APP_TOUR, value: true } }, class: 'hidden', id: 'start_tour_button' %>
<% end %>
<% end %>
<template-builder class="grid" data-template="<%= @template_data %>" data-with-sign-yourself-button="<%= !@template.archived_at? %>" data-with-send-button="<%= !@template.archived_at? && can?(:create, @template.submissions.new(account: current_account)) %>" data-locale="<%= I18n.locale %>" data-show-tour-start-form="<%= @show_tour_start_form %>"></template-builder>
<template-builder class="grid" data-template="<%= @template_data %>" data-with-sign-yourself-button="<%= !@template.archived_at? %>" data-with-fields-detection="true" data-with-send-button="<%= !@template.archived_at? && can?(:create, @template.submissions.new(account: current_account)) %>" data-locale="<%= I18n.locale %>" data-show-tour-start-form="<%= @show_tour_start_form %>"></template-builder>

@ -100,6 +100,9 @@ Rails.application.routes.draw do
resource :debug, only: %i[show], controller: 'templates_debug' if Rails.env.development?
resources :documents, only: %i[create], controller: 'template_documents'
resources :clone_and_replace, only: %i[create], controller: 'templates_clone_and_replace'
if !Docuseal.multitenant? || Docuseal.demo?
resources :detect_fields, only: %i[create], controller: 'templates_detect_fields'
end
resources :restore, only: %i[create], controller: 'templates_restore'
resources :archived, only: %i[index], controller: 'templates_archived_submissions'
resources :submissions, only: %i[new create]

@ -25,6 +25,8 @@ class Pdfium
typedef :pointer, :FPDF_BITMAP
typedef :pointer, :FPDF_FORMHANDLE
typedef :pointer, :FPDF_TEXTPAGE
typedef :pointer, :FPDF_PAGEOBJECT
typedef :pointer, :FPDF_PATHSEGMENT
MAX_SIZE = 32_767
@ -37,6 +39,9 @@ class Pdfium
FPDF_RENDER_FORCEHALFTONE = 0x400
FPDF_PRINTING = 0x800
TextNode = Struct.new(:content, :x, :y, :w, :h, keyword_init: true)
LineNode = Struct.new(:x, :y, :w, :h, :tilt, keyword_init: true)
# rubocop:disable Naming/ClassAndModuleCamelCase
class FPDF_LIBRARY_CONFIG < FFI::Struct
layout :version, :int,
@ -77,6 +82,37 @@ class Pdfium
attach_function :FPDFText_ClosePage, [:FPDF_TEXTPAGE], :void
attach_function :FPDFText_CountChars, [:FPDF_TEXTPAGE], :int
attach_function :FPDFText_GetText, %i[FPDF_TEXTPAGE int int pointer], :int
attach_function :FPDFText_GetUnicode, %i[FPDF_TEXTPAGE int], :uint
attach_function :FPDFText_GetCharBox, %i[FPDF_TEXTPAGE int pointer pointer pointer pointer], :int
attach_function :FPDFText_GetCharOrigin, %i[FPDF_TEXTPAGE int pointer pointer], :int
attach_function :FPDFText_GetCharIndexAtPos, %i[FPDF_TEXTPAGE double double double double], :int
attach_function :FPDFText_CountRects, %i[FPDF_TEXTPAGE int int], :int
attach_function :FPDFText_GetRect, %i[FPDF_TEXTPAGE int pointer pointer pointer pointer], :int
attach_function :FPDFText_GetFontSize, %i[FPDF_TEXTPAGE int], :double
# Page object functions for extracting paths/lines
attach_function :FPDFPage_CountObjects, [:FPDF_PAGE], :int
attach_function :FPDFPage_GetObject, %i[FPDF_PAGE int], :FPDF_PAGEOBJECT
attach_function :FPDFPageObj_GetType, [:FPDF_PAGEOBJECT], :int
attach_function :FPDFPageObj_GetBounds, %i[FPDF_PAGEOBJECT pointer pointer pointer pointer], :int
attach_function :FPDFPath_CountSegments, [:FPDF_PAGEOBJECT], :int
attach_function :FPDFPath_GetPathSegment, %i[FPDF_PAGEOBJECT int], :FPDF_PATHSEGMENT
attach_function :FPDFPathSegment_GetType, [:FPDF_PATHSEGMENT], :int
attach_function :FPDFPathSegment_GetPoint, %i[FPDF_PATHSEGMENT pointer pointer], :int
# Page object types
FPDF_PAGEOBJ_UNKNOWN = 0
FPDF_PAGEOBJ_TEXT = 1
FPDF_PAGEOBJ_PATH = 2
FPDF_PAGEOBJ_IMAGE = 3
FPDF_PAGEOBJ_SHADING = 4
FPDF_PAGEOBJ_FORM = 5
# Path segment types
FPDF_SEGMENT_UNKNOWN = -1
FPDF_SEGMENT_LINETO = 0
FPDF_SEGMENT_BEZIERTO = 1
FPDF_SEGMENT_MOVETO = 2
typedef :int, :FPDF_BOOL
typedef :pointer, :IPDF_JSPLATFORM
@ -157,6 +193,7 @@ class Pdfium
raise PdfiumError, "#{context_message}: #{error_message(error_code)} (Code: #{error_code})"
end
# rubocop:disable Metrics
class Document
attr_reader :document_ptr, :form_handle
@ -386,6 +423,128 @@ class Pdfium
Pdfium.FPDFText_ClosePage(text_page) if text_page && !text_page.null?
end
# Returns one TextNode per character of the page that has a bounding box.
#
# Coordinates are divided by the page width/height, and the vertical axis is
# flipped (`height - origin_y`) so `y` grows downwards. Nodes are sorted by
# `y`, then by `x`. The result is memoized only after the whole page has been
# processed, so a failure in the middle never leaves a partial list cached.
#
# @return [Array<TextNode>]
def text_nodes
  return @text_nodes if @text_nodes

  ensure_not_closed!

  text_page = Pdfium.FPDFText_LoadPage(page_ptr)
  char_count = Pdfium.FPDFText_CountChars(text_page)

  # Output buffers are reused for every character instead of being
  # allocated six times per character.
  left_ptr = FFI::MemoryPointer.new(:double)
  right_ptr = FFI::MemoryPointer.new(:double)
  bottom_ptr = FFI::MemoryPointer.new(:double)
  top_ptr = FFI::MemoryPointer.new(:double)
  origin_x_ptr = FFI::MemoryPointer.new(:double)
  origin_y_ptr = FFI::MemoryPointer.new(:double)

  nodes = []

  char_count.times do |i|
    unicode = Pdfium.FPDFText_GetUnicode(text_page, i)
    char = [unicode].pack('U*')

    # Characters without a bounding box are skipped.
    next if Pdfium.FPDFText_GetCharBox(text_page, i, left_ptr, right_ptr, bottom_ptr, top_ptr).zero?

    left = left_ptr.read_double
    right = right_ptr.read_double

    # The buffer is shared between iterations, so fall back to 0.0 when the
    # origin lookup fails rather than reading the previous character's value.
    origin_found = Pdfium.FPDFText_GetCharOrigin(text_page, i, origin_x_ptr, origin_y_ptr)
    origin_y = origin_found.zero? ? 0.0 : origin_y_ptr.read_double

    font_size = Pdfium.FPDFText_GetFontSize(text_page, i)
    font_size = 8 if font_size == 1

    abs_y = height - origin_y - (font_size * 0.8)

    nodes << TextNode.new(
      content: char,
      x: left / width,
      y: abs_y / height,
      w: (right - left) / width,
      h: font_size / height
    )
  end

  @text_nodes = nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y }
ensure
  Pdfium.FPDFText_ClosePage(text_page) if text_page && !text_page.null?
end
# Returns the thin path objects of the page as LineNode structs.
#
# Only path objects with 2..10 segments whose shorter side is below 10 are
# kept; `tilt` is 0 for wider-than-tall objects and 90 for taller-than-wide
# ones. Coordinates are divided by the page width/height with the vertical
# axis flipped, and the nodes are sorted by `y`, then by `x`. Memoized.
#
# @return [Array<LineNode>]
def line_nodes
  return @line_nodes if @line_nodes

  ensure_not_closed!

  @line_nodes = []

  total_objects = Pdfium.FPDFPage_CountObjects(page_ptr)

  return @line_nodes if total_objects.zero?

  total_objects.times do |index|
    object = Pdfium.FPDFPage_GetObject(page_ptr, index)

    next if object.null?
    next if Pdfium.FPDFPageObj_GetType(object) != Pdfium::FPDF_PAGEOBJ_PATH

    # Bounds are written in the order left, bottom, right, top.
    bounds = Array.new(4) { FFI::MemoryPointer.new(:float) }

    Pdfium.FPDFPageObj_GetBounds(object, *bounds)

    box_left, box_bottom, box_right, box_top = bounds.map(&:read_float)

    box_width = box_right - box_left
    box_height = box_top - box_bottom

    next if box_width < 1 && box_height < 1

    segments = Pdfium.FPDFPath_CountSegments(object)

    next if segments < 2 || segments > 10
    next unless box_height < 10 || box_width < 10

    tilt =
      if box_width > box_height && box_height < 10
        0
      elsif box_height > box_width && box_width < 10
        90
      end

    # Objects that are neither clearly horizontal nor clearly vertical are ignored.
    next if tilt.nil?

    @line_nodes << LineNode.new(
      x: box_left / width,
      y: (height - box_bottom - box_height) / height,
      w: box_width / width,
      h: box_height / height,
      tilt: tilt
    )
  end

  @line_nodes = @line_nodes.sort do |first, second|
    next first.x <=> second.x if first.y == second.y

    first.y <=> second.y
  end
end
def close
return if closed?
@ -445,4 +604,5 @@ class Pdfium
at_exit do
cleanup_library
end
# rubocop:enable Metrics
end

@ -0,0 +1,264 @@
# frozen_string_literal: true
module Templates
  # Detects form fields in images and PDF documents.
  #
  # Images are passed straight to Templates::ImageToFields. PDF pages are
  # rendered to a bitmap, run through the same detector with a low threshold,
  # and the confidence of detected text fields is then increased when they
  # overlap underscore runs or drawn horizontal lines found on the page.
  module DetectFields
    module_function

    TextFieldBox = Struct.new(:x, :y, :w, :h, keyword_init: true)

    # rubocop:disable Metrics

    # Detects fields in the given document.
    #
    # @param io [#read] document contents
    # @param attachment [Object, nil] responds to `image?` and `uuid` when given
    # @param confidence [Float] minimum confidence of a returned field
    # @yield [Array] `[attachment_uuid, page_number, fields]` per processed page
    # @return [Array<Hash>] field attribute hashes
    def call(io, attachment: nil, confidence: 0.3, temperature: 1,
             nms: 0.1, split_page: false, aspect_ratio: true, padding: 20, &)
      if attachment&.image?
        process_image_attachment(io, attachment:, confidence:, nms:, split_page:,
                                     temperature:, aspect_ratio:, padding:, &)
      else
        process_pdf_attachment(io, attachment:, confidence:, nms:, split_page:,
                                   temperature:, aspect_ratio:, padding:, &)
      end
    end

    # Detects fields on a single image; all fields are reported on page 0.
    def process_image_attachment(io, attachment:, confidence:, nms:, temperature: 1,
                                 split_page: false, aspect_ratio: false, padding: nil)
      image = Vips::Image.new_from_buffer(io.read, '')

      detected = Templates::ImageToFields.call(image, confidence:, nms:, split_page:,
                                                      temperature:, aspect_ratio:, padding:)

      fields = detected.map { |field| build_field(field, page: 0, attachment:) }

      yield [attachment&.uuid, 0, fields] if block_given?

      fields
    end

    # Detects fields on every page of a PDF. The detector runs with a fixed low
    # threshold (0.05); `confidence` is applied after the overlap boost.
    def process_pdf_attachment(io, attachment:, confidence:, nms:, temperature: 1,
                               split_page: false, aspect_ratio: false, padding: nil)
      doc = Pdfium::Document.open_bytes(io.read)

      doc.page_count.times.flat_map do |page_number|
        page = doc.get_page(page_number)

        data, width, height = page.render_to_bitmap(width: ImageToFields::RESOLUTION * 1.5)

        image = Vips::Image.new_from_memory(data, width, height, 4, :uchar)

        detected = Templates::ImageToFields.call(image, confidence: 0.05, nms:, split_page:,
                                                        temperature:, aspect_ratio:, padding:)

        detected = increase_confidence_for_overlapping_fields(detected, extract_text_fields_from_page(page))
        detected = increase_confidence_for_overlapping_fields(detected, extract_line_fields_from_page(page))

        fields = detected.filter_map do |field|
          next if field.confidence < confidence

          build_field(field, page: page_number, attachment:)
        end

        yield [attachment&.uuid, page_number, fields] if block_given?

        fields
      end
    ensure
      # Release the native document even when a page fails to process.
      doc&.close
    end

    # Builds the field attributes hash for a detected field.
    def build_field(field, page:, attachment:)
      {
        uuid: SecureRandom.uuid,
        type: field.type,
        required: true,
        preferences: {},
        areas: [{
          x: field.x, y: field.y,
          w: field.w, h: field.h,
          page:,
          attachment_uuid: attachment&.uuid
        }]
      }
    end

    # Returns the horizontal lines of the page that look like fill-in lines,
    # as copies whose box is extended upwards by four line thicknesses.
    #
    # Rejected lines: wide lines (w > 0.7 with h < 0.9), lines touching a
    # vertical line, and lines where the text found directly above spans more
    # than half of the line width. The structs memoized in `page.line_nodes`
    # are not modified.
    def extract_line_fields_from_page(page)
      line_thickness = 5.0 / page.height

      vertical_lines, all_horizontal_lines = page.line_nodes.partition { |line| line.tilt == 90 }

      horizontal_lines = all_horizontal_lines.reject do |h_line|
        next true if h_line.w > 0.7 && h_line.h < 0.9
        next false if vertical_lines.empty?

        h_x_min = h_line.x - line_thickness
        h_x_max = h_line.x + h_line.w + line_thickness
        h_y_mid = h_line.y + (h_line.h / 2)
        h_y_min = h_y_mid - line_thickness
        h_y_max = h_y_mid + line_thickness

        vertical_lines.any? do |v_line|
          v_x_mid = v_line.x + (v_line.w / 2)
          v_x_min = v_x_mid - line_thickness
          v_x_max = v_x_mid + line_thickness
          v_y_min = v_line.y - line_thickness
          v_y_max = v_line.y + v_line.h + line_thickness

          v_x_min <= h_x_max && v_x_max >= h_x_min && h_y_min <= v_y_max && h_y_max >= v_y_min
        end
      end

      text_nodes = page.text_nodes

      # The cursor is shared between lines; both lists are walked in their
      # given order.
      # NOTE(review): the cursor is advanced before each read, so the text node
      # at index 0 is never inspected - confirm this is intended.
      node_index = 0

      horizontal_lines = horizontal_lines.reject do |line|
        nodes = []

        loop do
          node = text_nodes[node_index += 1]

          break unless node
          break if node.y > line.y
          next unless text_node_on_line?(node, line)

          nodes << node

          next_node = text_nodes[node_index + 1]

          # The matched node may be the last one on the page.
          break if next_node.nil? || !text_node_on_line?(next_node, line)
        end

        next false if nodes.empty?

        nodes.last.x + nodes.last.w - nodes.first.x > line.w / 2.0
      end

      horizontal_lines.map do |line|
        line.dup.tap do |expanded|
          expanded.h += 4 * line_thickness
          expanded.y -= 4 * line_thickness
        end
      end
    end

    # True when the text node horizontally intersects the line and sits on or
    # at most one node height above it.
    def text_node_on_line?(node, line)
      !(node.x + node.w < line.x || line.x + line.w < node.x ||
        node.y + node.h < line.y - node.h || line.y < node.y)
    end

    # Returns a TextFieldBox for every run of two or more consecutive '_'
    # text nodes. A run ends when the horizontal gap to the next underscore
    # exceeds 0.02 or its `y` differs from the run by more than half of the
    # first node's height.
    def extract_text_fields_from_page(page)
      text_nodes = page.text_nodes

      field_boxes = []

      i = 0

      while i < text_nodes.length
        node = text_nodes[i]

        if node.content != '_'
          i += 1

          next
        end

        x1 = node.x
        y1 = node.y
        x2 = node.x + node.w
        y2 = node.y + node.h

        underscore_count = 1

        j = i + 1

        while j < text_nodes.length
          next_node = text_nodes[j]

          break unless next_node.content == '_'
          break if next_node.x - x2 > 0.02 || (next_node.y - y1).abs > node.h * 0.5

          underscore_count += 1

          x2 = next_node.x + next_node.w
          y2 = [y2, next_node.y + next_node.h].max
          y1 = [y1, next_node.y].min

          j += 1
        end

        field_boxes << TextFieldBox.new(x: x1, y: y1, w: x2 - x1, h: y2 - y1) if underscore_count >= 2

        i = j
      end

      field_boxes
    end

    # Intersection over union of two boxes responding to x, y, w and h.
    # Always returns a Float, also for integer coordinates.
    def calculate_iou(box1, box2)
      x1 = [box1.x, box2.x].max
      y1 = [box1.y, box2.y].max
      x2 = [box1.x + box1.w, box2.x + box2.w].min
      y2 = [box1.y + box1.h, box2.y + box2.h].min

      intersection_area = [0, x2 - x1].max * [0, y2 - y1].max

      return 0.0 if intersection_area.zero?

      union_area = (box1.w * box1.h) + (box2.w * box2.h) - intersection_area

      intersection_area.fdiv(union_area)
    end

    # True when the two boxes intersect or touch.
    def boxes_overlap?(box1, box2)
      !(box1.x + box1.w < box2.x || box2.x + box2.w < box1.x ||
        box1.y + box1.h < box2.y || box2.y + box2.h < box1.y)
    end

    # Adds `by` to the confidence of every 'text' image field that overlaps
    # one of `text_fields` with an IoU above 0.5 (at most once per field).
    # The scan over `text_fields` stops at the first box that starts below
    # the image field, so the boxes are expected to be ordered by `y`.
    #
    # @return [Array] the same `image_fields` array, modified in place
    def increase_confidence_for_overlapping_fields(image_fields, text_fields, by: 1.0)
      return image_fields if text_fields.nil? || text_fields.empty?

      image_fields.each do |image_field|
        next if image_field.type != 'text'

        field_bottom = image_field.y + image_field.h

        text_fields.each do |text_field|
          break if text_field.y > field_bottom

          next if text_field.y + text_field.h < image_field.y
          next unless boxes_overlap?(image_field, text_field) && calculate_iou(image_field, text_field) > 0.5

          image_field.confidence += by

          break
        end
      end

      image_fields
    end
    # rubocop:enable Metrics
  end
end

@ -0,0 +1,331 @@
# frozen_string_literal: true
module Templates
  # Runs the ONNX field detection model on an image and converts its output
  # into Field structs with coordinates relative to the image size.
  module ImageToFields
    module_function

    Field = Struct.new(:type, :x, :y, :w, :h, :confidence, keyword_init: true)

    # The Dockerfile stores the downloaded model as tmp/model.onnx, while the
    # original file name is model_704_int8.onnx; use whichever is present and
    # keep the original name as the default.
    MODEL_PATH_CANDIDATES = %w[tmp/model_704_int8.onnx tmp/model.onnx].map { |path| Rails.root.join(path) }.freeze
    MODEL_PATH = MODEL_PATH_CANDIDATES.find(&:exist?) || MODEL_PATH_CANDIDATES.first

    RESOLUTION = 704
    ID_TO_CLASS = %w[text checkbox].freeze

    MEAN = [0.485, 0.456, 0.406].freeze
    STD = [0.229, 0.224, 0.225].freeze

    CPU_THREADS = Etc.nprocessors

    # rubocop:disable Metrics

    # Detects fields on the image.
    #
    # @param image [Vips::Image]
    # @param confidence [Float] detections scoring at or below it are dropped
    # @param nms [Float] IoU threshold for non-maximum suppression
    # @param temperature [Numeric] divisor applied to the logits before the sigmoid
    # @param split_page [Boolean] run the model on the top and bottom halves
    #   separately when the image is taller than wide
    # @param aspect_ratio [Boolean] keep the aspect ratio when resizing
    # @param padding [Integer, nil] margin kept around the trimmed content;
    #   nil disables trimming
    # @return [Array<Field>] fields ordered in rows, left to right
    def call(image, confidence: 0.3, nms: 0.1, temperature: 1,
             split_page: false, aspect_ratio: true, padding: nil)
      base_image = image.extract_band(0, n: 3)

      trimmed_base, base_offset_x, base_offset_y = trim_image_with_padding(base_image, padding)

      if split_page && image.height > image.width
        half_h = trimmed_base.height / 2
        top_h = half_h
        bottom_h = trimmed_base.height - half_h

        regions = [
          { img: trimmed_base.crop(0, 0, trimmed_base.width, top_h), offset_y: 0 },
          { img: trimmed_base.crop(0, top_h, trimmed_base.width, bottom_h), offset_y: top_h }
        ]

        detections = { xyxy: Numo::SFloat[], confidence: Numo::SFloat[], class_id: Numo::Int32[] }

        detections = regions.reduce(detections) do |acc, r|
          # Skip empty regions but keep what has been accumulated so far.
          next acc if r[:img].height <= 0 || r[:img].width <= 0

          input_tensor, transform_info = preprocess_image(r[:img], RESOLUTION, aspect_ratio:)

          transform_info[:trim_offset_x] = base_offset_x
          transform_info[:trim_offset_y] = base_offset_y + r[:offset_y]

          outputs = model.predict({ 'input' => input_tensor })

          postprocess_outputs(outputs, transform_info, acc, confidence:, temperature:)
        end
      else
        input_tensor, transform_info = preprocess_image(trimmed_base, RESOLUTION, aspect_ratio:)

        transform_info[:trim_offset_x] = base_offset_x
        transform_info[:trim_offset_y] = base_offset_y

        outputs = model.predict({ 'input' => input_tensor })

        detections = postprocess_outputs(outputs, transform_info, confidence:, temperature:)
      end

      detections = apply_nms(detections, nms)

      fields = Array.new(detections[:xyxy].shape[0]) do |i|
        x1 = detections[:xyxy][i, 0]
        y1 = detections[:xyxy][i, 1]
        x2 = detections[:xyxy][i, 2]
        y2 = detections[:xyxy][i, 3]

        class_id = detections[:class_id][i].to_i
        # Named `score` so the `confidence` keyword argument is not overwritten.
        score = detections[:confidence][i]

        x0_norm = x1 / image.width.to_f
        y0_norm = y1 / image.height.to_f
        x1_norm = x2 / image.width.to_f
        y1_norm = y2 / image.height.to_f

        Field.new(
          type: ID_TO_CLASS[class_id],
          x: x0_norm,
          y: y0_norm,
          w: (x1_norm - x0_norm),
          h: (y1_norm - y0_norm),
          confidence: score
        )
      end

      sort_fields(fields, y_threshold: 10.0 / image.height)
    end

    # Crops the image to its non-white content plus `padding` pixels on each
    # side (clamped to the image bounds).
    #
    # @return [Array] `[image, left_offset, top_offset]`; the image is returned
    #   untouched with zero offsets when `padding` is nil
    def trim_image_with_padding(image, padding = 0)
      return [image, 0, 0] if padding.nil?

      left, top, trim_width, trim_height = image.find_trim(threshold: 10, background: [255, 255, 255])

      padded_left = [left - padding, 0].max
      padded_top = [top - padding, 0].max
      padded_right = [left + trim_width + padding, image.width].min
      padded_bottom = [top + trim_height + padding, image.height].min

      width = padded_right - padded_left
      height = padded_bottom - padded_top

      trimmed_image = image.crop(padded_left, padded_top, width, height)

      [trimmed_image, padded_left, padded_top]
    end

    # Resizes the image to `resolution` x `resolution`, normalizes it with
    # MEAN/STD and returns it as a [1, 3, resolution, resolution] tensor.
    # With `aspect_ratio` the image is scaled uniformly and centered on a
    # white canvas; otherwise both axes are scaled independently.
    #
    # @return [Array] `[tensor, transform_info]` where transform_info holds the
    #   scale and padding needed to map boxes back to the input image
    def preprocess_image(image, resolution, aspect_ratio: false)
      scale_x = resolution.to_f / image.width
      scale_y = resolution.to_f / image.height

      if aspect_ratio
        scale = [scale_x, scale_y].min

        new_width = (image.width * scale).round
        new_height = (image.height * scale).round

        resized = image.resize(scale, vscale: scale, kernel: :lanczos3)

        pad_x = ((resolution - new_width) / 2.0).round
        pad_y = ((resolution - new_height) / 2.0).round

        image = resized.embed(pad_x, pad_y, resolution, resolution, background: [255, 255, 255])

        transform_info = { scale_x: scale, scale_y: scale, pad_x: pad_x, pad_y: pad_y }
      else
        image = image.resize(scale_x, vscale: scale_y, kernel: :lanczos3)

        transform_info = { scale_x: scale_x, scale_y: scale_y, pad_x: 0, pad_y: 0 }
      end

      image /= 255.0

      image = (image - MEAN) / STD

      pixel_data = image.write_to_memory
      img_array = Numo::SFloat.from_binary(pixel_data, [resolution, resolution, 3])

      # Height-width-channel to channel-height-width.
      img_array = img_array.transpose(2, 0, 1)

      [img_array.reshape(1, 3, resolution, resolution), transform_info]
    end

    # Greedy non-maximum suppression: repeatedly keeps the highest scoring box
    # and drops the remaining boxes whose IoU with it exceeds `iou_threshold`.
    #
    # @param boxes [Numo::SFloat] rows of x1, y1, x2, y2
    # @param scores [Numo::SFloat]
    # @return [Numo::Int32] indices of the kept boxes
    def nms(boxes, scores, iou_threshold = 0.5)
      return Numo::Int32[] if boxes.shape[0].zero?

      x1 = boxes[true, 0]
      y1 = boxes[true, 1]
      x2 = boxes[true, 2]
      y2 = boxes[true, 3]

      areas = (x2 - x1) * (y2 - y1)
      order = scores.sort_index.reverse

      keep = []

      while order.size.positive?
        i = order[0]
        keep << i

        break if order.size == 1

        rest = order[1..]

        xx1 = Numo::SFloat.maximum(x1[i], x1[rest])
        yy1 = Numo::SFloat.maximum(y1[i], y1[rest])
        xx2 = Numo::SFloat.minimum(x2[i], x2[rest])
        yy2 = Numo::SFloat.minimum(y2[i], y2[rest])

        w = Numo::SFloat.maximum(0.0, xx2 - xx1)
        h = Numo::SFloat.maximum(0.0, yy2 - yy1)

        intersection = w * h
        iou = intersection / (areas[i] + areas[rest] - intersection)

        # `inds` index into `rest`, hence the + 1 to address `order`.
        inds = iou.le(iou_threshold).where
        order = order[inds + 1]
      end

      Numo::Int32.cast(keep)
    end

    # Converts the raw model outputs into boxes in input image coordinates.
    #
    # Boxes are read as (cx, cy, w, h), scaled by RESOLUTION, then the padding,
    # scale and trim offset from `transform_info` are undone. Scores are the
    # per-box maximum of the sigmoid of the logits; boxes scoring above
    # `confidence` are kept and appended to `detections` when it is given.
    # Missing trim offsets in `transform_info` are treated as 0.
    #
    # @return [Hash] `{ xyxy:, confidence:, class_id: }`
    def postprocess_outputs(outputs, transform_info, detections = nil, confidence: 0.3, temperature: 1)
      boxes = Numo::SFloat.cast(outputs['dets'])
      logits = Numo::SFloat.cast(outputs['labels'])

      boxes = boxes[0, true, true] # [300, 4]
      logits = logits[0, true, true] # [300, num_classes]

      scaled_logits = logits / temperature

      probs = 1.0 / (1.0 + Numo::NMath.exp(-scaled_logits))

      scores = probs.max(axis: 1)
      labels = probs.argmax(axis: 1)

      cx = boxes[true, 0]
      cy = boxes[true, 1]
      w = boxes[true, 2]
      h = boxes[true, 3]

      boxes_xyxy = Numo::SFloat.zeros(boxes.shape[0], 4)
      boxes_xyxy[true, 0] = cx - (w / 2.0)
      boxes_xyxy[true, 1] = cy - (h / 2.0)
      boxes_xyxy[true, 2] = cx + (w / 2.0)
      boxes_xyxy[true, 3] = cy + (h / 2.0)

      boxes_xyxy *= RESOLUTION

      pad_x = transform_info[:pad_x]
      pad_y = transform_info[:pad_y]

      boxes_xyxy[true, 0] -= pad_x
      boxes_xyxy[true, 1] -= pad_y
      boxes_xyxy[true, 2] -= pad_x
      boxes_xyxy[true, 3] -= pad_y

      scale_x = transform_info[:scale_x]
      scale_y = transform_info[:scale_y]

      boxes_xyxy[true, 0] /= scale_x
      boxes_xyxy[true, 1] /= scale_y
      boxes_xyxy[true, 2] /= scale_x
      boxes_xyxy[true, 3] /= scale_y

      trim_offset_x = transform_info.fetch(:trim_offset_x, 0)
      trim_offset_y = transform_info.fetch(:trim_offset_y, 0)

      boxes_xyxy[true, 0] += trim_offset_x
      boxes_xyxy[true, 1] += trim_offset_y
      boxes_xyxy[true, 2] += trim_offset_x
      boxes_xyxy[true, 3] += trim_offset_y

      keep_indices = scores.gt(confidence).where

      if keep_indices.empty?
        detections || {
          xyxy: Numo::SFloat[],
          confidence: Numo::SFloat[],
          class_id: Numo::Int32[]
        }
      else
        scores = scores[keep_indices]
        labels = labels[keep_indices]
        boxes_xyxy = boxes_xyxy[keep_indices, true]

        if detections
          existing_n = detections[:xyxy].shape[0]
          new_n = boxes_xyxy.shape[0]
          total = existing_n + new_n

          xyxy = Numo::SFloat.zeros(total, 4)
          conf = Numo::SFloat.zeros(total)
          cls = Numo::Int32.zeros(total)

          if existing_n.positive?
            xyxy[0...existing_n, true] = detections[:xyxy]
            conf[0...existing_n] = detections[:confidence]
            cls[0...existing_n] = detections[:class_id]
          end

          xyxy[existing_n...total, true] = boxes_xyxy
          conf[existing_n...total] = scores
          cls[existing_n...total] = Numo::Int32.cast(labels)

          { xyxy: xyxy, confidence: conf, class_id: cls }
        else
          {
            xyxy: boxes_xyxy,
            confidence: scores,
            class_id: Numo::Int32.cast(labels)
          }
        end
      end
    end

    # Orders fields in reading order: fields whose `y` is within `y_threshold`
    # of the first field of the current row form one row, and every row is
    # sorted by `x`.
    def sort_fields(fields, y_threshold: 0.01)
      sorted_fields = fields.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y }

      lines = []
      current_line = []

      sorted_fields.each do |field|
        if current_line.empty? || (field.y - current_line.first.y).abs < y_threshold
          current_line << field
        else
          lines << current_line.sort_by(&:x)
          current_line = [field]
        end
      end

      lines << current_line.sort_by(&:x) unless current_line.empty?

      lines.flatten
    end

    # Applies non-maximum suppression to a detections hash and returns a new
    # hash holding only the kept rows.
    def apply_nms(detections, threshold = 0.5)
      return detections if detections[:xyxy].shape[0].zero?

      keep_indices = nms(detections[:xyxy], detections[:confidence], threshold)

      {
        xyxy: detections[:xyxy][keep_indices, true],
        confidence: detections[:confidence][keep_indices],
        class_id: detections[:class_id][keep_indices]
      }
    end

    # Lazily loads and memoizes the ONNX model on the CPU execution provider.
    def model
      @model ||= OnnxRuntime::Model.new(
        MODEL_PATH.to_s,
        inter_op_num_threads: CPU_THREADS,
        intra_op_num_threads: CPU_THREADS,
        enable_mem_pattern: false,
        enable_cpu_mem_arena: false,
        providers: ['CPUExecutionProvider']
      )
    end
    # rubocop:enable Metrics
  end
end
Loading…
Cancel
Save