Merge from docusealco/wip

pull/402/head 2.2.1
Alex Turchyn 1 month ago committed by GitHub
commit f87ef670d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -3,6 +3,12 @@
class TemplateDocumentsController < ApplicationController
load_and_authorize_resource :template
FILES_TTL = 5.minutes
# Responds with a JSON array containing one proxy URL per schema document of
# the loaded template. Each URL is given an expiry timestamp FILES_TTL from
# the moment it is generated.
def index
  document_urls = @template.schema_documents.map do |document|
    ActiveStorage::Blob.proxy_url(document.blob, expires_at: FILES_TTL.from_now.to_i)
  end

  render json: document_urls
end
def create
if params[:blobs].blank? && params[:files].blank?
return render json: { error: I18n.t('file_is_missing') }, status: :unprocessable_content

@ -17,7 +17,7 @@ class TemplatesDebugController < ApplicationController
fields = Templates::FindAcroFields.call(pdf, attachment, data)
end
fields = Templates::DetectFields.call(StringIO.new(data), attachment:) if fields.blank?
fields, = Templates::DetectFields.call(StringIO.new(data), attachment:) if fields.blank?
attachment.metadata['pdf'] ||= {}
attachment.metadata['pdf']['fields'] = fields

@ -167,6 +167,7 @@ safeRegisterElement('template-builder', class extends HTMLElement {
withConditions: this.dataset.withConditions === 'true',
withGoogleDrive: this.dataset.withGoogleDrive === 'true',
withReplaceAndCloneUpload: true,
withDownload: true,
currencies: (this.dataset.currencies || '').split(',').filter(Boolean),
acceptFileTypes: this.dataset.acceptFileTypes,
showTourStartForm: this.dataset.showTourStartForm === 'true'

@ -175,7 +175,10 @@
{{ t('save') }}
</span>
</button>
<div class="dropdown dropdown-end">
<div
class="dropdown dropdown-end"
:class="{ 'dropdown-open': isDownloading }"
>
<label
tabindex="0"
class="base-button !rounded-l-none !pl-1 !pr-2 !border-l-neutral-500"
@ -209,6 +212,30 @@
<span class="whitespace-nowrap">{{ t('preferences') }}</span>
</a>
</li>
<li v-if="withDownload">
<button
class="flex space-x-2"
:disabled="isDownloading"
@click.stop.prevent="download"
>
<IconInnerShadowTop
v-if="isDownloading"
class="animate-spin w-6 h-6 flex-shrink-0"
/>
<IconDownload
v-else
class="w-6 h-6 flex-shrink-0"
/>
<span
v-if="isDownloading"
class="whitespace-nowrap"
>{{ t('downloading_') }}</span>
<span
v-else
class="whitespace-nowrap"
>{{ t('download') }}</span>
</button>
</li>
</ul>
</div>
</span>
@ -457,6 +484,7 @@
:show-tour-start-form="showTourStartForm"
@add-field="addField"
@set-draw="[drawField = $event.field, drawOption = $event.option]"
@select-submitter="selectedSubmitter = $event"
@set-draw-type="[drawFieldType = $event, showDrawField = true]"
@set-drag="dragField = $event"
@set-drag-placeholder="$refs.dragPlaceholder.dragPlaceholder = $event"
@ -511,7 +539,7 @@ import DocumentPreview from './preview'
import DocumentControls from './controls'
import MobileFields from './mobile_fields'
import FieldSubmitter from './field_submitter'
import { IconPlus, IconUsersPlus, IconDeviceFloppy, IconChevronDown, IconEye, IconWritingSign, IconInnerShadowTop, IconInfoCircle, IconAdjustments } from '@tabler/icons-vue'
import { IconPlus, IconUsersPlus, IconDeviceFloppy, IconChevronDown, IconEye, IconWritingSign, IconInnerShadowTop, IconInfoCircle, IconAdjustments, IconDownload } from '@tabler/icons-vue'
import { v4 } from 'uuid'
import { ref, computed, toRaw } from 'vue'
import * as i18n from './i18n'
@ -537,6 +565,7 @@ export default {
Contenteditable,
IconUsersPlus,
IconChevronDown,
IconDownload,
IconAdjustments,
IconEye,
IconDeviceFloppy
@ -584,6 +613,11 @@ export default {
required: false,
default: null
},
withDownload: {
type: Boolean,
required: false,
default: false
},
backgroundColor: {
type: String,
required: false,
@ -805,6 +839,7 @@ export default {
return {
documentRefs: [],
isBreakpointLg: false,
isDownloading: false,
isLoadingBlankPage: false,
isSaving: false,
selectedSubmitter: null,
@ -963,6 +998,75 @@ export default {
},
methods: {
toRaw,
download () {
this.isDownloading = true
this.baseFetch(`/templates/${this.template.id}/documents`).then(async (response) => {
if (response.ok) {
const urls = await response.json()
const isMobileSafariIos = 'ontouchstart' in window && navigator.maxTouchPoints > 0 && /AppleWebKit/i.test(navigator.userAgent)
const isSafariIos = isMobileSafariIos || /iPhone|iPad|iPod/i.test(navigator.userAgent)
if (isSafariIos && urls.length > 1) {
this.downloadSafariIos(urls)
} else {
this.downloadUrls(urls)
}
} else {
alert(this.t('failed_to_download_files'))
}
})
},
downloadUrls (urls) {
const fileRequests = urls.map((url) => {
return () => {
return fetch(url).then(async (resp) => {
const blobUrl = URL.createObjectURL(await resp.blob())
const link = document.createElement('a')
link.href = blobUrl
link.setAttribute('download', decodeURI(url.split('/').pop()))
link.click()
URL.revokeObjectURL(blobUrl)
})
}
})
fileRequests.reduce(
(prevPromise, request) => prevPromise.then(() => request()),
Promise.resolve()
).finally(() => {
this.isDownloading = false
})
},
downloadSafariIos (urls) {
const fileRequests = urls.map((url) => {
return fetch(url).then(async (resp) => {
const blob = await resp.blob()
const blobUrl = URL.createObjectURL(blob.slice(0, blob.size, 'application/octet-stream'))
const link = document.createElement('a')
link.href = blobUrl
link.setAttribute('download', decodeURI(url.split('/').pop()))
return link
})
})
Promise.all(fileRequests).then((links) => {
links.forEach((link, index) => {
setTimeout(() => {
link.click()
URL.revokeObjectURL(link.href)
}, index * 50)
})
}).finally(() => {
this.isDownloading = false
})
},
onDragover (e) {
if (this.$refs.dragPlaceholder?.dragPlaceholder) {
this.$refs.dragPlaceholder.isMask = e.target.id === 'mask'

@ -222,7 +222,16 @@
width="22"
class="animate-spin"
/>
<span class="hidden md:inline">
<span
v-if="analyzingProgress"
class="hidden md:inline"
>
{{ Math.round(analyzingProgress * 100) }}% {{ t('analyzing_') }}
</span>
<span
v-else
class="hidden md:inline"
>
{{ fieldPagesLoaded }} / {{ numberOfPages }} {{ t('processing_') }}
</span>
</template>
@ -363,10 +372,11 @@ export default {
default: false
}
},
emits: ['add-field', 'set-draw', 'set-draw-type', 'set-drag', 'drag-end', 'scroll-to-area', 'change-submitter', 'set-drag-placeholder'],
emits: ['add-field', 'set-draw', 'set-draw-type', 'set-drag', 'drag-end', 'scroll-to-area', 'change-submitter', 'set-drag-placeholder', 'select-submitter'],
data () {
return {
fieldPagesLoaded: null,
analyzingProgress: 0,
defaultFieldsSearch: ''
}
},
@ -448,8 +458,6 @@ export default {
while (true) {
const { value, done } = await reader.read()
if (done) break
buffer += decoder.decode(value, { stream: true })
const lines = buffer.split('\n\n')
@ -464,10 +472,21 @@ export default {
if (data.error) {
alert(data.error)
this.template.fields = data.fields || fields
break
} else if (data.analyzing) {
this.analyzingProgress = data.progress
} else if (data.completed) {
this.fieldPagesLoaded = null
this.template.fields = fields
if (data.submitters) {
this.template.submitters = data.submitters
this.$emit('select-submitter', this.template.submitters[0])
}
this.template.fields = data.fields || fields
this.save()
break
@ -484,11 +503,14 @@ export default {
}
}
}
if (done) break
}
}).catch(error => {
console.error('Error in streaming message: ', error)
}).finally(() => {
this.fieldPagesLoaded = null
this.analyzingProgress = null
this.isFieldsLoading = false
})
},

@ -1,4 +1,7 @@
const en = {
analyzing_: 'Analyzing...',
download: 'Download',
downloading_: 'Downloading...',
view: 'View',
autodetect_fields: 'Autodetect fields',
payment_link: 'Payment link',
@ -185,6 +188,9 @@ const en = {
}
const es = {
analyzing_: 'Analizando...',
download: 'Descargar',
downloading_: 'Descargando...',
view: 'Vista',
payment_link: 'Enlace de pago',
strikeout: 'Tachar',
@ -370,6 +376,9 @@ const es = {
}
const it = {
analyzing_: 'Analisi...',
download: 'Scarica',
downloading_: 'Download in corso...',
view: 'Vista',
payment_link: 'Link di pagamento',
strikeout: 'Barrato',
@ -555,6 +564,9 @@ const it = {
}
const pt = {
analyzing_: 'Analisando...',
download: 'Baixar',
downloading_: 'Baixando...',
view: 'Visualizar',
payment_link: 'Link de pagamento',
strikeout: 'Tachado',
@ -740,6 +752,9 @@ const pt = {
}
const fr = {
analyzing_: 'Analyse...',
download: 'Télécharger',
downloading_: 'Téléchargement...',
view: 'Voir',
payment_link: 'Lien de paiement',
strikeout: 'Rature',
@ -925,6 +940,9 @@ const fr = {
}
const de = {
analyzing_: 'Analysiere...',
download: 'Download',
downloading_: 'Download...',
view: 'Anzeigen',
payment_link: 'Zahlungslink',
strikeout: 'Durchstreichen',
@ -1110,6 +1128,9 @@ const de = {
}
const nl = {
analyzing_: 'Analyseren...',
download: 'Downloaden',
downloading_: 'Downloaden...',
view: 'Bekijken',
payment_link: 'Betaallink',
strikeout: 'Doorhalen',

@ -81,7 +81,7 @@
<% document = @submission.schema_documents.find { |a| item['attachment_uuid'] == a.uuid } %>
<% if document.preview_images.first %>
<scroll-to data-selector-id="page-<%= document.uuid %>-0" class="block cursor-pointer">
<img src="<%= Docuseal::URL_CACHE.fetch([document.id, document.uuid, 0].join(':'), expires_in: 10.minutes) { document.preview_images.first.url } %>" width="<%= document.preview_images.first.metadata['width'] %>" height="<%= document.preview_images.first.metadata['height'] %>" class="rounded border" loading="lazy">
<img src="<%= (document.preview_images.find { |e| e.filename.base.to_i.zero? } || document.preview_images.first).url %>" width="<%= document.preview_images.first.metadata['width'] %>" height="<%= document.preview_images.first.metadata['height'] %>" class="rounded border" loading="lazy">
<div class="pb-2 pt-1.5 text-center" dir="auto">
<%= item['name'].presence || document.filename.base %>
</div>
@ -103,7 +103,7 @@
<% (document.metadata.dig('pdf', 'number_of_pages') || (document.preview_images.loaded? ? preview_images_index.size : document.preview_images.size)).times do |index| %>
<% page = preview_images_index[index] || page_blob_struct.new(metadata: lazyload_metadata, url: preview_document_page_path(document.signed_uuid, "#{index}.jpg")) %>
<page-container id="<%= "page-#{document.uuid}-#{index}" %>" class="block before:border before:absolute before:top-0 before:bottom-0 before:left-0 before:right-0 before:rounded relative mb-4" style="container-type: size; aspect-ratio: <%= width = page.metadata['width'] %> / <%= height = page.metadata['height'] %>">
<img loading="lazy" src="<%= Docuseal::URL_CACHE.fetch([document.id, document.uuid, index].join(':'), expires_in: 10.minutes) { page.url } %>" width="<%= width %>" class="rounded" height="<%= height %>">
<img loading="lazy" src="<%= page.url %>" width="<%= width %>" class="rounded" height="<%= height %>">
<div class="top-0 bottom-0 left-0 right-0 absolute">
<% document_annots_index[index]&.each do |annot| %>
<%= render 'submissions/annotation', annot: %>

@ -98,11 +98,9 @@ Rails.application.routes.draw do
resources :submissions_filters, only: %i[show], param: 'name'
resources :templates, only: %i[new create edit update show destroy] do
resource :debug, only: %i[show], controller: 'templates_debug' if Rails.env.development?
resources :documents, only: %i[create], controller: 'template_documents'
resources :documents, only: %i[index create], controller: 'template_documents'
resources :clone_and_replace, only: %i[create], controller: 'templates_clone_and_replace'
if !Docuseal.multitenant? || Docuseal.demo?
resources :detect_fields, only: %i[create], controller: 'templates_detect_fields'
end
resources :detect_fields, only: %i[create], controller: 'templates_detect_fields' unless Docuseal.multitenant?
resources :restore, only: %i[create], controller: 'templates_restore'
resources :archived, only: %i[index], controller: 'templates_archived_submissions'
resources :submissions, only: %i[new create]

@ -39,6 +39,7 @@ Puma::Plugin.create do
configs = Sidekiq.configure_embed do |config|
config.logger.level = Logger::INFO
sidekiq_config = YAML.load_file('config/sidekiq.yml')
sidekiq_config['queues'] << 'fields' if ENV['DEMO'] == 'true'
config.queues = sidekiq_config['queues']
config.concurrency = ENV.fetch('SIDEKIQ_THREADS', 5).to_i
config.merge!(sidekiq_config)

@ -5,17 +5,63 @@ module Templates
module_function
TextFieldBox = Struct.new(:x, :y, :w, :h, keyword_init: true)
# rubocop:disable Metrics
PageNode = Struct.new(:prev, :next, :elem, :page, :attachment_uuid, keyword_init: true)
# Matches text that ENDS with one of the date keywords ("date", "signed at",
# "datum"), optionally followed by a ':' or '-' and surrounding whitespace.
# Case-insensitive; written in extended mode, so the whitespace and line
# breaks inside the literal are not part of the pattern.
# The keyword is not anchored to a word boundary, so any text whose tail
# contains a keyword (e.g. "update:") matches as well.
# type_from_page_node tests the text preceding a field against this pattern
# to turn a detected 'text' field into a 'date' field.
DATE_REGEXP = /
(?:
date
| signed\sat
| datum
)
\s*[:-]?\s*\z
/ix
# Matches text that ENDS with a price / amount / quantity keyword or a
# currency sign, optionally followed by a ':' or '-' and surrounding
# whitespace. Case-insensitive; written in extended mode, so the whitespace
# and line breaks inside the literal are not part of the pattern.
# type_from_page_node tests the text preceding a field against this pattern
# to turn a detected 'text' field into a 'number' field.
#
# Every alternative must be non-empty: an empty branch matches the empty
# string, which makes the whole pattern match ANY text and would classify
# every otherwise unlabelled text field as a number.
NUMBER_REGEXP = /
  (?:
    price
    | \$
    | €
    | total
    | quantity
    | prix
    | quantité
    | preis
    | summe
    | gesamt(?:betrag)?
    | menge
    | anzahl
    | stückzahl
  )
  \s*[:-]?\s*\z
/ix
# Matches text that ENDS with one of the signature keywords listed below,
# optionally followed by a ':' or '-' and surrounding whitespace.
# Case-insensitive; written in extended mode, so the whitespace and line
# breaks inside the literal are not part of the pattern.
# The keyword is not anchored to a word boundary, so any text whose tail
# contains a keyword (e.g. "design:") matches as well.
# type_from_page_node tests the text preceding a field against this pattern
# (after DATE_REGEXP) to turn a detected 'text' field into a 'signature' field.
SIGNATURE_REGEXP = /
(?:
signature
| sign\shere
| sign
| signez\sici
| signer\sici
| unterschrift
| unterschreiben
| unterzeichnen
)
\s*[:-]?\s*\z
/ix
# rubocop:disable Metrics, Style
def call(io, attachment: nil, confidence: 0.3, temperature: 1, inference: Templates::ImageToFields,
nms: 0.1, split_page: false, aspect_ratio: true, padding: 20, &)
nms: 0.1, split_page: false, aspect_ratio: true, padding: 20, regexp_type: true, &)
fields, head_node =
if attachment&.image?
process_image_attachment(io, attachment:, confidence:, nms:, split_page:, inference:,
temperature:, aspect_ratio:, padding:, &)
else
process_pdf_attachment(io, attachment:, confidence:, nms:, split_page:, inference:,
temperature:, aspect_ratio:, padding:, &)
temperature:, aspect_ratio:, regexp_type:, padding:, &)
end
[fields, head_node]
end
def process_image_attachment(io, attachment:, confidence:, nms:, temperature:, inference:,
@ -29,7 +75,7 @@ module Templates
{
uuid: SecureRandom.uuid,
type: f.type,
required: true,
required: f.type != 'checkbox',
preferences: {},
areas: [{
x: f.x,
@ -44,21 +90,24 @@ module Templates
yield [attachment&.uuid, 0, fields] if block_given?
fields
[fields, nil]
end
def process_pdf_attachment(io, attachment:, confidence:, nms:, temperature:, inference:,
split_page: false, aspect_ratio: false, padding: nil)
split_page: false, aspect_ratio: false, padding: nil, regexp_type: false)
doc = Pdfium::Document.open_bytes(io.read)
doc.page_count.times.flat_map do |page_number|
head_node = PageNode.new(elem: ''.b, page: 0, attachment_uuid: attachment&.uuid)
tail_node = head_node
fields = doc.page_count.times.flat_map do |page_number|
page = doc.get_page(page_number)
data, width, height = page.render_to_bitmap(width: inference::RESOLUTION * 1.5)
image = Vips::Image.new_from_memory(data, width, height, 4, :uchar)
fields = inference.call(image, confidence: 0.05, nms:, split_page:,
fields = inference.call(image, confidence: confidence / 4.0, nms:, split_page:,
temperature:, aspect_ratio:, padding:)
text_fields = extract_text_fields_from_page(page)
@ -67,17 +116,23 @@ module Templates
fields = increase_confidence_for_overlapping_fields(fields, text_fields)
fields = increase_confidence_for_overlapping_fields(fields, line_fields)
fields = fields.filter_map do |f|
next if f.confidence < confidence
fields = fields.reject { |f| f.confidence < confidence }
field_nodes, tail_node = build_page_nodes(page, fields, tail_node, attachment_uuid: attachment&.uuid)
fields = field_nodes.map do |node|
field = node.elem
type = regexp_type ? type_from_page_node(node) : field.type
{
uuid: SecureRandom.uuid,
type: f.type,
required: true,
type:,
required: type != 'checkbox',
preferences: {},
areas: [{
x: f.x, y: f.y,
w: f.w, h: f.h,
x: field.x, y: field.y,
w: field.w, h: field.h,
page: page_number,
attachment_uuid: attachment&.uuid
}]
@ -90,10 +145,170 @@ module Templates
ensure
page.close
end
print_debug(head_node) if Rails.env.development?
[fields, head_node]
ensure
doc.close
end
# Logs a one-line textual rendering of the node chain starting at head_node.
# String elements are written verbatim; every other element is written as a
# numbered placeholder, "[Checkbox_N]" when its type is 'checkbox' and
# "[Field_N]" otherwise. N counts all placeholders in chain order.
def print_debug(head_node)
  rendered = ''.b
  placeholder_count = 0
  node = head_node

  loop do
    elem = node.elem

    if elem.is_a?(String)
      rendered << elem
    else
      label = elem.type == 'checkbox' ? 'Checkbox' : 'Field'
      placeholder_count += 1
      rendered << "[#{label}_#{placeholder_count}]"
    end

    node = node.next

    break unless node
  end

  Rails.logger.info(rendered)
end
# Picks the field type for a field node from the text that directly precedes
# it in the node chain.
#
# The detected type is kept as is unless the previous node holds a String and
# the detected type is 'text'. In that case the preceding text is tested
# against the date, signature and number patterns, in that order, and the
# first match decides the type; with no match the type stays 'text'.
def type_from_page_node(node)
  detected_type = node.elem.type
  preceding = node.prev.elem

  return detected_type unless preceding.is_a?(String) && detected_type == 'text'

  patterns = {
    'date' => DATE_REGEXP,
    'signature' => SIGNATURE_REGEXP,
    'number' => NUMBER_REGEXP
  }

  matched_type, = patterns.find { |_type, regexp| preceding.match?(regexp) }

  matched_type || 'text'
end
# Interleaves the text nodes of a page with the fields detected on it and
# appends the result to the doubly linked PageNode chain ending at tail_node.
#
# Consecutive text nodes are concatenated into one String node; every field
# becomes a node of its own. Returns [field_nodes, tail_node]: the nodes
# created for the fields (in the order they were appended) and the new end of
# the chain.
#
# page            - object providing text_nodes and page_index.
# fields          - detected fields responding to x, y, w, h.
# tail_node       - current last PageNode of the chain; new nodes are linked
#                   after it.
# attachment_uuid - stored on every created node.
#
# NOTE(review): the merge walks both lists front to back, so it presumably
# expects text_nodes and fields to already be in reading order - confirm
# against the callers.
def build_page_nodes(page, fields, tail_node, attachment_uuid: nil)
field_nodes = []
current_text = ''.b
text_nodes = page.text_nodes
text_idx = 0
field_idx = 0
# Two-pointer merge: each iteration consumes either the current text node or
# the current field.
while text_idx < text_nodes.length || field_idx < fields.length
text_node = text_nodes[text_idx]
field = fields[field_idx]
process_text_node = false
process_field_node = false
if text_node && field
text_y_center = text_node.y + (text_node.h / 2.0)
field_y_center = field.y + (field.h / 2.0)
y_threshold = text_node.h / 2.0
vertical_distance = (text_y_center - field_y_center).abs
# Vertical centers closer than half the text height: treat both as being on
# the same line and order them horizontally.
if vertical_distance < y_threshold
is_underscore = text_node.content == '_'
is_left_of_field = text_node.x < field.x
if is_underscore && is_left_of_field
text_x_end = text_node.x + text_node.w
distance = field.x - text_x_end
proximity_threshold = text_node.w * 3.0
# An underscore ending within three of its own widths of the field's left
# edge is taken as part of the field's blank line: the field goes first and
# the underscore is swallowed by the loop after the field node is appended.
if distance < proximity_threshold
process_field_node = true
else
process_text_node = true
end
elsif is_left_of_field
process_text_node = true
else
process_field_node = true
end
# Different lines: whichever starts higher on the page goes first.
elsif text_node.y < field.y
process_text_node = true
else
process_field_node = true
end
elsif text_node
process_text_node = true
elsif field
process_field_node = true
end
if process_field_node
# Flush the text collected so far so that it becomes the field's direct
# predecessor in the chain.
unless current_text.empty?
new_text_node = PageNode.new(prev: tail_node, elem: current_text, page: page.page_index, attachment_uuid:)
tail_node.next = new_text_node
tail_node = new_text_node
current_text = ''.b
end
new_field_node = PageNode.new(prev: tail_node, elem: field, page: page.page_index, attachment_uuid:)
tail_node.next = new_field_node
tail_node = new_field_node
field_nodes << tail_node
# Skip the underscores that make up the field itself: '_' nodes on the
# field's line that horizontally overlap the field box widened by three
# underscore widths on each side. Stops at the first node that is not one.
# field_y_center comes from the `text_node && field` branch above; this loop
# only runs while text nodes remain, in which case that branch was taken in
# this iteration (assuming text_nodes holds no nil entries).
while text_idx < text_nodes.length
text_node_to_check = text_nodes[text_idx]
is_part_of_field = false
if text_node_to_check.content == '_'
check_y_center = text_node_to_check.y + (text_node_to_check.h / 2.0)
check_y_dist = (check_y_center - field_y_center).abs
check_y_thresh = text_node_to_check.h / 2.0
if check_y_dist < check_y_thresh
padding = text_node_to_check.w * 3.0
field_x_start = field.x - padding
field_x_end = field.x + field.w + padding
text_x_start = text_node_to_check.x
text_x_end = text_node_to_check.x + text_node_to_check.w
is_part_of_field = true if text_x_start <= field_x_end && field_x_start <= text_x_end
end
end
break unless is_part_of_field
text_idx += 1
end
field_idx += 1
elsif process_text_node
# Insert a space when the horizontal gap to the previous text node is more
# than twice the width of the wider of the two nodes.
# NOTE(review): this reads as if text nodes are single glyphs - confirm.
if text_idx > 0
prev_text_node = text_nodes[text_idx - 1]
x_gap = text_node.x - (prev_text_node.x + prev_text_node.w)
gap_w = text_node.w > prev_text_node.w ? text_node.w : prev_text_node.w
current_text << ' ' if x_gap > gap_w * 2
end
current_text << text_node.content
text_idx += 1
end
end
# Text remaining after the last field becomes the final node of this page.
unless current_text.empty?
new_text_node = PageNode.new(prev: tail_node, elem: current_text, page: page.page_index, attachment_uuid:)
tail_node.next = new_text_node
tail_node = new_text_node
end
[field_nodes, tail_node]
end
def extract_line_fields_from_page(page)
line_thickness = 5.0 / page.height
@ -265,6 +480,6 @@ module Templates
image_fields
end
# rubocop:enable Metrics
# rubocop:enable Metrics, Style
end
end

Loading…
Cancel
Save