add documents editor

master^2
Pete Matsyburka 1 week ago
parent ac66809a05
commit 13874b8830

@ -104,7 +104,7 @@ jobs:
bundle install --jobs 4 --retry 4
yarn install
sudo apt-get update
sudo apt-get install libvips
sudo apt-get install libvips liblept5
- name: Run Brakeman
run: bundle exec brakeman -q --exit-on-warn
@ -162,7 +162,7 @@ jobs:
bundle install --jobs 4 --retry 4
yarn install
sudo apt-get update
sudo apt-get install -y libvips
sudo apt-get install -y libvips liblept5
wget -O pdfium-linux.tgz "https://github.com/bblanchon/pdfium-binaries/releases/latest/download/pdfium-linux-$(uname -m | sed 's/x86_64/x64/;s/aarch64/arm64/').tgz"
sudo tar -xzf pdfium-linux.tgz --strip-components=1 -C /usr/lib lib/libpdfium.so
rm -f pdfium-linux.tgz

@ -2,16 +2,20 @@ FROM ruby:4.0.5-alpine AS download
WORKDIR /fonts
RUN apk --no-cache add wget && \
RUN apk --no-cache add wget unzip && \
wget https://github.com/satbyy/go-noto-universal/releases/download/v7.0/GoNotoKurrent-Regular.ttf && \
wget https://github.com/satbyy/go-noto-universal/releases/download/v7.0/GoNotoKurrent-Bold.ttf && \
wget https://github.com/impallari/DancingScript/raw/master/fonts/DancingScript-Regular.otf && \
wget https://raw.githubusercontent.com/impallari/DancingScript/master/OFL.txt && \
wget https://raw.githubusercontent.com/notofonts/noto-fonts/refs/heads/main/LICENSE && \
wget -O /model.onnx "https://github.com/docusealco/fields-detection/releases/download/2.0.0/model_704_int8.onnx" && \
wget -O pdfium-linux.tgz "https://github.com/bblanchon/pdfium-binaries/releases/latest/download/pdfium-linux-musl-$(uname -m | sed 's/x86_64/x64/;s/aarch64/arm64/').tgz" && \
wget -O pdfium-linux.zip "https://github.com/docusealco/pdfium-binaries/releases/download/20260613/pdfium-musl-$(uname -m).zip" && \
case "$(uname -m)" in \
x86_64) echo "2c953ff72ee2dda07e7fc577e25841cc3d6464468a7c5adfaea574efcbc3b90b pdfium-linux.zip" ;; \
aarch64) echo "23bbe287d2753fdb05741c7660647eb0ef0d2e4da2ce0722bfa9d9d455bd64e2 pdfium-linux.zip" ;; \
esac | sha256sum -c - && \
mkdir -p /pdfium-linux && \
tar -xzf pdfium-linux.tgz -C /pdfium-linux
unzip -q pdfium-linux.zip -d /pdfium-linux
FROM ruby:4.0.5-alpine AS webpack
@ -48,7 +52,7 @@ ENV OPENSSL_CONF=/etc/openssl_legacy.cnf
WORKDIR /app
RUN apk add --no-cache libpq vips redis onnxruntime && \
RUN apk add --no-cache libpq vips redis onnxruntime leptonica && \
rm -f /usr/bin/onnx_test_runner /usr/bin/onnxruntime_test
RUN addgroup -g 2000 docuseal && adduser -u 2000 -G docuseal -s /bin/sh -D -h /home/docuseal docuseal
@ -82,7 +86,7 @@ COPY --chown=docuseal:docuseal .version ./public/version
COPY --chown=docuseal:docuseal --from=download /fonts/GoNotoKurrent-Regular.ttf /fonts/GoNotoKurrent-Bold.ttf /fonts/DancingScript-Regular.otf /fonts/OFL.txt /fonts/LICENSE /fonts/
COPY --from=download /pdfium-linux/lib/libpdfium.so /usr/lib/libpdfium.so
COPY --from=download /pdfium-linux/licenses/pdfium.txt /usr/lib/libpdfium-LICENSE.txt
COPY --from=download /pdfium-linux/licenses/ /usr/lib/libpdfium-licenses/
COPY --chown=docuseal:docuseal --from=download /model.onnx /app/tmp/model.onnx
COPY --chown=docuseal:docuseal --from=webpack /app/public/packs ./public/packs

@ -0,0 +1,39 @@
# frozen_string_literal: true
# Backs the crop tool of the documents editor.
#
# * +index+  - responds with the corners that Leptonica detects in the
#   downloaded attachment.
# * +create+ - delegates to Templates::CreateDocumentCrop and responds with the
#   resulting document serialized together with its preview images.
#
# Any Leptonica::LeptonicaError raised while handling a request is turned into
# a 422 response carrying the translated +unable_to_save+ message.
class TemplateDocumentsCropController < ApplicationController
  load_and_authorize_resource :template
  before_action :load_attachment

  rescue_from Leptonica::LeptonicaError, with: :render_unable_to_save

  def index
    corners = Leptonica.detect_document_corners(@attachment.download)

    render json: { corners: corners }
  end

  def create
    # The resource is already loaded/authorized for the action; cropping
    # additionally requires permission to update the template.
    authorize!(:update, @template)

    document = Templates::CreateDocumentCrop.call(@template, @attachment, crop_params)

    render json: { document: serialize_document(document) }
  end

  private

  # Looks the attachment up among the template's own documents only, raising
  # ActiveRecord::RecordNotFound when the uuid does not belong to the template.
  def load_attachment
    @attachment = @template.documents_attachments.find_by!(uuid: params[:attachment_uuid])
  end

  # Permitted crop options: scalar flags plus a list of { x, y } corner hashes.
  def crop_params
    params.permit(:scan, :rotate, :flip_h, :flip_v, corners: [%i[x y]])
  end

  # JSON representation of a document including its preview images.
  def serialize_document(document)
    document.as_json(
      methods: %i[metadata signed_key],
      include: {
        preview_images: { methods: %i[url metadata filename] }
      }
    )
  end

  # Handler for Leptonica failures (see rescue_from above).
  def render_unable_to_save
    render json: { error: I18n.t(:unable_to_save) }, status: :unprocessable_content
  end
end

@ -0,0 +1,32 @@
# frozen_string_literal: true
# Applies a documents layout submitted by the documents editor to a template.
#
# The request carries a +documents+ list; each entry may contain an
# +attachment_uuid+ and a list of +pages+ (with +attachment_uuid+, +page+,
# +rotate+, +redact+ rectangles and an optional +replaced_page+ reference).
# The layout is handed to Templates::ModifyDocuments and the refreshed
# template data is returned as JSON. An invalid layout results in a 422
# response with the translated +unable_to_save+ message.
class TemplateDocumentsModifyController < ApplicationController
  load_and_authorize_resource :template

  def create
    authorize!(:update, @template)

    Templates::ModifyDocuments.call(@template, documents_layout)

    render json: template_json
  rescue Templates::ModifyDocuments::InvalidLayout
    render json: { error: I18n.t(:unable_to_save) }, status: :unprocessable_content
  end

  private

  # Whitelisted layout as an array of plain hashes, one per document.
  def documents_layout
    params.require(:documents).map do |document_params|
      document_params.permit(:attachment_uuid, pages: page_filters).to_h
    end
  end

  # Strong-parameter filters for a single page entry.
  def page_filters
    [:attachment_uuid, :page, :rotate,
     { redact: [%i[x y w h]], replaced_page: %i[attachment_uuid page] }]
  end

  # Response body: template attributes plus the reloaded schema documents.
  def template_json
    {
      schema: @template.schema,
      fields: @template.fields,
      submitters: @template.submitters,
      documents: documents_json
    }
  end

  # Reloads the schema documents and preloads blobs and preview images before
  # serializing them.
  def documents_json
    documents = @template.schema_documents.reload.preload(:blob, preview_images_attachments: :blob)

    documents.as_json(
      methods: %i[metadata signed_key],
      include: {
        preview_images: { methods: %i[url metadata filename] }
      }
    )
  end
end

@ -0,0 +1,11 @@
# frozen_string_literal: true
# Responds with the page objects that Templates::ModifyDocuments.page_objects
# reports for one page of a template document.
class TemplateDocumentsPageObjectsController < ApplicationController
  load_and_authorize_resource :template

  def index
    # The attachment is resolved first, so an unknown uuid raises before the
    # page parameter is converted.
    render json: Templates::ModifyDocuments.page_objects(requested_attachment, params[:page].to_i)
  end

  private

  # Attachment identified by +attachment_uuid+, scoped to the template's documents.
  def requested_attachment
    @template.documents_attachments.find_by!(uuid: params[:attachment_uuid])
  end
end

@ -320,6 +320,7 @@
@replace="onDocumentReplace"
@up="moveDocument(item, -1)"
@reorder="reorderFields"
@edit="editModalDocumentUuid = item.attachment_uuid"
@down="moveDocument(item, 1)"
@change="save"
/>
@ -685,6 +686,17 @@
@close="isRevisionsModalOpen = false"
@apply="onRevisionApply"
/>
<DocumentsEditorModal
v-if="editModalDocumentUuid"
:template="template"
:authenticity-token="authenticityToken"
:accept-file-types="acceptFileTypes"
:base-url="baseUrl"
:page-preview-format="pagePreviewFormat"
:scroll-to-attachment-uuid="editModalDocumentUuid"
@saved="onDocumentsModified"
@close="editModalDocumentUuid = null"
/>
</div>
</div>
</template>
@ -704,6 +716,7 @@ import DocumentControls from './controls'
import MobileFields from './mobile_fields'
import FieldSubmitter from './field_submitter'
import RevisionsModal from './revisions_modal'
import DocumentsEditorModal from './documents_editor_modal'
import { IconPlus, IconUsersPlus, IconDeviceFloppy, IconChevronDown, IconEye, IconWritingSign, IconInnerShadowTop, IconInfoCircle, IconAdjustments, IconDownload, IconHistory, IconX } from '@tabler/icons-vue'
import { v4 } from 'uuid'
import { ref, computed, toRaw, defineAsyncComponent } from 'vue'
@ -747,7 +760,8 @@ export default {
IconHistory,
IconDeviceFloppy,
IconX,
RevisionsModal
RevisionsModal,
DocumentsEditorModal
},
provide () {
return {
@ -1103,6 +1117,7 @@ export default {
isDragFile: false,
isMathLoaded: false,
isRevisionsModalOpen: false,
editModalDocumentUuid: null,
revisions: [],
beforeRevisionSnapshot: null,
zoomLevel: 1
@ -1678,7 +1693,7 @@ export default {
ref.x = e.clientX - ref.offsetX
ref.y = e.clientY - ref.offsetY
} else if (e.dataTransfer?.types?.includes('Files')) {
} else if (e.dataTransfer?.types?.includes('Files') && !this.editModalDocumentUuid) {
this.isDragFile = true
}
},
@ -2121,6 +2136,10 @@ export default {
}
},
onKeyDown (event) {
if (this.editModalDocumentUuid) {
return
}
if (event.key === 'Tab' && document.activeElement === document.body) {
event.stopImmediatePropagation()
event.preventDefault()
@ -3139,6 +3158,22 @@ export default {
onDocumentsReplaceAndTemplateClone (template) {
window.Turbo.visit(`/templates/${template.id}/edit`)
},
onDocumentsModified (data) {
this.template.schema = data.schema
this.template.fields = data.fields
this.template.submitters = data.submitters
this.template.documents = data.documents
this.selectedAreasRef.value = []
if (!this.template.submitters.find((s) => s.uuid === this.selectedSubmitter?.uuid)) {
this.selectedSubmitter = this.template.submitters[0]
}
this.editModalDocumentUuid = null
this.save()
},
moveDocument (item, direction) {
const currentIndex = this.template.schema.indexOf(item)

@ -0,0 +1,328 @@
<template>
<div class="flex flex-1 min-h-0">
<!-- Crop view: page preview with four draggable corner handles on the left, action sidebar on the right. -->
<div
class="flex-1 min-h-0 flex items-center justify-center px-6 py-4"
style="container-type: size"
>
<div
ref="pageEl"
class="relative select-none"
:style="pageStyle"
>
<img
:src="imageUrl"
:width="metadata.width"
:height="metadata.height"
class="absolute border rounded pointer-events-none"
style="left: 50%; top: 50%; width: 100%; height: auto"
:style="imageStyle"
>
<!-- The path covers the whole box plus the selected quadrilateral with an even-odd fill, so only the area outside the selection is dimmed; the polygon outlines the selection. -->
<svg
class="absolute inset-0 w-full h-full pointer-events-none"
viewBox="0 0 100 100"
preserveAspectRatio="none"
>
<path
:d="dimPath"
fill="black"
fill-opacity="0.4"
fill-rule="evenodd"
/>
<polygon
:points="polygonPoints"
fill="none"
stroke="white"
stroke-width="0.4"
vector-effect="non-scaling-stroke"
/>
</svg>
<!-- One handle per displayed corner, positioned as a percentage of the page box; a drag starts on mousedown. -->
<div
v-for="(corner, cornerIndex) in displayCorners"
:key="cornerIndex"
class="absolute w-5 h-5 -ml-2.5 -mt-2.5 rounded-full bg-white border-2 border-neutral-600 cursor-move shadow"
:style="{ left: `${corner.x * 100}%`, top: `${corner.y * 100}%` }"
@mousedown.prevent="onCornerMousedown(cornerIndex)"
/>
</div>
</div>
<div class="w-56 flex-none border-l px-4 py-4 space-y-2 flex flex-col">
<!-- "Crop & scan" calls submit(true), "crop" calls submit(false); both are disabled while isProcessing is set and show a spinner for the running action. -->
<button
class="btn btn-sm w-full justify-start normal-case font-normal rounded disabled:bg-base-300"
:disabled="!!isProcessing"
@click.prevent="submit(true)"
>
<IconInnerShadowTop
v-if="isProcessing === 'scan'"
class="w-4 h-4 animate-spin"
/>
<IconScan
v-else
class="w-4 h-4"
/>
{{ t('crop_and_scan') }}
</button>
<button
class="btn btn-sm w-full justify-start normal-case font-normal rounded disabled:bg-base-300"
:disabled="!!isProcessing"
@click.prevent="submit(false)"
>
<IconInnerShadowTop
v-if="isProcessing === 'crop'"
class="w-4 h-4 animate-spin"
/>
<IconCrop
v-else
width="22"
height="22"
style="margin-left: -3px"
:stroke-width="1.5"
/>
<span :style="{ 'margin-left': isProcessing === 'crop' ? '0px' : '-3px' }">
{{ t('crop') }}
</span>
</button>
<button
class="btn btn-sm w-full justify-start normal-case font-normal rounded"
@click.prevent="$emit('cancel')"
>
<IconX class="w-4 h-4" />
{{ t('cancel') }}
</button>
<div class="border-t !mt-3 !mb-1" />
<!-- Orientation controls: they change the preview immediately and their state is sent with the crop request. -->
<!-- NOTE(review): the horizontal flip button uses IconFlipVertical and the vertical flip button uses IconFlipHorizontal; presumably intentional (the icon may depict the mirror axis), confirm. -->
<button
class="btn btn-sm w-full justify-start normal-case font-normal rounded"
@click.prevent="rotateCw"
>
<IconRotateClockwise class="w-4 h-4" />
{{ t('rotate') }}
</button>
<button
class="btn btn-sm w-full justify-start normal-case font-normal rounded"
:class="{ 'btn-active': flipH }"
@click.prevent="toggleFlip('flipH')"
>
<IconFlipVertical class="w-4 h-4" />
{{ t('flip_horizontal') }}
</button>
<button
class="btn btn-sm w-full justify-start normal-case font-normal rounded"
:class="{ 'btn-active': flipV }"
@click.prevent="toggleFlip('flipV')"
>
<IconFlipHorizontal class="w-4 h-4" />
{{ t('flip_vertical') }}
</button>
</div>
</div>
</template>
<script>
import { IconCrop, IconScan, IconInnerShadowTop, IconX, IconRotateClockwise, IconFlipHorizontal, IconFlipVertical } from '@tabler/icons-vue'
// Crop tool of the documents editor.
//
// The user drags four corner handles over a page preview, may rotate or flip the
// preview, and posts the selection to the template's documents_crop endpoint.
// Corners are stored relative to the untransformed source image (fractions of its
// width/height); rotation and flips are applied only when displaying them and are
// undone again when a handle is dragged.
export default {
  name: 'DocumentsEditorCrop',
  components: {
    IconCrop,
    IconScan,
    IconInnerShadowTop,
    IconX,
    IconRotateClockwise,
    IconFlipHorizontal,
    IconFlipVertical
  },
  inject: ['t', 'baseFetch', 'isInlineSize'],
  props: {
    templateId: { type: [Number, String], required: true },
    page: { type: Object, required: true },
    imageUrl: { type: String, required: true },
    metadata: { type: Object, required: true }
  },
  emits: ['apply', 'cancel'],
  data () {
    return {
      // Default selection: the whole image, clockwise from the top-left corner.
      corners: [{ x: 0, y: 0 }, { x: 1, y: 0 }, { x: 1, y: 1 }, { x: 0, y: 1 }],
      // Set once the user grabs a handle so detected corners no longer overwrite it.
      cornersTouched: false,
      rotate: this.page.rotate || 0,
      flipH: false,
      flipV: false,
      draggingIndex: null,
      // null when idle, otherwise 'scan' or 'crop' for the running request.
      isProcessing: null
    }
  },
  computed: {
    // Width/height of the preview box; swapped for quarter turns (90/270).
    displayWidth () {
      const { width, height } = this.metadata

      return this.rotate % 180 ? height : width
    },
    displayHeight () {
      const { width, height } = this.metadata

      return this.rotate % 180 ? width : height
    },
    pageStyle () {
      const ratio = this.displayWidth / this.displayHeight
      const width = this.isInlineSize
        ? `min(100cqw, calc(100cqh * ${ratio}))`
        : `min(100%, calc(78vh * ${ratio}))`

      return {
        aspectRatio: `${this.displayWidth} / ${this.displayHeight}`,
        width
      }
    },
    // The image is centered, rotated, and for quarter turns rescaled so that it
    // fills the swapped box; a negative scale factor mirrors it.
    imageStyle () {
      const scale = this.rotate % 180 ? this.metadata.width / this.metadata.height : 1
      const scaleX = this.flipH ? -scale : scale
      const scaleY = this.flipV ? -scale : scale

      return {
        transform: `translate(-50%, -50%) rotate(${this.rotate}deg) scale(${scaleX}, ${scaleY})`
      }
    },
    // Stored corners mapped into the displayed (flipped/rotated) box.
    displayCorners () {
      const { rotate, flipH, flipV } = this

      return this.corners.map((corner) => this.transformPoint(corner, rotate, flipH, flipV))
    },
    polygonPoints () {
      const pairs = this.displayCorners.map((corner) => `${corner.x * 100},${corner.y * 100}`)

      return pairs.join(' ')
    },
    // Outer rectangle followed by the selection; rendered with an even-odd fill.
    dimPath () {
      const quad = this.displayCorners.map((corner) => `${corner.x * 100} ${corner.y * 100}`)

      return `M 0 0 H 100 V 100 H 0 Z M ${quad.join(' L ')} Z`
    }
  },
  created () {
    // Ask the server for detected corners; they replace the defaults only when
    // exactly four are returned and the user has not started dragging yet.
    const query = new URLSearchParams({ attachment_uuid: this.page.sourceUuid })

    this.baseFetch(`/templates/${this.templateId}/documents_crop?${query}`).then(async (resp) => {
      if (!resp.ok) {
        return
      }

      const data = await resp.json()

      if (data.corners?.length === 4 && !this.cornersTouched) {
        this.corners = data.corners.map(({ x, y }) => ({ x, y }))
      }
    })
  },
  beforeUnmount () {
    window.removeEventListener('mousemove', this.onMousemove)
    window.removeEventListener('mouseup', this.onMouseup)
  },
  methods: {
    // Source point -> displayed point: flips are applied first, then the rotation.
    transformPoint (point, rotate, flipH, flipV) {
      const x = flipH ? 1 - point.x : point.x
      const y = flipV ? 1 - point.y : point.y

      switch (rotate) {
        case 90:
          return { x: 1 - y, y: x }
        case 180:
          return { x: 1 - x, y: 1 - y }
        case 270:
          return { x: y, y: 1 - x }
        default:
          return { x, y }
      }
    },
    // Displayed point -> source point: the rotation is undone first, then the flips.
    inverseTransformPoint (point, rotate, flipH, flipV) {
      let unrotated

      switch (rotate) {
        case 90:
          unrotated = { x: point.y, y: 1 - point.x }
          break
        case 180:
          unrotated = { x: 1 - point.x, y: 1 - point.y }
          break
        case 270:
          unrotated = { x: 1 - point.y, y: point.x }
          break
        default:
          unrotated = { x: point.x, y: point.y }
      }

      return {
        x: flipH ? 1 - unrotated.x : unrotated.x,
        y: flipV ? 1 - unrotated.y : unrotated.y
      }
    },
    rotateCw () {
      this.rotate = (this.rotate + 90) % 360
    },
    toggleFlip (key) {
      this[key] = !this[key]
    },
    // Mouse position as a fraction of the page box, clamped to [0, 1].
    pagePoint (event) {
      const { left, top, width, height } = this.$refs.pageEl.getBoundingClientRect()
      const clampToUnit = (value) => Math.min(Math.max(value, 0), 1)

      return {
        x: clampToUnit((event.clientX - left) / width),
        y: clampToUnit((event.clientY - top) / height)
      }
    },
    onCornerMousedown (index) {
      this.cornersTouched = true
      this.draggingIndex = index

      // Listen on window so the drag keeps working outside the page box.
      window.addEventListener('mousemove', this.onMousemove)
      window.addEventListener('mouseup', this.onMouseup, { once: true })
    },
    onMousemove (event) {
      if (this.draggingIndex === null) {
        return
      }

      const displayed = this.pagePoint(event)

      this.corners[this.draggingIndex] = this.inverseTransformPoint(displayed, this.rotate, this.flipH, this.flipV)
    },
    onMouseup () {
      window.removeEventListener('mousemove', this.onMousemove)

      this.draggingIndex = null
    },
    // Posts the selection; `scan` selects the "crop and scan" variant. Emits
    // `apply` with the returned document, or alerts the server's error message.
    submit (scan) {
      this.isProcessing = scan ? 'scan' : 'crop'

      const payload = {
        attachment_uuid: this.page.sourceUuid,
        corners: this.corners,
        // A zero rotation is omitted from the request body.
        rotate: this.rotate || undefined,
        flip_h: this.flipH,
        flip_v: this.flipV,
        scan
      }

      this.baseFetch(`/templates/${this.templateId}/documents_crop`, {
        method: 'POST',
        body: JSON.stringify(payload),
        headers: { 'Content-Type': 'application/json' }
      }).then(async (resp) => {
        // A body that is not valid JSON is treated as an empty object.
        const data = await resp.json().catch(() => ({}))

        if (resp.ok) {
          this.$emit('apply', data.document)
        } else if (data.error) {
          alert(data.error)
        }
      }).finally(() => {
        this.isProcessing = null
      })
    }
  }
}
</script>

File diff suppressed because it is too large Load Diff

@ -0,0 +1,224 @@
<template>
<div class="relative">
<!-- Page thumbnail with field-area and redaction overlays, optional action buttons and an optional page number. -->
<div
class="relative"
:style="boxStyle"
>
<img
:src="imageUrl"
:width="metadata.width"
:height="metadata.height"
class="rounded border pointer-events-none outline outline-1 -outline-offset-1 transition-[outline-color] duration-75"
:class="[
page.rotate % 180 ? 'absolute inset-0 m-auto w-full' : 'w-full',
selected ? 'outline-neutral-400' : 'outline-transparent'
]"
:style="imageStyle"
:loading="lazy ? 'lazy' : 'eager'"
>
<!-- Rectangles are given as fractions of the page; overlayStyle applies the page rotation to the whole overlay. -->
<!-- NOTE(review): page.redact is read without a fallback here; assumes the parent always passes an array, confirm. -->
<div
v-if="areas.length || page.redact.length"
class="absolute pointer-events-none"
:style="overlayStyle"
>
<div
v-for="(item, areaIndex) in areas"
:key="areaIndex"
class="absolute border rounded-sm opacity-70"
:class="[areaBorderColor(item.submitterIndex), areaBgColor(item.submitterIndex)]"
:style="{
left: `${item.area.x * 100}%`,
top: `${item.area.y * 100}%`,
width: `${item.area.w * 100}%`,
height: `${item.area.h * 100}%`
}"
/>
<div
v-for="(rect, rectIndex) in page.redact"
:key="`redact-${rectIndex}`"
class="absolute bg-black"
:style="{
left: `${rect.x * 100}%`,
top: `${rect.y * 100}%`,
width: `${rect.w * 100}%`,
height: `${rect.h * 100}%`
}"
/>
</div>
<!-- Action buttons: fully opaque when the page is selected, otherwise revealed through the group-hover utility. -->
<div
v-if="withActions"
class="absolute top-1 right-1 flex space-x-1 group-hover:opacity-100"
:class="selected ? 'opacity-100' : 'opacity-0'"
>
<!-- Optional extra action: the prop value is used both as the translation key and as the emitted event name; the eraser icon is shown for 'redact', the crop icon otherwise. -->
<span
v-if="extraAction"
class="tooltip tooltip-bottom"
:data-tip="t(extraAction)"
>
<button
class="btn border-gray-300 bg-white text-base-content btn-xs rounded hover:text-base-100 hover:bg-base-content hover:border-base-content transition-colors p-0"
style="width: 22px; height: 22px; min-height: 22px"
@click.stop.prevent="$emit(extraAction)"
>
<IconEraser
v-if="extraAction === 'redact'"
:width="14"
:height="14"
:stroke-width="1.6"
/>
<IconCrop
v-else
:width="20"
:height="20"
:stroke-width="1.2"
/>
</button>
</span>
<span
class="tooltip tooltip-bottom"
:data-tip="t('rotate')"
>
<button
class="btn border-gray-300 bg-white text-base-content btn-xs rounded hover:text-base-100 hover:bg-base-content hover:border-base-content transition-colors p-0"
style="width: 22px; height: 22px; min-height: 22px"
@click.stop.prevent="$emit('rotate')"
>
<IconRotateClockwise
:width="14"
:height="14"
:stroke-width="1.6"
/>
</button>
</span>
<span
class="tooltip tooltip-bottom"
:data-tip="t('remove')"
>
<button
class="btn border-gray-300 bg-white text-base-content btn-xs rounded hover:text-base-100 hover:bg-base-content hover:border-base-content transition-colors p-0"
style="width: 22px; height: 22px; min-height: 22px"
@click.stop.prevent="$emit('remove')"
>
<IconX
:width="14"
:height="14"
:stroke-width="1.6"
/>
</button>
</span>
</div>
</div>
<!-- Caption is rendered only when a page number is passed. -->
<div
v-if="pageNumber"
class="text-center text-sm pt-1 pointer-events-none"
>
{{ t('page') }} {{ pageNumber }}
</div>
</div>
</template>
<script>
import { IconRotateClockwise, IconX, IconEraser, IconCrop } from '@tabler/icons-vue'
import Area from './area.vue'
// Thumbnail of a single page in the documents editor.
//
// Renders the page image with its rotation applied, overlays field areas and
// redaction rectangles, and emits `rotate`, `remove`, `redact` or `crop` from the
// action buttons. Colors for field areas are taken from the Area component's
// computed palettes and picked by submitter index.
export default {
  name: 'DocumentsEditorPage',
  components: {
    IconRotateClockwise,
    IconX,
    IconEraser,
    IconCrop
  },
  inject: ['t'],
  props: {
    page: { type: Object, required: true },
    imageUrl: { type: String, required: true },
    metadata: { type: Object, required: true },
    areas: { type: Array, default: () => [] },
    selected: { type: Boolean, default: false },
    withActions: { type: Boolean, default: false },
    pageNumber: { type: Number, default: null },
    lazy: { type: Boolean, default: true },
    extraAction: { type: String, default: null }
  },
  emits: ['rotate', 'remove', 'redact', 'crop'],
  computed: {
    borderColors: Area.computed.borderColors,
    bgColors: Area.computed.bgColors,
    // For quarter turns (90/270) the box takes the swapped aspect ratio;
    // otherwise no inline style is needed.
    boxStyle () {
      const { rotate } = this.page

      if (rotate && rotate % 180) {
        return { aspectRatio: `${this.metadata.height} / ${this.metadata.width}` }
      }

      return null
    },
    // Rotates the image; quarter turns are additionally scaled by width / height
    // so the rotated image fits the swapped box.
    imageStyle () {
      const { rotate } = this.page

      if (!rotate) {
        return null
      }

      const transforms = [`rotate(${rotate}deg)`]

      if (rotate % 180) {
        transforms.push(`scale(${this.metadata.width / this.metadata.height})`)
      }

      return { transform: transforms.join(' ') }
    },
    // Positions the overlay so that it receives the same rotation (and, for
    // quarter turns, the same scale) as the image.
    overlayStyle () {
      const { rotate } = this.page

      if (rotate && rotate % 180) {
        const { width, height } = this.metadata

        return {
          left: '50%',
          top: '50%',
          width: '100%',
          aspectRatio: `${width} / ${height}`,
          transform: `translate(-50%, -50%) rotate(${rotate}deg) scale(${width / height})`
        }
      }

      return {
        inset: '0',
        transform: rotate ? `rotate(${rotate}deg)` : undefined
      }
    }
  },
  methods: {
    // Negative submitter indexes are treated as 0; larger ones wrap around.
    areaBorderColor (submitterIndex) {
      const palette = this.borderColors

      return palette[Math.max(submitterIndex, 0) % palette.length]
    },
    areaBgColor (submitterIndex) {
      const palette = this.bgColors

      return palette[Math.max(submitterIndex, 0) % palette.length]
    }
  }
}
</script>

@ -0,0 +1,413 @@
<template>
<div class="flex flex-1 min-h-0">
<!-- Redact view: page with redaction rectangles on the left, apply/reset/cancel sidebar on the right. -->
<div class="flex-1 overflow-y-auto px-6 py-4">
<div
ref="pageEl"
class="relative mx-auto select-none cursor-crosshair"
:style="pageStyle"
@mousedown.prevent="onMousedown"
>
<img
:src="imageUrl"
:width="metadata.width"
:height="metadata.height"
class="absolute border rounded pointer-events-none"
style="left: 50%; top: 50%; width: 100%; height: auto"
:style="imageStyle"
>
<!-- Redaction rectangles are kept in unrotated page fractions; the overlay itself carries the page rotation. -->
<div
class="absolute pointer-events-none"
:style="overlayStyle"
>
<div
v-for="(rect, rectIndex) in redactRects"
:key="`rect-${rectIndex}`"
class="absolute bg-black pointer-events-none"
:style="{
left: `${rect.x * 100}%`,
top: `${rect.y * 100}%`,
width: `${rect.w * 100}%`,
height: `${rect.h * 100}%`
}"
/>
</div>
<!-- Live selection rectangle while dragging; it sits outside the rotated overlay and uses displayed page fractions. -->
<div
v-if="marquee"
class="absolute border border-neutral-600 bg-neutral-600/10 pointer-events-none"
:style="marqueeStyle"
/>
<!-- Hint shown for non-image pages once page objects are loaded and neither text nor image nodes were returned. -->
<div
v-if="!imagePage && textNodes && !textNodes.length && !imageNodes.length"
class="absolute inset-x-0 top-0 flex justify-center pt-4 pointer-events-none"
>
<span class="bg-base-100/90 border border-neutral-200 rounded-lg shadow px-4 py-2 text-sm">
{{ t('there_is_no_text_to_redact_on_this_page') }}
</span>
</div>
</div>
</div>
<div class="w-56 flex-none border-l px-4 py-4 space-y-2">
<!-- Apply stays enabled after a reset so that an empty set of redactions can still be applied. -->
<button
class="btn btn-sm w-full justify-start normal-case font-normal rounded disabled:bg-base-300"
:disabled="!hasRedactions && !wasReset"
@click.prevent="apply"
>
<IconCheck class="w-4 h-4" />
{{ t('apply') }}
</button>
<div class="border-t !mt-3 !mb-1" />
<button
class="btn btn-sm w-full justify-start normal-case font-normal rounded disabled:bg-base-300"
:disabled="!hasRedactions"
@click.prevent="reset"
>
<IconRotate class="w-4 h-4" />
{{ t('reset') }}
</button>
<button
class="btn btn-sm w-full justify-start normal-case font-normal rounded"
@click.prevent="$emit('cancel')"
>
<IconX class="w-4 h-4" />
{{ t('cancel') }}
</button>
</div>
</div>
</template>
<script>
import { IconCheck, IconRotate, IconX } from '@tabler/icons-vue'
// Redaction tool of the documents editor.
//
// Two modes, selected by the `imagePage` prop:
//  * image pages: the user draws free rectangles (`rects`); a click on a
//    rectangle removes it.
//  * other pages: page objects (text and image nodes) are loaded from the
//    documents_page_objects endpoint (or taken from `pageObjectsCache`). Text nodes
//    are toggled by click or selected by dragging a marquee; the parts of image
//    nodes covered by a marquee become free rectangles (`freeRects`).
//
// All rectangles are stored as fractions of the unrotated page; the overlay in the
// template applies the page rotation. `apply` is emitted with the resulting list of
// rectangles, `cancel` when the tool is dismissed or page objects cannot be loaded.
export default {
  name: 'DocumentsEditorRedact',
  components: {
    IconCheck,
    IconRotate,
    IconX
  },
  inject: ['t', 'baseFetch'],
  props: {
    templateId: {
      type: [Number, String],
      required: true
    },
    page: {
      type: Object,
      required: true
    },
    imageUrl: {
      type: String,
      required: true
    },
    metadata: {
      type: Object,
      required: true
    },
    imagePage: {
      type: Boolean,
      required: false,
      default: false
    },
    // Shared cache of loaded page objects keyed by "<sourceUuid>-<sourcePage>";
    // this component writes fetched results into it.
    pageObjectsCache: {
      type: Object,
      required: false,
      default: () => ({})
    }
  },
  emits: ['apply', 'cancel'],
  data () {
    return {
      // null until page objects are available (non-image pages only).
      textNodes: null,
      imageNodes: [],
      // Map of text node index -> true for selected nodes.
      selectedNodes: {},
      // Rectangles over image nodes on non-image pages.
      freeRects: [],
      // Rectangles drawn on image pages.
      rects: [],
      // Lets the user apply an empty result after pressing reset.
      wasReset: false,
      // Current drag selection in displayed page fractions, or null.
      marquee: null
    }
  },
  computed: {
    rotate () {
      return this.page.rotate || 0
    },
    // Box dimensions; swapped for quarter turns (90/270).
    displayWidth () {
      return this.rotate % 180 ? this.metadata.height : this.metadata.width
    },
    displayHeight () {
      return this.rotate % 180 ? this.metadata.width : this.metadata.height
    },
    pageStyle () {
      return { aspectRatio: `${this.displayWidth} / ${this.displayHeight}` }
    },
    imageStyle () {
      const scale = this.rotate % 180 ? this.metadata.width / this.metadata.height : 1

      return {
        transform: `translate(-50%, -50%) rotate(${this.rotate}deg) scale(${scale})`
      }
    },
    // Gives the overlay the same rotation (and quarter-turn scale) as the image.
    overlayStyle () {
      if (!this.rotate || !(this.rotate % 180)) {
        return { inset: '0', transform: this.rotate ? `rotate(${this.rotate}deg)` : undefined }
      }

      return {
        left: '50%',
        top: '50%',
        width: '100%',
        aspectRatio: `${this.metadata.width} / ${this.metadata.height}`,
        transform: `translate(-50%, -50%) rotate(${this.rotate}deg) scale(${this.metadata.width / this.metadata.height})`
      }
    },
    hasRedactions () {
      if (this.imagePage) {
        return this.rects.length > 0
      }

      return Object.keys(this.selectedNodes).length > 0 || this.freeRects.length > 0
    },
    // Rectangles to draw and to emit on apply.
    redactRects () {
      if (this.imagePage) {
        return this.rects
      }

      return this.textNodes ? [...this.buildRedactRects(), ...this.freeRects] : []
    },
    // Only evaluated while `marquee` is set (the element is behind v-if).
    marqueeStyle () {
      const left = Math.min(this.marquee.x1, this.marquee.x2)
      const top = Math.min(this.marquee.y1, this.marquee.y2)

      return {
        left: `${left * 100}%`,
        top: `${top * 100}%`,
        width: `${Math.abs(this.marquee.x2 - this.marquee.x1) * 100}%`,
        height: `${Math.abs(this.marquee.y2 - this.marquee.y1) * 100}%`
      }
    }
  },
  created () {
    if (this.imagePage) {
      // Work on copies so the page's rectangles change only on apply.
      this.rects = (this.page.redact || []).map((rect) => ({ ...rect }))

      return
    }

    const cacheKey = `${this.page.sourceUuid}-${this.page.sourcePage}`
    const cached = this.pageObjectsCache[cacheKey]

    if (cached) {
      this.textNodes = cached.text_nodes
      this.imageNodes = cached.image_nodes
      this.preselectNodes()

      return
    }

    const query = new URLSearchParams({ attachment_uuid: this.page.sourceUuid, page: this.page.sourcePage })

    this.baseFetch(`/templates/${this.templateId}/documents_page_objects?${query}`).then(async (resp) => {
      if (resp.ok) {
        const data = await resp.json()

        this.pageObjectsCache[cacheKey] = data
        this.textNodes = data.text_nodes
        this.imageNodes = data.image_nodes
        this.preselectNodes()
      } else {
        this.$emit('cancel')
      }
    }).catch((error) => {
      // A rejected request or an unreadable body would otherwise leave `textNodes`
      // null for good: mousedown is ignored and nothing is rendered. Leave the
      // tool the same way an HTTP error does, then rethrow so the failure is still
      // reported as an unhandled rejection.
      this.$emit('cancel')

      throw error
    })
  },
  beforeUnmount () {
    window.removeEventListener('mousemove', this.onMousemove)
    window.removeEventListener('mouseup', this.onMouseup)
  },
  methods: {
    // Maps a point in the displayed (rotated) box back to unrotated page fractions.
    inverseRotatePoint (point, rotate) {
      let { x, y } = point

      if (rotate === 90) {
        [x, y] = [y, 1 - x]
      } else if (rotate === 180) {
        [x, y] = [1 - x, 1 - y]
      } else if (rotate === 270) {
        [x, y] = [1 - y, x]
      }

      return { x, y }
    },
    apply () {
      this.$emit('apply', this.redactRects)
    },
    reset () {
      this.wasReset = true

      if (this.imagePage) {
        this.rects = []
      } else {
        this.selectedNodes = {}
        this.freeRects = []
      }
    },
    // True when the two { x, y, w, h } boxes overlap with a non-empty area.
    boxesIntersect (a, b) {
      return a.x < b.x + b.w && a.x + a.w > b.x && a.y < b.y + b.h && a.y + a.h > b.y
    },
    // Restores the selection from the page's existing redactions: rectangles that
    // touch an image node become free rectangles again, the remaining ones select
    // every text node whose center lies inside them.
    preselectNodes () {
      const nodeRects = []
      const savedRects = this.page.redact || []

      savedRects.forEach((rect) => {
        if (this.imageNodes.some((node) => this.boxesIntersect(node, rect))) {
          this.freeRects.push({ ...rect })
        } else {
          nodeRects.push(rect)
        }
      })

      this.textNodes.forEach((node, index) => {
        const centerX = node.x + (node.w / 2)
        const centerY = node.y + (node.h / 2)

        const isInside = nodeRects.some((rect) => {
          return centerX >= rect.x && centerX <= rect.x + rect.w &&
            centerY >= rect.y && centerY <= rect.y + rect.h
        })

        if (isInside) {
          this.selectedNodes[index] = true
        }
      })
    },
    // Merges selected text nodes into as few rectangles as possible: nodes are
    // ordered by line (vertical center) and then by x, and a node is merged into
    // the previous rectangle when it is on the same line and starts within
    // 0.7 * its height of that rectangle's right edge.
    buildRedactRects () {
      const nodes = this.textNodes.filter((_, index) => this.selectedNodes[index])

      const sorted = nodes.slice().sort((a, b) => {
        const diff = (a.y + (a.h / 2)) - (b.y + (b.h / 2))

        return Math.abs(diff) < Math.min(a.h, b.h) / 2 ? a.x - b.x : diff
      })

      const rects = []

      sorted.forEach((node) => {
        const last = rects[rects.length - 1]
        const sameLine = last &&
          Math.abs((node.y + (node.h / 2)) - (last.y + (last.h / 2))) < Math.max(last.h, node.h) / 2

        if (sameLine && node.x <= last.x + last.w + (node.h * 0.7)) {
          // Compute the far edges before moving the origin.
          const right = Math.max(last.x + last.w, node.x + node.w)
          const bottom = Math.max(last.y + last.h, node.y + node.h)

          last.x = Math.min(last.x, node.x)
          last.y = Math.min(last.y, node.y)
          last.w = right - last.x
          last.h = bottom - last.y
        } else {
          rects.push({ x: node.x, y: node.y, w: node.w, h: node.h })
        }
      })

      return rects
    },
    // Mouse position as a fraction of the page box, clamped to [0, 1].
    pagePoint (event) {
      const rect = this.$refs.pageEl.getBoundingClientRect()

      return {
        x: Math.min(Math.max((event.clientX - rect.left) / rect.width, 0), 1),
        y: Math.min(Math.max((event.clientY - rect.top) / rect.height, 0), 1)
      }
    },
    onMousedown (event) {
      // Primary button only; on non-image pages wait until page objects are loaded.
      if (event.button !== 0 || (!this.imagePage && !this.textNodes)) {
        return
      }

      const point = this.pagePoint(event)

      this.marquee = { x1: point.x, y1: point.y, x2: point.x, y2: point.y }

      window.addEventListener('mousemove', this.onMousemove)
      window.addEventListener('mouseup', this.onMouseup, { once: true })
    },
    onMousemove (event) {
      // Same guard as onMouseup: ignore a stray move when no drag is active.
      if (!this.marquee) {
        return
      }

      const point = this.pagePoint(event)

      this.marquee.x2 = point.x
      this.marquee.y2 = point.y
    },
    // Finishes a drag. A movement below 0.5% of the page in both directions counts
    // as a click (remove a rectangle / toggle a text node); anything larger is a
    // marquee selection.
    onMouseup () {
      window.removeEventListener('mousemove', this.onMousemove)

      if (!this.marquee) {
        return
      }

      const start = this.inverseRotatePoint({ x: this.marquee.x1, y: this.marquee.y1 }, this.rotate)
      const finish = this.inverseRotatePoint({ x: this.marquee.x2, y: this.marquee.y2 }, this.rotate)

      const left = Math.min(start.x, finish.x)
      const right = Math.max(start.x, finish.x)
      const top = Math.min(start.y, finish.y)
      const bottom = Math.max(start.y, finish.y)

      this.marquee = null

      // True when the click position lies inside (or on the edge of) the box.
      const containsClick = (box) => {
        return left >= box.x && left <= box.x + box.w && top >= box.y && top <= box.y + box.h
      }

      if (right - left < 0.005 && bottom - top < 0.005) {
        if (this.imagePage) {
          const index = this.rects.findIndex(containsClick)

          if (index !== -1) {
            this.rects.splice(index, 1)
          }
        } else {
          // Free rectangles take precedence over text nodes underneath them.
          const rectIndex = this.freeRects.findIndex(containsClick)

          if (rectIndex !== -1) {
            this.freeRects.splice(rectIndex, 1)
          } else {
            const index = this.textNodes.findIndex(containsClick)

            if (index !== -1) {
              if (this.selectedNodes[index]) {
                delete this.selectedNodes[index]
              } else {
                this.selectedNodes[index] = true
              }
            }
          }
        }
      } else if (this.imagePage) {
        this.rects.push({ x: left, y: top, w: right - left, h: bottom - top })
      } else {
        const marqueeBox = { x: left, y: top, w: right - left, h: bottom - top }

        this.textNodes.forEach((node, index) => {
          if (node.x < right && node.x + node.w > left && node.y < bottom && node.y + node.h > top) {
            this.selectedNodes[index] = true
          }
        })

        // For image nodes only the part covered by the marquee is redacted.
        this.imageNodes.forEach((node) => {
          if (!this.boxesIntersect(node, marqueeBox)) {
            return
          }

          const x = Math.max(node.x, marqueeBox.x)
          const y = Math.max(node.y, marqueeBox.y)
          const w = Math.min(node.x + node.w, marqueeBox.x + marqueeBox.w) - x
          const h = Math.min(node.y + node.h, marqueeBox.y + marqueeBox.h) - y

          this.freeRects.push({ x, y, w, h })
        })
      }
    }
  }
}
</script>

@ -96,6 +96,23 @@ const en = {
add_pdf_documents_or_images: 'Add PDF documents or images',
add_documents_or_images: 'Add documents or images',
add_a_new_document: 'Add a new document',
edit_documents: 'Edit documents',
move_forward: 'Move forward',
move_backward: 'Move backward',
remove_page: 'Remove page',
merge_with_previous: 'Merge with previous',
merge_with_next: 'Merge with next',
move_up: 'Move up',
move_down: 'Move down',
rotate: 'Rotate',
redact: 'Redact',
crop: 'Crop',
crop_and_scan: 'Crop & Scan',
flip_horizontal: 'Flip horizontal',
flip_vertical: 'Flip vertical',
there_is_no_text_to_redact_on_this_page: 'This page contains only images. Redact tool can be used only with text pages',
reset: 'Reset',
upload_to_document: 'Upload to "{document}"',
replace_existing_document: 'Replace existing document',
clone_and_replace_documents: 'Clone and replace documents',
required: 'Required',
@ -319,6 +336,23 @@ const es = {
add_pdf_documents_or_images: 'Agregar documentos PDF o imágenes',
add_documents_or_images: 'Agregar documentos o imágenes',
add_a_new_document: 'Agregar un nuevo documento',
edit_documents: 'Editar documentos',
move_forward: 'Mover adelante',
move_backward: 'Mover atrás',
remove_page: 'Eliminar página',
merge_with_previous: 'Combinar con el anterior',
merge_with_next: 'Combinar con el siguiente',
move_up: 'Mover arriba',
move_down: 'Mover abajo',
rotate: 'Rotar',
redact: 'Censurar',
crop: 'Recortar',
crop_and_scan: 'Recortar y escanear',
flip_horizontal: 'Voltear horizontal',
flip_vertical: 'Voltear vertical',
there_is_no_text_to_redact_on_this_page: 'Esta página contiene solo imágenes. La herramienta de censura solo puede usarse con páginas de texto',
reset: 'Restablecer',
upload_to_document: 'Subir a "{document}"',
replace_existing_document: 'Reemplazar documento existente',
clone_and_replace_documents: 'Clonar y reemplazar documentos',
required: 'Requerido',
@ -548,6 +582,23 @@ const it = {
add_pdf_documents_or_images: 'Aggiungi documenti PDF o immagini',
add_documents_or_images: 'Aggiungi documenti o immagini',
add_a_new_document: 'Aggiungi un nuovo documento',
edit_documents: 'Modifica documenti',
move_forward: 'Sposta avanti',
move_backward: 'Sposta indietro',
remove_page: 'Rimuovi pagina',
merge_with_previous: 'Unisci al precedente',
merge_with_next: 'Unisci al successivo',
move_up: 'Sposta su',
move_down: 'Sposta giù',
rotate: 'Ruota',
redact: 'Oscura',
crop: 'Ritaglia',
crop_and_scan: 'Ritaglia e scansiona',
flip_horizontal: 'Rifletti orizzontale',
flip_vertical: 'Rifletti verticale',
there_is_no_text_to_redact_on_this_page: 'Questa pagina contiene solo immagini. Lo strumento di oscuramento può essere usato solo con pagine di testo',
reset: 'Reimposta',
upload_to_document: 'Carica in "{document}"',
replace_existing_document: 'Sostituisci documento esistente',
clone_and_replace_documents: 'Clona e sostituisci documenti',
required: 'Obbligatorio',
@ -771,6 +822,23 @@ const pt = {
add_pdf_documents_or_images: 'Adicionar documentos PDF ou imagens',
add_documents_or_images: 'Adicionar documentos ou imagens',
add_a_new_document: 'Adicionar um novo documento',
edit_documents: 'Editar documentos',
move_forward: 'Mover para frente',
move_backward: 'Mover para trás',
remove_page: 'Remover página',
merge_with_previous: 'Mesclar com o anterior',
merge_with_next: 'Mesclar com o próximo',
move_up: 'Mover para cima',
move_down: 'Mover para baixo',
rotate: 'Girar',
redact: 'Censurar',
crop: 'Cortar',
crop_and_scan: 'Cortar e digitalizar',
flip_horizontal: 'Inverter horizontal',
flip_vertical: 'Inverter vertical',
there_is_no_text_to_redact_on_this_page: 'Esta página contém apenas imagens. A ferramenta de censura só pode ser usada com páginas de texto',
reset: 'Redefinir',
upload_to_document: 'Enviar para "{document}"',
replace_existing_document: 'Substituir documento existente',
clone_and_replace_documents: 'Clonar e substituir documentos',
required: 'Obrigatório',
@ -1000,6 +1068,23 @@ const fr = {
add_pdf_documents_or_images: 'Ajouter des documents PDF ou des images',
add_documents_or_images: 'Ajouter des documents ou des images',
add_a_new_document: 'Ajouter un nouveau document',
edit_documents: 'Modifier les documents',
move_forward: 'Déplacer en avant',
move_backward: 'Déplacer en arrière',
remove_page: 'Supprimer la page',
merge_with_previous: 'Fusionner avec le précédent',
merge_with_next: 'Fusionner avec le suivant',
move_up: 'Déplacer vers le haut',
move_down: 'Déplacer vers le bas',
rotate: 'Pivoter',
redact: 'Caviarder',
crop: 'Rogner',
crop_and_scan: 'Rogner et numériser',
flip_horizontal: 'Miroir horizontal',
flip_vertical: 'Miroir vertical',
there_is_no_text_to_redact_on_this_page: 'Cette page ne contient que des images. L\'outil de caviardage ne peut être utilisé qu\'avec des pages de texte',
reset: 'Réinitialiser',
upload_to_document: 'Téléverser dans "{document}"',
replace_existing_document: 'Remplacer le document existant',
clone_and_replace_documents: 'Cloner et remplacer des documents',
required: 'Obligatoire',
@ -1226,6 +1311,23 @@ const de = {
add_pdf_documents_or_images: 'PDF-Dokumente oder Bilder hinzufügen',
add_documents_or_images: 'Dokumente oder Bilder hinzufügen',
add_a_new_document: 'Neues Dokument hinzufügen',
edit_documents: 'Dokumente bearbeiten',
move_forward: 'Nach vorne',
move_backward: 'Nach hinten',
remove_page: 'Seite entfernen',
merge_with_previous: 'Mit vorherigem zusammenführen',
merge_with_next: 'Mit nächstem zusammenführen',
move_up: 'Nach oben verschieben',
move_down: 'Nach unten verschieben',
rotate: 'Drehen',
redact: 'Schwärzen',
crop: 'Zuschneiden',
crop_and_scan: 'Zuschneiden & Scan',
flip_horizontal: 'Horizontal spiegeln',
flip_vertical: 'Vertikal spiegeln',
there_is_no_text_to_redact_on_this_page: 'Diese Seite enthält nur Bilder. Das Schwärzungswerkzeug kann nur mit Textseiten verwendet werden',
reset: 'Zurücksetzen',
upload_to_document: 'In "{document}" hochladen',
replace_existing_document: 'Vorhandenes Dokument ersetzen',
clone_and_replace_documents: 'Dokumente klonen und ersetzen',
required: 'Erforderlich',
@ -1452,6 +1554,23 @@ const nl = {
add_pdf_documents_or_images: 'PDF-documenten of afbeeldingen toevoegen',
add_documents_or_images: 'Documenten of afbeeldingen toevoegen',
add_a_new_document: 'Nieuw document toevoegen',
edit_documents: 'Documenten bewerken',
move_forward: 'Naar voren',
move_backward: 'Naar achteren',
remove_page: 'Pagina verwijderen',
merge_with_previous: 'Samenvoegen met vorige',
merge_with_next: 'Samenvoegen met volgende',
move_up: 'Omhoog verplaatsen',
move_down: 'Omlaag verplaatsen',
rotate: 'Draaien',
redact: 'Redigeren',
crop: 'Bijsnijden',
crop_and_scan: 'Bijsnijden en scannen',
flip_horizontal: 'Horizontaal spiegelen',
flip_vertical: 'Verticaal spiegelen',
there_is_no_text_to_redact_on_this_page: 'Deze pagina bevat alleen afbeeldingen. De redactietool kan alleen worden gebruikt met tekstpagina\'s',
reset: 'Opnieuw instellen',
upload_to_document: 'Uploaden naar "{document}"',
replace_existing_document: 'Bestaand document vervangen',
clone_and_replace_documents: 'Documenten klonen en vervangen',
required: 'Vereist',

@ -72,6 +72,15 @@
style="min-width: 170px"
@click="closeDropdown"
>
<li v-if="!item.dynamic">
<button
class="w-full px-2 py-1 rounded-md hover:bg-neutral-100 flex items-center space-x-2 text-sm whitespace-nowrap"
@click.stop="$emit('edit', item); closeDropdown()"
>
<IconPencil class="w-4 h-4" />
<span>{{ t('edit') }}</span>
</button>
</li>
<li>
<button
class="w-full px-2 py-1 rounded-md hover:bg-neutral-100 flex items-center justify-between text-sm"
@ -184,7 +193,7 @@
<script>
import Contenteditable from './contenteditable'
import Upload from './upload'
import { IconRouteAltLeft, IconSortDescending2, IconDotsVertical, IconTrashX, IconBolt, IconInnerShadowTop } from '@tabler/icons-vue'
import { IconRouteAltLeft, IconSortDescending2, IconDotsVertical, IconTrashX, IconBolt, IconInnerShadowTop, IconPencil } from '@tabler/icons-vue'
import ConditionsModal from './conditions_modal'
import ReplaceButton from './replace'
import GoogleDriveDocumentSettings from './google_drive_document_settings'
@ -204,7 +213,8 @@ export default {
IconSortDescending2,
IconDotsVertical,
IconTrashX,
IconBolt
IconBolt,
IconPencil
},
inject: ['t', 'getFieldTypeIndex', 'baseFetch'],
props: {
@ -260,7 +270,7 @@ export default {
default: true
}
},
emits: ['scroll-to', 'change', 'remove', 'up', 'down', 'replace', 'reorder'],
emits: ['scroll-to', 'change', 'remove', 'up', 'down', 'replace', 'reorder', 'edit'],
data () {
return {
isShowConditionsModal: false,

@ -128,31 +128,39 @@ function convertImage (sourceFile, targetType, quality) {
})
}
async function convertImagesInInput (input) {
if (!input.files || input.files.length === 0) return
const dt = new DataTransfer()
let didConvert = false
export async function convertUnsupportedImages (files) {
const converted = []
for (const file of Array.from(input.files)) {
let converted = file
for (const file of Array.from(files)) {
let result = file
try {
if (['image/bmp', 'image/vnd.microsoft.icon', 'image/svg+xml', 'image/gif'].includes(file.type)) {
converted = await convertImage(file, 'image/png')
didConvert = true
result = await convertImage(file, 'image/png')
} else if (['image/heic', 'image/heif', 'image/heic-sequence', 'image/heif-sequence', 'image/avif', 'image/avif-sequence', 'image/webp'].includes(file.type)) {
converted = await convertImage(file, 'image/jpeg', 0.9)
didConvert = true
result = await convertImage(file, 'image/jpeg', 0.9)
}
} catch (e) {
alert(e.message)
}
dt.items.add(converted)
converted.push(result)
}
if (didConvert) {
return converted
}
/**
 * Converts unsupported image files selected in a file input and, when at
 * least one file was actually converted, swaps the input's FileList for the
 * converted set. Inputs with no files are left untouched.
 *
 * @param {HTMLInputElement} input file input whose `files` may be replaced
 * @returns {Promise<void>}
 */
async function convertImagesInInput (input) {
  const selected = input.files
  if (!selected || selected.length === 0) return

  const originals = Array.from(selected)
  const converted = await convertUnsupportedImages(originals)

  // Identity comparison: an unconverted file is returned as the same object.
  const changed = converted.some((file, index) => file !== originals[index])
  if (!changed) return

  const transfer = new DataTransfer()
  for (const file of converted) {
    transfer.items.add(file)
  }
  input.files = transfer.files
}

@ -99,6 +99,9 @@ Rails.application.routes.draw do
resources :clone, only: %i[new create], controller: 'templates_clone'
resource :debug, only: %i[show], controller: 'templates_debug' if Rails.env.development?
resources :documents, only: %i[index create], controller: 'template_documents'
resources :documents_modify, only: %i[create], controller: 'template_documents_modify'
resources :documents_page_objects, only: %i[index], controller: 'template_documents_page_objects'
resources :documents_crop, only: %i[index create], controller: 'template_documents_crop'
resources :clone_and_replace, only: %i[create], controller: 'templates_clone_and_replace'
resources :detect_fields, only: %i[create], controller: 'templates_detect_fields' unless Docuseal.multitenant?
resources :restore, only: %i[create], controller: 'templates_restore'

@ -0,0 +1,488 @@
# frozen_string_literal: true
module Leptonica
extend FFI::Library
# Load the Leptonica shared library. ffi_lib receives every known candidate:
# versioned Linux sonames, the bare library name, and two Homebrew dylib paths.
begin
ffi_lib %w[
libleptonica.so.6
liblept.so.5
leptonica
/opt/homebrew/lib/libleptonica.6.dylib
/usr/local/lib/libleptonica.6.dylib
]
rescue LoadError => e
# Re-raise with an installation hint while keeping the loader's own message.
raise "Could not load leptonica library. Make sure it's installed and in your library path. Error: #{e.message}"
end
typedef :pointer, :PIX
typedef :pointer, :PTA
typedef :pointer, :BOX
typedef :pointer, :BOXA
typedef :pointer, :PIXA
# Access flag passed to boxaGetBox when reading connected-component boxes.
L_CLONE = 2
# Level handed to setMsgSeverity when the module loads
# (presumably silences Leptonica's own diagnostics - confirm against the Leptonica headers).
L_SEVERITY_NONE = 6
# Width, in pixels, of the downscaled image used for corner detection.
DETECT_WIDTH = 256
# Detected quads covering less than this fraction of the image are rejected.
MIN_QUAD_AREA_RATIO = 0.2
# A rotation is only reported when the winning orientation confidence is at
# least MIN_ORIENT_CONF and at least ORIENT_CONF_RATIO times the magnitude of
# the confidence on the other axis.
MIN_ORIENT_CONF = 8.0
ORIENT_CONF_RATIO = 2.5
# Raised when a Leptonica call fails or hands back a null handle.
LeptonicaError = Class.new(StandardError)
attach_function :setMsgSeverity, [:int], :int
attach_function :pixCreate, %i[int int int], :PIX
attach_function :pixSetSpp, %i[PIX int], :int
attach_function :pixClone, [:PIX], :PIX
attach_function :pixDestroy, [:pointer], :void
attach_function :pixGetWidth, [:PIX], :int
attach_function :pixGetHeight, [:PIX], :int
attach_function :pixGetDepth, [:PIX], :int
attach_function :pixGetWpl, [:PIX], :int
attach_function :pixGetData, [:PIX], :pointer
attach_function :pixConvertTo32, [:PIX], :PIX
attach_function :pixConvertTo8, %i[PIX int], :PIX
attach_function :pixInvert, %i[PIX PIX], :PIX
attach_function :pixScaleToSize, %i[PIX int int], :PIX
attach_function :pixOtsuAdaptiveThreshold, %i[PIX int int int int float pointer pointer], :int
attach_function :pixCloseBrick, %i[PIX PIX int int], :PIX
attach_function :pixOpenBrick, %i[PIX PIX int int], :PIX
attach_function :pixConnComp, %i[PIX pointer int], :BOXA
attach_function :pixCountPixels, %i[PIX pointer pointer], :int
attach_function :boxaGetCount, [:BOXA], :int
attach_function :boxaGetBox, %i[BOXA int int], :BOX
attach_function :boxaDestroy, [:pointer], :void
attach_function :boxCreate, %i[int int int int], :BOX
attach_function :boxDestroy, [:pointer], :void
attach_function :boxGetGeometry, %i[BOX pointer pointer pointer pointer], :int
attach_function :pixaGetPix, %i[PIXA int int], :PIX
attach_function :pixaDestroy, [:pointer], :void
attach_function :pixClipRectangle, %i[PIX BOX pointer], :PIX
attach_function :ptaCreate, [:int], :PTA
attach_function :ptaAddPt, %i[PTA float float], :int
attach_function :ptaDestroy, [:pointer], :void
attach_function :pixProjectivePtaColor, %i[PIX PTA PTA uint], :PIX
attach_function :pixRotateOrth, %i[PIX int], :PIX
attach_function :pixOrientDetect, %i[PIX pointer pointer int int], :int
attach_function :pixFlipLR, %i[PIX PIX], :PIX
attach_function :pixFlipTB, %i[PIX PIX], :PIX
attach_function :pixBackgroundNormSimple, %i[PIX PIX PIX], :PIX
attach_function :pixGammaTRC, %i[PIX PIX float int int], :PIX
attach_function :pixEndianByteSwap, [:PIX], :int
attach_function :dewarpSinglePage, %i[PIX int int int int pointer pointer int], :int
setMsgSeverity(L_SEVERITY_NONE)
module_function
# Cuts the quadrilateral described by +corners+ out of an image, optionally
# post-processes it, and returns the raw pixels of the result.
#
# @param image_data [Object] image source accepted by read_pix
# @param corners [Array<Hash>] four points with 'x'/'y' keys; each value is
#   multiplied by the image width/height to obtain pixel coordinates
# @param scan [Boolean] forwarded to transform_result
# @param rotate [Integer, nil] rotation in degrees; when nil the value comes
#   from detect_orientation
# @param flip_h [Boolean] forwarded to transform_result
# @param flip_v [Boolean] forwarded to transform_result
# @return [Array] the value produced by read_bytes for the final image
# @raise [LeptonicaError] when a Leptonica step fails
def crop_document(image_data, corners, scan: false, rotate: nil, flip_h: false, flip_v: false)
  rgba = warped = output = nil
  source = read_pix(image_data)
  rgba = checked(pixConvertTo32(source), 'Failed to convert image to 32bpp')

  img_width = pixGetWidth(rgba)
  img_height = pixGetHeight(rgba)
  quad = corners.map { |corner| [corner['x'].to_f * img_width, corner['y'].to_f * img_height] }

  warped = projective_crop(rgba, quad, *output_size(quad, img_width, img_height))
  angle = rotate.nil? ? detect_orientation(warped) : rotate.to_i
  output = transform_result(warped, scan: scan, rotate: angle, flip_h: flip_h, flip_v: flip_v)

  read_bytes(output)
ensure
  # Release handles newest-first; transform_result may hand back +warped+
  # itself, in which case it must only be destroyed once.
  destroy_pix(output) if output && !output.equal?(warped)
  destroy_pix(warped)
  destroy_pix(rgba)
  destroy_pix(source)
end
# Locates the four corners of the page inside an image.
#
# @param image_data [Object] image source accepted by read_detect_pix
# @return [Array<Hash>, nil] four {'x' =>, 'y' =>} hashes rounded to six
#   decimals, or nil when no mask/corners are found or the quad area is below
#   MIN_QUAD_AREA_RATIO
def detect_document_corners(image_data)
  mask = nil
  pix = read_detect_pix(image_data)
  mask = build_page_mask(pix)
  return if mask.nil?

  quad = mask_corners(mask)
  return if quad.nil? || quad_area(quad) < MIN_QUAD_AREA_RATIO

  quad.map { |(px, py)| { 'x' => px.round(6), 'y' => py.round(6) } }
ensure
  destroy_pix(mask)
  destroy_pix(pix)
end
# Loads +image_data+ at full resolution and copies it into a new PIX.
#
# @param image_data [Object] image source accepted by load_image
# @return [FFI::Pointer] PIX handle built by build_pix; the caller destroys it
def read_pix(image_data)
  image = load_image(image_data)
  build_pix(image)
end
# Loads +image_data+ and scales it to DETECT_WIDTH pixels wide before building
# a PIX. The target height follows the aspect ratio, clamped to
# 8..(DETECT_WIDTH * 4).
#
# @param image_data [Object] image source accepted by load_image
# @return [FFI::Pointer] PIX handle built by build_pix; the caller destroys it
def read_detect_pix(image_data)
  source = load_image(image_data)
  src_width = source.width
  src_height = source.height

  target_height = (DETECT_WIDTH * src_height / src_width.to_f).round.clamp(8, DETECT_WIDTH * 4)
  scaled = source.resize(DETECT_WIDTH / src_width.to_f, vscale: target_height / src_height.to_f)

  build_pix(scaled)
end
# Copies a 4-band 8-bit image into a newly created 32bpp PIX.
#
# The image bytes are written straight into the PIX buffer and the words are
# then byte-swapped in place via pixEndianByteSwap.
#
# @param image [Object] image responding to width, height and write_to_memory
#   (assumes the buffer is width * height * 4 bytes, as load_image produces - confirm for other callers)
# @return [FFI::Pointer] PIX handle owned by the caller
# @raise [LeptonicaError] when the PIX cannot be created or the swap fails
def build_pix(image)
  pix = checked(pixCreate(image.width, image.height, 32), 'Failed to read image')
  begin
    pixSetSpp(pix, 3)
    pixGetData(pix).put_bytes(0, image.write_to_memory)
    raise LeptonicaError, 'Failed to read image' unless pixEndianByteSwap(pix).zero?
  rescue StandardError
    # The caller never receives the handle on failure, so release it here
    # instead of leaking the native allocation.
    destroy_pix(pix)
    raise
  end
  pix
end
# Loads an image through ImageUtils and normalises it to sRGB, unsigned 8-bit
# samples, with an alpha band (an opaque 255 band is appended when missing).
#
# @param image_data [Object] source handed to ImageUtils.load_vips
# @return [Object] the normalised image
def load_image(image_data)
  image = ImageUtils.load_vips(image_data)
  image = image.colourspace(:srgb) unless image.interpretation == :srgb
  image = image.cast(:uchar) unless image.format == :uchar
  image.has_alpha? ? image : image.bandjoin(255)
end
# Warps the quadrilateral +points+ of +pix32+ onto an axis-aligned rectangle
# and clips the result to that rectangle.
#
# @param pix32 [FFI::Pointer] source PIX
# @param points [Array<Array<Numeric>>] four [x, y] source points, mapped in
#   order onto (0,0), (out_width,0), (out_width,out_height), (0,out_height)
# @param out_width [Integer] width of the destination rectangle
# @param out_height [Integer] height of the destination rectangle
# @return [FFI::Pointer] clipped PIX owned by the caller
# @raise [LeptonicaError] when warping or clipping fails
def projective_crop(pix32, points, out_width, out_height)
  source_pts = ptaCreate(4)
  target_pts = ptaCreate(4)
  begin
    points.each { |(px, py)| ptaAddPt(source_pts, px, py) }

    target_quad = [[0, 0], [out_width, 0], [out_width, out_height], [0, out_height]]
    target_quad.each { |(px, py)| ptaAddPt(target_pts, px, py) }

    full = checked(pixProjectivePtaColor(pix32, target_pts, source_pts, 0xffffff00), 'Failed to warp image')
    clip = nil
    begin
      clip = boxCreate(0, 0, out_width, out_height)
      checked(pixClipRectangle(full, clip, nil), 'Failed to clip image')
    ensure
      destroy_box(clip)
      destroy_pix(full)
    end
  ensure
    destroy_pta(source_pts)
    destroy_pta(target_pts)
  end
end
# Applies the optional post-processing steps to a cropped image, in this
# order: whiten (scan), horizontal flip, vertical flip, rotation, dewarp (scan).
#
# The rotation is reduced to 0..3 quarter turns before it reaches
# pixRotateOrth, so full turns (360) and negative angles (-90) are honoured
# instead of failing. Fractions of a quarter turn are truncated toward zero.
#
# @param warped [FFI::Pointer] input PIX; never destroyed here
# @param scan [Boolean] enable whitening and dewarping
# @param rotate [Numeric] rotation in degrees
# @param flip_h [Boolean] mirror left/right
# @param flip_v [Boolean] mirror top/bottom
# @return [FFI::Pointer] resulting PIX; it is +warped+ itself when no step
#   applies, otherwise a new PIX owned by the caller
# @raise [LeptonicaError] when a step fails
def transform_result(warped, scan:, rotate:, flip_h:, flip_v:)
  quarter_turns = rotate.fdiv(90).truncate % 4

  steps = []
  steps << ->(pix) { whiten(pix) } if scan
  steps << ->(pix) { checked(pixFlipLR(nil, pix), 'Failed to flip image') } if flip_h
  steps << ->(pix) { checked(pixFlipTB(nil, pix), 'Failed to flip image') } if flip_v
  unless quarter_turns.zero?
    steps << ->(pix) { checked(pixRotateOrth(pix, quarter_turns), 'Failed to rotate image') }
  end
  steps << ->(pix) { dewarp(pix) } if scan

  steps.reduce(warped) do |current, step|
    step.call(current)
  ensure
    # Each intermediate image is released once the next step has consumed it;
    # the caller's input is left alone.
    destroy_pix(current) unless current.equal?(warped)
  end
end
# Derives the output rectangle size for a quad by averaging the lengths of
# its opposite edges. Each dimension is rounded and clamped to
# 8..(2 * source dimension).
#
# @param points [Array<Array<Numeric>>] corners ordered top-left, top-right,
#   bottom-right, bottom-left
# @param width [Integer] source image width
# @param height [Integer] source image height
# @return [Array(Integer, Integer)] [out_width, out_height]
def output_size(points, width, height)
  top_left, top_right, bottom_right, bottom_left = points

  mean_width = (distance(top_left, top_right) + distance(bottom_left, bottom_right)) / 2.0
  mean_height = (distance(top_left, bottom_left) + distance(top_right, bottom_right)) / 2.0

  [mean_width.round.clamp(8, width * 2), mean_height.round.clamp(8, height * 2)]
end
# Estimates how far a page image is rotated, using Leptonica's orientation
# detector on a thresholded copy.
#
# @param pix32 [FFI::Pointer] image to analyse; not destroyed here
# @return [Integer] 0, 90, 180 or 270 as decided by orientation_rotation; 0
#   whenever an intermediate step fails
def detect_orientation(pix32)
  gray = pixConvertTo8(pix32, 0)
  return 0 if gray.null?

  binary_out = FFI::MemoryPointer.new(:pointer)
  status = pixOtsuAdaptiveThreshold(gray, pixGetWidth(gray), pixGetHeight(gray), 0, 0, 0.1, nil, binary_out)
  destroy_pix(gray)

  binary = binary_out.read_pointer
  return 0 unless status.zero? && !binary.null?

  up_conf = FFI::MemoryPointer.new(:float)
  left_conf = FFI::MemoryPointer.new(:float)
  status = pixOrientDetect(binary, up_conf, left_conf, 0, 0)
  destroy_pix(binary)

  status.zero? ? orientation_rotation(up_conf.read_float, left_conf.read_float) : 0
end
# Maps the two orientation confidences to a rotation in degrees.
#
# A direction wins only when its confidence reaches MIN_ORIENT_CONF and is at
# least ORIENT_CONF_RATIO times the magnitude of the other axis.
#
# @param upconf [Float] up/down confidence (negative values select 180)
# @param leftconf [Float] left/right confidence (positive selects 90, negative 270)
# @return [Integer] 0, 90, 180 or 270
def orientation_rotation(upconf, leftconf)
  up_floor = ORIENT_CONF_RATIO * upconf.abs
  left_floor = ORIENT_CONF_RATIO * leftconf.abs

  return 90 if leftconf >= MIN_ORIENT_CONF && leftconf >= up_floor
  return 180 if -upconf >= MIN_ORIENT_CONF && -upconf >= left_floor
  return 270 if -leftconf >= MIN_ORIENT_CONF && -leftconf >= up_floor

  0
end
# Runs Leptonica's single-page dewarp on +pix+.
#
# @param pix [FFI::Pointer] input image; not destroyed here
# @return [FFI::Pointer] the dewarped PIX, or a pixClone of +pix+ when
#   dewarping reports failure or yields no image
def dewarp(pix)
  result_slot = FFI::MemoryPointer.new(:pointer)
  status = dewarpSinglePage(pix, 0, 1, 1, 0, result_slot, nil, 0)
  dewarped = result_slot.read_pointer

  (status.zero? && !dewarped.null?) ? dewarped : pixClone(pix)
end
# Normalises the background of +pix+ and then applies a gamma/contrast
# mapping with the fixed range 70..190.
#
# @param pix [FFI::Pointer] input image; not destroyed here
# @return [FFI::Pointer] new PIX owned by the caller
# @raise [LeptonicaError] when either Leptonica call fails
def whiten(pix)
  evened = checked(pixBackgroundNormSimple(pix, nil, nil), 'Failed to normalize background')
  checked(pixGammaTRC(nil, evened, 1.0, 70, 190), 'Failed to adjust contrast')
ensure
  destroy_pix(evened)
end
# Extracts the pixel buffer of +pix+ as a byte string, four bytes per pixel.
# The PIX is byte-swapped in place first and is left swapped.
#
# @param pix [FFI::Pointer] image to read
# @return [Array(String, Integer, Integer)] [bytes, width, height]
# @raise [LeptonicaError] when the byte swap fails
def read_bytes(pix)
  columns = pixGetWidth(pix)
  rows = pixGetHeight(pix)

  swapped = pixEndianByteSwap(pix).zero?
  raise LeptonicaError, 'Failed to read pixels' unless swapped

  payload = pixGetData(pix).read_bytes(columns * rows * 4)
  [payload, columns, rows]
end
# Builds a cleaned 1bpp mask from +pix+: grayscale conversion, Otsu
# thresholding over the whole image as one tile, then clean_mask.
#
# @param pix [FFI::Pointer] input image; not destroyed here
# @return [FFI::Pointer, nil] mask PIX owned by the caller, or nil when
#   thresholding fails
# @raise [LeptonicaError] when grayscale conversion or mask cleaning fails
def build_page_mask(pix)
  binary = nil
  gray = checked(pixConvertTo8(pix, 0), 'Failed to convert image to grayscale')

  threshold_out = FFI::MemoryPointer.new(:pointer)
  status = pixOtsuAdaptiveThreshold(gray, pixGetWidth(gray), pixGetHeight(gray), 0, 0, 0.1,
                                    nil, threshold_out)
  return if status != 0 || threshold_out.read_pointer.null?

  binary = threshold_out.read_pointer
  clean_mask(binary)
ensure
  destroy_pix(binary)
  destroy_pix(gray)
end
# Inverts a binary image and tidies it morphologically: a 5x5 closing
# followed by a 3x3 opening.
#
# @param binary [FFI::Pointer] 1bpp input; not destroyed here
# @return [FFI::Pointer] new mask PIX owned by the caller
# @raise [LeptonicaError] when any step fails
def clean_mask(binary)
  inverted = closed = nil
  inverted = checked(pixInvert(nil, binary), 'Failed to invert mask')
  closed = checked(pixCloseBrick(nil, inverted, 5, 5), 'Failed to close mask')
  checked(pixOpenBrick(nil, closed, 3, 3), 'Failed to open mask')
ensure
  destroy_pix(closed)
  destroy_pix(inverted)
end
# Picks the four mask pixels closest (Manhattan distance) to the image
# corners, restricted to the bounding box of the largest connected component
# when one is reported.
#
# @param mask [FFI::Pointer] 1bpp mask
# @return [Array<Array<Float>>, nil] four [x, y] pairs normalised by the mask
#   width/height, ordered top-left, top-right, bottom-right, bottom-left; nil
#   when there are no candidate pixels or the four picks are not distinct
def mask_corners(mask)
  width = pixGetWidth(mask)
  height = pixGetHeight(mask)

  candidates = read_mask_pixels(mask)
  return if candidates.empty?

  if (bounds = largest_component_bounds(mask))
    left, top, box_width, box_height = bounds
    x_span = left..(left + box_width - 1)
    y_span = top..(top + box_height - 1)
    candidates = candidates.select { |(px, py)| x_span.cover?(px) && y_span.cover?(py) }
    return if candidates.empty?
  end

  last_x = width - 1
  last_y = height - 1
  quad = [[0, 0], [last_x, 0], [last_x, last_y], [0, last_y]].map do |(corner_x, corner_y)|
    candidates.min_by { |(px, py)| (px - corner_x).abs + (py - corner_y).abs }
  end
  return if quad.uniq.size < 4

  quad.map { |(px, py)| [px / width.to_f, py / height.to_f] }
end
# Finds the bounding box of the largest 8-connected component in +mask+.
#
# Only the boxes are requested from pixConnComp: passing nil for the optional
# PIXA output avoids allocating an image per component that was never read.
#
# @param mask [FFI::Pointer] 1bpp mask
# @return [Array(Integer, Integer, Integer, Integer), nil] [x, y, w, h] of the
#   component with the largest box area, or nil when there are no components
#   with a positive area
def largest_component_bounds(mask)
  boxa = pixConnComp(mask, nil, 8)
  return if boxa.null?

  begin
    geometry = Array.new(4) { FFI::MemoryPointer.new(:int) }
    bounds = nil
    best_area = 0

    boxaGetCount(boxa).times do |index|
      box = boxaGetBox(boxa, index, L_CLONE)
      next if box.null?

      begin
        # A failed read would leave the previous box's values in the buffers.
        next unless boxGetGeometry(box, *geometry).zero?
      ensure
        destroy_box(box)
      end

      box_x, box_y, box_w, box_h = geometry.map(&:read_int)
      next unless box_w * box_h > best_area

      best_area = box_w * box_h
      bounds = [box_x, box_y, box_w, box_h]
    end

    bounds
  ensure
    destroy_boxa(boxa)
  end
end
# Lists the coordinates of every set bit in a 1bpp PIX, row by row.
#
# The PIX is byte-swapped before its buffer is copied and swapped again
# afterwards; bits are then read most-significant-first within each byte.
#
# @param pix [FFI::Pointer] 1bpp image
# @return [Array<Array(Integer, Integer)>] [x, y] pairs in row-major order
# @raise [LeptonicaError] when the first byte swap fails
def read_mask_pixels(pix)
  width = pixGetWidth(pix)
  height = pixGetHeight(pix)
  words_per_line = pixGetWpl(pix)
  raise LeptonicaError, 'Failed to read mask' unless pixEndianByteSwap(pix).zero?

  raw = pixGetData(pix).read_bytes(words_per_line * 4 * height)
  pixEndianByteSwap(pix)

  (0...height).flat_map do |row|
    row_start = row * words_per_line * 4
    (0...width).filter_map do |col|
      [col, row] if raw.getbyte(row_start + (col / 8)).anybits?(0x80 >> (col % 8))
    end
  end
end
# Area of a four-cornered polygon via the shoelace formula.
#
# @param corners [Array<Array<Numeric>>] four [x, y] pairs in boundary order
# @return [Float] unsigned area, independent of winding direction
def quad_area(corners)
  doubled = corners.each_with_index.reduce(0.0) do |acc, ((x1, y1), index)|
    x2, y2 = corners[(index + 1) % 4]
    acc + ((x1 * y2) - (x2 * y1))
  end

  (doubled / 2.0).abs
end
# Euclidean distance between two points.
#
# @param point_a [Array<Numeric>] [x, y]
# @param point_b [Array<Numeric>] [x, y]
# @return [Float]
def distance(point_a, point_b)
  delta_x = point_a[0] - point_b[0]
  delta_y = point_a[1] - point_b[1]
  Math.sqrt((delta_x**2) + (delta_y**2))
end
# Passes a native handle through, raising when it is missing.
#
# @param pix [FFI::Pointer, nil] handle returned by a Leptonica call
# @param message [String] error text to raise with
# @return [FFI::Pointer] the same handle
# @raise [LeptonicaError] when the handle is nil or a null pointer
def checked(pix, message)
  return pix unless pix.nil? || pix.null?

  raise LeptonicaError, message
end
# Releases a PIX handle; nil and null handles are ignored.
#
# pixDestroy takes the address of the handle, so the handle is first written
# into a one-pointer buffer.
#
# @param pix [FFI::Pointer, nil]
def destroy_pix(pix)
  return if pix.nil? || pix.null?

  slot = FFI::MemoryPointer.new(:pointer).tap { |holder| holder.write_pointer(pix) }
  pixDestroy(slot)
end
# Releases a PTA handle; nil and null handles are ignored.
#
# ptaDestroy takes the address of the handle, so the handle is first written
# into a one-pointer buffer.
#
# @param pta [FFI::Pointer, nil]
def destroy_pta(pta)
  return if pta.nil? || pta.null?

  slot = FFI::MemoryPointer.new(:pointer).tap { |holder| holder.write_pointer(pta) }
  ptaDestroy(slot)
end
# Releases a BOX handle; nil and null handles are ignored.
#
# boxDestroy takes the address of the handle, so the handle is first written
# into a one-pointer buffer.
#
# @param box [FFI::Pointer, nil]
def destroy_box(box)
  return if box.nil? || box.null?

  slot = FFI::MemoryPointer.new(:pointer).tap { |holder| holder.write_pointer(box) }
  boxDestroy(slot)
end
# Releases a BOXA handle; nil and null handles are ignored.
#
# boxaDestroy takes the address of the handle, so the handle is first written
# into a one-pointer buffer.
#
# @param boxa [FFI::Pointer, nil]
def destroy_boxa(boxa)
  return if boxa.nil? || boxa.null?

  slot = FFI::MemoryPointer.new(:pointer).tap { |holder| holder.write_pointer(boxa) }
  boxaDestroy(slot)
end
# Releases a PIXA handle; nil and null handles are ignored.
#
# pixaDestroy takes the address of the handle, so the handle is first written
# into a one-pointer buffer.
#
# @param pixa [FFI::Pointer, nil]
def destroy_pixa(pixa)
  return if pixa.nil? || pixa.null?

  slot = FFI::MemoryPointer.new(:pointer).tap { |holder| holder.write_pointer(pixa) }
  pixaDestroy(slot)
end
end

@ -27,9 +27,12 @@ class Pdfium
typedef :pointer, :FPDF_TEXTPAGE
typedef :pointer, :FPDF_PAGEOBJECT
typedef :pointer, :FPDF_PATHSEGMENT
typedef :pointer, :FPDF_FONT
MAX_SIZE = 32_767
BLANK_TEXT_CODEPOINTS = [0x00, 0x09, 0x0A, 0x0D, 0x20, 0xA0].freeze
FPDF_ANNOT = 0x01
FPDF_LCD_TEXT = 0x02
FPDF_NO_NATIVETEXT = 0x04
@ -69,6 +72,16 @@ class Pdfium
end
end
# Rectangle described by its origin (x, y) and size (w, h).
# The far edges are computed on first use and cached, so they do not follow
# later changes to the struct members.
ImageNode = Struct.new(:x, :y, :w, :h) do
  # @return [Numeric] x + w, cached after the first call
  def endx
    return @endx if @endx

    @endx = x + w
  end

  # @return [Numeric] y + h, cached after the first call
  def endy
    return @endy if @endy

    @endy = y + h
  end
end
# rubocop:disable Naming/ClassAndModuleCamelCase
class FPDF_LIBRARY_CONFIG < FFI::Struct
layout :version, :int,
@ -101,8 +114,21 @@ class Pdfium
attach_function :FPDFBitmap_GetWidth, [:FPDF_BITMAP], :int
attach_function :FPDFBitmap_GetHeight, [:FPDF_BITMAP], :int
attach_function :FPDFBitmap_GetStride, [:FPDF_BITMAP], :int
attach_function :FPDFBitmap_GetFormat, [:FPDF_BITMAP], :int
attach_function :FPDFBitmap_FillRect, %i[FPDF_BITMAP int int int int ulong], :void
FPDF_BITMAP_GRAY = 1
FPDF_BITMAP_BGR = 2
FPDF_BITMAP_BGRX = 3
FPDF_BITMAP_BGRA = 4
BITMAP_FORMAT_BANDS = {
FPDF_BITMAP_GRAY => [:gray, 1],
FPDF_BITMAP_BGR => [:bgr, 3],
FPDF_BITMAP_BGRX => [:bgrx, 4],
FPDF_BITMAP_BGRA => [:bgra, 4]
}.freeze
attach_function :FPDF_RenderPageBitmap, %i[FPDF_BITMAP FPDF_PAGE int int int int int int], :void
attach_function :FPDFText_LoadPage, [:FPDF_PAGE], :FPDF_TEXTPAGE
@ -116,6 +142,7 @@ class Pdfium
attach_function :FPDFText_CountRects, %i[FPDF_TEXTPAGE int int], :int
attach_function :FPDFText_GetRect, %i[FPDF_TEXTPAGE int pointer pointer pointer pointer], :int
attach_function :FPDFText_GetFontSize, %i[FPDF_TEXTPAGE int], :double
attach_function :FPDFText_GetLooseCharBox, %i[FPDF_TEXTPAGE int pointer], :int
# Page object functions for extracting paths/lines
attach_function :FPDFPage_CountObjects, [:FPDF_PAGE], :int
@ -131,6 +158,29 @@ class Pdfium
attach_function :FPDFTextObj_GetText, %i[FPDF_PAGEOBJECT FPDF_TEXTPAGE pointer ulong], :ulong
attach_function :FPDFTextObj_GetFontSize, %i[FPDF_PAGEOBJECT pointer], :int
attach_function :FPDFPage_InsertObject, %i[FPDF_PAGE FPDF_PAGEOBJECT], :void
attach_function :FPDFPage_RemoveObject, %i[FPDF_PAGE FPDF_PAGEOBJECT], :int
attach_function :FPDFPage_GenerateContent, [:FPDF_PAGE], :int
attach_function :FPDFPageObj_Destroy, [:FPDF_PAGEOBJECT], :void
attach_function :FPDFText_GetTextObject, %i[FPDF_TEXTPAGE int], :FPDF_PAGEOBJECT
attach_function :FPDFTextObj_GetFont, [:FPDF_PAGEOBJECT], :FPDF_FONT
attach_function :FPDFText_LoadStandardFont, %i[FPDF_DOCUMENT string], :FPDF_FONT
attach_function :FPDFPageObj_CreateTextObj, %i[FPDF_DOCUMENT FPDF_FONT float], :FPDF_PAGEOBJECT
attach_function :FPDFText_SetText, %i[FPDF_PAGEOBJECT pointer], :int
attach_function :FPDFPageObj_GetMatrix, %i[FPDF_PAGEOBJECT pointer], :int
attach_function :FPDFPageObj_SetMatrix, %i[FPDF_PAGEOBJECT pointer], :int
attach_function :FPDFPageObj_CreateNewRect, %i[float float float float], :FPDF_PAGEOBJECT
attach_function :FPDFPageObj_SetFillColor, %i[FPDF_PAGEOBJECT uint uint uint uint], :int
attach_function :FPDFPath_SetDrawMode, %i[FPDF_PAGEOBJECT int int], :int
attach_function :FPDFFormObj_CountObjects, [:FPDF_PAGEOBJECT], :int
attach_function :FPDFFormObj_GetObject, %i[FPDF_PAGEOBJECT ulong], :FPDF_PAGEOBJECT
attach_function :FPDFFormObj_RemoveObject, %i[FPDF_PAGEOBJECT FPDF_PAGEOBJECT], :int
attach_function :FPDFPageObj_Transform, %i[FPDF_PAGEOBJECT double double double double double double], :void
attach_function :FPDFImageObj_GetBitmap, [:FPDF_PAGEOBJECT], :FPDF_BITMAP
attach_function :FPDFImageObj_LoadJpegFileInline, %i[pointer int FPDF_PAGEOBJECT pointer], :int
# Page object types
FPDF_PAGEOBJ_UNKNOWN = 0
FPDF_PAGEOBJ_TEXT = 1
@ -216,7 +266,6 @@ class Pdfium
attach_function :FPDFPage_SetRotation, %i[FPDF_PAGE int], :void
attach_function :FPDFPage_TransFormWithClip, %i[FPDF_PAGE pointer pointer], :int
attach_function :FPDFPage_TransformAnnots, %i[FPDF_PAGE double double double double double double], :void
attach_function :FPDFPage_GenerateContent, [:FPDF_PAGE], :int
attach_function :FPDFPage_GetMediaBox, %i[FPDF_PAGE pointer pointer pointer pointer], :int
attach_function :FPDFPage_SetMediaBox, %i[FPDF_PAGE float float float float], :void
attach_function :FPDFPage_GetCropBox, %i[FPDF_PAGE pointer pointer pointer pointer], :int
@ -241,6 +290,12 @@ class Pdfium
layout :version, :int,
:WriteBlock, :pointer
end
# FFI struct with three members: a file length, a pointer to a block-reading
# callback and an opaque parameter pointer.
# NOTE(review): presumably mirrors PDFium's FPDF_FILEACCESS from fpdfview.h -
# confirm the member types against the header of the bundled PDFium build.
class FPDF_FILEACCESS < FFI::Struct
layout :m_FileLen, :ulong,
:m_GetBlock, :pointer,
:m_Param, :pointer
end
# rubocop:enable Naming/ClassAndModuleCamelCase
attach_function :FPDF_SaveAsCopy, %i[FPDF_DOCUMENT pointer ulong], :int
@ -257,6 +312,12 @@ class Pdfium
define_singleton_method(:FPDF_ImportPages) { |*| raise PdfiumError, 'FPDF_ImportPages is not available' } # rubocop:disable Naming/MethodName
end
# Optional binding: when the loaded PDFium library does not export
# FPDF_RemoveOrphanObjects, a stand-in singleton method is defined that
# accepts any arguments and returns -1 instead of raising.
begin
attach_function :FPDF_RemoveOrphanObjects, [:FPDF_DOCUMENT], :int
rescue FFI::NotFoundError
define_singleton_method(:FPDF_RemoveOrphanObjects) { |*| -1 } # rubocop:disable Naming/MethodName
end
FPDF_ERR_SUCCESS = 0
FPDF_ERR_UNKNOWN = 1
FPDF_ERR_FILE = 2
@ -281,6 +342,10 @@ class Pdfium
PDFIUM_ERRORS[code] || "Unknown error code: #{code}"
end
# Yields +instance+ (nil by default) to the given block and returns the
# block's value.
def self.with_instance(instance = nil)
yield instance
end
def self.check_last_error(context_message = 'PDFium operation failed')
error_code = FPDF_GetLastError()
@ -303,6 +368,7 @@ class Pdfium
@source_buffer = source_buffer
@form_handle = FFI::Pointer::NULL
@form_fill_info_mem = FFI::Pointer::NULL
@presave_hooks = {}
init_form_fill_environment
end
@ -322,10 +388,10 @@ class Pdfium
@page_count ||= Pdfium.FPDF_GetPageCount(@document_ptr)
end
def import_pages(src_doc)
def import_pages(src_doc, pages: nil, index: nil)
ensure_not_closed!
result = Pdfium.FPDF_ImportPages(@document_ptr, src_doc.document_ptr, nil, page_count)
result = Pdfium.FPDF_ImportPages(@document_ptr, src_doc.document_ptr, pages, index || page_count)
raise PdfiumError, 'Failed to import pages' if result.zero?
@ -418,6 +484,8 @@ class Pdfium
def save(io, flags: Pdfium::FPDF_NO_INCREMENTAL)
ensure_not_closed!
run_presave_hooks
file_write_mem = FFI::MemoryPointer.new(FPDF_FILEWRITE.size)
file_write_struct = FPDF_FILEWRITE.new(file_write_mem)
@ -439,6 +507,24 @@ class Pdfium
io
end
# Asks PDFium to remove orphan objects from this document.
#
# Returns whatever FPDF_RemoveOrphanObjects returns; when the symbol is not
# exported by the loaded library the fallback binding returns -1.
# ensure_not_closed! runs first.
def cleanup
ensure_not_closed!
Pdfium.FPDF_RemoveOrphanObjects(@document_ptr)
end
# Loads the standard 'Helvetica' font for this document on first use and
# returns the memoized handle afterwards.
# NOTE(review): a null pointer result is truthy in Ruby and would be memoized
# as well - callers should check for null.
def standard_font
@standard_font ||= Pdfium.FPDFText_LoadStandardFont(@document_ptr, 'Helvetica')
end
# Registers +block+ to run before the next save, keyed by +key+.
# A hook already stored under the same key is kept, so registering the same
# key repeatedly still yields a single call per save.
def add_presave_hook(key, &block)
@presave_hooks[key] ||= block
end
# Invokes every registered pre-save hook, in registration order.
#
# @return [Hash] the hook table itself
def run_presave_hooks
  @presave_hooks.each_value { |hook| hook.call }
end
def close
return if closed?
@ -495,6 +581,16 @@ class Pdfium
@height ||= Pdfium.FPDF_GetPageHeightF(@page_ptr)
end
# Page rotation as reported by FPDFPage_GetRotation, memoized after the first
# call (presumably PDFium's quarter-turn code rather than degrees - confirm
# against the PDFium API).
def rotation
@rotation ||= Pdfium.FPDFPage_GetRotation(@page_ptr)
end
# Sets the page rotation through FPDFPage_SetRotation and updates the
# memoized value so later reads of #rotation see the new setting.
def rotation=(value)
Pdfium.FPDFPage_SetRotation(@page_ptr, value)
@rotation = value
end
def closed?
@closed
end
@ -583,12 +679,7 @@ class Pdfium
return @text_nodes if char_count.zero?
left_ptr = FFI::MemoryPointer.new(:double)
right_ptr = FFI::MemoryPointer.new(:double)
bottom_ptr = FFI::MemoryPointer.new(:double)
top_ptr = FFI::MemoryPointer.new(:double)
origin_x_ptr = FFI::MemoryPointer.new(:double)
origin_y_ptr = FFI::MemoryPointer.new(:double)
loose_rect_ptr = FFI::MemoryPointer.new(:float, 4)
i = 0
@ -611,30 +702,16 @@ class Pdfium
char = codepoint.chr(Encoding::UTF_8)
result = Pdfium.FPDFText_GetCharBox(text_page, box_index, left_ptr, right_ptr, bottom_ptr, top_ptr)
next if result.zero?
left = left_ptr.read_double
right = right_ptr.read_double
Pdfium.FPDFText_GetCharOrigin(text_page, box_index, origin_x_ptr, origin_y_ptr)
origin_y = origin_y_ptr.read_double
origin_x = origin_x_ptr.read_double
next if Pdfium.FPDFText_GetLooseCharBox(text_page, box_index, loose_rect_ptr).zero?
font_size = Pdfium.FPDFText_GetFontSize(text_page, box_index)
font_size = 8 if font_size == 1
loose_left, loose_top, loose_right, loose_bottom = loose_rect_ptr.read_array_of_float(4)
abs_x = left
abs_y = height - origin_y - (font_size * 0.8)
abs_width = right - left
abs_height = font_size
next if loose_right <= loose_left || loose_top <= loose_bottom
x = origin_x / width
y = abs_y / height
node_width = (abs_width + ((abs_x - origin_x).abs * 2)) / width
node_height = abs_height / height
x = loose_left / width
y = (height - loose_top) / height
node_width = (loose_right - loose_left) / width
node_height = (loose_top - loose_bottom) / height
@text_nodes << TextNode.new(char, x, y, node_width, node_height)
ensure
@ -650,6 +727,378 @@ class Pdfium
Pdfium.FPDFText_ClosePage(text_page) if text_page && !text_page.null?
end
# Permanently removes the content under the given rectangles from this page.
#
# Steps, in order: flatten and rotate the page, unwrap form objects touching
# the rectangles, strip the covered characters, optionally process image
# objects, draw the redaction rectangles, regenerate the content stream, drop
# text objects left blank, and schedule a document cleanup before the next
# save.
#
# @param rects [Array<Hash>] rectangles with 'x', 'y', 'w', 'h' keys as
#   fractions of the page size, with y measured from the top edge
# @yield optional image processor forwarded to redact_image_objects; images
#   are only processed when a block is given
# @return [nil]
# @raise [PdfiumError] when the page content cannot be regenerated
def redact(rects, &image_processor)
  ensure_not_closed!

  flatten
  rotate

  # Convert to [left, bottom, right, top] in page units with a bottom-left
  # origin.
  rect_bounds = rects.map do |rect|
    x_min = rect['x'].to_f * width
    y_max = height - (rect['y'].to_f * height)

    [x_min, y_max - (rect['h'].to_f * height), x_min + (rect['w'].to_f * width), y_max]
  end

  unwrap_form_objects(rect_bounds)
  remove_redacted_chars(rect_bounds)
  redact_image_objects(rect_bounds, &image_processor) if image_processor
  draw_redaction_rects(rect_bounds)

  generated = Pdfium.FPDFPage_GenerateContent(@page_ptr)
  raise PdfiumError, 'Failed to generate page content' if generated.zero?

  remove_blank_text_objects

  # Keyed hook: redacting several pages still triggers one cleanup per save.
  @document.add_presave_hook(:cleanup) { @document.cleanup }

  reset_text_memoization

  nil
end
# Removes text objects whose text consists solely of codepoints listed in
# BLANK_TEXT_CODEPOINTS (an empty text counts as blank), then regenerates the
# page content. Does nothing when the text page cannot be loaded or no blank
# object is found.
def remove_blank_text_objects
  text_page = Pdfium.FPDFText_LoadPage(@page_ptr)
  return if text_page.null?

  blank_objects =
    begin
      (0...Pdfium.FPDFPage_CountObjects(@page_ptr)).filter_map do |index|
        candidate = Pdfium.FPDFPage_GetObject(@page_ptr, index)
        next if candidate.null?
        next if Pdfium.FPDFPageObj_GetType(candidate) != Pdfium::FPDF_PAGEOBJ_TEXT

        # First call sizes the buffer; the text is UTF-16LE with a two-byte
        # terminator, so anything shorter than 2 bytes is unusable.
        byte_size = Pdfium.FPDFTextObj_GetText(candidate, text_page, FFI::Pointer::NULL, 0)
        next if byte_size < 2

        utf16 = FFI::MemoryPointer.new(:uint8, byte_size)
        copied = Pdfium.FPDFTextObj_GetText(candidate, text_page, utf16, byte_size)
        next if copied < 2

        text = utf16.read_bytes(copied - 2).force_encoding('UTF-16LE').encode('UTF-8')
        candidate if text.each_codepoint.all? { |code| BLANK_TEXT_CODEPOINTS.include?(code) }
      end
    ensure
      Pdfium.FPDFText_ClosePage(text_page)
    end

  return if blank_objects.empty?

  blank_objects.each { |candidate| remove_page_object(candidate) }

  Pdfium.FPDFPage_GenerateContent(@page_ptr)
end
# Removes every text object that has at least one character inside a
# redaction rectangle. When only some of its characters are covered, the
# surviving ones are rebuilt first via rebuild_text_object_survivors.
#
# @param rect_bounds [Array<Array<Float>>] [left, bottom, right, top] tuples
# @raise [PdfiumError] when the text page cannot be loaded
def remove_redacted_chars(rect_bounds)
  text_page = Pdfium.FPDFText_LoadPage(@page_ptr)
  raise PdfiumError, 'Failed to load text page' if text_page.null?

  chars_by_object =
    begin
      collect_text_objects_chars(text_page, rect_bounds)
    ensure
      Pdfium.FPDFText_ClosePage(text_page)
    end

  chars_by_object.each_value do |entry|
    hit_flags = entry[:chars].map { |char| char[:redacted] }
    next unless hit_flags.any?

    rebuild_text_object_survivors(entry) unless hit_flags.all?

    remove_page_object(entry[:ptr])
  end
end
# Dissolves form objects on the page: each child is detached, transformed by
# the form's matrix (identity when the matrix cannot be read) and inserted
# directly into the page, after which the emptied form is removed.
#
# @param rect_bounds [Array<Array<Float>>, nil] when given, only forms that
#   find_form_object reports for these rectangles are unwrapped; nil unwraps
#   every form
# @raise [PdfiumError] when a child cannot be detached from its form
def unwrap_form_objects(rect_bounds = nil)
  identity = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
  matrix_buffer = FFI::MemoryPointer.new(:float, 6)
  any_unwrapped = false

  # Re-query after each removal: unwrapping may expose nested forms.
  while (form = find_form_object(rect_bounds))
    any_unwrapped = true

    matrix_read = !Pdfium.FPDFPageObj_GetMatrix(form, matrix_buffer).zero?
    matrix = matrix_read ? matrix_buffer.read_array_of_float(6) : identity

    # Walk backwards so removals do not shift the indexes still to visit.
    child_count = Pdfium.FPDFFormObj_CountObjects(form)
    (child_count - 1).downto(0) do |child_index|
      child = Pdfium.FPDFFormObj_GetObject(form, child_index)
      next if child.null?

      detached = Pdfium.FPDFFormObj_RemoveObject(form, child)
      raise PdfiumError, 'Failed to unwrap form object' if detached.zero?

      Pdfium.FPDFPageObj_Transform(child, *matrix)
      Pdfium.FPDFPage_InsertObject(@page_ptr, child)
    end

    remove_page_object(form)
  end

  return unless any_unwrapped

  Pdfium.FPDFPage_GenerateContent(@page_ptr)
  reset_text_memoization
end
# Returns the first form object on the page, or nil when there is none.
#
# rect_bounds - optional list of [left, bottom, right, top] rectangles; when
#               given, only a form object whose bounds overlap at least one
#               rectangle is returned. Form objects whose bounds cannot be
#               read are skipped in that case.
def find_form_object(rect_bounds = nil)
  bounds_ptrs = Array.new(4) { FFI::MemoryPointer.new(:float) }
  object_count = Pdfium.FPDFPage_CountObjects(@page_ptr)

  (0...object_count).each do |position|
    candidate = Pdfium.FPDFPage_GetObject(@page_ptr, position)
    next if candidate.null?
    next unless Pdfium.FPDFPageObj_GetType(candidate) == FPDF_PAGEOBJ_FORM
    return candidate if rect_bounds.nil?
    next if Pdfium.FPDFPageObj_GetBounds(candidate, *bounds_ptrs).zero?

    left, bottom, right, top = bounds_ptrs.map(&:read_float)
    overlapping = rect_bounds.any? do |rl, rb, rr, rt|
      left < rr && right > rl && bottom < rt && top > rb
    end
    return candidate if overlapping
  end

  nil
end
# Groups the characters of +text_page+ by the text object they belong to and
# flags each character as redacted when the centre of its character box lies
# inside any rectangle of +rect_bounds+ (each [left, bottom, right, top]).
#
# Returns a Hash keyed by the text object's pointer address. Each value is
# { ptr: <text object pointer>, chars: [{ codepoint:, origin_x:, origin_y:,
# redacted: }, ...] } with the characters in text-page order.
def collect_text_objects_chars(text_page, rect_bounds)
char_count = Pdfium.FPDFText_CountChars(text_page)
left_ptr, right_ptr, bottom_ptr, top_ptr, origin_x_ptr, origin_y_ptr =
Array.new(6) { FFI::MemoryPointer.new(:double) }
text_objects_chars = {}
index = 0
while index < char_count
object_ptr = Pdfium.FPDFText_GetTextObject(text_page, index)
codepoint = Pdfium.FPDFText_GetUnicode(text_page, index)
# Box and origin are always read at the index of the first code unit.
box_index = index
# A high surrogate followed by a low surrogate is merged into a single
# codepoint and the second unit is consumed as well.
if codepoint.between?(0xD800, 0xDBFF) && (index + 1 < char_count)
codepoint2 = Pdfium.FPDFText_GetUnicode(text_page, index + 1)
if codepoint2.between?(0xDC00, 0xDFFF)
codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (codepoint2 - 0xDC00)
index += 1
end
end
index += 1
# Characters without an owning text object or without a char box are skipped.
next if object_ptr.null?
next if Pdfium.FPDFText_GetCharBox(text_page, box_index, left_ptr, right_ptr, bottom_ptr, top_ptr).zero?
center_x = (left_ptr.read_double + right_ptr.read_double) / 2.0
center_y = (bottom_ptr.read_double + top_ptr.read_double) / 2.0
# NOTE(review): the return value of FPDFText_GetCharOrigin is not checked, so
# on failure the origin pointers keep whatever they held before — confirm
# this is acceptable.
Pdfium.FPDFText_GetCharOrigin(text_page, box_index, origin_x_ptr, origin_y_ptr)
entry = text_objects_chars[object_ptr.address] ||= { ptr: object_ptr, chars: [] }
entry[:chars] << {
codepoint:,
origin_x: origin_x_ptr.read_double,
origin_y: origin_y_ptr.read_double,
redacted: rect_bounds.any? do |left, bottom, right, top|
center_x.between?(left, right) && center_y.between?(bottom, top)
end
}
end
text_objects_chars
end
# Re-creates every non-redacted character of +entry+ (as produced by
# collect_text_objects_chars) as its own single-character text object on the
# page.
#
# Each new object uses @document.standard_font, the font size of the original
# text object (12.0 when it cannot be read) and the first four components of
# the original object's matrix (identity when it cannot be read); the
# translation components are replaced by the character's recorded origin.
# Characters whose text object cannot be created, or whose text cannot be set,
# are skipped.
def rebuild_text_object_survivors(entry)
font_ptr = @document.standard_font
font_size_ptr = FFI::MemoryPointer.new(:float)
font_size = Pdfium.FPDFTextObj_GetFontSize(entry[:ptr], font_size_ptr).zero? ? 12.0 : font_size_ptr.read_float
matrix_ptr = FFI::MemoryPointer.new(:float, 6)
matrix =
if Pdfium.FPDFPageObj_GetMatrix(entry[:ptr], matrix_ptr).zero?
[1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
else
matrix_ptr.read_array_of_float(6)
end
entry[:chars].reject { |char| char[:redacted] }.each do |char|
new_object = Pdfium.FPDFPageObj_CreateTextObj(@document.document_ptr, font_ptr, font_size)
next if new_object.null?
# The text is passed as UTF-16LE bytes followed by a two-byte zero terminator.
text_data = [char[:codepoint]].pack('U').encode(Encoding::UTF_16LE).b + "\x00\x00".b
text_buffer = FFI::MemoryPointer.new(:char, text_data.bytesize)
text_buffer.put_bytes(0, text_data)
if Pdfium.FPDFText_SetText(new_object, text_buffer).zero?
# Destroy the object that was never inserted into the page.
Pdfium.FPDFPageObj_Destroy(new_object)
next
end
# matrix_ptr is reused as scratch space: the original matrix was already
# copied into +matrix+ above.
matrix_ptr.write_array_of_float([matrix[0], matrix[1], matrix[2], matrix[3],
char[:origin_x], char[:origin_y]])
Pdfium.FPDFPageObj_SetMatrix(new_object, matrix_ptr)
Pdfium.FPDFPage_InsertObject(@page_ptr, new_object)
end
end
# Drops the memoized text state: @text is removed entirely (so a later
# `defined?(@text)` check is false again) while the node caches are set to nil.
def reset_text_memoization
  remove_instance_variable(:@text) if instance_variable_defined?(:@text)
  @text_nodes = @text_objects = @line_nodes = nil
end
# Detaches +object_ptr+ from the page and then destroys it.
#
# Raises PdfiumError (without destroying the object) when the page refuses to
# remove it.
def remove_page_object(object_ptr)
  removed = Pdfium.FPDFPage_RemoveObject(@page_ptr, object_ptr)
  raise PdfiumError, 'Failed to remove page object' if removed.zero?

  Pdfium.FPDFPageObj_Destroy(object_ptr)
end
# Appends one opaque black filled rectangle to the page for every
# [left, bottom, right, top] entry of +rect_bounds+.
#
# Raises PdfiumError when a rectangle object cannot be created.
def draw_redaction_rects(rect_bounds)
  rect_bounds.each do |left, bottom, right, top|
    rect_width = right - left
    rect_height = top - bottom
    rect_object = Pdfium.FPDFPageObj_CreateNewRect(left, bottom, rect_width, rect_height)
    raise PdfiumError, 'Failed to create redaction rect' if rect_object.null?

    Pdfium.FPDFPageObj_SetFillColor(rect_object, 0, 0, 0, 255)
    # Fill mode 1 with stroking disabled.
    Pdfium.FPDFPath_SetDrawMode(rect_object, 1, 0)
    Pdfium.FPDFPage_InsertObject(@page_ptr, rect_object)
  end
end
# Walks the image objects of the page and, for each one whose bounds overlap a
# rectangle of +rect_bounds+, yields its decoded bitmap together with the
# overlapping rectangles expressed in image pixels. When the block returns
# JPEG data, the image object is reloaded from it.
#
# Images whose bounds cannot be read, whose matrix is (numerically) singular,
# or that produce no pixel rectangles are left untouched.
#
# Raises PdfiumError when the matrix of an overlapping image cannot be read.
def redact_image_objects(rect_bounds)
  bounds_ptrs = Array.new(4) { FFI::MemoryPointer.new(:float) }
  matrix_ptr = FFI::MemoryPointer.new(:float, 6)

  Pdfium.FPDFPage_CountObjects(@page_ptr).times do |index|
    image_ptr = Pdfium.FPDFPage_GetObject(@page_ptr, index)
    next if image_ptr.null?
    next unless Pdfium.FPDFPageObj_GetType(image_ptr) == FPDF_PAGEOBJ_IMAGE
    next if Pdfium.FPDFPageObj_GetBounds(image_ptr, *bounds_ptrs).zero?

    obj_left, obj_bottom, obj_right, obj_top = bounds_ptrs.map(&:read_float)
    hits = rect_bounds.select do |left, bottom, right, top|
      obj_left < right && obj_right > left && obj_bottom < top && obj_top > bottom
    end
    next if hits.empty?

    if Pdfium.FPDFPageObj_GetMatrix(image_ptr, matrix_ptr).zero?
      raise PdfiumError, 'Failed to get image matrix'
    end

    matrix = matrix_ptr.read_array_of_float(6)
    determinant = (matrix[0] * matrix[3]) - (matrix[1] * matrix[2])
    # A singular matrix cannot be inverted to map page space into the image.
    next if determinant.abs < 1e-9

    bitmap = extract_image_bitmap(image_ptr)
    pixel_rects = image_pixel_rects(matrix, bitmap[:width], bitmap[:height], hits)
    next if pixel_rects.empty?

    jpeg = yield(bitmap, pixel_rects)
    load_image_jpeg(image_ptr, jpeg) if jpeg
  end
end
# Copies the pixels of an image object into a Ruby string.
#
# Returns { data:, width:, height:, bands:, format: } where +data+ holds the
# rows tightly packed (row padding implied by the bitmap stride is removed) and
# +format+/+bands+ come from BITMAP_FORMAT_BANDS.
#
# Raises PdfiumError when the bitmap cannot be obtained or its format is not
# listed in BITMAP_FORMAT_BANDS. The bitmap is always destroyed afterwards.
def extract_image_bitmap(object_ptr)
  bitmap_ptr = Pdfium.FPDFImageObj_GetBitmap(object_ptr)
  raise PdfiumError, 'Failed to get image bitmap' if bitmap_ptr.nil? || bitmap_ptr.null?

  format, bands = BITMAP_FORMAT_BANDS[Pdfium.FPDFBitmap_GetFormat(bitmap_ptr)]
  raise PdfiumError, 'Unsupported image bitmap format' if format.nil?

  pixel_width = Pdfium.FPDFBitmap_GetWidth(bitmap_ptr)
  pixel_height = Pdfium.FPDFBitmap_GetHeight(bitmap_ptr)
  stride = Pdfium.FPDFBitmap_GetStride(bitmap_ptr)
  raw = Pdfium.FPDFBitmap_GetBuffer(bitmap_ptr).read_bytes(stride * pixel_height)

  row_size = pixel_width * bands
  data =
    if stride == row_size
      raw
    else
      # Keep only the meaningful bytes of every row.
      (0...pixel_height).map { |row| raw.byteslice(row * stride, row_size) }.join
    end

  { data:, width: pixel_width, height: pixel_height, bands:, format: }
ensure
  Pdfium.FPDFBitmap_Destroy(bitmap_ptr) if bitmap_ptr && !bitmap_ptr.null?
end
# Converts page-space rectangles into pixel rectangles of an image placed on
# the page with +matrix+ ([a, b, c, d, e, f]).
#
# Every corner of a [left, bottom, right, top] rectangle is mapped through the
# inverse of the matrix into the image's unit square, flipped vertically and
# scaled to pixels. The result is the axis-aligned box around those corners,
# clamped to the image, as [left, top, width, height]. Rectangles that fall
# completely outside the image are dropped.
#
# The caller is expected to pass an invertible matrix.
def image_pixel_rects(matrix, image_width, image_height, rect_bounds)
  a, b, c, d, e, f = matrix
  det = (a * d) - (b * c)

  rect_bounds.filter_map do |left, bottom, right, top|
    pixel_corners = [[left, bottom], [right, bottom], [left, top], [right, top]].map do |x, y|
      u = ((d * (x - e)) - (c * (y - f))) / det
      v = ((a * (y - f)) - (b * (x - e))) / det
      [u * image_width, (1 - v) * image_height]
    end
    xs, ys = pixel_corners.transpose

    outside = xs.max <= 0 || xs.min >= image_width || ys.max <= 0 || ys.min >= image_height
    next if outside

    px_left = xs.min.floor.clamp(0, image_width - 1)
    px_top = ys.min.floor.clamp(0, image_height - 1)
    px_width = (xs.max.ceil - px_left).clamp(1, image_width - px_left)
    px_height = (ys.max.ceil - px_top).clamp(1, image_height - px_top)

    [px_left, px_top, px_width, px_height]
  end
end
# Replaces the contents of the image object +object_ptr+ with the JPEG bytes
# in +jpeg+, served to Pdfium through an in-memory FPDF_FILEACCESS reader.
#
# Raises PdfiumError when Pdfium rejects the data.
def load_image_jpeg(object_ptr, jpeg)
  reader = FFI::Function.new(:int, %i[pointer ulong pointer ulong]) do |_param, position, out, size|
    chunk = jpeg.byteslice(position, size) || ''.b
    out.put_bytes(0, chunk)
    1
  end

  file_access = Pdfium::FPDF_FILEACCESS.new
  file_access[:m_FileLen] = jpeg.bytesize
  file_access[:m_GetBlock] = reader
  file_access[:m_Param] = FFI::Pointer::NULL

  pages_ptr = FFI::MemoryPointer.new(:pointer, 1).tap { |ptr| ptr.write_pointer(@page_ptr) }

  loaded = Pdfium.FPDFImageObj_LoadJpegFileInline(pages_ptr, 1, object_ptr, file_access)
  raise PdfiumError, 'Failed to load redacted image' if loaded.zero?
end
def text_objects
return @text_objects if @text_objects
@ -801,6 +1250,35 @@ class Pdfium
@line_nodes = @line_nodes.sort { |a, b| a.endy == b.endy ? a.x <=> b.x : a.endy <=> b.endy }
end
# Returns an ImageNode for every image object on the page whose bounds can be
# read. Coordinates are fractions of the page size with the origin at the top
# left, clamped to 0..1; images with no visible extent after clamping are
# omitted.
def image_nodes
  ensure_not_closed!

  bounds_ptrs = Array.new(4) { FFI::MemoryPointer.new(:float) }

  Pdfium.FPDFPage_CountObjects(@page_ptr).times.filter_map do |index|
    object_ptr = Pdfium.FPDFPage_GetObject(@page_ptr, index)
    next if object_ptr.null?
    next unless Pdfium.FPDFPageObj_GetType(object_ptr) == FPDF_PAGEOBJ_IMAGE
    next if Pdfium.FPDFPageObj_GetBounds(object_ptr, *bounds_ptrs).zero?

    obj_left, obj_bottom, obj_right, obj_top = bounds_ptrs.map(&:read_float)

    # Page space has its origin at the bottom, node space at the top.
    left = (obj_left / width).clamp(0, 1)
    top = ((height - obj_top) / height).clamp(0, 1)
    right = (obj_right / width).clamp(0, 1)
    bottom = ((height - obj_bottom) / height).clamp(0, 1)

    node_width = right - left
    node_height = bottom - top
    next if node_width <= 0 || node_height <= 0

    ImageNode.new(left, top, node_width, node_height)
  end
end
def rotate
ensure_not_closed!
@ -864,7 +1342,8 @@ class Pdfium
Pdfium.FPDFPage_TransFormWithClip(page_ptr, matrix_ptr, FFI::Pointer::NULL)
Pdfium.FPDFPage_SetRotation(page_ptr, 0)
Pdfium.FPDFPage_GenerateContent(page_ptr)
reload
true
end
@ -880,9 +1359,25 @@ class Pdfium
raise PdfiumError, "Failed to flatten page #{page_index}"
end
reload if result == Pdfium::FLATTEN_SUCCESS
result
end
# Closes the current page handle, loads the page again from the document and
# clears the cached rotation, size and text state.
#
# Raises PdfiumError when the page cannot be loaded again.
def reload
  Pdfium.FPDF_ClosePage(@page_ptr)

  @page_ptr = Pdfium.FPDF_LoadPage(@document.document_ptr, @page_index)
  raise PdfiumError, "Failed to reload page #{page_index}" if @page_ptr.null?

  @rotation = @width = @height = nil
  reset_text_memoization
end
def close
return if closed?

@ -0,0 +1,125 @@
# frozen_string_literal: true
module Templates
  # Builds a minimal single-page PDF around an already encoded PNG.
  #
  # The PNG's compressed IDAT data is embedded unchanged as a FlateDecode image
  # stream with PNG predictors, so the pixels are never decoded here. Only
  # non-interlaced grayscale, RGB and palette PNGs without transparency are
  # accepted; anything else raises InvalidPng.
  module BuildImagePagePdf
    InvalidPng = Class.new(StandardError)

    PNG_SIGNATURE = "\x89PNG\r\n\x1a\n".b

    HEADER = "%PDF-1.4\n"
    CATALOG_OBJECT = '<< /Type /Catalog /Pages 2 0 R >>'
    PAGES_OBJECT = '<< /Type /Pages /Kids [ 3 0 R ] /Count 1 >>'
    PAGE_OBJECT_TEMPLATE =
      '<< /Type /Page /Parent 2 0 R /MediaBox [ 0 0 %<page_width>s %<page_height>s ] ' \
      '/Resources << /XObject << /Im0 4 0 R >> >> /Contents 5 0 R >>'
    IMAGE_DICT_TEMPLATE =
      '<< /Type /XObject /Subtype /Image /Width %<width>d /Height %<height>d ' \
      '/BitsPerComponent %<bit_depth>d /ColorSpace %<color_space>s /Filter /FlateDecode ' \
      '/DecodeParms << /Predictor 15 /Colors %<colors>d /BitsPerComponent %<bit_depth>d ' \
      '/Columns %<width>d >> /Length %<length>d >>'
    CONTENTS_DICT_TEMPLATE = '<< /Length %<length>d >>'
    CONTENTS_TEMPLATE = "q\n%<image_width>s 0 0 %<image_height>s %<image_x>s %<image_y>s cm\n/Im0 Do\nQ"
    INDEXED_COLOR_SPACE_TEMPLATE = '[ /Indexed /DeviceRGB %<high_value>d <%<palette>s> ]'
    STREAM_OBJECT_TEMPLATE = "%<dict>s\nstream\n%<data>s\nendstream".b
    OBJECT_TEMPLATE = "%<number>d 0 obj\n%<object>s\nendobj\n".b
    XREF_HEADER_TEMPLATE = "xref\n0 %<size>d\n0000000000 65535 f \n"
    XREF_ENTRY_TEMPLATE = "%<offset>010d 00000 n \n"
    TRAILER_TEMPLATE = "trailer\n<< /Size %<size>d /Root 1 0 R >>\nstartxref\n%<xref_offset>d\n%%%%EOF"

    module_function

    # Returns the PDF bytes for +png_data+ on a page of the given size.
    #
    # image_box - optional [x, y, width, height] placement of the image on the
    #             page; defaults to the whole page. All sizes are rounded to
    #             four decimals.
    #
    # Raises InvalidPng for malformed or unsupported PNG input.
    def call(png_data, page_width:, page_height:, image_box: nil)
      png = parse_png(png_data)
      raise InvalidPng, 'interlaced png is not supported' unless png[:interlace].zero?

      color_space, colors = resolve_color_space(png)
      page_size = [page_width, page_height].map { |value| value.round(4) }
      placement = image_box || [0, 0, page_width, page_height]

      build_pdf(png, color_space, colors, page_size, placement.map { |value| value.round(4) })
    end

    # Maps the PNG colour type to the PDF colour space entry and the number of
    # colour components per sample.
    def resolve_color_space(png)
      case png[:color_type]
      when 0 then ['/DeviceGray', 1]
      when 2 then ['/DeviceRGB', 3]
      when 3
        palette = png[:palette]
        raise InvalidPng, 'missing palette' if palette.nil?

        indexed = format(INDEXED_COLOR_SPACE_TEMPLATE,
                         high_value: (palette.bytesize / 3) - 1,
                         palette: palette.unpack1('H*'))
        [indexed, 1]
      else
        raise InvalidPng, "unsupported color type #{png[:color_type]}"
      end
    end

    # Walks the PNG chunks and returns the IHDR fields, the palette (if any)
    # and the concatenated IDAT payloads. Chunk checksums are not verified.
    def parse_png(data)
      raise InvalidPng, 'not a png' unless data.start_with?(PNG_SIGNATURE)

      header = nil
      palette = nil
      image_data = +''.b
      offset = PNG_SIGNATURE.bytesize

      # Every chunk is: 4-byte length, 4-byte type, payload, 4-byte checksum.
      until offset + 8 > data.bytesize
        length, type = data.byteslice(offset, 8).unpack('Na4')
        break if type == 'IEND'

        payload_offset = offset + 8

        case type
        when 'IHDR' then header = data.byteslice(payload_offset, length)
        when 'PLTE' then palette = data.byteslice(payload_offset, length)
        when 'tRNS' then raise InvalidPng, 'transparency is not supported'
        when 'IDAT' then image_data << data.byteslice(payload_offset, length)
        end

        offset = payload_offset + length + 4
      end

      raise InvalidPng, 'missing image data' if header.nil? || header.bytesize < 13 || image_data.empty?

      width, height, bit_depth, color_type, _compression, _filter, interlace = header.unpack('N2C5')

      { width:, height:, bit_depth:, color_type:, interlace:, palette:, idat: image_data }
    end

    # Serialises the five PDF objects (catalog, pages, page, image, contents)
    # followed by the cross-reference table and trailer.
    def build_pdf(png, color_space, colors, page_size, image_box)
      page_width, page_height = page_size
      image_x, image_y, image_width, image_height = image_box

      contents = format(CONTENTS_TEMPLATE, image_x:, image_y:, image_width:, image_height:)
      contents_dict = format(CONTENTS_DICT_TEMPLATE, length: contents.bytesize)
      image_dict = format(IMAGE_DICT_TEMPLATE,
                          width: png[:width], height: png[:height], bit_depth: png[:bit_depth],
                          color_space:, colors:, length: png[:idat].bytesize)

      bodies = [
        CATALOG_OBJECT,
        PAGES_OBJECT,
        format(PAGE_OBJECT_TEMPLATE, page_width:, page_height:),
        format(STREAM_OBJECT_TEMPLATE, dict: image_dict, data: png[:idat]),
        format(STREAM_OBJECT_TEMPLATE, dict: contents_dict, data: contents)
      ]

      pdf = +HEADER.b

      # Record where each object starts before appending it.
      offsets = bodies.each_with_index.map do |body, index|
        pdf.bytesize.tap { pdf << format(OBJECT_TEMPLATE, number: index + 1, object: body) }
      end

      xref_offset = pdf.bytesize
      size = bodies.size + 1

      pdf << format(XREF_HEADER_TEMPLATE, size:).b
      offsets.each { |offset| pdf << format(XREF_ENTRY_TEMPLATE, offset:).b }
      pdf << format(TRAILER_TEMPLATE, size:, xref_offset:).b
    end
  end
end

@ -0,0 +1,69 @@
# frozen_string_literal: true
module Templates
  # Crops (and optionally rotates/flips/"scans") an uploaded image through
  # Leptonica and stores the result as a new template document.
  module CreateDocumentCrop
    MAX_SCAN_SIZE = 1400

    module_function

    # Creates the cropped document for +attachment+ from the request +params+
    # (:corners, :scan, :rotate, :flip_h, :flip_v) and returns it. Scans are
    # padded to a page-shaped canvas and stored as PNG; plain crops are stored
    # as JPEG.
    def call(template, attachment, params)
      scan = params[:scan]
      source = attachment.download
      corners = params[:corners].map(&:to_h)

      bytes, width, height = Leptonica.crop_document(source, corners,
                                                     scan:,
                                                     rotate: params[:rotate]&.to_i,
                                                     flip_h: params[:flip_h],
                                                     flip_v: params[:flip_v])

      image = load_image(bytes, width, height)
      data =
        if scan
          encode_png(pad_scan_image(image, template.account))
        else
          encode_jpeg(image)
        end

      create_document!(template, attachment, data, scan)
    end

    # Uploads +data+ as a blob named after +attachment+, attaches it to the
    # template, runs document processing on it and returns the new document.
    def create_document!(template, attachment, data, scan)
      extension, content_type = scan ? %w[png image/png] : %w[jpg image/jpeg]

      blob = ActiveStorage::Blob.create_and_upload!(
        io: StringIO.new(data),
        filename: "#{attachment.filename.base}.#{extension}",
        metadata: { identified: true, analyzed: true },
        content_type:
      )

      template.documents.create!(blob:).tap do |document|
        Templates::ProcessDocument.call(document, data)
      end
    end

    # Wraps the raw 4-band pixel buffer in a Vips image and keeps only the
    # first three bands, tagged as sRGB.
    def load_image(bytes, width, height)
      raw = Vips::Image.new_from_memory_copy(bytes, width, height, 4, :uchar)

      raw.extract_band(0, n: 3).copy(interpretation: :srgb)
    end

    # Shrinks the scan so its longest side is at most MAX_SCAN_SIZE and centres
    # it on a white canvas with the proportions of the account's default page
    # size (turned to landscape when the image is wider than tall).
    def pad_scan_image(image, account)
      longest_side = [image.width, image.height].max
      scale = MAX_SCAN_SIZE / longest_side.to_f
      image = image.resize(scale) if scale < 1

      base_size = Templates::ModifyDocuments.default_page_size(account)
      page_width, page_height = image.width > image.height ? base_size.reverse : base_size

      # Grow only the dimension that is too small for the page proportions.
      width_for_height = (image.height * page_width / page_height.to_f).round
      height_for_width = (image.width * page_height / page_width.to_f).round
      target_width = [image.width, width_for_height].max
      target_height = [image.height, height_for_width].max

      image.gravity('centre', target_width, target_height, extend: :background, background: 255)
    end

    # Encodes a scan as a 4-bit palette PNG.
    def encode_png(image)
      image.write_to_buffer(Templates::ProcessDocument::FORMAT,
                            compression: 6, filter: 0, bitdepth: 4, palette: true, dither: 0, strip: true)
    end

    # Encodes a plain crop as a quality-90 JPEG.
    def encode_jpeg(image)
      image.write_to_buffer('.jpg', Q: 90, strip: true)
    end
  end
end

@ -0,0 +1,505 @@
# frozen_string_literal: true
module Templates
module ModifyDocuments
InvalidLayout = Class.new(StandardError)
A4_SIZE = [595, 842].freeze
LETTER_SIZE = [612, 792].freeze
PAGE_SIZE_TOLERANCE = 6
SCAN_WHITE_THRESHOLD = 220
SCAN_WHITE_FRACTION = 0.6
ANNOTATIONS_SIZE_LIMIT = 6.megabytes
ROTATIONS = [0, 90, 180, 270].freeze
RECT_KEYS = %w[x y w h].freeze
module_function
# Applies +documents_layout+ to +template+: rebuilds the documents described
# by the layout, re-inserts the dynamic schema items at their previous
# positions, remaps field areas onto the new pages, removes conditions that
# point at fields dropped in the process, saves and returns the template.
#
# Raises InvalidLayout (from validate_layout!) when the layout is not valid.
def call(template, documents_layout)
  layout_attachment_uuids =
    documents_layout.map { |entry| [entry['attachment_uuid'], entry['pages'].to_a.pluck('attachment_uuid')] }
                    .flatten
                    .uniq

  attachments_index =
    template.documents_attachments.preload(:blob).where(uuid: layout_attachment_uuids).index_by(&:uuid)

  validate_layout!(template, documents_layout, attachments_index)

  mapping = {}
  new_schema = build_new_schema(template, documents_layout, attachments_index, mapping)

  # Dynamic documents are not part of the layout; put them back where they were.
  template.schema.each_with_index do |item, index|
    next unless item['dynamic']

    new_schema.insert([index, new_schema.size].min, item)
  end

  removed_field_uuids = remap_fields(template, mapping)
  template.schema = new_schema

  [template.fields, template.schema].each do |items|
    remove_conditions(items, removed_field_uuids)
  end

  template.save!
  template
end
# Builds the new schema array for +documents_layout+ and fills +mapping+ with
# [old attachment uuid, old page] => [new attachment uuid, new page(, rotation)]
# entries via add_page_mapping.
#
# Layout entries without pages are left out of the result. Entries that
# unchanged_entry? reports as untouched keep their schema item; every other
# entry gets a freshly built document whose uuid replaces the item's
# 'attachment_uuid' (and its 'google_drive_file_id' is dropped).
def build_new_schema(template, documents_layout, attachments_index, mapping)
# Source documents opened while building (passed to build_document); they
# are all closed in the ensure below.
sources = {}
Pdfium.with_instance do
documents_layout.filter_map do |entry|
# Fall back to a minimal schema item when the attachment is not in the schema yet.
schema_item =
template.schema.find { |item| item['attachment_uuid'] == entry['attachment_uuid'] } ||
{ 'attachment_uuid' => entry['attachment_uuid'],
'name' => attachments_index[entry['attachment_uuid']].filename.base }
next if entry['pages'].blank?
if unchanged_entry?(entry, attachments_index)
# Pages stay where they are: map every page onto itself.
entry['pages'].each_with_index do |ref, index|
add_page_mapping(mapping, ref, [ref['attachment_uuid'], index])
end
schema_item
else
document = if standalone_image_entry?(entry, attachments_index)
build_image_document(template, entry, attachments_index)
else
build_document(template, schema_item, entry['pages'], attachments_index, sources)
end
# The rotation is recorded so field areas can be rotated along with the page.
entry['pages'].each_with_index do |ref, index|
add_page_mapping(mapping, ref, [document.uuid, index, ref['rotate'].to_i % 360])
end
schema_item.except('google_drive_file_id').merge('attachment_uuid' => document.uuid)
end
end
ensure
sources.each_value(&:close)
end
end
# Records +target+ in +mapping+ under the [attachment uuid, page] key of +ref+
# and, when the ref carries a 'replaced_page', under that page's key as well.
def add_page_mapping(mapping, ref, target)
  previous = ref['replaced_page']
  mapping[[ref['attachment_uuid'], ref['page']]] = target
  return unless previous

  mapping[[previous['attachment_uuid'], previous['page']]] = target
end
# Raises InvalidLayout unless +documents_layout+ is a usable layout for
# +template+:
# - it is present and at least one entry has pages;
# - entry attachment uuids are unique, cover every non-dynamic schema
#   document, never reference a dynamic one and are all known attachments;
# - every page reference passes validate_ref! and is used only once.
def validate_layout!(template, documents_layout, attachments_index)
  raise InvalidLayout if documents_layout.blank?
  raise InvalidLayout if documents_layout.all? { |entry| entry['pages'].blank? }

  dynamic_uuids = template.schema.select { |item| item['dynamic'] }.pluck('attachment_uuid')
  non_dynamic_uuids = template.schema.pluck('attachment_uuid') - dynamic_uuids
  layout_uuids = documents_layout.pluck('attachment_uuid')

  invalid_entries =
    layout_uuids.uniq.size != layout_uuids.size ||
    (non_dynamic_uuids - layout_uuids).any? ||
    layout_uuids.intersect?(dynamic_uuids) ||
    layout_uuids.any? { |uuid| attachments_index[uuid].nil? }
  raise InvalidLayout if invalid_entries

  refs = documents_layout.flat_map { |entry| entry['pages'].to_a }
  refs.each { |ref| validate_ref!(ref, attachments_index) }

  ref_keys = refs.map { |ref| [ref['attachment_uuid'], ref['page']] }
  raise InvalidLayout unless ref_keys.uniq.size == ref_keys.size
end
# Raises InvalidLayout unless the page reference +ref+ points at a known
# attachment, names an existing zero-based page of it, uses one of ROTATIONS
# (or no rotation) and carries valid redaction rectangles.
def validate_ref!(ref, attachments_index)
  attachment = attachments_index[ref['attachment_uuid']]
  raise InvalidLayout if attachment.nil?

  page = ref['page']
  raise InvalidLayout unless page.is_a?(Integer) && page >= 0 && page < page_count(attachment)

  rotate = ref['rotate']
  raise InvalidLayout unless rotate.nil? || ROTATIONS.include?(rotate)

  validate_redact!(ref['redact'])
end
# Validates the redaction rectangles of a page reference.
#
# redact - nil (nothing to redact) or an Array of hash-like rectangles that
#          each provide numeric 'x', 'y', 'w' and 'h' values between -1 and 2.
#
# Raises InvalidLayout for anything else. Entries that are not hash-like
# (for example nested arrays or plain numbers) are rejected up front so that
# malformed input surfaces as InvalidLayout rather than as a TypeError from
# indexing them with a string key.
def validate_redact!(redact)
  return if redact.nil?
  raise InvalidLayout unless redact.is_a?(Array)

  redact.each do |rect|
    valid =
      rect.respond_to?(:key?) &&
      RECT_KEYS.all? { |key| rect[key].is_a?(Numeric) && rect[key].to_f.between?(-1, 2) }

    raise InvalidLayout unless valid
  end
end
# Number of pages of +attachment+: the 'number_of_pages' stored in its PDF
# metadata for PDFs (0 when absent), 1 for anything else.
def page_count(attachment)
  return 1 unless attachment.content_type == Templates::ProcessDocument::PDF_CONTENT_TYPE

  attachment.metadata.dig('pdf', 'number_of_pages').to_i
end
# Returns the text and image boxes of one page of +attachment+ as
# { 'text_nodes' => [...], 'image_nodes' => [...] }. The page is flattened,
# its form objects are unwrapped and its rotation is applied before the nodes
# are read.
def page_objects(attachment, page_number)
  box = ->(node) { { 'x' => node.x, 'y' => node.y, 'w' => node.w, 'h' => node.h } }

  Pdfium::Document.open_bytes(attachment.download) do |doc|
    page = doc.get_page(page_number)
    page.flatten
    page.unwrap_form_objects
    page.rotate

    {
      'text_nodes' => page.text_nodes.map { |node| { 'text' => node.content }.merge(box.call(node)) },
      'image_nodes' => page.image_nodes.map { |node| box.call(node) }
    }
  end
end
# True when the layout entry lists exactly the pages of its own attachment, in
# their original order, without rotation or redaction.
def unchanged_entry?(entry, attachments_index)
  uuid = entry['attachment_uuid']
  pages = entry['pages']
  return false unless pages.size == page_count(attachments_index[uuid])

  pages.each_with_index.all? do |ref, index|
    ref['attachment_uuid'] == uuid && ref['page'] == index && ref['rotate'].to_i.zero? && ref['redact'].blank?
  end
end
# Assembles a new PDF from +page_refs+ (pages taken from PDF and/or image
# attachments), applies the per-page PDF operations and stores the result via
# save_document, named after the attachment of +schema_item+.
#
# sources - cache of opened source documents shared across calls; this method
#           only adds to it, the caller is responsible for closing them.
def build_document(template, schema_item, page_refs, attachments_index, sources)
# Page sizes are only needed when at least one page comes from an image.
with_images = page_refs.any? { |ref| attachments_index[ref['attachment_uuid']].image? }
pdf_size = entry_pdf_page_size(page_refs, attachments_index, sources) if with_images
default_size = default_page_size(template.account) if with_images
io =
Pdfium::Document.create do |dest|
insert_index = 0
build_page_runs(page_refs, attachments_index).each do |uuid, pages_range, length, image_ops|
redact, rotate = image_ops
attachment = attachments_index[uuid]
# An image's generated PDF depends on its operations and the target page
# sizes, so those are part of its cache key; PDFs are keyed by uuid and
# their (nil) image operations.
key = attachment.image? ? [uuid, image_ops, pdf_size, default_size] : [uuid, image_ops]
source = sources[key] ||= open_or_build_pdf(attachment, redact:, rotate:, pdf_size:, default_size:)
dest.import_pages(source, pages: pages_range, index: insert_index)
# Advance past the pages just imported so runs stay in layout order.
insert_index += length
end
apply_pdf_page_ops(dest, page_refs, attachments_index)
dest.save(StringIO.new)
end
save_document(template, attachments_index[schema_item['attachment_uuid']], io.string)
end
# Applies redaction and rotation to the pages of +dest+ that came from PDF
# attachments. Pages that came from images are skipped here.
def apply_pdf_page_ops(dest, page_refs, attachments_index)
  page_refs.each_with_index do |ref, index|
    next if attachments_index[ref['attachment_uuid']].image?

    rotate = ref['rotate'].to_i % 360
    redact = ref['redact'].to_a
    rotating = !rotate.zero?
    redacting = redact.present?
    next unless rotating || redacting

    page = dest.get_page(index)

    if redacting
      page.redact(redact) { |bitmap, pixel_rects| encode_redacted_image_jpeg(bitmap, pixel_rects) }
    end
    next unless rotating

    # Page rotation is stored in quarter turns.
    page.rotation = (page.rotation + (rotate / 90)) % 4
    page.rotate
  end
end
# Groups consecutive page refs that come from the same attachment (and, for
# images, share the same redact/rotate operations) into runs.
#
# Returns an array of [attachment uuid, one-based page list joined with ',',
# number of pages, image operations]. The image operations are
# [redact, rotate] for image attachments and nil for everything else.
def build_page_runs(page_refs, attachments_index)
  tagged = page_refs.map do |ref|
    uuid = ref['attachment_uuid']
    image_ops =
      if attachments_index[uuid].image?
        [ref['redact'].presence, ref['rotate'].to_i % 360].presence
      end

    [uuid, ref['page'], image_ops]
  end

  same_run = ->(earlier, later) { earlier[0] == later[0] && earlier[2] == later[2] }

  tagged.chunk_while(&same_run).map do |run|
    uuid, _page, image_ops = run.first
    pages = run.map { |_uuid, page, _ops| page }

    [uuid, pages.map { |page| page + 1 }.join(','), pages.size, image_ops]
  end
end
# True when the layout entry consists of exactly one page and that page comes
# from an image attachment.
def standalone_image_entry?(entry, attachments_index)
  pages = entry['pages']
  return false unless pages.size == 1

  attachments_index[pages.first['attachment_uuid']].image?
end
# Builds the document for a layout entry that consists of a single image page.
#
# When the page is neither redacted nor rotated the original attachment is
# returned as is. Otherwise the image is loaded, redacted, rotated and stored
# as a new template document: JPEG sources stay JPEG (quality 90); every other
# source is written as PNG.
#
# The stored blob's content type and file extension always describe the bytes
# that were actually written, so a non-JPEG, non-PNG source (whatever other
# image type the attachment had) is stored as "<base name>.png" with the
# 'image/png' content type instead of keeping its original type.
#
# Returns the result of Templates::ProcessDocument.call for the new document.
def build_image_document(template, entry, attachments_index)
  ref = entry['pages'].first
  attachment = attachments_index[ref['attachment_uuid']]
  rotate = ref['rotate'].to_i % 360

  return attachment if ref['redact'].blank? && rotate.zero?

  image = ImageUtils.load_vips(attachment.download, content_type: attachment.content_type, autorot: true)
  image = draw_image_redaction(image, ref['redact']) if ref['redact'].present?
  image = rotate_vips_image(image, rotate)

  extension, content_type, format_args =
    if attachment.content_type == 'image/jpeg'
      ['.jpg', 'image/jpeg', { Q: 90 }]
    else
      ['.png', 'image/png', {}]
    end

  # Keep the original name when the format is unchanged; otherwise swap the
  # extension so it matches the re-encoded data.
  filename =
    if content_type == attachment.content_type
      attachment.filename.to_s
    else
      "#{attachment.filename.base}#{extension}"
    end

  data = image.write_to_buffer(extension, **format_args)

  blob = ActiveStorage::Blob.create_and_upload!(
    io: StringIO.new(data),
    filename:,
    metadata: { identified: true, analyzed: true },
    content_type:
  )

  document = template.documents.create!(blob:)

  Templates::ProcessDocument.call(document, data)
end
# Rotates +image+ by 90, 180 or 270 degrees using the matching rot90/rot180/
# rot270 call; any other value returns the image unchanged.
def rotate_vips_image(image, rotate)
  turns = { 90 => :rot90, 180 => :rot180, 270 => :rot270 }
  operation = turns.find { |angle, _name| angle === rotate }&.last

  operation ? image.public_send(operation) : image
end
# Turns an extracted bitmap (see the :data/:width/:height/:bands/:format keys)
# into a JPEG with +pixel_rects+ ([left, top, width, height]) painted over in
# zero-valued ink. BGR-ordered formats are reordered to RGB first; :bgra is
# additionally flattened onto a white background.
def encode_redacted_image_jpeg(bitmap, pixel_rects)
  image = Vips::Image.new_from_memory_copy(bitmap[:data], bitmap[:width], bitmap[:height], bitmap[:bands], :uchar)

  image =
    case bitmap[:format]
    when :bgr, :bgrx
      image[2].bandjoin([image[1], image[0]])
    when :bgra
      image[2].bandjoin([image[1], image[0], image[3]]).flatten(background: 255)
    else
      image
    end

  ink = Array.new(image.bands, 0.0)
  redacted = pixel_rects.reduce(image) do |canvas, (left, top, rect_width, rect_height)|
    canvas.draw_rect(ink, left, top, rect_width, rect_height, fill: true)
  end

  redacted.write_to_buffer('.jpg', Q: 50, strip: true)
end
# Paints every rectangle of +rects+ onto +image+ in black (with a fully opaque
# fourth band when the image has one) and returns the resulting image.
#
# rects - hashes with 'x', 'y', 'w', 'h' given as fractions of the image size,
#         origin at the top left.
#
# The painted pixel box always covers the whole requested area: its left/top
# edge is rounded down and its right/bottom edge is rounded up from the
# rectangle's real end position (x + w, y + h), then clamped to the image.
# Rectangles that lie completely outside the image are skipped instead of
# being painted onto the nearest edge pixel.
def draw_image_redaction(image, rects)
  ink = Array.new(image.bands) { |band| band == 3 ? 255.0 : 0.0 }

  rects.each do |rect|
    x_start = rect['x'].to_f * image.width
    y_start = rect['y'].to_f * image.height
    x_end = (rect['x'].to_f + rect['w'].to_f) * image.width
    y_end = (rect['y'].to_f + rect['h'].to_f) * image.height

    next if x_end <= 0 || x_start >= image.width || y_end <= 0 || y_start >= image.height

    left = x_start.floor.clamp(0, image.width - 1)
    top = y_start.floor.clamp(0, image.height - 1)
    # Measure from the clamped start so partially off-image rectangles are not
    # widened, and from the rounded-up end so no partial pixel is left visible.
    rect_width = (x_end.ceil - left).clamp(1, image.width - left)
    rect_height = (y_end.ceil - top).clamp(1, image.height - top)

    image = image.draw_rect(ink, left, top, rect_width, rect_height, fill: true)
  end

  image
end
# Opens +attachment+ as a Pdfium document. PDFs are opened from their stored
# bytes; images are first converted into a one-page PDF with the given
# redaction, rotation and page sizes.
def open_or_build_pdf(attachment, redact: nil, rotate: nil, pdf_size: nil, default_size: nil)
  return Pdfium::Document.open_bytes(attachment.download) unless attachment.image?

  image_pdf = build_pdf_data_from_image(attachment, pdf_size, default_size, redact:, rotate:)

  Pdfium::Document.open_bytes(image_pdf)
end
# Looks at the last PDF page referenced in +page_refs+ and, when its size
# (after the requested rotation) is within tolerance of a standard page size,
# returns that standard size in the page's orientation. Returns nil when there
# is no PDF page or its size is not a standard one.
#
# The opened source document is stored in +sources+ for reuse.
def entry_pdf_page_size(page_refs, attachments_index, sources)
  pdf_ref = page_refs.reverse_each.find { |ref| !attachments_index[ref['attachment_uuid']].image? }
  return if pdf_ref.nil?

  uuid = pdf_ref['attachment_uuid']
  source = sources[[uuid, nil]] ||= open_or_build_pdf(attachments_index[uuid])
  page = source.get_page(pdf_ref['page'])

  dimensions = [page.width, page.height]
  # A quarter-turn rotation swaps width and height.
  dimensions.reverse! unless (pdf_ref['rotate'].to_i % 180).zero?
  width, height = dimensions

  size = standard_page_size(width, height)
  return if size.nil?

  width > height ? size.reverse : size
end
# Returns LETTER_SIZE or A4_SIZE (always as defined, i.e. not reversed) when
# +width+ x +height+ is within PAGE_SIZE_TOLERANCE of that size in either
# orientation; nil otherwise. Letter is checked first.
def standard_page_size(width, height)
  close = ->(actual, expected) { (actual - expected).abs <= PAGE_SIZE_TOLERANCE }

  [LETTER_SIZE, A4_SIZE].find do |size|
    short_side, long_side = size

    (close.call(width, short_side) && close.call(height, long_side)) ||
      (close.call(width, long_side) && close.call(height, short_side))
  end
end
# Picks the default page size for +account+: LETTER_SIZE when the abbreviation
# of its timezone (taken at the beginning of the current year) is listed in
# TimeUtils::US_TIMEZONES, A4_SIZE otherwise.
def default_page_size(account)
  start_of_year = Time.current.beginning_of_year
  abbr = TimeUtils.timezone_abbr(account.timezone, start_of_year)
  return LETTER_SIZE if abbr.in?(TimeUtils::US_TIMEZONES)

  A4_SIZE
end
# True when +size+ ([width, height]) and +image+ are both landscape or both
# not landscape. A nil size never matches.
def orientation_match?(size, image)
  return false if size.nil?

  page_landscape = size[0] > size[1]
  image_landscape = image.width > image.height

  page_landscape == image_landscape
end
# Returns LETTER_SIZE or A4_SIZE when the image's side ratio is close enough
# to that page's ratio (orientation is ignored), nil otherwise. Letter is
# checked first.
def aspect_page_size(image)
  short, long = [image.width, image.height].minmax

  [LETTER_SIZE, A4_SIZE].find do |page_size|
    page_short, page_long = page_size
    # Cross-multiplied ratio difference, compared without dividing.
    ratio_gap = ((short * page_long) - (long * page_short)).abs

    ratio_gap <= page_short
  end
end
# Heuristic: true when at least SCAN_WHITE_FRACTION of the histogram counts of
# the black-and-white version of +image+ sit at or above SCAN_WHITE_THRESHOLD.
def scanned_page_image?(image)
  histogram = image.colourspace('b-w').hist_find.to_a.first.flatten
  bright_total = histogram[SCAN_WHITE_THRESHOLD..].sum

  bright_total >= histogram.sum * SCAN_WHITE_FRACTION
end
# Converts the first preview image of +attachment+ into single-page PDF bytes.
#
# The image is normalised to sRGB without alpha (flattened onto white),
# redacted and rotated as requested, encoded as a palette PNG and wrapped into
# a page by build_image_page_pdf.
def build_pdf_data_from_image(attachment, pdf_size, default_size, redact: nil, rotate: nil)
  image = ImageUtils.load_vips(attachment.preview_images.first.download)
  image = image.colourspace(:srgb) unless image.interpretation == :srgb
  image = image.flatten(background: 255) if image.has_alpha?
  image = draw_image_redaction(image, redact) if redact.present?
  image = rotate_vips_image(image, rotate.to_i)

  # NOTE(review): the bit depth is derived from how many distinct values
  # column 2 of stats rows 1..3 holds — confirm the intended heuristic.
  distinct_values = image.stats.to_a[1..3].pluck(2).uniq.size
  bitdepth = 2**distinct_values

  png_data = image.write_to_buffer(Templates::ProcessDocument::FORMAT,
                                   compression: 6, filter: 0, bitdepth:, palette: true,
                                   Q: Templates::ProcessDocument::Q, dither: 0, strip: true)

  build_image_page_pdf(image, png_data, pdf_size, default_size)
end
# Wraps +png_data+ into a one-page PDF.
#
# Page size: +pdf_size+ when its orientation matches the image; otherwise the
# standard size matching the image's aspect ratio, or +default_size+, turned
# to the image's orientation. The image is scaled to fit the page.
#
# When no size matched and the image does not look like a scanned page, the
# page is shrunk to the scaled image; otherwise the image is centred on the
# full page.
def build_image_page_pdf(image, png_data, pdf_size, default_size)
  matched_size = pdf_size if orientation_match?(pdf_size, image)
  aspect_size = aspect_page_size(image) if matched_size.nil?

  page_width, page_height =
    if matched_size
      matched_size
    else
      fallback = aspect_size || default_size
      image.width > image.height ? fallback.reverse : fallback
    end

  scale = [page_width / image.width.to_f, page_height / image.height.to_f].min
  fit_page_to_image = matched_size.nil? && aspect_size.nil? && !scanned_page_image?(image)

  if fit_page_to_image
    return Templates::BuildImagePagePdf.call(png_data, page_width: image.width * scale,
                                                       page_height: image.height * scale)
  end

  image_width = image.width * scale
  image_height = image.height * scale
  centered_box = [(page_width - image_width) / 2.0,
                  (page_height - image_height) / 2.0,
                  image_width, image_height]

  Templates::BuildImagePagePdf.call(png_data, page_width:, page_height:, image_box: centered_box)
end
# Stores the PDF +data+ as a new document of +template+, named after
# +old_attachment+ with a .pdf extension, and runs document processing on it.
#
# Annotations are only extracted for data smaller than ANNOTATIONS_SIZE_LIMIT.
# Returns the result of Templates::ProcessDocument.call.
def save_document(template, old_attachment, data)
  annotations =
    if data.size < ANNOTATIONS_SIZE_LIMIT
      Templates::BuildAnnotations.call(data)
    else
      []
    end
  sha256 = Base64.urlsafe_encode64(Digest::SHA256.digest(data))

  # compact_blank drops the pdf entry entirely when there are no annotations.
  metadata = { identified: true, analyzed: true, pdf: { annotations: }.compact_blank, sha256: }.compact_blank

  blob = ActiveStorage::Blob.create_and_upload!(
    io: StringIO.new(data),
    filename: "#{old_attachment.filename.base}.pdf",
    metadata:,
    content_type: Templates::ProcessDocument::PDF_CONTENT_TYPE
  )

  document = template.documents.create!(blob:)

  Templates::ProcessDocument.call(document, data)
end
# Rewrites the areas of the template's fields according to +mapping+
# ([old uuid, old page] => [new uuid, new page, rotation]).
#
# Areas on dynamic documents are kept untouched; areas whose page has no
# mapping are dropped; the rest are moved to the new attachment/page and
# rotated. Fields that had areas but end up with none are removed from the
# template.
#
# Returns the uuids of the removed fields.
def remap_fields(template, mapping)
  non_dynamic_uuids = template.schema.reject { |item| item['dynamic'] }.pluck('attachment_uuid')
  removed_field_uuids = []

  template.fields = template.fields.filter_map do |field|
    next field if field['areas'].blank?

    field['areas'] = field['areas'].filter_map do |area|
      next area unless non_dynamic_uuids.include?(area['attachment_uuid'])

      new_uuid, new_page, rotate = mapping[[area['attachment_uuid'], area['page']]]
      next if new_uuid.nil?

      moved_area = area.merge('attachment_uuid' => new_uuid, 'page' => new_page)
      rotate_area(moved_area, rotate.to_i)
    end
    next field if field['areas'].present?

    removed_field_uuids << field['uuid']
    nil
  end

  removed_field_uuids
end
# Returns +area+ with its 'x', 'y', 'w', 'h' (fractions of the page, origin at
# the top left) adjusted for a page rotation of 90, 180 or 270 degrees. Any
# other rotation returns the area itself.
def rotate_area(area, rotate)
  x, y, w, h = area.values_at('x', 'y', 'w', 'h')

  rotated_box =
    case rotate
    when 90 then { 'x' => 1 - y - h, 'y' => x, 'w' => h, 'h' => w }
    when 180 then { 'x' => 1 - x - w, 'y' => 1 - y - h }
    when 270 then { 'x' => y, 'y' => 1 - x - w, 'w' => h, 'h' => w }
    end

  rotated_box ? area.merge(rotated_box) : area
end
# Removes, in place, every condition of +items+ whose 'field_uuid' is listed in
# +removed_field_uuids+. Does nothing when no fields were removed.
def remove_conditions(items, removed_field_uuids)
  return if removed_field_uuids.blank?

  items.each do |item|
    conditions = item['conditions']
    next if conditions.blank?

    item['conditions'] = conditions.reject do |condition|
      removed_field_uuids.include?(condition['field_uuid'])
    end
  end
end
end
end
Loading…
Cancel
Save