mirror of https://github.com/docusealco/docuseal
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
113 lines
3.6 KiB
113 lines
3.6 KiB
# frozen_string_literal: true
|
|
|
|
module Templates
|
|
module CreateAttachments
|
|
# MIME types that uploaded files are compared against.
PDF_CONTENT_TYPE = 'application/pdf'
ZIP_CONTENT_TYPE = 'application/zip'
X_ZIP_CONTENT_TYPE = 'application/x-zip-compressed'
JSON_CONTENT_TYPE = 'application/json'

# File extensions of office-style documents.
DOCUMENT_EXTENSIONS = ['.docx', '.doc', '.xlsx', '.xls', '.odt', '.rtf'].freeze

# MIME types of office-style documents; ZIP entries detected as one of these
# are kept when an archive is expanded.
DOCUMENT_CONTENT_TYPES = [
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  'application/msword',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  'application/vnd.ms-excel',
  'application/vnd.oasis.opendocument.text',
  'application/rtf'
].freeze

# PDFs at or above this size skip both the encryption check and annotation extraction.
ANNOTATIONS_SIZE_LIMIT = 6.megabytes

# Raised with the offending content type when a file is neither an image nor a PDF.
class InvalidFileType < StandardError; end

# Raised when an encrypted PDF cannot be decrypted.
class PdfEncrypted < StandardError; end
|
|
|
|
module_function
|
|
|
|
# Entry point: turns the uploaded file(s) in +params+ into template documents.
#
# Uses params[:files] when it is present, otherwise params[:file]. ZIP archives
# are expanded first, then every resulting file is handled individually and the
# per-file results are flattened into one list.
#
# @param template [Object] template the documents are attached to
# @param params [Hash] request params carrying :files or :file
# @param extract_fields [Boolean] forwarded to the per-file handler
# @return [Array] flattened results of handle_file_types
def call(template, params, extract_fields: false)
  uploads = params[:files].presence || params[:file]

  extract_zip_files(uploads).flat_map { |upload| handle_file_types(template, upload, params, extract_fields:) }
end
|
|
|
|
# Stores a PDF or image upload as a blob, attaches it to +template+ and runs
# document processing on it.
#
# For PDFs the data is first passed through maybe_decrypt_pdf_or_raise, and
# annotations are built when the data is smaller than ANNOTATIONS_SIZE_LIMIT
# (otherwise an empty list is used). Blank metadata entries are dropped.
#
# @param template [Object] owner of the created document
# @param file [Object] upload responding to read, content_type and original_filename
# @param document_data [String, nil] raw bytes; read from +file+ when nil
# @param params [Hash] forwarded to the decryption step
# @param extract_fields [Boolean] forwarded to Templates::ProcessDocument
# @return [Object] whatever Templates::ProcessDocument.call returns
def handle_pdf_or_image(template, file, document_data = nil, params = {}, extract_fields: false)
  document_data ||= file.read

  content_type = file.content_type
  is_pdf = content_type == PDF_CONTENT_TYPE
  annotations = nil

  if is_pdf
    document_data = maybe_decrypt_pdf_or_raise(document_data, params)

    annotations = []
    annotations = Templates::BuildAnnotations.call(document_data) if document_data.size < ANNOTATIONS_SIZE_LIMIT
  end

  sha256 = Base64.urlsafe_encode64(Digest::SHA256.digest(document_data))

  # compact_blank removes false flags and an empty :pdf hash for non-PDF uploads.
  metadata = {
    identified: is_pdf,
    analyzed: is_pdf,
    pdf: { annotations: annotations }.compact_blank,
    sha256: sha256
  }.compact_blank

  blob = ActiveStorage::Blob.create_and_upload!(
    io: StringIO.new(document_data),
    filename: file.original_filename,
    metadata: metadata,
    content_type: content_type
  )

  document = template.documents.create!(blob: blob)

  Templates::ProcessDocument.call(document, document_data, extract_fields: extract_fields)
end
|
|
|
|
# Returns +data+ decrypted with params[:password] when it is smaller than
# ANNOTATIONS_SIZE_LIMIT and PdfUtils reports it as encrypted; otherwise
# returns +data+ unchanged.
#
# @param data [String] raw PDF bytes
# @param params [Hash] may carry :password
# @return [String] the possibly decrypted PDF bytes
# @raise [PdfEncrypted] when HexaPDF raises an encryption error
def maybe_decrypt_pdf_or_raise(data, params)
  needs_decryption = data.size < ANNOTATIONS_SIZE_LIMIT && PdfUtils.encrypted?(data)
  return data unless needs_decryption

  PdfUtils.decrypt(data, params[:password])
rescue HexaPDF::EncryptionError
  raise PdfEncrypted
end
|
|
|
|
# Expands uploaded ZIP archives into their individual supported files.
#
# Uploads whose content type is not one of the ZIP types are passed through
# unchanged, keeping their position. For a ZIP upload, every non-directory
# entry is copied into its own Tempfile and its MIME type is detected with
# Marcel; entries that are images, PDFs, JSON or one of DOCUMENT_CONTENT_TYPES
# are wrapped in ActionDispatch::Http::UploadedFile, all others are dropped.
#
# @param files [Object, Array, nil] a single upload or a list of uploads
# @return [Array] pass-through uploads plus the files extracted from archives
def extract_zip_files(files)
  extracted_files = []

  Array.wrap(files).each do |file|
    unless [ZIP_CONTENT_TYPE, X_ZIP_CONTENT_TYPE].include?(file.content_type)
      extracted_files << file
      next
    end

    # Block form closes the archive once every entry has been copied out.
    Zip::File.open(file.tempfile) do |zip_file|
      zip_file.each do |entry|
        next if entry.directory?

        tempfile = Tempfile.new(entry.name)
        tempfile.binmode
        entry.get_input_stream { |in_stream| IO.copy_stream(in_stream, tempfile) }
        tempfile.rewind

        type = Marcel::MimeType.for(tempfile, name: entry.name)

        supported = type.include?('image') ||
                    type == PDF_CONTENT_TYPE ||
                    type == JSON_CONTENT_TYPE ||
                    DOCUMENT_CONTENT_TYPES.include?(type)

        unless supported
          # A skipped entry is never handed to a caller, so close and unlink its
          # temp file now rather than leaving it around until garbage collection.
          tempfile.close!
          next
        end

        extracted_files << ActionDispatch::Http::UploadedFile.new(
          filename: File.basename(entry.name),
          type:,
          tempfile:
        )
      end
    end
  end

  extracted_files
end
|
|
|
|
# Dispatches one uploaded file by content type.
#
# Images and PDFs are read and forwarded to handle_pdf_or_image; any other
# content type is rejected.
#
# @param template [Object] template the document is attached to
# @param file [Object] upload responding to content_type and read
# @param params [Hash] forwarded to handle_pdf_or_image
# @param extract_fields [Boolean] forwarded to handle_pdf_or_image
# @return [Object] result of handle_pdf_or_image
# @raise [InvalidFileType] with the content type as message when unsupported
def handle_file_types(template, file, params, extract_fields:)
  mime = file.content_type
  accepted = mime.include?('image') || mime == PDF_CONTENT_TYPE

  raise InvalidFileType, mime unless accepted

  handle_pdf_or_image(template, file, file.read, params, extract_fields:)
end
|
|
end
|
|
end
|