mirror of https://github.com/docusealco/docuseal
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
35 lines
992 B
35 lines
992 B
# frozen_string_literal: true
|
|
|
|
# Lookup/creation helpers for DocumentMetadata records, which are keyed by
# account id and the checksum of the document's blob.
module DocumentMetadatas
  # Maximum number of create attempts before a persistent uniqueness conflict
  # is surfaced to the caller instead of being retried again.
  MAX_CREATE_ATTEMPTS = 3

  module_function

  # Returns the DocumentMetadata row for the document's blob checksum within
  # the given account, creating it (with extracted text runs) when missing.
  #
  # A concurrent insert of the same row is expected to raise
  # ActiveRecord::RecordNotUnique (presumably from a unique index on
  # account_id + blob_checksum -- confirm against the schema). In that case the
  # lookup is repeated so the row written by the other writer is returned.
  #
  # @param document [#blob, #metadata, #download] document whose blob checksum
  #   identifies the metadata row
  # @param account_id [Object] account the metadata row belongs to
  # @return [DocumentMetadata] the existing or newly created record
  # @raise [ActiveRecord::RecordNotUnique] when the insert still conflicts
  #   after MAX_CREATE_ATTEMPTS attempts
  def find_or_create_for_document(document, account_id:)
    checksum = document.blob.checksum
    attempts = 0

    begin
      DocumentMetadata.find_by(account_id:, blob_checksum: checksum) ||
        DocumentMetadata.create!(account_id:, blob_checksum: checksum, text_runs: build_text_runs(document))
    rescue ActiveRecord::RecordNotUnique
      attempts += 1

      # Bound the retries: if the conflicting row never becomes visible to
      # find_by, an unbounded retry would spin forever.
      raise if attempts >= MAX_CREATE_ATTEMPTS

      retry
    end
  end

  # Extracts the text objects of every page of the document's PDF.
  #
  # Returns an empty hash without downloading anything when the document's
  # metadata reports no pages (missing or zero 'pdf' => 'number_of_pages').
  #
  # @param document [#metadata, #download] document to read the PDF bytes from
  # @return [Hash{Integer => Array<Hash>}] page index mapped to that page's
  #   text objects (:text, :x, :y, :w, :h, :font_size). This is whatever
  #   Pdfium::Document.open_bytes returns for the block -- presumably the
  #   block's value; confirm against the Pdfium wrapper.
  def build_text_runs(document)
    number_of_pages = document.metadata.dig('pdf', 'number_of_pages').to_i

    return {} if number_of_pages.zero?

    Pdfium::Document.open_bytes(document.download) do |doc|
      (0...doc.page_count).each_with_object({}) do |page_index, acc|
        page = doc.get_page(page_index)

        acc[page_index] = page.text_objects.map do |node|
          { text: node.content, x: node.x, y: node.y, w: node.w, h: node.h, font_size: node.font_size }
        end
      ensure
        # Close the page even when extraction raises; page is nil if get_page
        # itself failed.
        page&.close
      end
    end
  end
end
|