You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
docuseal/lib/document_metadatas.rb

35 lines
992 B

# frozen_string_literal: true
# Helpers for looking up or building the DocumentMetadata row that stores the
# text runs extracted from a document's PDF, keyed by account and blob checksum.
module DocumentMetadatas
  # Maximum number of find/create attempts before a RecordNotUnique conflict
  # is propagated to the caller instead of being retried.
  MAX_CREATE_ATTEMPTS = 3

  module_function

  # Returns the DocumentMetadata matching the document's blob checksum for the
  # given account, creating it (with freshly extracted text runs) when absent.
  #
  # A RecordNotUnique raised by the insert is treated as a lost race with a
  # concurrent writer: the lookup is repeated so the winner's row is returned.
  # The retry is bounded so that a persistent conflict surfaces as an error
  # rather than looping forever.
  #
  # @param document [#blob, #metadata, #download] source document
  # @param account_id [Object] value stored in / matched against `account_id`
  # @return [DocumentMetadata] the existing or newly created record
  # @raise [ActiveRecord::RecordNotUnique] when the conflict persists after
  #   MAX_CREATE_ATTEMPTS attempts
  def find_or_create_for_document(document, account_id:)
    checksum = document.blob.checksum
    attempts = 0
    text_runs = nil

    begin
      attempts += 1

      metadata = DocumentMetadata.find_by(account_id:, blob_checksum: checksum)
      return metadata if metadata

      # Extraction parses the whole PDF; keep the result across retries.
      text_runs ||= build_text_runs(document)
      DocumentMetadata.create!(account_id:, blob_checksum: checksum, text_runs:)
    rescue ActiveRecord::RecordNotUnique
      raise if attempts >= MAX_CREATE_ATTEMPTS

      retry
    end
  end

  # Extracts the positioned text objects of every page of the document's PDF.
  #
  # Returns an empty Hash without opening the file when the document metadata
  # reports no pages (missing or zero `pdf.number_of_pages`).
  #
  # @param document [#metadata, #download] source document
  # @return [Hash{Integer => Array<Hash>}] zero-based page index mapped to a
  #   list of `{ text:, x:, y:, w:, h:, font_size: }` hashes
  #   (assumes Pdfium::Document.open_bytes returns its block's value — confirm)
  def build_text_runs(document)
    number_of_pages = document.metadata.dig('pdf', 'number_of_pages').to_i
    return {} if number_of_pages.zero?

    Pdfium::Document.open_bytes(document.download) do |doc|
      (0...doc.page_count).each_with_object({}) do |page_index, acc|
        page = doc.get_page(page_index)

        acc[page_index] = page.text_objects.map do |node|
          { text: node.content, x: node.x, y: node.y, w: node.w, h: node.h, font_size: node.font_size }
        end
      ensure
        # `page` is nil when get_page itself raised, hence the safe navigation.
        page&.close
      end
    end
  end
end