diff --git a/app/jobs/process_document_job.rb b/app/jobs/process_document_job.rb
new file mode 100644
index 00000000..9a0a7b6f
--- /dev/null
+++ b/app/jobs/process_document_job.rb
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+# Sidekiq job that generates preview images for PDF pages from index 1 up to
+# the processing cap; index 0 is not covered here (presumably rendered by the
+# caller before enqueueing - confirm).
+class ProcessDocumentJob
+  include Sidekiq::Job
+
+  sidekiq_options queue: :images
+
+  # @param params [Hash] job arguments; 'attachment_id' identifies the
+  #   ActiveStorage::Attachment whose remaining pages should be rendered
+  def perform(params = {})
+    attachment = ActiveStorage::Attachment.find(params['attachment_id'])
+
+    # dig + to_i treats missing PDF metadata as zero pages rather than raising
+    # NoMethodError on nil.
+    last_page_index = attachment.metadata.dig('pdf', 'number_of_pages').to_i - 1
+    max_page = [last_page_index, Templates::ProcessDocument::MAX_NUMBER_OF_PAGES_PROCESSED].min
+
+    # An empty 1..max_page range means no pages to render, so return before
+    # downloading the file.
+    return if max_page < 1
+
+    Templates::ProcessDocument.generate_document_preview_images(attachment, attachment.download, (1..max_page),
+                                                                concurrency: 1)
+  end
+end
diff --git a/config/sidekiq.yml b/config/sidekiq.yml
index 10a8a1c1..2df9d73e 100644
--- a/config/sidekiq.yml
+++ b/config/sidekiq.yml
@@ -1,6 +1,7 @@
 queues:
   - [default, 1]
   - [webhooks, 1]
+  - [images, 1]
   - [mailers, 1]
   - [recurrent, 1]
   - [rollbar, 1]
diff --git a/lib/templates/process_document.rb b/lib/templates/process_document.rb
index 1ac63a6e..b840bd9a 100644
--- a/lib/templates/process_document.rb
+++ b/lib/templates/process_document.rb
@@ -17,7 +17,7 @@ module Templates
 
     module_function
 
-    def call(attachment, data, extract_fields: false)
+    def call(attachment, data, extract_fields: false, max_pages: MAX_NUMBER_OF_PAGES_PROCESSED)
       if attachment.content_type == PDF_CONTENT_TYPE
         if extract_fields && data.size < MAX_FLATTEN_FILE_SIZE
           pdf = HexaPDF::Document.new(io: StringIO.new(data))
@@ -25,7 +25,7 @@ module Templates
           fields = Templates::FindAcroFields.call(pdf, attachment)
         end
 
-        generate_pdf_preview_images(attachment, data, pdf)
+        generate_pdf_preview_images(attachment, data, pdf, max_pages:)
 
         attachment.metadata['pdf']['fields'] = fields if fields
       elsif attachment.image?
@@ -56,7 +56,7 @@ module Templates ) end - def generate_pdf_preview_images(attachment, data, pdf = nil) + def generate_pdf_preview_images(attachment, data, pdf = nil, max_pages: MAX_NUMBER_OF_PAGES_PROCESSED) ActiveStorage::Attachment.where(name: ATTACHMENT_NAME, record: attachment).destroy_all pdf ||= HexaPDF::Document.new(io: StringIO.new(data)) @@ -71,12 +71,16 @@ module Templates attachment.save! end - max_pages_to_process = data.size < GENERATE_PREVIEW_SIZE_LIMIT ? MAX_NUMBER_OF_PAGES_PROCESSED : 1 + max_pages_to_process = data.size < GENERATE_PREVIEW_SIZE_LIMIT ? max_pages : 1 - pool = Concurrent::FixedThreadPool.new(CONCURRENCY) + generate_document_preview_images(attachment, data, (0..[number_of_pages - 1, max_pages_to_process].min)) + end + + def generate_document_preview_images(attachment, data, range, concurrency: CONCURRENCY) + pool = Concurrent::FixedThreadPool.new(concurrency) promises = - (0..[number_of_pages - 1, max_pages_to_process].min).map do |page_number| + range.map do |page_number| Concurrent::Promise.execute(executor: pool) { build_and_upload_blob(data, page_number) } end