From bc4414a36809274d4efc3b5a42a67617ff7ef219 Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Wed, 11 Sep 2024 16:46:33 +0300 Subject: [PATCH] use png for previews --- app/controllers/api/templates_controller.rb | 2 +- lib/templates/process_document.rb | 31 +++++++++++++-------- lib/templates/serialize_for_api.rb | 4 +-- spec/requests/templates_spec.rb | 2 +- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/app/controllers/api/templates_controller.rb b/app/controllers/api/templates_controller.rb index d05b31fc..b7680a90 100644 --- a/app/controllers/api/templates_controller.rb +++ b/app/controllers/api/templates_controller.rb @@ -18,7 +18,7 @@ module Api preview_image_attachments = ActiveStorage::Attachment.joins(:blob) - .where(blob: { filename: '0.jpg' }) + .where(blob: { filename: '0.png' }) .where(record_id: schema_documents.map(&:id), record_type: 'ActiveStorage::Attachment', name: :preview_images) diff --git a/lib/templates/process_document.rb b/lib/templates/process_document.rb index 1b821187..c16e264b 100644 --- a/lib/templates/process_document.rb +++ b/lib/templates/process_document.rb @@ -3,11 +3,12 @@ module Templates module ProcessDocument DPI = 200 - FORMAT = '.jpg' + FORMAT = '.png' ATTACHMENT_NAME = 'preview_images' PDF_CONTENT_TYPE = 'application/pdf' - Q = ENV.fetch('PAGE_QUALITY', '35').to_i + Q = 95 + JPEG_Q = ENV.fetch('PAGE_QUALITY', '35').to_i MAX_WIDTH = 1400 MAX_NUMBER_OF_PAGES_PROCESSED = 15 MAX_FLATTEN_FILE_SIZE = 20.megabytes @@ -15,7 +16,7 @@ module Templates module_function - def call(attachment, data, extract_fields: false, image_quality: nil) + def call(attachment, data, extract_fields: false) if attachment.content_type == PDF_CONTENT_TYPE if extract_fields && data.size < MAX_FLATTEN_FILE_SIZE pdf = HexaPDF::Document.new(io: StringIO.new(data)) @@ -23,23 +24,26 @@ module Templates fields = Templates::FindAcroFields.call(pdf, attachment) end - generate_pdf_preview_images(attachment, data, pdf, image_quality:) + generate_pdf_preview_images(attachment, data, pdf) attachment.metadata['pdf']['fields'] = fields if fields elsif attachment.image? - generate_preview_image(attachment, data, image_quality:) + generate_preview_image(attachment, data) end attachment end - def generate_preview_image(attachment, data, image_quality: nil) + def generate_preview_image(attachment, data) ActiveStorage::Attachment.where(name: ATTACHMENT_NAME, record: attachment).destroy_all image = Vips::Image.new_from_buffer(data, '') image = image.autorot.resize(MAX_WIDTH / image.width.to_f) - io = StringIO.new(image.write_to_buffer(FORMAT, Q: image_quality || Q, interlace: true)) + bitdepth = 2**image.stats.to_a[1..3].pluck(2).uniq.size + + io = StringIO.new(image.write_to_buffer(FORMAT, compression: 7, filter: 0, bitdepth:, + palette: true, Q: Q, dither: 0)) ActiveStorage::Attachment.create!( blob: ActiveStorage::Blob.create_and_upload!( @@ -51,7 +55,7 @@ module Templates ) end - def generate_pdf_preview_images(attachment, data, pdf = nil, image_quality: nil) + def generate_pdf_preview_images(attachment, data, pdf = nil) ActiveStorage::Attachment.where(name: ATTACHMENT_NAME, record: attachment).destroy_all pdf ||= HexaPDF::Document.new(io: StringIO.new(data)) @@ -70,7 +74,10 @@ module Templates page = Vips::Image.new_from_buffer(data, '', dpi: DPI, page: page_number) page = page.resize(MAX_WIDTH / page.width.to_f) - io = StringIO.new(page.write_to_buffer(FORMAT, Q: image_quality || Q, interlace: true)) + bitdepth = 2**page.stats.to_a[1..3].pluck(2).uniq.size + + io = StringIO.new(page.write_to_buffer(FORMAT, compression: 7, filter: 0, bitdepth:, + palette: true, Q: Q, dither: 0)) ApplicationRecord.no_touching do ActiveStorage::Attachment.create!( @@ -115,11 +122,11 @@ module Templates end end - def generate_pdf_preview_from_file(attachment, file_path, page_number, image_quality: nil) + def generate_pdf_preview_from_file(attachment, file_path, page_number) io = StringIO.new command = [ - 'pdftocairo', '-jpeg', '-jpegopt', "progressive=y,quality=#{image_quality || Q},optimize=y", + 'pdftocairo', '-jpeg', '-jpegopt', "progressive=y,quality=#{JPEG_Q},optimize=y", '-scale-to-x', MAX_WIDTH, '-scale-to-y', '-1', '-r', DPI, '-f', page_number + 1, '-l', page_number + 1, '-singlefile', Shellwords.escape(file_path), '-' @@ -138,7 +145,7 @@ module Templates ApplicationRecord.no_touching do ActiveStorage::Attachment.create!( blob: ActiveStorage::Blob.create_and_upload!( - io:, filename: "#{page_number}#{FORMAT}", + io:, filename: "#{page_number}.jpg", metadata: { analyzed: true, identified: true, width: page.width, height: page.height } ), name: ATTACHMENT_NAME, diff --git a/lib/templates/serialize_for_api.rb b/lib/templates/serialize_for_api.rb index 569f8dc2..3087035b 100644 --- a/lib/templates/serialize_for_api.rb +++ b/lib/templates/serialize_for_api.rb @@ -19,7 +19,7 @@ module Templates preview_image_attachments ||= ActiveStorage::Attachment.joins(:blob) - .where(blob: { filename: '0.jpg' }) + .where(blob: { filename: ['0.jpg', '0.png'] }) .where(record_id: schema_documents.map(&:id), record_type: 'ActiveStorage::Attachment', name: :preview_images) @@ -29,7 +29,7 @@ module Templates attachment = schema_documents.find { |e| e.uuid == item['attachment_uuid'] } first_page_blob = preview_image_attachments.find { |e| e.record_id == attachment.id }&.blob - first_page_blob ||= attachment.preview_images.joins(:blob).find_by(blob: { filename: '0.jpg' })&.blob + first_page_blob ||= attachment.preview_images.joins(:blob).find_by(blob: { filename: ['0.jpg', '0.png'] })&.blob { id: attachment.id, diff --git a/spec/requests/templates_spec.rb b/spec/requests/templates_spec.rb index 65d542b3..aa4951bf 100644 --- a/spec/requests/templates_spec.rb +++ b/spec/requests/templates_spec.rb @@ -130,7 +130,7 @@ describe 'Templates API', type: :request do attachment = template.schema_documents.preload(:blob).find { |e| e.uuid == template_attachment_uuid } first_page_blob = ActiveStorage::Attachment.joins(:blob) - .where(blob: { filename: '0.jpg' }) + .where(blob: { filename: '0.png' }) .where(record_id: template.schema_documents.map(&:id), record_type: 'ActiveStorage::Attachment', name: :preview_images)