# frozen_string_literal: true

# Provenance: contents of lib/pdf_processor.rb as introduced by git commit
# ba46202a4da9997bc7d05fb2d50bcac069b77d92 ("add pdf processor class",
# Pete Matsyburka, Sat, 16 Dec 2023 03:01:25 +0200).

# Content-stream processor that re-serializes every operator it is fed into
# a binary buffer, letting a pluggable handler decide which operands are
# written for each operator.
#
# The handler is any object responding to
# +call(processor, operator, operands)+. Whatever it returns is splatted
# into the operator's +serialize+ call. The handler receives the processor
# itself, so it can reach the +result+ array to record findings.
class PdfProcessor < HexaPDF::Content::Processor
  # Callable consulted once per processed operator.
  attr_accessor :handler

  # +contents+ is the binary string of serialized operators built so far;
  # +result+ is an array that starts empty (this class never appends to it
  # on its own).
  attr_reader :contents, :result

  class << self
    # Rewrites the content stream of every page in a PDF.
    #
    # @param data [String] raw PDF bytes, wrapped in a StringIO for parsing
    # @param pdf_handler [#call] assigned as the handler of each page's processor
    # @param result_handler [#call] invoked as +call(item, page, acc)+ for
    #   every entry found in a page processor's +result+
    # @param acc [Object] accumulator handed through to +result_handler+
    #   untouched by this method (a fresh Hash when omitted)
    # @return [Array] two elements: the written PDF as a String, and +acc+
    def call(data, pdf_handler, result_handler, acc = {})
      document = HexaPDF::Document.new(io: StringIO.new(data))

      document.pages.each do |page|
        page_processor = PdfProcessor.new(page).tap { |pr| pr.handler = pdf_handler }

        page.process_contents(page_processor)

        # Swap the page's stream for the one rebuilt during processing.
        page.contents = page_processor.contents

        page_processor.result.each { |entry| result_handler.call(entry, page, acc) }
      end

      output = StringIO.new
      document.write(output)

      # Go back to the start so the whole written document is read.
      output.rewind

      [output.read, acc]
    end
  end

  # @param page [Object] forwarded unchanged to the parent constructor by
  #   the bare +super+
  def initialize(page)
    super

    @serializer = HexaPDF::Serializer.new
    @result = []
    @contents = ''.b
  end

  # Runs the inherited processing for the operator, then appends the
  # operator's serialized form to +contents+ using the operands returned by
  # the handler.
  #
  # @param operator [Object] key used to look up the entry in +@operators+
  # @param operands [Array] operands as received; passed to the handler
  # @return [String] the +contents+ buffer after appending
  def process(operator, operands = [])
    super

    operator_impl = @operators[operator]
    emitted_operands = handler.call(self, operator, operands)

    contents << operator_impl.serialize(@serializer, *emitted_operands)
  end
end