mirror of https://github.com/docusealco/docuseal
				
				
				
			
			You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							80 lines
						
					
					
						
							1.8 KiB
						
					
					
				
			
		
		
	
	
							80 lines
						
					
					
						
							1.8 KiB
						
					
					
				# frozen_string_literal: true
 | 
						|
 | 
						|
# Content-stream processor that hands every processed operator to a pluggable
# +handler+ and can serialize an operator with its operands through a
# HexaPDF::Serializer.
class PdfProcessor < HexaPDF::Content::Processor
  attr_accessor :handler, :serializer

  # First-pass handler: decodes the text of each TJ/Tj operator and records the
  # characters the wrapped handler is searching for.
  class ParseTextHandler
    # Operators whose operands are decoded and scanned.
    TJS = %i[TJ Tj].freeze

    attr_accessor :pos, :num, :search_chars, :handler

    # @param handler [#search_chars, #tokens] object that supplies the characters
    #   to look for and collects the matches
    def initialize(handler)
      @handler = handler
      @search_chars = handler.search_chars

      # @num counts the TJ/Tj operators handled so far; @pos is the character
      # offset inside the text of the operator currently being scanned.
      @num = 0
      @pos = 0
    end

    # Scans the decoded text of a TJ/Tj operator and appends
    # +[char, [num, pos]]+ to +handler.tokens+ for every character contained in
    # +search_chars+. Any other operator is ignored.
    #
    # @param processor [PdfProcessor] processor used to decode the operands
    # @param operator [Symbol] name of the operator being processed
    # @param operands [Array] operands of the operator
    def call(processor, operator, operands)
      return unless TJS.include?(operator)

      # decode_text is invoked through send, as in the original implementation.
      decoded = processor.send(:decode_text, *operands)

      decoded.each_char do |glyph|
        if search_chars.include?(glyph)
          handler.tokens << [glyph, [@num, @pos]]
        end

        @pos += 1
      end

      # Reset the in-text offset and move on to the next operator index.
      @pos = 0
      @num += 1
    end
  end

  # @param page the object forwarded unchanged to the superclass initializer
  def initialize(page)
    super(page)

    @serializer = HexaPDF::Serializer.new
  end

  # Serializes +operator+ with its +operands+ using this processor's serializer.
  #
  # @param operator [Symbol] key into +operators+
  # @param operands [Array] operands splatted into the operator's serialize call
  def serialize(operator, operands)
    operator_impl = operators[operator]

    operator_impl.serialize(serializer, *operands)
  end

  # Runs the superclass processing first, then notifies +handler+ with the
  # processor itself, the operator and its operands.
  def process(operator, operands = [])
    super(operator, operands)

    handler.call(self, operator, operands)
  end

  # Loads a PDF from +data+, runs the handlers over every page and writes the
  # document back out.
  #
  # For each page a new +process_handler+ instance is created. A first pass
  # feeds it through ParseTextHandler; when the instance then reports
  # +tokens?+, a second pass uses the instance itself as the handler and the
  # page contents are replaced with +contents+ from that instance. Every item
  # of the instance's +result+ is passed to +result_handler+ together with the
  # page and +acc+.
  #
  # @param data [String] raw PDF data
  # @param process_handler [Class] instantiated once per page via +new+
  # @param result_handler [#call] invoked as +call(item, page, acc)+
  # @param acc [Object] accumulator handed to +result_handler+ (defaults to a new Hash)
  # @return [Array] two elements: the written PDF data and +acc+
  def self.call(data, process_handler, result_handler, acc = {})
    document = HexaPDF::Document.new(io: StringIO.new(data))

    document.pages.each do |page|
      # Pass 1: collect tokens for the characters the handler cares about.
      scan_pass = PdfProcessor.new(page)
      page_handler = process_handler.new
      scan_pass.handler = ParseTextHandler.new(page_handler)
      page.process_contents(scan_pass)

      if page_handler.tokens?
        # Pass 2: let the handler itself see every operator, then swap in the
        # contents it produced.
        rewrite_pass = PdfProcessor.new(page)
        rewrite_pass.handler = page_handler
        page.process_contents(rewrite_pass)

        page.contents = page_handler.contents
      end

      page_handler.result.each { |entry| result_handler.call(entry, page, acc) }
    end

    output = StringIO.new
    document.write(output, validate: false)
    output.rewind

    [output.read, acc]
  end
end
 |