mirror of https://github.com/docusealco/docuseal
				
				
				
			
			You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							80 lines
						
					
					
						
							1.8 KiB
						
					
					
				
			
		
		
	
	
							80 lines
						
					
					
						
							1.8 KiB
						
					
					
				| # frozen_string_literal: true
 | |
| 
 | |
# Content-stream processor that forwards every processed operator to a
# pluggable +handler+ and can serialize operators back to their textual form.
#
# The class-level entry point, PdfProcessor.call, runs over every page of a
# document: a first pass scans decoded text for the characters the handler is
# interested in, and (only when something was found) a second pass lets the
# handler itself process the page and supply replacement contents.
class PdfProcessor < HexaPDF::Content::Processor
  attr_accessor :handler, :serializer

  # First-pass handler: looks only at text-showing operators and records, on
  # the wrapped handler's +tokens+ list, each character that is one of the
  # wrapped handler's +search_chars+, together with where it was seen.
  class ParseTextHandler
    # Operators whose operands are decoded and scanned; everything else is ignored.
    TJS = %i[TJ Tj].freeze

    attr_accessor :pos, :num, :search_chars, :handler

    # @param handler [#search_chars, #tokens] object that receives the found tokens
    def initialize(handler)
      @handler = handler
      @search_chars = handler.search_chars

      @num = 0 # index of the current TJ/Tj operator among those seen so far
      @pos = 0 # character offset inside the current operator's decoded text
    end

    # Scans one operator. Each matching character is appended to
    # +handler.tokens+ as <tt>[char, [num, pos]]</tt>.
    #
    # @param processor [PdfProcessor] processor used to decode the operands
    # @param operator [Symbol] operator name
    # @param operands [Array] operator operands
    # @return [Integer, nil] the updated operator counter, or nil when the
    #   operator is not one of TJS
    def call(processor, operator, operands)
      return unless TJS.include?(operator)

      # decode_text is not public on the processor, hence the +send+.
      decoded = processor.send(:decode_text, *operands)

      decoded.each_char do |char|
        handler.tokens << [char, [@num, @pos]] if search_chars.include?(char)

        @pos += 1
      end

      # The next text operator starts counting characters from zero again.
      @pos = 0
      @num += 1
    end
  end

  # @param page the page handed through unchanged to the parent initializer
  def initialize(page)
    super

    @serializer = HexaPDF::Serializer.new
  end

  # Serializes +operator+ with its +operands+ using this processor's serializer.
  #
  # @param operator [Symbol] key into the processor's +operators+ table
  # @param operands [Array] operands splatted into the operator's serializer
  def serialize(operator, operands)
    operator_impl = operators[operator]

    operator_impl.serialize(serializer, *operands)
  end

  # Runs the parent implementation first, then notifies the handler.
  #
  # @return whatever the handler's +call+ returns
  def process(operator, operands = [])
    super

    handler.call(self, operator, operands)
  end

  # Processes every page of the PDF given as +data+ and returns the rewritten
  # document together with the accumulator.
  #
  # @param data [String] raw PDF bytes
  # @param process_handler [Class] instantiated once per page; instances must
  #   respond to +search_chars+, +tokens+, +tokens?+, +call+, +contents+ and +result+
  # @param result_handler [#call] invoked as <tt>call(item, page, acc)</tt> for
  #   every item of a page handler's +result+
  # @param acc [Object] accumulator passed through to +result_handler+
  # @return [Array(String, Object)] the written PDF and +acc+
  def self.call(data, process_handler, result_handler, acc = {})
    document = HexaPDF::Document.new(io: StringIO.new(data))

    document.pages.each do |page|
      # Pass 1: only collect tokens, nothing is rewritten yet.
      scanner = PdfProcessor.new(page)
      page_handler = process_handler.new
      scanner.handler = ParseTextHandler.new(page_handler)
      page.process_contents(scanner)

      # Pass 2: only when pass 1 found something does the handler process the
      # page itself and provide the new content stream.
      if page_handler.tokens?
        rewriter = PdfProcessor.new(page)
        rewriter.handler = page_handler
        page.process_contents(rewriter)

        page.contents = page_handler.contents
      end

      page_handler.result.each { |item| result_handler.call(item, page, acc) }
    end

    output = StringIO.new
    document.write(output, validate: false)
    output.rewind

    [output.read, acc]
  end
end
 |