mirror of https://github.com/docusealco/docuseal
				
				
				
			
							parent
							
								
									a773f63a65
								
							
						
					
					
						commit
						a265ca1716
					
				| @ -1,96 +0,0 @@ | ||||
| # frozen_string_literal: true | ||||
| 
 | ||||
# Content-stream processor that, after HexaPDF has handled an operator,
# forwards that operator to a pluggable +handler+.
#
# +handler+ must respond to +call(processor, operator, operands)+.
# +serializer+ is a HexaPDF::Serializer used by #serialize.
class PdfProcessor < HexaPDF::Content::Processor
  attr_accessor :handler, :serializer

  # Handler for the first (scanning) pass over a page. For every +TJ+/+Tj+
  # operator it decodes the shown text and records each character found in
  # +search_chars+ into the wrapped handler's +tokens+ list.
  class ParseTextHandler
    attr_accessor :pos, :num, :search_chars, :handler

    # Operators whose operands are decoded and scanned.
    TJS = %i[TJ Tj].freeze

    # @param handler [#search_chars, #tokens] object that supplies the
    #   characters to look for and collects the matches
    def initialize(handler)
      @handler = handler
      @search_chars = handler.search_chars

      @num = 0
      @pos = 0
    end

    # Scans one operator. Operators other than TJ/Tj are ignored and do not
    # advance the counters.
    #
    # Each recorded token has the form <tt>[char, [num, pos]]</tt>, where
    # +num+ is the count of TJ/Tj operators seen before this one and +pos+ is
    # the character offset inside the decoded text of the current operator.
    #
    # @param processor [PdfProcessor] processor whose +decode_text+ is used
    # @param operator [Symbol] content-stream operator name
    # @param operands [Array] operands of the operator
    def call(processor, operator, operands)
      return unless TJS.include?(operator)

      begin
        # +decode_text+ is invoked through +send+, as in the original code
        # (presumably because it is not public on the processor — confirm).
        decoded = processor.send(:decode_text, *operands)

        decoded.each_char do |char|
          handler.tokens << [char, [@num, @pos]] if search_chars.include?(char)

          @pos += 1
        end
      rescue HexaPDF::Error => e
        # A text operator that cannot be decoded is logged and skipped; it
        # still counts as one operator so later indices stay consistent.
        Rails.logger.error(e.message)
      end

      @pos = 0
      @num += 1
    end
  end

  # @param page the page handed on to HexaPDF::Content::Processor#initialize
  def initialize(page)
    super

    @serializer = HexaPDF::Serializer.new
  end

  # Serializes +operator+ with +operands+ through the operator implementation
  # registered in +operators+, using this processor's serializer.
  #
  # @param operator [Symbol] content-stream operator name
  # @param operands [Array] operands of the operator
  # @return the value returned by the operator's +serialize+
  def serialize(operator, operands)
    operator_impl = operators[operator]

    operator_impl.serialize(serializer, *operands)
  end

  # Lets the superclass process the operator first, then notifies +handler+.
  #
  # @param operator [Symbol] content-stream operator name
  # @param operands [Array] operands of the operator
  def process(operator, operands = [])
    super

    handler.call(self, operator, operands)
  end

  # Runs the processing pipeline over every page of a PDF and returns the
  # rewritten document together with the accumulator.
  #
  # Per page:
  # 1. A scanning pass (ParseTextHandler) collects tokens into a fresh
  #    +process_handler+ instance.
  # 2. If that instance reports tokens, a second pass feeds every operator to
  #    the instance itself; when it produced a result and +remove_tags+ is
  #    true, the page contents are replaced with the instance's +contents+.
  # 3. Annotations whose +Contents+ text is wrapped in <tt>{{ … }}</tt> are
  #    reported to +result_handler+ and removed from the page.
  # 4. Every item of the instance's +result+ is reported to +result_handler+.
  #
  # @param data [String] raw PDF bytes
  # @param process_handler [Class] class whose instances respond to
  #   +search_chars+, +tokens+, +tokens?+, +call+, +contents+ and +result+
  # @param result_handler [#call] invoked as +call(item, page, acc)+
  # @param acc [Hash] accumulator passed through to +result_handler+
  # @param remove_tags [Boolean] whether page contents may be replaced
  # @return [Array(String, Hash)] the written PDF data and +acc+
  def self.call(data, process_handler, result_handler, acc = {}, remove_tags: true)
    doc = HexaPDF::Document.new(io: StringIO.new(data))

    doc.pages.each do |page|
      scan_pass = PdfProcessor.new(page)
      tag_handler = process_handler.new
      scan_pass.handler = ParseTextHandler.new(tag_handler)

      page.process_contents(scan_pass)

      if tag_handler.tokens?
        extract_pass = PdfProcessor.new(page)
        extract_pass.handler = tag_handler

        page.process_contents(extract_pass)

        if tag_handler.result.present? && remove_tags
          page.contents = tag_handler.contents
        end
      end

      # NOTE(review): entries are deleted from page[:Annots] while looping
      # over page[:Annots].to_a; this relies on +to_a+ returning a separate
      # array rather than the underlying collection — confirm for HexaPDF.
      page[:Annots].to_a.each do |annot|
        next unless annot

        label = annot[:Contents].to_s.squish

        next unless label.starts_with?('{{') && label.ends_with?('}}')

        result_handler.call({ text: label, rect: annot[:Rect] }, page, acc)

        page[:Annots].delete(annot)
      end

      tag_handler.result.each { |item| result_handler.call(item, page, acc) }
    end

    output = StringIO.new

    doc.write(output, validate: false)

    output.rewind

    [output.read, acc]
  end
end
					Loading…
					
					
				
		Reference in new issue