mirror of https://github.com/docusealco/docuseal
				
				
				
			
			You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							113 lines
						
					
					
						
							3.6 KiB
						
					
					
				
			
		
		
	
	
							113 lines
						
					
					
						
							3.6 KiB
						
					
					
				| # frozen_string_literal: true
 | |
| 
 | |
module Templates
  # Converts uploaded files into documents attached to a template.
  #
  # Uploads may be PDFs, images, or ZIP archives; archives are unpacked into
  # individual uploaded files before being processed. All methods are exposed
  # as module functions (e.g. +Templates::CreateAttachments.call+).
  module CreateAttachments
    PDF_CONTENT_TYPE = 'application/pdf'
    ZIP_CONTENT_TYPE = 'application/zip'
    X_ZIP_CONTENT_TYPE = 'application/x-zip-compressed'
    JSON_CONTENT_TYPE = 'application/json'
    DOCUMENT_EXTENSIONS = %w[.docx .doc .xlsx .xls .odt .rtf].freeze

    DOCUMENT_CONTENT_TYPES = %w[
      application/vnd.openxmlformats-officedocument.wordprocessingml.document
      application/msword
      application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
      application/vnd.ms-excel
      application/vnd.oasis.opendocument.text
      application/rtf
    ].freeze

    # PDFs at or above this size skip both annotation extraction and the
    # encryption check/decryption step.
    ANNOTATIONS_SIZE_LIMIT = 6.megabytes
    InvalidFileType = Class.new(StandardError)
    PdfEncrypted = Class.new(StandardError)

    module_function

    # Entry point: processes every uploaded file found in +params+.
    #
    # @param template [Object] record whose +documents+ association receives the new documents
    # @param params [Hash] reads +:files+ (preferred) or +:file+, and +:password+ for encrypted PDFs
    # @param extract_fields [Boolean] forwarded to Templates::ProcessDocument
    # @return [Array] flattened results of handling each file
    # @raise [InvalidFileType] when a file is neither an image nor a PDF
    # @raise [PdfEncrypted] when an encrypted PDF cannot be decrypted
    def call(template, params, extract_fields: false)
      extract_zip_files(params[:files].presence || params[:file]).flat_map do |file|
        handle_file_types(template, file, params, extract_fields:)
      end
    end

    # Uploads a PDF or image as a blob, attaches it to the template and hands
    # it to Templates::ProcessDocument.
    #
    # @param template [Object] record whose +documents+ association receives the new document
    # @param file [#content_type, #original_filename, #read] the uploaded file
    # @param document_data [String, nil] file contents; read from +file+ when nil
    # @param params [Hash] reads +:password+ when the PDF is encrypted
    # @param extract_fields [Boolean] forwarded to Templates::ProcessDocument
    # @return whatever Templates::ProcessDocument.call returns
    # @raise [PdfEncrypted] when an encrypted PDF cannot be decrypted
    def handle_pdf_or_image(template, file, document_data = nil, params = {}, extract_fields: false)
      document_data ||= file.read
      pdf = file.content_type == PDF_CONTENT_TYPE

      if pdf
        document_data = maybe_decrypt_pdf_or_raise(document_data, params)

        annotations =
          document_data.size < ANNOTATIONS_SIZE_LIMIT ? Templates::BuildAnnotations.call(document_data) : []
      end

      sha256 = Base64.urlsafe_encode64(Digest::SHA256.digest(document_data))

      # `annotations` stays nil for non-PDF files. compact_blank drops blank
      # values, so false flags and an empty `pdf` hash are left out of the
      # stored metadata.
      blob = ActiveStorage::Blob.create_and_upload!(
        io: StringIO.new(document_data),
        filename: file.original_filename,
        metadata: {
          identified: pdf,
          analyzed: pdf,
          pdf: { annotations: }.compact_blank, sha256:
        }.compact_blank,
        content_type: file.content_type
      )

      document = template.documents.create!(blob:)

      Templates::ProcessDocument.call(document, document_data, extract_fields:)
    end

    # Decrypts +data+ with +params[:password]+ when it is an encrypted PDF
    # smaller than ANNOTATIONS_SIZE_LIMIT; otherwise returns +data+ untouched.
    #
    # @param data [String] raw PDF bytes
    # @param params [Hash] reads +:password+
    # @return [String] PDF bytes (decrypted when applicable)
    # @raise [PdfEncrypted] when HexaPDF reports an encryption error
    def maybe_decrypt_pdf_or_raise(data, params)
      if data.size < ANNOTATIONS_SIZE_LIMIT && PdfUtils.encrypted?(data)
        PdfUtils.decrypt(data, params[:password])
      else
        data
      end
    rescue HexaPDF::EncryptionError
      raise PdfEncrypted
    end

    # Expands ZIP uploads into their supported entries; other files pass
    # through unchanged and in order.
    #
    # @param files [Array, Object, nil] one uploaded file or a list of them
    # @return [Array] uploaded files with archives replaced by their entries
    def extract_zip_files(files)
      Array.wrap(files).flat_map do |file|
        zip_file?(file) ? extract_zip_entries(file) : [file]
      end
    end

    # @param file [#content_type]
    # @return [Boolean] whether the upload is declared as a ZIP archive
    def zip_file?(file)
      [ZIP_CONTENT_TYPE, X_ZIP_CONTENT_TYPE].include?(file.content_type)
    end

    # Reads every supported, non-directory entry of a ZIP upload.
    #
    # @param file [#tempfile] uploaded ZIP archive
    # @return [Array<ActionDispatch::Http::UploadedFile>]
    def extract_zip_entries(file)
      extracted_files = []

      # Block form so the archive handle is closed once iteration finishes,
      # even if an entry raises.
      Zip::File.open(file.tempfile) do |zip_file|
        zip_file.each do |entry|
          next if entry.directory?

          uploaded_file = build_uploaded_file(entry)
          extracted_files << uploaded_file if uploaded_file
        end
      end

      extracted_files
    end

    # Copies a ZIP entry into a tempfile and wraps it as an uploaded file.
    #
    # @param entry [Zip::Entry]
    # @return [ActionDispatch::Http::UploadedFile, nil] nil when the detected type is unsupported
    def build_uploaded_file(entry)
      tempfile = Tempfile.new(entry.name)
      tempfile.binmode
      entry.get_input_stream { |in_stream| IO.copy_stream(in_stream, tempfile) }
      tempfile.rewind

      type = Marcel::MimeType.for(tempfile, name: entry.name)

      unless supported_zip_entry_type?(type)
        # Release the tempfile right away instead of waiting for GC.
        tempfile.close!
        return
      end

      ActionDispatch::Http::UploadedFile.new(
        filename: File.basename(entry.name),
        type:,
        tempfile:
      )
    end

    # @param type [String] detected MIME type of a ZIP entry
    # @return [Boolean] whether the entry is kept when unpacking an archive
    def supported_zip_entry_type?(type)
      type.include?('image') ||
        type == PDF_CONTENT_TYPE ||
        type == JSON_CONTENT_TYPE ||
        DOCUMENT_CONTENT_TYPES.include?(type)
    end

    # Dispatches a single uploaded file by content type.
    #
    # @param template [Object] record whose +documents+ association receives the new document
    # @param file [#content_type, #read] the uploaded file
    # @param params [Hash] forwarded to #handle_pdf_or_image
    # @param extract_fields [Boolean] forwarded to #handle_pdf_or_image
    # @return the result of #handle_pdf_or_image
    # @raise [InvalidFileType] when the file is neither an image nor a PDF
    def handle_file_types(template, file, params, extract_fields:)
      # to_s guards against uploads without a content type, so they raise
      # InvalidFileType rather than NoMethodError.
      content_type = file.content_type.to_s

      if content_type.include?('image') || content_type == PDF_CONTENT_TYPE
        return handle_pdf_or_image(template, file, file.read, params, extract_fields:)
      end

      raise InvalidFileType, file.content_type
    end
  end
end
 |