mirror of https://github.com/docusealco/docuseal
				
				
				
			
			You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							113 lines
						
					
					
						
							3.6 KiB
						
					
					
				
			
		
		
	
	
							113 lines
						
					
					
						
							3.6 KiB
						
					
					
				# frozen_string_literal: true
 | 
						|
 | 
						|
module Templates
  # Turns uploaded files into documents attached to a template.
  #
  # PDFs and images are stored as ActiveStorage blobs, attached to the template
  # and passed to Templates::ProcessDocument. ZIP uploads are expanded into
  # their individual entries before processing.
  module CreateAttachments
    PDF_CONTENT_TYPE = 'application/pdf'
    ZIP_CONTENT_TYPE = 'application/zip'
    X_ZIP_CONTENT_TYPE = 'application/x-zip-compressed'
    JSON_CONTENT_TYPE = 'application/json'

    # Not referenced inside this module; presumably consumed by callers that
    # classify office documents by file extension (verify against usages).
    DOCUMENT_EXTENSIONS = %w[.docx .doc .xlsx .xls .odt .rtf].freeze

    # Office document MIME types that are kept when extracting ZIP entries.
    DOCUMENT_CONTENT_TYPES = %w[
      application/vnd.openxmlformats-officedocument.wordprocessingml.document
      application/msword
      application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
      application/vnd.ms-excel
      application/vnd.oasis.opendocument.text
      application/rtf
    ].freeze

    # PDFs at or above this size skip both annotation building and the
    # encryption check.
    ANNOTATIONS_SIZE_LIMIT = 6.megabytes

    # Raised for uploads that are neither an image nor a PDF.
    InvalidFileType = Class.new(StandardError)
    # Raised when an encrypted PDF cannot be decrypted with the given password.
    PdfEncrypted = Class.new(StandardError)

    module_function

    # Entry point: processes every uploaded file (expanding ZIP archives).
    #
    # @param template the template whose +documents+ association receives the files
    # @param params request params; reads +:files+ (preferred) or +:file+, and
    #   +:password+ for encrypted PDFs
    # @param extract_fields [Boolean] forwarded to Templates::ProcessDocument
    # @return [Array] flattened results of #handle_file_types for each file
    # @raise [InvalidFileType] when a file is neither an image nor a PDF
    # @raise [PdfEncrypted] when an encrypted PDF cannot be decrypted
    def call(template, params, extract_fields: false)
      uploads = params[:files].presence || params[:file]

      extract_zip_files(uploads).flat_map do |file|
        handle_file_types(template, file, params, extract_fields:)
      end
    end

    # Stores a PDF or image as a blob, attaches it to the template and runs
    # Templates::ProcessDocument on it.
    #
    # @param template the template that receives the new document
    # @param file uploaded file responding to +read+, +content_type+ and
    #   +original_filename+
    # @param document_data [String, nil] file contents; read from +file+ when nil
    # @param params reads +:password+ when the PDF is encrypted
    # @param extract_fields [Boolean] forwarded to Templates::ProcessDocument
    # @return the result of Templates::ProcessDocument.call
    # @raise [PdfEncrypted] when an encrypted PDF cannot be decrypted
    def handle_pdf_or_image(template, file, document_data = nil, params = {}, extract_fields: false)
      document_data ||= file.read
      pdf_file = file.content_type == PDF_CONTENT_TYPE

      if pdf_file
        document_data = maybe_decrypt_pdf_or_raise(document_data, params)

        # Annotations are only built for PDFs below the size limit.
        annotations =
          document_data.size < ANNOTATIONS_SIZE_LIMIT ? Templates::BuildAnnotations.call(document_data) : []
      end

      # The digest is taken over the (possibly decrypted) bytes that are stored.
      sha256 = Base64.urlsafe_encode64(Digest::SHA256.digest(document_data))

      blob = ActiveStorage::Blob.create_and_upload!(
        io: StringIO.new(document_data),
        filename: file.original_filename,
        # compact_blank drops false/empty values, so for images only sha256
        # remains. NOTE(review): identified/analyzed presumably stop
        # ActiveStorage from re-inspecting PDFs - confirm.
        metadata: {
          identified: pdf_file,
          analyzed: pdf_file,
          pdf: { annotations: }.compact_blank,
          sha256:
        }.compact_blank,
        content_type: file.content_type
      )

      document = template.documents.create!(blob:)

      Templates::ProcessDocument.call(document, document_data, extract_fields:)
    end

    # Returns decrypted PDF data when +data+ is an encrypted PDF below the size
    # limit; otherwise returns +data+ unchanged.
    #
    # @param data [String] raw PDF bytes
    # @param params reads +:password+
    # @return [String] PDF bytes, decrypted when applicable
    # @raise [PdfEncrypted] when HexaPDF reports an encryption error
    def maybe_decrypt_pdf_or_raise(data, params)
      return data unless data.size < ANNOTATIONS_SIZE_LIMIT && PdfUtils.encrypted?(data)

      PdfUtils.decrypt(data, params[:password])
    rescue HexaPDF::EncryptionError
      raise PdfEncrypted
    end

    # Expands ZIP uploads into one uploaded file per supported entry; uploads
    # that are not ZIP archives are passed through untouched.
    #
    # Entries are kept when their detected MIME type is an image, PDF, JSON or
    # one of DOCUMENT_CONTENT_TYPES; directories and everything else are skipped.
    #
    # @param files a single upload, an array of uploads, or nil
    # @return [Array] uploaded files in input order, with archives replaced by
    #   their extracted entries
    def extract_zip_files(files)
      extracted_files = []

      Array.wrap(files).each do |file|
        unless file.content_type == ZIP_CONTENT_TYPE || file.content_type == X_ZIP_CONTENT_TYPE
          extracted_files << file

          next
        end

        # Block form so the archive is closed even when an entry raises.
        Zip::File.open(file.tempfile) do |zip_file|
          zip_file.each do |entry|
            next if entry.directory?

            tempfile = Tempfile.new(entry.name)
            tempfile.binmode
            entry.get_input_stream { |in_stream| IO.copy_stream(in_stream, tempfile) }
            tempfile.rewind

            type = Marcel::MimeType.for(tempfile, name: entry.name)

            supported = type.include?('image') ||
                        type == PDF_CONTENT_TYPE ||
                        type == JSON_CONTENT_TYPE ||
                        DOCUMENT_CONTENT_TYPES.include?(type)

            unless supported
              # Release the temp file right away instead of waiting for GC.
              tempfile.close!

              next
            end

            extracted_files << ActionDispatch::Http::UploadedFile.new(
              filename: File.basename(entry.name),
              type:,
              tempfile:
            )
          end
        end
      end

      extracted_files
    end

    # Dispatches a single uploaded file by content type.
    #
    # @param template the template that receives the document
    # @param file uploaded file
    # @param params forwarded to #handle_pdf_or_image
    # @param extract_fields [Boolean] forwarded to #handle_pdf_or_image
    # @return the result of #handle_pdf_or_image
    # @raise [InvalidFileType] when the file is neither an image nor a PDF,
    #   including when it has no content type at all
    def handle_file_types(template, file, params, extract_fields:)
      # to_s guards against a nil content type, which would otherwise surface
      # as NoMethodError rather than InvalidFileType.
      content_type = file.content_type.to_s

      if content_type.include?('image') || content_type == PDF_CONTENT_TYPE
        return handle_pdf_or_image(template, file, file.read, params, extract_fields:)
      end

      raise InvalidFileType, file.content_type
    end
  end
end
 |