diff --git a/lib/templates/detect_fields.rb b/lib/templates/detect_fields.rb
index 2ade0efd..512d7804 100755
--- a/lib/templates/detect_fields.rb
+++ b/lib/templates/detect_fields.rb
@@ -59,8 +59,8 @@ module Templates
     CHECKBOXES = ['☐', '□'].freeze
 
     # rubocop:disable Metrics, Style
-    def call(io, attachment: nil, confidence: 0.3, temperature: 1, inference: Templates::ImageToFields,
-             nms: 0.1, split_page: false, aspect_ratio: true, padding: 20, regexp_type: true, &)
+    def call(io, attachment: nil, confidence: 0.3, temperature: 1, inference: Templates::ImageToFields, nms: 0.1,
+             split_page: false, aspect_ratio: true, padding: inference.model_v2? ? nil : 20, regexp_type: true, &)
       fields, head_node =
         if attachment&.image?
           process_image_attachment(io, attachment:, confidence:, nms:, split_page:, inference:,
@@ -114,7 +114,10 @@ module Templates
       fields = doc.page_count.times.flat_map do |page_number|
         page = doc.get_page(page_number)
 
-        data, width, height = page.render_to_bitmap(width: inference::RESOLUTION * 1.5)
+        size_key = page.width > page.height ? :width : :height
+        size = padding ? inference.resolution * 1.5 : inference.resolution
+
+        data, width, height = page.render_to_bitmap(size_key => size)
 
         image = Vips::Image.new_from_memory(data, width, height, 4, :uchar)
 
@@ -126,8 +129,8 @@ module Templates
 
         fields = sort_fields(fields, y_threshold: 10.0 / image.height)
 
-        fields = increase_confidence_for_overlapping_fields(fields, text_fields)
-        fields = increase_confidence_for_overlapping_fields(fields, line_fields)
+        fields = increase_confidence_for_overlapping_fields(fields, text_fields, confidence:)
+        fields = increase_confidence_for_overlapping_fields(fields, line_fields, confidence:)
 
         fields = fields.reject { |f| f.confidence < confidence }
 
@@ -477,10 +480,11 @@ module Templates
       !(box1.endx < box2.x || box2.endx < box1.x || box1.endy < box2.y || box2.endy < box1.y)
     end
 
-    def increase_confidence_for_overlapping_fields(image_fields, text_fields, by: 1.0)
+    def increase_confidence_for_overlapping_fields(image_fields, text_fields, confidence: 1, by: 1.0)
       return image_fields if text_fields.blank?
 
       image_fields.map do |image_field|
+        next if image_field.confidence >= confidence
         next if image_field.type != 'text'
 
         text_fields.each do |text_field|
diff --git a/lib/templates/image_to_fields.rb b/lib/templates/image_to_fields.rb
index d9c04221..a2dcc201 100755
--- a/lib/templates/image_to_fields.rb
+++ b/lib/templates/image_to_fields.rb
@@ -16,7 +16,7 @@ module Templates
 
     MODEL_PATH = Rails.root.join('tmp/model.onnx')
 
-    RESOLUTION = 704
+    INPUT_NAMES = %w[images input].freeze
 
     ID_TO_CLASS = %w[text checkbox].freeze
 
@@ -27,12 +27,14 @@ module Templates
 
     # rubocop:disable Metrics
     def call(image, confidence: 0.3, nms: 0.1, temperature: 1,
-             split_page: false, aspect_ratio: true, padding: nil, resolution: RESOLUTION)
-      base_image = image.extract_band(0, n: 3)
+             split_page: false, aspect_ratio: true, padding: nil, resolution: self.resolution)
+      image = image.extract_band(0, n: 3) if image.bands > 3
 
-      trimmed_base, base_offset_x, base_offset_y = trim_image_with_padding(base_image, padding)
+      trimmed_base, base_offset_x, base_offset_y = trim_image_with_padding(image, padding)
 
-      if split_page && image.height > image.width
+      if model_v2?
+        detections = call_v2(trimmed_base, base_offset_x, base_offset_y, split_page, confidence:, resolution:)
+      elsif split_page && image.height > image.width
         regions = build_split_image_regions(trimmed_base)
 
         detections = { xyxy: Numo::SFloat[], confidence: Numo::SFloat[], class_id: Numo::Int32[] }
@@ -71,6 +73,127 @@ module Templates
       build_fields_from_detections(detections, image)
     end
 
+    def call_v2(image, offset_x, offset_y, split_page, confidence:, resolution:)
+      if split_page && image.height > image.width
+        regions = build_split_image_regions(image)
+
+        detections = { xyxy: Numo::SFloat[], confidence: Numo::SFloat[], class_id: Numo::Int32[] }
+
+        regions.reduce(detections) do |acc, r|
+          next acc if r[:img].height <= 0 || r[:img].width <= 0
+
+          input_tensor, orig_size_tensor, transform_info = preprocess_image_v2(r[:img], resolution)
+
+          outputs = model.predict({ 'images' => input_tensor, 'orig_target_sizes' => orig_size_tensor },
+                                  output_type: :numo)
+
+          boxes = outputs['boxes'][0, true, true]
+          labels = outputs['labels'][0, true]
+          scores = outputs['scores'][0, true]
+
+          postprocess_outputs_v2(boxes, labels, scores, acc,
+                                 offset_x:, offset_y: offset_y + r[:offset_y],
+                                 confidence:, transform_info:)
+        end
+      else
+        input_tensor, orig_size_tensor, transform_info = preprocess_image_v2(image, resolution)
+
+        outputs = model.predict({ 'images' => input_tensor, 'orig_target_sizes' => orig_size_tensor },
+                                output_type: :numo)
+
+        boxes = outputs['boxes'][0, true, true]
+        labels = outputs['labels'][0, true]
+        scores = outputs['scores'][0, true]
+
+        postprocess_outputs_v2(boxes, labels, scores, offset_x:, offset_y:,
+                                                      confidence:, transform_info:)
+      end
+    end
+
+    def preprocess_image_v2(image, resolution)
+      image = image.extract_band(0, n: 3) if image.bands > 3
+
+      ratio = [resolution.to_f / image.width, resolution.to_f / image.height].min
+      new_width = (image.width * ratio).to_i
+      new_height = (image.height * ratio).to_i
+
+      image = image.resize(ratio, vscale: ratio, kernel: :linear) if ratio != 1
+
+      pad_w = (resolution - new_width) / 2
+      pad_h = (resolution - new_height) / 2
+
+      padded = image.embed(pad_w, pad_h, resolution, resolution, background: [0, 0, 0])
+
+      padded /= 255.0
+
+      img_array = Numo::SFloat.from_binary(padded.write_to_memory, [resolution, resolution, 3])
+
+      img_array = img_array.transpose(2, 0, 1)
+
+      input_tensor = img_array.reshape(1, 3, resolution, resolution)
+
+      orig_size_tensor = Numo::Int64[[resolution, resolution]]
+
+      transform_info = { ratio: ratio, pad_w: pad_w, pad_h: pad_h }
+
+      [input_tensor, orig_size_tensor, transform_info]
+    end
+
+    def postprocess_outputs_v2(boxes, labels, scores, detections = nil, offset_x:, offset_y:, confidence:,
+                               transform_info:)
+      keep_mask = scores.gt(confidence)
+      keep_indices = keep_mask.where
+
+      if keep_indices.empty?
+        detections || {
+          xyxy: Numo::SFloat[],
+          confidence: Numo::SFloat[],
+          class_id: Numo::Int32[]
+        }
+      else
+        scores = scores[keep_indices]
+        labels = labels[keep_indices]
+        boxes_xyxy = boxes[keep_indices, true]
+
+        ratio = transform_info[:ratio]
+        pad_w = transform_info[:pad_w]
+        pad_h = transform_info[:pad_h]
+
+        boxes_xyxy[true, 0] = ((boxes_xyxy[true, 0] - pad_w) / ratio) + offset_x
+        boxes_xyxy[true, 1] = ((boxes_xyxy[true, 1] - pad_h) / ratio) + offset_y
+        boxes_xyxy[true, 2] = ((boxes_xyxy[true, 2] - pad_w) / ratio) + offset_x
+        boxes_xyxy[true, 3] = ((boxes_xyxy[true, 3] - pad_h) / ratio) + offset_y
+
+        if detections
+          existing_n = detections[:xyxy].shape[0]
+          new_n = boxes_xyxy.shape[0]
+          total = existing_n + new_n
+
+          xyxy = Numo::SFloat.zeros(total, 4)
+          conf = Numo::SFloat.zeros(total)
+          cls = Numo::Int32.zeros(total)
+
+          if existing_n.positive?
+            xyxy[0...existing_n, true] = detections[:xyxy]
+            conf[0...existing_n] = detections[:confidence]
+            cls[0...existing_n] = detections[:class_id]
+          end
+
+          xyxy[existing_n...(existing_n + new_n), true] = boxes_xyxy
+          conf[existing_n...(existing_n + new_n)] = scores
+          cls[existing_n...(existing_n + new_n)] = labels
+
+          { xyxy: xyxy, confidence: conf, class_id: cls }
+        else
+          {
+            xyxy: boxes_xyxy,
+            confidence: scores,
+            class_id: labels
+          }
+        end
+      end
+    end
+
     def build_split_image_regions(image)
       half_h = image.height / 2
       top_h = half_h
@@ -212,7 +335,7 @@ module Templates
     end
 
     def postprocess_outputs(boxes, logits, transform_info, detections = nil, confidence: 0.3, temperature: 1,
-                            resolution: RESOLUTION)
+                            resolution: self.resolution)
       scaled_logits = logits / temperature
 
       probs = 1.0 / (1.0 + Numo::NMath.exp(-scaled_logits))
@@ -326,6 +449,14 @@ module Templates
         providers: ['CPUExecutionProvider']
       )
     end
+
+    def resolution
+      @resolution ||= model.inputs.find { |i| INPUT_NAMES.include?(i[:name]) }.dig(:shape, 2)
+    end
+
+    def model_v2?
+      @model_v2 ||= model.inputs.pluck(:name).include?('orig_target_sizes')
+    end
     # rubocop:enable Metrics
   end
 end