fix pdf text emoji

pull/402/head
Pete Matsyburka 2 weeks ago
parent fa1ef44b22
commit d2a0937038

@ -457,22 +457,40 @@ class Pdfium
origin_x_ptr = FFI::MemoryPointer.new(:double) origin_x_ptr = FFI::MemoryPointer.new(:double)
origin_y_ptr = FFI::MemoryPointer.new(:double) origin_y_ptr = FFI::MemoryPointer.new(:double)
char_count.times do |i| i = 0
char = Pdfium.FPDFText_GetUnicode(text_page, i).chr(Encoding::UTF_8)
result = Pdfium.FPDFText_GetCharBox(text_page, i, left_ptr, right_ptr, bottom_ptr, top_ptr) loop do
break unless i < char_count
box_index = i
codepoint = Pdfium.FPDFText_GetUnicode(text_page, i)
if codepoint.between?(0xD800, 0xDBFF) && (i + 1 < char_count)
codepoint2 = Pdfium.FPDFText_GetUnicode(text_page, i + 1)
if codepoint2.between?(0xDC00, 0xDFFF)
codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (codepoint2 - 0xDC00)
i += 1
end
end
char = codepoint.chr(Encoding::UTF_8)
result = Pdfium.FPDFText_GetCharBox(text_page, box_index, left_ptr, right_ptr, bottom_ptr, top_ptr)
next if result.zero? next if result.zero?
left = left_ptr.read_double left = left_ptr.read_double
right = right_ptr.read_double right = right_ptr.read_double
Pdfium.FPDFText_GetCharOrigin(text_page, i, origin_x_ptr, origin_y_ptr) Pdfium.FPDFText_GetCharOrigin(text_page, box_index, origin_x_ptr, origin_y_ptr)
origin_y = origin_y_ptr.read_double origin_y = origin_y_ptr.read_double
origin_x = origin_x_ptr.read_double origin_x = origin_x_ptr.read_double
font_size = Pdfium.FPDFText_GetFontSize(text_page, i) font_size = Pdfium.FPDFText_GetFontSize(text_page, box_index)
font_size = 8 if font_size == 1 font_size = 8 if font_size == 1
abs_x = left abs_x = left
@ -486,6 +504,8 @@ class Pdfium
node_height = abs_height / height node_height = abs_height / height
@text_nodes << TextNode.new(char, x, y, node_width, node_height) @text_nodes << TextNode.new(char, x, y, node_width, node_height)
ensure
i += 1
end end
y_threshold = 4.0 / width y_threshold = 4.0 / width

Loading…
Cancel
Save