Add PDF/Text tab switcher to template builder for sighted users

- Create pdf_text_to_html.js: JS port of the Ruby heuristic parser
  (ALL_CAPS→h2, numbered headings→h3, bullets→ul/li, body→p dir=auto)
- Add pdf_view, text_view, document_view_options keys to i18n.js (en)
- Update document.vue: tab switcher shown when all pages have extracted
  text; PDF View renders the existing page images; Text View renders
  heuristic HTML in a prose container with per-page sections
- ArrowLeft/ArrowRight keyboard navigation between tabs with focus management
- Tab switcher is hidden entirely for scanned/image-only PDFs (hasFullText gate)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
pull/599/head
Marcelo Paiva 3 weeks ago
parent 40dd223393
commit 797fb32a37

@ -1,5 +1,35 @@
<template> <template>
<div> <div>
<div
v-if="hasFullText"
role="tablist"
:aria-label="t('document_view_options')"
class="flex border-b border-base-300 mb-2"
>
<button
role="tab"
type="button"
:aria-selected="!textViewActive ? 'true' : 'false'"
:tabindex="!textViewActive ? 0 : -1"
:class="['px-4 py-2 text-sm font-medium border-b-2 -mb-px focus:outline-none', !textViewActive ? 'border-primary text-primary' : 'border-transparent text-base-content/60 hover:text-base-content']"
@click="textViewActive = false"
@keydown="onTabKeydown($event, false)"
>
{{ t('pdf_view') }}
</button>
<button
role="tab"
type="button"
:aria-selected="textViewActive ? 'true' : 'false'"
:tabindex="textViewActive ? 0 : -1"
:class="['px-4 py-2 text-sm font-medium border-b-2 -mb-px focus:outline-none', textViewActive ? 'border-primary text-primary' : 'border-transparent text-base-content/60 hover:text-base-content']"
@click="textViewActive = true"
@keydown="onTabKeydown($event, true)"
>
{{ t('text_view') }}
</button>
</div>
<template v-if="!textViewActive">
<Page <Page
v-for="(image, index) in sortedPreviewImages" v-for="(image, index) in sortedPreviewImages"
:key="image.id" :key="image.id"
@ -39,17 +69,34 @@
@scroll-to="scrollToArea" @scroll-to="scrollToArea"
@draw="$emit('draw', { area: {...$event.area, attachment_uuid: document.uuid }, isTooSmall: $event.isTooSmall })" @draw="$emit('draw', { area: {...$event.area, attachment_uuid: document.uuid }, isTooSmall: $event.isTooSmall })"
/> />
</template>
<div
v-else
role="tabpanel"
class="prose max-w-none px-2 py-1"
>
<section
v-for="[pageIndex, pageTextContent] in pagesTextEntries"
:key="pageIndex"
:aria-label="`${t('page')} ${Number(pageIndex) + 1}`"
>
<!-- eslint-disable-next-line vue/no-v-html -->
<div v-html="pdfTextToHtml(pageTextContent)" />
</section>
</div>
</div> </div>
</template> </template>
<script> <script>
import Page from './page' import Page from './page'
import { reactive } from 'vue' import { reactive } from 'vue'
import { pdfTextToHtml } from './pdf_text_to_html'
export default { export default {
name: 'TemplateDocument', name: 'TemplateDocument',
components: { components: {
Page Page
}, },
inject: ['t'],
props: { props: {
document: { document: {
type: Object, type: Object,
@ -148,7 +195,8 @@ export default {
emits: ['draw', 'drop-field', 'remove-area', 'paste-field', 'copy-field', 'copy-selected-areas', 'delete-selected-areas', 'autodetect-fields', 'add-custom-field', 'set-draw'], emits: ['draw', 'drop-field', 'remove-area', 'paste-field', 'copy-field', 'copy-selected-areas', 'delete-selected-areas', 'autodetect-fields', 'add-custom-field', 'set-draw'],
data () { data () {
return { return {
pageRefs: [] pageRefs: [],
textViewActive: false
} }
}, },
computed: { computed: {
@ -182,12 +230,31 @@ export default {
}, },
pagesText () { pagesText () {
return this.document.metadata?.pdf?.pages_text || {} return this.document.metadata?.pdf?.pages_text || {}
},
hasFullText () {
const nPages = this.numberOfPages
return nPages > 0 && Object.keys(this.pagesText).length >= nPages
},
pagesTextEntries () {
return Object.entries(this.pagesText).sort((a, b) => Number(a[0]) - Number(b[0]))
} }
}, },
beforeUpdate () { beforeUpdate () {
this.pageRefs = [] this.pageRefs = []
}, },
methods: { methods: {
pdfTextToHtml,
onTabKeydown (e, currentIsTextView) {
if (e.key === 'ArrowRight' || e.key === 'ArrowLeft') {
e.preventDefault()
this.textViewActive = !currentIsTextView
this.$nextTick(() => {
const tabs = this.$el.querySelectorAll('[role="tab"]')
const activeTab = Array.from(tabs).find((t) => t.getAttribute('aria-selected') === 'true')
activeTab?.focus()
})
}
},
scrollToArea (area) { scrollToArea (area) {
this.$nextTick(() => { this.$nextTick(() => {
const pageRef = this.pageRefs[area.page] const pageRef = this.pageRefs[area.page]

@ -97,6 +97,9 @@ const en = {
format: 'Format', format: 'Format',
read_only: 'Read-only', read_only: 'Read-only',
page: 'Page', page: 'Page',
pdf_view: 'PDF View',
text_view: 'Text View',
document_view_options: 'Document view options',
draw_new_area: 'Draw new area', draw_new_area: 'Draw new area',
copy_to_all_pages: 'Copy to all pages', copy_to_all_pages: 'Copy to all pages',
more: 'More', more: 'More',

@ -0,0 +1,53 @@
/**
 * Escape the five HTML-significant characters so text can be embedded in markup.
 *
 * @param {string} str - Raw text.
 * @returns {string} The text with &, <, >, " and ' replaced by HTML entities.
 */
function escapeHtml (str) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#039;'
  }

  // Single pass: every special character maps to exactly one entity, so an
  // ampersand produced by a replacement is never escaped a second time.
  return str.replace(/[&<>"']/g, (ch) => entities[ch])
}
/**
 * Heuristic: does this line look like a numbered heading ("3. Scope")?
 *
 * A match needs digits, a dot, whitespace and then an ASCII capital letter at
 * the start; at most 80 characters overall; and no trailing sentence
 * punctuation (. ! ? , ;).
 *
 * @param {string} line - A single, already-trimmed line of text.
 * @returns {boolean}
 */
function isNumberedHeading (line) {
  if (!(line.length <= 80)) return false

  // Lines ending in sentence punctuation read as body text or list items.
  if (/[.!?,;]$/.test(line)) return false

  return /^\d+\.\s+[A-Z]/.test(line)
}
/**
 * Heuristic: does this line look like an ALL-CAPS heading?
 *
 * A match needs at least 3 characters, no trailing sentence punctuation
 * (. ! ? , ;), at least one ASCII capital letter, and no character that
 * toUpperCase() would change.
 *
 * @param {string} line - A single, already-trimmed line of text.
 * @returns {boolean}
 */
function isAllCapsHeading (line) {
  if (!(line.length >= 3)) return false
  if (/[.!?,;]$/.test(line)) return false

  // Require a real letter so purely numeric/symbolic lines do not qualify.
  const hasAsciiCapital = /[A-Z]/.test(line)
  return hasAsciiCapital && line.toUpperCase() === line
}
/**
 * Convert one page of extracted plain text into simple HTML using line-level
 * heuristics. Every piece of text is HTML-escaped before being wrapped.
 *
 * Per non-blank (trimmed) line, first match wins:
 *   1. numbered heading          -> <h3>
 *   2. ALL-CAPS heading          -> <h2>
 *   3. bullet ("•", "*" or "-" followed by whitespace) -> <li> inside a <ul>
 *   4. anything else             -> <p dir="auto">
 *
 * Blank lines produce no output but end an open list. Because the heading
 * checks run before bullet detection, an all-caps bullet line is emitted as
 * an <h2> (marker included), not as a list item.
 *
 * @param {string} pageText - Raw text of a single page; falsy yields ''.
 * @returns {string} Concatenated HTML fragments with no separators.
 */
export function pdfTextToHtml (pageText) {
  if (!pageText) return ''

  const chunks = []
  let listOpen = false

  // Emit the closing tag for a pending <ul>, if there is one.
  const closeList = () => {
    if (listOpen) {
      chunks.push('</ul>')
      listOpen = false
    }
  }

  for (const rawLine of pageText.split(/\r?\n/)) {
    const text = rawLine.trim()

    if (text === '') {
      closeList()
      continue
    }

    if (isNumberedHeading(text)) {
      closeList()
      chunks.push(`<h3>${escapeHtml(text)}</h3>`)
      continue
    }

    if (isAllCapsHeading(text)) {
      closeList()
      chunks.push(`<h2>${escapeHtml(text)}</h2>`)
      continue
    }

    const bullet = /^[•*-]\s+(.+)/.exec(text)
    if (bullet === null) {
      closeList()
      chunks.push(`<p dir="auto">${escapeHtml(text)}</p>`)
      continue
    }

    // Consecutive bullet lines share a single <ul>.
    if (!listOpen) {
      chunks.push('<ul>')
      listOpen = true
    }
    chunks.push(`<li>${escapeHtml(bullet[1])}</li>`)
  }

  closeList()
  return chunks.join('')
}
Loading…
Cancel
Save