diff --git a/src/components/utils/annotations.ts b/src/components/utils/annotations.ts
index 010ba94..ec202fa 100644
--- a/src/components/utils/annotations.ts
+++ b/src/components/utils/annotations.ts
@@ -1,3 +1,4 @@
+/* eslint-disable @typescript-eslint/restrict-template-expressions */
 /* eslint-disable @typescript-eslint/no-unsafe-member-access */
 /* eslint-disable @typescript-eslint/no-unsafe-assignment */
 /* eslint-disable no-case-declarations */
@@ -99,21 +100,51 @@ async function linkAnnotation(annotation: {
   unsafeUrl?: string
 }, PDFDoc: PDFDocumentProxy) {
   if (annotation.dest) {
-    // Get referenced page number of internal link
-    if (typeof annotation.dest === 'string') {
+    // A string dest is looked up through PDFDoc.getDestination; anything else is used as given
+    let explicitDest
+    if (typeof annotation.dest === 'string')
+      explicitDest = await PDFDoc.getDestination(annotation.dest)
+    else
+      explicitDest = annotation.dest
+
+    if (!Array.isArray(explicitDest)) {
+      console.warn(`Destination "${explicitDest}" is not a valid destination (dest="${annotation.dest}")`)
       return buildAnnotationData(INTERNAL_LINK, {
-        referencedPage: Number(annotation.dest.substring(1, annotation.dest.length)),
+        referencedPage: null,
         offset: null,
       })
     }
+
+    // Read offsets from the resolved array: annotation.dest may still be a string here
+    let offset = null
+    if (explicitDest.length === 5) {
+      offset = {
+        left: explicitDest[2],
+        bottom: explicitDest[3],
+      }
+    }
+
+    const [destRef] = explicitDest
+    if (Number.isInteger(destRef)) {
+      return buildAnnotationData(INTERNAL_LINK, {
+        referencedPage: Number(destRef) + 1,
+        offset,
+      })
+    }
+    else if (destRef !== null && typeof destRef === 'object') {
+      const pageNumber = await PDFDoc.getPageIndex(destRef as RefProxy)
+      return buildAnnotationData(INTERNAL_LINK, {
+        referencedPage: pageNumber + 1,
+        offset,
+      })
+    }
     else {
-      const pageIndex = await PDFDoc.getPageIndex(annotation.dest[0] as RefProxy)
+      console.warn(
+        `Destination "${destRef}" is not a valid destination (dest="${annotation.dest}")`,
+      )
       return buildAnnotationData(INTERNAL_LINK, {
-        referencedPage: pageIndex + 1,
-        offset: {
-          left: annotation.dest[2],
-          bottom: annotation.dest[3],
-        },
+        referencedPage: null,
+        offset: null,
       })
     }
   }
diff --git a/src/components/utils/highlight.ts b/src/components/utils/highlight.ts
index 0d8d34e..c5db6d2 100644
--- a/src/components/utils/highlight.ts
+++ b/src/components/utils/highlight.ts
@@ -5,9 +5,19 @@ import type { HighlightOptions, Match } from '../types'
 function searchQuery(textContent: TextContent, query: string, options: HighlightOptions) {
   const strs = []
   for (const textItem of textContent.items as TextItem[]) {
-    strs.push(textItem.str)
-    if (textItem.hasEOL)
-      strs.push('\n')
+    if (textItem.hasEOL) {
+      // Drop a trailing line-break hyphen so the text continues straight into the next item
+      if (textItem.str.endsWith('-')) {
+        const lastHyphen = textItem.str.lastIndexOf('-')
+        strs.push(textItem.str.substring(0, lastHyphen))
+      }
+      else {
+        strs.push(textItem.str, '\n')
+      }
+    }
+    else {
+      strs.push(textItem.str)
+    }
   }
 
   // Join the text as is presented in textlayer and then replace newlines (/n) with whitespaces
@@ -34,6 +44,19 @@ function searchQuery(textContent: TextContent, query: string, options: Highlight
 }
 
 function convertMatches(matches: (number | string)[][], textContent: TextContent): Match[] {
+  function endOfLineOffset(item: TextItem) {
+    // When a text item has an EOL flag and its string ends with a hyphen,
+    // the hyphen is removed (-1 len) so the sentence can be searched as a joined one.
+    // In other cases the EOL flag introduces a whitespace (+1 len) between two different sentences.
+    if (item.hasEOL) {
+      if (item.str.endsWith('-'))
+        return -1
+      else
+        return 1
+    }
+    return 0
+  }
+
   let index = 0
   let tindex = 0
   const textItems = textContent.items as TextItem[]
@@ -46,7 +69,8 @@ function convertMatches(matches: (number | string)[][], textContent: TextContent
     let mindex = matches[m][0] as number
 
     while (index !== end && mindex >= tindex + textItems[index].str.length) {
-      tindex += textItems[index].str.length + (textItems[index].hasEOL ? 1 : 0)
+      const item = textItems[index]
+      tindex += item.str.length + endOfLineOffset(item)
       index++
     }
 
@@ -58,7 +82,8 @@ function convertMatches(matches: (number | string)[][], textContent: TextContent
     mindex += matches[m][1] as number
 
     while (index !== end && mindex > tindex + textItems[index].str.length) {
-      tindex += textItems[index].str.length + (textItems[index].hasEOL ? 1 : 0)
+      const item = textItems[index]
+      tindex += item.str.length + endOfLineOffset(item)
       index++
     }
 