diff --git a/src/Smalot/PdfParser/Font.php b/src/Smalot/PdfParser/Font.php index 56dee588..dd069cf2 100644 --- a/src/Smalot/PdfParser/Font.php +++ b/src/Smalot/PdfParser/Font.php @@ -216,45 +216,46 @@ public function loadTranslateTable(): array // Support for multiple bfrange sections if (preg_match_all('/beginbfrange(?P.*?)endbfrange/s', $content, $matches)) { foreach ($matches['sections'] as $section) { - // Support for : - $regexp = '/<(?P[0-9A-F]+)> *<(?P[0-9A-F]+)> *<(?P[0-9A-F]+)>[ \r\n]+/is'; + // Regexp to capture , , and either or [...] items. + // - (?P...) Source range's start + // - (?P...) Source range's end + // - (?P...) Destination range's offset or each char code + // Some PDF file has 2-byte Unicode values on new lines > added \r\n + $regexp = '/<(?P[0-9A-F]+)> *<(?P[0-9A-F]+)> *(?P<[0-9A-F]+>|\[[\r\n<>0-9A-F ]+\])[ \r\n]+/is'; preg_match_all($regexp, $section, $matches); foreach ($matches['from'] as $key => $from) { $char_from = hexdec($from); $char_to = hexdec($matches['to'][$key]); - $offset = hexdec($matches['offset'][$key]); + $dest = $matches['dest'][$key]; - for ($char = $char_from; $char <= $char_to; ++$char) { - $this->table[$char] = self::uchr($char - $char_from + $offset); - } - } + if (preg_match('/^<(?P[0-9A-F]+)>$/i', $dest, $offset_matches)) { + // Support for : + $offset = hexdec($offset_matches['offset']); - // Support for : [ ... ] - // Some PDF file has 2-byte Unicode values on new lines > added \r\n - $regexp = '/<(?P[0-9A-F]+)> *<(?P[0-9A-F]+)> *\[(?P[\r\n<>0-9A-F ]+)\][ \r\n]+/is'; - - preg_match_all($regexp, $section, $matches); + for ($char = $char_from; $char <= $char_to; ++$char) { + $this->table[$char] = self::uchr($char - $char_from + $offset); + } - foreach ($matches['from'] as $key => $from) { - $char_from = hexdec($from); - $strings = []; - - preg_match_all('/<(?P[0-9A-F]+)> */is', $matches['strings'][$key], $strings); - - foreach ($strings['string'] as $position => $string) { - $parts = preg_split( - '/([0-9A-F]{4})/i', - $string, - 0, - \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE - ); - $text = ''; - foreach ($parts as $part) { - $text .= self::uchr(hexdec($part)); + } else { + // Support for : [ ... ] + $strings = []; + preg_match_all('/<(?P[0-9A-F]+)> */is', $dest, $strings); + + foreach ($strings['string'] as $position => $string) { + $parts = preg_split( + '/([0-9A-F]{4})/i', + $string, + 0, + \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE + ); + $text = ''; + foreach ($parts as $part) { + $text .= self::uchr(hexdec($part)); + } + $this->table[$char_from + $position] = $text; } - $this->table[$char_from + $position] = $text; } } }