From 74f75cfe2d83fc4424a6b09832b2803de4b88a7c Mon Sep 17 00:00:00 2001 From: Tilman Hausherr Date: Fri, 24 Jan 2025 05:50:09 +0100 Subject: [PATCH] TIKA-4326: update pdfbox --- tika-parent/pom.xml | 3 +-- .../main/java/org/apache/tika/parser/pdf/PDFParserConfig.java | 3 +-- .../test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml index 863b742d85..96e2096916 100644 --- a/tika-parent/pom.xml +++ b/tika-parent/pom.xml @@ -420,8 +420,7 @@ 1.2.0 2.0.14 4.14.0 - - 3.0.3 + 3.0.4 5.4.0 3.25.5 2.5.0 diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java index af6213ba9b..bcc5b739a4 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java @@ -226,8 +226,7 @@ public void configure(PDF2XHTML pdf2XHTML) { pdf2XHTML.setDropThreshold(dropThreshold); } pdf2XHTML.setSuppressDuplicateOverlappingText(isSuppressDuplicateOverlappingText()); - // TODO TIKA-2342 activate after PDFBox release - //pdf2XHTML.setIgnoreContentStreamSpaceGlyphs(isIgnoreContentStreamSpaceGlyphs()); + pdf2XHTML.setIgnoreContentStreamSpaceGlyphs(isIgnoreContentStreamSpaceGlyphs()); } /** diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java index 1c722994e9..d3f4f9f28c 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java @@ -465,8 +465,7 @@ public void testDuplicateOverlappingText() throws Exception { } - // TODO TIKA-2342 activate after PDFBox release - // @Test + @Test public void testIgnoreContentStreamSpaceGlyphs() throws Exception { PDFParser parser = new PDFParser(); // Default is false (keep spaces, don't sort):