documentcloud · floehopper · Jul 2, 2018
diff --git a/lib/docsplit/text_extractor.rb b/lib/docsplit/text_extractor.rb
@@ -76,7 +76,7 @@ def extract_from_ocr(pdf, pages)
         escaped_tiff = ESCAPE[tiff]
         run "MAGICK_TMPDIR=#{tempdir} OMP_NUM_THREADS=2 gm convert -despeckle #{MEMORY_ARGS} #{OCR_FLAGS} #{escaped_pdf} #{escaped_tiff} 2>&1"
         #if the user says don't do orientation detection or the plugin is not installed, set psm to 0
-        run "tesseract #{escaped_tiff} #{base_path} -l #{@language} #{psm} 2>&1"
+        run "tesseract #{escaped_tiff} #{ESCAPE[base_path]} -l #{@language} #{psm} 2>&1"
         clean_text(base_path + '.txt') if @clean_ocr
       end
     ensure

diff --git a/test/unit/test_extract_text.rb b/test/unit/test_extract_text.rb
@@ -49,11 +49,16 @@ def test_password_protected
     end
   end
 
-  def test_name_escaping_while_extracting_text
+  def test_name_escaping_while_extracting_text_into_pages
     Docsplit.extract_text('test/fixtures/PDF file with spaces \'single\' and "double quotes".pdf', :pages => 'all', :output => OUTPUT)
     assert Dir["#{OUTPUT}/*.txt"].length == 2
   end
-
+
+  def test_name_escaping_while_extracting_text_using_ocr
+    Docsplit.extract_text('test/fixtures/PDF file with spaces \'single\' and "double quotes".pdf', :ocr => true, :output => OUTPUT)
+    assert Dir["#{OUTPUT}/*.txt"].length == 1
+  end
+
   def test_orientation_detected_ocr_extraction
     if Docsplit::DEPENDENCIES[:osd]
       pages = 1..4