diff --git a/lib/docsplit/image_extractor.rb b/lib/docsplit/image_extractor.rb index 8c29bbc..8bc4d1d 100755 --- a/lib/docsplit/image_extractor.rb +++ b/lib/docsplit/image_extractor.rb @@ -33,7 +33,7 @@ def convert(pdf, size, format, previous=nil) directory = directory_for(size) pages = @pages || '1-' + Docsplit.extract_length(pdf).to_s escaped_pdf = ESCAPE[pdf] - FileUtils.mkdir_p(directory) unless File.exists?(directory) + FileUtils.mkdir_p(directory) unless File.exist?(directory) common = "#{MEMORY_ARGS} -density #{@density} #{resize_arg(size)} #{quality_arg(format)}" if previous FileUtils.cp(Dir[directory_for(previous) + '/*'], directory) @@ -48,7 +48,7 @@ def convert(pdf, size, format, previous=nil) end end ensure - FileUtils.remove_entry_secure tempdir if File.exists?(tempdir) + FileUtils.remove_entry_secure tempdir if File.exist?(tempdir) end diff --git a/lib/docsplit/page_extractor.rb b/lib/docsplit/page_extractor.rb index 145c980..0aef939 100644 --- a/lib/docsplit/page_extractor.rb +++ b/lib/docsplit/page_extractor.rb @@ -10,15 +10,15 @@ def extract(pdfs, opts) [pdfs].flatten.each do |pdf| pdf_name = File.basename(pdf, File.extname(pdf)) page_path = ESCAPE[File.join(@output, "#{pdf_name}")] + "_%d.pdf" - FileUtils.mkdir_p @output unless File.exists?(@output) - + FileUtils.mkdir_p @output unless File.exist?(@output) + cmd = if DEPENDENCIES[:pdftailor] # prefer pdftailor, but keep pdftk for backwards compatability "pdftailor unstitch --output #{page_path} #{ESCAPE[pdf]} 2>&1" else "pdftk #{ESCAPE[pdf]} burst output #{page_path} 2>&1" end result = `#{cmd}`.chomp - FileUtils.rm('doc_data.txt') if File.exists?('doc_data.txt') + FileUtils.rm('doc_data.txt') if File.exist?('doc_data.txt') raise ExtractionFailed, result if $? != 0 result end diff --git a/lib/docsplit/pdf_extractor.rb b/lib/docsplit/pdf_extractor.rb index 21861e2..a479265 100644 --- a/lib/docsplit/pdf_extractor.rb +++ b/lib/docsplit/pdf_extractor.rb @@ -16,7 +16,7 @@ def osx? def linux? !!HOST_OS.match(/linux/i) end - + # The first line of the help output holds the name and version number # of the office software to be used for extraction. def version_string @@ -35,10 +35,10 @@ def libre_office? def open_office? !!version_string.match(/^OpenOffice.org/) end - + # A set of default locations to search for office software # These have been extracted from JODConverter. Each listed - # path should contain a directory "program" which in turn + # path should contain a directory "program" which in turn # contains the "soffice" executable. # see: https://github.com/mirkonasato/jodconverter/blob/master/jodconverter-core/src/main/java/org/artofsolving/jodconverter/office/OfficeUtils.java#L63-L91 def office_search_paths @@ -69,7 +69,7 @@ def office_search_paths end search_paths end - + # Identify the path to a working office executable. def office_executable paths = office_search_paths @@ -78,10 +78,10 @@ def office_executable # raise an error if that path isn't valid, otherwise, add # it to the front of our search paths. if ENV['OFFICE_PATH'] - raise ArgumentError, "No such file or directory #{ENV['OFFICE_PATH']}" unless File.exists? ENV['OFFICE_PATH'] + raise ArgumentError, "No such file or directory #{ENV['OFFICE_PATH']}" unless File.exist? ENV['OFFICE_PATH'] paths.unshift(ENV['OFFICE_PATH']) end - + # The location of the office executable is OS dependent path_pieces = ["soffice"] if windows? @@ -91,15 +91,15 @@ def office_executable else path_pieces += [["program", "soffice"]] end - + # Search for the first suitable office executable # and short circuit an executable is found. paths.each do |path| - if File.exists? path + if File.exist? path @@executable ||= path unless File.directory? path path_pieces.each do |pieces| check_path = File.join(path, pieces) - @@executable ||= check_path if File.exists? check_path + @@executable ||= check_path if File.exist? check_path end end break if @@executable @@ -107,16 +107,16 @@ def office_executable raise OfficeNotFound, "No office software found" unless @@executable @@executable end - + # Used to specify the office location for JODConverter def office_path File.dirname(File.dirname(office_executable)) end - + # Convert documents to PDF. def extract(docs, opts) out = opts[:output] || '.' - FileUtils.mkdir_p out unless File.exists?(out) + FileUtils.mkdir_p out unless File.exist?(out) [docs].flatten.each do |doc| ext = File.extname(doc) basename = File.basename(doc, ext) @@ -128,7 +128,7 @@ def extract(docs, opts) if libre_office? # Set the LibreOffice user profile, so that parallel uses of cloudcrowd don't trip over each other. ENV['SYSUSERCONFIG']="file://#{File.expand_path(escaped_out)}" - + options = "--headless --invisible --norestore --nolockcheck --convert-to pdf --outdir #{escaped_out} #{escaped_doc}" cmd = "#{office_executable} #{options} 2>&1" result = `#{cmd}`.chomp @@ -147,9 +147,9 @@ def extract(docs, opts) LOGGING = "-Djava.util.logging.config.file=#{ESCAPED_ROOT}/vendor/logging.properties" HEADLESS = "-Djava.awt.headless=true" - + private - + # Runs a Java command, with quieted logging, and the classpath set properly. def run_jod(command, pdfs, opts, return_output=false) diff --git a/lib/docsplit/text_extractor.rb b/lib/docsplit/text_extractor.rb index 93973f6..f3390e8 100644 --- a/lib/docsplit/text_extractor.rb +++ b/lib/docsplit/text_extractor.rb @@ -28,7 +28,7 @@ def initialize # Extract text from a list of PDFs. def extract(pdfs, opts) extract_options opts - FileUtils.mkdir_p @output unless File.exists?(@output) + FileUtils.mkdir_p @output unless File.exist?(@output) [pdfs].flatten.each do |pdf| @pdf_name = File.basename(pdf, File.extname(pdf)) pages = (@pages == 'all') ? 1..Docsplit.extract_length(pdf) : @pages @@ -80,7 +80,7 @@ def extract_from_ocr(pdf, pages) clean_text(base_path + '.txt') if @clean_ocr end ensure - FileUtils.remove_entry_secure tempdir if File.exists?(tempdir) + FileUtils.remove_entry_secure tempdir if File.exist?(tempdir) end diff --git a/test/test_helper.rb b/test/test_helper.rb index 9c37b2b..2357c5a 100755 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -10,7 +10,7 @@ class Minitest::Test OUTPUT = 'test/output' def clear_output - FileUtils.rm_r(OUTPUT) if File.exists?(OUTPUT) + FileUtils.rm_r(OUTPUT) if File.exist?(OUTPUT) end def teardown