diff --git a/lib/derivative_rodeo/generators/thumbnail_generator.rb b/lib/derivative_rodeo/generators/thumbnail_generator.rb index 70b3bd9..af533e2 100644 --- a/lib/derivative_rodeo/generators/thumbnail_generator.rb +++ b/lib/derivative_rodeo/generators/thumbnail_generator.rb @@ -50,13 +50,8 @@ def build_step(output_location:, input_tmp_file_path:, **) # # @see .dimensions_by_type # @see .dimensions_fallback - # - # @note TODO: This is a very quick and dirty and assumptive type detector. For the 2023-05-31 - # use case it is likely adequate (e.g. if it ends in .pdf we'll have a configured - # match). In other words, we'd love someone else to be sniffing out mime-types rather - # than doing it here. def self.dimensions_for(filename:) - type = filename.split(".")&.last&.to_sym + type = DerivativeRodeo::Services::MimeTypeService.hyrax_type(filename: filename) dimensions_by_type.fetch(type, dimensions_fallback) end diff --git a/lib/derivative_rodeo/services/mime_type_service.rb b/lib/derivative_rodeo/services/mime_type_service.rb new file mode 100644 index 0000000..149c3ff --- /dev/null +++ b/lib/derivative_rodeo/services/mime_type_service.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true +require 'marcel' + +module DerivativeRodeo + module Services + ## + # This module provides an interface for determining a mime-type. + module MimeTypeService + ## + # Hyrax has its own compression of mime_types into conceptual types (as defined in + # Hyrax::FileSetDerivativesService). This provides a somewhat conceptual overlap with that, + # while also being more generalized. + # + # @param filename [String] + # @return [Symbol] + def self.hyrax_type(filename:) + mime = mime_type(filename: filename) + media_type, sub_type = mime.split("/") + case media_type + when "image", "audio", "text", "video" + media_type.to_sym + when "application" # The wild woolly weird world of all the things. + # TODO: Do we need to worry about office documents? 
+ sub_type.to_sym + else + sub_type.to_sym + end + end + + ## + # Given a local :filename (e.g. downloaded and available on the server this is running), + # return the mime_type of the file. + # + # @param filename [String] + # @return [String] (e.g. "application/pdf", "text/plain") + def self.mime_type(filename:) + ## + # TODO: Does this attempt to read the whole file? That may create memory constraints. By + # using Pathname (instead of File.read), we're letting Marcel do its best mime magic. + pathname = Pathname.new(filename) + extension = filename.split(".")&.last&.downcase + if extension + # By including a possible extension, we can help nudge Marcel into making a more + # accurate guess. Without an extension, we will get a lot of "application/octet-stream" results. NOTE(review): a filename with no "." yields the whole filename as the extension, so this branch is taken for nearly every input; consider File.extname, and confirm Marcel ignores unknown extensions. + ::Marcel::MimeType.for(pathname, extension: extension) + else + ::Marcel::MimeType.for(pathname) + end + end + end + end +end diff --git a/spec/derivative_rodeo/generators/thumbnail_generator_spec.rb b/spec/derivative_rodeo/generators/thumbnail_generator_spec.rb index 586b04e..9d8d5b4 100644 --- a/spec/derivative_rodeo/generators/thumbnail_generator_spec.rb +++ b/spec/derivative_rodeo/generators/thumbnail_generator_spec.rb @@ -22,18 +22,13 @@ describe '.dimensions_for' do subject { described_class.dimensions_for(filename: filename) } - context "given a file ending in '.pdf'" do - let(:filename) { "really-cool.pdf" } + context "given a PDF" do + let(:filename) { Fixtures.path_for("minimal-2-page.pdf") } it { is_expected.to eq described_class.dimensions_by_type.fetch(:pdf) } end - context "given a file without an extension" do - let(:filename) { "aint-no-extension-here" } - it { is_expected.to eq described_class.dimensions_fallback } - end - - context "given a file ending in '.tiff'" do - let(:filename) { "muppet-man.tiff" } + context "given a TIFF" do + let(:filename) { Fixtures.path_for("4.1.07.tiff") } it { is_expected.to eq described_class.dimensions_fallback } end end diff --git 
a/spec/derivative_rodeo/services/mime_type_service_spec.rb b/spec/derivative_rodeo/services/mime_type_service_spec.rb new file mode 100644 index 0000000..aa093f3 --- /dev/null +++ b/spec/derivative_rodeo/services/mime_type_service_spec.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe DerivativeRodeo::Services::MimeTypeService do + describe '.mime_type' do + subject { described_class.mime_type(filename: filename) } + { + __FILE__ => "text/x-ruby", + Fixtures.path_for('4.1.07.tiff') => "image/tiff", + Fixtures.path_for('tiff-no-ext') => "image/tiff", + Fixtures.path_for('minimal-1-page.pdf') => "application/pdf", + Fixtures.path_for('ndnp-sample1-txt.txt') => "text/plain" + }.each do |given_filename, expected_mime_type| + context "for #{File.basename(given_filename)}" do + let(:filename) { given_filename } + it { is_expected.to eq(expected_mime_type) } + end + end + end + + describe '.hyrax_type' do + subject { described_class.hyrax_type(filename: filename) } + { + Fixtures.path_for('4.1.07.tiff') => :image, + Fixtures.path_for('minimal-1-page.pdf') => :pdf, + Fixtures.path_for('ndnp-sample1-txt.txt') => :text + }.each do |given_filename, expected_hyrax_type| + context "for #{File.basename(given_filename)}" do + let(:filename) { given_filename } + it { is_expected.to eq(expected_hyrax_type) } + end + end + end +end diff --git a/spec/fixtures/files/tiff-no-ext b/spec/fixtures/files/tiff-no-ext new file mode 100644 index 0000000..97ca385 Binary files /dev/null and b/spec/fixtures/files/tiff-no-ext differ