Skip to content

Commit

Permalink
Merge pull request #47 from bodleian/develop
Browse files Browse the repository at this point in the history
Release 2019-07-03 (retry 2)
  • Loading branch information
mel-mason authored Jul 11, 2019
2 parents 7d55f10 + d54de56 commit f10b2d7
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 12 deletions.
7 changes: 7 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ Quick start
-----------

To run a full conversion on a TIFF file, with validation, format checks, XMP extraction and creation of a thumbnail JPEG:

From the command line:
::

convert_tiff_to_jp2 input.tif

In Python:
::

from image_processing.derivative_files_generator import DerivativeFilesGenerator
Expand Down
3 changes: 2 additions & 1 deletion image_processing/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ def convert_to_jpg(self, input_filepath, output_filepath, resize=None, quality=N
def copy_over_embedded_metadata(self, input_image_filepath, output_image_filepath, write_only_xmp=False):
"""
Copy embedded image metadata from the input_image_filepath to the output_image_filepath
:param input_image_filepath: input filepath
:param output_image_filepath: output filepath
:param write_only_xmp: Copy all information to the same-named tags in XMP (if they exist). With JP2 it's safest to only use xmp tags, as other ones may not be supported by all software
"""
if not os.access(input_image_filepath, os.R_OK):
Expand Down
48 changes: 43 additions & 5 deletions image_processing/derivative_files_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from __future__ import print_function
from __future__ import division

import errno
import os
import shutil
import logging
Expand All @@ -15,6 +16,7 @@
DEFAULT_EMBEDDED_METADATA_FILENAME = 'full.xmp'
DEFAULT_JPG_FILENAME = 'full.jpg'
DEFAULT_LOSSLESS_JP2_FILENAME = 'full_lossless.jp2'
DEFAULT_JPYLYZER_XML_FILENAME = 'full_lossless.jp2.jpylyzer.xml'

DEFAULT_JPG_THUMBNAIL_RESIZE_VALUE = 0.6
DEFAULT_JPG_HIGH_QUALITY_VALUE = 92
Expand Down Expand Up @@ -63,14 +65,15 @@ def __init__(self, kakadu_base_path=DEFAULT_KAKADU_BASE_PATH,
self.log = logging.getLogger(__name__)

def generate_derivatives_from_jpg(self, jpg_filepath, output_folder, save_embedded_metadata=True,
check_lossless=True):
check_lossless=True, save_jpylyzer_output=False):
"""
Extracts the embedded metadata, creates a copy of the JPEG file and a validated JPEG2000 file.
Stores all in the given folder.
:param jpg_filepath: The path to the source JPEG file.
:param output_folder: The folder where the derivatives will be stored
:param save_embedded_metadata: If true, metadata will be extracted from the image file and preserved in a separate xml file
:param save_jpylyzer_output: If true, the jpylyzer output from validating the jp2 will be preserved in a separate xml file
:param check_lossless: If true, check the created JPEG2000 file is visually identical to the TIFF created from the source file
:return: filepaths of created files
"""
Expand All @@ -83,6 +86,8 @@ def generate_derivatives_from_jpg(self, jpg_filepath, output_folder, save_embedd
jpg_filepath, require_icc_profile_for_colour=self.require_icc_profile_for_colour,
require_icc_profile_for_greyscale=self.require_icc_profile_for_greyscale)

_make_dirs_if_exist(output_folder)

output_jpg_filepath = os.path.join(output_folder, self._get_filename(DEFAULT_JPG_FILENAME, source_file_name))
shutil.copy(jpg_filepath, output_jpg_filepath)
generated_files = [output_jpg_filepath]
Expand All @@ -103,15 +108,22 @@ def generate_derivatives_from_jpg(self, jpg_filepath, output_folder, save_embedd
lossless_filepath = os.path.join(output_folder,
self._get_filename(DEFAULT_LOSSLESS_JP2_FILENAME, source_file_name))
self.generate_jp2_from_tiff(scratch_tiff_filepath, lossless_filepath)
self.validate_jp2_conversion(scratch_tiff_filepath, lossless_filepath, check_lossless=check_lossless)

jpylyzer_output_filepath = None
if save_jpylyzer_output:
jpylyzer_output_filepath = os.path.join(output_folder,
self._get_filename(DEFAULT_JPYLYZER_XML_FILENAME, source_file_name))

self.validate_jp2_conversion(scratch_tiff_filepath, lossless_filepath, check_lossless=check_lossless,
jpylyzer_output_filepath=jpylyzer_output_filepath)
generated_files.append(lossless_filepath)

self.log.debug("Successfully generated derivatives for {0} in {1}".format(jpg_filepath, output_folder))

return generated_files

def generate_derivatives_from_tiff(self, tiff_filepath, output_folder, include_tiff=False, save_embedded_metadata=True,
create_jpg_as_thumbnail=True, check_lossless=True):
create_jpg_as_thumbnail=True, check_lossless=True, save_jpylyzer_output=False):
"""
Extracts the embedded metadata, creates a JPEG file and a validated JPEG2000 file.
Stores all in the given folder.
Expand All @@ -122,6 +134,7 @@ def generate_derivatives_from_tiff(self, tiff_filepath, output_folder, include_t
:param output_folder: the folder where the related dc.xml will be stored
:param include_tiff: Include copy of source tiff file in derivatives
:param save_embedded_metadata: If true, metadata will be extracted from the image file and preserved in a separate xml file
:param save_jpylyzer_output: If true, the jpylyzer output from validating the jp2 will be preserved in a separate xml file
:param check_lossless: If true, check the created jpg2000 file is visually identical to the source file
:return: filepaths of created files
"""
Expand All @@ -137,6 +150,8 @@ def generate_derivatives_from_tiff(self, tiff_filepath, output_folder, include_t
# some RGBA tiffs don't convert properly back from jp2 - kakadu warns about unassociated alpha channels
check_lossless = True

_make_dirs_if_exist(output_folder)

with tempfile.NamedTemporaryFile(prefix='image-processing_', suffix='.tif') as temp_tiff_file_obj:
# only work from a temporary file if we need to - e.g. if the tiff filepath is invalid,
# or if we need to normalise the tiff. Otherwise just use the original tiff
Expand All @@ -153,7 +168,7 @@ def generate_derivatives_from_tiff(self, tiff_filepath, output_folder, include_t
jpg_resize = self.jpg_thumbnail_resize_value if create_jpg_as_thumbnail else None

self.converter.convert_to_jpg(normalised_tiff_filepath, jpeg_filepath,
quality=jpg_quality, resize=jpg_resize)
quality=jpg_quality, resize=jpg_resize)
self.log.debug('jpeg file {0} generated'.format(jpeg_filepath))
generated_files = [jpeg_filepath]

Expand All @@ -173,7 +188,14 @@ def generate_derivatives_from_tiff(self, tiff_filepath, output_folder, include_t
lossless_filepath = os.path.join(output_folder,
self._get_filename(DEFAULT_LOSSLESS_JP2_FILENAME, source_file_name))
self.generate_jp2_from_tiff(normalised_tiff_filepath, lossless_filepath)
self.validate_jp2_conversion(normalised_tiff_filepath, lossless_filepath, check_lossless=check_lossless)

jpylyzer_output_filepath = None
if save_jpylyzer_output:
jpylyzer_output_filepath = os.path.join(output_folder,
self._get_filename(DEFAULT_JPYLYZER_XML_FILENAME, source_file_name))

self.validate_jp2_conversion(normalised_tiff_filepath, lossless_filepath, check_lossless=check_lossless,
jpylyzer_output_filepath=jpylyzer_output_filepath)
generated_files.append(lossless_filepath)

self.log.debug("Successfully generated derivatives for {0} in {1}".format(tiff_filepath, output_folder))
Expand Down Expand Up @@ -253,3 +275,19 @@ def _get_filename(self, default_filename, source_file_name):
return "{0}.xmp".format(orig_filename_base)
elif default_filename == DEFAULT_LOSSLESS_JP2_FILENAME:
return "{0}.jp2".format(orig_filename_base)
elif default_filename == DEFAULT_JPYLYZER_XML_FILENAME:
return "{0}.jp2.jpylyzer.xml".format(orig_filename_base)


def _make_dirs_if_exist(path):
    """
    Create a folder (and any missing parent folders) unless it already exists.

    Note that, despite the function's name, the folder is created when it does *not* exist.
    Equivalent to os.makedirs(path, exist_ok=True), but works on python 2
    :param path: Path to create
    :raises OSError: if the folder could not be created, including when the path already exists
        but is not a folder
    """
    try:
        os.makedirs(path)
    except OSError as e:
        # An already-existing folder is the only failure tolerated; everything else is re-raised unchanged
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
43 changes: 43 additions & 0 deletions image_processing/entry_points.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import argparse
import os

from image_processing.conversion import Converter
from image_processing.derivative_files_generator import DerivativeFilesGenerator


def generate_derivatives_from_tiff():
    """
    A basic command line script that runs :func:`~image_processing.derivative_files_generator.DerivativeFilesGenerator.generate_derivatives_from_tiff`

    Command line arguments:
    tiff_filepath: the tiff to convert
    -o / --output_folder: folder to create the derivatives in. If omitted, a folder named after the tiff file
    (without its extension) is used, resolved against the current working directory
    -k / --kakadu_path: base path to the kakadu executables. Defaults to /opt/kakadu
    """
    parser = argparse.ArgumentParser(description="Generate a JP2 from a TIFF, and check the conversion is lossless. "
                                                 "Also generates a thumbnail and records for digital preservation")
    parser.add_argument('tiff_filepath', help='Tiff to convert')
    parser.add_argument('-o', '--output_folder', help='Folder to create derivatives in', required=False, default=None)
    parser.add_argument('-k', '--kakadu_path', help='Base path to kakadu executables', required=False,
                        default='/opt/kakadu')
    args = parser.parse_args()

    output_folder = args.output_folder
    if not output_folder:
        # No folder given: fall back to the tiff's file name with the extension stripped
        output_folder, _ = os.path.splitext(os.path.basename(args.tiff_filepath))
    # Always work with (and report) an absolute path, however the folder was chosen
    output_folder = os.path.abspath(output_folder)

    generator = DerivativeFilesGenerator(require_icc_profile_for_colour=False,
                                         require_icc_profile_for_greyscale=False,
                                         use_default_filenames=False,
                                         kakadu_base_path=args.kakadu_path)
    generator.generate_derivatives_from_tiff(args.tiff_filepath, output_folder, include_tiff=False,
                                             save_jpylyzer_output=True)
    print('Files created at {0}'.format(output_folder))


def convert_icc_profile():
    """
    A basic command line script that runs :func:`~image_processing.conversion.Converter.convert_icc_profile`

    Command line arguments:
    image_filepath: the image to convert
    output_image_filepath: where to write the converted image
    -i / --icc_filepath: path to an icc profile (required)
    -c / --colour_mode: new colour mode, if any
    """
    parser = argparse.ArgumentParser(description="Converts the icc profile of a file")
    parser.add_argument('image_filepath', help='Tiff to convert')
    parser.add_argument('output_image_filepath', help='Output image path')
    parser.add_argument('-i', '--icc_filepath', help='Path to an icc profile', required=True)
    parser.add_argument('-c', '--colour_mode', help='New colour mode, if any', default=None, required=False)
    args = parser.parse_args()

    converter = Converter()
    converter.convert_icc_profile(args.image_filepath, args.output_image_filepath,
                                  icc_profile_filepath=args.icc_filepath, new_colour_mode=args.colour_mode)
    print('File created at {0}'.format(args.output_image_filepath))
2 changes: 1 addition & 1 deletion image_processing/kakadu.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __init__(self, kakadu_base_path):
if not utils.cmd_is_executable(self._command_path('kdu_expand')):
self.log.error("Could not find executable {0}. Lossless checks will not work. "
"Check kakadu is installed and kdu_expand exists at the configured path"
.format(self._command_path('kdu_expand')))
.format(self._command_path('kdu_expand')))

def _command_path(self, command):
return os.path.join(self.kakadu_base_path, command)
Expand Down
2 changes: 1 addition & 1 deletion image_processing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ def cmd_is_executable(cmd):
cmd_paths = [os.path.join(path, cmd) for path in paths]
return any(
os.path.isfile(cmd_path) and os.access(cmd_path, os.X_OK) for cmd_path in cmd_paths
)
)
13 changes: 9 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
from setuptools import setup

with open("README.rst", 'r') as f:
long_description = f.read()
long_description = f.read()

setup(name='image_processing',
version='1.8.1',
version='1.9.0',
description='Digital Bodleian image processing library',
url='http://github.com/bodleian/image-processing',
license="MIT",
long_description=long_description,
author='Mel Mason',
author_email='[email protected]',
packages=['image_processing'],
install_requires=['Pillow', 'jpylyzer']
)
install_requires=['Pillow', 'jpylyzer'],
entry_points={
'console_scripts': ['convert_tiff_to_jp2=image_processing.entry_points:generate_derivatives_from_tiff',
'convert_icc=image_processing.entry_points:convert_icc_profile'
]
}
)

0 comments on commit f10b2d7

Please sign in to comment.