-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #83 from scientist-softserv/i11-job-split-pdfs-into-child-works
I11 job split pdfs into child works
- Loading branch information
Showing
21 changed files
with
603 additions
and
144 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,3 +46,4 @@ fcrepo-webapp-* | |
*.gem | ||
pkg/ | ||
*~undo-tree~ | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
module IiifPrint
  # Persisted record describing a child work that still has to be attached to
  # its parent work. Each row carries the parent's id, the title of the child
  # work, and a key describing the child's position among its siblings.
  #
  # All three attributes are mandatory.
  class PendingRelationship < ApplicationRecord
    validates :parent_id, :child_title, :child_order, presence: true
  end
end
11 changes: 11 additions & 0 deletions
11
db/migrate/20230109000000_create_iiif_print_pending_relationships.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Creates the iiif_print_pending_relationships table: three required string
# columns (child_title, parent_id, child_order), the standard timestamps, and
# an index on parent_id.
class CreateIiifPrintPendingRelationships < ActiveRecord::Migration[5.1]
  def change
    create_table :iiif_print_pending_relationships do |table|
      table.string :child_title, null: false
      table.string :parent_id, null: false
      table.string :child_order, null: false
      table.timestamps

      # Pending rows are looked up by their parent's id.
      table.index :parent_id
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
module IiifPrint
  module Jobs
    # Common ActiveJob superclass for the jobs defined under IiifPrint::Jobs.
    # It adds no behavior of its own.
    class ApplicationJob < ActiveJob::Base; end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
module IiifPrint
  module Jobs
    # Splits the PDFs of a parent work into per-page files, queues the creation
    # of one child work per page, and schedules the job that will later link
    # those child works to the parent.
    class ChildWorksFromPdfJob < IiifPrint::Jobs::ApplicationJob
      # Number of digits each component of a child_order key is padded to.
      # child_order is stored as a string and sorted as a string, so the
      # components must be fixed-width for the sort to follow page order.
      SORT_ORDER_WIDTH = 6

      # Break each pdf into individual pages and queue child work creation.
      #
      # @param parent_work the work the PDFs belong to; must respond to
      #   #iiif_print_config, #id, #title, #creator, #rights_statement and
      #   #visibility
      # @param pdf_paths [Array<String>] paths to pdfs
      # @param user [User] user the uploads and batch operations are created for
      # @param admin_set_id [String] admin set assigned to the child works
      # @param prior_pdfs [Integer] count of pdfs already on parent work
      def perform(parent_work, pdf_paths, user, admin_set_id, prior_pdfs)
        @parent_work = parent_work
        @child_admin_set_id = admin_set_id
        child_model = @parent_work.iiif_print_config.pdf_split_child_model

        # handle each input pdf
        pdf_paths.each_with_index do |path, pdf_idx|
          split_pdf(path, pdf_idx, user, prior_pdfs, child_model)
        end

        # Link newly created child works to the parent. The delay gives the
        # batch-create jobs queued above time to build the child works.
        IiifPrint::Jobs::CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
          user: user,
          parent_id: @parent_work.id,
          parent_model: @parent_work.class.to_s,
          child_model: child_model.to_s
        )

        # TODO: clean up image_files and pdf_paths
      end

      private

      # Splits one PDF and submits the job that creates a child work per page.
      #
      # @param path [String] path to the pdf
      # @param pdf_idx [Integer] zero-based position of this pdf in the current batch
      # @param user [User]
      # @param prior_pdfs_count [Integer] count of pdfs already on the parent work
      # @param child_model class (or class name) of the child works to create
      def split_pdf(path, pdf_idx, user, prior_pdfs_count, child_model)
        # presumably one file path per page -- confirm against the splitter service
        image_files = @parent_work.iiif_print_config.pdf_splitter_service.new(path).to_a
        return if image_files.blank?

        pdf_sequence = pdf_idx + prior_pdfs_count
        prepare_import_data(pdf_sequence, image_files, user)

        # submit the job to create all the child works for one PDF
        # BatchCreateJob arguments, in order:
        #   user            [User]
        #   titles          [Hash<String => String>] uploaded file id => title
        #   resource_types  [Hash<String => String>] (optional, left empty)
        #   uploaded_files  [Array<String>] Hyrax::UploadedFile IDs
        #   attributes      [Hash] applied to all works, including :model
        #   operation       [Hyrax::BatchCreateOperation]
        operation = Hyrax::BatchCreateOperation.create!(
          user: user,
          operation_type: "PDF Batch Create"
        )
        BatchCreateJob.perform_later(user,
                                     @child_work_titles,
                                     {},
                                     @uploaded_files,
                                     attributes.merge(model: child_model.to_s).with_indifferent_access,
                                     operation)
      end

      # Creates an uploaded file and a PendingRelationship row for every page,
      # filling @uploaded_files (file ids) and @child_work_titles (id => title).
      #
      # @param pdf_sequence [Integer] zero-based position of the pdf on the parent
      # @param image_files [Array] page files produced by the splitter
      # @param user [User]
      def prepare_import_data(pdf_sequence, image_files, user)
        @uploaded_files = []
        @child_work_titles = {}
        image_files.each_with_index do |image_path, idx|
          file_id = create_uploaded_file(user, image_path).to_s
          file_title = set_title(@parent_work.title.first, pdf_sequence, idx)
          @uploaded_files << file_id
          @child_work_titles[file_id] = file_title
          # save child work info to create the member relationships
          PendingRelationship.create!(child_title: file_title,
                                      parent_id: @parent_work.id,
                                      child_order: sort_order(pdf_sequence, idx))
        end
      end

      # Builds the sortable key stored in PendingRelationship#child_order.
      #
      # Both components are zero-padded because the key is compared as a
      # string: unpadded, "0 10" would sort before "0 2" and scramble the
      # pages of any pdf with more than ten pages.
      #
      # @param pdf_sequence [Integer] zero-based position of the pdf on the parent
      # @param idx [Integer] zero-based page index within the pdf
      # @return [String] e.g. "000000 000010"
      def sort_order(pdf_sequence, idx)
        format("%0#{SORT_ORDER_WIDTH}d %0#{SORT_ORDER_WIDTH}d", pdf_sequence, idx)
      end

      # Saves a Hyrax::UploadedFile owned by the user for the file at path.
      #
      # @param user [User]
      # @param path [String]
      # @return the id of the saved uploaded file
      # @raise whatever Hyrax::UploadedFile#save! raises when the save fails
      def create_uploaded_file(user, path)
        uf = Hyrax::UploadedFile.new
        uf.user_id = user.id
        uf.file = CarrierWave::SanitizedFile.new(path)
        uf.save!
        uf.id
      end

      # Composes the child work title from the parent title and the one-based
      # pdf and page numbers.
      #
      # @param title [String] parent work title
      # @param pdf_sequence [Integer] zero-based pdf position
      # @param idx [Integer] zero-based page index
      # @return [String] e.g. "My Title: Pdf Nbr 1, Page 3"
      def set_title(title, pdf_sequence, idx)
        pdf_index = "Pdf Nbr #{pdf_sequence + 1}"
        page_number = "Page #{idx + 1}"
        "#{title}: #{pdf_index}, #{page_number}"
      end

      # Attributes copied onto every child work. A fresh hash is built on each
      # call, so callers may extend it freely.
      #
      # TODO: what attributes do we need to fill in from the parent work? What about AllinsonFlex?
      # @return [Hash]
      def attributes
        {
          admin_set_id: @child_admin_set_id.to_s,
          creator: @parent_work.creator.to_a,
          rights_statement: @parent_work.rights_statement.to_a,
          visibility: @parent_work.visibility.to_s
        }
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
module IiifPrint
  module Jobs
    # Attaches child works to their parent once every child recorded in
    # IiifPrint::PendingRelationship for that parent can be found; otherwise
    # re-enqueues itself to try again later.
    class CreateRelationshipsJob < IiifPrint::Jobs::ApplicationJob
      # Link newly created child works to the parent
      #
      # NOTE(review): when a child work is never created this job reschedules
      # itself indefinitely -- consider adding a retry limit.
      #
      # @param user: [User] user
      # @param parent_id: [<String>] parent work id
      # @param parent_model: [<String>] parent model
      # @param child_model: [<String>] child model
      def perform(user:, parent_id:, parent_model:, child_model:)
        if completed_child_data_for(parent_id, child_model)
          # add the members
          parent_work = parent_model.constantize.find(parent_id)
          create_relationships(user: user, parent: parent_work, ordered_child_ids: @child_ids)
          @pending_children.each(&:destroy)
        else
          # reschedule the job and end this one normally
          reschedule(user: user, parent_id: parent_id, parent_model: parent_model, child_model: child_model)
        end
      end

      private

      # Loads @pending_children (in member order) and @child_ids.
      #
      # @param parent_id [String] parent work id
      # @param child_model [String] child model class name
      # @return [Boolean] true when every pending child was found, false as
      #   soon as one is missing (@child_ids is then incomplete)
      def completed_child_data_for(parent_id, child_model)
        @child_ids = []

        # find and sequence all pending children. child_order is a string of
        # space-separated numbers; it is compared numerically here because a
        # string sort would place "0 10" before "0 2".
        @pending_children = IiifPrint::PendingRelationship
                            .where(parent_id: parent_id)
                            .sort_by { |child| child.child_order.to_s.split.map(&:to_i) }

        # find child ids (skip out if any haven't yet been created)
        @pending_children.each do |child|
          # find by title... if any aren't found, the child works are not yet ready
          found_ids = find_id_by_title_for(child.child_title, child_model)
          return false if found_ids.empty?

          @child_ids += found_ids
        end
        true
      end

      # @param title [String] title to search for
      # @param model [String] model class name
      # @return [Array] ids of every record of the model with that title
      def find_id_by_title_for(title, model)
        model.constantize.where(title: title).map(&:id)
      end

      # Enqueues this job again, with the same arguments, to run in 10 minutes.
      def reschedule(user:, parent_id:, parent_model:, child_model:)
        CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
          user: user,
          parent_id: parent_id,
          parent_model: parent_model,
          child_model: child_model
        )
      end

      # Updates the parent through the Hyrax actor stack so the given works
      # become its members.
      #
      # @param user [User] user whose Ability the update runs with
      # @param parent the parent work
      # @param ordered_child_ids [Array] child work ids, in member order
      def create_relationships(user:, parent:, ordered_child_ids:)
        # work_members_attributes is keyed by position: { 0 => { id: ... }, ... }
        records_hash = ordered_child_ids.each_with_index.map { |child_id, i| [i, { id: child_id }] }.to_h
        attrs = { work_members_attributes: records_hash }
        # only applied when the parent supports reindex_extent=
        parent.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
        env = Hyrax::Actors::Environment.new(parent, Ability.new(user), attrs)

        Hyrax::CurationConcern.actor.update(env)
      end
    end
  end
end
Oops, something went wrong.