Skip to content

Commit

Permalink
fix for frog batch issue #13
Browse files Browse the repository at this point in the history
  • Loading branch information
proycon committed Feb 5, 2018
1 parent e59e502 commit e6c6dcc
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 10 deletions.
10 changes: 9 additions & 1 deletion dbnl.nf
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ if (!params.foliainput) {

output:
file "${teidocument.simpleName}.folia.xml" into foliadocuments
file "${teidocument.simpleName}.folia.xml" into foliadocuments_counter

script:
"""
Expand Down Expand Up @@ -177,12 +178,13 @@ if (!params.foliainput) {
//foliadocuments_tokenized.subscribe { println it }
} else {
foliadocuments_tokenized = Channel.fromPath(params.inputdir+"/**.folia.xml")
foliadocuments_counter = Channel.fromPath(params.inputdir+"/**.folia.xml")
}


//split the tokenized documents into batches, fork into two channels
foliadocuments_tokenized
-.buffer( size: params.frogs, remainder: true)
+.buffer( size: Math.ceil(foliadocuments_counter.count().val / params.frogs).toInteger(), remainder: true)
.into { foliadocuments_batches_tokenized1; foliadocuments_batches_tokenized2 }

if ((params.mode == "both") || (params.mode == "simple")) {
Expand All @@ -191,6 +193,8 @@ if ((params.mode == "both") || (params.mode == "simple")) {
//Linguistic enrichment on the original text of the document (pre-modernization)
//Receives multiple input files in batches

cpus params.frogs

if ((params.entitylinking == "") && (params.mode == "simple")) {
publishDir params.outputdir, mode: 'copy', overwrite: true
}
Expand Down Expand Up @@ -243,6 +247,8 @@ if ((params.mode == "both") || (params.mode == "modernize")) {
//translate the document to contemporary dutch for PoS tagging AND run Frog on it
//adds an extra <t class="contemporary"> layer

cpus Runtime.runtime.availableProcessors()

input:
set file(inputdocuments), file(dictionary), file(preservationlexicon), file(rulefile), file(inthistlexicon) from foliadocuments_batches_withdata
val virtualenv from params.virtualenv
Expand Down Expand Up @@ -272,6 +278,8 @@ if ((params.mode == "both") || (params.mode == "modernize")) {
publishDir params.outputdir, mode: 'copy', overwrite: true
}

cpus params.frogs

input:
file inputdocuments from foliadocuments_modernized
val skip from params.skip
Expand Down
16 changes: 8 additions & 8 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ manifest {
mainScript = 'ticcl.nf'
}

-process.$indexer.cpus = Runtime.runtime.availableProcessors()
-process.$resolver.cpus = Runtime.runtime.availableProcessors()
-process.$rank.cpus = Runtime.runtime.availableProcessors()
-process.$foliacorrect.cpus = Runtime.runtime.availableProcessors()
-
-process.$frog_original.cpus = 1
-process.$modernize.cpus = Runtime.runtime.availableProcessors()
-process.$frog_modernized.cpus = 1
+profiles {
+standard {
+process.$indexer.cpus = Runtime.runtime.availableProcessors()
+process.$resolver.cpus = Runtime.runtime.availableProcessors()
+process.$rank.cpus = Runtime.runtime.availableProcessors()
+process.$foliacorrect.cpus = Runtime.runtime.availableProcessors()
+}
+}

2 changes: 1 addition & 1 deletion webservice/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

setup(
name = "PICCL",
-version = "0.4.1",
+version = "0.4.2",
author = "Martin Reynaert",
author_email = "[email protected]",
description = ("Webservice for PICCL"),
Expand Down

0 comments on commit e6c6dcc

Please sign in to comment.