From 2157cc9652ae4da05bdd4e102b32c4d7c2a0cb9a Mon Sep 17 00:00:00 2001 From: Ivory Date: Sun, 17 Jan 2021 23:42:30 -0500 Subject: [PATCH] Fix reference path for user guide page. --- docs/index.html | 4 ++-- docs/search/search_index.json | 2 +- docs/sitemap.xml.gz | Bin 216 -> 216 bytes mkdocs/user-guide/docs/index.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/index.html b/docs/index.html index 6e0e29c48..fbc7ac213 100644 --- a/docs/index.html +++ b/docs/index.html @@ -263,7 +263,7 @@

BioLockJ User Guide: FAQ & Troubleshooting

-

BioLockJ Developers Guide

+

BioLockJ Developers Guide

Repository of functional tests
https://github.com/BioLockJ-Dev-Team/sheepdog_testing_suite

The user guide for our latest stable version
@@ -326,5 +326,5 @@

Citing BioLockJ * {@value #LAUNCHER_DESC} */ protected static final String LAUNCHER = \"genMod.launcher\"; private static final String LAUNCHER_DESC = \"Define executable language command if it is not included in your $PATH\"; In this example, the descriptions for PARAM and SCRIPT are written in the addNewProperty() method. The description for LAUNCHER is stored as its own string ( LAUNCHER_DESC ), and that string is referenced in the addNewProperty method and in the javadoc description for LAUNCHER . This rather verbose option IS NOT necessary, but it allows the description to be viewed through the API AND through javadocs and IDEs; this is appropriate if you expect other classes to use the properties defined in your module. The descriptions for properties should be brief. Additional details such as interactions between properties or the effects of different values should be part of the getDetails() method. It should always be clear to a user what will happen if the value is \"null\". If there is a logical default for the property, that can be passed as an additional argument to addNewProperty() . This value will only be used if there is no value given for the property in the config file (including any defaultProps layers and standard.properties). If your module uses any general properties (beyond any uses by the super class), then you should register them in the module's constructor using the addGeneralProperty() method. public QiimeClosedRefClassifier() { super(); addGeneralProperty( Constants.EXE_AWK ); } The existing description and type for this property (defined in biolockj.Properties) will be returned if the module is queried about this property. For a list of general properties, run: biolockj_api listProps Finally, to be very polished, you should override the isValidProp() method. Be sure to include the call to super. @Override public Boolean isValidProp( String property ) throws Exception { Boolean isValid = super.isValidProp( property ); switch(property) { case HN2_KEEP_UNINTEGRATED: try { Config.getBoolean( this, HN2_KEEP_UNINTEGRATED ); isValid = true; } catch(Exception e) { isValid = false; } break; case HN2_KEEP_UNMAPPED: try { Config.getBoolean( this, HN2_KEEP_UNMAPPED ); isValid = true; } catch(Exception e) { isValid = false; } break; } return isValid; } In the example above, the Humann2Parser module uses two properties that are not used by any super class. The call to super.isValidProp( property ) tests the property if it is used by a super class. This class only adds checks for its newly defined properties. Any property that is not tested, but is registered in the module's constructor, will return true. This method is called through the API, and should be used to test one property at a time as if that is the only property in the config file. Tests to make sure that multiple properties are compatible with each other should go in the checkDependencies() method. Generate user guide pages # For modules in the main BioLockJ project, the user guide pages are generated using the ApiModule methods as part of the deploy process. Third party developers can use the same utilities to create matching documentation. Suppose you have created one or more modules in a package com.joesCode and saved the compiled code in a jar file, /Users/joe/dev/JoesMods.jar . Set up a mkdocs project: # See https://www.mkdocs.org/#installation pip install mkdocs mkdocs --version mkdocs new joes-modules mkdir joes-modules/docs/GENERATED This mkdocs project will render markdown (.md) files into an html site.
Mkdocs supports many nice features, including a polished default template. Generate the .md files from your modules: java -cp $BLJ/dist/BioLockJ.jar:/Users/joe/dev/JoesMods.jar \\ biolockj.api.BuildDocs \\ joes-modules/docs/GENERATED \\ com.joesCode Put a link to your list of modules in the main index page. cd joes-modules echo \"[view module list](GENERATED/all-modules.md)\" >> docs/index.md The BuildDocs utility creates the .md files, but it assumes that these are part of a larger project, and you will need to make appropriate links to the generated pages from your main page. Preview your user guide: mkdocs serve Open up http://127.0.0.1:8000/ in your browser, and you'll see the default home page, with a link at the bottom to view module list , which links to a page listing all of the modules in the com.joesCode package. You can build this documentation locally using mkdocs build and then push to your preferred hosting site, or set up a service such as ReadTheDocs to render and host your documentation from your docs folder. Even if you choose not to build user guide pages for your module, you should still implement the ApiModule interface. Anyone who uses your module can generate the user guide pages if they want them, and even incorporate them into a custom copy of the main BioLockJ user guide. Any other support program, such as a GUI, could make use of the ApiModule methods as well. Using External Modules # To use a module that you have created yourself or acquired from a third party, you need to: Save the compiled code in a folder on your machine, for example: /Users/joe/biolockjModules/JoesMods.jar Include your module in the module run order in your config file, for example: #BioModule com.joesCode.biolockj.RunTool Be sure to include any properties your module needs in the config file. Use the --external-modules option when you call biolockj: biolockj --external-modules /Users/joe/biolockjModules myPipeline.properties Any other modules you have made or acquired can also be in the /Users/joe/biolockjModules folder. Finding and Sharing Modules # The official repository for external BioLockJ modules is blj_ext_modules . Each module has a folder at the top level of the repository and should include the Java code as well as a config file to test the module alone, a test file to run a multi-module pipeline that includes the module, and (where applicable) a dockerfile. This is work in progress.","title":"Building Modules"},{"location":"Building-Modules/#building-new-modules","text":"Any Java class that implements the BioModule interface can be added to a BioLockJ pipeline. The BioLockJ v1.0 implementation is currently focused on metagenomics analysis, but the generalized application framework is not limited to this domain. Users can implement new BioModules to automate a wide variety of bioinformatics and report analytics.
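To make this concrete, here is a minimal sketch of a script-generating module. It is a sketch only: the package, class name, and property are hypothetical, and the method signatures are inferred from the BioModuleImpl and ScriptModuleImpl tables below. package com.example.biolockj; import java.io.File; import java.util.ArrayList; import java.util.List; import biolockj.Config; import biolockj.module.ScriptModuleImpl; public class LineCounter extends ScriptModuleImpl { /* hypothetical property; see Documenting Properties for how to register it */ protected static final String KEEP_HEADERS = \"lineCounter.keepHeaders\"; @Override public void checkDependencies() throws Exception { super.checkDependencies(); /* fail before any module executes if the value cannot be parsed as a boolean */ Config.getBoolean( this, KEEP_HEADERS ); } @Override public List<List<String>> buildScript( final List<File> files ) throws Exception { final List<List<String>> data = new ArrayList<>(); for( final File seq: files ) { final List<String> lines = new ArrayList<>(); /* each nested list holds the bash lines required to process 1 sample */ lines.add( \"wc -l \" + seq.getAbsolutePath() + \" > \" + getOutputDir().getAbsolutePath() + File.separator + seq.getName() + \"_count.txt\" ); data.add( lines ); } return data; } }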
The BioModule interface was designed so that users can develop new modules on their own.","title":"Building New Modules"},{"location":"Building-Modules/#beginners","text":"See the BioModule hello world tutorial.","title":"Beginners"},{"location":"Building-Modules/#coding-your-module","text":"To create a new BioModule , simply extend one of the abstract Java superclasses, code its abstract methods, and add it to your pipeline with a #BioModule tag in your Config file:","title":"Coding your module"},{"location":"Building-Modules/#to-support-a-new-classifier-create-3-modules-that-implement-the-following-interfaces","text":"ClassifierModule : Implement to generate the bash scripts needed to call the classifier program ParserModule : Implement to parse classifier output, configured as a classifier post-requisite OtuNode : Classifier-specific implementation that holds the OTU information for 1 sequence","title":"To support a new classifier, create 3 modules that implement the following interfaces:"},{"location":"Building-Modules/#biomoduleimpl-is-the-top-level-superclass-for-all-modules","text":"Method Description checkDependencies() Must override. Called before executeTask() to identify Configuration errors and perform runtime validations. executeTask() Must override. Executes core module logic. cleanUp() Called after executeTask() to run cleanup operations, update Config properties, etc. getInputFiles() Return previous module output. getModuleDir() Return module root directory. getOutputDir() Return module output directory. getPostRequisiteModules() Returns a list of BioModules to run after the current module. getPreRequisiteModules() Returns a list of BioModules to run before the current module. getSummary() Return output directory summary. Most modules override this method by adding module-specific summary details to super.getSummary(). getTempDir() Return module temp directory. setModuleDir(path) Set module directory.","title":"BioModuleImpl is the top-level superclass for all modules."},{"location":"Building-Modules/#scriptmoduleimpl-extends-biomoduleimpl-superclass-for-script-generating-modules","text":"Method Description buildScript(files) Must override. Called by executeTask() for datasets with forward reads only. The return type is a list of lists. Each nested list contains the bash script lines required to process 1 sample. Obtains sequence files from getInputFiles(). buildScriptForPairedReads(files) Calls back to buildScript(files) by default. Subclasses override this method to generate unique scripts for datasets containing paired reads. checkDependencies() Called before executeTask() to validate script.batchSize , script.exitOnError , script.numThreads , script.permissions , script.timeout getJobParams() Return shell command to execute the MAIN script. getScriptDir() Return module script directory. getSummary() Adds the script directory summary to super.getSummary(). Most modules override this method by adding module-specific summary details to super.getSummary(). getTimeout() Return script.timeout . getWorkerScriptFunctions() Return bash script lines for any functions needed in the worker scripts.","title":"ScriptModuleImpl extends BioModuleImpl: superclass for script-generating modules."},{"location":"Building-Modules/#javamoduleimpl-extends-scriptmoduleimpl-superclass-for-pure-java-modules","text":"To avoid running code on the cluster head node, a temporary instance of BioLockJ is spawned on a cluster node, launched by the sole worker script from the job queue.
Method Description runModule() Must override. Executes core module logic. buildScript(files) This method returns a single line calling java on the BioLockJ source code, passing the -d parameter to run in direct mode and the full class name of the JavaModule to indicate the module to run. getSource() Determines whether code is running from the Jar or from source code, in order to write valid bash script lines. getTimeout() Return java.timeout . moduleComplete() Create the script success indicator file. moduleFailed() Create the script failure indicator file.","title":"JavaModuleImpl extends ScriptModuleImpl: superclass for pure Java modules."},{"location":"Building-Modules/#classifiermoduleimpl-extends-scriptmoduleimpl-biolockjmoduleclassifier-superclass","text":"Method Description buildScriptForPairedReads(files) Called by executeTask() for datasets with paired reads. The return type is a list of lists, where each nested list contains the bash script lines required to process 1 sample. Obtains sequence files from SeqUtil .getPairedReads(getInputFiles()). checkDependencies() Validate Configuration properties exe.classifier and exe.classifierParams , verify sequence file format, log classifier version info, and verify no biolockj.module.seq modules are configured to run after the ClassifierModule . Subclasses should call super.checkDependencies() if overriding this method to retain these verifications. executeTask() Call buildScript(files) or buildScriptForPairedReads(files) based on input sequence format and call BashScriptBuilder to generate the main script + 1 worker script for every script.batchSize samples. To change the batch scheme, override this method to call the alternate BashScriptBuilder .buildScripts() method signature and hard-code the batch size. All biolockj.module.classifier modules override this method. getClassifierExe() Return Configuration property exe.classifier to call the classifier program in the bash scripts. If the classifier is not included in cluster.modules , validate that the value is a valid file path. If exe.classifier is undefined, replace the property prefix exe with the lowercase prefix of the module class name (less the standard module suffix classifier ). For example, use rdp.classifier for RdpClassifier and kraken.classifier for KrakenClassifier . This allows users to define all classifier programs in a default Configuration file rather than setting exe.classifier in each project Configuration file. getClassifierParams() Return Configuration property exe.classifierParams which may contain a list of parameters (without hyphens) to pass to the classifier program in the bash scripts. If exe.classifierParams is undefined, replace the property prefix exe with the lowercase prefix of the module class name as described for exe.classifier . getSummary() Adds input directory summary to super.getSummary(). Most modules override this method to add module-specific summary details to super.getSummary(). logVersion() Run exe.classifier --version to log version info. RDP overrides this method to return null since the version switch is not supported.","title":"ClassifierModuleImpl extends ScriptModuleImpl: biolockj.module.classifier superclass."},{"location":"Building-Modules/#parsermoduleimpl-extends-javamoduleimpl-biolockjmoduleimplicitparser-superclass","text":"Method Description parseSamples() Must override. Called by executeTask() to populate the Set returned by getParsedSamples(). Each classifier requires a unique parser module to decode its output.
This method should iterate through the classifier reports to build OtuNode s for each sample-OTU found in the report. The OtuNode s are stored in a ParsedSample and cached via addParsedSample( ParsedSample ). addParsedSample( sample ) Add the ParsedSample to the Set returned by getParsedSamples(). buildOtuTables() Generate OTU abundance tables from ClassifierModule output. checkDependencies() Validate Configuration properties ( report.minOtuCount , report.minOtuThreshold , report.logBase ) and verify no biolockj.module.classifier modules are configured to run after the ParserModule . executeTask() If report.numHits =Y, add \"Num_Hits\" column to metadata containing the number of reads that map to any OTU for each sample. Calls buildOtuTables() to generate module output. getParsedSample(id) Return the ParsedSample from the Set returned by getParsedSamples() for a given id. getParsedSamples() Return 1 ParsedSample for each classified sample in the dataset.","title":"ParserModuleImpl extends JavaModuleImpl: biolockj.module.implicit.parser superclass."},{"location":"Building-Modules/#otunodeimpl-is-the-superclass-for-the-biolockjnode-package","text":"Method Description addOtu(level, otu) A node represents a single OTU; each level in the taxonomic hierarchy is populated with this method. getCount() Get the OTU count. getLine() Get the classifier report line used to create the node. getOtuMap() This map may contain 1 element for each of the report.taxonomyLevels and is populated by addOtu(level, otu). getSampleId() Get the sample ID to which the OTU belongs. report() Print node info to log file as DEBUG line - not visible unless pipeline.logLevel=DEBUG . setCount(num) Set the OTU count. setLine(line) Set the classifier report line used to create the node. setSampleId(id) Set the sample ID to which the OTU belongs. OtuNodeImpl methods do not need to be overridden. New OtuNode implementations should call existing methods from their constructor.","title":"OtuNodeImpl is the superclass for the biolockj.node package."},{"location":"Building-Modules/#document-your-module","text":"The BioLockJ API allows outside resources to get information about the BioLockJ program and any available modules. To interface with the API, your module will need to implement the ApiModule interface .","title":"Document your module"},{"location":"Building-Modules/#api-generated-html-documentation","text":"The BioLockJ documentation is stored in markdown files and rendered into html using mkdocs. The BioLockJ API is designed to generate a markdown document, which is ready to be rendered into an html file using mkdocs.","title":"API-generated html documentation"},{"location":"Building-Modules/#built-in-descriptions","text":"Override the getCitationString() method. This should include citation information for any tool that your module wraps and a credit to yourself for creating the wrapper. Override the getDescription() method to return a short description of what your module does; this should be one to two sentences. For a more extensive description, including details about properties, expected inputs, assumptions, etc., override the getDetails() method (optional).
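As a sketch, these overrides might look like the following (the module purpose and wording are invented; only the three method names come from this guide, and String return types are assumed): @Override public String getDescription() { /* one to two sentences; shown in the generated module list */ return \"Counts the lines in each input sequence file.\"; } @Override public String getDetails() { /* longer free-form text: property interactions, expected inputs, assumptions */ return \"This module assumes one input file per sample.\"; } @Override public String getCitationString() { /* cite the wrapped tool and credit the module author */ return \"Module developed by Joe Developer.\"; }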
If your module has any pre-requisite modules or post-requisite modules, the module's Details should include the names of these modules and information about when and why these modules are added.","title":"Built-in descriptions"},{"location":"Building-Modules/#documenting-properties","text":"If your module introduces any NEW configuration properties, those properties should be registered to the module so the API can retrieve them. Register properties using the addNewProperty() method in the module's constructor. For example, the GenMod module defines three properties: public GenMod() { super(); addNewProperty( PARAM, Properties.STRING_TYPE, \"parameters to pass to the user's script\" ); addNewProperty( SCRIPT, Properties.FILE_PATH, \"path to user script\" ); addNewProperty( LAUNCHER, Properties.STRING_TYPE, LAUNCHER_DESC ); } protected static final String PARAM = \"genMod.param\"; protected static final String SCRIPT = \"genMod.scriptPath\"; /** * {@link biolockj.Config} property: {@value #LAUNCHER}
* {@value #LAUNCHER_DESC} */ protected static final String LAUNCHER = \"genMod.launcher\"; private static final String LAUNCHER_DESC = \"Define executable language command if it is not included in your $PATH\"; In this example, the descriptions for PARAM and SCRIPT are written in the addNewProperty() method. The description for LAUNCHER is stored as its own string ( LAUNCHER_DESC ), and that string is referenced in the addNewProperty method and in the javadoc description for LAUNCHER . This rather verbose option IS NOT necessary, but it allows the description to be viewed through the API AND through javadocs and IDEs; this is appropriate if you expect other classes to use the properties defined in your module. The descriptions for properties should be brief. Additional details such as interactions between properties or the effects of different values should be part of the getDetails() method. It should always be clear to a user what will happen if the value is \"null\". If there is a logical default for the property, that can be passed as an additional argument to addNewProperty() . This value will only be used if there is no value given for the property in the config file (including any defaultProps layers and standard.properties). If your module uses any general properties (beyond any uses by the super class), then you should register them in the module's constructor using the addGeneralProperty() method. public QiimeClosedRefClassifier() { super(); addGeneralProperty( Constants.EXE_AWK ); } The existing description and type for this property (defined in biolockj.Properties) will be returned if the module is queried about this property. For a list of general properties, run: biolockj_api listProps Finally, to be very polished, you should override the isValidProp() method. Be sure to include the call to super. @Override public Boolean isValidProp( String property ) throws Exception { Boolean isValid = super.isValidProp( property ); switch(property) { case HN2_KEEP_UNINTEGRATED: try { Config.getBoolean( this, HN2_KEEP_UNINTEGRATED ); isValid = true; } catch(Exception e) { isValid = false; } break; case HN2_KEEP_UNMAPPED: try { Config.getBoolean( this, HN2_KEEP_UNMAPPED ); isValid = true; } catch(Exception e) { isValid = false; } break; } return isValid; } In the example above, the Humann2Parser module uses two properties that are not used by any super class. The call to super.isValidProp( property ) tests the property if it is used by a super class. This class only adds checks for its newly defined properties. Any property that is not tested, but is registered in the module's constructor, will return true. This method is called through the API, and should be used to test one property at a time as if that is the only property in the config file. Tests to make sure that multiple properties are compatible with each other should go in the checkDependencies() method.","title":"Documenting Properties"},{"location":"Building-Modules/#generate-user-guide-pages","text":"For modules in the main BioLockJ project, the user guide pages are generated using the ApiModule methods as part of the deploy process. Third party developers can use the same utilities to create matching documentation. Suppose you have created one or more modules in a package com.joesCode and saved the compiled code in a jar file, /Users/joe/dev/JoesMods.jar .
Set up a mkdocs project: # See https://www.mkdocs.org/#installation pip install mkdocs mkdocs --version mkdocs new joes-modules mkdir joes-modules/docs/GENERATED This mkdocs project will render markdown (.md) files into an html site. Mkdocs supports many nice features, including a polished default template. Generate the .md files from your modules: java -cp $BLJ/dist/BioLockJ.jar:/Users/joe/dev/JoesMods.jar \\ biolockj.api.BuildDocs \\ joes-modules/docs/GENERATED \\ com.joesCode Put a link to your list of modules in the main index page. cd joes-modules echo \"[view module list](GENERATED/all-modules.md)\" >> docs/index.md The BuildDocs utility creates the .md files, but it assumes that these are part of a larger project, and you will need to make appropriate links to the generated pages from your main page. Preview your user guide: mkdocs serve Open up http://127.0.0.1:8000/ in your browser, and you'll see the default home page, with a link at the bottom to view module list , which links to a page listing all of the modules in the com.joesCode package. You can build this documentation locally using mkdocs build and then push to your preferred hosting site, or set up a service such as ReadTheDocs to render and host your documentation from your docs folder. Even if you choose not to build user guide pages for your module, you should still implement the ApiModule interface. Anyone who uses your module can generate the user guide pages if they want them, and even incorporate them into a custom copy of the main BioLockJ user guide. Any other support program, such as a GUI, could make use of the ApiModule methods as well.","title":"Generate user guide pages"},{"location":"Building-Modules/#using-external-modules","text":"To use a module that you have created yourself or acquired from a third party, you need to: Save the compiled code in a folder on your machine, for example: /Users/joe/biolockjModules/JoesMods.jar Include your module in the module run order in your config file, for example: #BioModule com.joesCode.biolockj.RunTool Be sure to include any properties your module needs in the config file. Use the --external-modules option when you call biolockj: biolockj --external-modules /Users/joe/biolockjModules myPipeline.properties Any other modules you have made or acquired can also be in the /Users/joe/biolockjModules folder.","title":"Using External Modules"},{"location":"Building-Modules/#finding-and-sharing-modules","text":"The official repository for external BioLockJ modules is blj_ext_modules . Each module has a folder at the top level of the repository and should include the Java code as well as a config file to test the module alone, a test file to run a multi-module pipeline that includes the module, and (where applicable) a dockerfile. This is work in progress.","title":"Finding and Sharing Modules"},{"location":"Built-in-modules/","text":"BioModules # Some modules are packaged with BioLockJ (see below). To use modules created by a third party, add the compiled files (jar file) to your biolockj extensions folder. When you call biolockj , use the --external-modules arg to pass in the location of the extra modules: biolockj --external-modules To create your own modules, see Building-Modules . In all cases, add modules to your BioModule order section to include them in your pipeline.
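For example, a config file that mixes a packaged module with a third-party module might include these lines in its BioModule order section (module paths reused from the examples in this guide): #BioModule biolockj.module.seq.PearMergeReads #BioModule com.joesCode.biolockj.RunTool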
Built-in BioModules: # classifiers # r16s classifiers wgs classifiers implicit modules # implicit parsers module.implicit.parser.r16s.md module.implicit.parser.wgs.md implicit qiime modules report modules # humann2 report by otu report by taxon R reports taxa table modules # BuildTaxaTables AddPseudoCount NormalizeTaxaTables NormalizeByReadsPerMillion LogTransformTaxaTables AddMetadataToTaxaTables sequence modules # BioLockJ comes packaged with several modules for sequence pre-processing. AwkFastaConverter Gunzipper KneadData Multiplexer PearMergeReads RarefySeqs SeqFileValidator TrimPrimers DIY modules # GenMod Rmarkdown List All # See generated docs for all modules .","title":"BioModules"},{"location":"Built-in-modules/#biomodules","text":"Some modules are packaged with BioLockJ (see below). To use modules created by a third party, add the compiled files (jar file) to your biolockj extensions folder. When you call biolockj , use the --external-modules arg to pass in the location of the extra modules: biolockj --external-modules To create your own modules, see Building-Modules . In all cases, add modules to your BioModule order section to include them in your pipeline.","title":"BioModules"},{"location":"Built-in-modules/#built-in-biomodules","text":"","title":"Built-in BioModules:"},{"location":"Built-in-modules/#classifiers","text":"r16s classifiers wgs classifiers","title":"classifiers"},{"location":"Built-in-modules/#implicit-modules","text":"implicit parsers module.implicit.parser.r16s.md module.implicit.parser.wgs.md implicit qiime modules","title":"implicit modules"},{"location":"Built-in-modules/#report-modules","text":"humann2 report by otu report by taxon R reports","title":"report modules"},{"location":"Built-in-modules/#taxa-table-modules","text":"BuildTaxaTables AddPseudoCount NormalizeTaxaTables NormalizeByReadsPerMillion LogTransformTaxaTables AddMetadataToTaxaTables","title":"taxa table modules"},{"location":"Built-in-modules/#sequence-modules","text":"BioLockJ comes packaged with several modules for sequence pre-processing. AwkFastaConverter Gunzipper KneadData Multiplexer PearMergeReads RarefySeqs SeqFileValidator TrimPrimers","title":"sequence modules"},{"location":"Built-in-modules/#diy-modules","text":"GenMod Rmarkdown","title":"DIY modules"},{"location":"Built-in-modules/#list-all","text":"See generated docs for all modules .","title":"List All"},{"location":"Check-Dependencies/","text":"BioLockJ is designed to find all problems in one sitting. Every module includes a check dependencies method, which quickly detects issues that would cause an error during execution. This is run for all modules in a pipeline before the first module executes. When BioLockJ runs, it has three major phases: pipeline formation - string together the modules specified in the config file along with any additional modules that the program adds on the user's behalf, and initiate the utilities needed for the pipeline (such as docker, metadata, and input type detection). check dependencies - scan the pipeline for anything that may cause an error during execution run pipeline - execute each module in the sequence. Precheck a pipeline # By including the --precheck-only argument (or -p ) when running biolockj , you are running in precheck mode. BioLockJ will do the first two phases, and then stop. This allows you to quickly test changes to your pipeline configuration without actually running a pipeline.
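For example (the config file name here is hypothetical): biolockj --precheck-only myPipeline.properties BioLockJ forms the pipeline and checks dependencies for every module, then stops before any module executes.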
It also allows you to see any modules that are automatically added to your pipeline.","title":"Check Dependencies"},{"location":"Check-Dependencies/#precheck-a-pipeline","text":"By including the --precheck-only argument (or -p ) when running biolockj , you are running in precheck mode. BioLockJ will do the first two phases, and then stop. This allows you to quickly test changes to your pipeline configuration without actually running a pipeline. It also allows you to see any modules that are automatically added to your pipeline.","title":"Precheck a pipeline"},{"location":"Commands/","text":"The BioLockJ program is launched through the biolockj script. See biolockj --help . Support programs can access information about BioLockJ modules and properties through biolockj-api . There are also several helper scripts for small specific tasks; these are all found under $BLJ/script and added to the $PATH after the basic installation: Bash Commands # Command Description last-pipeline Get the path to the most recent pipeline. Ideal for: cd $(last-pipeline) ls `last-pipeline` cd-blj Go to most recent pipeline & list contents. This is not a script; it is an alias that is added to your bash profile by the install script. The line defining it should look like: alias cd-blj='cd $(last-pipeline); quick_pipeline_view' quick_pipeline_view essentially just pwd and ls ; designed for the cd-blj alias. blj_reset Reset pipeline status to incomplete. If restarted, execution will start with the current module. Deprecated Commands # Command Description (Replacement) blj_log Tail last 1K lines from current or most recent pipeline log file. Replacement : cd $(last-pipeline); tail -1000 *.log blj_summary Print current or most recent pipeline summary. Replacement : cd $(last-pipeline); cat summary.txt blj_complete Manually completes the current module and pipeline status. This functionality should never be needed. For the rare occasions when it is appropriate, it can be done manually. Replacement : touch biolockjComplete blj_reset Reset pipeline status to incomplete. If restarted, execution will start with the current module. The need for this functionality is common, and a bash wrapper script still exists. Alternative : java -cp ${BLJ}/dist/BioLockJ.jar biolockj.launch.Reset ${PWD} blj_download If on a cluster, extract and print the command syntax from the summary.txt file to download pipeline results to your local workstation directory: pipeline.downloadDir . no replacement : You will need to review your pipeline's summary file to find the download command.","title":"Commands"},{"location":"Commands/#bash-commands","text":"Command Description last-pipeline Get the path to the most recent pipeline. Ideal for: cd $(last-pipeline) ls `last-pipeline` cd-blj Go to most recent pipeline & list contents. This is not a script; it is an alias that is added to your bash profile by the install script. The line defining it should look like: alias cd-blj='cd $(last-pipeline); quick_pipeline_view' quick_pipeline_view essentially just pwd and ls ; designed for the cd-blj alias. blj_reset Reset pipeline status to incomplete. If restarted, execution will start with the current module.","title":"Bash Commands"},{"location":"Commands/#deprecated-commands","text":"Command Description (Replacement) blj_log Tail last 1K lines from current or most recent pipeline log file. Replacement : cd $(last-pipeline); tail -1000 *.log blj_summary Print current or most recent pipeline summary.
Replacement : cd $(last-pipeline); cat summary.txt blj_complete Manually completes the current module and pipeline status. This functionality should never be needed. For the rare occasions when it is appropriate, it can be done manually. Replacement : touch biolockjComplete blj_reset Reset pipeline status to incomplete. If restarted, execution will start with the current module. The need for this functionality is common, and a bash wrapper script still exists. Alternative : java -cp ${BLJ}/dist/BioLockJ.jar biolockj.launch.Reset ${PWD} blj_download If on a cluster, extract and print the command syntax from the summary.txt file to download pipeline results to your local workstation directory: pipeline.downloadDir . no replacement : You will need to review your pipeline's summary file to find the download command.","title":"Deprecated Commands"},{"location":"Configuration/","text":"A configuration file encapsulates an analysis pipeline. BioLockJ takes a single configuration file as a runtime parameter. biolockj config.properties Every line in a BioLockJ configuration file is one of: BioModule (line starts with #BioModule ) comment (all other lines that start with # ; these have no effect) property ( name=value ) BioModule execution order # To include a BioModule in your pipeline, add a #BioModule line to the top of your configuration file, as shown in the examples found in templates . Each line has the #BioModule keyword followed by the path for that module. For example: #BioModule biolockj.module.seq.PearMergeReads #BioModule biolockj.module.classifier.wgs.Kraken2Classifier #BioModule biolockj.module.report.r.R_PlotMds This line is given at the top of the user guide page for each module. BioModules will be executed in the order they are listed here. A typical pipeline contains one classifier module . Any number of sequence pre-processing modules may come before the classifier module. Any number of report modules may come after the classifier module. In addition to the BioModules specified in the configuration file, BioLockJ may add implicit modules that are required by the specified modules. See Example Pipeline . A module can be given an alias by using the AS keyword in its execution line: #BioModule biolockj.module.seq.PearMergeReads AS Pear This is generally used for modules that are used more than once in the same pipeline. Given this alias, the folder for this module will be called 01_Pear instead of 01_PearMergeReads , and any general properties directed to this module would use the prefix Pear instead of PearMergeReads . An alias must start with a capital letter, and cannot duplicate a name/alias of any other module in the same pipeline. Properties # Properties are defined as name-value pairs. List-values are comma separated. Leading and trailing whitespace is removed so \"propName=x,y\" is equivalent to \"propName = x, y\". See the list of available properties . Variables # Bash variables can be referenced in the config. They must be \"fully dressed\": ${VAR} There are two variables that BioLockJ requires: BLJ is the file path to the BioLockJ directory and BLJ_PROJ is the directory where pipelines created by BioLockJ are stored and run. After installation these are defined in the shell profile. These can be referenced in the config file. The ~ (\"tilde\") is replaced with ${HOME} if (and only if) the ~ is the first character.
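So, for example, these two assignments are equivalent ( download.dir is the user-specific property mentioned under User-specified Defaults; the path itself is hypothetical): download.dir=~/pipeline_downloads download.dir=${HOME}/pipeline_downloads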
Variables can also be defined in the config file and referenced in the same way: DIR=/path/to/big/data/dir sra.destinationDir=${DIR}/seqs sra.sraAccList=${DIR}/SraAccList.txt input.dirPaths=${DIR}/seqs Variables that are defined in the config file can be referenced within the config file; however, these variables are not added to the module script environment. If you are referencing environment variables and running in docker, you will need to use the -e parameter to biolockj to pass the variables into the docker environment (even if the variable is defined in the config file). For example: biolockj --docker -e SHEP=$SHEP,DIR=/path/to/big/data/dir config.properties Most environment variables will NOT be part of the module script environment. However, any environment variable that is referenced in the configuration file is considered necessary for the pipeline, and it is passed into the main program environment, docker containers, and module runtime environments. Environment variables are not the best way to get information to a script because they can be difficult to trace / troubleshoot. However, if your script or tool requires a particular environment variable, you can define it in your local environment, and reference it in the config file using an arbitrary property name, for example: my.variable=${QIIME_CONFIG_FP} This has essentially the same effect as using the -e QIIME_CONFIG_FP=$QIIME_CONFIG_FP argument in the biolockj command. If this variable is required, this is one way to communicate that the value of QIIME_CONFIG_FP may change from one system to the next, but that the existence of QIIME_CONFIG_FP is essential for the pipeline to run. Relative file paths # File paths can be given using relative paths. The path should start with ./ . The location . is interpreted as being the directory where the primary configuration file is. Example file structure: /users/joe/analysis01/ config.properties metadata.txt /sra/ SraAccList.txt Properties in config.properties can use relative paths: metadata.filePath=./metadata.txt sra.sraAccList=./sra/SraAccList.txt Note: ../ is also supported but it does not stack ( ../../../data/ is not supported). With this design, the \"analysis01\" folder could be shared or moved and the configuration file would not need to be updated to reflect the new location of the project files it references. Special properties # Some properties invoke special handling. pipeline.defaultProps # pipeline.defaultProps is handled before any other property. It is used to link another properties file. The properties from that file are added to the MASTER set. The pipeline.defaultProps property itself is not included in the MASTER properties set. Module-specific forms # Many pipeline properties (usually those used by pipeline utilities) can be directed to a specific module. For example, script.numThreads is a general property that specifies the number of threads allotted to each script launched by any module; and PearMergeReads.numThreads overrides that property ONLY for the PearMergeReads module. exe.* properties # exe. properties are used to specify the path to common executables. exe. properties are special in that they have the automatic default of returning the property name minus the exe. prefix as their value. Modules are sometimes written to use a common tool, such as Rscript or bowtie . These modules will write scripts with the assumption that this command is on the $PATH when the script is executed UNLESS exe.Rscript is given specifying a path to use.
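For example (the path is hypothetical): exe.Rscript=/usr/local/bin/Rscript With this line in the config (or in a defaultProps file), generated scripts call /usr/local/bin/Rscript ; without it, they simply call Rscript and rely on the $PATH .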
The exe. properties are often specified in a defaultProps file for a given environment rather than in individual project properties files. Most often, docker containers are used because of the executables baked into them, and any exe. configurations are only applicable when not running in docker. In a pipeline running in docker, all references to an exe. property will return the default value (by removing the exe. prefix), regardless of how the exe. property is configured. In the rare case where you do need to give the path to an executable within a container, you can specify this by using the prefix dockerExe. in place of exe. . In the even rarer case where you want to use an executable from your local machine, while running a pipeline in docker, you can specify this by using the prefix hostExe. in place of exe. . Chaining configuration files # Although all properties can be configured in one file, we recommend chaining default files through the pipeline.defaultProps option. This can often improve the portability, maintainability, and readability of the project-specific configuration files. Standard Properties # BioLockJ will always apply the standard.properties file packaged with BioLockJ under resources/config/default/ ; you do not need to specify this file in your pipeline.defaultProps chain. If (and only if) running a pipeline in docker, BioLockJ will apply the docker.properties file packaged with BioLockJ under resources/config/default/ . User-specified Defaults # We recommend creating an environment.properties file to assign environment-specific defaults. Set cluster & script properties Set paths to key executables through exe properties Override standard.properties as needed. This information is the same for many (or all) projects run in this environment, and entering the info anew for each project is tedious, time-consuming and error-prone. If using a shared system, consider using a user.properties file. Set user-specific properties such as download.dir and mail.to. For shared projects, use a path that will be updated per-user, such as ~/biolock_user.properties Other logical intermediates may also present themselves. For example, some group of projects may need to override several of the defaults set in environment.properties, but others still use those defaults. Projects in this set can use pipeline.defaultProps=group2.properties and the group2.properties files may include pipeline.defaultProps=environment.properties Project Properties # Create a new configuration file for each pipeline to assign project-specific properties: Set the BioModule execution order Set pipeline.defaultProps = environment.properties You may use multiple default config files: pipeline.defaultProps=environment.properties,groupSettings.properties Override environment.properties and standard.properties as needed Example project configuration files can be found in templates . If the same property is given in multiple config files, the highest priority goes to the file used to launch the pipeline. Standard.properties always has the lowest priority. A copy of each configuration file is stored in the pipeline root directory to serve as primary project documentation.","title":"Configuration"},{"location":"Configuration/#biomodule-execution-order","text":"To include a BioModule in your pipeline, add a #BioModule line to the top of your configuration file, as shown in the examples found in templates . Each line has the #BioModule keyword followed by the path for that module.
For example: #BioModule biolockj.module.seq.PearMergeReads #BioModule biolockj.module.classifier.wgs.Kraken2Classifier #BioModule biolockj.module.report.r.R_PlotMds This line is given at the top of the user guide page for each module. BioModules will be executed in the order they are listed here. A typical pipeline contains one classifier module . Any number of sequence pre-processing modules may come before the classifier module. Any number of report modules may come after the classifier module. In addition to the BioModules specified in the configuration file, BioLockJ may add implicit modules that are required by the specified modules. See Example Pipeline . A module can be given an alias by using the AS keyword in its execution line: #BioModule biolockj.module.seq.PearMergeReads AS Pear This is generally used for modules that are used more than once in the same pipeline. Given this alias, the folder for this module will be called 01_Pear instead of 01_PearMergeReads , and any general properties directed to this module would use the prefix Pear instead of PearMergeReads . An alias must start with a capital letter, and cannot duplicate a name/alias of any other module in the same pipeline.","title":"BioModule execution order"},{"location":"Configuration/#properties","text":"Properties are defined as name-value pairs. List-values are comma separated. Leading and trailing whitespace is removed so \"propName=x,y\" is equivalent to \"propName = x, y\". See the list of available properties .","title":"Properties"},{"location":"Configuration/#variables","text":"Bash variables can be referenced in the config. They must be \"fully dressed\": ${VAR} There are two variables that BioLockJ requires: BLJ is the file path to the BioLockJ directory and BLJ_PROJ is the directory where pipelines created by BioLockJ are stored and run. After installation these are defined in the shell profile. These can be referenced in the config file. The ~ (\"tilde\") is replaced with ${HOME} if (and only if) the ~ is the first character. Variables can also be defined in the config file and referenced in the same way: DIR=/path/to/big/data/dir sra.destinationDir=${DIR}/seqs sra.sraAccList=${DIR}/SraAccList.txt input.dirPaths=${DIR}/seqs Variables that are defined in the config file can be referenced within the config file; however, these variables are not added to the module script environment. If you are referencing environment variables and running in docker, you will need to use the -e parameter to biolockj to pass the variables into the docker environment (even if the variable is defined in the config file). For example: biolockj --docker -e SHEP=$SHEP,DIR=/path/to/big/data/dir config.properties Most environment variables will NOT be part of the module script environment. However, any environment variable that is referenced in the configuration file is considered necessary for the pipeline, and it is passed into the main program environment, docker containers, and module runtime environments. Environment variables are not the best way to get information to a script because they can be difficult to trace / troubleshoot. However, if your script or tool requires a particular environment variable, you can define it in your local environment, and reference it in the config file using an arbitrary property name, for example: my.variable=${QIIME_CONFIG_FP} This has essentially the same effect as using the -e QIIME_CONFIG_FP=$QIIME_CONFIG_FP argument in the biolockj command.
If this variable is required, this is one way to communicate that the value of QIIME_CONFIG_FP may change from one system to the next, but that the existence of QIIME_CONFIG_FP is essential for the pipeline to run.","title":"Variables"},{"location":"Configuration/#relative-file-paths","text":"File paths can be given using relative paths. The path should start with ./ . The location . is interpreted as being the directory where the primary configuration file is. Example file structure: /users/joe/analysis01/ config.properties metadata.txt /sra/ SraAccList.txt Properties in config.properties can use relative paths: metadata.filePath=./metadata.txt sra.sraAccList=./sra/SraAccList.txt Note: ../ is also supported but it does not stack ( ../../../data/ is not supported). With this design, the \"analysis01\" folder could be shared or moved and the configuration file would not need to be updated to reflect the new location of the project files it references.","title":"Relative file paths"},{"location":"Configuration/#special-properties","text":"Some properties invoke special handling.","title":"Special properties"},{"location":"Configuration/#pipelinedefaultprops","text":"pipeline.defaultProps is handled before any other property. It is used to link another properties file. The properties from that file are added to the MASTER set. The pipeline.defaultProps property itself is not included in the MASTER properties set.","title":"pipeline.defaultProps"},{"location":"Configuration/#module-specific-forms","text":"Many pipeline properties (usually those used by pipeline utilities) can be directed to a specific module. For example, script.numThreads is a general property that specifies the number of threads allotted to each script launched by any module; and PearMergeReads.numThreads overrides that property ONLY for the PearMergeReads module.","title":"Module-specific forms"},{"location":"Configuration/#exe-properties","text":"exe. properties are used to specify the path to common executables. exe. properties are special in that they have the automatic default of returning the property name minus the exe. prefix as their value. Modules are sometimes written to use a common tool, such as Rscript or bowtie . These modules will write scripts with the assumption that this command is on the $PATH when the script is executed UNLESS exe.Rscript is given specifying a path to use. The exe. properties are often specified in a defaultProps file for a given environment rather than in individual project properties files. Most often, docker containers are used because of the executables baked into them, and any exe. configurations are only applicable when not running in docker. In a pipeline running in docker, all references to an exe. property will return the default value (by removing the exe. prefix), regardless of how the exe. property is configured. In the rare case where you do need to give the path to an executable within a container, you can specify this by using the prefix dockerExe. in place of exe. . In the even rarer case where you want to use an executable from your local machine, while running a pipeline in docker, you can specify this by using the prefix hostExe. in place of exe. .","title":"exe.* properties"},{"location":"Configuration/#chaining-configuration-files","text":"Although all properties can be configured in one file, we recommend chaining default files through the pipeline.defaultProps option.
This can often improve the portability, maintainability, and readability of the project-specific configuration files.","title":"Chaining configuration files"},{"location":"Configuration/#standard-properties","text":"BioLockJ will always apply the standard.properties file packaged with BioLockJ under resources/config/default/ ; you do not need to specify this file in your pipeline.defaultProps chain. If (and only if) running a pipeline in docker, BioLockJ will apply the docker.properties file packaged with BioLockJ under resources/config/default/ .","title":"Standard Properties"},{"location":"Configuration/#user-specified-defaults","text":"We recommend creating an environment.properties file to assign environment-specific defaults. Set cluster & script properties Set paths to key executables through exe properties Override standard.properties as needed. This information is the same for many (or all) projects run in this environment, and entering the info anew for each project is tedious, time-consuming and error-prone. If using a shared system, consider using a user.properties file. Set user-specific properties such as download.dir and mail.to. For shared projects, use a path that will be updated per-user, such as ~/biolock_user.properties Other logical intermediates may also present themselves. For example, some group of projects may need to override several of the defaults set in environment.properties, but others still use those defaults. Projects in this set can use pipeline.defaultProps=group2.properties and the group2.properties files may include pipeline.defaultProps=environment.properties","title":"User-specified Defaults"},{"location":"Configuration/#project-properties","text":"Create a new configuration file for each pipeline to assign project-specific properties: Set the BioModule execution order Set pipeline.defaultProps = environment.properties You may use multiple default config files: pipeline.defaultProps=environment.properties,groupSettings.properties Override environment.properties and standard.properties as needed Example project configuration files can be found in templates . If the same property is given in multiple config files, the highest priority goes to the file used to launch the pipeline. Standard.properties always has the lowest priority. A copy of each configuration file is stored in the pipeline root directory to serve as primary project documentation.","title":"Project Properties"},{"location":"Dependencies/","text":"BioLockJ requires Java 1.8+ and a Unix-like operating system such as Darwin/macOS ; see Notes about environments . BioLockJ is a pipeline manager, designed to integrate and manage external tools. These external tools are not packaged into the BioLockJ program. BioLockJ must run in an environment where these other tools have been installed, OR run through docker using docker images that have the tools installed. The core program, and all modules packaged with it, have corresponding docker images. Dependencies are required by modules listed in the BioModule Function column. Users DO NOT NEED TO INSTALL dependencies if not interested in the listed modules. For example, if you intend to classify 16S samples with RDP and WGS samples with Kraken, do not install: Bowtie2, GNU Awk, GNU Gzip, MetaPhlAn2, Python, QIIME 1, or Vsearch.
# Program Version BioModule Function Link 1 Bowtie2 2.3.2 Metaphlan2Classifier : Build reference indexes download 2 GNU Awk 4.0.2 AwkFastaConverter : Convert Fastq to Fasta BuildQiimeMapping : Format metadata as QIIME mapping QiimeClosedRefClassifier : Build batch mapping files download 3 GNU Gzip 1.5 AwkFastaConverter : Decompress .gz files Gunzipper : Decompress .gz files download 4 Kraken 0.10.5-beta KrakenClassifier : Report WGS taxonomic summary download 5 MetaPhlAn2 2.0 Metaphlan2Classifier : Report WGS taxonomic summary download 6 Python 2.7.12 BuildQiimeMapping : Run validate_mapping_file.py MergeQiimeOtuTables : Run merge_otu_tables.py QiimeClosedRefClassifier : Run pick_closed_reference_otus.py QiimeDeNovoClassifier : Run pick_de_novo_otus.py QiimeOpenRefClassifier : Run pick_open_reference_otus.py QiimeClassifier : Run add_alpha_to_mapping_file.py, add_qiime_labels.py, alpha_diversity.py, filter_otus_from_otu_table.py, print_qiime_config.py, and summarize_taxa.py Metaphlan2Classifier : Run metaphlan2.py download 7 PEAR 0.9.8 Paired-End reAd merger PearMergeReads Merge paired Fastq files since some classifiers ( RDP & QIIME ) will not accept paired reads. download 8 QIIME 1 1.9.1 Quantitative Insights Into Microbial Ecology BuildQiimeMapping : Validate QIIME mapping MergeQiimeOtuTables : Merge otu_table.biom files QiimeClosedRefClassifier : Pick OTUs by reference QiimeDeNovoClassifier : Pick OTUs by clustering QiimeOpenRefClassifier : Pick OTUs by reference and clustering QiimeClassifier : Report 16S taxonomic summary download 9 R 3.5.0 R_CalculateStats : Statistical modeling R_PlotPvalHistograms : Plot p-value histograms for each reportable metadata field R_PlotOtus : Build OTU-metadata boxplots and scatterplots R_PlotMds : Plot by top MDS axis R_PlotEffectSize : Build barplot of effect magnitude by OTU/taxa download 10 R-coin 1.2 COnditional Inference procedures in a permutatioN test framework R_CalculateStats : Compute exact Wilcox_test p-values download 11 R-ggpubr 0.1.8 R_PlotPvalHistograms : Set color palette R_PlotMds : Set color palette R_PlotEffectSize : Set color palette download 12 R-Kendall 2.2 R_CalculateStats : Compute rank correlation p-values for continuous data types download 13 R-properties 0.0-9 R_Module : Reads in the MASTER configuration properties file from the pipeline root directory download 14 R-stringr 1.2.0 R_Module : For string manipulation for handling Configuration properties download 15 R-vegan 2.5-2 R_PlotMds : Ordination methods, diversity analysis and other functions for ecologists. download 16 RDP 2.12 Ribosomal Database Project RdpClassifier : Report 16S taxonomic summary download 17 Vsearch 2.4.3 QiimeDeNovoClassifier : Chimera detection QiimeOpenRefClassifier : Chimera detection download Version Dependencies # The Version column contains the version tested during BioLockJ development, but other versions can often be substituted. Major releases (such as Python 2 vs. Python 3) contain API changes that will not integrate with the current BioLockJ code. Application APIs often change over time, so not all versions are supported. For example, Bowtie2 did not add the large index functionality until version 2.3.2.","title":"Dependencies"},{"location":"Dependencies/#version-dependencies","text":"The Version column contains the version tested during BioLockJ development, but other versions can often be substituted. Major releases (such as Python 2 vs. Python 3) contain API changes that will not integrate with the current BioLockJ code.
Application APIs often change over time, so not all versions are supported. For example, Bowtie2 did not add the large index functionality until version 2.3.2.","title":"Version Dependencies"},{"location":"DevNotes-main/","text":"BioLockJ Developers Guide # Release process # Release process Javadocs # https://BioLockJ-Dev-Team.github.io/BioLockJ/javadocs/ Guidelines for new modules # Building Modules","title":"BioLockJ Developers Guide"},{"location":"DevNotes-main/#biolockj-developers-guide","text":"","title":"BioLockJ Developers Guide"},{"location":"DevNotes-main/#release-process","text":"Release process","title":"Release process"},{"location":"DevNotes-main/#javadocs","text":"https://BioLockJ-Dev-Team.github.io/BioLockJ/javadocs/","title":"Javadocs"},{"location":"DevNotes-main/#guidelines-for-new-modules","text":"Building Modules","title":"Guidelines for new modules"},{"location":"DevNotes-releaseProcess/","text":"Release process # The release process must be performed by someone with write permission on the main BioLockJ repository. Since that repository is owned by a GitHub group, anyone with owner permission in the group can perform the steps. Merge any pull requests that should be included in the release. Edit the version file to show the release version (i.e., remove the \"-dev\" suffix) Render all documentation: cd $BLJ/resources; ant userguide Commit these changes, often with the message \"version++ to vx.y.z; render docs\" Tag the current master with the tag \"v.x.y.z-rc\" (\"release candidate\") Run release tests ( see details below ) Tag the current main branch of the BioLockJ repository with the official release tag. After saving the results of tests, use the same tag for the sheepdog_testing_suite main branch. Push the commits and tags to the central main: git push --tags upstream Build the distribution tarball ( see details below ) In GitHub, go to tags, select the new release tag, edit it, and upload the tarball you just created. Trigger DockerHub builds by pushing to the linked github repository ( see details below ) Set the new dev version Use the next patch release (even if the next release is expected to be major). After release v1.3.14, set the version file to say \"v1.3.15-dev\". Commit this with the message \"Dev continues toward v1.3.15\". Review : Use the link to the latest release on the Getting-Started page, and make sure the release appears correct. Make sure the user guide link(s) in the top repo README both reflect the latest release The view through github.io is controlled under the Settings for the BioLockJ repository. The view through readthedocs is controlled by the biolockj project, which has multiple admins. Look for failed docker builds . The auto builds are configured through the biolockjdevteam organization on DockerHub, which as of late 2020 is a paid account and has multiple admins. Running release tests # Use the tools in the repository: BioLockJ_Dev_Team/sheepdog_testing_suite. The tools in this suite will automatically build the BioLockJ program from source, but they will not build the updated docker image. Many tests run in docker use the --blj arg so that the current BioLockJ folder is mapped in, so there is no need to update the image to test a local copy of BioLockJ.
For individual modules, the corresponding docker image probably hasn't changed since the last version, so you can save a bit of time during testing by simply re-tagging the old images with the new version: $BLJ/resources/docker/docker_build_scripts/retagForLocalTests.sh v1.3.15 v1.3.16 Any image whose dockerfile was changed should be built, and the biolockj_controller should be built (since presumably that has changed since the last version). To build all images, use the buildDockerImages.sh script with no args. With one arg, any image matching that string will be built. $BLJ/resources/docker/docker_build_scripts/buildDockerImages.sh controller The sheepdog_testing_suite has further instructions for setting up the tests. Use the main branch, and tag it with the same release candidate tag used for the BioLockJ repository. Run each of the /test/run_*_testCollection.sh scripts in the corresponding environment. Save results files under archived_testCollection_results (see existing examples for which files to save) (recommended) Locally save the pipelines for all tests for later reference. But DO NOT commit these in either repository. If tests fail (that previously passed), reconsider the release. Make and commit quick fixes if that is feasible. Assuming tests pass, proceed with the release process. Building for deployment # Best practice for packaging the official release is to download a fresh copy of the official repo, and build within a docker image. The fresh clone ensures that git-ignored files that are in the local repo copy are not incorporated in the official deployment. Using the docker image promotes consistency, and reduces the chances of invisible dependencies. (Not to mention, it's downright convenient!) git clone https://github.com/BioLockJ-Dev-Team/BioLockJ.git cd BioLockJ docker run --rm -v $PWD:/biolockj biolockjdevteam/build_and_deploy If needed, the git clone command could be replaced with wget https://github.com/BioLockJ-Dev-Team/BioLockJ/archive/main.zip , or any other download command. Triggering docker builds # BioLockJ docker images, most importantly biolockj_controller, are hosted on docker hub under the organization "biolockjdevteam". The images for modules that are packaged with the main program, and the image for the BioLockJ program itself, are set up to build on docker hub infrastructure automatically. For the modules, this typically creates an identical image, and gives it a new tag matching the current release version. This automated build is triggered when a tag matching our version format (ie v1.2.3) is pushed to the linked github repository. As of this writing, dockerhub and github have a nice integration, but it does not allow for linking to a repository owned by an organization (like our BioLockJ repository is owned by the biolockj_dev_team organization). So we have a separate fork of the repository that exists solely to trigger builds on dockerhub. The bot user is "biolockjBuilder". In order to push to this repo, you will need permission. Any new user who will do the release process will need to be added as a collaborator to that repository. (first time only) Set up the biolockjBuilder fork as a remote for your BioLockJ git repository: git remote add DockerBuilder https://github.com/biolockjBuilder/BioLockJ.git Push the release tag to this repository. git push DockerBuilder --tags Within a few minutes there should be builds scheduled on DockerHub for the auto-build repositories. They may take some time to actually build.
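To confirm that the tag actually reached the build-trigger fork before waiting on DockerHub, a quick check might look like this (a sketch; it assumes the DockerBuilder remote configured above and uses v1.3.15 as a stand-in version):

git ls-remote --tags DockerBuilder | grep v1.3.15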
After a few hours, check the repositories to see that new builds exist and that no builds failed. Failed docker builds # Sometimes there are random failures (maybe a website was down temporarily) and you will need to build the image locally and push it with the desired tag. If the build fails for the biolockj_controller image, that is a big problem and you need to figure out why. If the build fails for one of the modules, that usually means that a url in the dockerfile needs to be updated. In some cases, some dependency is no longer available (no longer hosted). In that case, pull the previous version of the image, retag it with the current tag and push to dockerhub. Make an issue to resolve the problem before the next release. If the dockerfile can be updated to create a functional image to run the module, great, do that. If that is not possible, then the most recent image is the image, and the module's docker tag method should no longer use the current biolockj version, but should instead be hard-coded to the most recent version. Turn off auto-builds for that image. This is probably a red flag that the software is no longer supported, and the module will (eventually) need to be replaced.","title":"Release process"},{"location":"DevNotes-releaseProcess/#release-process","text":"The release process must be performed by someone with write permission on the main BioLockJ repository. Since that repository is owned by a GitHub group, anyone with owner permission in the group can perform the steps. Merge any pull requests that should be included in the release. Edit the version file to show the release version (ie, remove the \"-dev\" suffix) Render all documentation: cd $BLJ/resources; ant userguide Commit these changes, often with the message \"version++ to vx.y.z; render docs\" Tag the current master with the tag \"vx.y.z-rc\" (\"release candidate\") Run release tests ( see details below ) Tag the current main branch of the BioLockJ repository with the official release tag. After saving the results of tests, use the same tag for the sheepdog_testing_suite main branch. Push the commits and tags to the central main: git push --tags upstream Build the distribution tarball ( see details below ) In GitHub, go to tags, select the new release tag, edit it, and upload the tarball you just created. Trigger DockerHub builds by pushing to the linked github repository ( see details below ) Set new dev version Use the next patch release (even if the next release is expected to be major). After release v1.3.14, set the version file to say \"v1.3.15-dev\". Commit this with the message \"Dev continues toward v1.3.15\". Review : Use the link to the latest release on the Getting-Started page, and make sure the release appears correct. Make sure the user guide link(s) in the top repo README both reflect the latest release The view through github.io is controlled under the Settings for the BioLockJ repository. The view through readthedocs is controlled by the biolockj project which has multiple admins. Look for failed docker builds . The auto builds are configured through the biolockjdevteam organization on DockerHub, which as of late 2020 is a paid account, and has multiple admins.","title":"Release process"},{"location":"DevNotes-releaseProcess/#running-release-tests","text":"Use the tools in the repository: BioLockJ_Dev_Team/sheepdog_testing_suite. The tools in this suite will automatically build the BioLockJ program from source, but they will not build the updated docker image.
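For the fall-back described under Failed docker builds above (re-publishing the previous image under the current release tag), the commands might look like the following sketch; some_module is a hypothetical image name and the version tags are illustrative:

docker pull biolockjdevteam/some_module:v1.3.15
docker tag biolockjdevteam/some_module:v1.3.15 biolockjdevteam/some_module:v1.3.16
docker push biolockjdevteam/some_module:v1.3.16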
Many tests run in docker use the --blj arg so that the current BioLockJ folder is mapped in, so there is no need to update the image to test a local copy of BioLockJ. For individual modules, the corresponding docker image probably hasn't changed since the last version, so you can save a bit of time during testing by simply re-tagging the old images with the new version: $BLJ/resources/docker/docker_build_scripts/retagForLocalTests.sh v1.3.15 v1.3.16 Any image whose dockerfile was changed should be built, and the biolockj_controller should be built (since presumably that has changed since the last version). To build all images, use the buildDockerImages.sh script with no args. With one arg, any image matching that string will be built. $BLJ/resources/docker/docker_build_scripts/buildDockerImages.sh controller The sheepdog_testing_suite has further instructions for setting up the tests. Use the main branch, and tag it with the same release candidate tag used for the BioLockJ repository. Run each of the /test/run_*_testCollection.sh scripts in the corresponding environment. Save results files under archived_testCollection_results (see existing examples for which files to save) (recommended) Locally save the pipelines for all tests for later reference. But DO NOT commit these in either repository. If tests fail (that previously passed), reconsider the release. Make and commit quick fixes if that is feasible. Assuming tests pass, proceed with the release process.","title":"Running release tests"},{"location":"DevNotes-releaseProcess/#building-for-deployment","text":"Best practice for packaging the official release is to download a fresh copy of the official repo, and build within a docker image. The fresh clone ensures that git-ignored files that are in the local repo copy are not incorporated in the official deployment. Using the docker image promotes consistency, and reduces the chances of invisible dependencies. (Not to mention, it's downright convenient!) git clone https://github.com/BioLockJ-Dev-Team/BioLockJ.git cd BioLockJ docker run --rm -v $PWD:/biolockj biolockjdevteam/build_and_deploy If needed, the git clone command could be replaced with wget https://github.com/BioLockJ-Dev-Team/BioLockJ/archive/main.zip , or any other download command.","title":"Building for deployment"},{"location":"DevNotes-releaseProcess/#triggering-docker-builds","text":"BioLockJ docker images, most importantly biolockj_controller, are hosted on docker hub under the organization "biolockjdevteam". The images for modules that are packaged with the main program, and the image for the BioLockJ program itself, are set up to build on docker hub infrastructure automatically. For the modules, this typically creates an identical image, and gives it a new tag matching the current release version. This automated build is triggered when a tag matching our version format (ie v1.2.3) is pushed to the linked github repository. As of this writing, dockerhub and github have a nice integration, but it does not allow for linking to a repository owned by an organization (like our BioLockJ repository is owned by the biolockj_dev_team organization). So we have a separate fork of the repository that exists solely to trigger builds on dockerhub. The bot user is "biolockjBuilder". In order to push to this repo, you will need permission. Any new user who will do the release process will need to be added as a collaborator to that repository.
(first time only) Set up the biolockjBuilder fork as a remote for your BioLockJ git repository: git remote add DockerBuilder https://github.com/biolockjBuilder/BioLockJ.git Push the release tag to this repository. git push DockerBuilder --tags Within a few minutes there should be builds scheduled on DockerHub for the auto-build repositories. They may take some time to actually build. After a few hours, check the repositories to see that new builds exist and that no builds failed.","title":"Triggering docker builds"},{"location":"DevNotes-releaseProcess/#failed-docker-builds","text":"Sometimes there are random failures (maybe a website was down temporarily) and you will need to build the image locally and push it with the desired tag. If the build fails for the biolockj_controller image, that is a big problem and you need to figure out why. If the build fails for one of the modules, that usually means that a url in the dockerfile needs to be updated. In some cases, some dependency is no longer available (no longer hosted). In that case, pull the previous version of the image, retag it with the current tag and push to dockerhub. Make an issue to resolve the problem before the next release. If the dockerfile can be updated to create a functional image to run the module, great, do that. If that is not possible, then the most recent image is the image, and the module's docker tag method should no longer use the current biolockj version, but should instead be hard-coded to the most recent version. Turn off auto-builds for that image. This is probably a red flag that the software is no longer supported, and the module will (eventually) need to be replaced.","title":"Failed docker builds"},{"location":"Example-Pipeline/","text":"In our example analysis, we investigate the differences between the microbiome of 20 rural and 20 recently urbanized subjects from the Chinese province of Hunan. For more information on this dataset, please review the analysis the Fodor Lab published in the Sep 2017 issue of the journal Microbiome: https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-017-0338-7 Step 1: Prepare BioLockJ Config File # The BioLockJ project Config chinaKrakenFullDB.properties lists 5 BioModules to run (lines 3-7) + 13 properties: #BioModule biolockj.module.implicit.RegisterNumReads #BioModule biolockj.module.classifier.wgs.KrakenClassifier #BioModule biolockj.module.report.taxa.NormalizeTaxaTables #BioModule biolockj.module.report.r.R_PlotPvalHistograms #BioModule biolockj.module.report.r.R_PlotOtus In addition to the 5 listed BioModules, 4 additional implicit BioModules will also run: Mod# Module Description 1 ImportMetadata Always run 1st (for all pipelines) 2 KrakenParser Always run after KrakenClassifier 3 AddMetadataToOtuTables Always run just before the 1st R module 4 CalculateStats Always run as the 1st R module. Key properties: Line# Property Description 08 cluster.jobHeader Each script will run on 1 node, 16 cores, and 128GB RAM for up to 30 minutes 10 pipeline.defaultProps Default config file defines most properties \u2013 in this case copperhead.properties 12 input.dirPaths Directory path containing 40 gzipped whole genome sequencing (WGS) fastq files 18 metadata.filePath Metadata file path: chinaMetadata.tsv BioLockJ must associate sequence files in input.dirPaths with the correct metadata row. This is done by matching sequence file names to the 1st column in the metadata file. If the Sample ID is not found in your file names, the file names must be updated.
Use the following properties to ignore a file prefix or suffix when matching the sample IDs. input.suffixFw input.suffixRv input.trimPrefix input.trimSuffix Sample IDs from 1st column of the metadata file: 081A, 082A, 083A...etc. Sequence file names: 081A_R1.fq.gz, 082A_R1.fq.gz, 083A_R1.fq.gz...etc. The default Config file, copperhead.properties, has its own default Config file standard.properties which defines the property input.suffixFw=_R1 . As a result, all characters starting with (and including) \u201c_R1\u201d are ignored when matching the file name to the metadata sample ID. Step 2: Run BioLockJ Pipeline # > biolockj ~/chinaKrakenFullDB.properties Look in the BioLockJ pipeline output directory defined by $BLJ_PROJ for a new pipeline directory named after the property file + today\u2019s date: ~/projects/chinaKrakenFullDB_2018Apr09 The 5 configured modules have run in order, with the addition of 2 implicit modules (1st and last) which are added to all pipelines automatically. The biolockjComplete file indicates the pipeline ran successfully. Step 3: Review Pipeline Summary # Run the blj_summary command to review the pipeline execution summary. > blj_summary Pipeline Summary Step 4: Download R Reports # Run the blj_download command to get the command needed to download the analysis. > blj_download > rsync Step 5: Analyze R Reports # Open downloadDir on your local filesystem to review the analysis. This directory contains: Output Description /temp Directory where R log files are saved if R script runs locally. /tables Directory containing the OTU tables. /local Directory where R script output is saved if R script runs locally and r.debug=Y . *.RData The saved R sessions for R modules run if r.saveRData=Y . chinaKrakenFullDB.log The pipeline Java log file. MAIN_*.R Each R script for each module that generated reports has been updated to run on your local filesystem. *.tsv files Spreadsheets containing p-value and R^2 statistics for each OTU in the taxonomy level. *.pdf files P-value histograms, and bar-charts or scatterplots for each OTU in the taxonomy level. Each R module generates a report for each report.taxonomyLevel configured: Open chinaKrakenFullDB_Log10_genus.pdf # The report begins with the unadjusted P-Value Distributions: Since r.numHistogramBreaks=20 , the 1st bar represents the p-values < 0.05. The ruralUrban attribute appears significant, as indicated by the high number of p-values < 0.05. For each OTU, a bar-chart or scatterplot is output with adjusted parametric and non-parametric p-values formatted in the plot header. The p-value format is defined by r.pValFormat . The p-adjust method is defined by rStats.pAdjustMethod .
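As a concrete illustration of the sample ID matching described in Step 1: if the metadata sample IDs were 081A, 082A, 083A...etc. but the sequence files were named like run3_081A_R1.fq.gz, a hypothetical Config could trim the extra pieces with the properties below (run3_ is an invented prefix for illustration; the example dataset itself only needs input.suffixFw=_R1, which it inherits from standard.properties):

input.trimPrefix=run3_
input.suffixFw=_R1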
P-values that meet the r.pvalCutoff threshold are highlighted with r.colorHighlight .","title":"Example Pipeline"},{"location":"Example-Pipeline/#step-1-prepare-biolockj-config-file","text":"The BioLockJ project Config chinaKrakenFullDB.properties lists 5 BioModules to run (lines 3-7) + 13 properties: #BioModule biolockj.module.implicit.RegisterNumReads #BioModule biolockj.module.classifier.wgs.KrakenClassifier #BioModule biolockj.module.report.taxa.NormalizeTaxaTables #BioModule biolockj.module.report.r.R_PlotPvalHistograms #BioModule biolockj.module.report.r.R_PlotOtus In addition to the 5 listed BioModules, 4 additional implicit BioModules will also run: Mod# Module Description 1 ImportMetadata Always run 1st (for all pipelines) 2 KrakenParser Always run after KrakenClassifier 3 AddMetadataToOtuTables Always run just before the 1st R module 4 CalculateStats Always run as the 1st R module. Key properties: Line# Property Description 08 cluster.jobHeader Each script will run on 1 node, 16 cores, and 128GB RAM for up to 30 minutes 10 pipeline.defaultProps Default config file defines most properties \u2013 in this case copperhead.properties 12 input.dirPaths Directory path containing 40 gzipped whole genome sequencing (WGS) fastq files 18 metadata.filePath Metadata file path: chinaMetadata.tsv BioLockJ must associate sequence files in input.dirPaths with the correct metadata row. This is done by matching sequence file names to the 1st column in the metadata file. If the Sample ID is not found in your file names, the file names must be updated. Use the following properties to ignore a file prefix or suffix when matching the sample IDs. input.suffixFw input.suffixRv input.trimPrefix input.trimSuffix Sample IDs from 1st column of the metadata file: 081A, 082A, 083A...etc. Sequence file names: 081A_R1.fq.gz, 082A_R1.fq.gz, 083A_R1.fq.gz...etc. The default Config file, copperhead.properties, has its own default Config file standard.properties which defines the property input.suffixFw=_R1 . As a result, all characters starting with (and including) \u201c_R1\u201d are ignored when matching the file name to the metadata sample ID.","title":"Step 1: Prepare BioLockJ Config File"},{"location":"Example-Pipeline/#step-2-run-biolockj-pipeline","text":"> biolockj ~/chinaKrakenFullDB.properties Look in the BioLockJ pipeline output directory defined by $BLJ_PROJ for a new pipeline directory named after the property file + today\u2019s date: ~/projects/chinaKrakenFullDB_2018Apr09 The 5 configured modules have run in order, with the addition of 2 implicit modules (1st and last) which are added to all pipelines automatically. The biolockjComplete file indicates the pipeline ran successfully.","title":"Step 2: Run BioLockJ Pipeline"},{"location":"Example-Pipeline/#step-3-review-pipeline-summary","text":"Run the blj_summary command to review the pipeline execution summary. > blj_summary Pipeline Summary","title":"Step 3: Review Pipeline Summary"},{"location":"Example-Pipeline/#step-4-download-r-reports","text":"Run the blj_download command to get the command needed to download the analysis. > blj_download > rsync","title":"Step 4: Download R Reports"},{"location":"Example-Pipeline/#step-5-analyze-r-reports","text":"Open downloadDir on your local filesystem to review the analysis. This directory contains: Output Description /temp Directory where R log files are saved if R script runs locally. /tables Directory containing the OTU tables. 
/local Directory where R script output is saved if R script runs locally and r.debug=Y . *.RData The saved R sessions for R modules run if r.saveRData=Y . chinaKrakenFullDB.log The pipeline Java log file. MAIN_*.R Each R script for each module that generated reports has been updated to run on your local filesystem. *.tsv files Spreadsheets containing p-value and R^2 statistics for each OTU in the taxonomy level. *.pdf files P-value histograms, and bar-charts or scatterplots for each OTU in the taxonomy level. Each R module generates a report for each report.taxonomyLevel configured:","title":"Step 5: Analyze R Reports"},{"location":"Example-Pipeline/#open-chinakrakenfulldb_log10_genuspdf","text":"The report begins with the unadjusted P-Value Distributions: Since r.numHistogramBreaks=20 , the 1st bar represents the p-values < 0.05. The ruralUrban attribute appears significant, as indicated by the high number of p-values < 0.05. For each OTU, a bar-chart or scatterplot is output with adjusted parametric and non-parametric p-values formatted in the plot header. The p-value format is defined by r.pValFormat . The p-adjust method is defined by rStats.pAdjustMethod . P-values that meet the r.pvalCutoff threshold are highlighted with r.colorHighlight .","title":"Open chinaKrakenFullDB_Log10_genus.pdf"},{"location":"FAQ/","text":"FAQ, Troubleshooting and Special Cases # Question: How much does it cost to use BioLockJ ? # Answer: BioLockJ itself is free and open-source. BioLockJ is designed for large datasets, and it is often necessary to purchase computational resources to handle large datasets and to run the processes that BioLockJ will manage. This cost often comes in the form of buying an effective computer, subscribing to a cluster, or purchasing cloud computing power. Question: What are the system requirements for running BioLockJ ? # Answer: Either unix-and-java or docker, details below. Easy mode: you have a unix system and you can run docker. You're covered. BioLockJ requires java, but if you can run docker, then all of the java-components can run inside the docker container. Easy-ish mode: no unix, but you can run docker. See Pure-Docker . Local host mode: No docker. You need to have a unix-like system and java 1.8 or later. The launch process for BioLockJ will be easy, but the majority of modules have essential dependencies and you will have to install each of those dependencies on your own system. See Dependencies . In terms of memory, RAM, and CPUs, the amount required really depends on the size of the data you are processing and the needs of the algorithms you are running. In general, processing sequence data requires a computer cluster or a cloud-computing system (more than a typical individual-user machine). After sequence data have been summarized as tables, all subsequent steps are orders of magnitude smaller and can usually run on a laptop within a matter of minutes. Most datasets can be dramatically sub-sampled to allow a laptop user to run a test of the pipeline; this does not produce usable results, but allows the user to test and troubleshoot the pipeline in a convenient setting before moving it to a bigger system. Question: BioLockJ says that my pipeline is running...now what? # Answer: Check on your pipeline's progress. See the Getting Started page . If you are using a unix-like system, you can use the cd-blj alias to jump to the most recent pipeline.
On any system, the path to the new pipeline is printed during the launch process; it will be a folder immediately under your $BLJ_PROJ folder. Look in that directory. When a pipeline forms, it creates the "precheckStarted" flag and then replaces that with the "precheckComplete" flag when all dependencies/settings are confirmed. Then the pipeline starts the first module, and the flag is replaced with "biolockjStarted". This generally takes a few seconds or less. The subfolder for the current module will also have the "biolockjStarted" flag. When a module is finished, the module flag is replaced with "biolockjComplete". When the last module is finished, the pipeline flag is finally changed to "biolockjComplete". From the pipeline folder, ls 0* is a quick way to see the current progress, because that will show the flag files and subfolders for each of the first ten modules. (That's "LS zero star", or "LS one star" if you have more than ten modules.) If any module encounters an error, and cannot complete, then that module is marked with the "biolockjFailed" flag, the pipeline shuts down, and the pipeline is also marked with "biolockjFailed". Extensive information is available in the pipeline's log file. A more concise message describing the error, and sometimes solutions, is written to the biolockjFailed flag. If your pipeline fails, use cat biolockjFailed to see the error message. Question: My pipeline failed...now what? # Answer: See Failure Recovery . Most often, there is a concise error message that may even have instructions for fixing the pipeline. cd-blj cat biolockjFailed Don't be discouraged. It is normal to go through several, even many, failed attempts as you figure out how all the parts come together. Question: If biolockj indicates that my pipeline may have failed to start, how do I determine the cause of the failure? # Answer: Use -f . By default, BioLockJ runs the java component in the background, and only a minimal, helpful message is printed on the screen. If there was some problem in getting that short, helpful message to the screen, you can use the --foreground or -f option to force biolockj to run in the foreground, thus printing everything to the screen. Often the print-out ends shortly after a helpful message. Question: Sometimes BioLockJ adds modules to my pipeline. How can I tell what modules will be added? # Answer: Read the docs; or use -p . With the --precheck-only or -p option, BioLockJ will create the pipeline and go through the check-dependencies phase for each module, but even without finding errors it will not actually run the pipeline. This allows you to see what modules will be run, see the pipeline folder layout, and see if any errors will prevent the pipeline from starting. This is also ideal when you know you want to change more options or add more modules before you run the pipeline, but you want to check if there is anything that needs to be fixed in what you have so far. In the documentation for each module, there is a section called "Adds modules". A module may give the class path of another module that it adds before or after itself. Many modules say "none found" to indicate that this module does not add any other modules before or after itself. Sometimes this section will say "pipeline-dependent" and more details are given in the "Details" section to explain which other modules might be added and when / why. Modules that are added by other modules are called pre-requisite modules .
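A minimal sketch of this precheck workflow, assuming myPipeline.properties is your config file (the file name is illustrative):

biolockj -p myPipeline.properties   # build the pipeline folder and check dependencies without running
cd-blj                              # jump to the most recent pipeline folder
ls 0*                               # list the first modules, including any pre-requisite or implicit modules that were added
cat biolockjFailed                  # only present if the precheck failed; holds the concise error message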
Modules that are added by the BioLockJ backbone are called implicit modules . These can be disabled with the properties pipeline.disableAddPreReqModules and pipeline.disableAddImplicitModules , respectively. Question: I get an error message about a property, but I have that property configured correctly. What gives? # Answer: Use -u . This is often the result of a typo somewhere. Generally, BioLockJ runs a check-dependencies protocol on each module, and all required properties should be checked during that process, and it stops when it first finds a problem. With the --unused-props or -u option, biolockj will check dependencies for all modules, even after one fails, and any properties that were never used will be printed to the screen. This often highlights typos in property names, or properties that are not used by the currently configured modules. Keep in mind, this only reports properties in your primary config file, not in any of your defaultProps files. Question: A module script is failing because an environment variable is missing. But I know I defined that variable, and I can see it with echo . Why can't the script see it? # Answer: Use -e ; or reference it in your configuration file in the ${VAR} format . Where possible, avoid relying on environment variables. Consider defining a value in your config file and/or adding the value to a parameter list that will be used with the script. Variables from your local environment must be explicitly passed into the module environments. See the Configuration page . Question: On a cluster system, I need a particular module to run on the head node. # Answer: Use module-specific properties to control the cluster properties for that module. See the Configuration page for more details about module-specific forms of general properties. Example: # On this cluster, the compute nodes do not have internet access, only the head node does. The first module in the pipeline is the SraDownload module to get the data, which requires internet access. All pipelines run on this cluster include a reference to the properties set up specifically for this cluster: pipeline.defaultProps=${BLJ}/ourCluster.properties This group chose to store their system configurations in the BioLockJ folder, which they reference using the fully dressed ${BLJ} variable. In this file, they have configurations for launching jobs: cluster.batchCommand = qsub SraDownload.batchCommand = /bin/bash BioLockJ launches jobs using qsub