Merge pull request #231 from ICB-DCM/develop

Release 0.4.2 * Improve parallel scaling by better message handling * Remove unreliable auto-detection of launch via mpi launcher, use --mpi instead * Allow setting integration retries / tolerance relaxation via environment variables PARPE_NUM_SIMULATION_TRIALS and PARPE_INTEGRATION_TOLERANCE_RELAXATION_FACTOR * CMake: enable use of PACKAGE_ROOT * Minor fixes
ICB-DCM · Jan 28, 2020 · 40432be · 40432be
2 parents 1ce7004 + b37dc30
commit 40432be
Show file tree

Hide file tree

Showing 21 changed files with 386 additions and 158 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,6 +1,11 @@
 cmake_minimum_required(VERSION 3.7)
 cmake_policy(VERSION 3.7)
 
+if(POLICY CMP0074)
+  # Use package_ROOT environment variables
+  cmake_policy(SET CMP0074 NEW)
+endif(POLICY CMP0074)
+
 project(parpe)
 
 set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/CMakeModules/)
@@ -44,9 +49,9 @@ set_property(CACHE BLAS PROPERTY STRINGS "CBLAS" "MKL")
 set(PARPE_ENABLE_MPI TRUE CACHE BOOL "Use MPI?")
 set(IPOPT_DIR "" CACHE PATH "IpOpt root directory")
 set(IPOPT_INCLUDE_DIRS "${IPOPT_DIR}/include/coin/"  CACHE PATH "IpOpt include directory")
-set(IPOPT_LIBRARIES ${IPOPT_DIR}/lib/libipopt.a ${IPOPT_DIR}/lib/libcoinhsl.a gfortran CACHE STRINGS "IpOpt library")
+set(IPOPT_LIBRARIES ${IPOPT_DIR}/lib/libipopt.a ${IPOPT_DIR}/lib/libcoinhsl.a gfortran CACHE STRING "IpOpt library")
 set(CERES_INCLUDE_DIRS "" "/usr/include/eigen3" CACHE PATH "CERES include directories")
-set(CERES_LIBRARIES "" CACHE STRINGS "CERES libraries")
+set(CERES_LIBRARIES "" CACHE STRING "CERES libraries")
 set(PARPE_ENABLE_IPOPT TRUE CACHE BOOL "Enable ipopt optimizer?")
 set(PARPE_ENABLE_CERES TRUE CACHE BOOL "Enable ceres optimizer?")
 set(PARPE_ENABLE_DLIB FALSE CACHE BOOL "Enable dlib optimizers?")

diff --git a/README.md b/README.md
@@ -1,7 +1,8 @@
 [![Run Status](https://api.shippable.com/projects/59463d3e8993d7070010407b/badge?branch=master)](https://app.shippable.com/github/dweindl/parPE)
 [![Coverage Badge](https://api.shippable.com/projects/59463d3e8993d7070010407b/coverageBadge?branch=master)](https://app.shippable.com/github/dweindl/parPE)
 [![Codacy Badge](https://api.codacy.com/project/badge/Grade/1f1ee5a0d90d431499f200a148fb7fdc)](https://www.codacy.com?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=ICB-DCM/parPE&amp;utm_campaign=Badge_Grade)
-[![DOI](https://zenodo.org/badge/92953596.svg)](https://zenodo.org/badge/latestdoi/92953596)
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3478612.svg)](https://doi.org/10.5281/zenodo.3478612)
+
 
 # parPE
 

diff --git a/deps/AMICI/ThirdParty/sundials/README.md b/deps/AMICI/ThirdParty/sundials/README.md
@@ -114,7 +114,7 @@ reporting work done with SUNDIALS:
 * Alan C. Hindmarsh, Peter N. Brown, Keith E. Grant, Steven L. Lee, Radu
 Serban, Dan E. Shumaker, and Carol S. Woodward. 2005. SUNDIALS: Suite of
 nonlinear and differential/algebraic equation solvers. ACM Trans. Math. Softw.
-31, 3 (September 2005), 363-396. DOI=http://dx.doi.org/10.1145/1089014.1089020
+31, 3 (September 2005), 363-396. DOI=https://doi.org/10.1145/1089014.1089020
 
 ## License ##
 SUNDIALS is released under the BSD 3-clause license. See the [LICENSE](./LICENSE)

diff --git a/deps/AMICI/documentation/amici_refs.bib b/deps/AMICI/documentation/amici_refs.bib
@@ -300,7 +300,7 @@ @article{LoosMoe2018
 	Title = {A Hierarchical, Data-Driven Approach to Modeling Single-Cell Populations Predicts Latent Causes of Cell-To-Cell Variability},
 	Volume = {6},
 	Year = {2018},
-	Bdsk-Url-1 = {http://dx.doi.org/10.1016/j.cels.2018.04.008}}
+	Bdsk-Url-1 = {https://doi.org/10.1016/j.cels.2018.04.008}}
 
 @article{MaierLoo2017,
 	Author = {Maier, C. and Loos, C. and Hasenauer, J.},
@@ -457,7 +457,7 @@ @Article{SchmiesterSch2019
   year     = {2019},
   month    = {07},
   issn     = {1367-4803},
-  abstract = {{Mechanistic models of biochemical reaction networks facilitate the quantitative understanding of biological processes and the integration of heterogeneous datasets. However, some biological processes require the consideration of comprehensive reaction networks and therefore large-scale models. Parameter estimation for such models poses great challenges, in particular when the data are on a relative scale.Here, we propose a novel hierarchical approach combining (i) the efficient analytic evaluation of optimal scaling, offset, and error model parameters with (ii) the scalable evaluation of objective function gradients using adjoint sensitivity analysis. We evaluate the properties of the methods by parameterizing a pan-cancer ordinary differential equation model (\\&gt;1000 state variables, \\&gt;4000 parameters) using relative protein, phospho-protein and viability measurements. The hierarchical formulation improves optimizer performance considerably. Furthermore, we show that this approach allows estimating error model parameters with negligible computational overhead when no experimental estimates are available, providing an unbiased way to weight heterogeneous data. Overall, our hierarchical formulation is applicable to a wide range of models, and allows for the efficient parameterization of large-scale models based on heterogeneous relative measurements.Supplementary information are available at Bioinformatics online. Supplementary code and data are available online at http://doi.org/10.5281/zenodo.3254429 and http://doi.org/10.5281/zenodo.3254441.}},
+  abstract = {{Mechanistic models of biochemical reaction networks facilitate the quantitative understanding of biological processes and the integration of heterogeneous datasets. However, some biological processes require the consideration of comprehensive reaction networks and therefore large-scale models. Parameter estimation for such models poses great challenges, in particular when the data are on a relative scale.Here, we propose a novel hierarchical approach combining (i) the efficient analytic evaluation of optimal scaling, offset, and error model parameters with (ii) the scalable evaluation of objective function gradients using adjoint sensitivity analysis. We evaluate the properties of the methods by parameterizing a pan-cancer ordinary differential equation model (\\&gt;1000 state variables, \\&gt;4000 parameters) using relative protein, phospho-protein and viability measurements. The hierarchical formulation improves optimizer performance considerably. Furthermore, we show that this approach allows estimating error model parameters with negligible computational overhead when no experimental estimates are available, providing an unbiased way to weight heterogeneous data. Overall, our hierarchical formulation is applicable to a wide range of models, and allows for the efficient parameterization of large-scale models based on heterogeneous relative measurements.Supplementary information are available at Bioinformatics online. Supplementary code and data are available online at https://doi.org/10.5281/zenodo.3254429 and https://doi.org/10.5281/zenodo.3254441.}},
   doi      = {10.1093/bioinformatics/btz581},
   eprint   = {http://oup.prod.sis.lan/bioinformatics/advance-article-pdf/doi/10.1093/bioinformatics/btz581/29004243/btz581.pdf},
   url      = {https://doi.org/10.1093/bioinformatics/btz581},

diff --git a/doc/optimizationApplication.md b/doc/optimizationApplication.md
@@ -61,3 +61,10 @@ Run the created executable with the `-h`/`--help` argument.
 
   Note: These variables have no effect in case of shared-memory (non-MPI) execution
 
+- **PARPE_NUM_SIMULATION_TRIALS** (integer) and
+  **PARPE_INTEGRATION_TOLERANCE_RELAXATION_FACTOR** (float)
+
+  In case of simulation failure, parPE retries the AMICI simulation with a
+  `PARPE_INTEGRATION_TOLERANCE_RELAXATION_FACTOR`-fold higher error tolerance
+  for a total of `PARPE_NUM_SIMULATION_TRIALS` times (including the initial
+  attempt).
diff --git a/doc/snakemake_workflow.md b/doc/snakemake_workflow.md
@@ -23,7 +23,7 @@ provided in `snakemake/config.schema.yaml`.
 After that you can run the full pipeline with:
 
     cd snakemake
-    snakemake --configfile parpe_optimize_petab_steadystate.yaml postprocess
+    snakemake --configfile parpe_optimize_petab_steadystate.yaml -- postprocess
 
 This generates C++ code for the model, builds model-specific binaries for
 parameter estimation, runs the parameter estimation, and processes the results.

diff --git a/examples/parpeamici/steadystate/main_simulator.cpp b/examples/parpeamici/steadystate/main_simulator.cpp
@@ -1,22 +1,54 @@
+#include <parpecommon/parpeConfig.h>
+
 #include "steadyStateMultiConditionDataprovider.h"
 
 #include <parpeamici/standaloneSimulator.h>
 #include <parpecommon/misc.h>
+#include <parpecommon/parpeConfig.h>
 
 #include <cstdio> // remove
 #include <iostream>
+#include <stdexcept>
+
+#ifdef PARPE_ENABLE_MPI
+#include <mpi.h>
+#endif
 
 std::unique_ptr<amici::Model> getModel();
 
+void printUsage() {
+    std::cerr<<"Error: wrong number of arguments.\n";
+    std::cerr<<"Usage: ... CONDITION_FILE_NAME CONDITION_FILE_PATH "
+               "[PARAMETER_FILE_NAME PARAMETER_FILE_PATH] "
+               "OUTFILENAME OUTFILEPATH "
+               "--at-optimum|--along-trajectory "
+               "--mpi|--nompi\n";
+    // |--parameter-matrix=PATH-UNSUPPORTED
+}
+
 int main(int argc, char **argv) {
     int status = EXIT_SUCCESS;
 
-    parpe::initMpiIfNeeded(&argc, &argv);
+    if(argc != 7 && argc != 9) {
+        printUsage();
+        return EXIT_FAILURE;
+    }
+
+
+    if(std::string(argv[argc -1]) == "--mpi") {
+#ifdef PARPE_ENABLE_MPI
+        MPI_Init(&argc, &argv);
+#else
+        throw std::runtime_error("parPE was built without MPI support.");
+#endif
+    } else if(std::string(argv[argc -1]) == "--nompi") {
+        ;
+    } else {
+        printUsage();
+        return EXIT_FAILURE;
+    }
 
-    switch(argc)
-    {
-    case 6:
-    {
+    if(argc == 7) {
         std::string dataFileName = argv[1];
         std::string dataFilePath = argv[2];
         std::string resultFileName = argv[3];
@@ -32,10 +64,7 @@ int main(int argc, char **argv) {
                                      dataFileName, dataFilePath,
                                      dataFileName, dataFilePath,
                                      resultFileName, resultPath);
-        break;
-    }
-    case 8:
-    {
+    } else if(argc == 9) {
         // simulate on test set: need optimizer result and test set data as inputs
         std::string conditionFileName = argv[1];
         std::string conditionFilePath = argv[2];
@@ -66,12 +95,6 @@ int main(int argc, char **argv) {
                                      conditionFileName, conditionFilePath,
                                      parameterFileName, parameterFilePath,
                                      resultFileName, resultPath);
-        break;
-    }
-    default:
-        std::cerr<<"Error: wrong number of arguments.\n";
-        std::cerr<<"Usage: ... CONDITION_FILE_NAME CONDITION_FILE_PATH [PARAMETER_FILE_NAME PARAMETER_FILE_PATH] OUTFILENAME OUTFILEPATH --at-optimum|--along-trajectory\n"; // |--parameter-matrix=PATH-UNSUPPORTED
-        status = EXIT_FAILURE;
     }
 
     parpe::finalizeMpiIfNeeded();

diff --git a/examples/parpeamici/steadystate/run-examples.sh b/examples/parpeamici/steadystate/run-examples.sh
@@ -34,7 +34,7 @@ rm -rf example_steadystate_multi-test-optimize/
 rm -f simulate1.h5
 ./example_steadystate_multi_simulator \
   example_steadystate_multi-test-optimize/_rank00000.h5 / simulate1.h5 / \
-  --at-optimum 2>&1 > test.log
+  --at-optimum --nompi 2>&1 > test.log
 (! grep ERR test.log)
 (! grep WRN test.log)
 (! grep exception test.log)
@@ -48,7 +48,7 @@ test -f simulate1.h5
 # Run optimization with default settings
 
 rm -rf example_steadystate_multi-test-optimize/
-${MPIEXEC} ./example_steadystate_multi \
+${MPIEXEC} ./example_steadystate_multi --mpi \
   -o example_steadystate_multi-test-optimize/ ${HDF5_FILE} 2>&1 >> test.log
 (! grep ERR test.log)
 (! grep WRN test.log)
@@ -58,7 +58,7 @@ ${MPIEXEC} ./example_steadystate_multi \
 rm -f simulate2.h5
 ${MPIEXEC} ./example_steadystate_multi_simulator \
   example_steadystate_multi-test-optimize/_rank00000.h5 / simulate2.h5 / \
-  --along-trajectory 2>&1 >> test.log
+  --along-trajectory --mpi 2>&1 >> test.log
 (! grep ERR test.log)
 (! grep WRN test.log)
 (! grep exception test.log)
@@ -67,11 +67,10 @@ test -f simulate2.h5
 
 # Simulate on test set
 
-
 rm -f simulate3.h5
 ${MPIEXEC} ./example_steadystate_multi_simulator \
   ${HDF5_FILE_TEST} / example_steadystate_multi-test-optimize/_rank00000.h5 / \
-  simulate3.h5 / --at-optimum
+  simulate3.h5 / --at-optimum --mpi
 h5dump -d /multistarts/0/ySim/3 simulate3.h5 # test dataset exists
 (! grep ERR test.log)
 (! grep WRN test.log)

diff --git a/include/parpeamici/optimizationApplication.h b/include/parpeamici/optimizationApplication.h
@@ -35,7 +35,8 @@ class OptimizationApplication {
 
     /**
      * @brief User-provided problem initialization.
-     * Must set OptimizationApplication::problem, OptimizationApplication::multiStartOptimization and should set
+     * Must set OptimizationApplication::problem,
+     * OptimizationApplication::multiStartOptimization and should set
      * OptimizationApplication::resultWriter
      * @param inFileArgument
      * @param outFileArgument
@@ -45,14 +46,15 @@ class OptimizationApplication {
 
     /**
      * @brief Start the optimization run. Must only be called once.
+     * Initializes MPI if not already done.
+     * Must be called before any other functions.
      * @return status code; 0 on success
      */
     int run(int argc, char **argv);
 
     /**
      * @brief This is run by the MPI rank 0 process when started with multiple
      * processes.
-     * @return
      */
     virtual void runMaster();
 
@@ -105,14 +107,26 @@ class OptimizationApplication {
     static void initMPI(int *argc, char ***argv);
 
     /**
-     * @brief Parse command line Options.
-     * Must be called before any other functions.
-     * Initializes MPI if not already done.
+     * @brief Parse command line options before MPI_Init is potentially called.
+     *
+     * Used e.g. to print usage information without first initializing MPI.
+     *
+     * Argv may contain extra MPI arguments.
+     * @param argc
+     * @param argv
+     * @return
+     */
+    virtual int parseCliOptionsPreMpiInit(int argc, char **argv);
+
+    /**
+     * @brief Parse command line options after MPI_Init is called.
+     *
+     * Any MPI-related CLI arguments will be removed here.
      * @param argc
      * @param argv
      * @return
      */
-    virtual int parseOptions(int argc, char **argv);
+    virtual int parseCliOptionsPostMpiInit(int argc, char **argv);
 
     /**
      * @brief Print CLI usage
@@ -134,12 +148,13 @@ class OptimizationApplication {
 
 protected:
     // command line option parsing
-    const char *shortOptions = "dhvt:o:s:";
-    struct option const longOptions[8] = {
+    const char *shortOptions = "dhvmt:o:s:";
+    struct option const longOptions[9] = {
         {"debug", no_argument, NULL, 'd'},
         {"print-worklist", no_argument, NULL, 'p'},
         {"help", no_argument, NULL, 'h'},
         {"version", no_argument, NULL, 'v'},
+        {"mpi", no_argument, NULL, 'm'},
         {"task", required_argument, NULL, 't'},
         {"outfile-prefix", required_argument, NULL, 'o'},
         {"first-start-idx", required_argument, NULL, 's'},
@@ -158,9 +173,10 @@ class OptimizationApplication {
     // these need to be filled in by subclasses
     std::unique_ptr<MultiStartOptimizationProblem> multiStartOptimizationProblem;
     std::unique_ptr<OptimizationProblem> problem;
-    hid_t file_id;
+    hid_t file_id = 0;
     OperationType operationType = OperationType::parameterEstimation;
     LoadBalancerMaster loadBalancer;
+    bool withMPI = false;
 };
 
 

diff --git a/include/parpecommon/misc.h b/include/parpecommon/misc.h
@@ -112,6 +112,14 @@ void fillArrayRandomDoubleSameInterval(double min, double max, gsl::span<double>
 int getMpiRank();
 int getMpiCommSize();
 int getMpiActive();
+
+/**
+ * @brief Was application launched by mpiexec?
+ *
+ * Make an educated guess whether the application was launched with mpiexec
+ * or similar and therefore requires MPI_Init.
+ * @return True if probably launched by mpiexec
+ */
 bool launchedWithMpi();
 
 void initMpiIfNeeded(int *argc, char ***argv);