From 0d14338a48e17e90713e29c217e101779aea588e Mon Sep 17 00:00:00 2001
From: odunbar
Date: Wed, 15 Nov 2023 13:26:51 -0800
Subject: [PATCH] lorenz docs

---
 .../emulators/lorenz_integrator_3d_3d.md   | 147 +++++++++++++++++-
 .../examples/emulators/regression_2d_2d.md |   2 +-
 examples/Emulator/L63/emulate.jl           |  62 ++++----
 3 files changed, 178 insertions(+), 33 deletions(-)

diff --git a/docs/src/examples/emulators/lorenz_integrator_3d_3d.md b/docs/src/examples/emulators/lorenz_integrator_3d_3d.md
index b8a13aa4..143704a9 100644
--- a/docs/src/examples/emulators/lorenz_integrator_3d_3d.md
+++ b/docs/src/examples/emulators/lorenz_integrator_3d_3d.md
@@ -1,7 +1,150 @@
# Integrating Lorenz 63 with an Emulated integrator

In this example, we directly assess the performance of our machine learning emulators. The task is to learn the forward Euler integrator of a [Lorenz 63 system](https://en.wikipedia.org/wiki/Lorenz_system). The model parameters are set to their classical values ``(\sigma, \rho, \beta) = (10, 28, \frac{8}{3})`` so that the system exhibits chaotic behavior. The discrete system is given as:

```math
\begin{aligned}
x(t_{n+1}) &= x(t_n) + 10\Delta t(y(t_n) - x(t_n))\\
y(t_{n+1}) &= y(t_n) + \Delta t(x(t_n)(28 - z(t_n)) - y(t_n))\\
z(t_{n+1}) &= z(t_n) + \Delta t(x(t_n)y(t_n) - \frac{8}{3}z(t_n))
\end{aligned}
```
where ``t_n = n\Delta t``. The data consist of pairs ``\{ (x(t_k),y(t_k),z(t_k)),\ (x(t_{k+1}),y(t_{k+1}),z(t_{k+1})) + \eta_k\}`` for 600 values of ``k``, with each output subjected to independent, additive Gaussian noise ``\eta_k \sim N(0,\Gamma_y)``.

We have several different emulator configurations in this example that the user can play around with. The goal is for the posterior mean of the emulator to approximate this discrete update map, or integrator, at any point on the Lorenz attractor, given only the sparse noisy data. To validate this, we recursively apply the trained emulator to the state, plotting the evolution of the trajectory and the marginal statistics of the state over short and long timescales. We include a repeats option (`n_repeats`) to run the randomized training over multiple trials and illustrate the robustness of the marginal statistics by plotting long-time marginal CDFs of the state.

We will refer to the scalar-output Gaussian process emulator as "GP", and to the scalar- or vector-output random feature emulators as "RF".
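For concreteness, the map that the emulators are trained to learn can be written as a one-step update function. The following is a minimal illustrative sketch based only on the equations above; the name `euler_step` is ours and does not appear in the example code, which instead defines the continuous-time right-hand side and integrates it with `OrdinaryDiffEq`:
```julia
# One forward-Euler step of the Lorenz 63 state u = (x, y, z),
# with the classical parameters (σ, ρ, β) = (10, 28, 8/3) written in explicitly.
euler_step(u, dt) = [
    u[1] + dt * 10 * (u[2] - u[1]),
    u[2] + dt * (u[1] * (28 - u[3]) - u[2]),
    u[3] + dt * (u[1] * u[2] - (8 / 3) * u[3]),
]
```
Iterating this map from an initial condition reproduces the trajectories that are generated below with the `Euler()` solver.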
## Walkthrough of the code

We first import some standard packages
```julia
using Random, Distributions, LinearAlgebra # utilities
using CairoMakie, ColorSchemes # for plots
using JLD2 # for saved data
```
For the true integrator of the Lorenz system we import
```julia
using OrdinaryDiffEq
```
For the relevant CES packages needed to define the emulators and kernel structures we import
```julia
using CalibrateEmulateSample.Emulators
# Contains `Emulator`, `GaussianProcess`, `ScalarRandomFeatureInterface`, `VectorRandomFeatureInterface`
# `GPJL`, `SeparableKernel`, `NonseparableKernel`, `OneDimFactor`, `LowRankFactor`, `DiagonalFactor`
using CalibrateEmulateSample.DataContainers # Contains `PairedDataContainer`
```
and, if one wants to experiment with the optimizer options for the random feature emulators, we import
```julia
using CalibrateEmulateSample.EnsembleKalmanProcesses # Contains `DataMisfitController`
```

We generate the truth data using `OrdinaryDiffEq`: the time series `sol` is used for training data, `sol_test` is used for plotting short-time trajectories, and `sol_hist` for plotting histograms of the state over long times:
```julia
# Run L63 from 0 -> tmax
u0 = [1.0; 0.0; 0.0]
tmax = 20
dt = 0.01
tspan = (0.0, tmax)
prob = ODEProblem(lorenz, u0, tspan)
sol = solve(prob, Euler(), dt = dt)

# Run L63 from end for test trajectory data
tmax_test = 100
tspan_test = (0.0, tmax_test)
u0_test = sol.u[end]
prob_test = ODEProblem(lorenz, u0_test, tspan_test)
sol_test = solve(prob_test, Euler(), dt = dt)

# Run L63 from end for histogram matching data
tmax_hist = 1000
tspan_hist = (0.0, tmax_hist)
u0_hist = sol_test.u[end]
prob_hist = ODEProblem(lorenz, u0_hist, tspan_hist)
sol_hist = solve(prob_hist, Euler(), dt = dt)
```

We generate the training data from `sol` within `[tburn, tmax]`. The user can choose the number of training points (`n_train_pts`) and whether they are selected randomly or sequentially (`sample_rand`). The true outputs are perturbed by additive noise of variance `1e-4`, and the input-output pairs are stored in the compatible data format `PairedDataContainer`
```julia
tburn = 1 # NB works better with no spin-up!
burnin = Int(floor(tburn / dt))
n_train_pts = 600
sample_rand = true
if sample_rand
    ind = Int.(shuffle!(rng, Vector(burnin:(tmax / dt - 1)))[1:n_train_pts])
else
    ind = burnin:(n_train_pts + burnin)
end
n_tp = length(ind)
input = zeros(3, n_tp)
output = zeros(3, n_tp)
Γy = 1e-4 * I(3)
noise = rand(rng, MvNormal(zeros(3), Γy), n_tp)
for i in 1:n_tp
    input[:, i] = sol.u[ind[i]]
    output[:, i] = sol.u[ind[i] + 1] + noise[:, i]
end
iopairs = PairedDataContainer(input, output)
```
We have several cases that the user can play with:
```julia
cases = ["GP", "RF-scalar", "RF-scalar-diagin", "RF-svd-nonsep", "RF-nosvd-nonsep", "RF-nosvd-sep"]
```
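In the example script `examples/Emulator/L63/emulate.jl`, a single configuration is selected by picking one entry of this list; at the time of this patch the script selects the fifth entry (shown here for orientation, the comment is ours):
```julia
case = cases[5] # "RF-nosvd-nonsep": vector random features, no output decorrelation
```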
Then, looping over the repeats, we first define some common hyperparameter optimization options for the `"RF-"` cases. Here these options are used primarily for diagnostics and acceleration; they are not required in general to solve this problem.
```julia
rf_optimizer_overrides = Dict(
    "verbose" => true, # output diagnostics from the hyperparameter optimizer
    "scheduler" => DataMisfitController(terminate_at = 1e4), # timestepping method for the optimizer
    "cov_sample_multiplier" => 0.5, # 0.5 * default number of samples to estimate covariances in the optimizer
    "n_features_opt" => 200, # number of features during hyperparameter optimization
    "n_iteration" => 20, # iterations of the optimizer solver
)
```
Then we build the machine learning tools. Here we highlight the scalar-output Gaussian process (`GP`), where we use the default squared-exponential kernel and learn a lengthscale hyperparameter in each input dimension. To handle multiple outputs we use a decorrelation of the output space, and so we actually train three of these models.
```julia
gppackage = Emulators.GPJL() # use GaussianProcesses.jl
pred_type = Emulators.YType() # predicted variances are for the data, not the latent function
mlt = GaussianProcess(
    gppackage;
    prediction_type = pred_type,
    noise_learn = false, # do not additionally learn a white kernel
)
```
We also highlight the vector random feature emulator with a nonseparable kernel (`RF-nosvd-nonsep`), which can natively handle multiple outputs without decorrelation of the output space. This kernel is a rank-3 representation with a small nugget term.
```julia
nugget = 1e-12
kernel_structure = NonseparableKernel(LowRankFactor(3, nugget))
n_features = 500 # number of features for prediction
mlt = VectorRandomFeatureInterface(
    n_features,
    3, # input dimension
    3, # output dimension
    rng = rng, # pin random number generator
    kernel_structure = kernel_structure,
    optimizer_options = rf_optimizer_overrides,
)
```
With the machine learning tools specified, we build the emulator object
```julia
# decorrelate = true for `GP`
# decorrelate = false for `RF-nosvd-nonsep`
emulator = Emulator(mlt, iopairs; obs_noise_cov = Γy, decorrelate = decorrelate)
optimize_hyperparameters!(emulator) # some GP packages require this additional call
```

## Plots
-Here are a selection of the predicted

We recursively apply the trained emulator mean over the short-timescale validation trajectory
```julia
u_test_tmp = zeros(3, length(xspan_test))
u_test_tmp[:, 1] = sol_test.u[1] # initialize at the final-time solution of the training period

for i in 1:(length(xspan_test) - 1)
    rf_mean, _ = predict(emulator, u_test_tmp[:, i:i], transform_to_real = true) # 3x1 matrix
    u_test_tmp[:, i + 1] = rf_mean
end
```
We then produce the following plots. In all figures, trajectories evolved with the true integrator are shown in orange, and trajectories evolved with the emulated integrators in blue. The other trajectories (for example, the long-time trajectory used for the histogram and CDF plots) are generated in the same way, as sketched below.
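As a sketch of "the same way", the long-time trajectory used for the histograms can be generated with an analogous recursion. Here `xspan_hist` is assumed to be a time grid matching `sol_hist` (by analogy with `xspan_test` above), and the variable names are illustrative rather than taken verbatim from the example script:
```julia
u_hist_tmp = zeros(3, length(xspan_hist))
u_hist_tmp[:, 1] = sol_hist.u[1] # initialize at the start of the long histogram-matching run

for i in 1:(length(xspan_hist) - 1)
    # recursively apply the emulated one-step integrator to its own output
    emulated_mean, _ = predict(emulator, u_hist_tmp[:, i:i], transform_to_real = true) # 3x1 matrix
    u_hist_tmp[:, i + 1] = emulated_mean
end
```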
### Gaussian Process Emulator (Sci-kit learn: `GP`)
For one example fit
```@raw html

```


```
-We also The CDFs over 20 randomized fits of the random feature optimization
and here are CDFs over 20 randomized trials of the random feature hyperparameter optimization
```@raw html

```
diff --git a/docs/src/examples/emulators/regression_2d_2d.md b/docs/src/examples/emulators/regression_2d_2d.md
index 16e07247..8b1df23a 100644
--- a/docs/src/examples/emulators/regression_2d_2d.md
+++ b/docs/src/examples/emulators/regression_2d_2d.md
@@ -7,7 +7,7 @@ G\colon [0,2\pi]^2 \to \mathbb{R}^2, G(x_1,x_2) = (\sin(x_1) + \cos(x_2), \sin(x
```
observed at 150 points, subject to additive (and possibly correlated) Gaussian noise ``N(0,\Sigma)``.

-We have many different emulator configurations in this example that the user can play around with. The goal of this example is to predict the function (i.e. posterior mean) and uncertainty (i.e posterior pointwise variance) on a ``200\times200`` grid providing a mean square error between emulated and true function and with `plot_flag = true` we also save several plots.
+We have several different emulator configurations in this example that the user can play around with. The goal of this example is to predict the function (i.e. posterior mean) and uncertainty (i.e. posterior pointwise variance) on a ``200\times200`` grid, providing a mean square error between the emulated and true function; with `plot_flag = true` we also save several plots.

We will term scalar-output Gaussian process emulator as "GP", scalar-output random feature emulator as "scalar RF", and vector-output random feature emulator as "vector RF" henceforth.

## Walkthrough of the code
diff --git a/examples/Emulator/L63/emulate.jl b/examples/Emulator/L63/emulate.jl
index e040901a..c2019ca7 100644
--- a/examples/Emulator/L63/emulate.jl
+++ b/examples/Emulator/L63/emulate.jl
@@ -5,13 +5,9 @@ using CairoMakie, ColorSchemes #for plots
using JLD2

# CES
-using CalibrateEmulateSample
using CalibrateEmulateSample.Emulators
-using CalibrateEmulateSample.ParameterDistributions
using CalibrateEmulateSample.EnsembleKalmanProcesses
using CalibrateEmulateSample.DataContainers
-EKP = CalibrateEmulateSample.EnsembleKalmanProcesses
-

function lorenz(du, u, p, t)
du[1] = 10.0 * (u[2] - u[1])
@@ -21,12 +17,19 @@ end

function main()

+ output_directory = joinpath(@__DIR__, "output")
+ if !isdir(output_directory)
+ mkdir(output_directory)
+ end
+
# rng
rng = MersenneTwister(1232434)

n_repeats = 20 # repeat exp with same data.
println("run experiment $n_repeats times")

+
+
# Run L63 from 0 -> tmax
u0 = [1.0; 0.0; 0.0]
tmax = 20
@@ -35,7 +38,7 @@ function main()
prob = ODEProblem(lorenz, u0, tspan)
sol = solve(prob, Euler(), dt = dt)

- # Run L63 from end for test trajetory data
+ # Run L63 from end for test trajectory data
tmax_test = 100
tspan_test = (0.0, tmax_test)
u0_test = sol.u[end]
@@ -67,7 +70,7 @@ function main()

# Create training pairs (with noise) from subsampling [burnin,tmax]
tburn = 1 # NB works better with no spin-up!
burnin = Int(floor(tburn / dt)) - n_train_pts = 600 #600 + n_train_pts = 600 sample_rand = true if sample_rand ind = Int.(shuffle!(rng, Vector(burnin:(tmax / dt - 1)))[1:n_train_pts]) @@ -90,20 +93,18 @@ function main() cases = ["GP", "RF-scalar", "RF-scalar-diagin", "RF-svd-nonsep", "RF-nosvd-nonsep", "RF-nosvd-sep"] case = cases[5] - decorrelate = true + nugget = Float64(1e-12) u_test = [] u_hist = [] train_err = [] for rep_idx in 1:n_repeats - overrides = Dict( - "verbose" => true, + rf_optimizer_overrides = Dict( "scheduler" => DataMisfitController(terminate_at = 1e4), "cov_sample_multiplier" => 0.5, "n_features_opt" => 200, "n_iteration" => 20, - # "accelerator" => NesterovAccelerator(), ) # Build ML tools @@ -112,11 +113,9 @@ function main() pred_type = Emulators.YType() mlt = GaussianProcess( gppackage; - kernel = nothing, # use default squared exponential kernel prediction_type = pred_type, noise_learn = false, ) - elseif case ∈ ["RF-scalar", "RF-scalar-diagin"] n_features = 10 * Int(floor(sqrt(3 * n_tp))) kernel_structure = @@ -127,7 +126,7 @@ function main() 3, rng = rng, kernel_structure = kernel_structure, - optimizer_options = overrides, + optimizer_options = rf_optimizer_overrides, ) elseif case ∈ ["RF-svd-nonsep"] kernel_structure = NonseparableKernel(LowRankFactor(6, nugget)) @@ -139,35 +138,38 @@ function main() 3, rng = rng, kernel_structure = kernel_structure, - optimizer_options = overrides, + optimizer_options = rf_optimizer_overrides, ) elseif case ∈ ["RF-nosvd-nonsep"] kernel_structure = NonseparableKernel(LowRankFactor(3, nugget)) n_features = 500 - decorrelate = false # don't do SVD mlt = VectorRandomFeatureInterface( n_features, 3, 3, rng = rng, kernel_structure = kernel_structure, - optimizer_options = overrides, - ) + optimizer_options = rf_optimizer_overrides, + ) elseif case ∈ ["RF-nosvd-sep"] kernel_structure = SeparableKernel(LowRankFactor(3, nugget), LowRankFactor(3, nugget)) n_features = 500 - decorrelate = false # don't do SVD mlt = VectorRandomFeatureInterface( n_features, 3, 3, rng = rng, kernel_structure = kernel_structure, - optimizer_options = overrides, + optimizer_options = rf_optimizer_overrides, ) end # Emulate + if case ∈ ["RF-nosvd-nonsep","RF-nosvd-sep"] + decorrelate = false + else + decorrelate = true + end emulator = Emulator(mlt, iopairs; obs_noise_cov = Γy, decorrelate = decorrelate) optimize_hyperparameters!(emulator) @@ -219,8 +221,8 @@ function main() current_figure() # save - save(case * "_l63_test.png", f, px_per_unit = 3) - save(case * "_l63_test.pdf", f, pt_per_unit = 3) + save(joinpath(output_directory, case * "_l63_test.png"), f, px_per_unit = 3) + save(joinpath(output_directory, case * "_l63_test.pdf"), f, pt_per_unit = 3) # plot attractor f3 = Figure() @@ -228,8 +230,8 @@ function main() lines(f3[2, 1], solplot[1, :], solplot[3, :], color = :orange) # save - save(case * "_l63_attr.png", f3, px_per_unit = 3) - save(case * "_l63_attr.pdf", f3, pt_per_unit = 3) + save(joinpath(output_directory, case * "_l63_attr.png"), f3, px_per_unit = 3) + save(joinpath(output_directory, case * "_l63_attr.pdf"), f3, pt_per_unit = 3) # plotting histograms f2 = Figure() @@ -242,16 +244,16 @@ function main() hist!(f2[1, 3], solhist[3, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) # save - save(case * "_l63_pdf.png", f2, px_per_unit = 3) - save(case * "_l63_pdf.pdf", f2, pt_per_unit = 3) + save(joinpath(output_directory, case * "_l63_pdf.png"), f2, px_per_unit = 3) + save(joinpath(output_directory, case * "_l63_pdf.pdf"), f2, 
pt_per_unit = 3) end end # save data - JLD2.save(case * "_l63_trainerr.jld2", "train_err", train_err) - JLD2.save(case * "_l63_histdata.jld2", "solhist", solhist, "uhist", u_hist) - JLD2.save(case * "_l63_testdata.jld2", "solplot", solplot, "uplot", u_test) + JLD2.save(joinpath(output_directory, case * "_l63_trainerr.jld2"), "train_err", train_err) + JLD2.save(joinpath(output_directory, case * "_l63_histdata.jld2"), "solhist", solhist, "uhist", u_hist) + JLD2.save(joinpath(output_directory, case * "_l63_testdata.jld2"), "solplot", solplot, "uplot", u_test) # compare marginal histograms to truth - rough measure of fit sol_cdf = sort(solhist, dims = 2) @@ -282,8 +284,8 @@ function main() # save - save(case * "_l63_cdfs.png", f4, px_per_unit = 3) - save(case * "_l63_cdfs.pdf", f4, pt_per_unit = 3) + save(joinpath(output_directory, case * "_l63_cdfs.png"), f4, px_per_unit = 3) + save(joinpath(output_directory, case * "_l63_cdfs.pdf"), f4, pt_per_unit = 3) end