From 47572e4a1deae840f5d3a0da1cd8ad5d45bd99e8 Mon Sep 17 00:00:00 2001 From: florentiner Date: Fri, 9 Aug 2024 13:03:13 +0300 Subject: [PATCH 1/6] add mini batches for operator --- examples/example_Allen_Cahn_batches.py | 121 ++++++++++++++++++ .../example_wave_adaptive_lambdas_batches.py | 115 +++++++++++++++++ .../example_weak_wave_periodic_batches.py | 110 ++++++++++++++++ tedeous/callbacks/adaptive_lambda.py | 2 +- tedeous/callbacks/plot.py | 1 + tedeous/eval.py | 34 ++++- tedeous/losses.py | 17 ++- tedeous/model.py | 25 ++-- tedeous/solution.py | 30 ++++- 9 files changed, 430 insertions(+), 25 deletions(-) create mode 100644 examples/example_Allen_Cahn_batches.py create mode 100644 examples/example_wave_adaptive_lambdas_batches.py create mode 100644 examples/example_weak_wave_periodic_batches.py diff --git a/examples/example_Allen_Cahn_batches.py b/examples/example_Allen_Cahn_batches.py new file mode 100644 index 00000000..277b0a21 --- /dev/null +++ b/examples/example_Allen_Cahn_batches.py @@ -0,0 +1,121 @@ +import torch +import numpy as np +import sys +import os + +os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' +sys.path.append(os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..'))) + +from tedeous.data import Domain, Conditions, Equation +from tedeous.model import Model +from tedeous.callbacks import cache, early_stopping, plot +from tedeous.optimizers.optimizer import Optimizer +from tedeous.device import solver_device +from tedeous.models import Fourier_embedding + +solver_device('gpu') + +# if the casual_loss is used the time parameter must be +# at the first place in the grid + +domain = Domain() + +domain.variable('t', [0, 1], 51, dtype='float32') +domain.variable('x', [-1, 1], 51, dtype='float32') + +boundaries = Conditions() + +# Initial conditions at t=0 +x = domain.variable_dict['x'] + +value = x**2*torch.cos(np.pi*x) + +boundaries.dirichlet({'x': [-1, 1], 't': 0}, value=value) + + +# Initial conditions at t=1 +boundaries.periodic([{'x': -1, 't': [0, 1]}, {'x': 1, 't': [0, 1]}]) + +bop3= { + 'du/dx': + { + 'coeff': 1, + 'du/dx': [1], + 'pow': 1, + 'var': 0 + } +} + +boundaries.periodic([{'x': -1, 't': [0, 1]}, {'x': 1, 't': [0, 1]}], operator=bop3) + +equation = Equation() + +AC = { + '1*du/dt**1': + { + 'coeff': 1, + 'du/dt': [0], + 'pow': 1, + 'var': 0 + }, + '-0.0001*d2u/dx2**1': + { + 'coeff': -0.0001, + 'd2u/dx2': [1,1], + 'pow': 1, + 'var': 0 + }, + '+5u**3': + { + 'coeff': 5, + 'u': [None], + 'pow': 3, + 'var': 0 + }, + '-5u**1': + { + 'coeff': -5, + 'u': [None], + 'pow': 1, + 'var': 0 + } +} + +equation.add(AC) + +FFL = Fourier_embedding(L=[None, 2], M=[None, 10]) + +out = FFL.out_features + +net = torch.nn.Sequential( + FFL, + torch.nn.Linear(out, 128), + torch.nn.Tanh(), + torch.nn.Linear(128,128), + torch.nn.Tanh(), + torch.nn.Linear(128,128), + torch.nn.Tanh(), + torch.nn.Linear(128,1) +) + +model = Model(net, domain, equation, boundaries, batch_size=32) + +model.compile('autograd', lambda_operator=1, lambda_bound=100, tol=10) + +img_dir = os.path.join(os.path.dirname( __file__ ), 'AC_eq_img') + +cb_cache = cache.Cache(cache_verbose=False, model_randomize_parameter=1e-5) + +cb_es = early_stopping.EarlyStopping(eps=1e-7, + loss_window=100, + no_improvement_patience=1000, + patience=5, + abs_loss=1e-5, + info_string_every=1000, + randomize_parameter=1e-5) + +cb_plots = plot.Plots(save_every=1000, print_every=None, img_dir=img_dir) + +optimizer = Optimizer('Adam', {'lr': 1e-3}, gamma=0.9, decay_every=1000) + +model.train(optimizer, 1e5, save_model=True, 
callbacks=[cb_cache, cb_es, cb_plots]) diff --git a/examples/example_wave_adaptive_lambdas_batches.py b/examples/example_wave_adaptive_lambdas_batches.py new file mode 100644 index 00000000..78807ec0 --- /dev/null +++ b/examples/example_wave_adaptive_lambdas_batches.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon May 31 12:33:44 2021 + +@author: user +""" +import torch +import numpy as np +import os +import sys + +os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' +sys.path.append(os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..'))) + +from tedeous.data import Domain, Conditions, Equation +from tedeous.model import Model +from tedeous.callbacks import early_stopping, plot, adaptive_lambda +from tedeous.optimizers.optimizer import Optimizer +from tedeous.device import solver_device + +""" +Preparing grid + +Grid is an essentially torch.Tensor of a n-D points where n is the problem +dimensionality +""" + +solver_device('cuda') + +domain = Domain() +domain.variable('x', [0, 1], 20) +domain.variable('t', [0, 1], 20) + +A = 0.5 +C = 2 + +def func(grid): + x, t = grid[:,1],grid[:,0] + return torch.sin(np.pi * x) * torch.cos(C * np.pi * t) + \ + A * torch.sin(2 * C * np.pi * x) * torch.cos(4 * C * np.pi * t) + +boundaries = Conditions() + +# Initial conditions at t=0 +boundaries.dirichlet({'t': [0, 1], 'x': 0}, value=func) + +# Boundary conditions at x=1 +boundaries.dirichlet({'t': [0, 1], 'x': 1}, value=func) + +# Initial conditions at t=0 +boundaries.dirichlet({'t': 0, 'x': [0, 1]}, value=func) + +# Initial conditions (operator) at t=0 +bop4= { + 'du/dt': + { + 'coeff': 1, + 'du/dt': [0], + 'pow': 1, + } +} +boundaries.operator({'t': 0, 'x': [0, 1]}, operator=bop4, value=func) + +equation = Equation() + +# operator is 4*d2u/dx2-1*d2u/dt2=0 +wave_eq = { + '-C*d2u/dx2**1': + { + 'coeff': -4, + 'd2u/dx2': [1, 1], + 'pow': 1 + }, + 'd2u/dt2**1': + { + 'coeff': 1, + 'd2u/dt2': [0, 0], + 'pow':1 + } +} + +equation.add(wave_eq) + +net = torch.nn.Sequential( + torch.nn.Linear(2, 256), + torch.nn.Tanh(), + torch.nn.Linear(256, 256), + torch.nn.Tanh(), + torch.nn.Linear(256, 256), + torch.nn.Tanh(), + torch.nn.Linear(256, 256), + torch.nn.Tanh(), + torch.nn.Linear(256, 1)) + +model = Model(net, domain, equation, boundaries, batch_size=32) + +model.compile("autograd", lambda_operator=1, lambda_bound=100) + +cb_es = early_stopping.EarlyStopping(eps=1e-7, + loss_window=1000, + no_improvement_patience=1000, + patience=10, + randomize_parameter=1e-5, + abs_loss=0.1, + info_string_every=500) + +img_dir=os.path.join(os.path.dirname( __file__ ), 'wave_eq_img') + +cb_plots = plot.Plots(save_every=500, print_every=None, img_dir=img_dir) + +cb_lambda = adaptive_lambda.AdaptiveLambda() + +optimizer = Optimizer('Adam', {'lr': 1e-3}, gamma=0.9, decay_every=1000) + +model.train(optimizer, 1e5, save_model=False, callbacks=[cb_es, cb_plots, cb_lambda]) diff --git a/examples/example_weak_wave_periodic_batches.py b/examples/example_weak_wave_periodic_batches.py new file mode 100644 index 00000000..5a04f98c --- /dev/null +++ b/examples/example_weak_wave_periodic_batches.py @@ -0,0 +1,110 @@ +import torch +import numpy as np +import sys +import os +import time + +os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' +sys.path.append(os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..'))) + + +from tedeous.data import Domain, Conditions, Equation +from tedeous.model import Model +from tedeous.callbacks import early_stopping, plot +from tedeous.optimizers.optimizer import Optimizer +from tedeous.device 
import solver_device

+solver_device('cpu')
+# Grid
+domain = Domain()
+
+domain.variable('x', [0, 1], 50)
+domain.variable('t', [0, 1], 50)
+
+boundaries = Conditions()
+
+# u(x,0)=1e4*sin^2(x(x-1)/10)
+x = domain.variable_dict['x']
+func_bnd1 = lambda x: 10 ** 4 * torch.sin((1/10) * x * (x-1)) ** 2
+boundaries.dirichlet({'x': [0, 1], 't': 0}, value=func_bnd1(x))
+
+func_bnd2 = lambda x: 10 ** 3 * torch.sin((1/10) * x * (x-1)) ** 2
+# du/dt(x,0) = 1e3*sin^2(x(x-1)/10)
+bop2 = {
+    'du/dt':
+        {
+            'coeff': 1,
+            'du/dt': [1],
+            'pow': 1,
+            'var': 0
+        }
+}
+boundaries.operator({'x': [0, 1], 't': 0}, operator=bop2, value=func_bnd2(x))
+
+# u(0,t) = u(1,t)
+boundaries.periodic([{'x': 0, 't': [0, 1]}, {'x': 1, 't': [0, 1]}])
+
+# du/dx(0,t) = du/dx(1,t)
+bop4= {
+    'du/dx':
+        {
+            'coeff': 1,
+            'du/dx': [0],
+            'pow': 1,
+            'var': 0
+        }
+}
+boundaries.periodic([{'x': 0, 't': [0, 1]}, {'x': 1, 't': [0, 1]}], operator=bop4)
+
+equation = Equation()
+
+# wave equation is d2u/dt2-(1/4)*d2u/dx2=0
+C = 4
+wave_eq = {
+    'd2u/dt2':
+        {
+            'coeff': 1,
+            'd2u/dt2': [1, 1],
+            'pow': 1
+        },
+    '-1/C*d2u/dx2':
+        {
+            'coeff': -1/C,
+            'd2u/dx2': [0, 0],
+            'pow': 1
+        }
+}
+
+equation.add(wave_eq)
+
+net = torch.nn.Sequential(
+    torch.nn.Linear(2, 100),
+    torch.nn.Tanh(),
+    torch.nn.Linear(100, 100),
+    torch.nn.Tanh(),
+    torch.nn.Linear(100, 100),
+    torch.nn.Tanh(),
+    torch.nn.Linear(100, 1))
+
+def v(grid):
+    return torch.cos(grid[:,0])+grid[:,1]
+weak_form = [v]
+
+start = time.time()
+
+model = Model(net, domain, equation, boundaries, batch_size=32)
+
+model.compile("NN", lambda_operator=1, lambda_bound=1000, h=0.01)
+
+cb_es = early_stopping.EarlyStopping(eps=1e-6, no_improvement_patience=500, info_string_every=1000)
+
+img_dir = os.path.join(os.path.dirname( __file__ ), 'wave_periodic_weak_img')
+
+cb_plots = plot.Plots(save_every=100, print_every=None, img_dir=img_dir)
+
+optimizer = Optimizer('Adam', {'lr': 1e-2})
+
+model.train(optimizer, 1e5, save_model=True, callbacks=[cb_es, cb_plots])
+
+end = time.time()
+print('Time taken = ', end - start)
diff --git a/tedeous/callbacks/adaptive_lambda.py b/tedeous/callbacks/adaptive_lambda.py
index bfae503f..34caed4a 100644
--- a/tedeous/callbacks/adaptive_lambda.py
+++ b/tedeous/callbacks/adaptive_lambda.py
@@ -92,7 +92,7 @@ def lambda_update(self):
         true_bval = sln_cls.true_bval
         bval_keys = sln_cls.bval_keys
         bval_length = sln_cls.bval_length
-        op = sln_cls.op
+        op = sln_cls.op if sln_cls.batch_size is None else sln_cls.save_op # in batch mode, use the operator values accumulated over the epoch; otherwise those of the single evaluation
         self.op_list = sln_cls.op_list
         self.bval_list = sln_cls.bval_list
         self.loss_list = sln_cls.loss_list
diff --git a/tedeous/callbacks/plot.py b/tedeous/callbacks/plot.py
index 8cf69a24..29b65fdd 100644
--- a/tedeous/callbacks/plot.py
+++ b/tedeous/callbacks/plot.py
@@ -6,6 +6,7 @@
 from matplotlib import cm
 import torch
 from tedeous.callbacks.callback import Callback
+from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (import registers the '3d' projection)
 
 
 class Plots(Callback):
diff --git a/tedeous/eval.py b/tedeous/eval.py
index fcd92bbe..841b2c6d 100644
--- a/tedeous/eval.py
+++ b/tedeous/eval.py
@@ -8,6 +8,7 @@
 
 from tedeous.device import device_type, check_device
 from tedeous.utils import PadTransform
+from torch.utils.data import DataLoader
 
 
 def integration(func: torch.Tensor,
                 grid: torch.Tensor,
@@ -96,7 +97,8 @@ def __init__(self,
                  model: Union[torch.nn.Sequential, torch.Tensor],
                  mode: str,
                  weak_form: list[callable],
-                 derivative_points: int):
+                 derivative_points: int,
+                 batch_size: int = None):
         """
         Args:
             grid (torch.Tensor): grid (domain discretization).
@@ -106,6 +108,7 @@ def __init__(self,
             weak_form (list[callable]): list with basis functions (if the form is *weak*).
             derivative_points (int): points number for derivative calculation.
                                      For details to Derivative_mat class.
+            batch_size (int): mini-batch size; if None, the whole grid is used at once.
         """
         self.grid = check_device(grid)
         self.prepared_operator = prepared_operator
@@ -118,8 +121,24 @@ def __init__(self,
             self.sorted_grid = torch.cat(list(self.grid_dict.values()))
         elif self.mode in ('autograd', 'mat'):
             self.sorted_grid = self.grid
+        self.batch_size = batch_size
+        if self.batch_size is not None:
+            self.grid_loader = DataLoader(self.sorted_grid, batch_size=self.batch_size, shuffle=True,
+                                          generator=torch.Generator(device=device_type()))
+            self.n_batches = len(self.grid_loader)
+            del self.sorted_grid
+            torch.cuda.empty_cache()
+            self.init_mini_batches()
+            self.new_init = True
 
         self.derivative = Derivative(self.model, self.derivative_points).set_strategy(self.mode).take_derivative
+
+    def init_mini_batches(self):
+        """ Initialize the mini-batch iterator and fetch the first batch of grid points.
+
+        """
+        self.grid_iter = iter(self.grid_loader)
+        self.grid_batch = next(self.grid_iter)
 
     def apply_operator(self,
                        operator: list,
@@ -152,15 +171,24 @@ def _pde_compute(self) -> torch.Tensor:
             torch.Tensor: P/O DE residual.
         """
 
+        if self.batch_size is not None:
+            sorted_grid = self.grid_batch
+            try:
+                self.grid_batch = next(self.grid_iter)
+            except StopIteration: # no batches left, so reinitialize the iterator
+                self.init_mini_batches()
+                self.new_init = True
+        else:
+            sorted_grid = self.sorted_grid
         num_of_eq = len(self.prepared_operator)
         if num_of_eq == 1:
             op = self.apply_operator(
-                self.prepared_operator[0], self.sorted_grid).reshape(-1,1)
+                self.prepared_operator[0], sorted_grid).reshape(-1,1)
         else:
             op_list = []
             for i in range(num_of_eq):
                 op_list.append(self.apply_operator(
-                    self.prepared_operator[i], self.sorted_grid).reshape(-1,1))
+                    self.prepared_operator[i], sorted_grid).reshape(-1,1))
             op = torch.cat(op_list, 1)
         return op
diff --git a/tedeous/losses.py b/tedeous/losses.py
index 042f6733..242416db 100644
--- a/tedeous/losses.py
+++ b/tedeous/losses.py
@@ -15,18 +15,21 @@ def __init__(self,
                  mode: str,
                  weak_form: Union[None, list],
                  n_t: int,
-                 tol: Union[int, float]):
+                 tol: Union[int, float],
+                 n_t_operation: callable = None):
         """
         Args:
             mode (str): calculation mode, *NN, autograd, mat*.
             weak_form (Union[None, list]): list of basis functions if form is weak.
-            n_t (int): number of unique points in time dinension.
+            n_t (int): number of unique points in time dimension.
             tol (Union[int, float])): penalty in *casual loss*.
+            n_t_operation (callable): function that calculates n_t for each batch.
         """
         self.mode = mode
         self.weak_form = weak_form
         self.n_t = n_t
+        self.n_t_operation = n_t_operation
         self.tol = tol
 
     # TODO: refactor loss_op, loss_bcs into one function, carefully figure out when bval
     # is None + fix causal_loss operator crutch (line 76).
@@ -147,9 +150,13 @@ def _causal_loss(self,
             loss (torch.Tensor): loss.
             loss_normalized (torch.Tensor): loss, where regularization parameters are 1.
""" - - res = torch.sum(operator**2, dim=1).reshape(self.n_t, -1) - res = torch.mean(res, axis=1).reshape(self.n_t, 1) + if self.n_t_operation is not None: # calculate if batch mod + self.n_t = self.n_t_operation(operator) + try: + res = torch.sum(operator**2, dim=1).reshape(self.n_t, -1) + except: # if n_t_operation calculate bad n_t then change n_t to batch size + self.n_t = operator.size()[0] + res = torch.sum(operator**2, dim=1).reshape(self.n_t, -1) m = torch.triu(torch.ones((self.n_t, self.n_t), dtype=res.dtype), diagonal=1).T with torch.no_grad(): w = torch.exp(- self.tol * (m @ res)) diff --git a/tedeous/model.py b/tedeous/model.py index 910b0c19..e1b33669 100644 --- a/tedeous/model.py +++ b/tedeous/model.py @@ -21,13 +21,15 @@ def __init__( net: Union[torch.nn.Module, torch.Tensor], domain: Domain, equation: Equation, - conditions: Conditions): + conditions: Conditions, + batch_size: int = None): """ Args: net (Union[torch.nn.Module, torch.Tensor]): neural network or torch.Tensor for mode *mat* grid (Domain): object of class Domain equation (Equation): object of class Equation conditions (Conditions): object of class Conditions + batch_size (int): size of batch """ self.net = net self.domain = domain @@ -41,6 +43,7 @@ def __init__( else: os.makedirs(folder_path) self._save_dir = folder_path + self.batch_size = batch_size def compile( self, @@ -90,7 +93,8 @@ def compile( boundary_order=boundary_order).set_strategy(mode) self.solution_cls = Solution(grid, self.equation_cls, self.net, mode, weak_form, - lambda_operator, lambda_bound, tol, derivative_points) + lambda_operator, lambda_bound, tol, derivative_points, + batch_size=self.batch_size) def _model_save( self, @@ -153,16 +157,15 @@ def train(self, while self.t < epochs and self.stop_training == False: callbacks.on_epoch_begin() - self.optimizer.zero_grad() - - if device_type() == 'cuda' and mixed_precision: - closure() - else: - self.optimizer.step(closure) - if optimizer.gamma is not None and self.t % optimizer.decay_every == 0: - optimizer.scheduler.step() - + iter_count = 1 if self.batch_size is None else self.solution_cls.operator.n_batches + for _ in range(iter_count): # if batch mod then iter until end of batches else only once + if device_type() == 'cuda' and mixed_precision: + closure() + else: + self.optimizer.step(closure) + if optimizer.gamma is not None and self.t % optimizer.decay_every == 0: + optimizer.scheduler.step() callbacks.on_epoch_end() self.t += 1 diff --git a/tedeous/solution.py b/tedeous/solution.py index cbfe10de..1e7f0451 100644 --- a/tedeous/solution.py +++ b/tedeous/solution.py @@ -30,7 +30,8 @@ def __init__( lambda_operator, lambda_bound, tol: float = 0, - derivative_points: int = 2): + derivative_points: int = 2, + batch_size: int = None): """ Args: grid (torch.Tensor): discretization of comp-l domain. @@ -42,6 +43,7 @@ def __init__( lambda_bound (_type_): regularization parameter for boundary term in loss. tol (float, optional): penalty in *casual loss*. Defaults to 0. derivative_points (int, optional): points number for derivative calculation. + batch_size (int): size of batch. For details to Derivative_mat class.. Defaults to 2. 
""" @@ -49,10 +51,14 @@ def __init__( if mode == 'NN': sorted_grid = Points_type(self.grid).grid_sort() self.n_t = len(sorted_grid['central'][:, 0].unique()) + self.n_t_operation = lambda sorted_grid: len(sorted_grid['central'][:, 0].unique()) elif mode == 'autograd': self.n_t = len(self.grid[:, 0].unique()) + self.n_t_operation = lambda grid: len(grid[:, 0].unique()) elif mode == 'mat': self.n_t = grid.shape[1] + self.n_t_operation = lambda grid: grid.shape[1] + equal_copy = deepcopy(equal_cls) prepared_operator = equal_copy.operator_prepare() self._operator_coeff(equal_cls, prepared_operator) @@ -64,13 +70,19 @@ def __init__( self.lambda_bound = lambda_bound self.tol = tol self.derivative_points = derivative_points + self.batch_size = batch_size + if self.batch_size is None: + self.n_t_operation = None + self.operator = Operator(self.grid, prepared_operator, self.model, - self.mode, weak_form, derivative_points) + self.mode, weak_form, derivative_points, + self.batch_size) self.boundary = Bounds(self.grid,self.prepared_bconds, self.model, self.mode, weak_form, derivative_points) - self.loss_cls = Losses(self.mode, self.weak_form, self.n_t, self.tol) + self.loss_cls = Losses(self.mode, self.weak_form, self.n_t, self.tol, + self.n_t_operation) # n_t calculate for each batch self.op_list = [] self.bval_list = [] self.loss_list = [] @@ -111,7 +123,8 @@ def _model_change(self, new_model: torch.nn.Module) -> None: new_model, self.mode, self.weak_form, - self.derivative_points) + self.derivative_points, + self.batch_size) def evaluate(self, save_graph: bool = True) -> Tuple[torch.Tensor, torch.Tensor]: @@ -129,7 +142,6 @@ def evaluate(self, Returns: Tuple[torch.Tensor, torch.Tensor]: loss """ - self.op = self.operator.operator_compute() self.bval, self.true_bval,\ self.bval_keys, self.bval_length = self.boundary.apply_bcs() @@ -144,5 +156,13 @@ def evaluate(self, self.lambda_operator, self.lambda_bound, save_graph) + if self.batch_size is not None: + if self.operator.new_init: # if first batch in epoch + self.save_op = self.op + self.operator.new_init = False + else: + self.save_op = torch.cat((self.save_op, self.operator.operator_compute()), 0) # cat curent losses to previous + del self.op + torch.cuda.empty_cache() return self.loss, self.loss_normalized From e7268c3bacd247ae7b15c437ef2d53094f71a189 Mon Sep 17 00:00:00 2001 From: florentiner Date: Fri, 9 Aug 2024 15:15:31 +0300 Subject: [PATCH 2/6] fix apply lambda bug --- tedeous/eval.py | 4 ++-- tedeous/solution.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tedeous/eval.py b/tedeous/eval.py index 841b2c6d..e744f0a1 100644 --- a/tedeous/eval.py +++ b/tedeous/eval.py @@ -129,7 +129,7 @@ def __init__(self, del self.sorted_grid torch.cuda.empty_cache() self.init_mini_batches() - self.new_init = True + self.current_batch_i = 0 self.derivative = Derivative(self.model, self.derivative_points).set_strategy(self.mode).take_derivative @@ -177,7 +177,7 @@ def _pde_compute(self) -> torch.Tensor: self.grid_batch = next(self.grid_iter) except: # if no batches left then reinit self.init_mini_batches() - self.new_init = True + self.current_batch_i = -1 else: sorted_grid = self.sorted_grid num_of_eq = len(self.prepared_operator) diff --git a/tedeous/solution.py b/tedeous/solution.py index 1e7f0451..172366eb 100644 --- a/tedeous/solution.py +++ b/tedeous/solution.py @@ -157,11 +157,11 @@ def evaluate(self, self.lambda_bound, save_graph) if self.batch_size is not None: - if self.operator.new_init: # if first batch in epoch + if 
+            if self.operator.current_batch_i == 0: # first batch of the epoch
                 self.save_op = self.op
-                self.operator.new_init = False
             else:
-                self.save_op = torch.cat((self.save_op, self.operator.operator_compute()), 0) # concatenate the current operator values to the accumulated ones
+                self.save_op = torch.cat((self.save_op, self.op), 0) # concatenate the current operator values to the accumulated ones
+            self.operator.current_batch_i += 1
             del self.op
             torch.cuda.empty_cache()
 
         return self.loss, self.loss_normalized

From 312f8efeab594eca6f981bed8877988ab122f6e0 Mon Sep 17 00:00:00 2001
From: SuperSashka
Date: Wed, 21 Aug 2024 18:57:51 +0300
Subject: [PATCH 3/6] Experimental little update

---
 examples/example_Lotka_Volterra.py            | 12 ++++-----
 examples/example_Lotka_Volterra_paper.py      | 25 +++++++++----------
 examples/example_wave_adaptive_lambdas.py     |  2 +-
 .../example_wave_adaptive_lambdas_batches.py  |  3 +--
 examples/example_weak_wave_periodic.py        |  4 +--
 .../example_weak_wave_periodic_batches.py     |  6 ++---
 6 files changed, 25 insertions(+), 27 deletions(-)

diff --git a/examples/example_Lotka_Volterra.py b/examples/example_Lotka_Volterra.py
index 307129cd..e85201b7 100644
--- a/examples/example_Lotka_Volterra.py
+++ b/examples/example_Lotka_Volterra.py
@@ -26,7 +26,7 @@
 from tedeous.device import solver_device, check_device, device_type
 
-solver_device('сpu')
+solver_device('gpu')
 
 alpha = 20.
 beta = 20.
@@ -113,7 +113,7 @@
     torch.nn.Linear(100, 2)
     )
 
-model = Model(net, domain, equation, boundaries)
+model = Model(net, domain, equation, boundaries, batch_size=32)
 
 model.compile("NN", lambda_operator=1, lambda_bound=100, h=h)
@@ -133,7 +133,7 @@
 optimizer = Optimizer('Adam', {'lr': 1e-4})
 
-model.train(optimizer, 5e6, save_model=True, callbacks=[cb_es, cb_cache, cb_plots])
+model.train(optimizer, 5e6, save_model=True, callbacks=[cb_es, cb_cache,cb_plots])
 
 end = time.time()
@@ -161,8 +161,8 @@ def deriv(X, t, alpha, beta, delta, gamma):
 plt.title("odeint and NN methods comparing")
 plt.plot(t, x, '+', label = 'preys_odeint')
 plt.plot(t, y, '*', label = "predators_odeint")
-plt.plot(grid, net(grid)[:,0].detach().numpy().reshape(-1), label='preys_NN')
-plt.plot(grid, net(grid)[:,1].detach().numpy().reshape(-1), label='predators_NN')
+plt.plot(grid.cpu(), net(grid.cpu())[:,0].detach().numpy().reshape(-1), label='preys_NN')
+plt.plot(grid.cpu(), net(grid.cpu())[:,1].detach().numpy().reshape(-1), label='predators_NN')
 plt.xlabel('Time t, [days]')
 plt.ylabel('Population')
 plt.legend(loc='upper right')
@@ -171,7 +171,7 @@
 plt.figure()
 plt.grid()
 plt.title('Phase plane: prey vs predators')
-plt.plot(net(grid)[:,0].detach().numpy().reshape(-1), net(grid)[:,1].detach().numpy().reshape(-1), '-*', label='NN')
+plt.plot(net(grid.cpu())[:,0].detach().numpy().reshape(-1), net(grid.cpu())[:,1].detach().numpy().reshape(-1), '-*', label='NN')
 plt.plot(x,y, label='odeint')
 plt.xlabel('preys')
 plt.ylabel('predators')
diff --git a/examples/example_Lotka_Volterra_paper.py b/examples/example_Lotka_Volterra_paper.py
index d1da08d8..545407d2 100644
--- a/examples/example_Lotka_Volterra_paper.py
+++ b/examples/example_Lotka_Volterra_paper.py
@@ -38,7 +38,7 @@ def Lotka_experiment(grid_res, CACHE):
     exp_dict_list = []
 
-    solver_device('cpu')
+    solver_device('gpu')
 
     domain = Domain()
     domain.variable('t', [0, tmax], grid_res)
@@ -105,18 +105,16 @@ def Lotka_experiment(grid_res, CACHE):
     equation.add(eq2)
 
     net = torch.nn.Sequential(
-        torch.nn.Linear(1, 100),
+        torch.nn.Linear(1, 32),
         torch.nn.Tanh(),
-        torch.nn.Linear(100, 100),
+        torch.nn.Linear(32, 32),
         torch.nn.Tanh(),
-        torch.nn.Linear(100, 100),
-
torch.nn.Tanh(), - torch.nn.Linear(100, 2) + torch.nn.Linear(32, 2) ) - model = Model(net, domain, equation, boundaries) + model = Model(net, domain, equation, boundaries,batch_size=16) - model.compile("NN", lambda_operator=1, lambda_bound=100, h=h) + model.compile("autograd", lambda_operator=1, lambda_bound=100) img_dir=os.path.join(os.path.dirname( __file__ ), 'img_Lotka_Volterra_paper') @@ -126,6 +124,7 @@ def Lotka_experiment(grid_res, CACHE): loss_window=100, no_improvement_patience=500, patience=3, + info_string_every=100, randomize_parameter=1e-5) cb_plots = plot.Plots(save_every=1000, print_every=None, img_dir=img_dir) @@ -175,10 +174,10 @@ def deriv(X, t, alpha, beta, delta, gamma): plt.figure() plt.grid() plt.title("odeint and NN methods comparing") - plt.plot(t, u_exact[:,0].detach().numpy().reshape(-1), '+', label = 'preys_odeint') - plt.plot(t, u_exact[:,1].detach().numpy().reshape(-1), '*', label = "predators_odeint") - plt.plot(grid, net(grid)[:,0].detach().numpy().reshape(-1), label='preys_NN') - plt.plot(grid, net(grid)[:,1].detach().numpy().reshape(-1), label='predators_NN') + plt.plot(t.cpu(), u_exact[:,0].detach().numpy().reshape(-1), '+', label = 'preys_odeint') + plt.plot(t.cpu(), u_exact[:,1].detach().numpy().reshape(-1), '*', label = "predators_odeint") + plt.plot(grid.cpu(), net(grid.cpu())[:,0].detach().numpy().reshape(-1), label='preys_NN') + plt.plot(grid.cpu(), net(grid.cpu())[:,1].detach().numpy().reshape(-1), label='predators_NN') plt.xlabel('Time t, [days]') plt.ylabel('Population') plt.legend(loc='upper right') @@ -186,7 +185,7 @@ def deriv(X, t, alpha, beta, delta, gamma): return exp_dict_list -nruns=10 +nruns=1 exp_dict_list=[] diff --git a/examples/example_wave_adaptive_lambdas.py b/examples/example_wave_adaptive_lambdas.py index 0d8abc4f..41f30478 100644 --- a/examples/example_wave_adaptive_lambdas.py +++ b/examples/example_wave_adaptive_lambdas.py @@ -104,7 +104,7 @@ def func(grid): abs_loss=0.1, info_string_every=500) -img_dir=os.path.join(os.path.dirname( __file__ ), 'wave_eq_img') +img_dir=os.path.join(os.path.dirname( __file__ ), 'wave_eq_img_nobatch') cb_plots = plot.Plots(save_every=500, print_every=None, img_dir=img_dir) diff --git a/examples/example_wave_adaptive_lambdas_batches.py b/examples/example_wave_adaptive_lambdas_batches.py index 78807ec0..b65a99ee 100644 --- a/examples/example_wave_adaptive_lambdas_batches.py +++ b/examples/example_wave_adaptive_lambdas_batches.py @@ -101,10 +101,9 @@ def func(grid): no_improvement_patience=1000, patience=10, randomize_parameter=1e-5, - abs_loss=0.1, info_string_every=500) -img_dir=os.path.join(os.path.dirname( __file__ ), 'wave_eq_img') +img_dir=os.path.join(os.path.dirname( __file__ ), 'wave_eq_img_batch') cb_plots = plot.Plots(save_every=500, print_every=None, img_dir=img_dir) diff --git a/examples/example_weak_wave_periodic.py b/examples/example_weak_wave_periodic.py index 7d77f41b..1b1a9453 100644 --- a/examples/example_weak_wave_periodic.py +++ b/examples/example_weak_wave_periodic.py @@ -14,7 +14,7 @@ from tedeous.optimizers.optimizer import Optimizer from tedeous.device import solver_device -solver_device('cpu') +solver_device('gpu') # Grid domain = Domain() @@ -98,7 +98,7 @@ def v(grid): cb_es = early_stopping.EarlyStopping(eps=1e-6, no_improvement_patience=500, info_string_every=1000) -img_dir = os.path.join(os.path.dirname( __file__ ), 'wave_periodic_weak_img') +img_dir = os.path.join(os.path.dirname( __file__ ), 'wave_periodic_weak_img_nobatch') cb_plots = plot.Plots(save_every=100, 
print_every=None, img_dir=img_dir) diff --git a/examples/example_weak_wave_periodic_batches.py b/examples/example_weak_wave_periodic_batches.py index 5a04f98c..be54c60c 100644 --- a/examples/example_weak_wave_periodic_batches.py +++ b/examples/example_weak_wave_periodic_batches.py @@ -14,7 +14,7 @@ from tedeous.optimizers.optimizer import Optimizer from tedeous.device import solver_device -solver_device('cpu') +solver_device('gpu') # Grid domain = Domain() @@ -92,13 +92,13 @@ def v(grid): start = time.time() -model = Model(net, domain, equation, boundaries, batch_size=32) +model = Model(net, domain, equation, boundaries, batch_size=64) model.compile("NN", lambda_operator=1, lambda_bound=1000, h=0.01) cb_es = early_stopping.EarlyStopping(eps=1e-6, no_improvement_patience=500, info_string_every=1000) -img_dir = os.path.join(os.path.dirname( __file__ ), 'wave_periodic_weak_img') +img_dir = os.path.join(os.path.dirname( __file__ ), 'wave_periodic_weak_img_batch') cb_plots = plot.Plots(save_every=100, print_every=None, img_dir=img_dir) From 382111e57a64067d9986233a35e1d21316f984ba Mon Sep 17 00:00:00 2001 From: SuperSashka Date: Fri, 23 Aug 2024 19:29:40 +0300 Subject: [PATCH 4/6] Lotka-Volterra example workaround --- examples/example_Lotka_Volterra_paper.py | 98 ++++++++++++++++++------ 1 file changed, 74 insertions(+), 24 deletions(-) diff --git a/examples/example_Lotka_Volterra_paper.py b/examples/example_Lotka_Volterra_paper.py index 545407d2..d87d51a1 100644 --- a/examples/example_Lotka_Volterra_paper.py +++ b/examples/example_Lotka_Volterra_paper.py @@ -33,17 +33,68 @@ x0 = 4. y0 = 2. t0 = 0. -tmax = 1. +tmax = 1 + + +from copy import deepcopy + + +# Define the model +class MultiOutputModel(torch.nn.Module): + def __init__(self): + super(MultiOutputModel, self).__init__() + + self.width_out=[2] + + # Shared layers (base network) + self.shared_fc1 = torch.nn.Linear(1, 64) # Input size of 1 (for t) + self.shared_fc2 = torch.nn.Linear(64, 32) + + # Output head for Process 1 + self.process1_fc = torch.nn.Linear(32, 1) + + # Output head for Process 2 + self.process2_fc = torch.nn.Linear(32, 1) + + def forward(self, t): + # Shared layers forward pass + x = torch.tanh(self.shared_fc1(t)) + x = torch.tanh(self.shared_fc2(x)) + + # Process 1 output head + process1_out = self.process1_fc(x) + + # Process 2 output head + process2_out = self.process2_fc(x) + + out=torch.cat((process1_out, process2_out), dim=1) + + return out + +# Initialize the model +#model = + def Lotka_experiment(grid_res, CACHE): - exp_dict_list = [] + exp_dict_list = [] solver_device('gpu') - domain = Domain() - domain.variable('t', [0, tmax], grid_res) + #net = torch.nn.Sequential( + # torch.nn.Linear(1, 32), + # torch.nn.Tanh(), + # torch.nn.Linear(32, 32), + # torch.nn.Tanh(), + # torch.nn.Linear(32, 2) + #) - h = 0.0001 + net=MultiOutputModel() + + + + + domain = Domain() + domain.variable('t', [0, 1], grid_res) boundaries = Conditions() #initial conditions @@ -104,15 +155,9 @@ def Lotka_experiment(grid_res, CACHE): equation.add(eq1) equation.add(eq2) - net = torch.nn.Sequential( - torch.nn.Linear(1, 32), - torch.nn.Tanh(), - torch.nn.Linear(32, 32), - torch.nn.Tanh(), - torch.nn.Linear(32, 2) - ) - model = Model(net, domain, equation, boundaries,batch_size=16) + + model = Model(net, domain, equation, boundaries) model.compile("autograd", lambda_operator=1, lambda_bound=100) @@ -121,17 +166,19 @@ def Lotka_experiment(grid_res, CACHE): start = time.time() cb_es = early_stopping.EarlyStopping(eps=1e-6, - loss_window=100, - 
no_improvement_patience=500, - patience=3, - info_string_every=100, - randomize_parameter=1e-5) + loss_window=1000, + no_improvement_patience=500, + patience=3, + info_string_every=100, + randomize_parameter=1e-5) cb_plots = plot.Plots(save_every=1000, print_every=None, img_dir=img_dir) + #cb_cache = cache.Cache(cache_verbose=True, model_randomize_parameter=1e-5) + optimizer = Optimizer('Adam', {'lr': 1e-4}) - model.train(optimizer, 5e6, save_model=True, callbacks=[cb_es, cb_plots]) + model.train(optimizer, 2e5, save_model=True, callbacks=[cb_es, cb_plots]) end = time.time() @@ -150,7 +197,7 @@ def deriv(X, t, alpha, beta, delta, gamma): doty = y * (-delta + gamma * x) return np.array([dotx, doty]) - t = np.linspace(0., tmax, grid_res+1) + t = np.linspace(0, 1, grid_res+1) X0 = [x0, y0] res = integrate.odeint(deriv, X0, t, args = (alpha, beta, delta, gamma)) @@ -168,20 +215,23 @@ def deriv(X, t, alpha, beta, delta, gamma): print('Time taken {}= {}'.format(grid_res, end - start)) print('RMSE {}= {}'.format(grid_res, error_rmse)) - t = domain.variable_dict['t'] + #t = domain.variable_dict['t'] grid = domain.build('NN') + t = np.linspace(0, 1, grid_res+1) + plt.figure() plt.grid() plt.title("odeint and NN methods comparing") - plt.plot(t.cpu(), u_exact[:,0].detach().numpy().reshape(-1), '+', label = 'preys_odeint') - plt.plot(t.cpu(), u_exact[:,1].detach().numpy().reshape(-1), '*', label = "predators_odeint") + plt.plot(t, u_exact[:,0].detach().numpy().reshape(-1), '+', label = 'preys_odeint') + plt.plot(t, u_exact[:,1].detach().numpy().reshape(-1), '*', label = "predators_odeint") plt.plot(grid.cpu(), net(grid.cpu())[:,0].detach().numpy().reshape(-1), label='preys_NN') plt.plot(grid.cpu(), net(grid.cpu())[:,1].detach().numpy().reshape(-1), label='predators_NN') plt.xlabel('Time t, [days]') plt.ylabel('Population') plt.legend(loc='upper right') - plt.show() + plt.savefig(os.path.join(img_dir,'compare_{}_{}.png'.format(grid_res,part))) + return exp_dict_list From a65fbab65ca9f6809a7090e50c84fc25ea889fa4 Mon Sep 17 00:00:00 2001 From: SuperSashka Date: Mon, 26 Aug 2024 12:10:29 +0300 Subject: [PATCH 5/6] batch size fix --- examples/example_Lotka_Volterra_paper.py | 20 +++++++++++--------- tedeous/model.py | 3 +++ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/examples/example_Lotka_Volterra_paper.py b/examples/example_Lotka_Volterra_paper.py index d87d51a1..c782f7ee 100644 --- a/examples/example_Lotka_Volterra_paper.py +++ b/examples/example_Lotka_Volterra_paper.py @@ -47,20 +47,22 @@ def __init__(self): self.width_out=[2] # Shared layers (base network) - self.shared_fc1 = torch.nn.Linear(1, 64) # Input size of 1 (for t) - self.shared_fc2 = torch.nn.Linear(64, 32) - + self.shared_fc1 = torch.nn.Linear(1, 100) # Input size of 1 (for t) + self.shared_fc2 = torch.nn.Linear(100, 100) + self.shared_fc3 = torch.nn.Linear(100, 100) + self.shared_fc4 = torch.nn.Linear(100, 100) # Output head for Process 1 - self.process1_fc = torch.nn.Linear(32, 1) + self.process1_fc = torch.nn.Linear(100, 1) # Output head for Process 2 - self.process2_fc = torch.nn.Linear(32, 1) + self.process2_fc = torch.nn.Linear(100, 1) def forward(self, t): # Shared layers forward pass x = torch.tanh(self.shared_fc1(t)) x = torch.tanh(self.shared_fc2(x)) - + x = torch.tanh(self.shared_fc3(x)) + x = torch.tanh(self.shared_fc4(x)) # Process 1 output head process1_out = self.process1_fc(x) @@ -157,7 +159,7 @@ def Lotka_experiment(grid_res, CACHE): - model = Model(net, domain, equation, boundaries) + model = Model(net, 
domain, equation, boundaries, batch_size=64)
 
     model.compile("autograd", lambda_operator=1, lambda_bound=100)
 
     img_dir=os.path.join(os.path.dirname( __file__ ), 'img_Lotka_Volterra_paper')
@@ -230,7 +232,7 @@ def deriv(X, t, alpha, beta, delta, gamma):
     plt.xlabel('Time t, [days]')
     plt.ylabel('Population')
     plt.legend(loc='upper right')
-    plt.savefig(os.path.join(img_dir,'compare_{}_{}.png'.format(grid_res,part)))
+    plt.savefig(os.path.join(img_dir,'compare_{}.png'.format(grid_res)))
 
     return exp_dict_list
@@ -241,7 +243,7 @@
 
 CACHE=False
 
-for grid_res in range(60,101,10):
+for grid_res in range(60,1001,100):
     for _ in range(nruns):
         exp_dict_list.append(Lotka_experiment(grid_res,CACHE))
 
diff --git a/tedeous/model.py b/tedeous/model.py
index 30345f7f..53896d9c 100644
--- a/tedeous/model.py
+++ b/tedeous/model.py
@@ -93,6 +93,9 @@ def compile(
         self.equation_cls = Operator_bcond_preproc(grid, operator, bconds, h=h, inner_order=inner_order,
                                                    boundary_order=boundary_order).set_strategy(mode)
+        if self.batch_size is not None and len(grid) < self.batch_size: # a batch larger than the grid disables batching
+            self.batch_size = None
+
 
         self.solution_cls = Solution(grid, self.equation_cls, self.net, mode, weak_form,
                                      lambda_operator, lambda_bound, tol, derivative_points,

From: SuperSashka
Date: Mon, 26 Aug 2024 14:49:24 +0300
Subject: [PATCH 6/6] Version update

---
 tedeous/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tedeous/version.py b/tedeous/version.py
index c9435752..e95e6343 100644
--- a/tedeous/version.py
+++ b/tedeous/version.py
@@ -1 +1 @@
-__version__ = '0.4.2'
\ No newline at end of file
+__version__ = '0.4.3'
\ No newline at end of file
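
Usage sketch: a minimal end-to-end driver for the mini-batching mode this series introduces, assembled from the API calls shown in the example files above. The heat-type operator, layer sizes, learning rate, and epoch count are illustrative assumptions (and, for brevity, only an initial condition is imposed); the only behaviour actually added by these patches is the Model(..., batch_size=...) argument.

import torch

from tedeous.data import Domain, Conditions, Equation
from tedeous.model import Model
from tedeous.callbacks import early_stopping
from tedeous.optimizers.optimizer import Optimizer

# 2D space-time grid; 'x' is declared first, so it is column 0 of the grid
domain = Domain()
domain.variable('x', [0, 1], 20)
domain.variable('t', [0, 1], 20)

# u(x, 0) = sin(pi * x)
boundaries = Conditions()
boundaries.dirichlet({'x': [0, 1], 't': 0},
                     value=lambda grid: torch.sin(torch.pi * grid[:, 0]))

# toy heat-type operator du/dt - d2u/dx2 = 0; derivative axes follow the
# variable declaration order ('x' -> 0, 't' -> 1)
equation = Equation()
equation.add({
    'du/dt': {'coeff': 1, 'du/dt': [1], 'pow': 1},
    '-d2u/dx2': {'coeff': -1, 'd2u/dx2': [0, 0], 'pow': 1}
})

net = torch.nn.Sequential(
    torch.nn.Linear(2, 32),
    torch.nn.Tanh(),
    torch.nn.Linear(32, 1))

# batch_size switches the operator residual to shuffled mini-batches of grid
# points (a DataLoader inside Operator); batch_size=None (the default) keeps
# the old full-grid behaviour, and each training epoch now runs n_batches
# optimizer steps instead of one
model = Model(net, domain, equation, boundaries, batch_size=16)
model.compile('autograd', lambda_operator=1, lambda_bound=100)

cb_es = early_stopping.EarlyStopping(eps=1e-6, no_improvement_patience=500,
                                     info_string_every=100)
optimizer = Optimizer('Adam', {'lr': 1e-3})
model.train(optimizer, 1e3, save_model=False, callbacks=[cb_es])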