add cosine restart learning rate #2953

Open · wants to merge 3 commits into base: devel
1 change: 1 addition & 0 deletions deepmd/common.py
@@ -125,6 +125,7 @@ def gelu_wrapper(x):
"softplus": tf.nn.softplus,
"sigmoid": tf.sigmoid,
"tanh": tf.nn.tanh,
"swish": tf.nn.swish,
Member:
It seems that it has been renamed to silu: tensorflow/tensorflow#41066

"gelu": gelu,
"gelu_tf": gelu_tf,
"None": None,
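For reference, one way to act on the review comment above about the swish → silu rename while keeping older TensorFlow releases working would be to resolve the function once and register it under both spellings. This is a sketch, not part of this PR; the dict name ACTIVATION_FN_DICT and the selection of entries shown are illustrative.

```python
import tensorflow as tf

# tf.nn.silu exists on newer TensorFlow releases; older ones only expose tf.nn.swish.
_silu_fn = getattr(tf.nn, "silu", tf.nn.swish)

ACTIVATION_FN_DICT = {
    "tanh": tf.nn.tanh,
    "swish": _silu_fn,  # keep the old spelling as an alias
    "silu": _silu_fn,
    # ... remaining activations as in the original mapping ...
}
```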
56 changes: 34 additions & 22 deletions deepmd/train/trainer.py
@@ -58,6 +58,8 @@
)
from deepmd.utils.learning_rate import (
LearningRateExp,
LearningRateCos,
LearningRateCosRestarts,
)
from deepmd.utils.sess import (
run_sess,
@@ -113,13 +115,21 @@ def get_lr_and_coef(lr_param):
scale_lr_coef = np.sqrt(self.run_opt.world_size).real
else:
scale_lr_coef = 1.0
lr_type = lr_param.get("type", "exp")
if lr_type == "exp":
self.lr_type = lr_param.get("type", "exp")
if self.lr_type == "exp":
lr = LearningRateExp(
lr_param["start_lr"], lr_param["stop_lr"], lr_param["decay_steps"]
)
elif self.lr_type == "cos":
lr = LearningRateCos(
lr_param["start_lr"], lr_param["stop_lr"], lr_param["decay_steps"]
)
elif self.lr_type == "cosrestart":
lr = LearningRateCosRestarts(
lr_param["start_lr"], lr_param["stop_lr"], lr_param["decay_steps"]
)
else:
raise RuntimeError("unknown learning_rate type " + lr_type)
raise RuntimeError("unknown learning_rate type " + self.lr_type)
return lr, scale_lr_coef

# learning rate
@@ -553,29 +563,31 @@ def train(self, train_data=None, valid_data=None):
is_first_step = True
self.cur_batch = cur_batch
if not self.multi_task_mode:
log.info(
"start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e"
% (
run_sess(self.sess, self.learning_rate),
self.lr.value(cur_batch),
self.lr.decay_steps_,
self.lr.decay_rate_,
self.lr.value(stop_batch),
)
)
else:
for fitting_key in self.fitting:
if self.lr_type == "exp":
Member:
Switching on the learning-rate type inside the Trainer is not good practice. Instead, implement a LearningRate.log_start method (LearningRate should be an abstract base class inherited by all learning-rate classes) and call self.lr.log_start(self.sess) here.

log.info(
"%s: start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e"
"start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e"
% (
fitting_key,
run_sess(self.sess, self.learning_rate_dict[fitting_key]),
self.lr_dict[fitting_key].value(cur_batch),
self.lr_dict[fitting_key].decay_steps_,
self.lr_dict[fitting_key].decay_rate_,
self.lr_dict[fitting_key].value(stop_batch),
run_sess(self.sess, self.learning_rate),
self.lr.value(cur_batch),
self.lr.decay_steps_,
self.lr.decay_rate_,
self.lr.value(stop_batch),
)
)
else:
for fitting_key in self.fitting:
if self.lr_type == "exp":
log.info(
"%s: start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e"
% (
fitting_key,
run_sess(self.sess, self.learning_rate_dict[fitting_key]),
self.lr_dict[fitting_key].value(cur_batch),
self.lr_dict[fitting_key].decay_steps_,
self.lr_dict[fitting_key].decay_rate_,
self.lr_dict[fitting_key].value(stop_batch),
)
)

prf_options = None
prf_run_metadata = None
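To make the reviewer's log_start suggestion above concrete, here is a rough sketch of such an abstract base class. The class name, signatures, and logged fields are illustrative rather than taken from the PR; it assumes build() keeps the created tensor on the instance and that log and run_sess are available as elsewhere in trainer.py.

```python
from abc import ABC, abstractmethod


class BaseLearningRate(ABC):
    """Common interface so the Trainer never branches on the schedule type."""

    @abstractmethod
    def build(self, global_step, stop_step=None):
        """Build the learning-rate tensor and keep it as self.lr_tensor_."""

    @abstractmethod
    def value(self, step):
        """Return the learning rate at a given training step."""

    def log_start(self, sess, cur_batch, stop_batch):
        """Log the starting learning rate; subclasses may override to add
        schedule-specific fields such as decay_steps_ or decay_rate_."""
        log.info(
            "start training at lr %.2e (== %.2e), final lr will be %.2e",
            run_sess(sess, self.lr_tensor_),
            self.value(cur_batch),
            self.value(stop_batch),
        )
```

The Trainer could then call self.lr.log_start(self.sess, cur_batch, stop_batch) in both branches and drop the `if self.lr_type == "exp"` checks.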
31 changes: 30 additions & 1 deletion deepmd/utils/argcheck.py
@@ -1010,13 +1010,42 @@ def learning_rate_exp():
]
return args

def learning_rate_cos():
doc_start_lr = "The learning rate the start of the training."
doc_stop_lr = "The desired learning rate at the end of the training."
doc_decay_steps = (
"Number of steps to decay over."
)

args = [
Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr),
Argument("stop_lr", float, optional=True, default=1e-8, doc=doc_stop_lr),
Argument("decay_steps", int, optional=True, default=100000, doc=doc_decay_steps),
]
return args

def learning_rate_cosrestarts():
doc_start_lr = "The learning rate the start of the training."
doc_stop_lr = "The desired learning rate at the end of the training."
doc_decay_steps = (
"Number of steps to decay over of the first decay."
)

args = [
Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr),
Argument("stop_lr", float, optional=True, default=1e-8, doc=doc_stop_lr),
Argument("decay_steps", int, optional=True, default=10000, doc=doc_decay_steps),
]
return args

def learning_rate_variant_type_args():
doc_lr = "The type of the learning rate."

return Variant(
"type",
[Argument("exp", dict, learning_rate_exp())],
[Argument("exp", dict, learning_rate_exp()),
Argument("cos", dict, learning_rate_cos()),
Argument("cosrestart", dict, learning_rate_cosrestarts())],
Member:
You may need to add some documentation to the variants (doc="xxx"); otherwise, no one knows what they are.

optional=True,
default_tag="exp",
doc=doc_lr,
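Two hedged sketches related to this file; wording, doc strings, and values are illustrative, not from the PR. First, the documentation the reviewer asks for could be attached directly to the new variants, since Argument already takes a doc keyword elsewhere in argcheck.py:

```python
def learning_rate_variant_type_args():
    doc_lr = "The type of the learning rate."
    return Variant(
        "type",
        [
            Argument("exp", dict, learning_rate_exp(),
                     doc="Exponentially decaying learning rate."),
            Argument("cos", dict, learning_rate_cos(),
                     doc="Cosine-decaying learning rate."),
            Argument("cosrestart", dict, learning_rate_cosrestarts(),
                     doc="Cosine-decaying learning rate with warm restarts."),
        ],
        optional=True,
        default_tag="exp",
        doc=doc_lr,
    )
```

Second, a user would then select one of the new schedules from the learning_rate section of the training input; expressed as a Python dict with the defaults declared above:

```python
learning_rate = {
    "type": "cosrestart",    # or "cos" for the non-restarting schedule
    "start_lr": 1.0e-3,
    "stop_lr": 1.0e-8,
    "decay_steps": 10000,    # length of the first cosine period
}
```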
171 changes: 171 additions & 0 deletions deepmd/utils/learning_rate.py
@@ -105,3 +105,174 @@ def start_lr(self) -> float:
def value(self, step: int) -> float:
"""Get the lr at a certain step."""
return self.start_lr_ * np.power(self.decay_rate_, (step // self.decay_steps_))

class LearningRateCos:
r"""The cosine decaying learning rate.

The function returns the decayed learning rate. It is computed as:
```python
global_step = min(global_step, decay_steps)
cosine_decay = 0.5 * (1 + cos(pi * global_step / decay_steps))
decayed = (1 - alpha) * cosine_decay + alpha
decayed_learning_rate = learning_rate * decayed
```

Parameters
----------
start_lr
Starting learning rate
stop_lr
The minimum learning rate at the end of the decay; the cosine alpha is computed as stop_lr / start_lr.
decay_steps
Number of steps to decay over.
"""

def __init__(
self,
start_lr: float,
stop_lr: float = 5e-8,
decay_steps: int = 100000,
) -> None:
"""Constructor."""
self.cd = {}
self.cd["start_lr"] = start_lr
self.cd["stop_lr"] = stop_lr
self.cd["decay_steps"] = decay_steps
self.start_lr_ = self.cd["start_lr"]
self.alpha_ = self.cd["stop_lr"]/self.cd["start_lr"]

def build(
self, global_step: tf.Tensor, stop_step: Optional[int] = None
) -> tf.Tensor:
"""Build the learning rate.

Parameters
----------
global_step
The tf Tensor providing the global training step
stop_step
The stop step.

Returns
-------
learning_rate
The learning rate
"""
if stop_step is None:
self.decay_steps_ = (
self.cd["decay_steps"] if self.cd["decay_steps"] is not None else 100000
)
else:
self.stop_lr_ = (
self.cd["stop_lr"] if self.cd["stop_lr"] is not None else 5e-8
)
self.decay_steps_ = (
self.cd["decay_steps"]
if self.cd["decay_steps"] is not None
else stop_step
)

return tf.train.cosine_decay(
self.start_lr_,
global_step,
self.decay_steps_,
self.alpha_,
name="cosine",
)

def start_lr(self) -> float:
"""Get the start lr."""
return self.start_lr_

def value(self, step: int) -> float:
"""Get the lr at a certain step."""
step = min(step, self.decay_steps_)
cosine_decay = 0.5 * (1 + np.cos(np.pi * step / self.decay_steps_))
decayed = (1 - self.alpha_) * cosine_decay + self.alpha_
decayed_learning_rate = self.start_lr_ * decayed
return decayed_learning_rate
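As a quick sanity check of the closed form in value() above (illustrative numbers; decay_steps_ is assigned by hand here because build() is what normally sets it):

```python
lr = LearningRateCos(start_lr=1.0e-3, stop_lr=1.0e-8, decay_steps=100000)
lr.decay_steps_ = 100000      # normally assigned inside build()
print(lr.value(0))            # 1.0e-3: cosine_decay = 1, decayed = 1
print(lr.value(50000))        # ~5.0e-4: cosine_decay = 0.5
print(lr.value(100000))       # 1.0e-8: cosine_decay = 0, decayed = alpha
```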


class LearningRateCosRestarts:
r"""The cosine decaying restart learning rate.

The function returns the cosine decayed learning rate while taking into account
possible warm restarts.
```
Member:
This line should be removed.


Parameters
----------
start_lr
Starting learning rate
stop_lr
The minimum learning rate at the end of each decay period; the cosine alpha is computed as stop_lr / start_lr.
decay_steps
Number of steps to decay over.
"""

def __init__(
self,
start_lr: float,
stop_lr: float = 5e-8,
decay_steps: int = 10000,
) -> None:
"""Constructor."""
self.cd = {}
self.cd["start_lr"] = start_lr
self.cd["stop_lr"] = stop_lr
self.cd["decay_steps"] = decay_steps
self.start_lr_ = self.cd["start_lr"]
self.alpha_ = self.cd["stop_lr"]/self.cd["start_lr"]

def build(
self, global_step: tf.Tensor, stop_step: Optional[int] = None
) -> tf.Tensor:
"""Build the learning rate.

Parameters
----------
global_step
The tf Tensor providing the global training step
stop_step
The stop step.

Returns
-------
learning_rate
The learning rate
"""
if stop_step is None:
self.decay_steps_ = (
self.cd["decay_steps"] if self.cd["decay_steps"] is not None else 10000
)
else:
self.stop_lr_ = (
self.cd["stop_lr"] if self.cd["stop_lr"] is not None else 5e-8
)
self.decay_steps_ = (
self.cd["decay_steps"]
if self.cd["decay_steps"] is not None
else stop_step
)

return tf.train.cosine_decay_restarts(
learning_rate=self.start_lr_,
global_step=global_step,
first_decay_steps=self.decay_steps_,
alpha=self.alpha_,
name="cosinerestart",
)

def start_lr(self) -> float:
"""Get the start lr."""
return self.start_lr_

def value(self, step: int) -> float:
Collaborator:
You may not need to implement the value method if you do not print the learning-rate information at the beginning of the training:
https://github.com/hellozhaoming/deepmd-kit/blob/05052c195308f61b63ce2bab130ce0e8cba60604/deepmd/train/trainer.py#L566

"""Get the lr at a certain step. Need to revise later"""
step = min(step, self.decay_steps_)
cosine_decay = 0.5 * (1 + np.cos(np.pi * step / self.decay_steps_))
decayed = (1 - self.alpha_) * cosine_decay + self.alpha_
decayed_learning_rate = self.start_lr_ * decayed
return decayed_learning_rate
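Following up on the comment above: if the logged value should also reflect the warm restarts, the arithmetic of tf.train.cosine_decay_restarts (with the defaults t_mul=2.0 and m_mul=1.0 that build() leaves in place) can be reproduced with NumPy. A hedged sketch of a replacement for this value method, not verified against TensorFlow output:

```python
def value(self, step: int) -> float:
    """Get the lr at a certain step, accounting for warm restarts.

    Assumes t_mul=2.0 and m_mul=1.0, the tf.train.cosine_decay_restarts
    defaults used when build() does not override them.
    """
    t_mul, m_mul = 2.0, 1.0
    completed = step / self.decay_steps_
    # Index of the restart period the step falls in; period lengths grow by t_mul.
    i_restart = np.floor(
        np.log(1.0 - completed * (1.0 - t_mul)) / np.log(t_mul)
    )
    # Fraction of the current period that has elapsed.
    sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul)
    completed = (completed - sum_r) / t_mul**i_restart
    cosine_decayed = 0.5 * m_mul**i_restart * (1.0 + np.cos(np.pi * completed))
    decayed = (1.0 - self.alpha_) * cosine_decayed + self.alpha_
    return self.start_lr_ * decayed
```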