diff --git a/xopt/generators/bayesian/bayesian_generator.py b/xopt/generators/bayesian/bayesian_generator.py
index 452e273c..e53057aa 100644
--- a/xopt/generators/bayesian/bayesian_generator.py
+++ b/xopt/generators/bayesian/bayesian_generator.py
@@ -4,13 +4,14 @@
 import warnings
 from abc import ABC, abstractmethod
 from copy import deepcopy
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 
 import pandas as pd
 import torch
 from botorch.acquisition import FixedFeatureAcquisitionFunction, qUpperConfidenceBound
 from botorch.models.model import Model
 from botorch.sampling import get_sampler
+from botorch.utils.multi_objective import is_non_dominated
 from botorch.utils.multi_objective.box_decompositions import DominatedPartitioning
 from gpytorch import Module
 from pydantic import Field, field_validator, PositiveInt, SerializeAsAny
@@ -407,8 +408,18 @@ def propose_candidates(self, model, n_candidates=1):
         # get acquisition function
         acq_funct = self.get_acquisition(model)
 
-        # get candidates
-        candidates = self.numerical_optimizer.optimize(acq_funct, bounds, n_candidates)
+        # get initial candidates to start acquisition function optimization
+        initial_points = self._get_initial_conditions(n_candidates)
+
+        # get candidates -- grid optimizer does not support batch_initial_conditions
+        if isinstance(self.numerical_optimizer, GridOptimizer):
+            candidates = self.numerical_optimizer.optimize(
+                acq_funct, bounds, n_candidates
+            )
+        else:
+            candidates = self.numerical_optimizer.optimize(
+                acq_funct, bounds, n_candidates, batch_initial_conditions=initial_points
+            )
         return candidates
 
     def get_training_data(self, data: pd.DataFrame) -> pd.DataFrame:
@@ -536,6 +547,11 @@ def visualize_model(self, **kwargs):
         """displays the GP models"""
         return visualize_generator_model(self, **kwargs)
 
+    def _get_initial_conditions(self, n_candidates=1) -> Union[Tensor, None]:
+        """override if the algorithm should specify initial candidates for
+        optimizing the acquisition function"""
+        return None
+
     def _process_candidates(self, candidates: Tensor):
         """process pytorch candidates from optimizing the acquisition function"""
         logger.debug(f"Best candidate from optimize {candidates}")
@@ -776,31 +792,52 @@ def torch_reference_point(self):
 
         return torch.tensor(pt, **self._tkwargs)
 
-    def calculate_hypervolume(self):
-        """compute hypervolume given data"""
-        objective_data = torch.tensor(
-            self.vocs.objective_data(self.data, return_raw=True).to_numpy()
+    def _get_scaled_data(self):
+        """get scaled input/objective data for use with botorch logic, which
+        assumes maximization of each objective"""
+        var_df, obj_df, _, _ = self.vocs.extract_data(
+            self.data, return_valid=True, return_raw=True
         )
 
-        # hypervolume must only take into account feasible data points
-        if self.vocs.n_constraints > 0:
-            objective_data = objective_data[
-                self.vocs.feasibility_data(self.data)["feasible"].to_list()
-            ]
+        variable_data = torch.tensor(var_df[self.vocs.variable_names].to_numpy())
+        objective_data = torch.tensor(obj_df[self.vocs.objective_names].to_numpy())
+        weights = set_botorch_weights(self.vocs).to(**self._tkwargs)[
+            : self.vocs.n_objectives
+        ]
+        return variable_data, objective_data * weights, weights
 
-        n_objectives = self.vocs.n_objectives
-        weights = torch.zeros(n_objectives)
-        weights = set_botorch_weights(self.vocs).to(**self._tkwargs)
-        objective_data = objective_data * weights
+    def calculate_hypervolume(self):
+        """compute hypervolume given data"""
 
         # compute hypervolume
         bd = DominatedPartitioning(
-            ref_point=self.torch_reference_point, Y=objective_data
+            ref_point=self.torch_reference_point, Y=self._get_scaled_data()[1]
         )
         volume = bd.compute_hypervolume().item()
 
         return volume
 
+    def get_pareto_front(self):
+        """compute the pareto front x/y values given data"""
+        variable_data, objective_data, weights = self._get_scaled_data()
+        obj_data = torch.vstack(
+            (self.torch_reference_point.unsqueeze(0), objective_data)
+        )
+        var_data = torch.vstack(
+            (
+                torch.full_like(variable_data[0], float("nan")).unsqueeze(0),
+                variable_data,
+            )
+        )
+        non_dominated = is_non_dominated(obj_data)
+
+        # note: undo the weights to return objective values in real units;
+        # only return values if non-nan (i.e. non-reference-point) entries exist
+        if torch.all(torch.isnan(var_data[non_dominated])):
+            return None, None
+        else:
+            return var_data[non_dominated], obj_data[non_dominated] / weights
+
 
 def formatted_base_docstring():
     return "\nBase Generator\n---------------\n" + BayesianGenerator.__doc__
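A note on the new `get_pareto_front` helper: it prepends the reference point (with NaN variable values) so that only observations which actually dominate the reference point survive the `is_non_dominated` filter. A minimal usage sketch; the VOCS and data below are illustrative, not part of this diff:

```python
import pandas as pd

from xopt import VOCS
from xopt.generators.bayesian.mobo import MOBOGenerator

vocs = VOCS(
    variables={"x1": [0, 1], "x2": [0, 1]},
    objectives={"y1": "MINIMIZE", "y2": "MINIMIZE"},
)
gen = MOBOGenerator(vocs=vocs, reference_point={"y1": 10.0, "y2": 1.5})
gen.add_data(
    pd.DataFrame(
        {
            "x1": [0.1, 0.2, 0.4],
            "x2": [0.1, 0.2, 0.3],
            "y1": [1.0, 2.0, 1.0],
            "y2": [0.5, 0.1, 1.0],
        }
    )
)

# returns (None, None) when no observation dominates the reference point
pfx, pfy = gen.get_pareto_front()
```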
diff --git a/xopt/generators/bayesian/mobo.py b/xopt/generators/bayesian/mobo.py
index c82df309..7e399999 100644
--- a/xopt/generators/bayesian/mobo.py
+++ b/xopt/generators/bayesian/mobo.py
@@ -1,16 +1,30 @@
+import warnings
+from typing import Union
+
+import torch
 from botorch.acquisition import FixedFeatureAcquisitionFunction
 from botorch.acquisition.multi_objective import qNoisyExpectedHypervolumeImprovement
 from botorch.acquisition.multi_objective.logei import (
     qLogNoisyExpectedHypervolumeImprovement,
 )
+from botorch.utils import draw_sobol_samples
+from pydantic import Field
+from torch import Tensor
 
 from xopt.generators.bayesian.bayesian_generator import MultiObjectiveBayesianGenerator
 from xopt.generators.bayesian.objectives import create_mobo_objective
+from xopt.numerical_optimizer import LBFGSOptimizer
 
 
 class MOBOGenerator(MultiObjectiveBayesianGenerator):
     name = "mobo"
+    supports_batch_generation: bool = True
+    use_pf_as_initial_points: bool = Field(
+        False,
+        description="flag to specify if pareto front points are to be used during "
+        "optimization of the acquisition function",
+    )
     __doc__ = """Implements Multi-Objective Bayesian Optimization using the Expected
             Hypervolume Improvement acquisition function"""
@@ -64,3 +78,59 @@ def _get_acquisition(self, model):
         )
 
         return acq
+
+    def _get_initial_conditions(self, n_candidates=1) -> Union[Tensor, None]:
+        """
+        generate initial candidates for optimizing the acquisition function based
+        on the pareto front
+
+        If the `use_pf_as_initial_points` flag is set to true, the current
+        Pareto-optimal set is used as the initial points for optimizing the
+        acquisition function instead of randomly selected points (random points
+        fill out the set if `num_restarts` is greater than the number of points
+        in the Pareto set).
+
+        Returns:
+            A `num_restarts x q x d` tensor of initial conditions, or None to
+            fall back to the default initial point generation.
+        """
+        if self.use_pf_as_initial_points:
+            if isinstance(self.numerical_optimizer, LBFGSOptimizer):
+                bounds = self._get_optimization_bounds()
+                num_restarts = self.numerical_optimizer.n_restarts
+
+                pf_locations, _ = self.get_pareto_front()
+
+                # if there is no pareto front just return None to revert back to
+                # default behavior
+                if pf_locations is None:
+                    return None
+
+                initial_points = torch.clone(pf_locations)
+
+                # add the q dimension
+                initial_points = initial_points.unsqueeze(1)
+                initial_points = initial_points.expand([-1, n_candidates, -1])
+
+                # the number of initial points must equal the number of restarts
+                if len(initial_points) < num_restarts:
+                    # add random points inside the bounds to fill out the set
+                    sobol_samples = draw_sobol_samples(
+                        bounds, num_restarts - len(initial_points), n_candidates
+                    )
+
+                    initial_points = torch.cat([initial_points, sobol_samples])
+                elif len(initial_points) > num_restarts:
+                    # if there are too many, select `num_restarts` points at random
+                    initial_points = initial_points[
+                        torch.randperm(len(initial_points))
+                    ][:num_restarts]
+
+                return initial_points
+            else:
+                warnings.warn(
+                    "cannot use the pareto front as initial optimization points "
+                    "for non-LBFGS optimizers, ignoring flag"
+                )
+
+        return None
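For context, a sketch of the new flag in use (the setup mirrors `test_parallel` below; the shape comment assumes the LBFGS optimizer's restart count):

```python
import pandas as pd

from xopt import VOCS
from xopt.generators.bayesian.mobo import MOBOGenerator

vocs = VOCS(
    variables={"x1": [0, 1], "x2": [0, 1]},
    objectives={"y1": "MINIMIZE", "y2": "MINIMIZE"},
)
gen = MOBOGenerator(
    vocs=vocs,
    reference_point={"y1": 10.0, "y2": 1.5},
    use_pf_as_initial_points=True,
)
gen.add_data(
    pd.DataFrame(
        {
            "x1": [0.1, 0.2, 0.4, 0.4],
            "x2": [0.1, 0.2, 0.3, 0.2],
            "y1": [1.0, 2.0, 1.0, 0.0],
            "y2": [0.5, 0.1, 1.0, 1.5],
        }
    )
)

# initial conditions have shape n_restarts x q x d: Pareto-set points first,
# Sobol samples filling the remaining restart slots
ics = gen._get_initial_conditions(n_candidates=2)
candidates = gen.generate(2)
```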
diff --git a/xopt/numerical_optimizer.py b/xopt/numerical_optimizer.py
index b48e80f1..c697ae78 100644
--- a/xopt/numerical_optimizer.py
+++ b/xopt/numerical_optimizer.py
@@ -15,7 +15,9 @@ class NumericalOptimizer(XoptBaseModel, ABC):
     model_config = ConfigDict(extra="forbid")
 
     @abstractmethod
-    def optimize(self, function: AcquisitionFunction, bounds: Tensor, n_candidates=1):
+    def optimize(
+        self, function: AcquisitionFunction, bounds: Tensor, n_candidates=1, **kwargs
+    ):
         """optimize a function to produce a number of candidate points that
         minimize the function"""
         pass
@@ -33,7 +35,7 @@ class LBFGSOptimizer(NumericalOptimizer):
 
     model_config = ConfigDict(validate_assignment=True)
 
-    def optimize(self, function, bounds, n_candidates=1):
+    def optimize(self, function, bounds, n_candidates=1, **kwargs):
         assert isinstance(bounds, Tensor)
         if len(bounds) != 2:
             raise ValueError("bounds must have the shape [2, ndim]")
@@ -46,6 +48,7 @@ def optimize(self, function, bounds, n_candidates=1):
             num_restarts=self.n_restarts,
             timeout_sec=self.max_time,
             options={"maxiter": self.max_iter},
+            **kwargs,
         )
 
         return candidates
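The `**kwargs` pass-through forwards extra keyword arguments to botorch's `optimize_acqf`, which is what lets generators supply `batch_initial_conditions`. A sketch, under the assumption (as in the existing optimizer tests) that a plain tensor-valued callable is acceptable in place of a botorch acquisition function:

```python
import torch

from xopt.numerical_optimizer import LBFGSOptimizer


def f(X):
    # toy acquisition function with its maximum at x = 0.5 in every dimension
    return -((X - 0.5) ** 2).sum(dim=(-2, -1))


bounds = torch.stack((torch.zeros(2), torch.ones(2)))
optimizer = LBFGSOptimizer(n_restarts=4)

# warm start: n_restarts x q x d initial conditions instead of random restarts
warm_start = torch.rand(4, 1, 2)
candidates = optimizer.optimize(f, bounds, 1, batch_initial_conditions=warm_start)
assert candidates.shape == torch.Size([1, 2])
```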
diff --git a/xopt/resources/test_functions/zdt.py b/xopt/resources/test_functions/zdt.py
new file mode 100644
index 00000000..cae2a8e9
--- /dev/null
+++ b/xopt/resources/test_functions/zdt.py
@@ -0,0 +1,53 @@
+import numpy as np
+
+from xopt import VOCS
+
+
+def construct_zdt(n_dims, problem_index=1):
+    """construct Xopt versions of the multi-objective ZDT test problems"""
+    vocs = VOCS(
+        variables={f"x{i + 1}": [0, 1] for i in range(n_dims)},
+        objectives={"f1": "MINIMIZE", "f2": "MINIMIZE"},
+    )
+
+    if problem_index == 1:
+        # ZDT1
+        def evaluate(input_dict):
+            x = np.array([input_dict[f"x{i + 1}"] for i in range(n_dims)])
+
+            f1 = x[0]
+            g = 1 + (9 / (n_dims - 1)) * np.sum(x[1:])
+            h = 1 - np.sqrt(f1 / g)
+            f2 = g * h
+
+            return {"f1": f1, "f2": f2, "g": g}
+    elif problem_index == 2:
+        # ZDT2
+        def evaluate(input_dict):
+            x = np.array([input_dict[f"x{i + 1}"] for i in range(n_dims)])
+
+            f1 = x[0]
+            g = 1 + (9 / (n_dims - 1)) * np.sum(x[1:])
+            h = 1 - (f1 / g) ** 2
+            f2 = g * h
+
+            return {"f1": f1, "f2": f2, "g": g}
+    elif problem_index == 3:
+        # ZDT3
+        def evaluate(input_dict):
+            x = np.array([input_dict[f"x{i + 1}"] for i in range(n_dims)])
+
+            f1 = x[0]
+            g = 1 + (9 / (n_dims - 1)) * np.sum(x[1:])
+            h = 1 - np.sqrt(f1 / g) - (f1 / g) * np.sin(10 * np.pi * f1)
+            f2 = g * h
+
+            return {"f1": f1, "f2": f2, "g": g}
+    else:
+        raise NotImplementedError()
+
+    reference_point = {"f1": 1.0, "f2": 1.0}
+
+    return vocs, evaluate, reference_point
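A sketch of wiring one of these problems into an Xopt run, using the standard Xopt pattern; note `n_dims` must be at least 2 for the `g` term to be defined:

```python
from xopt import Evaluator, Xopt
from xopt.generators.bayesian.mobo import MOBOGenerator
from xopt.resources.test_functions.zdt import construct_zdt

# ZDT1 in three dimensions; the reference point comes with the problem
vocs, evaluate, reference_point = construct_zdt(n_dims=3, problem_index=1)

X = Xopt(
    generator=MOBOGenerator(vocs=vocs, reference_point=reference_point),
    evaluator=Evaluator(function=evaluate),
    vocs=vocs,
)
X.random_evaluate(3)
X.step()
```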
diff --git a/xopt/tests/generators/bayesian/test_mobo.py b/xopt/tests/generators/bayesian/test_mobo.py
index 762d3ab0..fd8229ca 100644
--- a/xopt/tests/generators/bayesian/test_mobo.py
+++ b/xopt/tests/generators/bayesian/test_mobo.py
@@ -3,6 +3,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+import torch
 from botorch.acquisition.multi_objective.logei import (
     qLogNoisyExpectedHypervolumeImprovement,
 )
@@ -46,6 +47,83 @@ def test_script(self):
         X.random_evaluate(3)
         X.step()
 
+    def test_parallel(self):
+        vocs = deepcopy(TEST_VOCS_BASE)
+        vocs.objectives.update({"y2": "MINIMIZE"})
+        vocs.constraints = {}
+
+        test_data = pd.DataFrame(
+            {
+                "x1": [0.1, 0.2, 0.4, 0.4],
+                "x2": [0.1, 0.2, 0.3, 0.2],
+                "y1": [1.0, 2.0, 1.0, 0.0],
+                "y2": [0.5, 0.1, 1.0, 1.5],
+            }
+        )
+        reference_point = {"y1": 10.0, "y2": 1.5}
+        gen = MOBOGenerator(
+            vocs=vocs,
+            reference_point=reference_point,
+            use_pf_as_initial_points=True,
+        )
+        gen.add_data(test_data)
+
+        gen.generate(2)
+
+    def test_pareto_front_calculation(self):
+        vocs = deepcopy(TEST_VOCS_BASE)
+        vocs.objectives.update({"y2": "MINIMIZE"})
+        vocs.constraints = {}
+
+        test_data = pd.DataFrame(
+            {
+                "x1": [0.1, 0.2, 0.4, 0.4],
+                "x2": [0.1, 0.2, 0.3, 0.2],
+                "y1": [1.0, 2.0, 1.0, 0.0],
+                "y2": [0.5, 0.1, 1.0, 1.5],
+            }
+        )
+        reference_point = {"y1": 10.0, "y2": 1.5}
+        gen = MOBOGenerator(
+            vocs=vocs,
+            reference_point=reference_point,
+            use_pf_as_initial_points=True,
+        )
+        gen.add_data(test_data)
+
+        pfx, pfy = gen.get_pareto_front()
+        assert torch.allclose(
+            torch.tensor([[0.1, 0.2, 0.4], [0.1, 0.2, 0.2]], dtype=torch.double).T, pfx
+        )
+        assert torch.allclose(
+            torch.tensor([[1.0, 2.0, 0.0], [0.5, 0.1, 1.5]], dtype=torch.double).T, pfy
+        )
+
+        # test with constraints
+        vocs.constraints = {"c1": ["GREATER_THAN", 0.5]}
+        test_data = pd.DataFrame(
+            {
+                "x1": [0.1, 0.2, 0.4, 0.4],
+                "x2": [0.1, 0.2, 0.3, 0.2],
+                "y1": [1.0, 2.0, 1.0, 0.0],
+                "y2": [0.5, 0.1, 1.0, 1.5],
+                "c1": [1.0, 1.0, 1.0, 0.0],
+            }
+        )
+        gen = MOBOGenerator(
+            vocs=vocs,
+            reference_point=reference_point,
+            use_pf_as_initial_points=True,
+        )
+        gen.add_data(test_data)
+        pfx, pfy = gen.get_pareto_front()
+        assert torch.allclose(
+            torch.tensor([[0.1, 0.2], [0.1, 0.2]], dtype=torch.double).T, pfx
+        )
+        assert torch.allclose(
+            torch.tensor([[1.0, 2.0], [0.5, 0.1]], dtype=torch.double).T, pfy
+        )
+
     def test_hypervolume_calculation(self):
         vocs = deepcopy(TEST_VOCS_BASE)
         vocs.objectives.update({"y2": "MINIMIZE"})
@@ -71,6 +149,80 @@ def test_hypervolume_calculation(self):
 
         assert gen.calculate_hypervolume() == 0.0
 
+    def test_initial_conditions(self):
+        vocs = deepcopy(TEST_VOCS_BASE)
+        vocs.objectives.update({"y2": "MINIMIZE"})
+        vocs.constraints = {}
+
+        test_data = pd.DataFrame(
+            {
+                "x1": [0.1, 0.2, 0.4, 0.4],
+                "x2": [0.1, 0.2, 0.3, 0.2],
+                "y1": [1.0, 2.0, 1.0, 0.0],
+                "y2": [0.5, 0.1, 1.0, 1.5],
+            }
+        )
+        reference_point = {"y1": 10.0, "y2": 1.5}
+        gen = MOBOGenerator(
+            vocs=vocs,
+            reference_point=reference_point,
+            use_pf_as_initial_points=True,
+        )
+        gen.numerical_optimizer.max_time = 1.0
+        gen.add_data(test_data)
+        initial_points = gen._get_initial_conditions()
+
+        assert torch.allclose(
+            torch.tensor([[0.1, 0.2, 0.4], [0.1, 0.2, 0.2]], dtype=torch.double).T,
+            initial_points[:3].squeeze(),
+        )
+        assert len(initial_points) == gen.numerical_optimizer.n_restarts
+        gen.generate(1)
+
+        # try with a small number of n_restarts
+        gen.numerical_optimizer.n_restarts = 1
+        initial_points = gen._get_initial_conditions()
+        assert len(initial_points) == 1
+        gen.generate(1)
+
+        # try with no points on the pareto front
+        gen.reference_point = {"y1": 0.0, "y2": 0.0}
+        gen.numerical_optimizer.n_restarts = 20
+
+        initial_points = gen._get_initial_conditions()
+        assert initial_points is None
+        gen.generate(1)
+
+        # test with constraints
+        vocs.constraints = {"c1": ["GREATER_THAN", 0.5]}
+        test_data = pd.DataFrame(
+            {
+                "x1": [0.1, 0.2, 0.4, 0.4, 0.15],
+                "x2": [0.1, 0.2, 0.3, 0.2, 0.15],
+                "y1": [1.0, 2.0, 1.0, 0.0, 1.5],
+                "y2": [0.5, 0.1, 1.0, 1.5, 0.25],
+                "c1": [1.0, 1.0, 1.0, 1.0, 0.0],
+            }
+        )
+        gen = MOBOGenerator(
+            vocs=vocs,
+            reference_point=reference_point,
+            use_pf_as_initial_points=True,
+        )
+        gen.add_data(test_data)
+        gen.numerical_optimizer.max_time = 1.0
+
+        # make sure that no invalid points make it into the initial conditions
+        ic = gen._get_initial_conditions()
+        assert not torch.allclose(
+            ic[:4],
+            torch.tensor(((0.1, 0.1), (0.2, 0.2), (0.4, 0.2), (0.15, 0.15)))
+            .reshape(4, 1, 2)
+            .double(),
+        )
+
+        gen.generate(1)
+
     def test_log_mobo(self):
         evaluator = Evaluator(function=evaluate_TNK)
         reference_point = tnk_reference_point
@@ -87,6 +239,7 @@ def test_log_mobo(self):
             dump = ele.model_dump()
             generator = MOBOGenerator(vocs=tnk_vocs, **dump)
             X = Xopt(generator=generator, evaluator=evaluator, vocs=tnk_vocs)
+            X.generator.numerical_optimizer.max_iter = 1
 
             X.random_evaluate(3)
             X.step()
diff --git a/xopt/tests/test_numerical_optimizer.py b/xopt/tests/test_numerical_optimizer.py
index 41271576..024d4e20 100644
--- a/xopt/tests/test_numerical_optimizer.py
+++ b/xopt/tests/test_numerical_optimizer.py
@@ -1,3 +1,4 @@
+import time
 from copy import deepcopy
 from unittest.mock import patch
 
@@ -29,11 +30,13 @@ def test_lbfgs_optimizer(self):
         assert candidates.shape == torch.Size([ncandidate, ndim])
 
         # test max time
-        max_time_optimizer = LBFGSOptimizer(max_time=0.1)
+        max_time_optimizer = LBFGSOptimizer(max_time=0.01)
         ndim = 1
         bounds = torch.stack((torch.zeros(ndim), torch.ones(ndim)))
         for ncandidate in [1, 3]:
+            start_time = time.time()
             candidates = max_time_optimizer.optimize(f, bounds, ncandidate)
+            assert time.time() - start_time < 1.0  # allow overhead beyond max_time
             assert candidates.shape == torch.Size([ncandidate, ndim])
 
     def test_grid_optimizer(self):
diff --git a/xopt/tests/test_vocs.py b/xopt/tests/test_vocs.py
index fcc0486f..f85ff519 100644
--- a/xopt/tests/test_vocs.py
+++ b/xopt/tests/test_vocs.py
@@ -346,3 +346,37 @@ def test_normalize_inputs(self):
             vocs.denormalize_inputs(normed_data)[vocs.variable_names].to_numpy(),
             test_data[vocs.variable_names].to_numpy(),
         ).all()
+
+    def test_extract_data(self):
+        vocs = deepcopy(TEST_VOCS_BASE)
+        test_data = pd.DataFrame(
+            {
+                "x1": [0.1, 0.2, 0.4, 0.4],
+                "x2": [0.1, 0.2, 0.3, 0.2],
+                "c1": [1.0, 0.0, 1.0, 0.0],
+                "y1": [0.5, 0.1, 1.0, 1.5],
+            }
+        )
+
+        # test default functionality
+        data = vocs.extract_data(test_data)
+        assert set(data[0].keys()) == {"x1", "x2"}
+        assert set(data[1].keys()) == {"y1"}
+        assert set(data[2].keys()) == {"c1"}
+        for ele in data[:-1]:  # ignore observable data
+            assert len(ele) == 4
+
+        # test return_valid functionality
+        data = vocs.extract_data(test_data, return_valid=True)
+        assert data[0]["x1"].to_list() == [0.1, 0.4]
+        assert data[1]["y1"].to_list() == [0.5, 1.0]
+
+        for ele in data[:-1]:  # ignore observable data
+            assert len(ele) == 2
+
+        # test w/o constraints
+        vocs = deepcopy(TEST_VOCS_BASE)
+        vocs.constraints = {}
+        data = vocs.extract_data(test_data)
+        assert len(data[0]) == 4
+        assert data[2].empty
diff --git a/xopt/vocs.py b/xopt/vocs.py
index 8dbb4f4a..3b67563c 100644
--- a/xopt/vocs.py
+++ b/xopt/vocs.py
@@ -530,7 +530,7 @@ def validate_input_data(self, input_points: pd.DataFrame) -> None:
         """
         validate_input_data(self, input_points)
 
-    def extract_data(self, data: pd.DataFrame, return_raw=False):
+    def extract_data(self, data: pd.DataFrame, return_raw=False, return_valid=False):
         """
         split dataframe into separate dataframes for variables, objectives and
         constraints based on vocs - objective data is transformed based on
@@ -542,6 +542,9 @@ def extract_data(self, data: pd.DataFrame, return_raw=False):
             Dataframe to be split
         return_raw : bool, optional
             If True, return untransformed objective data
+        return_valid : bool, optional
+            If True, only return data that satisfies all of the constraint
+            conditions.
 
         Returns
         -------
@@ -555,7 +558,18 @@ def extract_data(self, data: pd.DataFrame, return_raw=False):
         variable_data = self.variable_data(data, "")
         objective_data = self.objective_data(data, "", return_raw)
         constraint_data = self.constraint_data(data, "")
-        return variable_data, objective_data, constraint_data
+        observable_data = self.observable_data(data, "")
+
+        if return_valid:
+            feasible_status = self.feasibility_data(data)["feasible"]
+            return (
+                variable_data[feasible_status],
+                objective_data[feasible_status],
+                constraint_data[feasible_status],
+                observable_data[feasible_status],
+            )
+
+        return variable_data, objective_data, constraint_data, observable_data
 
     def select_best(self, data: pd.DataFrame, n: int = 1):
         """
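Finally, a sketch of the new `extract_data` return shape and the `return_valid` filter; the VOCS and data here are illustrative:

```python
import pandas as pd

from xopt import VOCS

vocs = VOCS(
    variables={"x1": [0, 1]},
    objectives={"y1": "MINIMIZE"},
    constraints={"c1": ["GREATER_THAN", 0.5]},
)
data = pd.DataFrame({"x1": [0.1, 0.4], "y1": [0.5, 1.0], "c1": [1.0, 0.0]})

# extract_data now returns four frames: variables, objectives, constraints,
# and observables
variables, objectives, constraints, observables = vocs.extract_data(data)

# with return_valid=True only rows satisfying every constraint are kept
variables, objectives, constraints, observables = vocs.extract_data(
    data, return_valid=True
)
assert len(variables) == 1  # only the first row satisfies c1 > 0.5
```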