
Commit

Add files via upload
ritalyu17 authored Feb 28, 2025
1 parent c2eb1fa commit 8b17dbb
Showing 1 changed file with 75 additions and 104 deletions.
179 changes: 75 additions & 104 deletions benchmarks/domains/Hardness.py
@@ -1,10 +1,4 @@
"""
@Time : 2024/09/30 11:17:24
@Author : Daniel Persaud
@Version : 1.0
@Contact : [email protected]
@Desc : Hardness benchmarking, a maximization task on experimental hardness dataset.
"""
# Hardness benchmarking, a maximization task on an experimental hardness dataset.

from __future__ import annotations

@@ -32,61 +26,55 @@
ConvergenceExperimentSettings,
)

# Set up directory and load datasets
HomeDir = os.getcwd()
# Materials Project (MP) bulk modulus dataset
dfMP = pd.read_csv(os.path.join(HomeDir, "benchmarks", "domains", "mp_bulkModulus_goodOverlap.csv"), index_col=0)
# Experimental (Exp) hardness dataset
dfExp = pd.read_csv(os.path.join(HomeDir, "benchmarks", "domains", "exp_hardness_goodOverlap.csv"), index_col=0)
elementCols = dfExp.columns.to_list()[4:]

# IMPORT AND PREPROCESS DATA------------------------------------------------------------------------------
strHomeDir = os.getcwd()
dfMP = pd.read_csv(
os.path.join(strHomeDir, "benchmarks", "domains", "mp_bulkModulus_goodOverlap.csv"), index_col=0
)
dfExp = pd.read_csv(
os.path.join(strHomeDir, "benchmarks", "domains", "exp_hardness_goodOverlap.csv"), index_col=0
)
lstElementCols = dfExp.columns.to_list()[4:]

# ----- FURTHER CLEAN THE DATA BASED ON THE EDA -----
# initialize an empty dataframe to store the integrated hardness values
# Initialize an empty dataframe to store the integrated hardness values
dfExp_integratedHardness = pd.DataFrame()

# for each unique composition in dfExp, make a cubic spline interpolation of the hardness vs load curve
for strComposition_temp in dfExp["composition"].unique():
dfComposition_temp = dfExp[dfExp["composition"] == strComposition_temp]
# sort the data by load
dfComposition_temp = dfComposition_temp.sort_values(by="load")
dfComposition_temp = dfComposition_temp.drop_duplicates(subset="load")
if len(dfComposition_temp) < 5: # continue to the next composition
# For each unique composition in dfExp, make a cubic spline interpolation of the hardness vs load curve
for composition_i in dfExp["composition"].unique():
composition_subset = dfExp[dfExp["composition"] == composition_i]
# Sort the data by load
composition_subset = composition_subset.sort_values(by="load")
composition_subset = composition_subset.drop_duplicates(subset="load")
if len(composition_subset) < 5:  # Skip compositions with fewer than 5 data points
continue

# make a cubic spline interpolation of the hardness vs load curve
spSpline_temp = sp.interpolate.CubicSpline(dfComposition_temp["load"], dfComposition_temp["hardness"])
# integrate the spline from the minimum load to the maximum load
fltIntegral_temp = spSpline_temp.integrate(0.5, 5, extrapolate = True)
# Perform cubic spline interpolation of the hardness vs load curve
spline = sp.interpolate.CubicSpline(composition_subset["load"], composition_subset["hardness"])
# Integrate the spline over the fixed load range 0.5 to 5 (extrapolating where needed)
integrated_value = spline.integrate(0.5, 5, extrapolate=True)

# make a new dataframe with the lstElementCols from dfComposition_temp
dfComposition_temp = dfComposition_temp[['strComposition', 'composition'] + lstElementCols]
dfComposition_temp = dfComposition_temp.drop_duplicates(subset='composition')
dfComposition_temp["integratedHardness"] = fltIntegral_temp
# Make a new dataframe with the elementCols from composition_subset
composition_summary = composition_subset[['strComposition', 'composition'] + elementCols]
composition_summary = composition_summary.drop_duplicates(subset='composition')
composition_summary["integratedHardness"] = integrated_value

dfExp_integratedHardness = pd.concat([dfExp_integratedHardness, dfComposition_temp])
dfExp_integratedHardness = pd.concat([dfExp_integratedHardness, composition_summary])
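For reference, the integration step above can be exercised in isolation. Below is a minimal sketch (not part of this commit) on made-up load/hardness values; it relies only on the same SciPy CubicSpline.integrate call used in the loop.

# Illustrative sketch only -- toy values, not data from the benchmark CSV files.
import numpy as np
from scipy.interpolate import CubicSpline

toy_load = np.array([0.5, 1.0, 2.0, 3.0, 5.0])
toy_hardness = np.array([32.0, 28.5, 26.0, 24.5, 22.0])

toy_spline = CubicSpline(toy_load, toy_hardness)
# Integrate hardness over the 0.5-5 load window, as done per composition above
toy_integral = toy_spline.integrate(0.5, 5, extrapolate=True)
print(toy_integral)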

# ----- TARGET FUNCTION (INTEGRATED HARDNESS) -----
# make a dataframe for the task function (integrated hardness)
dfSearchSpace_target = dfExp_integratedHardness[lstElementCols]
# ----- Target function (integrated hardness) -----
dfSearchSpace_target = dfExp_integratedHardness[elementCols]
dfSearchSpace_target["Function"] = "targetFunction"

# make a lookup table for the task function (integrate hardness) - add the 'integratedHardness' column from dfExp to dfSearchSpace_task
# Make a lookup table for the target function (integrated hardness): append the 'integratedHardness' column from dfExp_integratedHardness to dfSearchSpace_target
dfLookupTable_target = pd.concat([dfSearchSpace_target, dfExp_integratedHardness["integratedHardness"]], axis=1)
dfLookupTable_target = dfLookupTable_target.rename(columns={"integratedHardness":"Target"})

# ----- SOURCE FUNCTION (VOIGT BULK MODULUS) -----
# make a dataframe for the source function (voigt bulk modulus)
dfSearchSpace_source = dfMP[lstElementCols]
# ----- Source function (Voigt bulk modulus) -----
dfSearchSpace_source = dfMP[elementCols]
dfSearchSpace_source["Function"] = "sourceFunction"

# make a lookup table for the source function (voigt bulk modulus) - add the 'vrh' column from dfMP to dfSearchSpace_source
# Make a lookup table for the source function (Voigt bulk modulus): append the 'vrh' column from dfMP to dfSearchSpace_source
dfLookupTable_source = pd.concat([dfSearchSpace_source, dfMP["vrh"]], axis=1)
dfLookupTable_source = dfLookupTable_source.rename(columns={"vrh": "Target"})

# concatenate the two dataframes
# Combine the target and source search spaces
dfSearchSpace = pd.concat([dfSearchSpace_target, dfSearchSpace_source])
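The combined dfSearchSpace mixes target and source rows that differ only in the 'Function' column; the benchmark functions below turn that column into a TaskParameter whose active_values restrict recommendations to the target task. A minimal standalone sketch of this pattern follows (not part of this commit; the element columns 'B' and 'Ti' and their values are invented, and the baybe import paths are assumed since the file's imports are collapsed in this diff).

# Illustrative sketch only -- toy columns and values.
import numpy as np
import pandas as pd
from baybe.parameters import NumericalDiscreteParameter, TaskParameter
from baybe.searchspace import SearchSpace

toy_space = pd.DataFrame({
    "B": [0.4, 0.5, 0.6],
    "Ti": [0.6, 0.5, 0.4],
    "Function": ["targetFunction", "targetFunction", "sourceFunction"],
})
toy_parameters = [
    NumericalDiscreteParameter(name=col, values=np.unique(toy_space[col]), tolerance=0.0)
    for col in ("B", "Ti")
]
toy_parameters.append(
    TaskParameter(
        name="Function",
        values=["targetFunction", "sourceFunction"],
        active_values=["targetFunction"],  # recommend only for the target task
    )
)
toy_searchspace = SearchSpace.from_dataframe(toy_space, parameters=toy_parameters)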

def hardness(settings: ConvergenceExperimentSettings) -> DataFrame:
@@ -107,52 +95,46 @@ def hardness(settings: ConvergenceExperimentSettings) -> DataFrame:
Objective: Maximization
"""

lstParameters_bb = []
lstParameters_bb_noTask = []
parameters = []
parameters_noTask = []

# for each column in dfSearchSpace except the last one, create a NumericalDiscreteParameter
for strCol_temp in dfSearchSpace.columns[:-1]:
bbParameter_temp = NumericalDiscreteParameter(
name=strCol_temp,
values=np.unique(dfSearchSpace[strCol_temp]),
# For each column in dfSearchSpace except the last one, create a NumericalDiscreteParameter
for column in dfSearchSpace.columns[:-1]:
parameter_i = NumericalDiscreteParameter(
name=column,
values=np.unique(dfSearchSpace[column]),
tolerance=0.0,
)
# append the parameter to the list of parameters
lstParameters_bb.append(bbParameter_temp)
lstParameters_bb_noTask.append(bbParameter_temp)
parameters.append(parameter_i)
parameters_noTask.append(parameter_i)

# create a TaskParameter
bbTaskParameter = TaskParameter(
# Create TaskParameter
taskParameter = TaskParameter(
name="Function",
values=["targetFunction", "sourceFunction"],
active_values=["targetFunction"],
)
parameters.append(taskParameter)

# append the taskParameter to the list of parameters
lstParameters_bb.append(bbTaskParameter)

search_space = SearchSpace.from_dataframe(dfSearchSpace, parameters=lstParameters_bb)
SearchSpace_noTask = SearchSpace.from_dataframe(dfSearchSpace_target[lstElementCols], parameters=lstParameters_bb_noTask)
search_space = SearchSpace.from_dataframe(dfSearchSpace, parameters=parameters)
SearchSpace_noTask = SearchSpace.from_dataframe(dfSearchSpace_target[elementCols], parameters=parameters_noTask)

objective = NumericalTarget(name="Target", mode=TargetMode.MAX).to_objective()

scenarios: dict[str, Campaign] = {
"Random Recommender": Campaign(
searchspace=SearchSpace.from_dataframe(
dfSearchSpace_target[lstElementCols],
parameters=lstParameters_bb_noTask
dfSearchSpace_target[elementCols],
parameters=parameters_noTask
),
recommender=RandomRecommender(),
objective=objective,
),
"Default Recommender": Campaign(
searchspace=SearchSpace.from_dataframe(
dfSearchSpace,
parameters=lstParameters_bb,
),
searchspace=search_space,
objective=objective,
),
"noTask_bb": Campaign(
"No TaskParameter": Campaign(
searchspace=SearchSpace_noTask,
objective=objective,
),
@@ -186,44 +168,41 @@ def hardness_transfer_learning(settings: ConvergenceExperimentSettings) -> DataFrame:
Objective: Maximization
"""

lstParameters_bb = []
lstParameters_bb_noTask = []
parameters = []
parameters_noTask = []

# for each column in dfSearchSpace except the last one, create a NumericalDiscreteParameter
for strCol_temp in dfSearchSpace.columns[:-1]:
bbParameter_temp = NumericalDiscreteParameter(
name=strCol_temp,
values=np.unique(dfSearchSpace[strCol_temp]),
# For each column in dfSearchSpace except the last one, create a NumericalDiscreteParameter
for column in dfSearchSpace.columns[:-1]:
parameter_i = NumericalDiscreteParameter(
name=column,
values=np.unique(dfSearchSpace[column]),
tolerance=0.0,
)
# append the parameter to the list of parameters
lstParameters_bb.append(bbParameter_temp)
lstParameters_bb_noTask.append(bbParameter_temp)
parameters.append(parameter_i)
parameters_noTask.append(parameter_i)

# create a TaskParameter
bbTaskParameter = TaskParameter(
# Create TaskParameter
taskParameter = TaskParameter(
name="Function",
values=["targetFunction", "sourceFunction"],
active_values=["targetFunction"],
)

# append the taskParameter to the list of parameters
lstParameters_bb.append(bbTaskParameter)
parameters.append(taskParameter)

objective = NumericalTarget(name="Target", mode=TargetMode.MAX).to_objective()

search_space = SearchSpace.from_dataframe(dfSearchSpace, parameters=parameters)
campaign = Campaign(searchspace=search_space, objective=objective)

# Use different initial data sizes ----------------------
# Create a list of dataframes with n samples from dfLookupTable_source to use as initial data
for n in (2, 4, 6, 30):
bbSearchSpace = SearchSpace.from_dataframe(dfSearchSpace, parameters=lstParameters_bb)
bbCampaign_temp = Campaign(
searchspace=bbSearchSpace,
objective=objective)
# create a list of dataframes with n samples from dfLookupTable_source to use as initial data
lstInitialData_temp = [dfLookupTable_source.sample(n) for _ in range(settings.n_mc_iterations)]
initialData_i = [dfLookupTable_source.sample(n) for _ in range(settings.n_mc_iterations)]

return simulate_scenarios(
{f"{n} Initial Data": bbCampaign_temp},
{f"{n} Initial Data": campaign},
dfLookupTable_target,
initial_data=lstInitialData_temp,
initial_data=initialData_i,
batch_size=settings.batch_size,
n_doe_iterations=settings.n_doe_iterations,
impute_mode="error",
@@ -252,18 +231,14 @@ def hardness_transfer_learning(settings: ConvergenceExperimentSettings) -> DataFrame:

if __name__ == "__main__":

# describe the benchmark task
# Describe the benchmark task
print("Hardness benchmark is a maximization task on experimental hardness dataset. ")
print("The dataset is downselect to 94 composition with more than 5 hardness values. ")
print("The hardness values are integrated using cubic spline interpolation, and the task is to maximize the integrated hardness. ")
print("")
print("Hardness benchmark compares across random, default, and no task parameter set up. ")
print("")
print("The hardness values are integrated using cubic spline interpolation, and the task is to maximize the integrated hardness. \n")
print("Hardness benchmark compares across random, default, and no task parameter set up. \n")
print("Hardness transfer learning benchmark compares across different initialized data sizes. ")


# Visualize the Hardness value histogram
# initialize a subplot with 1 row and 1 column
# Visualize the Hardness value histogram
fig, ax = plt.subplots(
1, 1,
figsize=(8, 5),
@@ -272,13 +247,9 @@ def hardness_transfer_learning(settings: ConvergenceExperimentSettings) -> DataFrame:
constrained_layout=True
)

# plot a histogram of the hardness values
# Plot a histogram of the hardness values
ax.hist(dfExp["hardness"], bins=20)

# Add x-axis and y-axis labels and a title
ax.set_xlabel("Hardness")
ax.set_ylabel("Frequency")
ax.set_title("Integrated Hardness Distribution")

# add a grid
ax.grid()
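Finally, a hypothetical usage sketch of the two benchmark entry points. The ConvergenceExperimentSettings constructor arguments are assumed from the attributes accessed above (batch_size, n_doe_iterations, n_mc_iterations); its actual signature is not shown in this diff, and the numbers below are placeholders.

# Hypothetical usage sketch -- not part of this commit; settings signature assumed.
settings = ConvergenceExperimentSettings(
    batch_size=2,
    n_doe_iterations=20,
    n_mc_iterations=5,
)
df_results = hardness(settings)  # convergence curves for the random/default/no-task scenarios
df_results_tl = hardness_transfer_learning(settings)  # convergence with different source data sizes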
