Bayesian Optimization Hyperband (Sequential)
Within the EGONN framework, three HP tuning frequencies are explored: keeping the initially tuned HPs fixed (HPO-static), retuning every five infill iterations (HPO-5itr), and retuning at every iteration (HPO-1itr). These strategies trade off the benefit of dynamically adjusted HPs against the additional tuning time. HPO is carried out with Bayesian optimization Hyperband (BOHB), building on its successful application in the one-shot method with the two-dimensional Branin function.
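The three strategies differ only in the condition that triggers a BOHB retuning inside the infill loop. A minimal sketch of that condition follows; the should_retune helper and the strategy flag are hypothetical and do not appear in the EGONN script shown below.
def should_retune(itr, strategy="static", n=5):
    # Hypothetical helper: decides whether BOHB is rerun at infill iteration itr.
    if strategy == "static":
        return itr == 0  # HPO-static: tune once, before the first infill
    if strategy == "1itr":
        return True  # HPO-1itr: retune at every infill iteration
    if strategy == "5itr":
        return itr % n == 0  # HPO-5itr: retune every n (here five) iterations
    raise ValueError(f"unknown strategy: {strategy}")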
The EGONN method starts with an initial sample of 10 points and adds 45 infill points, using a Halton sequence so that all strategies share the same starting design. The study compares the HPO strategies within EGONN against the traditional EGO method as a benchmark. Initially, all HPs are optimized; a follow-up study then optimizes only a subset of HPs selected from the preliminary findings. The evaluation highlights convergence performance and time efficiency across the methods.
The following piece of code offers an example:
from imp_functions_bohb_workers import *
from scipy import optimize
import argparse
import subprocess
import time
import pickle
from scipy.io import savemat
from scipy.stats import qmc
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
import logging
import hpbandster.core.nameserver as hpns
# logging.WARNING keeps the hpbandster output brief; use DEBUG for verbose logs
# logging.basicConfig(level=logging.DEBUG)
logging.getLogger('hpbandster').setLevel(logging.WARNING)
argparser = argparse.ArgumentParser()
argparser.add_argument("--train_size", type=int, default=10)
argparser.add_argument("--budget", default=50, type=int)
argparser.add_argument("--runs", type=int, default=1)
argparser.add_argument('--n_workers', type=int, default=10)
args = argparser.parse_args()
# BOHB settings
iteration = 40  # number of BOHB iterations
min_b = 5  # minimum budget (fidelity) per configuration
max_b = 30  # maximum budget (fidelity) per configuration
# Problem dimension and training-data sizes
num_dim = 3  # number of input dimensions
num_mean_var = 30  # total number of DOE points
num_mean = 10  # points used to train the mean NN
num_var = num_mean_var - num_mean  # additional points used for the variance NN
budget = args.budget # Maximum number of iterations/infill points
print("###############################################", flush=True)
print("Mean training size: {}".format(num_mean), flush=True)
print("Variance training size: {}".format(num_var), flush=True)
print("Budget: {}".format(budget), flush=True)
print("Parallel worker size: {}".format(args.n_workers), flush=True)
print("###############################################", flush=True)
# Bounds
ub = np.array([1, 1, 1])  # upper bounds for each dimension
lb = np.array([0, 0, 0])  # lower bounds for each dimension
bounds = []
for i in np.arange(len(lb)):
    bounds.append((lb[i], ub[i]))
u_bounds = [1, 1, 1]
l_bounds = [0, 0, 0]
sampler_mean_var = qmc.Halton(d=num_dim, scramble=False)
sample_mean_var = sampler_mean_var.random(n=num_mean_var)
x_mean_var = qmc.scale(sample_mean_var, l_bounds, u_bounds)
x_mean = x_mean_var[:num_mean]
y_mean = np.array([HARTMANN3(x) for x in x_mean])
x_var = x_mean_var[num_mean:]
y_var = np.array([HARTMANN3(x) for x in x_var])
# Storing the data
data = {
    'x': x_mean,
    'y': y_mean
}
# Saving the data
savemat('doe.mat', data)
training_nrmse = np.zeros((budget, 1))
training_nrmse_mean = np.zeros((budget, 1))
training_nrmse_var = np.zeros((budget, 1))
store_times = []
activation = np.zeros((budget, 1))
epoch = np.zeros((budget, 1))
layer = np.zeros((budget, 1))
neuron = np.zeros((budget, 1))
var_activation = np.zeros((budget, 1))
var_epoch = np.zeros((budget, 1))
var_layer = np.zeros((budget, 1))
var_neuron = np.zeros((budget, 1))
var_learning_rate = np.zeros((budget, 1))
loss = np.zeros((budget, 1))
var_loss = np.zeros((budget, 1))
# ----------------------------------------------------------------------------
# EGONN Loop
# ----------------------------------------------------------------------------
# HP retuning interval used with the commented "if itr % n == 0:" condition below
# n = 1  # HPO-1itr: retune every infill iteration
n = 5  # HPO-5itr: retune every five infill iterations
for itr in range(budget):
    print("\nInfill iteration: {}".format(itr + 1))
    print("Optimizing hyperparameters and training mean NN", flush=True)
    tic = time.time()
    ######### Train the mean NN
    if itr == 0:
    # if itr % n == 0:
        # Split the training data into training and cross-validation data
        x_train, x_cv, y_train, y_cv = train_test_split(x_mean, y_mean, test_size=0.2)
        # Serialize training data to pass to workers
        data = {
            'x_train': x_train,
            'y_train': y_train,
            'x_cv': x_cv,
            'y_cv': y_cv
        }
        with open('train_data.pkl', 'wb') as f:
            pickle.dump(data, f)
        # Start a nameserver
        NS = hpns.NameServer(run_id='bohb', host='127.0.0.1', port=None)
        NS.start()
        # Start worker processes using subprocess
        worker_processes = []
        for i in range(args.n_workers):
            # The worker script is located two levels up from the current directory
            worker_cmd = f'python ../../imp_functions_bohb_workers.py --run_id bohb --host 127.0.0.1 --worker {i}'
            worker_processes.append(subprocess.Popen(worker_cmd, shell=True))
        # Give workers some time to start up and register
        time.sleep(5)
        # Run hpbandster
        res = run_hpbandster(x_train, y_train, x_cv, y_cv, iteration, min_b, max_b)
        # Extract the best configuration
        best_config = res.get_id2config_mapping()[res.get_incumbent_id()]['config']
        best_loss = res.get_runs_by_id(res.get_incumbent_id())[-1]['loss']
        best_loss = np.array(best_loss)
        print(f"Best objective (loss for training + CV): {best_loss}", flush=True)
        print(f"Best hyperparameters: {best_config}", flush=True)
        opt_params = {
            "num_epochs": best_config["epoch"],
            "activation": best_config["act"],
            "num_hidden_layers": best_config["layer"],
            "num_neurons": best_config["neuron"]
        }
        # Train the mean NN with the optimized hyperparameters
        mean_NN, x_transform, y_transform = train(x_train, y_train, opt_params)
    else:
        # Reuse the previously tuned hyperparameters
        mean_NN, x_transform, y_transform = train(x_mean, y_mean, opt_params)
    ########## Train the var NN
    print("Optimizing hyperparameters and training var NN", flush=True)
    # Predict value at x_mean
    y_mean_pred = predict(x_mean, x_transform, y_transform, mean_NN)
    # Calculate the error (s2) for the first dataset
    error_mean = y_mean - y_mean_pred
    # Rescale the predicted y at x_var for calculating the error
    y_var_pred = predict(x_var, x_transform, y_transform, mean_NN)
    # Calculate the error (s2) for the second dataset
    error_var = y_var - y_var_pred
    # Stack the error data
    s2_var = np.vstack((error_mean, error_var))
    if itr == 0:
    # if itr % n == 0:
        # Split the training data into training and cross-validation data
        x_train, x_cv, y_train, y_cv = train_test_split(np.vstack((x_mean, x_var)), s2_var, test_size=0.2)
        # Serialize training data to pass to workers
        data = {
            'x_train': x_train,
            'y_train': y_train,
            'x_cv': x_cv,
            'y_cv': y_cv
        }
        with open('train_data.pkl', 'wb') as f:
            pickle.dump(data, f)
        # Start a nameserver
        NS = hpns.NameServer(run_id='bohb', host='127.0.0.1', port=None)
        NS.start()
        # Start worker processes using subprocess
        worker_processes = []
        for i in range(args.n_workers):
            # The worker script is located two levels up from the current directory
            worker_cmd = f'python ../../imp_functions_bohb_workers.py --run_id bohb --host 127.0.0.1 --worker {i}'
            worker_processes.append(subprocess.Popen(worker_cmd, shell=True))
        # Give workers some time to start up and register
        time.sleep(5)
        # Run hpbandster
        res = run_hpbandster(x_train, y_train, x_cv, y_cv, iteration, min_b, max_b)
        # Extract the best configuration
        best_config_var = res.get_id2config_mapping()[res.get_incumbent_id()]['config']
        best_loss_var = res.get_runs_by_id(res.get_incumbent_id())[-1]['loss']
        best_loss_var = np.array(best_loss_var)
        print(f"Best objective (loss for training + CV): {best_loss_var}", flush=True)
        print(f"Best hyperparameters: {best_config_var}", flush=True)
        opt_params_var = {
            "num_epochs": best_config_var["epoch"],
            "activation": best_config_var["act"],
            "num_hidden_layers": best_config_var["layer"],
            "num_neurons": best_config_var["neuron"]
        }
        # Train the variance NN with the optimized hyperparameters
        var_NN, x_s2_transform, s2_transform = train(np.vstack((x_mean, x_var)), s2_var, opt_params_var)
    else:
        # Reuse the previously tuned hyperparameters
        var_NN, x_s2_transform, s2_transform = train(np.vstack((x_mean, x_var)), s2_var, opt_params_var)
    toc = time.time()
    times_bohb = toc - tic
    store_times.append(times_bohb)
    print(f"Elapsed time for HPO: {times_bohb} seconds")
    # Transform the data
    x_train = x_transform.transform(x_train)
    # Predict at training data
    y_pred = mean_NN(x_train)
    # Transform back to original scale
    x_train = x_transform.inverse_transform(x_train)
    y_pred = y_transform.inverse_transform(y_pred)
    training_loss = np.sqrt(mean_squared_error(y_train, y_pred) / np.ptp(y_train))
    s2_var_pred = predict(np.vstack((x_mean, x_var)), x_s2_transform, s2_transform, var_NN)
    training_loss_mean = np.sqrt(mean_squared_error(y_mean, y_mean_pred) / np.ptp(y_mean))
    training_loss_var = np.sqrt(mean_squared_error(s2_var, s2_var_pred) / np.ptp(s2_var))
    print("Training NRMSE for mean NN: {}".format(training_loss), flush=True)
    print("Training NRMSE2 for mean NN: {}".format(training_loss_mean), flush=True)
    print("Training NRMSE for var NN: {}".format(training_loss_var), flush=True)
    # ----------------------------------------------------------------------------
    # Maximizing the EI (by minimizing its negative)
    # ----------------------------------------------------------------------------
    print("######################## Adding Infill {} of {}".format(itr + 1, budget), flush=True)
    # Finding the feasible y_min required for the EI calculation
    ymin = np.min(y_mean)
    index = np.where(y_mean == ymin)[0][0]
    xmin = x_mean[index, :]
    print("Best objective found: {}".format(ymin), flush=True)
    print("Best objective found at: {}".format(xmin), flush=True)
    ######### Optimize the expected improvement
    print("Maximizing EI", flush=True)
    # Maximize the EI
    ei_opt = optimize.differential_evolution(expectedImprovement, bounds, popsize=50, mutation=0.5, recombination=0.9,
                                             args=(x_transform, y_transform, x_s2_transform, s2_transform, ymin, mean_NN, var_NN), polish=False)
    # Compute the function value at the new x
    x_new = ei_opt.x.reshape(1, -1)
    # f_new = HARTMANN3(x_new).reshape(1, -1)
    f_new = np.array([HARTMANN3(x) for x in x_new]).reshape(1, -1)
    print("New infill point: {}".format(ei_opt.x), flush=True)
    print("max EI: {}".format(-ei_opt.fun), flush=True)
    # ----------------------------------------------------------------------------
    # Save the data
    # ----------------------------------------------------------------------------
    # Store the EI optimum and the corresponding x
    if itr == 0:
        x_ei_optimums = ei_opt.x.reshape(1, -1)
        y_ei_optimums = -ei_opt.fun.reshape(-1,)
    else:
        x_ei_optimums = np.vstack((x_ei_optimums, ei_opt.x.reshape(1, -1)))
        y_ei_optimums = np.append(y_ei_optimums, -ei_opt.fun.reshape(-1,))
    if itr == 0:
        x_min_data = xmin.reshape(1, -1)
        y_min_data = ymin.reshape(-1,)
    else:
        x_min_data = np.vstack((x_min_data, xmin.reshape(1, -1)))
        y_min_data = np.append(y_min_data, ymin.reshape(-1,))
    # Append the new point for the next iteration
    x_mean = np.vstack((x_mean, x_new))
    y_mean = np.vstack((y_mean, f_new))
    if itr == 0:
    # if itr % n == 0:
        epoch[itr, 0] = opt_params['num_epochs']
        activation[itr, 0] = opt_params['activation']
        layer[itr, 0] = opt_params['num_hidden_layers']
        neuron[itr, 0] = opt_params['num_neurons']
        var_epoch[itr, 0] = opt_params_var['num_epochs']
        var_activation[itr, 0] = opt_params_var['activation']
        var_layer[itr, 0] = opt_params_var['num_hidden_layers']
        var_neuron[itr, 0] = opt_params_var['num_neurons']
    training_nrmse[itr, 0] = training_loss
    training_nrmse_mean[itr, 0] = training_loss_mean
    training_nrmse_var[itr, 0] = training_loss_var
    loss[itr, 0] = best_loss
    var_loss[itr, 0] = best_loss_var
    data = {
        'x_data_min': x_min_data,
        'f_data_min': y_min_data,
        'xei': x_ei_optimums,
        'yei': y_ei_optimums,
        'y': y_mean,
        "activation": activation,
        "epoch": epoch,
        "layer": layer,
        "neuron": neuron,
        "var_activation": var_activation,
        "var_epoch": var_epoch,
        "var_layer": var_layer,
        "var_neuron": var_neuron,
        "training_nrmse_mean": training_nrmse,
        "training_nrmse_var": training_nrmse_var,
        "training_nrmse_mean2": training_nrmse_mean,
        "time": store_times,
        "var_loss": var_loss,
        "loss": loss,
    }
    savemat("result.mat", data)
# Shutdown the optimizer and nameserver
NS.shutdown()
# Terminate worker processes
for p in worker_processes:
    p.terminate()
    p.wait()
Note
The listing above uses if itr == 0: in each HPO block, which corresponds to HPO-static: the hyperparameters are tuned only once, before the first infill point is added. To test HPO-1itr or HPO-5itr, set n (defined just above the infill loop) to 1 or 5, respectively, and replace each if itr == 0: with the commented alternative if itr % n == 0:.
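As a usage reference, the script can be launched with the command-line flags defined by the argument parser at the top of the listing; the script name egonn_bohb_hartmann3.py is a placeholder for whatever file name you save the listing under:
python egonn_bohb_hartmann3.py --budget 50 --n_workers 10
The worker script imp_functions_bohb_workers.py must be importable and reachable at the relative path used in worker_cmd (two levels up from the working directory).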