Bayesian Optimization Hyperband
Bayesian optimization Hyperband (BOHB) combines the guided search of Bayesian optimization with Hyperband's efficient resource allocation. It replaces the Gaussian process (GP) surrogate of standard Bayesian optimization with a tree-structured Parzen estimator (TPE), which uses kernel density estimation to model the probability density of high-performing HP configurations and to steer sampling toward them. Because configurations can be evaluated by independent workers, BOHB also evaluates multiple HP configurations in parallel, making it efficient at navigating complex HP spaces.
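To make the density-ratio idea concrete, the short sketch below fits two kernel density estimates with SciPy's gaussian_kde, one to the best-performing fraction of previously evaluated configurations and one to the rest, and ranks fresh candidates by the ratio of the two densities. It is a self-contained, one-dimensional illustration of the TPE mechanism, not the model used inside hpbandster:
import numpy as np
from scipy.stats import gaussian_kde

rng = np.random.default_rng(0)

# Pretend we have already evaluated 100 one-dimensional HP configurations
x_obs = rng.uniform(0.0, 1.0, size=100)
y_obs = (x_obs - 0.3) ** 2 + 0.01 * rng.standard_normal(100)  # observed losses

# Split the observations at the gamma-quantile of the losses
gamma = 0.2
threshold = np.quantile(y_obs, gamma)
good = x_obs[y_obs <= threshold]   # low-loss ("good") configurations
bad = x_obs[y_obs > threshold]     # the remaining ("bad") configurations

# Kernel density estimates of the two groups
l = gaussian_kde(good)
g = gaussian_kde(bad)

# Rank fresh candidates by the density ratio l(x)/g(x) and pick the best one
candidates = rng.uniform(0.0, 1.0, size=1000)
scores = l(candidates) / (g(candidates) + 1e-12)
x_next = candidates[np.argmax(scores)]
print("Next configuration to evaluate:", x_next)
In BOHB, this model-based sampling is wrapped inside Hyperband's successive-halving schedule, so cheap low-budget evaluations filter out weak configurations before the expensive high-budget ones are run.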
The following script offers a complete example of running BOHB with the hpbandster package:
from imp_functions_bohb_workers import *
from scipy.optimize import differential_evolution
import argparse
import os
import subprocess
import time
import pickle
from scipy.io import savemat
from scipy.stats import qmc
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
import logging
import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB
# Keep hpbandster's logging at WARNING to reduce log output;
# uncomment the next line for verbose debugging
# logging.basicConfig(level=logging.DEBUG)
logging.getLogger('hpbandster').setLevel(logging.WARNING)
argparser = argparse.ArgumentParser()
argparser.add_argument("--train_size", type=int, default=10)
argparser.add_argument("--runs", type=int, default=1)
argparser.add_argument('--n_workers', type=int, default=10)
args = argparser.parse_args()
# BOHB settings: number of optimization iterations and the minimum and
# maximum budget allotted to a single configuration evaluation
iteration = 40
min_b = 5
max_b = 30
print("###############################################", flush=True)
print("Training size: {}".format(args.train_size), flush=True)
print("Parallel worker size: {}".format(args.n_workers), flush=True)
print("###############################################", flush=True)
# Bounds
ub = np.array([8.0, 8.0])
lb = np.array([0.0, 0.0])
bounds = [(lb[i], ub[i]) for i in np.arange(len(lb))]
num_dim = 2
samples = args.train_size
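# Build the training inputs with a Halton sequence over the bounds above and
# evaluate the Branin test function on them (branin is expected to come from
# the wildcard import of imp_functions_bohb_workers at the top of the script)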
sampler_x_train = qmc.Halton(d=num_dim, scramble=False)
sample_x_train = sampler_x_train.random(n=samples)
x = qmc.scale(sample_x_train, lb, ub)
y = branin(x)
# Split the training data into training and cross-validation data
x_train, x_cv, y_train, y_cv = train_test_split(x, y, test_size=0.2)
x_opt = np.zeros((args.runs, num_dim))
f_opt = np.zeros((args.runs, 1))
epoch = np.zeros((args.runs, 1))
activation = np.zeros((args.runs, 1))
layer = np.zeros((args.runs, 1))
neuron = np.zeros((args.runs, 1))
training_nrmse = np.zeros((args.runs, 1))
loss = np.zeros((args.runs, 1))
store_times = []
# Serialize training data to pass to workers
data = {
    'x_train': x_train,
    'y_train': y_train,
    'x_cv': x_cv,
    'y_cv': y_cv
}
with open('train_data.pkl', 'wb') as f:
    pickle.dump(data, f)
# Running the optimization loop
for idx in range(args.runs):
print("\nIteration: {}".format(idx + 1), flush=True)
# Start a nameserver
NS = hpns.NameServer(run_id='bohb', host='127.0.0.1', port=None)
NS.start()
# Start worker processes using subprocess
worker_processes = []
for i in range(args.n_workers):
# Open imp_function located in two level up from the current directory
worker_cmd = f'python ../../imp_functions_bohb_workers.py --run_id bohb --host 127.0.0.1 --worker {i}'
worker_processes.append(subprocess.Popen(worker_cmd, shell=True))
# Give workers some time to start up and register
time.sleep(5)
tic = time.time()
# Run hpbandster
res = run_hpbandster(x_train, y_train, x_cv, y_cv, iteration, min_b, max_b)
    # Extract the best configuration and its associated loss
    best_config = res.get_id2config_mapping()[res.get_incumbent_id()]['config']
    best_loss = res.get_runs_by_id(res.get_incumbent_id())[-1]['loss']
    best_loss = np.array(best_loss)
    print(f"Best objective (loss for training + CV): {best_loss}", flush=True)
    print(f"Best hyperparameters: {best_config}", flush=True)
    opt_params = {
        "num_epochs": best_config["epoch"],
        "activation": best_config["act"],
        "num_hidden_layers": best_config["layer"],
        "num_neurons": best_config["neuron"]
    }
    # Retrain the model with the best hyperparameters
    model, x_transform, y_transform = train(x_train, y_train, opt_params)
    # Transform the data
    x_train = x_transform.transform(x_train)
    # Predict at the training data
    y_pred = model(x_train)
    # Transform back to the original scale
    x_train = x_transform.inverse_transform(x_train)
    y_pred = y_transform.inverse_transform(y_pred)
    # NRMSE: root-mean-squared error normalized by the range of the targets
    training_loss = np.sqrt(mean_squared_error(y_train, y_pred)) / np.ptp(y_train)
    print("Training NRMSE: {}".format(training_loss), flush=True)
    # Minimize the trained NN surrogate over the original bounds
    result = differential_evolution(predict, bounds, mutation=0.5, recombination=0.9,
                                    args=(x_transform, y_transform, model), polish=True, disp=False)
    print("Optimal x: {}".format(result.x), flush=True)
    print("Optimal f: {}".format(result.fun), flush=True)
    toc = time.time()
    print(f"Elapsed time: {toc - tic} seconds")
    # Store the results of this run
    epoch[idx, 0] = opt_params['num_epochs']
    activation[idx, 0] = opt_params['activation']
    layer[idx, 0] = opt_params['num_hidden_layers']
    neuron[idx, 0] = opt_params['num_neurons']
    x_opt[idx, 0] = result.x[0]
    x_opt[idx, 1] = result.x[1]
    f_opt[idx, 0] = result.fun
    training_nrmse[idx, 0] = training_loss
    loss[idx, 0] = best_loss
    # Record the wall-clock time of this run
    store_times.append(toc - tic)
    data = {
        "x": x_opt,
        "fun": f_opt,
        "training_nrmse": training_nrmse,
        "time": store_times,
        "activation": activation,
        "epoch": epoch,
        "layer": layer,
        "neuron": neuron,
        "loss": loss,
    }
    savemat("result.mat", data)
    # Shut down the nameserver
    NS.shutdown()
    # Terminate the worker processes
    for p in worker_processes:
        p.terminate()
        p.wait()
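The driver above imports branin, train, predict, and run_hpbandster from imp_functions_bohb_workers.py, which is not listed here. The sketch below shows what the BOHB-facing part of that module could look like; it assumes the hyperparameter names the driver reads from best_config (epoch, act, layer, neuron), treats the budget as the number of training iterations for a cheap evaluation, and uses scikit-learn's MLPRegressor as a stand-in for the actual network-training code. All of these details would need to be matched to the real module:
# Hypothetical sketch of the BOHB-facing part of imp_functions_bohb_workers.py;
# MLPRegressor stands in for the actual network-training code used in the book
import pickle
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
from hpbandster.core.worker import Worker
from hpbandster.optimizers import BOHB
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

class NNWorker(Worker):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Training/CV data serialized by the driver script
        with open('train_data.pkl', 'rb') as f:
            self.data = pickle.load(f)

    def compute(self, config, budget, **kwargs):
        # Assumption: the budget BOHB hands out (between min_b and max_b in the
        # driver) is spent as the number of training iterations of this cheap fit
        model = MLPRegressor(hidden_layer_sizes=(config['neuron'],) * config['layer'],
                             activation=config['act'], max_iter=int(budget))
        model.fit(self.data['x_train'], self.data['y_train'].ravel())
        y_cv_pred = model.predict(self.data['x_cv'])
        cv_loss = mean_squared_error(self.data['y_cv'], y_cv_pred)
        return {'loss': float(cv_loss), 'info': {'budget': budget}}

    @staticmethod
    def get_configspace():
        # Search space with the keys the driver reads from best_config
        # (epoch, act, layer, neuron); the ranges are illustrative only
        cs = CS.ConfigurationSpace()
        cs.add_hyperparameters([
            CSH.UniformIntegerHyperparameter('epoch', lower=50, upper=500),
            CSH.CategoricalHyperparameter('act', ['relu', 'tanh', 'logistic']),
            CSH.UniformIntegerHyperparameter('layer', lower=1, upper=5),
            CSH.UniformIntegerHyperparameter('neuron', lower=8, upper=128)])
        return cs

def run_hpbandster(x_train, y_train, x_cv, y_cv, n_iterations, min_budget, max_budget):
    # Configure the BOHB master against the nameserver started by the driver;
    # the workers it launched register themselves under the same run_id
    bohb = BOHB(configspace=NNWorker.get_configspace(),
                run_id='bohb', nameserver='127.0.0.1',
                min_budget=min_budget, max_budget=max_budget)
    res = bohb.run(n_iterations=n_iterations)
    bohb.shutdown(shutdown_workers=False)
    return res
When launched as a subprocess by the driver, the module would additionally parse the --run_id, --host, and --worker arguments and start a worker with NNWorker(run_id='bohb', nameserver='127.0.0.1', id=...).run(background=False), so that it registers with the nameserver and waits for configurations to evaluate.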