Bayesian Optimization Hyperband (Sequential)

Within the EGONN framework, three HP tuning frequencies are explored: keeping the initially tuned HPs fixed (HPO-static), retuning every five iterations (HPO-5itr), and retuning every iteration (HPO-1itr). These strategies aim to balance dynamic HP adjustment against time efficiency. HPO is carried out with Bayesian optimization Hyperband (BOHB), building on its successful application in the one-shot method with the two-dimensional Branin function.
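
Each strategy reduces to a single condition inside the infill loop of the listing below. The following minimal sketch (the helper name is hypothetical; itr is the infill counter and n the retuning interval) only illustrates the three schedules:

def tune_hps_this_iteration(itr, strategy, n=5):
    if strategy == "HPO-static":
        return itr == 0        # tune once, before the first infill point
    if strategy == "HPO-5itr":
        return itr % n == 0    # retune every n = 5 infill iterations
    if strategy == "HPO-1itr":
        return True            # retune at every infill iteration
    raise ValueError(f"unknown strategy: {strategy}")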

The EGONN method starts with an initial sample of 10 points and adds 45 infill points, using a Halton sequence to ensure consistent starting points across runs. The study compares the HPO strategies within EGONN against the traditional EGO method as a benchmark. Initially, all HPs are optimized; a follow-up study then optimizes only a subset of HPs selected from the preliminary findings. The evaluation focuses on convergence performance and time efficiency across the methods.

The following code shows an example implementation:

from imp_functions_bohb_workers import *
from scipy import optimize

import argparse
import subprocess
import time
import pickle
from scipy.io import savemat
from scipy.stats import qmc
from sklearn.model_selection import train_test_split
import numpy as np
import logging
import hpbandster.core.nameserver as hpns

# logging.WARNING keeps the hpbandster output quiet;
# switch to logging.basicConfig(level=logging.DEBUG) for verbose logs
logging.getLogger('hpbandster').setLevel(logging.WARNING)

argparser = argparse.ArgumentParser()
argparser.add_argument("--train_size", type=int, default=10)
argparser.add_argument("--budget", default=50, type=int)
argparser.add_argument("--runs", type=int, default=1)
argparser.add_argument('--n_workers', type=int, default=10)
args = argparser.parse_args()

# BOHB settings
iteration = 40  # number of BOHB iterations
min_b = 5       # minimum budget per configuration
max_b = 30      # maximum budget per configuration

# Problem dimension and DoE sizes
num_dim = 3        # number of input dimensions
num_mean_var = 30  # total DoE points (mean + variance sets)
num_mean = 10      # points used to train the mean NN
num_var = num_mean_var - num_mean  # additional points for the variance NN
budget = args.budget # Maximum number of iterations/infill points

print("###############################################", flush=True)
print("Mean training size: {}".format(num_mean), flush=True)
print("Variance training size: {}".format(num_var), flush=True)
print("Budget: {}".format(budget), flush=True)
print("Parallel worker size: {}".format(args.n_workers), flush=True)
print("###############################################", flush=True)

# Bounds
ub = np.array([1, 1, 1]) # upper bounds for each dimension
lb = np.array([0, 0, 0]) # lower bounds for each dimension

bounds = []
for i in np.arange(len(lb)):
    bounds.append((lb[i], ub[i]))

u_bounds = [1, 1, 1]
l_bounds = [0, 0, 0]

sampler_mean_var = qmc.Halton(d=num_dim, scramble=False)  # scramble=False keeps the DoE identical across runs
sample_mean_var = sampler_mean_var.random(n=num_mean_var)
x_mean_var = qmc.scale(sample_mean_var, l_bounds, u_bounds)

x_mean = x_mean_var[:num_mean]  # first num_mean points for the mean NN
y_mean = np.array([HARTMANN3(x) for x in x_mean])

x_var = x_mean_var[num_mean:]   # remaining points for the variance NN
y_var = np.array([HARTMANN3(x) for x in x_var])

# Storing the data
data = {
    'x': x_mean,
    'y': y_mean
}

# Saving the data
savemat('doe.mat', data)

# Preallocate per-iteration storage for training errors, timings, and selected HPs
training_nrmse = np.zeros((budget, 1))
training_nrmse_mean = np.zeros((budget, 1))
training_nrmse_var = np.zeros((budget, 1))
store_times = []

activation = np.zeros((budget, 1))
epoch = np.zeros((budget, 1))
layer = np.zeros((budget, 1))
neuron = np.zeros((budget, 1))

var_activation = np.zeros((budget, 1))
var_epoch = np.zeros((budget, 1))
var_layer = np.zeros((budget, 1))
var_neuron = np.zeros((budget, 1))

loss = np.zeros((budget, 1))
var_loss = np.zeros((budget, 1))

# ----------------------------------------------------------------------------
#                               EGONN Loop
# ----------------------------------------------------------------------------
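# Each iteration: (re)train the mean and variance NNs (HPO via BOHB in tuning
# iterations), maximize the expected improvement to pick the next infill point,
# evaluate HARTMANN3 there, and append the new sample to the training data.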

# Retuning interval for the periodic strategies: n = 1 for HPO-1itr, n = 5 for HPO-5itr
# n = 1
n = 5

for itr in range(budget):

    print("\nInfill iteration: {}".format(itr + 1))

    print("Optimizing hyperparameters and training mean NN", flush=True)

    tic = time.time()

    ######### Train the mean NN

    if itr == 0:          # HPO-static: tune HPs only before the first infill
    # if itr % n == 0:    # HPO-1itr / HPO-5itr: retune every n iterations

        # Split the training data into training and cross-validation data
        x_train, x_cv, y_train, y_cv = train_test_split(x_mean, y_mean, test_size=0.2)
        # Serialize training data to pass to workers
        data = {
            'x_train': x_train,
            'y_train': y_train,
            'x_cv': x_cv,
            'y_cv': y_cv
        }
        with open('train_data.pkl', 'wb') as f:
            pickle.dump(data, f)

        # Start a nameserver
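        # hpbandster runs BOHB as a master process plus worker subprocesses
        # that find each other through this (Pyro4-based) nameserver; each
        # worker evaluates candidate configurations on budgets between
        # min_b and max_b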
        NS = hpns.NameServer(run_id='bohb', host='127.0.0.1', port=None)
        NS.start()

        # Start worker processes using subprocess
        worker_processes = []
        for i in range(args.n_workers):
            # Launch a worker defined in imp_functions_bohb_workers.py, located two levels up from the current directory
            worker_cmd = f'python ../../imp_functions_bohb_workers.py --run_id bohb --host 127.0.0.1 --worker {i}'
            worker_processes.append(subprocess.Popen(worker_cmd, shell=True))

        # Give workers some time to start up and register
        time.sleep(5)

        # Run hpbandster
        res = run_hpbandster(x_train, y_train, x_cv, y_cv, iteration, min_b, max_b)

        # Extract the best configuration
        best_config = res.get_id2config_mapping()[res.get_incumbent_id()]['config']
        best_loss = res.get_runs_by_id(res.get_incumbent_id())[-1]['loss']
        best_loss = np.array(best_loss)
        print(f"Best objective (loss for training + CV): {best_loss}", flush=True)
        print(f"Best hyperparameters: {best_config}", flush=True)

        opt_params = {
            "num_epochs": best_config["epoch"],
            "activation": best_config["act"],
            "num_hidden_layers": best_config["layer"],
            "num_neurons": best_config["neuron"]
        }

        # Get the model
        mean_NN, x_transform, y_transform = train(x_train, y_train, opt_params)

    else:
        mean_NN, x_transform, y_transform = train(x_mean, y_mean, opt_params)


    ########## Train the var NN
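    # The variance NN is trained on the mean NN's prediction errors at both the
    # mean-training points and the held-out variance points; its predictions act
    # as the uncertainty estimate passed to the EI calculation below.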

    print("Optimizing hyperparameters and training var NN", flush=True)
    # Predict value at x_mean
    y_mean_pred = predict(x_mean, x_transform, y_transform, mean_NN)
    # Calculate the error (s2) for first dataset
    error_mean = y_mean - y_mean_pred
    # Rescaling the predicted y at x_var for calculating the error
    y_var_pred = predict(x_var, x_transform, y_transform, mean_NN)
    # Calculate the error (s2) for second dataset
    error_var = y_var - y_var_pred
    # Stack the error data
    s2_var = np.vstack((error_mean, error_var))

    if itr == 0:          # HPO-static: tune HPs only before the first infill
    # if itr % n == 0:    # HPO-1itr / HPO-5itr: retune every n iterations

        # Split the training data into training and cross-validation data
        x_train, x_cv, y_train, y_cv = train_test_split(np.vstack((x_mean, x_var)), s2_var, test_size=0.2)
        # Serialize training data to pass to workers
        data = {
            'x_train': x_train,
            'y_train': y_train,
            'x_cv': x_cv,
            'y_cv': y_cv
        }
        with open('train_data.pkl', 'wb') as f:
            pickle.dump(data, f)

        # Start a nameserver
        NS = hpns.NameServer(run_id='bohb', host='127.0.0.1', port=None)
        NS.start()

        # Start worker processes using subprocess
        worker_processes = []
        for i in range(args.n_workers):
            # Launch a worker defined in imp_functions_bohb_workers.py, located two levels up from the current directory
            worker_cmd = f'python ../../imp_functions_bohb_workers.py --run_id bohb --host 127.0.0.1 --worker {i}'
            worker_processes.append(subprocess.Popen(worker_cmd, shell=True))

        # Give workers some time to start up and register
        time.sleep(5)

        # Run hpbandster
        res = run_hpbandster(x_train, y_train, x_cv, y_cv, iteration, min_b, max_b)

        # Extract the best configuration
        best_config_var = res.get_id2config_mapping()[res.get_incumbent_id()]['config']
        best_loss_var = res.get_runs_by_id(res.get_incumbent_id())[-1]['loss']
        best_loss_var = np.array(best_loss_var)
        print(f"Best objective (loss for training + CV): {best_loss_var}", flush=True)
        print(f"Best hyperparameters: {best_config_var}", flush=True)

        opt_params_var = {
            "num_epochs": best_config_var["epoch"],
            "activation": best_config_var["act"],
            "num_hidden_layers": best_config_var["layer"],
            "num_neurons": best_config_var["neuron"]
        }

        # Get the model
        var_NN, x_s2_transform, s2_transform = train(np.vstack((x_mean, x_var)), s2_var, opt_params_var)

    else:
        var_NN, x_s2_transform, s2_transform = train(np.vstack((x_mean, x_var)), s2_var, opt_params_var)

    toc = time.time()
    times_bohb = toc - tic
    store_times.append(times_bohb)
    print(f"Elapsed time for HPO : {times_bohb} seconds")

    # Transform the data
    x_train = x_transform.transform(x_train)
    # Predict at training data
    y_pred = mean_NN(x_train)
    # Transform back to original scale
    x_train = x_transform.inverse_transform(x_train)
    y_pred = y_transform.inverse_transform(y_pred)

    training_loss = np.sqrt(mean_squared_error(y_train, y_pred) / np.ptp(y_train))

    s2_var_pred = predict(np.vstack((x_mean, x_var)), x_s2_transform, s2_transform, var_NN)
    training_loss_mean = np.sqrt(mean_squared_error(y_mean, y_mean_pred)/np.ptp(y_mean))
    training_loss_var = np.sqrt(mean_squared_error(s2_var, s2_var_pred)/np.ptp(s2_var))

    print("Training NRMSE for mean NN: {}".format(training_loss), flush=True)
    print("Training NRMSE2 for mean NN: {}".format(training_loss_mean), flush=True)
    print("Training NRMSE for var NN: {}".format(training_loss_var), flush=True)

    # ----------------------------------------------------------------------------
    #                           Minimizing the EI
    # ----------------------------------------------------------------------------

    print("######################## Adding Infill {} of {}".format(itr + 1, budget), flush=True)

    # Current best (minimum) observed value, needed for the EI calculation
    ymin = np.min(y_mean)
    index = np.argmin(y_mean)
    xmin = x_mean[index, :]

    print("Best objective found: {}".format(ymin), flush=True)
    print("Best objective found at: {}".format(xmin), flush=True)

    ######### Optimize the expected improvement

    print("Maximizing EI", flush=True)

    # Maximize the EI
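    # expectedImprovement (imported above) is assumed to return the negative EI,
    # EI(x) = (ymin - mu(x)) * Phi(z) + s(x) * phi(z) with z = (ymin - mu(x)) / s(x),
    # where mu comes from the mean NN and s from the variance NN, so that
    # differential_evolution (a minimizer) effectively maximizes EI
    # (hence the sign flip in -ei_opt.fun below)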
    ei_opt = optimize.differential_evolution(expectedImprovement, bounds, popsize=50, mutation=0.5, recombination=0.9,
                args=(x_transform, y_transform, x_s2_transform, s2_transform, ymin, mean_NN, var_NN), polish=False)

    # Computing function value at new x
    x_new = ei_opt.x.reshape(1,-1)
    #f_new = HARTMANN3(x_new).reshape(1,-1)
    f_new = np.array([HARTMANN3(x) for x in x_new]).reshape(1,-1)

    print("New infill point: {}".format(ei_opt.x), flush=True)
    print("max EI: {}".format(-ei_opt.fun), flush=True)

    # ----------------------------------------------------------------------------
    #                               Save the data
    # ----------------------------------------------------------------------------

    # Storing the ei_opt and corresponding x
    if itr == 0:
        x_ei_optimums = ei_opt.x.reshape(1,-1)
        y_ei_optimums = -ei_opt.fun.reshape(-1,)
    else:
        x_ei_optimums = np.vstack((x_ei_optimums, ei_opt.x.reshape(1,-1)))
        y_ei_optimums = np.append(y_ei_optimums, -ei_opt.fun.reshape(-1,))

    if itr == 0:
        x_min_data = xmin.reshape(1,-1)
        y_min_data = ymin.reshape(-1,)
    else:
        x_min_data = np.vstack((x_min_data, xmin.reshape(1,-1)))
        y_min_data = np.append(y_min_data, ymin.reshape(-1,))

    # Appending the data for next iteration
    x_mean = np.vstack((x_mean, x_new))
    y_mean = np.vstack((y_mean, f_new))

    if itr == 0:          # store the tuned HPs and losses only in tuning iterations
    # if itr % n == 0:

        epoch[itr, 0] = opt_params['num_epochs']
        activation[itr, 0] = opt_params['activation']
        layer[itr, 0] = opt_params['num_hidden_layers']
        neuron[itr, 0] = opt_params['num_neurons']

        var_epoch[itr, 0] = opt_params_var['num_epochs']
        var_activation[itr, 0] = opt_params_var['activation']
        var_layer[itr, 0] = opt_params_var['num_hidden_layers']
        var_neuron[itr, 0] = opt_params_var['num_neurons']

        training_nrmse[itr, 0] = training_loss
        training_nrmse_mean[itr, 0] = training_loss_mean
        training_nrmse_var[itr, 0] = training_loss_var

        loss[itr, 0] = best_loss
        var_loss[itr, 0] = best_loss_var


    data = {
        'x_data_min': x_min_data,
        'f_data_min': y_min_data,
        'xei': x_ei_optimums,
        'yei': y_ei_optimums,
        'y': y_mean,
        "activation": activation,
        "epoch": epoch,
        "layer": layer,
        "neuron": neuron,
        "var_activation": var_activation,
        "var_epoch": var_epoch,
        "var_layer": var_layer,
        "var_neuron": var_neuron,
        "training_nrmse_mean": training_nrmse,
        "training_nrmse_var": training_nrmse_var,
        "training_nrmse_mean2": training_nrmse_mean,
        "time": store_times,
        "var_loss" : var_loss,
        "loss" : loss,
    }

    savemat("result.mat", data)

    # Shutdown the optimizer and nameserver
    NS.shutdown()

    # Terminate worker processes
    for p in worker_processes:
        p.terminate()
        p.wait()

Note

To test HPO-static, keep the if conditions as if itr == 0: so the HPs are tuned only once, before the first infill point. To test HPO-1itr or HPO-5itr, set n (defined just before the EGONN loop) to 1 or 5, respectively, and change the if conditions to if itr % n == 0:.