Bayesian Optimization

Bayesian optimization is a popular method for optimizing black-box functions. It uses a surrogate model (often a Gaussian process, GP) to predict performance and an acquisition function (e.g., expected improvement, EI) to select new hyperparameter (HP) configurations. After each new evaluation, the surrogate model is updated, which lets the method balance exploration and exploitation. While it converges in few evaluations, its inherently sequential nature and the cost of GP inference, which scales cubically with the number of observations, can be limitations.
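
Before the full script, the following sketch shows the core of a single BO step: fit a GP surrogate to the evaluations seen so far, then pick the next configuration by maximizing EI. It assumes a scikit-learn GP and a toy quadratic objective; the names expected_improvement, x_obs, and y_obs are illustrative and do not appear in the script below.

import numpy as np
from scipy.stats import norm
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

def expected_improvement(x_cand, gp, y_best, xi=0.01):
    # For minimization: EI(x) = (y_best - mu - xi) * Phi(z) + sigma * phi(z),
    # where z = (y_best - mu - xi) / sigma
    mu, sigma = gp.predict(x_cand, return_std=True)
    sigma = np.maximum(sigma, 1e-12)  # guard against zero predictive variance
    z = (y_best - mu - xi) / sigma
    return (y_best - mu - xi) * norm.cdf(z) + sigma * norm.pdf(z)

# Fit the GP surrogate to the evaluations gathered so far (toy objective here)
rng = np.random.default_rng(0)
x_obs = rng.uniform(0.0, 8.0, size=(10, 2))
y_obs = np.sum((x_obs - 4.0) ** 2, axis=1)
gp = GaussianProcessRegressor(kernel=Matern(nu=2.5), normalize_y=True).fit(x_obs, y_obs)

# The next point to evaluate is the random candidate that maximizes EI
x_cand = rng.uniform(0.0, 8.0, size=(1000, 2))
x_next = x_cand[np.argmax(expected_improvement(x_cand, gp, y_obs.min()))]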

The following script offers a fuller example. It uses an Ax-based helper (imported from imp_functions) to tune the hyperparameters of a neural-network surrogate with Bayesian optimization, then minimizes the trained network with differential evolution:

import argparse
import time

import numpy as np
from scipy.io import savemat
from scipy.optimize import differential_evolution
from scipy.stats import qmc
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from imp_functions import *  # provides branin, Ax, train, and predict

# Parse the command-line arguments
argparser = argparse.ArgumentParser()
argparser.add_argument("--train_size", type=int, default=100)
argparser.add_argument("--test_size", type=int, default=50)
argparser.add_argument("--runs", type=int, required=True)
args = argparser.parse_args()

# Unpack the arguments
train_size = args.train_size
test_size = args.test_size
num_dim = 2

print("###############################################", flush=True)
print("Training size: {}".format(train_size), flush=True)
print("###############################################", flush=True)

# Box bounds on the two-dimensional design space
l_bounds = np.array([0.0, 0.0])
u_bounds = np.array([8.0, 8.0])
bounds = list(zip(l_bounds, u_bounds))  # (low, high) pairs for differential_evolution

runs = args.runs

samples = train_size

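# Halton is a low-discrepancy (quasi-random) sequence; with scramble=False it
# gives a deterministic, space-filling design on the unit hypercube, which
# qmc.scale then maps onto the box [l_bounds, u_bounds]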
sampler_x_train = qmc.Halton(d=num_dim, scramble=False)
sample_x_train = sampler_x_train.random(n=samples)
x_mean = qmc.scale(sample_x_train, l_bounds, u_bounds)
y_mean = branin(x_mean)

# Hold out 20% of the samples as a cross-validation set for the HP search
x_train, x_cv, y_train, y_cv = train_test_split(x_mean, y_mean, test_size=0.2)

# Per-run result arrays
x_opt = np.zeros((runs, num_dim))
f_opt = np.zeros((runs, 1))

epoch = np.zeros((runs, 1))
activation = np.zeros((runs, 1))
layer = np.zeros((runs, 1))
neuron = np.zeros((runs, 1))
training_nrmse = np.zeros((runs, 1))
loss_training_cv = np.zeros((runs, 1))

store_times = []  # cumulative wall-clock time after each run

# Repeat the Ax-based HP search to gauge run-to-run variability
for idx in range(runs):

    print("\nIteration: {}".format(idx+1), flush=True)

    tic = time.time()

    # Tune the NN hyperparameters via Bayesian optimization (the Ax helper)
    opt_params, obj = Ax(x_train, y_train, x_cv, y_cv)

    print("Optimal hyperparameters: {}".format(opt_params), flush=True)

    model, x_transform, y_transform = train(x_train, y_train, opt_params)

    # Scale the inputs to the model's training space
    x_train = x_transform.transform(x_train)

    # Predict at the training data
    y_pred = model(x_train)

    # Transform inputs and predictions back to the original scale
    x_train = x_transform.inverse_transform(x_train)
    y_pred = y_transform.inverse_transform(y_pred)

    best_loss = np.array(obj)
    # NRMSE: RMSE normalized by the range of the training targets
    training_loss = np.sqrt(mean_squared_error(y_train, y_pred)) / np.ptp(y_train)

    print(f"Best objective (loss for training + CV): {best_loss}", flush=True)
    print("Training NRMSE: {}".format(training_loss), flush=True)

    # Minimize the trained NN surrogate with differential evolution,
    # a population-based global optimizer suited to its nonconvex landscape
    result = differential_evolution(predict, bounds, mutation=0.5, recombination=0.9,
                                    args=(x_transform, y_transform, model), polish=True, disp=False)

    print("Optimal x: {}".format(result.x), flush=True)
    print("Optimal f: {}".format(result.fun), flush=True)

    toc = time.time()
    print(f"Elapsed time: {toc-tic} seconds", flush=True)

    epoch[idx, 0] = opt_params['epochs']
    activation[idx, 0] = opt_params['activation']  # assumes a numeric encoding of the activation choice
    layer[idx, 0] = opt_params['num_hidden_layers']
    neuron[idx, 0] = opt_params['neurons']

    x_opt[idx, 0] = result.x[0]
    x_opt[idx, 1] = result.x[1]
    f_opt[idx, 0] = result.fun
    training_nrmse[idx, 0] = training_loss
    loss_training_cv[idx, 0] = best_loss

    # Accumulate wall-clock time across runs
    elapsed = toc - tic
    store_times.append(elapsed if idx == 0 else store_times[-1] + elapsed)

    data = {
        "x": x_opt,
        "fun": f_opt,
        "training_nrmse": training_nrmse,
        "time": store_times,
        "activation": activation,
        "epoch": epoch,
        "layer": layer,
        "neuron": neuron,
        "loss" : loss_training_cv,
    }

    # Checkpoint the cumulative results after every run
    savemat("result.mat", data)
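
The helpers branin, Ax, train, and predict come from imp_functions and are not shown here. For reference, a minimal branin consistent with the call above (one sample per row) could follow the standard Branin formula:

import numpy as np

def branin(x):
    # Standard Branin test function, vectorized over rows of x with shape (n, 2)
    x1, x2 = x[:, 0], x[:, 1]
    a, b, c = 1.0, 5.1 / (4.0 * np.pi**2), 5.0 / np.pi
    r, s, t = 6.0, 10.0, 1.0 / (8.0 * np.pi)
    return a * (x2 - b * x1**2 + c * x1 - r) ** 2 + s * (1.0 - t) * np.cos(x1) + s

Assuming the script is saved as, say, bo_example.py (a name chosen here for illustration), a typical invocation would be python bo_example.py --train_size 100 --test_size 50 --runs 20.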