========================================
Bayesian Optimization
========================================

Bayesian optimization is a popular method for optimizing black-box models,
using a surrogate model (often a Gaussian process, GP) to predict performance
and an acquisition function (e.g., expected improvement, EI) to select new HP
configurations. Bayesian optimization updates the surrogate model with each
new evaluation, balancing exploration and exploitation. While it converges
efficiently, GP's sequential nature and slow performance can be limitations.

The following piece of code offers an example::

    # Example driver: tune a neural-network surrogate with Ax, then minimize it.
    #
    # For each of ``--runs`` repetitions the script
    #   1. calls ``Ax`` to pick hyperparameters from the training and CV split,
    #   2. trains a model with those hyperparameters via ``train``,
    #   3. reports the range-normalized RMSE on the training data,
    #   4. minimizes the trained model with differential evolution, and
    #   5. stores everything collected so far in ``result.mat``.
    #
    # ``branin``, ``Ax``, ``train``, ``predict`` and ``mean_squared_error`` are
    # not defined here; they are expected to come from ``imp_functions``.
    # NOTE(review): their exact contracts are not visible from this page --
    # confirm against ``imp_functions`` before relying on the comments below.

    import argparse
    from scipy.stats import qmc
    from scipy.io import savemat
    from sklearn.model_selection import train_test_split
    from imp_functions import *
    from scipy.optimize import differential_evolution
    import time
    import numpy as np

    # Command-line arguments
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--train_size", type=int, default=100)
    argparser.add_argument("--test_size", type=int, default=50)
    # A default is required here: without one, omitting --runs yields None and
    # the np.zeros((runs, ...)) allocations below fail with a TypeError.
    argparser.add_argument("--runs", type=int, default=1)
    args = argparser.parse_args()

    train_size = args.train_size
    test_size = args.test_size  # parsed but not used anywhere below
    runs = args.runs
    num_dim = 2

    print("###############################################", flush=True)
    print(f"Training size: {train_size}", flush=True)
    print("###############################################", flush=True)

    # Bounds of the design space, shared by the sampler and the optimizer
    lb = np.array([0.0, 0.0])
    ub = np.array([8.0, 8.0])
    bounds = list(zip(lb, ub))

    # Unscrambled Halton points scaled to the bounds, evaluated with branin()
    sampler_x_train = qmc.Halton(d=num_dim, scramble=False)
    sample_x_train = sampler_x_train.random(n=train_size)
    x_mean = qmc.scale(sample_x_train, lb, ub)
    y_mean = branin(x_mean)

    # Hold out 20% of the samples as the cross-validation set
    x_train, x_cv, y_train, y_cv = train_test_split(x_mean, y_mean, test_size=0.2)

    # One row per run
    x_opt = np.zeros((runs, num_dim))
    f_opt = np.zeros((runs, 1))
    epoch = np.zeros((runs, 1))
    activation = np.zeros((runs, 1))
    layer = np.zeros((runs, 1))
    neuron = np.zeros((runs, 1))
    training_nrmse = np.zeros((runs, 1))
    loss_training_cv = np.zeros((runs, 1))

    # Cumulative wall-clock time at the end of each run
    store_times = []
    elapsed_total = 0.0

    # Running Ax multiple times
    for idx in range(runs):

        print(f"\nIteration: {idx+1}", flush=True)

        tic = time.time()

        # Optimize the hyperparameters
        opt_params, obj = Ax(x_train, y_train, x_cv, y_cv)

        print(f"Optimal hyperparameters: {opt_params}", flush=True)

        model, x_transform, y_transform = train(x_train, y_train, opt_params)

        # Transform the data
        x_train = x_transform.transform(x_train)

        # Predict at the training data
        y_pred = model(x_train)

        # Transform back to original scale
        x_train = x_transform.inverse_transform(x_train)
        y_pred = y_transform.inverse_transform(y_pred)

        best_loss = np.array(obj)

        # Range-normalized RMSE: sqrt(MSE) / (max - min). The square root must
        # be taken before dividing by the range, otherwise the value is not
        # dimensionless.
        training_loss = np.sqrt(mean_squared_error(y_train, y_pred)) / np.ptp(y_train)

        print(f"Best objective (loss for training + CV): {best_loss}", flush=True)
        print(f"Training NRMSE: {training_loss}", flush=True)

        ########################### Minimize the NN model

        # Minimum of the NN model; differential_evolution calls
        # predict(x, x_transform, y_transform, model)
        result = differential_evolution(
            predict,
            bounds,
            mutation=0.5,
            recombination=0.9,
            args=(x_transform, y_transform, model),
            polish=True,
            disp=False,
        )

        print(f"Optimal x: {result.x}", flush=True)
        print(f"Optimal f: {result.fun}", flush=True)

        toc = time.time()
        print(f"Elapsed time : {toc-tic} seconds")

        epoch[idx, 0] = opt_params['epochs']
        activation[idx, 0] = opt_params['activation']
        layer[idx, 0] = opt_params['num_hidden_layers']
        neuron[idx, 0] = opt_params['neurons']
        x_opt[idx, :] = result.x
        f_opt[idx, 0] = result.fun
        training_nrmse[idx, 0] = training_loss
        loss_training_cv[idx, 0] = best_loss

        # Store the running total so entry i is the time spent on runs 0..i
        elapsed_total += toc - tic
        store_times.append(elapsed_total)

        data = {
            "x": x_opt,
            "fun": f_opt,
            "training_nrmse": training_nrmse,
            "time": store_times,
            "activation": activation,
            "epoch": epoch,
            "layer": layer,
            "neuron": neuron,
            "loss": loss_training_cv,
        }

        # Rewritten after every run, so completed runs are kept on disk even
        # if a later run fails; rows of unfinished runs are still zero.
        savemat("result.mat", data)