==================== Hyperband ====================

Hyperband is a resource-efficient HPO technique that builds on the successive halving algorithm (SHA) to allocate resources dynamically among HP configurations. It starts by evaluating many configurations with a small resource budget and progressively eliminates the lower-performing ones, assigning larger budgets to the promising configurations. To avoid discarding slow-starting configurations too early, Hyperband runs several SHA brackets that trade off the number of configurations against the minimum budget given to each. Hyperband scales well and parallelizes easily, but its reliance on random sampling can lead to suboptimal HPs in large search spaces. The following piece of code offers an example:

from imp_functions_hb_workers import *
from scipy.optimize import differential_evolution
import argparse
import os
import subprocess
import time
import pickle
import numpy as np
from scipy.io import savemat
from scipy.stats import qmc
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import logging
import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import HyperBand

# logging.WARNING produces less log output than DEBUG
# logging.basicConfig(level=logging.DEBUG)
logging.getLogger('hpbandster').setLevel(logging.WARNING)

argparser = argparse.ArgumentParser()
argparser.add_argument("--train_size", type=int, default=10)
argparser.add_argument("--runs", type=int, default=1)
argparser.add_argument("--n_workers", type=int, default=10)
args = argparser.parse_args()

# Hyperband settings: number of iterations and minimum/maximum budgets
iteration = 40
min_b = 5
max_b = 30

print("###############################################", flush=True)
print("Training size: {}".format(args.train_size), flush=True)
print("Parallel worker size: {}".format(args.n_workers), flush=True)
print("###############################################", flush=True)

# Bounds of the design space
ub = np.array([8.0, 8.0])
lb = np.array([0.0, 0.0])
bounds = [(lb[i], ub[i]) for i in np.arange(len(lb))]
num_dim = 2
samples = args.train_size

# Generate training inputs with a Halton sequence and evaluate the Branin function
sampler_x_train = qmc.Halton(d=num_dim, scramble=False)
sample_x_train = sampler_x_train.random(n=samples)
x = qmc.scale(sample_x_train, lb, ub)
y = branin(x)

# Split the training data into training and cross-validation data
x_train, x_cv, y_train, y_cv = train_test_split(x, y, test_size=0.2)

# Arrays for storing the results of each run
x_opt = np.zeros((args.runs, num_dim))
f_opt = np.zeros((args.runs, 1))
epoch = np.zeros((args.runs, 1))
activation = np.zeros((args.runs, 1))
layer = np.zeros((args.runs, 1))
neuron = np.zeros((args.runs, 1))
training_nrmse = np.zeros((args.runs, 1))
loss = np.zeros((args.runs, 1))
store_times = []

# Serialize training data to pass to workers
data = {
    'x_train': x_train,
    'y_train': y_train,
    'x_cv': x_cv,
    'y_cv': y_cv
}
with open('train_data.pkl', 'wb') as f:
    pickle.dump(data, f)

# Running the optimization loop
for idx in range(args.runs):
    print("\nIteration: {}".format(idx + 1), flush=True)

    # Start a nameserver
    NS = hpns.NameServer(run_id='hb', host='127.0.0.1', port=None)
    NS.start()

    # Start worker processes using subprocess
    worker_processes = []
    for i in range(args.n_workers):
        # The worker script is located two levels up from the current directory
        worker_cmd = f'python ../../imp_functions_hb_workers.py --run_id hb --host 127.0.0.1 --worker {i}'
        worker_processes.append(subprocess.Popen(worker_cmd, shell=True))

    # Give workers some time to start up and register
    time.sleep(5)

    tic = time.time()

    # Run hpbandster
    res = run_hpbandster(x_train, y_train, x_cv, y_cv, iteration, min_b, max_b)

    # Extract the best configuration and its loss
    best_config = res.get_id2config_mapping()[res.get_incumbent_id()]['config']
    best_loss = res.get_runs_by_id(res.get_incumbent_id())[-1].loss
    best_loss = np.array(best_loss)
    print(f"Best objective (loss for training + CV): {best_loss}", flush=True)
    print(f"Best hyperparameters: {best_config}", flush=True)

    opt_params = {
        "num_epochs": best_config["epoch"],
        "activation": best_config["act"],
        "num_hidden_layers": best_config["layer"],
        "num_neurons": best_config["neuron"]
    }

    # Retrain the model with the best hyperparameters
    model, x_transform, y_transform = train(x_train, y_train, opt_params)

    # Transform the data
    x_train = x_transform.transform(x_train)

    # Predict at the training data
    y_pred = model(x_train)

    # Transform back to the original scale
    x_train = x_transform.inverse_transform(x_train)
    y_pred = y_transform.inverse_transform(y_pred)

    # NRMSE: root-mean-square error normalized by the range of the training targets
    training_loss = np.sqrt(mean_squared_error(y_train, y_pred)) / np.ptp(y_train)
    print("Training NRMSE: {}".format(training_loss), flush=True)

    # Minimize the NN model
    result = differential_evolution(predict, bounds, mutation=0.5, recombination=0.9,
                                    args=(x_transform, y_transform, model),
                                    polish=True, disp=False)
    print("Optimal x: {}".format(result.x), flush=True)
    print("Optimal f: {}".format(result.fun), flush=True)

    toc = time.time()
    print(f"Elapsed time: {toc - tic} seconds")

    # Store the results of this run
    epoch[idx, 0] = opt_params['num_epochs']
    activation[idx, 0] = opt_params['activation']
    layer[idx, 0] = opt_params['num_hidden_layers']
    neuron[idx, 0] = opt_params['num_neurons']
    x_opt[idx, 0] = result.x[0]
    x_opt[idx, 1] = result.x[1]
    f_opt[idx, 0] = result.fun
    training_nrmse[idx, 0] = training_loss
    loss[idx, 0] = best_loss

    # Record the wall-clock time of this run
    store_times.append(toc - tic)

    data = {
        "x": x_opt,
        "fun": f_opt,
        "training_nrmse": training_nrmse,
        "time": store_times,
        "activation": activation,
        "epoch": epoch,
        "layer": layer,
        "neuron": neuron,
        "loss": loss,
    }
    savemat("result.mat", data)

    # Shutdown the nameserver
    NS.shutdown()

    # Terminate worker processes
    for p in worker_processes:
        p.terminate()
        p.wait()
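The script above delegates the bracket logic to hpbandster. For readers who want to see the successive-halving step in isolation, the following minimal sketch runs a single SHA bracket in plain Python. It is illustrative only: sample_config and evaluate_config are hypothetical stand-ins for the search space and the train-and-validate routine defined in imp_functions_hb_workers.py, and the budget schedule simply mirrors the min_b and max_b values used above.

# Minimal successive-halving (SHA) sketch -- illustrative only, not part of the
# hpbandster-based script above. sample_config and evaluate_config are
# hypothetical placeholders for the search space and the training/validation
# routine implemented in imp_functions_hb_workers.py.
import random

def sample_config():
    # Hypothetical search space mirroring the hyperparameters tuned above
    return {
        "epoch": random.randint(10, 200),
        "act": random.choice(["relu", "tanh", "sigmoid"]),
        "layer": random.randint(1, 5),
        "neuron": random.randint(4, 64),
    }

def successive_halving(evaluate_config, n_configs=27, min_budget=5, max_budget=30, eta=3):
    """Run one SHA bracket: evaluate n_configs at min_budget, keep the best
    1/eta fraction, increase the budget by a factor of eta, and repeat."""
    # Geometric budget schedule, e.g. 5, 15, 30 for min_budget=5, max_budget=30, eta=3
    budgets, b = [], min_budget
    while b < max_budget:
        budgets.append(b)
        b *= eta
    budgets.append(max_budget)

    configs = [sample_config() for _ in range(n_configs)]
    best_loss, best_config = None, None
    for budget in budgets:
        losses = [evaluate_config(c, budget) for c in configs]
        ranked = sorted(zip(losses, configs), key=lambda pair: pair[0])
        best_loss, best_config = ranked[0]
        n_keep = max(1, len(configs) // eta)  # survivors advance to the next rung
        configs = [c for _, c in ranked[:n_keep]]
    return best_config, best_loss

# Example usage with a dummy objective (replace with real training + validation)
best, best_loss = successive_halving(lambda cfg, b: abs(cfg["neuron"] - 32) / b)

Hyperband proper repeats such brackets with different trade-offs between the number of starting configurations and the initial budget; in the script above, hpbandster manages this internally given min_b and max_b.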