Random Search
Random search samples HP configurations at random from specified distributions. For large HP spaces it is more efficient than grid search: with the same computational budget it explores more distinct values of each hyperparameter, which increases the chance of discovering a near-optimal configuration while reducing computation time. However, it does not use previous results to guide future samples, making it less sample-efficient than model-based methods.
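Conceptually, random search amounts to drawing configurations from the chosen distributions, evaluating each one, and keeping the best. A minimal sketch, assuming a hypothetical evaluate(config) function that trains a model and returns a validation loss:
import numpy as np
rng = np.random.default_rng(0)
best_config, best_loss = None, np.inf
for _ in range(50):  # fixed evaluation budget
    config = {
        "learning_rate": 10 ** rng.uniform(-4, -1),  # log-uniform sample
        "num_layers": int(rng.integers(1, 5)),       # uniform integer sample
    }
    loss = evaluate(config)  # hypothetical training/validation routine
    if loss < best_loss:
        best_loss, best_config = loss, config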
The following script offers a complete example, in which random search selects the architecture of a neural network trained on the Branin function:
import argparse
import numpy as np
from scipy.io import savemat
from scipy.stats import qmc
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from imp_functions import *
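# imp_functions is assumed to provide the helpers used below: branin, objective, train, and predict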
from scipy.optimize import differential_evolution
import time
# Define the command-line arguments
argparser = argparse.ArgumentParser()
argparser.add_argument("--train_size", type=int, default=100)
argparser.add_argument("--test_size", type=int, default=50)
argparser.add_argument("--runs", type=int, default=1)
args = argparser.parse_args()
# Read the command-line arguments
train_size = args.train_size
test_size = args.test_size
runs = args.runs
print("###############################################", flush=True)
print("Training size: {}".format(train_size), flush=True)
print("###############################################", flush=True)
########################### Create architectures for random search
# Sample 256 random architectures, matching the 4*4*4*4 = 256 configurations a grid search would evaluate
pool = 256
architectures = np.zeros((pool, 4))
for i in np.arange(pool):
    # Each row holds [epochs, neurons, hidden layers, activation index]
    w = np.round(np.random.uniform(low=1000, high=10000, size=1))  # epochs
    x = np.round(np.random.uniform(low=4, high=16, size=1))        # neurons per layer
    y = np.round(np.random.uniform(low=1, high=4, size=1))         # hidden layers
    z = np.round(np.random.uniform(low=1, high=4, size=1))         # activation index
    architectures[i] = np.hstack((w, x, y, z))
architectures = architectures.astype(int)
########################### Create the dataset
# Bounds
ub = np.array([8.0, 8.0])
lb = np.array([0.0, 0.0])
bounds = []
for i in np.arange(len(lb)):
    bounds.append((lb[i], ub[i]))
u_bounds = [8.0, 8.0]
l_bounds = [0.0, 0.0]
num_dim = 2
samples = train_size
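# Draw the training inputs from a Halton low-discrepancy sequence, rescale them to
# the box [0, 8]^2, evaluate the Branin function, and hold out 20% for cross-validation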
sampler_x_train = qmc.Halton(d=num_dim, scramble=False)
sample_x_train = sampler_x_train.random(n=samples)
x_mean = qmc.scale(sample_x_train, l_bounds, u_bounds)
y_mean = branin(x_mean)
x_train = x_mean
y_train = y_mean
x_train, x_cv, y_train, y_cv = train_test_split(x_mean, y_mean, test_size=0.2)
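# Preallocate per-run result arrays: optimum location and value, chosen hyperparameters, errors, and timings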
x_opt = np.zeros((runs, num_dim))
f_opt = np.zeros((runs, 1))
epoch = np.zeros((runs, 1))
activation = np.zeros((runs, 1))
layer = np.zeros((runs, 1))
neuron = np.zeros((runs, 1))
training_nrmse = np.zeros((runs, 1))
loss_training_cv = np.zeros((runs, 1))
store_times = []
########################### Running random search multiple times
for idx in range(runs):
    print("\nIteration: {}".format(idx+1), flush=True)
    tic = time.time()
    loss = 10000  # large initial value for the best loss found so far
    # Evaluate every sampled architecture and keep the one with the lowest objective
    for architecture in architectures:
        parameters = {}
        parameters['epochs'] = architecture[0]
        parameters['neurons'] = architecture[1]
        parameters['num_hidden_layers'] = architecture[2]
        parameters['activation'] = architecture[3]
        # Evaluate this set of hyperparameters
        obj = objective(x_train, y_train, x_cv, y_cv, parameters)
        # Check if the objective is less than the previous best objective
        if obj < loss:
            loss = obj
            opt_params = parameters
    best_loss = np.array(loss)
    print(f"Best objective (loss for training + CV): {best_loss}", flush=True)
    print("Optimal hyperparameters: {}".format(opt_params), flush=True)
    # Train the model with the optimal hyperparameters
    model, x_transform, y_transform = train(x_train, y_train, opt_params)
    # Transform the data
    x_train = x_transform.transform(x_train)
    # Predict at the training data
    y_pred = model(x_train)
    # Transform back to the original scale
    x_train = x_transform.inverse_transform(x_train)
    y_pred = y_transform.inverse_transform(y_pred)
    training_loss = np.sqrt(mean_squared_error(y_train, y_pred)) / np.ptp(y_train)
    print("Training NRMSE: {}".format(training_loss), flush=True)
    ########################### Minimize the NN model
    # Minimum of the NN model
    result = differential_evolution(predict, bounds, mutation=0.5, recombination=0.9,
                                    args=(x_transform, y_transform, model), polish=True, disp=False)
    print("Optimal x: {}".format(result.x), flush=True)
    print("Optimal f: {}".format(result.fun), flush=True)
    toc = time.time()
    print(f"Elapsed time : {toc-tic} seconds")
    # Store the results of this run (using the optimal hyperparameters, not the last ones tried)
    epoch[idx, 0] = opt_params['epochs']
    activation[idx, 0] = opt_params['activation']
    layer[idx, 0] = opt_params['num_hidden_layers']
    neuron[idx, 0] = opt_params['neurons']
    x_opt[idx, 0] = result.x[0]
    x_opt[idx, 1] = result.x[1]
    f_opt[idx, 0] = result.fun
    training_nrmse[idx, 0] = training_loss
    loss_training_cv[idx, 0] = best_loss
    # Accumulate wall-clock time across runs
    times_rs = toc - tic
    if idx == 0:
        store_times.append(times_rs)
    else:
        times_rs += store_times[idx-1]
        store_times.append(times_rs)
data = {
"x": x_opt,
"fun": f_opt,
"training_nrmse": training_nrmse,
"time": store_times,
"activation": activation,
"epoch": epoch,
"layer": layer,
"neuron": neuron,
"loss" : loss_training_cv,
}
savemat("result.mat".format(idx), data)