
Random Search

Random search samples hyperparameter (HP) configurations at random from user-specified distributions. Because it does not enumerate the full grid, it scales much better than grid search for large HP spaces and can reach good configurations with far fewer evaluations. However, it does not use the results of previous evaluations to guide the search, so it is less sample-efficient than model-based methods such as Bayesian optimization.
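Conceptually, each trial draws every HP from its own distribution, evaluates a validation loss, and keeps the best configuration seen so far. The short sketch below illustrates only this loop; validation_loss is a hypothetical placeholder for training a network and scoring it on held-out data, and the sampling ranges mirror those used in the full script further down:

import numpy as np

rng = np.random.default_rng(0)

def validation_loss(config):
    # Hypothetical stand-in for "train a network with this config and return its CV loss"
    return (config["neurons"] - 10) ** 2 + 0.1 * config["num_hidden_layers"]

best_loss, best_config = np.inf, None
for _ in range(50):  # number of random trials
    config = {
        "epochs": int(rng.integers(1000, 10001)),
        "neurons": int(rng.integers(4, 17)),
        "num_hidden_layers": int(rng.integers(1, 5)),
        "activation": int(rng.integers(1, 5)),  # index of the activation function
    }
    loss = validation_loss(config)
    if loss < best_loss:  # keep the best configuration seen so far
        best_loss, best_config = loss, config

print(best_loss, best_config)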

The following script offers a complete example:

import argparse
import numpy as np
from scipy.io import savemat
from scipy.stats import qmc
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from imp_functions import *
from scipy.optimize import differential_evolution
import time

# Parse the command-line arguments
argparser = argparse.ArgumentParser()
argparser.add_argument("--train_size", type=int, default=100)
argparser.add_argument("--test_size", type=int, default=50)
argparser.add_argument("--runs", type=int, default=1)
args = argparser.parse_args()

# Read the parsed command-line arguments
train_size = args.train_size
test_size = args.test_size
runs = args.runs

print("###############################################", flush=True)
print("Training size: {}".format(train_size), flush=True)
print("###############################################", flush=True)

########################### Create architectures for random search

# 4*4*4*4 = 256, i.e. the same number of architectures that grid search would evaluate
pool = 256

architectures = np.zeros((pool, 4))

for i in np.arange(pool):
    w = np.round(np.random.uniform(low=1000, high=10000, size=1))  # epochs
    x = np.round(np.random.uniform(low=4, high=16, size=1))        # neurons
    y = np.round(np.random.uniform(low=1, high=4, size=1))         # hidden layers
    z = np.round(np.random.uniform(low=1, high=4, size=1))         # activation function index
    architectures[i] = np.hstack((w, x, y, z))

architectures = architectures.astype(int)

########################### Create the dataset
# Bounds
ub = np.array([8.0, 8.0])
lb = np.array([0.0, 0.0])

bounds = []
for i in np.arange(len(lb)):
    bounds.append((lb[i], ub[i]))

u_bounds = [8.0, 8.0]
l_bounds = [0.0, 0.0]

num_dim = 2
samples = train_size

sampler_x_train = qmc.Halton(d=num_dim, scramble=False)
sample_x_train = sampler_x_train.random(n=samples)
x_mean = qmc.scale(sample_x_train, l_bounds, u_bounds)
y_mean = branin(x_mean)

# Split the data into training and cross-validation sets
x_train, x_cv, y_train, y_cv = train_test_split(x_mean, y_mean, test_size=0.2)

# Preallocate arrays to store the results of each run
x_opt = np.zeros((runs, num_dim))
f_opt = np.zeros((runs, 1))

epoch = np.zeros((runs, 1))
activation = np.zeros((runs, 1))
layer = np.zeros((runs, 1))
neuron = np.zeros((runs, 1))
training_nrmse = np.zeros((runs, 1))
loss_training_cv = np.zeros((runs, 1))

store_times = []

########################### Running random search multiple times

for idx in range(runs):

    print("\nIteration: {}".format(idx+1), flush=True)

    tic = time.time()

    loss = np.inf  # best objective found so far

    for architecture in architectures:

        parameters = {}
        parameters['epochs'] = architecture[0]
        parameters['neurons'] = architecture[1]
        parameters['num_hidden_layers'] = architecture[2]
        parameters['activation'] = architecture[3]

        # Evaluate the objective (training + CV loss) for this configuration
        obj = objective(x_train, y_train, x_cv, y_cv, parameters)

        # Keep the configuration if it improves on the best objective found so far
        if obj < loss:
            loss = obj
            opt_params = parameters

    best_loss = np.array(loss)

    print(f"Best objective (loss for training + CV): {best_loss}", flush=True)
    print("Optimal hyperparameters: {}".format(opt_params), flush=True)

    # Train the final model with the optimal hyperparameters
    model, x_transform, y_transform = train(x_train, y_train, opt_params)

    # Transform the training inputs to the model's scaled space
    x_train = x_transform.transform(x_train)

    # Predict at the training data
    y_pred = model(x_train)

    # Transform back to original scale
    x_train = x_transform.inverse_transform(x_train)
    y_pred = y_transform.inverse_transform(y_pred)

    training_loss = np.sqrt(mean_squared_error(y_train, y_pred)) / np.ptp(y_train)

    print("Training NRMSE: {}".format(training_loss), flush=True)

    ########################### Minimize the NN model

    # Find the minimum of the trained NN model with differential evolution
    result = differential_evolution(predict, bounds, mutation=0.5, recombination=0.9,
                    args=(x_transform, y_transform, model), polish=True, disp=False)

    print("Optimal x: {}".format(result.x), flush=True)
    print("Optimal f: {}".format(result.fun), flush=True)

    toc = time.time()
    print(f"Elapsed time : {toc-tic} seconds")

    epoch[idx, 0] = opt_params['epochs']
    activation[idx, 0] = opt_params['activation']
    layer[idx, 0] = opt_params['num_hidden_layers']
    neuron[idx, 0] = opt_params['neurons']
    x_opt[idx, 0] = result.x[0]
    x_opt[idx, 1] = result.x[1]
    f_opt[idx, 0] = result.fun
    training_nrmse[idx, 0] = training_loss
    loss_training_cv[idx, 0] = best_loss

    # Accumulate wall-clock time across runs
    times_rs = toc - tic

    if idx == 0:
        store_times.append(times_rs)
    else:
        times_rs += store_times[idx-1]
        store_times.append(times_rs)

    data = {
        "x": x_opt,
        "fun": f_opt,
        "training_nrmse": training_nrmse,
        "time": store_times,
        "activation": activation,
        "epoch": epoch,
        "layer": layer,
        "neuron": neuron,
        "loss" : loss_training_cv,
    }

    # Save (checkpoint) the accumulated results after every run
    savemat("result.mat", data)
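
Assuming the script above is saved as, for example, random_search.py (the filename is not fixed by this page), it can be run with the command-line arguments defined at the top:

python random_search.py --train_size 100 --test_size 50 --runs 5

Note that the pool of 256 random architectures is generated once, before the loop over runs, so every run evaluates the same candidate architectures; run-to-run variation therefore comes from the stochastic training inside objective and train rather than from re-sampling the search space.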