Question: Neural Networks. Need help with code for the implementation of mini-batch SGD.
def mini_batch_gradient(param, x_batch, y_batch, reg_lambda):
    """Implement the function to compute the mini-batch gradient.
    input:
        param -- parameters dictionary (w, b)
        x_batch -- a batch of x (size, 784)
        y_batch -- a batch of y (size,)
        reg_lambda -- regularization parameter
    output:
        dw -- derivative for weight w
        db -- derivative for bias b
        batch_loss -- average loss on the mini-batch samples
    """
    # Your code goes here
    return dw, db, batch_loss
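One way to fill this in, sketched under the assumption that the model is softmax regression with one-hot cross-entropy, that dw/db include the L2 terms matching the regularized loss computed in eval() below, and that batch_loss is the plain average cross-entropy over the batch:

import numpy

def mini_batch_gradient(param, x_batch, y_batch, reg_lambda):
    """Sketch: softmax cross-entropy gradient over one mini-batch."""
    w, b = param['w'], param['b']                    # w: (10, 784), b: (10, 1)
    batch_size = x_batch.shape[0]
    num_classes = w.shape[0]
    y_idx = y_batch.astype(int)                      # integer class labels

    # forward pass: class scores, then a numerically stable softmax per row
    scores = numpy.matmul(x_batch, w.T) + b.T        # (batch, 10)
    scores -= numpy.max(scores, axis=1, keepdims=True)
    exp_scores = numpy.exp(scores)
    probs = exp_scores / numpy.sum(exp_scores, axis=1, keepdims=True)

    # one-hot encode the labels
    y_onehot = numpy.zeros((batch_size, num_classes))
    y_onehot[numpy.arange(batch_size), y_idx] = 1.0

    # average cross-entropy over the batch (the L2 penalty is reported by eval())
    batch_loss = -numpy.mean(numpy.log(probs[numpy.arange(batch_size), y_idx] + 1e-12))

    # backward pass: gradient of the averaged loss, plus the L2 terms
    delta = (probs - y_onehot) / batch_size          # (batch, 10)
    dw = numpy.matmul(delta.T, x_batch) + reg_lambda * w                 # (10, 784)
    db = numpy.sum(delta, axis=0, keepdims=True).T + reg_lambda * b      # (10, 1)

    return dw, db, batch_loss

Note that the bias is added to the scores here even though the provided eval() omits it; drop the "+ b.T" term if the assignment's evaluation is meant to ignore the bias.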
Provided code:
import os
import sys
import json
import logging

import numpy
import matplotlib.pyplot as plt
root = os.path.abspath('.')
root += '/configs/'
def loadConfig(name):
    """Read a configuration file as a dictionary."""
    full_path = root + name
    json_file = open(full_path, 'r')
    cfg = json.load(json_file)
    json_file.close()
    return cfg
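loadConfig() reads a JSON file from ./configs/; the key names it needs can be read off train() below. A hypothetical configs/config_1.json with illustrative values (not prescribed by the assignment) might look like:

{
    "num_epoches": 50,
    "batch_size": 64,
    "learning_rate": 0.1,
    "mu": 0.9,
    "lambda": 0.0001,
    "momentum": 1,
    "learning_decay": 1,
    "decay_factor": 0.5
}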
def initialize(num_inputs, num_classes):
    """Initialize the parameters."""
    # num_inputs = 28*28 = 784, num_classes = 10
    w = numpy.zeros((num_classes, num_inputs))   # (10, 784)
    b = numpy.zeros((num_classes, 1))            # (10, 1)
    param = {
        'w': w,   # (10, 784)
        'b': b    # (10, 1)
    }
    return param
def eval(param, hyp, x_data, y_data):
    """Implement the evaluation function.
    input:
        param -- parameters dictionary (w, b)
        hyp -- hyper-parameters: we use hyp['lambda'] to compute regularization
        x_data -- x_train or x_test (size, 784)
        y_data -- y_train or y_test (size,)
    output:
        loss and accuracy
    """
    reg_lambda = hyp['lambda']
    # w: (10, 784), x: (10000, 784), y: (10000,)
    w = param['w'].transpose()
    b = param['b']   # needed below for the regularization term
    dist = numpy.array([numpy.squeeze(softmax(numpy.matmul(x_data[i], w)))
                        for i in range(len(y_data))])

    result = numpy.argmax(dist, axis=1)
    accuracy = sum(result == y_data) / float(len(y_data))

    loss_list = [neg_log_loss(dist[i], y_data[i]) for i in range(len(y_data))]
    loss = (sum(loss_list) / len(loss_list)
            + reg_lambda / 2 * numpy.sum(w * w)
            + reg_lambda / 2 * numpy.sum(b * b))
    return loss, accuracy
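eval() (and the gradient sketch above) relies on softmax and neg_log_loss helpers that are not shown in the excerpt; a minimal sketch of what they are assumed to look like, namely a numerically stable softmax over a score vector and the per-sample negative log-likelihood of the true class:

import numpy

def softmax(z):
    # subtract the max for numerical stability, then normalize the exponentials
    z = z - numpy.max(z)
    exp_z = numpy.exp(z)
    return exp_z / numpy.sum(exp_z)

def neg_log_loss(dist, y):
    # negative log-likelihood of the true label y under the predicted distribution
    return -numpy.log(dist[int(y)] + 1e-12)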
def train(param, hyp, x_train, y_train, x_test, y_test, cfg_idx):
    """Implement the train function.
    input:
        param -- parameters dictionary (w, b)
        hyp -- hyperparameters dictionary
        x_train -- (60000, 784)
        y_train -- (60000,)
        x_test -- (10000, 784)
        y_test -- (10000,)
    output:
        train_loss_list, train_acc_list, test_loss_list, test_acc_list
        Four lists containing the epoch-wise loss on training data, accuracy on
        training data, loss on testing data, and accuracy on testing data.
    """
    num_epoches = hyp['num_epoches']
    batch_size = hyp['batch_size']
    learning_rate = hyp['learning_rate']
    mu = hyp['mu']
    reg_lambda = hyp['lambda']
    train_loss_list, train_acc_list, test_loss_list, test_acc_list = [], [], [], []
    if bool(hyp['momentum']) == True:
        w_velocity = numpy.zeros(param['w'].shape)
        b_velocity = numpy.zeros(param['b'].shape)
    for epoch in range(num_epoches):
        # select a random permutation of the training set
        rand_indices = numpy.random.choice(x_train.shape[0], x_train.shape[0], replace=False)
        num_batch = int(x_train.shape[0] / batch_size)
        if bool(hyp['learning_decay']) == True:
            try:
                if test_acc_list[-1] - test_acc_list[-2] < 0.001:
                    learning_rate *= hyp['decay_factor']
            except:
                pass
            message = 'learning rate: %.8f' % learning_rate
            print(message)
            logging.info(message)
        # for each batch of train data
        for batch in range(num_batch):
            index = rand_indices[batch_size * batch : batch_size * (batch + 1)]
            x_batch = x_train[index]
            y_batch = y_train[index]

            # calculate the stochastic gradient w.r.t. w and b
            dw, db, batch_loss = mini_batch_gradient(param, x_batch, y_batch, reg_lambda)

            # vanilla SGD update
            param['w'] -= learning_rate * dw
            param['b'] -= learning_rate * db

            if (batch + 1) % 100 == 0:
                message = 'Epoch [%d/%d], Batch [%d/%d], Loss %.4f' % (
                    epoch + 1, num_epoches, batch + 1, num_batch, batch_loss)
                print(message)
        train_loss, train_acc = eval(param, hyp, x_train, y_train)
        test_loss, test_acc = eval(param, hyp, x_test, y_test)
        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)
        test_loss_list.append(test_loss)
        test_acc_list.append(test_acc)

        message = 'Epoch %d/%d, Train Loss %.4f, Train Acc %.4f, Test Loss %.4f, Test Acc %.4f' % (
            epoch + 1, num_epoches, train_loss, train_acc, test_loss, test_acc)
        print(message)
        logging.info(message)

    return train_loss_list, train_acc_list, test_loss_list, test_acc_list
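train() reads mu and initializes w_velocity / b_velocity when hyp['momentum'] is set, but the update it applies is plain SGD. A sketch of how a classical momentum step could replace the two parameter updates inside the batch loop (an assumption about the intended formulation, not something the provided code specifies):

# inside the batch loop, in place of the plain SGD update above
if bool(hyp['momentum']) == True:
    # classical momentum: accumulate a decaying velocity, then step along it
    w_velocity = mu * w_velocity - learning_rate * dw
    b_velocity = mu * b_velocity - learning_rate * db
    param['w'] += w_velocity
    param['b'] += b_velocity
else:
    param['w'] -= learning_rate * dw
    param['b'] -= learning_rate * db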
def plot(train_loss_list, train_acc_list, test_loss_list, test_acc_list, cfg_idx):
    """Store the plots."""
    # epoch_list = list(range(len(loss_list)))
    plt.plot(train_loss_list, '-b', label='train loss')
    plt.plot(test_loss_list, '-r', label='test loss')
    plt.legend()
    plt.ylabel('Loss Function')
    plt.xlabel('Epoch')
    plt.xticks(rotation=60)
    plt.title('Loss Function ~ Epoch')
    plt.savefig('assets/loss_{}.png'.format(cfg_idx))
    plt.show()

    plt.plot(train_acc_list, '-b', label='train acc')
    plt.plot(test_acc_list, '-r', label='test acc')
    plt.legend()
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.xticks(rotation=60)
    plt.title('Accuracy ~ Epoch')
    plt.savefig('assets/accr_{}.png'.format(cfg_idx))
    plt.show()
def main(cfg_idx):
    # cfg_idx = args.config
    cfg_name = 'config_{}.json'.format(cfg_idx)
    hyperpara = loadConfig(cfg_name)

    # set the random seed
    numpy.random.seed(1024)

    # initialize the parameters
    num_inputs = x_train.shape[1]
    num_classes = len(set(y_train))
    param = initialize(num_inputs, num_classes)

    # train the model
    train_loss_list, train_acc_list, test_loss_list, test_acc_list = train(
        param, hyperpara, x_train, y_train, x_test, y_test, cfg_idx)

    # plot the loss and accuracy
    plot(train_loss_list, train_acc_list, test_loss_list, test_acc_list, cfg_idx)
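main() refers to x_train, y_train, x_test, and y_test, but the excerpt never shows where they are loaded. A hypothetical entry point, assuming a load_mnist() helper (not part of the provided code) that returns flattened 784-dimensional images and integer labels:

if __name__ == '__main__':
    # load_mnist() is a hypothetical loader; substitute the data loading the assignment provides
    x_train, y_train, x_test, y_test = load_mnist()
    main(1)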
