# Improve the performance of a neural network by training it with
# mini-batch gradient descent on the MNIST training samples.
import numpy as np
from copy import deepcopy

from lab import neural, io_mnist


def train(inputNetwork, learnRate, epochs, batchSize=10):
    """
    Return a trained copy of the given network.

    inputNetwork : the network to be trained
    learnRate    : step size used for the gradient descent updates
    epochs       : number of passes over the full training set
    batchSize    : number of samples per mini-batch
    return       : a trained copy with improved performance
    """
    # A deep copy is required: a shallow copy would share the weight arrays
    # with inputNetwork, so the in-place updates below would also mutate it.
    net = deepcopy(inputNetwork)

    np_images, np_expected = io_mnist.load_training_samples()
    nbSamples = np_images.shape[1]
    # Prepare variables
    w1 = net.layer1  # reference, updated in place below
    w2 = net.layer2  # reference, updated in place below
    # Column views of the biases: unlike np.stack, which would take a stale
    # one-time copy, a view stays in sync with the in-place bias updates
    # below, and it broadcasts over the columns of a batch.
    b1 = net.bias1[:, np.newaxis]
    b2 = net.bias2[:, np.newaxis]

    g = net.activationFunction
    g_ = net.activationDerivative
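    # Shape conventions: samples are stored as columns, so a0 is
    # (inputLength, batchSize), z1/a1 are (hiddenLength, batchSize), and
    # z2/a2/y are (outputLength, batchSize).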
    for epoch in range(epochs):
        # Create mini batches
        # TODO Shuffle samples

        # Iterate over batches
        for batchIndex in range(0, nbSamples, batchSize):
            # Capture batch
            batchEndIndex = batchIndex + batchSize
            a0 = np_images[:, batchIndex:batchEndIndex]
            y = np_expected[:, batchIndex:batchEndIndex]

            # Forward computation
            z1 = w1 @ a0 + b1
            a1 = g(z1)
            z2 = w2 @ a1 + b2
            a2 = g(z2)
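            # Delta rule (assuming the cost pairs with the activation so
            # that the output error simplifies, e.g. cross-entropy with a
            # sigmoid output):
            #   d2 = dC/dz2 = a2 - y
            #   d1 = (w2^T @ d2) * g'(z1)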
            # Backward propagation
            d2 = a2 - y
            d1 = w2.transpose() @ d2 * g_(z1)

            # Weight and bias corrections, averaged over the batch; summing
            # the deltas over axis 1 also copes with a final batch that is
            # smaller than batchSize.
            net.layer2 -= learnRate * d2 @ a1.transpose() / batchSize
            net.layer1 -= learnRate * d1 @ a0.transpose() / batchSize
            net.bias2 -= learnRate * d2.sum(axis=1) / batchSize
            net.bias1 -= learnRate * d1.sum(axis=1) / batchSize

    return net
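

# Minimal usage sketch. The `neural.Network(784, 30, 10)` constructor is an
# assumption (this module does not show it), and learnRate=3.0 / epochs=30
# are illustrative values only:
#
#     net = neural.Network(784, 30, 10)
#     trained = train(net, learnRate=3.0, epochs=30, batchSize=10)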