@@ -1,9 +1,10 @@
 # Improve performance of neural network
 
-from lab import neural
+import numpy as np
+from lab import neural, io_mnist
 from copy import copy
 
 
-def train(inputNetwork, learnRate, epochs):
+def train(inputNetwork, learnRate, epochs, batchSize=10):
"""
|
|
|
Create an improved network
|
|
|
|
|
@@ -12,9 +13,63 @@ def train(inputNetwork, learnRate, epochs):
 
     return : a trained copy with improved performance
     """
-    outputNetwork = copy(inputNetwork)
+    net = copy(inputNetwork)  # NB: shallow copy; assumes Network.__copy__ duplicates the weight arrays
 
-    # TODO Training
+    np_images, np_expected = io_mnist.load_training_samples()
 
-    return outputNetwork
+    nbSamples = np_images.shape[1]  # samples are stored one per column
+
+    # Prepare variables (placeholders; each is rebound per batch below)
+    a0 = np.empty((net.inputLength, batchSize))
+
+    w1 = net.layer1  # reference; in-place updates below keep it current
+    b1 = net.bias1[:, np.newaxis]  # view; broadcasts across the batch and sees bias updates (a stacked copy would go stale)
+
+    z1 = np.empty((net.hiddenLength, batchSize))
+    a1 = np.empty((net.hiddenLength, batchSize))
+
+    w2 = net.layer2  # reference; in-place updates below keep it current
+    b2 = net.bias2[:, np.newaxis]  # view; broadcasts across the batch and sees bias updates
+
+    z2 = np.empty((net.outputLength, batchSize))
+    a2 = np.empty((net.outputLength, batchSize))
+
+    y = np.empty((net.outputLength, batchSize))
+
+    g = net.activationFunction
+    g_ = net.activationDerivative
+
+    d2 = np.empty(a2.shape)
+    d1 = np.empty(a1.shape)
+
+    for epoch in range(epochs):
+        # Create mini batches
+        # TODO Shuffle samples
+
+        # Iterate over batches (assumes nbSamples is a multiple of batchSize)
+        for batchIndex in range(0, nbSamples, batchSize):
+            # Capture batch
+            batchEndIndex = batchIndex + batchSize
+            a0 = np_images[:, batchIndex:batchEndIndex]
+            y = np_expected[:, batchIndex:batchEndIndex]
+
+            # Forward computation
+            z1 = w1 @ a0 + b1
+            a1 = g(z1)
+
+            z2 = w2 @ a1 + b2
+            a2 = g(z2)
+
+            # Backward propagation
+            d2 = a2 - y
+            d1 = (w2.transpose() @ d2) * g_(z1)
+
+            # Weight correction, averaged over the batch
+            net.layer2 -= learnRate * d2 @ a1.transpose() / batchSize
+            net.layer1 -= learnRate * d1 @ a0.transpose() / batchSize
+
+            net.bias2 -= learnRate * d2 @ np.ones(batchSize) / batchSize
+            net.bias1 -= learnRate * d1 @ np.ones(batchSize) / batchSize
+
+    return net
 
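For reference, each pass through the inner loop above is one step of mini-batch gradient descent. With \(m\) = batchSize, \(\eta\) = learnRate, and \(\odot\) denoting elementwise multiplication, the forward pass, backward pass, and updates correspond to:

\[
z^{(1)} = W^{(1)} a^{(0)} + b^{(1)}, \quad a^{(1)} = g(z^{(1)}), \qquad
z^{(2)} = W^{(2)} a^{(1)} + b^{(2)}, \quad a^{(2)} = g(z^{(2)})
\]
\[
\delta^{(2)} = a^{(2)} - y, \qquad
\delta^{(1)} = \left(W^{(2)}\right)^{\top} \delta^{(2)} \odot g'\!\left(z^{(1)}\right)
\]
\[
W^{(l)} \leftarrow W^{(l)} - \frac{\eta}{m}\, \delta^{(l)} \left(a^{(l-1)}\right)^{\top}, \qquad
b^{(l)} \leftarrow b^{(l)} - \frac{\eta}{m} \sum_{i=1}^{m} \delta^{(l)}_{:,i}
\]

Note that \(\delta^{(2)} = a^{(2)} - y\) without a \(g'(z^{(2)})\) factor is the textbook output delta for sigmoid units paired with a cross-entropy cost; the diff does not show which cost lab.neural assumes, so this may be intentional or an omission.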
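The "TODO Shuffle samples" placeholder could be resolved by permuting the sample columns once per epoch and then slicing the permuted arrays in the batch loop. A minimal sketch, assuming samples are stored one per column (as nbSamples = np_images.shape[1] implies); this is not part of the patch:

    # Sketch only: epoch-level shuffle for the TODO above.
    permutation = np.random.permutation(nbSamples)
    shuffled_images = np_images[:, permutation]
    shuffled_expected = np_expected[:, permutation]
    # The batch loop would then slice shuffled_images / shuffled_expected
    # instead of np_images / np_expected.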
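A minimal usage sketch of the new signature. The neural.Network() constructor call and the hyperparameter values are assumptions for illustration; lab.neural's API is not shown in this diff:

    from lab import neural

    # Hypothetical constructor -- lab.neural's API is not shown in the diff.
    net = neural.Network()

    # train() is the function patched above; the values here are illustrative only.
    trained = train(net, learnRate=0.5, epochs=30, batchSize=10)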