
Implement training

DricomDragon 5 years ago
parent
commit
1d5db18dc3
1 file changed with 60 additions and 5 deletions

+ 60 - 5
python/lab/trainer.py

@@ -1,9 +1,10 @@
 # Improve performance of neural network
 
-from lab import neural
+import numpy as np
+from lab import neural, io_mnist
-from copy import copy
+from copy import deepcopy
 
-def train(inputNetwork, learnRate, epochs):
+def train(inputNetwork, learnRate, epochs, batchSize=10):
 	"""
 	Create an improved network
 
@@ -12,9 +13,63 @@ def train(inputNetwork, learnRate, epochs):
 
 	return : a trained copy with improved performance
 	"""
-	outputNetwork = copy(inputNetwork)
+	net = deepcopy(inputNetwork) # deep copy, so training never mutates the input network's arrays
 
-	# TODO Training
+	np_images, np_expected = io_mnist.load_training_samples()
 
-	return outputNetwork
+	nbSamples = np_images.shape[1]
+
+	# Prepare variables
+	a0 = np.empty((net.inputLength, batchSize))
+
+	w1 = net.layer1 # reference
+	b1 = net.bias1 # reference, kept current by the in-place bias updates below
+
+	z1 = np.empty((net.hiddenLength, batchSize))
+	a1 = np.empty((net.hiddenLength, batchSize))
+
+	w2 = net.layer2 # reference
+	b2 = net.bias2 # reference, kept current by the in-place bias updates below
+
+	z2 = np.empty((net.outputLength, batchSize))
+	a2 = np.empty((net.outputLength, batchSize))
+	
+	y = np.empty((net.outputLength, batchSize))
+
+	g = net.activationFunction
+	g_ = net.activationDerivative
+
+	d2 = np.empty(a2.shape)
+	d1 = np.empty(a1.shape)
+
+	for epoch in range(epochs):
+		# Create mini batches
+		# TODO Shuffle samples
+
+		# Iterate over batches
+		for batchIndex in range(0, nbSamples, batchSize):
+			# Capture batch
+			batchEndIndex = batchIndex + batchSize
+			a0 = np_images[:, batchIndex:batchEndIndex]
+			y = np_expected[:, batchIndex:batchEndIndex]
+
+			# Forward computation
+			z1 = w1 @ a0 + b1[:, np.newaxis] # broadcast bias across the batch
+			a1 = g(z1)
+
+			z2 = w2 @ a1 + b2[:, np.newaxis] # broadcast bias across the batch
+			a2 = g(z2)
+
+			# Backward propagation
+			d2 = a2 - y # output delta (exact when g is sigmoid/softmax paired with cross-entropy)
+			d1 = w2.transpose() @ d2 * g_(z1) # hidden delta, elementwise product with g'(z1)
+
+			# Weight correction
+			net.layer2 -= learnRate * d2 @ a1.transpose() / batchSize
+			net.layer1 -= learnRate * d1 @ a0.transpose() / batchSize
+
+			net.bias2 -= learnRate * d2.sum(axis=1) / batchSize # mean gradient over the batch
+			net.bias1 -= learnRate * d1.sum(axis=1) / batchSize # mean gradient over the batch
+
+	return net
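
For reference, a minimal sketch of how this train function might be called. The neural.Network constructor and the learnRate/epochs values below are assumptions; only trainer.train and the attributes it reads (layer1, layer2, bias1, bias2, activationFunction, activationDerivative) come from the diff above.

# Hypothetical usage sketch -- neural.Network() construction is an
# assumption; train() relies only on the attributes referenced in
# the diff above.
from lab import neural, trainer

net = neural.Network()

# One pass over the MNIST training samples per epoch,
# with the default mini-batch size of 10.
trained = trainer.train(net, learnRate=0.1, epochs=3)

# trained is a deep copy; the input network net is left untouched.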