# -*- coding: utf-8 -*-
"""
Iris dataset, pure Python, Fully Connected sequential Neural Network classification
@author: Gabriel Turinici
"""
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
import matplotlib.pyplot as plt

# Load the iris dataset
iris = datasets.load_iris()
# Shuffle the data and target
data, target = shuffle(iris.data, iris.target, random_state=42)
n_iris = data.shape[0]
n_val = int(0.3 * n_iris)   # select 30% of the samples for validation
n_train = n_iris - n_val    # the rest for training
X = data[:n_train, :]
y = target[:n_train]
Xval = data[n_train:, :]
yval = target[n_train:]

# transform the targets into one-hot vectors of length 3 (0/1 encoding)
nb_classes = len(np.unique(target))
one_hot_targets = np.eye(nb_classes)[target]  # matrix of shape (150, 3)


def softmax(x):
    x = x - np.max(x)
    return np.exp(x) / np.sum(np.exp(x))


# function to create a dense layer
def create_dense_layer(n_input, n_output):
    # weights: 2D array of shape (n_output, n_input), normal r.v. of mean 0 and variance 1/n_input
    weights = np.random.randn(n_output, n_input) / np.sqrt(n_input)
    # biases: vector of shape (n_output, 1), standard normal variables
    biases = np.random.randn(n_output, 1)
    return weights, biases


def ReLU(input_array):
    return np.maximum(0, input_array)


dim1, dim2, dim3, dim4 = 4, 5, 7, 3
weights1, biases1 = create_dense_layer(dim1, dim2)
weights2, biases2 = create_dense_layer(dim2, dim3)
weights3, biases3 = create_dense_layer(dim3, dim4)

losses = []
alpha = 0.01  # put alpha=0 to compare with no training

# train loop
n_iter = 5000
for it in range(n_iter):
    # alpha = 0.1 / (1 + 0.01 * it)  # this decay schedule seems ok
    sample_index = np.random.choice(n_train)
    Y0 = X[[sample_index], :].T  # column vector of shape (4, 1)

    # FORWARD STEP
    Y1tilde = weights1 @ Y0 + biases1   # dense layer 1
    Y1 = ReLU(Y1tilde)                  # activation 1: ReLU
    # dense layer 2
    Y2tilde = weights2 @ Y1 + biases2
    Y2 = ReLU(Y2tilde)                  # activation 2: ReLU
    # dense layer 3
    Y3tilde = weights3 @ Y2 + biases3

    # final computations of the fwd step: softmax + cross-entropy loss
    label = one_hot_targets[[sample_index], :].T
    q = softmax(Y3tilde)
    loss_val = -np.sum(label * np.log(q))
    losses.append(loss_val)

    # BACKWARD STEP
    # gradient of the cross-entropy loss w.r.t. the pre-softmax output: q - label
    dY3tilde = q - label
    # gradient on parameters layer 3
    dweights3 = dY3tilde @ Y2.T
    dbiases3 = dY3tilde
    # gradient on values activation 2
    dY2 = weights3.T @ dY3tilde
    dY2tilde = dY2.copy()
    dY2tilde[Y2tilde < 0] = 0
    # gradient on parameters layer 2
    dweights2 = dY2tilde @ Y1.T
    dbiases2 = dY2tilde
    # gradient on values activation 1
    dY1 = weights2.T @ dY2tilde
    dY1tilde = dY1.copy()
    dY1tilde[Y1tilde <= 0] = 0
    # gradient on parameters layer 1
    dweights1 = dY1tilde @ Y0.T
    dbiases1 = dY1tilde

    # update weights and biases (one stochastic gradient descent step)
    weights1 -= alpha * dweights1
    biases1 -= alpha * dbiases1
    weights2 -= alpha * dweights2
    biases2 -= alpha * dbiases2
    weights3 -= alpha * dweights3
    biases3 -= alpha * dbiases3

# plt.loglog(losses)
plt.plot(losses)
plt.title('losses')
plt.pause(0.1)

# evaluate accuracy on the validation set
qresults = []
count = 0
for nn in range(n_val):
    # propagate example nn through the network
    Y0 = Xval[[nn], :].T
    Y1 = ReLU(weights1 @ Y0 + biases1)    # dense layer 1 + ReLU
    Y2 = ReLU(weights2 @ Y1 + biases2)    # dense layer 2 + ReLU
    q = softmax(weights3 @ Y2 + biases3)  # dense layer 3 + softmax
    qresults.append(q.copy())
    # count the example as correct if the true class has the largest probability
    if np.all([q[yval[nn], 0] >= p for jj, p in enumerate(q[:, 0]) if jj != yval[nn]]):
        count += 1
print(f'accuracy = {100 * count / n_val:.2f}%')

plt.figure()  # new figure so the histograms do not overlay the loss curve
predictions = np.argmax(np.array(qresults)[:, :, 0], axis=1)
plt.hist(predictions, alpha=0.5)
plt.hist(yval, alpha=0.5)
plt.legend(['predictions', 'true values'])
plt.title('predictions')
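
# --- Optional add-on (a minimal sketch, not part of the original script) ---
# The training loop above processes one sample at a time; the same forward pass can be
# written in matrix form over the whole training set at once. `batched_softmax` is a
# hypothetical helper introduced here because the `softmax` above normalizes a single
# column only. Assumes the trained weights1..3 / biases1..3 and ReLU are in scope.
def batched_softmax(z):
    """Column-wise softmax for an array of shape (nb_classes, n_samples)."""
    z = z - np.max(z, axis=0, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=0, keepdims=True)

H1 = ReLU(weights1 @ X.T + biases1)           # shape (dim2, n_train)
H2 = ReLU(weights2 @ H1 + biases2)            # shape (dim3, n_train)
Q = batched_softmax(weights3 @ H2 + biases3)  # shape (dim4, n_train)
train_acc = np.mean(np.argmax(Q, axis=0) == y)
print(f'training accuracy = {100 * train_acc:.2f}%')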