/* * MultiLayerPerceptron.java * * This class is a simple implementation of a three layer (single hidden layer) * perceptron, (mostly) as shown in Chapter 20 * of Artificial Intelligence: A Modern Approach, 2nd Edition. * * Created on May 24, 2006, 8:04 PM */ /** * * @author Bryan Pardo */ //package pardo.learning; import java.util.*; import java.lang.*; import java.io.*; public class MultiLayerPerceptron implements Serializable { // weights between layers private double [][] weight_input2hidden; private double [][] weight_hidden2output; // numbers of elements in layers private int numberOfInputs; private int numberOfOutputs; private int numberOfHidden; /** Creates a new instance of MultiLayerPerceptron. The perceptron * is created to be fully connected between adjacent * layers. Connection weights are set randomly between -0.5 and * 0.5. * * @param numberofIn is the number of distinct input values each * training example will have * @param numberofHid is the number of hidden nodes to have * @param numberofOut is the number of output nodes to have */ public MultiLayerPerceptron(int numberOfIn, int numberOfHid, int numberOfOut) { numberOfInputs = numberOfIn; // we add one for the bias node numberOfHidden = numberOfHid;// we add one for the bias node numberOfOutputs = numberOfOut; weight_input2hidden = new double [numberOfInputs][numberOfHidden]; // weights from input to hidden layer weight_hidden2output = new double[numberOfHidden][numberOfOutputs]; // weights from hidden nodes to output layer // set the weights from the input layer to the hidden layer for (int k = 0; k < numberOfInputs; k++) for (int j = 0; j < numberOfHidden; j++) weight_input2hidden[k][j] = (- 0.5 + Math.random()); // set the weights from the hidden layer to the output layer for (int j = 0; j < numberOfHidden; j++) for (int i = 0; i < numberOfOutputs; i++) weight_hidden2output[j][i] = ( -0.5 + Math.random()); } /** This is a constructor that loads in a serialized MultiLayerPerceptron * from a file whose name and path are specified in the parameter * * @param fullFilePath This needs to be the full path name to the file * containing the serialized MultiLayerPerceptron object * * @throws IllegalArgumentException if we cant open a MultiLayerPerceptron * file by that name or if the file does not contain * a serialized MultiLayerPerceptron */ public MultiLayerPerceptron(String fullFilePath){ MultiLayerPerceptron mlp = null; try{ ObjectInputStream ois = new ObjectInputStream(new FileInputStream(fullFilePath)); mlp = (MultiLayerPerceptron)ois.readObject(); ois.close(); } catch (ClassNotFoundException cnfe){throw new IllegalArgumentException(fullFilePath + " does not contain a serialized MultiLayerPerceptron");} catch (FileNotFoundException fnfe){throw new IllegalArgumentException(fullFilePath + " not found");} catch (IOException e){throw new IllegalArgumentException(fullFilePath + " : IOexception");} // weights between layers this.weight_input2hidden = mlp.weight_input2hidden; this.weight_hidden2output = mlp.weight_hidden2output; // numbers of elements in layers this.numberOfInputs = mlp.numberOfInputs; this.numberOfOutputs = mlp.numberOfOutputs; this.numberOfHidden = mlp.numberOfHidden; } /** this returns the sigmoid function from figure 20.16 (page 738) * of Artificial Intelligence: A Modern Approach 2nd edition * * @param x a double * @returns the value 1/(1+e^-x) */ public double sigmoid(double x){ return 1.0 /(1.0 + Math.exp(-x)); } /** Do the back-prop-learning algorithm shown in figure 20.25 * (page 746) of Artificial Intelligence: A Modern Approach 2nd * edition . * * Starting with the output layer this computes the desired change * for the output units, using the difference between the desired * output and the current output of the network. It then * propagates these changes back through previous layers. It does * this once in each epoch. training continues until the desired * number of training epochs is completed * *@param inputAL a ArrayList of arrays of doubles. the ith array *in the ArrayList is the input in the ith training example * *@param desiredOutputAL a ArrayList of arrays of doubles. the ith *array in the ArrayList is the desired output of the ith training *example * *@param maxNumberOfEpochs says how many times the weights will be *updated in response * *@param learningRate is a value (should range from 0 to 1 ) that *speeds or slows learning to the training examples */ public void backprop(ArrayList inputAL, ArrayList desiredOutputAL, int maxNumberOfEpochs, double learningRate){ double input [] = new double[numberOfInputs]; double hidden [] = new double[numberOfHidden]; double output[] = new double [numberOfOutputs]; double desiredOutput [] = new double [numberOfOutputs]; double deltaOutput [] = new double [numberOfOutputs]; double deltaHidden [] = new double [numberOfHidden]; // this loop runs once per epoch ( an epoch is a presentation // of all the training examples to the learner) for (int epoch = 1; epoch <= maxNumberOfEpochs; epoch++){ // this loop runs once per training example for (int example = 0; example < inputAL.size(); example++){ // load the input example input = (double [])inputAL.get(example); // load the desired output desiredOutput = (double [])desiredOutputAL.get(example); // calulate the values of the hidden layer for(int h = 0; h < numberOfHidden; h++){ for (int i = 0; i < numberOfInputs; i++) hidden[h] = hidden[h]+input[i]*weight_input2hidden[i][h]; hidden[h] = sigmoid(hidden[h]); } // calulate the values of the output layer for(int u = 0; u < numberOfOutputs; u++){ for (int h = 0; h < numberOfHidden; h++) output[u] = output[u]+hidden[h]*weight_hidden2output[h][u]; output[u] = sigmoid(output[u]); } // for each output unit, calculate its error term for (int u = 0; u