import java.util.Random; // FFNN: Feed Foreward Neural Network // with one hidden layer and num_inp - num_hid - num_out structure // the activation function of the hidden layer is 'tansig' and that // of the output layer is 'purelin' or 'hardlim' // tansig(x) = 2/(1+exp(-2*x))-1 // logsig(x) = 1/(1+exp(-x)) // purelin(x) = x // hardlim(x) = 1, if n >= 0 // 0, otherwise public class FFNN { public final static int HARDLIM = 0; public final static int PURELIN = 1; public final static int TANSIG = 2; public final static int LOGSIG = 3; public static int out_act_fun = HARDLIM; // activation function of output layer public static float[][] tr_set; // the training set // the first num_inp elements of each row of the matrix // are inputs and the remainings are outputs float[][] ihw; // input to hidden layer weight matrix float[][] how; // hidden to output layer weight matrix float[] hbias; // bias vector of hidden layer float[] obias; // bias vector of output layer int num_inp; // number of neurons in the input layer int num_hid; // number of neurons in the hidden layer int num_out; // number of neurons in the output layer public float fitness; // greater fitness means better FFNN (i.e Individual) private static Random rng = new Random(); public FFNN(int inputs,int hiddens,int outputs){ num_inp = inputs; num_hid = hiddens; num_out = outputs; ihw = new float[num_inp][num_hid]; how = new float[num_hid][num_out]; hbias = new float[num_hid]; obias = new float[num_out]; // initialize weights and biases: // set all these weights to small random values within (-.5,+.5) for(int i=0; i < num_inp; i++) for(int j=0; j < num_hid; j++) ihw[i][j] = rng.nextFloat() - .5f; for(int i=0; i < num_hid; i++) for(int j=0; j < num_out; j++) how[i][j] = rng.nextFloat() - .5f; for(int i=0; i < num_hid; i++) hbias[i] = rng.nextFloat() - .5f; for(int i=0; i < num_out; i++) obias[i] = rng.nextFloat() - .5f; // Nguyen-Widrow initialization: float betta = .7f * (float) Math.pow(num_hid,1.0f/((float)num_inp)); for(int i=0; i < num_hid; i++){ float sum = 0.0f; for(int j=0; j < num_inp; j++) sum += ihw[j][i]*ihw[j][i]; sum = (float) Math.sqrt(sum); for(int j=0; j < num_inp; j++) ihw[j][i] = betta * ihw[j][i] / sum; } for(int i=0; i < num_hid; i++) hbias[i] = rng.nextFloat()*2*betta - betta; evalFitness(); } public FFNN(float[][] ih,float[][] ho,float[] hb,float[] ob){ num_inp = ih.length; num_hid = hb.length; num_out = ob.length; ihw = (float[][]) ih.clone(); how = (float[][]) ho.clone(); hbias = (float[]) hb.clone(); obias = (float[]) ob.clone(); evalFitness(); } public FFNN mutate(){ // mutate this FFNN and return the mutated individual: float[][] ih = (float[][]) ihw.clone(); float[][] ho = (float[][]) how.clone(); float[] hb = (float[]) hbias.clone(); float[] ob = (float[]) obias.clone(); boolean creep_mutation = rng.nextBoolean(); // if creep_mutation == true then use creep mutation, otherwise use normal mutation // mutate one element of ihw matrix: int i = rng.nextInt(num_inp); int j = rng.nextInt(num_hid); if( creep_mutation ) ih[i][j] = ih[i][j] + rng.nextFloat()*2.0f - 1.0f; else ih[i][j] = rng.nextFloat()*2.0f - 1.0f; // mutate one element of how matrix: i = rng.nextInt(num_hid); j = rng.nextInt(num_out); if( creep_mutation ) ho[i][j] = ho[i][j] + rng.nextFloat()*2.0f - 1.0f; else ho[i][j] = rng.nextFloat()*2.0f - 1.0f; // mutate one element of hbias vector: i = rng.nextInt(num_hid); if( creep_mutation ) hb[i] = hb[i] + rng.nextFloat()*2.0f - 1.0f; else hb[i] = rng.nextFloat()*2.0f - 1.0f; // mutate one element of obias vector: i = rng.nextInt(num_out); if( creep_mutation ) ob[i] = ob[i] + rng.nextFloat()*2.0f - 1.0f; else ob[i] = rng.nextFloat()*2.0f - 1.0f; return new FFNN(ih,ho,hb,ob); } public static FFNN xover_uniform(FFNN f,FFNN m){ // apply uniform cross over on f and m (as two parents) and return the child: float[][] ih = new float[f.num_inp][f.num_hid]; // input-hidden weight matrix of the child float[][] ho = new float[f.num_hid][f.num_out]; // hidden-output weight matrix of the child float[] hb = new float[f.num_hid]; // hidden layer bias vector of the child float[] ob = new float[f.num_out]; // output layer bias vector of the child boolean avr_xover = rng.nextBoolean(); // average cross-over // if avr_xover == false then apply normal list cross-over // otherwise apply average cross-over. // e.g.: average cross-over on {f0,f1,f2} and {m0,m1,m2} results this // offspring: { .5f0+.5m0, .5f1+.5m1, .5f2+.5m2 } if( avr_xover ){ for(int i=0; i < f.num_inp; i++) for(int j=0; j < f.num_hid; j++) ih[i][j] = (f.ihw[i][j] + m.ihw[i][j]) / 2.0f; for(int i=0; i < f.num_hid; i++) for(int j=0; j < f.num_out; j++) ho[i][j] = (f.how[i][j] + m.how[i][j]) / 2.0f; for(int i=0; i < f.num_hid; i++) hb[i] = (f.hbias[i] + m.hbias[i]) / 2.0f; for(int i=0; i < f.num_out; i++) ob[i] = (f.obias[i] + m.obias[i]) / 2.0f; }else{ // apply list cross-over: for(int i=0; i < f.num_inp; i++) for(int j=0; j < f.num_hid; j++) if( rng.nextBoolean() ) ih[i][j] = f.ihw[i][j]; else ih[i][j] = m.ihw[i][j]; for(int i=0; i < f.num_hid; i++) for(int j=0; j < f.num_out; j++) if( rng.nextBoolean() ) ho[i][j] = f.how[i][j]; else ho[i][j] = m.how[i][j]; for(int i=0; i < f.num_hid; i++) if( rng.nextBoolean() ) hb[i] = f.hbias[i]; else hb[i] = m.hbias[i]; for(int i=0; i < f.num_out; i++) if( rng.nextBoolean() ) ob[i] = f.obias[i]; else ob[i] = m.obias[i]; } return new FFNN(ih,ho,hb,ob); } public float[] eval(float[] inp){ // evaluate the output vector of net, if the input vector is inp: float[][] inpm = new float[1][]; // input matrix, has just one row inpm[0] = inp; float[][] h_in = NNUtils.multiply(inpm,ihw); // input of hidden layer neurons // add biases: for(int i=0; i < num_hid; i++) h_in[0][i] = h_in[0][i] + hbias[i]; // pass h_in from activation function of hidden layer: // tansig(x) = 2/(1+exp(-2*x))-1 for(int i=0; i < num_hid; i++) h_in[0][i] = 2.0f / (1.0f+(float)Math.exp(-2.0f*h_in[0][i])) - 1.0f; float[][] o_in = NNUtils.multiply(h_in,how); // add biases: for(int i=0; i < num_out; i++) o_in[0][i] = o_in[0][i] + obias[i]; // pass o_in from activation function of output layer: for(int i=0; i < num_out; i++) if( out_act_fun == HARDLIM ) o_in[0][i] = o_in[0][i] >= 0? 1.0f:0.0f; // 'hardlim' else if( out_act_fun == TANSIG ) o_in[0][i] = 2.0f / (1.0f+(float)Math.exp(-2.0f*o_in[0][i])) - 1.0f; else if( out_act_fun == LOGSIG ) o_in[0][i] = 1.0f / (1.0f+(float)Math.exp(-1.0f*o_in[0][i])); // else out_act_fun == PURELIN so everything is ok. return o_in[0]; } public float eval_error(){ // evaluate sum of square error of this FFNN on training set tr_set: float[] inp = new float[num_inp]; float sse = 0.0f; // sum of square error for(int i=0; i < tr_set.length; i++){ for(int j=0; j < num_inp; j++) inp[j] = tr_set[i][j]; float[] out = eval(inp); for(int j=0; j < num_out; j++) sse += (out[j] - tr_set[i][num_inp+j])*(out[j] - tr_set[i][num_inp+j]); } return sse; } public void evalFitness(){ // greater fitness means better Individual fitness = 1.0f / (1.0f + eval_error()); } public String toString(){ String s = num_inp + "-" + num_hid + "-" + num_out + " feed forward neural network with one hidden layer:"; s+="\n\ninput-hidden weight matrix:\n"; for(int i=0; i < ihw.length; i++) s += NNUtils.array2string(ihw[i])+"\n"; s+="\nhidden layer bias vector:\n"+NNUtils.array2string(hbias)+"\n"; s+="\nhidden-output weight matrix:\n"; for(int i=0; i < how.length; i++) s += NNUtils.array2string(how[i])+"\n"; s+="\noutput layer bias vector:\n"+NNUtils.array2string(obias)+"\n"; s+="\nsum of square errors = " + (1.0f/fitness - 1.0f); s+="\nfitness = " + fitness; return s; } public static void main(String[] args) { FFNN.out_act_fun = FFNN.PURELIN; FFNN.tr_set = NNUtils.read_training_set("c:/xor.txt"); // Random search to find weights for XOR: FFNN result = null; for(int i=0; i < 10000; i++){ System.out.println(i); FFNN net = new FFNN(3,2,1); if( net.fitness == 1.0f ){ result = net; break; } } if( result != null ){ System.out.println(result); System.out.println(NNUtils.array2string(result.eval(new float[]{.5f,1.5f}))); } } }