#! /usr/bin/env python # -*- coding: iso-8859-15 -*- """Artificial neural networks for Orange. Orange module to add artificial neural networks as learning algorithms using calls to SNNS software. Version: 1.09 (working but some more testing and refinements can improve it to version 1.10) SNNS randomness agrees with Orange behaviour on randomness: http://www.ailab.si/orange/doc/reference/random.htm In spite of the communicating media with SNNS being files, this code is supposed to be reentrant. Any way, as some of the temporal files are named by the module, but created by SNNS, there is a really extremely small chance of files becoming corrupted and breaking. Don't worry you would probably win the lotto and hang a windows program a billion times before this happens. TO DO: see marked XXX in code, error handling in system calls, error handling when SNNS fails, Copyright (C) 2005-2006 Antonio Arauzo Azofra This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA """ # From std python import os, sys, string, re import tempfile, math, random from itertools import izip # From orange import orange, statc # Should be set to the path where binaries of SNNS tools are # located, if they are not in system path pathSNNS = "" # example: #pathSNNS = "~/SNNSv4.2/tools/bin/i686-pc-linux-gnu/" # If messages should be printed verbose = False def _argmax(array): """ _argmax returns the position of the maximun value of an array """ return max(izip(array,xrange(len(array))))[1] class Transform: def __init__(self, table, alpha=0.1, beta=0.9): """ Prepares transformation of data for neural network * discrete to N features in {alpha, beta} * continuous to [alpha, beta] Details of the transformation performed (transform): [(Continuous=True, slope, pos),(Continuous=False, no.values)] y = slope*x + pos Notes: Destination is not an orange.Example because we can not use more than one class feature in Orange Destination domain is formed by nnAntecedent values followed by nnTargets (following original order in each subgroup) """ self.transform = [] self.alpha = alpha self.beta = beta self.domain = table.domain # Previous domain self.nnAntecedents = 0 # Transformed domain self.nnTargets = 0 # Prepares transformation of data basicAttrStat = orange.DomainBasicAttrStat(table) for i in range( len(table.domain.variables) ): # Continuous if self.domain[i].varType == orange.VarTypes.Continuous: varRange = (basicAttrStat[i].max - basicAttrStat[i].min) if varRange == 0.0: slope = 1.0 # Unique value else: slope = float(beta-alpha) / varRange pos = alpha - (slope * basicAttrStat[i].min) self.transform.append( (True, slope, pos) ) if i != self.domain.variables.index(self.domain.classVar): self.nnAntecedents += 1 else: self.nnTargets +=1 # Discrete else: nValues = len( self.domain[i].values ) self.transform.append( (False, nValues) ) if i != self.domain.variables.index(self.domain.classVar): self.nnAntecedents += nValues else: self.nnTargets += nValues def apply(self, example): """ Applies a defined trasformation over an example Returns: a list with the result """ rtn = [] # Antecedents for i in range( len(example) ): if i != self.domain.variables.index(self.domain.classVar): # Continuous if self.transform[i][0]: if example[i].value in ['?', '~', '.']: rtn.append(0.5) # NULL values (uses average of [0,1]) XXX else: rtn.append( example[i]*self.transform[i][1] + self.transform[i][2] ) # Discrete else: for j in range(self.transform[i][1]): if self.domain[i].values[j] == example[i]: rtn.append(self.beta) else: rtn.append(self.alpha) # Target (Class or goal attribute) i = self.domain.variables.index(self.domain.classVar) # Continuous if self.transform[i][0]: if example[i].value in ['?', '~', '.']: rtn.append(0.5) # NULL values (uses average of [0,1]) XXX else: rtn.append( example[i]*self.transform[i][1] + self.transform[i][2] ) # Discrete else: for j in range(self.transform[i][1]): if self.domain[i].values[j] == example[i]: rtn.append(self.beta) else: rtn.append(self.alpha) return rtn def applyInverseToTarget(self, target): """ From a NN output get the class by: majority criterion, or denormalizing in continuous cases. Returns: orange.Value with the class """ i = self.domain.variables.index(self.domain.classVar) if self.transform[i][0]: # continuous trTarget = (target[0] - self.transform[i][2]) / self.transform[i][1] rtn = orange.Value(self.domain.classVar, trTarget) else: # discrete rtn = orange.Value(self.domain.classVar, _argmax(target) ) #XXX would not be nice if this worked in Orange: # domain[i].values[_argmax(out)] return rtn def __str__(self): t = ' ' + str(MSE) + ' and SIGNAL == 0 do\n' # nu += 'if CYCLES mod 10 == 0 then\n' # nu += 'print ("cycles = ", CYCLES, " SSE = ", SSE, " MSE = ",MSE) endif\n' nu += 'trainNet()\nendwhile\n' nu += 'if SIGNAL !=0 then print("Stopped due to signal reception: signal " + SIGNAL)\nendif' nu += '\nsaveNet("'+nnFileName+'")\n' batchmanScriptFile.write(nu) batchmanScriptFile.close() # Train the NN if verbose: orden = pathSNNS + "batchman -f " + batchmanScriptFileName else: orden = pathSNNS + "batchman -q -f " + batchmanScriptFileName os.system(orden) # Remove tmp file os.remove(batchmanScriptFileName) def trainAutoNN(nnFileName, trainFileName, testFileName, MSE, cycles, nRepeat, step, algorithm, learningParams): """ Trains a neural network using batchman. Uses test data to evaluate the training state and select the best neural network. Bad accuracy (not used) """ # Open tmp file for the script try: fd, batchmanScriptFileName = tempfile.mkstemp() batchmanScriptFile = os.fdopen(fd, 'w') except IOError: print 'Error: Couldn\'t create temp file.' sys.exit(0) # Create script batchman nu = 'net = "' + nnFileName + '"\n' nu += 'loadNet(net)\n' nu += 'trainPat = "' + trainFileName + '"\n' nu += 'testPat = "' + testFileName + '"\n' nu += 'loadPattern(trainPat)\n' nu += 'loadPattern(testPat)\n' nu += 'setInitFunc("Randomize_Weights", 1.0, -1.0)\n' nu += 'setLearnFunc("' + algorithm + '"' +\ string.join(["," + p for p in learningParams], sep="") + ')\n' nu += 'setShuffle(TRUE)\n' nu += 'mejor = 100000000 #Valor grande para representar +infinito\n' nu += 'for i:=1 to ' + str(nRepeat) + ' do\n' if verbose: nu += ' print(" --- ", i)\n' nu += ' initNet()\n' nu += ' while CYCLES < ' + str(cycles) + ' and MSE > ' + str(MSE) + ' and SIGNAL == 0 do\n' nu += ' setPattern(trainPat)\n' nu += ' for k:= 1 to ' + str(step) + ' do\n' nu += ' trainNet()\n' nu += ' endfor\n' nu += ' setPattern(testPat)\n' nu += ' testNet()\n' if verbose: nu += ' print("MSE =", MSE, "ciclos:", CYCLES)\n' nu += ' if MSE < mejor then\n' nu += ' mejor = MSE\n' nu += ' saveNet(net)\n' if verbose: nu += ' print(CYCLES, ": ", MSE, "(mejor MSE)")\n' nu += ' endif\n' nu += ' endwhile\n' nu += 'endfor\n' if verbose: nu += 'print("Mejor MSE(", net, ")= ", mejor)\n' nu += 'if SIGNAL !=0 then print("Stopped due to signal reception: signal " + SIGNAL)\nendif' batchmanScriptFile.write(nu) batchmanScriptFile.close() # Train the NN if verbose: orden = pathSNNS + "batchman -f " + batchmanScriptFileName else: orden = pathSNNS + "batchman -q -f " + batchmanScriptFileName os.system(orden) # Remove tmp file os.remove(batchmanScriptFileName) #print "BATCHMAN:", batchmanScriptFileName def guessTrainParameters(nnFileName, trainFileName, testFileName, MSE, cycles, nRepeat, step, algorithm, learningParams): """ By a series of tests choose the number of cycles to train a neural network. """ # Open tmp file for the script try: fd, batchmanScriptFileName = tempfile.mkstemp() batchmanScriptFile = os.fdopen(fd, 'w') except IOError: print 'Error: Couldn\'t create temp file.' sys.exit(0) # Create script batchman nu = 'net = "' + nnFileName + '"\n' nu += 'loadNet(net)\n' nu += 'trainPat = "' + trainFileName + '"\n' nu += 'testPat = "' + testFileName + '"\n' nu += 'loadPattern(trainPat)\n' nu += 'loadPattern(testPat)\n' nu += 'setInitFunc("Randomize_Weights", 1.0, -1.0)\n' nu += 'setLearnFunc("' + algorithm + '"' +\ string.join(["," + p for p in learningParams], sep="") + ')\n' nu += 'setShuffle(TRUE)\n' nu += 'for i:=1 to ' + str(nRepeat) + ' do\n' nu += ' mejor = 100000000 #Valor grande para representar +infinito\n' nu += ' mejorCycles = 0\n' nu += ' print(" --- ", i)\n' nu += ' initNet()\n' nu += ' while CYCLES < ' + str(cycles) + ' and MSE > ' + str(MSE) + ' and SIGNAL == 0 do\n' nu += ' setPattern(trainPat)\n' nu += ' for k:= 1 to ' + str(step) + ' do\n' nu += ' trainNet()\n' nu += ' endfor\n' nu += ' setPattern(testPat)\n' nu += ' testNet()\n' nu += ' print("MSE =", MSE, "ciclos:", CYCLES)\n' nu += ' if MSE < mejor then\n' nu += ' mejor = MSE\n' nu += ' mejorCycles = CYCLES\n' nu += ' print(CYCLES, ": ", MSE, "(mejor MSE)")\n' nu += ' endif\n' nu += ' endwhile\n' nu += ' print("SetCycles=", mejorCycles)\n' nu += 'endfor\n' nu += 'print("Mejor MSE(", net, ")= ", mejor)\n' nu += 'if SIGNAL !=0 then print("Stopped due to signal reception: signal " + SIGNAL)\nendif' batchmanScriptFile.write(nu) batchmanScriptFile.close() # Train the NN if verbose: orden = pathSNNS + "batchman -f " + batchmanScriptFileName else: orden = pathSNNS + "batchman -q -f " + batchmanScriptFileName inout = os.popen2(orden) cycles = [] l = inout[1].readline() while l: if l[0:10] == "SetCycles=": cycles.append( int(l[10:]) ) #print l, l = inout[1].readline() if verbose: print "cycles=", cycles # Remove tmp file os.remove(batchmanScriptFileName) return int( statc.mean(cycles) ) def extractWeights(nnFN): """ Extract weights and bias from a neural network trained file """ f = open(nnFN, 'r') aLine = f.readline() if aLine.find("SNNS network") == -1: raise Exception(nnFN + " is not an SNNS network") # Extract bias while not aLine.find("unit definition section") != -1: aLine = f.readline() dre = re.compile("\d") while not dre.search(aLine): aLine = f.readline() bias = [] while dre.search(aLine): tokens = re.split('\|', aLine) b = float( tokens[4] ) bias.append(b) aLine = f.readline() # Extract weights while not aLine.find("connection definition section") != -1: aLine = f.readline() dre = re.compile("\d") while not dre.search(aLine): aLine = f.readline() weightVectors = [] while dre.search(aLine): cols = re.split('\|', aLine) tokens = cols[2].strip() while tokens[-1] == ',': tokens += f.readline().strip() tokens = re.split('^[^:]*:|,[^:]*:', tokens) weights = [float(w) for w in tokens if w != ''] weightVectors.append(weights) aLine = f.readline() f.close() return (weightVectors, bias) # --------------Learner classes (orange integration)------------------ def SNNSLearner(examples=None, **kwds): learner = SNNSLearner_Class(*(), **kwds) if examples: return learner(examples) else: return learner class SNNSLearner_Class: """ Artificial Neural Network(ANN) learner class that uses SNNS to create and train the ANN. """ def __init__(self, name='SNNS neural network', hiddenLayers=None, MSE=0, cycles=200, auto=False, nRepeat=3, step=50, percentTrain=0.90, algorithm=None, learningParams=None): """ Initializes a new neural network learner, defining the structure of the networks and training parameters. By now the structure is a multilayered perceptron name = learner name hiddenLayers = a list with the number of nodes of each hidden layer MSE = stop training if mse is smaller than this value cycles = stop training after this number of cycles auto = Whether trainNN (False) or trainAutoNN (True) is used nRepeat = if auto, the number of times the net is trained step = if auto, the number of cycles between one test and the next one percentTrain = if auto, the proportion of patterns used for training algorithm = name of training algorithm as identified in SNNS learningParams = list of strings with the parameters as in SNNS """ self.name = name self.hiddenLayers = hiddenLayers self.MSE = MSE self.cycles = cycles self.auto = auto self.nRepeat = nRepeat self.step = step self.percentTrain = percentTrain if algorithm: self.algorithm = algorithm else: self.algorithm = "Std_Backpropagation" if learningParams: self.learningParams = learningParams else: self.learningParams = [] def __call__(self, t, weight=None): patFileName, transform = savePatFile(t) # If input has no feature with values return a Majority classifier if transform.nnAntecedents < 1: return orange.MajorityLearner(t) if not self.hiddenLayers: self.hiddenLayers = [ (transform.nnAntecedents + transform.nnTargets)/2 ] nnFN = createNN(transform.nnAntecedents, self.hiddenLayers, transform.nnTargets) if self.auto: selection = orange.MakeRandomIndices2(t, self.percentTrain) trnPatFileName, transform = savePatFile( t.select(selection, 0) ) testPatFileName, ignore = savePatFile( t.select(selection, 1) ) cycles = guessTrainParameters(nnFN, trnPatFileName, testPatFileName, self.MSE, self.cycles, self.nRepeat, self.step, self.algorithm, self.learningParams) else: cycles = self.cycles trainNN(nnFN, patFileName, self.MSE, cycles, self.algorithm, self.learningParams) # Extract info from nnFile weights, bias = extractWeights(nnFN) nn = {'in': transform.nnAntecedents, 'hidden': self.hiddenLayers, 'out': transform.nnTargets, 'weights': weights, 'bias': bias} os.remove(patFileName) os.remove(nnFN) if self.auto: os.remove(trnPatFileName) os.remove(testPatFileName) # This self.domain seems needed by orgnFSS.FilteredClassifier # orange bug or misfeature?? XXX report # domain = t.domain # Note: -this is used to know which atts are being used by the learner # -Could be used to check that every example agrees on domain (maybe # not efficient) return SNNSClassifier(nn=nn, transform=transform, domain = t.domain) class SNNSClassifier: def __init__(self, transform, name=None, **kwds): self.__dict__ = kwds if not name: self.name="snns" self.transform = transform def __call__(self, exampleOfAnySize, resultType = orange.GetValue): # Need to perform feature filtering because # IMHO this should be the duty of orngFSS.FilteredClassifier.__call__ # to achieve transparency of FS in learning methods XXX report # Workaround to avoid the problem that appears in examples # with less features: workaround_domain = orange.Domain([a.name for a in self.transform.domain], exampleOfAnySize.domain) example = orange.Example(workaround_domain, exampleOfAnySize) exTr = self.transform.apply(example) output = self.simulateNN(exTr[:self.transform.nnAntecedents]) v = self.transform.applyInverseToTarget(output) if resultType == orange.GetValue: return v elif resultType == orange.GetProbabilities: return output else: return (v,output) def __str__(self): t = '", regresor(e) ## snns1 = SNNSLearner(name="snns0.2", auto=True, cycles=100, step=10, learningParams=["0.2"]) ## snns2 = SNNSLearner(cycles=2000, learningParams=["0.2"]) ## snns3 = SNNSLearner(cycles=2500, learningParams=["0.2"]) ## learners = [snns1]#, snns1,snns2,snns3] ## # compute accuracies on data ## results = orngTest.crossValidation(learners, data, folds=5) ## # Print results ## if data.domain.classVar.varType == orange.VarTypes.Continuous: ## print "\nLearner MSE SE #Atts SE" ## for i in range(len(learners)): ## mse, se = complete.MSE_se(results, reportSE=1)[i] ## print "%-15s %6.2f %5.3f" % (learners[i].name, mse, se) ## else: ## print "\nLearner Accuracy SE #Atts SE" ## for i in range(len(learners)): ## ca, se = orngStat.CA_se(results)[i] ## ca, se = ca * 100, se * 100 ## print "%-15s %6.2f %5.3f" % (learners[i].name, ca, se)