#! /usr/bin/env python
# -*- coding: iso-8859-15 -*-
"""Artificial neural networks for Orange.

Orange module to add artificial neural networks as learning algorithms,
using calls to the SNNS software.

Version: 0.99 (working, but leaks memory on every network training due to
importing/unimporting modules; refinements are needed)

SNNS randomness agrees with Orange's behaviour on randomness:
http://www.ailab.si/orange/doc/reference/random.htm

Although the communication with SNNS goes through files, this code is
intended to be reentrant.  Still, as some of the temporary files are named
by the module but created by SNNS, there is an extremely small chance of
files becoming corrupted and breaking.  Don't worry: you would probably win
the lotto and hang a Windows program a billion times before this happens.

TO DO:
    marked XXX in code,
    check removal of all tmp files,
    error handling in system calls,
    error handling when SNNS fails.

Copyright (C) 2005-2006 Antonio Arauzo Azofra

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""

# From std python
import os
import string
import sys
import tempfile
import random
from itertools import izip

# From orange
import orange
import statc

# Should be set to the path where the binaries of the SNNS tools are
# located, if they are not in the system path
pathSNNS = ""
# example:
#pathSNNS = "~/SNNSv4.2/tools/bin/i686-pc-linux-gnu/"

# If messages should be printed
verbose = False

__module_names_freed__ = []


def _argmax(array):
    """Return the position of the maximum value of an array."""
    return max(izip(array, xrange(len(array))))[1]


class Transform:

    def __init__(self, table, alpha=0.1, beta=0.9):
        """Prepares the transformation of data for a neural network:
        * discrete features to N features in {alpha, beta}
        * continuous features to [alpha, beta]

        Details of the transformation performed (transform):
        [(Continuous=True, slope, pos), (Continuous=False, no.values)]
        y = slope*x + pos

        Notes:
        Destination is not an orange.Example because we can not use more
        than one class feature in Orange.
        Destination domain is formed by nnAntecedent values followed by
        nnTargets (following the original order in each subgroup).
        """
        self.transform = []
        self.alpha = alpha
        self.beta = beta
        self.domain = table.domain   # Previous domain
        self.nnAntecedents = 0       # Transformed domain
        self.nnTargets = 0

        # Prepares transformation of data
        basicAttrStat = orange.DomainBasicAttrStat(table)
        for i in range(len(table.domain.variables)):
            # Continuous
            if self.domain[i].varType == orange.VarTypes.Continuous:
                varRange = (basicAttrStat[i].max - basicAttrStat[i].min)
                if varRange == 0.0:
                    slope = 1.0  # Unique value
                else:
                    slope = float(beta - alpha) / varRange
                pos = alpha - (slope * basicAttrStat[i].min)
                self.transform.append((True, slope, pos))
                if i != self.domain.variables.index(self.domain.classVar):
                    self.nnAntecedents += 1
                else:
                    self.nnTargets += 1
            # Discrete
            else:
                nValues = len(self.domain[i].values)
                self.transform.append((False, nValues))
                if i != self.domain.variables.index(self.domain.classVar):
                    self.nnAntecedents += nValues
                else:
                    self.nnTargets += nValues
    def apply(self, example):
        """Applies the defined transformation over an example.

        Returns: a list with the result
        """
        rtn = []

        # Antecedents
        for i in range(len(example)):
            if i != self.domain.variables.index(self.domain.classVar):
                # Continuous
                if self.transform[i][0]:
                    if example[i].value in ['?', '~', '.']:
                        rtn.append(0.5)  # NULL values (uses average of [0,1]) XXX
                    else:
                        rtn.append(example[i] * self.transform[i][1]
                                   + self.transform[i][2])
                # Discrete
                else:
                    for j in range(self.transform[i][1]):
                        if self.domain[i].values[j] == example[i]:
                            rtn.append(self.beta)
                        else:
                            rtn.append(self.alpha)

        # Target (class or goal attribute)
        i = self.domain.variables.index(self.domain.classVar)
        # Continuous
        if self.transform[i][0]:
            if example[i].value in ['?', '~', '.']:
                rtn.append(0.5)  # NULL values (uses average of [0,1]) XXX
            else:
                rtn.append(example[i] * self.transform[i][1]
                           + self.transform[i][2])
        # Discrete
        else:
            for j in range(self.transform[i][1]):
                if self.domain[i].values[j] == example[i]:
                    rtn.append(self.beta)
                else:
                    rtn.append(self.alpha)

        return rtn

    def applyInverseToTarget(self, target):
        """From a NN output get the class: by majority criterion, or by
        denormalizing in the continuous case.

        Returns: orange.Value with the class
        """
        i = self.domain.variables.index(self.domain.classVar)
        if self.transform[i][0]:
            # continuous
            trTarget = (target[0] - self.transform[i][2]) / self.transform[i][1]
            rtn = orange.Value(self.domain.classVar, trTarget)
        else:
            # discrete
            rtn = orange.Value(self.domain.classVar, _argmax(target))
            # XXX it would be nice if this worked in Orange:
            # domain[i].values[_argmax(target)]
        return rtn

    def __str__(self):
        t = ''
        return t

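# Illustrative sketch (not used anywhere in this module): shows, with plain
# Python only, the y = slope*x + pos normalization and the {alpha, beta}
# one-of-N coding that Transform performs.  The feature range and the values
# used here are made up for the example.
def _transform_example(alpha=0.1, beta=0.9):
    """Return the encoding of one continuous and one discrete value."""
    # Continuous feature observed in [2.0, 12.0], mapped linearly to [alpha, beta]
    minimum, maximum = 2.0, 12.0
    slope = float(beta - alpha) / (maximum - minimum)   # 0.8 / 10 = 0.08
    pos = alpha - slope * minimum                       # 0.1 - 0.16 = -0.06
    continuous = 7.0 * slope + pos                      # 0.56 - 0.06 = 0.5

    # Discrete feature with values ["a", "b", "c"] and observed value "b":
    # one output per value, beta for the matching value and alpha otherwise
    discrete = []
    for v in ["a", "b", "c"]:
        if v == "b":
            discrete.append(beta)
        else:
            discrete.append(alpha)

    return continuous, discrete
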
def trainNN(nnFileName, patFileName, MSE, cycles, algorithm, learningParams):
    """Trains a neural network using batchman."""
    # Open tmp file for the script
    try:
        fd, batchmanScriptFileName = tempfile.mkstemp()
        batchmanScriptFile = os.fdopen(fd, 'w')
    except IOError:
        print 'Error: Couldn\'t create temp file.'
        sys.exit(0)

    # Create batchman script
    nu = 'loadNet("' + nnFileName + '")\n'
    nu += 'loadPattern("' + patFileName + '")\n'
    nu += 'setInitFunc("Randomize_Weights", 1.0, -1.0)\n'
    nu += 'setLearnFunc("' + algorithm + '"' +\
          string.join(["," + p for p in learningParams], sep="") + ')\n'
    nu += 'setShuffle(TRUE)\n'
    nu += 'initNet()\n'
    nu += ('while CYCLES < ' + str(cycles) + ' and MSE > ' + str(MSE) +
           ' and SIGNAL == 0 do\n')
    # nu += 'if CYCLES mod 10 == 0 then\n'
    # nu += 'print ("cycles = ", CYCLES, " SSE = ", SSE, " MSE = ", MSE) endif\n'
    nu += 'trainNet()\nendwhile\n'
    nu += 'if SIGNAL !=0 then print("Stopped due to signal reception: signal " + SIGNAL)\nendif'
    nu += '\nsaveNet("' + nnFileName + '")\n'
    batchmanScriptFile.write(nu)
    batchmanScriptFile.close()

    # Train the NN
    if verbose:
        orden = pathSNNS + "batchman -f " + batchmanScriptFileName
    else:
        orden = pathSNNS + "batchman -q -f " + batchmanScriptFileName
    os.system(orden)

    # Remove tmp file
    os.remove(batchmanScriptFileName)


def trainAutoNN(nnFileName, trainFileName, testFileName, MSE, cycles, nRepeat,
                step, algorithm, learningParams):
    """Trains a neural network using batchman.

    Uses test data to evaluate the training state and select the best
    neural network.  Bad accuracy (not used).
    """
    # Open tmp file for the script
    try:
        fd, batchmanScriptFileName = tempfile.mkstemp()
        batchmanScriptFile = os.fdopen(fd, 'w')
    except IOError:
        print 'Error: Couldn\'t create temp file.'
        sys.exit(0)

    # Create batchman script
    nu = 'net = "' + nnFileName + '"\n'
    nu += 'loadNet(net)\n'
    nu += 'trainPat = "' + trainFileName + '"\n'
    nu += 'testPat = "' + testFileName + '"\n'
    nu += 'loadPattern(trainPat)\n'
    nu += 'loadPattern(testPat)\n'
    nu += 'setInitFunc("Randomize_Weights", 1.0, -1.0)\n'
    nu += 'setLearnFunc("' + algorithm + '"' +\
          string.join(["," + p for p in learningParams], sep="") + ')\n'
    nu += 'setShuffle(TRUE)\n'
    nu += 'mejor = 100000000 #Large value to represent +infinity\n'
    nu += 'for i:=1 to ' + str(nRepeat) + ' do\n'
    if verbose:
        nu += '  print(" --- ", i)\n'
    nu += '  initNet()\n'
    nu += ('  while CYCLES < ' + str(cycles) + ' and MSE > ' + str(MSE) +
           ' and SIGNAL == 0 do\n')
    nu += '    setPattern(trainPat)\n'
    nu += '    for k:= 1 to ' + str(step) + ' do\n'
    nu += '      trainNet()\n'
    nu += '    endfor\n'
    nu += '    setPattern(testPat)\n'
    nu += '    testNet()\n'
    if verbose:
        nu += '    print("MSE =", MSE, "cycles:", CYCLES)\n'
    nu += '    if MSE < mejor then\n'
    nu += '      mejor = MSE\n'
    nu += '      saveNet(net)\n'
    if verbose:
        nu += '      print(CYCLES, ": ", MSE, "(best MSE)")\n'
    nu += '    endif\n'
    nu += '  endwhile\n'
    nu += 'endfor\n'
    if verbose:
        nu += 'print("Best MSE(", net, ")= ", mejor)\n'
    nu += 'if SIGNAL !=0 then print("Stopped due to signal reception: signal " + SIGNAL)\nendif'
    batchmanScriptFile.write(nu)
    batchmanScriptFile.close()

    # Train the NN
    if verbose:
        orden = pathSNNS + "batchman -f " + batchmanScriptFileName
    else:
        orden = pathSNNS + "batchman -q -f " + batchmanScriptFileName
    os.system(orden)

    # Remove tmp file
    os.remove(batchmanScriptFileName)
    #print "BATCHMAN:", batchmanScriptFileName

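# For reference, an approximate example of the batchman control script that
# trainAutoNN writes with the default parameters (cycles=200, MSE=0,
# nRepeat=3, step=50, Std_Backpropagation, no learning parameters, verbose
# off).  The file names and the exact spacing below are illustrative only.
_EXAMPLE_AUTOTRAIN_SCRIPT = '''\
net = "/tmp/example.net"
loadNet(net)
trainPat = "/tmp/train.pat"
testPat = "/tmp/test.pat"
loadPattern(trainPat)
loadPattern(testPat)
setInitFunc("Randomize_Weights", 1.0, -1.0)
setLearnFunc("Std_Backpropagation")
setShuffle(TRUE)
mejor = 100000000 #Large value to represent +infinity
for i:=1 to 3 do
  initNet()
  while CYCLES < 200 and MSE > 0 and SIGNAL == 0 do
    setPattern(trainPat)
    for k:= 1 to 50 do
      trainNet()
    endfor
    setPattern(testPat)
    testNet()
    if MSE < mejor then
      mejor = MSE
      saveNet(net)
    endif
  endwhile
endfor
if SIGNAL !=0 then print("Stopped due to signal reception: signal " + SIGNAL)
endif
'''
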
def guessTrainParameters(nnFileName, trainFileName, testFileName, MSE, cycles,
                         nRepeat, step, algorithm, learningParams):
    """By a series of tests, choose the number of cycles to train a neural
    network.
    """
    # Open tmp file for the script
    try:
        fd, batchmanScriptFileName = tempfile.mkstemp()
        batchmanScriptFile = os.fdopen(fd, 'w')
    except IOError:
        print 'Error: Couldn\'t create temp file.'
        sys.exit(0)

    # Create batchman script
    nu = 'net = "' + nnFileName + '"\n'
    nu += 'loadNet(net)\n'
    nu += 'trainPat = "' + trainFileName + '"\n'
    nu += 'testPat = "' + testFileName + '"\n'
    nu += 'loadPattern(trainPat)\n'
    nu += 'loadPattern(testPat)\n'
    nu += 'setInitFunc("Randomize_Weights", 1.0, -1.0)\n'
    nu += 'setLearnFunc("' + algorithm + '"' +\
          string.join(["," + p for p in learningParams], sep="") + ')\n'
    nu += 'setShuffle(TRUE)\n'
    nu += 'for i:=1 to ' + str(nRepeat) + ' do\n'
    nu += '  mejor = 100000000 #Large value to represent +infinity\n'
    nu += '  mejorCycles = 0\n'
    nu += '  print(" --- ", i)\n'
    nu += '  initNet()\n'
    nu += ('  while CYCLES < ' + str(cycles) + ' and MSE > ' + str(MSE) +
           ' and SIGNAL == 0 do\n')
    nu += '    setPattern(trainPat)\n'
    nu += '    for k:= 1 to ' + str(step) + ' do\n'
    nu += '      trainNet()\n'
    nu += '    endfor\n'
    nu += '    setPattern(testPat)\n'
    nu += '    testNet()\n'
    nu += '    print("MSE =", MSE, "cycles:", CYCLES)\n'
    nu += '    if MSE < mejor then\n'
    nu += '      mejor = MSE\n'
    nu += '      mejorCycles = CYCLES\n'
    nu += '      print(CYCLES, ": ", MSE, "(best MSE)")\n'
    nu += '    endif\n'
    nu += '  endwhile\n'
    nu += '  print("SetCycles=", mejorCycles)\n'
    nu += 'endfor\n'
    nu += 'print("Best MSE(", net, ")= ", mejor)\n'
    nu += 'if SIGNAL !=0 then print("Stopped due to signal reception: signal " + SIGNAL)\nendif'
    batchmanScriptFile.write(nu)
    batchmanScriptFile.close()

    # Train the NN and collect the number of cycles that gave the best MSE
    if verbose:
        orden = pathSNNS + "batchman -f " + batchmanScriptFileName
    else:
        orden = pathSNNS + "batchman -q -f " + batchmanScriptFileName
    inout = os.popen2(orden)
    cycles = []
    l = inout[1].readline()
    while l:
        if l[0:10] == "SetCycles=":
            cycles.append(int(l[10:]))
            #print l,
        l = inout[1].readline()
    if verbose:
        print "cycles=", cycles

    # Remove tmp file
    os.remove(batchmanScriptFileName)

    return int(statc.mean(cycles))

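# Sketch (not used by the module) of an alternative way to run batchman and
# collect the "SetCycles=" lines using the subprocess module, available from
# Python 2.4, instead of the deprecated os.popen2.  Shown only as a possible
# replacement for the reading loop above.
def _run_batchman_collect_cycles(batchmanScriptFileName):
    import subprocess
    command = [pathSNNS + "batchman", "-q", "-f", batchmanScriptFileName]
    process = subprocess.Popen(command, stdout=subprocess.PIPE)
    cycles = []
    for line in process.stdout:
        # batchman echoes one "SetCycles=<n>" line per repetition of the loop
        if line.startswith("SetCycles="):
            cycles.append(int(line[len("SetCycles="):]))
    process.wait()
    return cycles
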
def nn2module(nnFileName, nOutputs):
    """Creates a new python module with a function "eval" that calls the
    trained artificial neural network.

    The caller is responsible for deleting the module file and directory.

    Returns: the complete pathname of the new module
    """
    # Changing False for the commented code solves the memory leak but
    # introduces errors, as it reuses previous modules
    if False:  #__module_names_freed__:
        modulePath, moduleBaseName = __module_names_freed__.pop()
    else:
        # Creates a tmp directory and a module name to create the module
        modulePath = tempfile.mkdtemp(prefix="oSNNSmod")
        moduleBaseName = "tmpOrngSNNSMod" + str(random.randint(0, 1000000))
        while moduleBaseName in sys.modules:
            moduleBaseName = "tmpOrngSNNSMod" + \
                             str(random.randint(0, 1000000000000000))
    moduleName = os.path.join(modulePath, moduleBaseName)

    # Generates NN C code using SNNS
    cFileName = nnFileName.replace(".net", ".c")
    if verbose:
        orden = pathSNNS + "snns2c " + nnFileName + " " + cFileName + " runNN"
    else:
        orden = pathSNNS + "snns2c " + nnFileName + " " + cFileName + \
                " runNN &>/dev/null"
    os.system(orden)

    # Gets NN C code
    cFile = open(cFileName)
    cCode = cFile.read()
    cFile.close()
    os.remove(cFileName)
    os.remove(cFileName.replace(".c", ".h"))

    # Creates python module with NN code
    head = "#define NOUTPUTS " + str(nOutputs) + "\n"
    head += "#include <Python.h>\n"
    pyInterface = """
/* Automatically generated code by OrangeSNNS

   This code is by no means an example of good error handling.  The messages
   printed are aimed at helping to detect errors while programming.  As these
   errors are not supposed to happen, they are not handled properly (no
   exceptions are raised and the program is not stopped).
*/
static PyObject *
""" + moduleBaseName + """_eval(PyObject *self, PyObject *args)
{
    PyObject * inputs;
    int i, l;
    float * inputsC = NULL;
    float * outputsC = NULL;
    PyObject * rtn = NULL;

    if (!PyArg_ParseTuple(args, "O", &inputs))
        return NULL;
    if (!PySequence_Check(inputs))
        fprintf(stderr, "Sequence required in call to NN");
    else {
        l = PySequence_Length(inputs);
        inputsC = (float *) malloc( l*sizeof(float) );
        if (inputsC != NULL) {
            for (i=0; i < l; i++) {
                PyObject * f = PySequence_GetItem(inputs, i);
                if (!PyFloat_Check(f))
                    if (!PyInt_Check(f))
                        fprintf(stderr, "Non-float/int input in call to NN");
                    else
                        inputsC[i] = (float) PyInt_AS_LONG(f);
                else
                    inputsC[i] = (float) PyFloat_AsDouble(f);
                Py_DECREF(f);
            }
            l = NOUTPUTS;
            outputsC = (float *) malloc( l*sizeof(float) );
            if (outputsC != NULL) {
                // Call to NN code
                runNN(inputsC, outputsC, 0);
                rtn = PyTuple_New(l);
                if (rtn != NULL) {
                    for (i=0; i < l; i++) {
                        PyObject * f = PyFloat_FromDouble( outputsC[i] );
                        if (f != NULL) {
                            if ( PyTuple_SetItem(rtn, i, f) )
                                fprintf(stderr, "Error setting tuple element in NN call");
                        }
                        else
                            fprintf(stderr, "Unable to create PyFloat from NN return");
                    }
                }
                else
                    fprintf(stderr, "Unable to create tuple calling NN");
            }
            else
                fprintf(stderr, "Out of memory calling NN");
        }
        else
            fprintf(stderr, "Out of memory calling NN");
    }
    free(inputsC);
    free(outputsC);
    return rtn; // Py_BuildValue("i", sts);
}

static PyMethodDef orangeSnnsTmpMethods[] = {
    {"eval", """ + moduleBaseName + """_eval, METH_VARARGS, "Execute a nn."},
    {NULL, NULL, 0, NULL}  /* Sentinel */
};

PyMODINIT_FUNC
init""" + moduleBaseName + """(void)
{
    (void) Py_InitModule(\"""" + moduleBaseName + """\", orangeSnnsTmpMethods);
}
"""
    fd, srcFileName = tempfile.mkstemp(prefix="src", suffix=".c")
    srcFile = os.fdopen(fd, "w")
    srcFile.write(head)
    srcFile.write(cCode)
    srcFile.write(pyInterface)
    srcFile.close()

    # Creates setup.py to compile
    fd, setupFileName = tempfile.mkstemp(prefix="setup", suffix=".py")
    setupFile = os.fdopen(fd, "w")
    setupFile.write("""
from distutils.core import setup, Extension
module1 = Extension('""" + moduleBaseName + """',
                    sources = ['""" + srcFileName + """'])
setup (name = 'OrangeSNNStmp',
       version = '1.0',
       description = 'Function that calls a trained NN',
       ext_modules = [module1])
""")
    setupFile.close()

    # Compile
    buildDirTmp = tempfile.mkdtemp()
    if buildDirTmp:
        if verbose:
            os.system("python " + setupFileName + " build -b " + buildDirTmp +
                      " install --install-lib " + modulePath)
        else:
            os.system("python " + setupFileName + " --quiet build -b " +
                      buildDirTmp + " install --install-lib " + modulePath)
        os.system("rm -r " + buildDirTmp)
    os.remove(srcFileName)
    os.remove(setupFileName)

    return moduleName

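# Illustrative sketch (not called anywhere in this module) of how the module
# produced by nn2module() is meant to be used; SNNSClassifier below follows
# this same pattern.  "trained.net" and the input vector are made-up values.
def _example_nn2module_usage():
    moduleName = nn2module("trained.net", 3)
    modulePath, moduleBaseName = os.path.split(moduleName)
    sys.path.insert(0, modulePath)
    nnModule = __import__(moduleBaseName)
    del sys.path[0]
    outputs = nnModule.eval([0.1, 0.9, 0.5])  # tuple with nOutputs floats
    # The caller is responsible for removing the compiled module and its dir
    os.remove(moduleName + ".so")
    os.rmdir(modulePath)
    return outputs
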
# -------------- Learner classes (orange integration) ------------------

def SNNSLearner(examples=None, **kwds):
    learner = SNNSLearner_Class(*(), **kwds)
    if examples:
        return learner(examples)
    else:
        return learner


class SNNSLearner_Class:
    """Artificial Neural Network (ANN) learner class that uses SNNS to
    create and train the ANN.
    """

    def __init__(self, name='SNNS neural network', hiddenLayers=None, MSE=0,
                 cycles=200, auto=False, nRepeat=3, step=50, percentTrain=0.90,
                 algorithm=None, learningParams=None):
        """Initializes a new neural network learner, defining the structure
        of the networks and the training parameters.

        For now the structure is a multilayer perceptron.

        name = learner name
        hiddenLayers = a list with the number of nodes of each hidden layer
        MSE = stop training if the MSE is smaller than this value
        cycles = stop training after this number of cycles
        auto = whether trainNN (False) or trainAutoNN (True) is used
        nRepeat = if auto, the number of times the net is trained
        step = if auto, the number of cycles between one test and the next one
        percentTrain = if auto, the proportion of patterns used for training
        algorithm = name of the training algorithm as identified in SNNS
        learningParams = list of strings with the parameters as in SNNS
        """
        self.name = name
        self.hiddenLayers = hiddenLayers
        self.MSE = MSE
        self.cycles = cycles
        self.auto = auto
        self.nRepeat = nRepeat
        self.step = step
        self.percentTrain = percentTrain
        if algorithm:
            self.algorithm = algorithm
        else:
            self.algorithm = "Std_Backpropagation"
        if learningParams:
            self.learningParams = learningParams
        else:
            self.learningParams = []

    def __call__(self, t, weight=None):
        patFileName, transform = savePatFile(t)

        # If the input has no feature with values, return a Majority classifier
        if transform.nnAntecedents < 1:
            return orange.MajorityLearner(t)

        if not self.hiddenLayers:
            self.hiddenLayers = [(transform.nnAntecedents +
                                  transform.nnTargets) / 2]
        nnFN = createNN(transform.nnAntecedents, self.hiddenLayers,
                        transform.nnTargets)

        if self.auto:
            selection = orange.MakeRandomIndices2(t, self.percentTrain)
            trnPatFileName, transform = savePatFile(t.select(selection, 0))
            testPatFileName, ignore = savePatFile(t.select(selection, 1))
            cycles = guessTrainParameters(nnFN, trnPatFileName,
                                          testPatFileName, self.MSE,
                                          self.cycles, self.nRepeat, self.step,
                                          self.algorithm, self.learningParams)
        else:
            cycles = self.cycles

        trainNN(nnFN, patFileName, self.MSE, cycles, self.algorithm,
                self.learningParams)
        moduleName = nn2module(nnFN, transform.nnTargets)

        os.remove(patFileName)
        os.remove(nnFN)
        if self.auto:
            os.remove(trnPatFileName)
            os.remove(testPatFileName)

        # This self.domain seems needed by orngFSS.FilteredClassifier
        # orange bug or misfeature?? XXX report
        # domain = t.domain
        # Note: -this is used to know which atts are being used by the learner
        #       -could be used to check that every example agrees on the
        #        domain (maybe not efficient)
        return SNNSClassifier(moduleName, transform=transform,
                              domain=t.domain)

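# Illustrative sketch (not called anywhere in this module) of the typical way
# the learner is used; "some_data.tab" is a made-up file name.  The same
# pattern appears in the test code at the end of this file.
def _example_snns_learner_usage():
    data = orange.ExampleTable("some_data.tab")
    learner = SNNSLearner(name="snns", cycles=200, learningParams=["0.2"])
    classifier = learner(data)
    return [classifier(example) for example in data]
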
class SNNSClassifier:

    def __init__(self, moduleName, transform, name=None, **kwds):
        self.__dict__ = kwds
        if not name:
            self.name = "snns"
        self.moduleName = moduleName
        self.transform = transform
        modulePath, moduleBaseName = os.path.split(self.moduleName)
        sys.path.insert(0, modulePath)
        self.nnModule = __import__(moduleBaseName, globals(), locals())
        del sys.path[0]

    def __del__(self):
        # Free the module: it is unimported, be aware!
        modulePath, moduleBaseName = os.path.split(self.moduleName)
        del sys.modules[moduleBaseName]
        del self.nnModule
        os.remove(self.moduleName + ".so")
        dir, filen = os.path.split(self.moduleName)
        os.rmdir(dir)
        __module_names_freed__.append((modulePath, moduleBaseName))

    def __call__(self, exampleOfAnySize, resultType=orange.GetValue):
        # Feature filtering needs to be performed here; IMHO this should be
        # the duty of orngFSS.FilteredClassifier.__call__ to achieve
        # transparency of FS in learning methods XXX report
        # Workaround to avoid the problem that appears in examples
        # with fewer features:
        workaround_domain = orange.Domain(
            [a.name for a in self.transform.domain], exampleOfAnySize.domain)
        example = orange.Example(workaround_domain, exampleOfAnySize)

        exTr = self.transform.apply(example)
        output = self.nnModule.eval(exTr)
        v = self.transform.applyInverseToTarget(output)

        if resultType == orange.GetValue:
            return v
        elif resultType == orange.GetProbabilities:
            return output
        else:
            return (v, output)


# --- main - test ----------------------------------------------
# (Unsorted tests used for development)

def printMemoryUse():
    rgc = gc.collect()
    f = open("/proc/" + str(os.getpid()) + "/statm")
    totalMemorySize = f.readline().split()[0]
    f.close()
    print "Mem. used:", totalMemorySize, "(gc:", rgc, ")"


if __name__ == "__main__":
    import orngTest, orngStat
    import gc, os, re, fileinput
    import table, table2orange

    f = sys.argv[1]

    # Get table
    if re.match('(.*\.tab)$|(.*\.txt)$', f):
        data = orange.ExampleTable(f)
    else:
        infile = fileinput.input(f)
        tt = table.Format().getTableReader().read(infile)
        infile.close()
        data = table2orange.table2ExampleTable(tt)

    snns0 = SNNSLearner(name="snns", cycles=10000)
    regresor = snns0(data)
    sse = 0.0
    print "Results (test)"
    for e in data:
        print e, "->", regresor(e)
        sse += (e['y'] - regresor(e)) ** 2
    print "MSE=", sse / len(data)

##    for i in range(10):
##        printMemoryUse()
##        del cl
##        cl = snns0(data)
##        #cl(data[0])

    sys.exit()

    snns1 = SNNSLearner(name="snns0.2", auto=True, cycles=100, step=10,
                        learningParams=["0.2"])
    snns2 = SNNSLearner(cycles=2000, learningParams=["0.2"])
    snns3 = SNNSLearner(cycles=2500, learningParams=["0.2"])
    learners = [snns1]  #, snns1, snns2, snns3]

##    for lp in [ ["0.2"], ["0.1"] ]:
##        for cycles in [50]:
##            for mode in [True, False]:
##                learners.append(
##                    SNNSLearner(name="snns"+lp[0]+"-"+str(cycles)+str(mode),
##                                auto=mode, cycles=cycles, step=10,
##                                learningParams=lp) )

    # compute accuracies on data
    results = orngTest.crossValidation(learners, data, folds=5)

    # Print results
    if data.domain.classVar.varType == orange.VarTypes.Continuous:
        print "\nLearner            MSE    SE"
        for i in range(len(learners)):
            mse, se = complete.MSE_se(results, reportSE=1)[i]
            print "%-15s %6.2f %5.3f" % (learners[i].name, mse, se)
    else:
        print "\nLearner         Accuracy    SE"
        for i in range(len(learners)):
            ca, se = orngStat.CA_se(results)[i]
            ca, se = ca * 100, se * 100
            print "%-15s %6.2f %5.3f" % (learners[i].name, ca, se)

##    classifier = snnsLearner(data)
##    # compute classification accuracy
##    correct = 0.0
##    for ex in data:
##        if classifier(ex) == ex.getclass():
##            correct += 1
##    print "Classification accuracy:", correct / len(data)

##    for example in t:
##        print example,
##        print "->", classifier(example)

    # Remove the classifier before the 'os' module in order to be able to
    # delete the module file XXX TODO: revise this, now that it is disabled
    # del classifier