#! /usr/bin/env python
"""annotate.py -- gene annotation for a list of probes, stored in a dictionary.

From a message posted to the rpy-list mailing list:

    Hello - Below is a simple script to grab annotation data from R
    packages and dump it into a Python dictionary, where all sorts of
    interesting things can be done.

    You end up with the data structures:

        probes[]   holds the probeIDs
        dict[]     holds the annotation data for each probe
                   e.g.: dict['GO'][0] contains GO info for the first probe

    The dict can be filled with all possible annotations from the package.
    Nothing special, but pretty useful for downstream analysis of probe
    ids, and as far as I am concerned, Python is the place to do it - not R.

    Cheers- Tom

The script is written for the original ``rpy`` bridge.  Importing this file
as a module has no side effects; the R session is only started when it is
run as a script.
"""

import sys
import getopt
import string  # no longer used here; kept so the file-level imports are unchanged

###################################
# likely to be modified

NoInfoString = "No Info"  # whatever

# Value that filter() replaces with NoInfoString.
# NOTE(review): this is INT_MIN, which is how R represents an integer NA --
# presumably what rpy hands back for a missing annotation; confirm.
MissingValue = -2147483648


def pprint(pfunc, adata):
    """Print one annotation value for one probe, indented by a tab.

    pfunc -- annotation name (e.g. "GO", "SYMBOL")
    adata -- the annotation value; for "GO" it is iterated as a mapping
             whose values are indexed with 'Ontology'

    Can switch here for different pretty prints for various annotations.
    """
    if pfunc == "GO" and adata != NoInfoString:
        print("\t%s =" % pfunc)
        for goId in adata:
            print("\t\t%s %s" % (goId, adata[goId]['Ontology']))
    else:
        print("\t%s = %s" % (pfunc, adata))


def filter(pfunc, flist):
    """Replace missing-value markers in flist with NoInfoString, in place.

    Only the "GO", "SUMFUNC" and "PATH" annotations are touched; some of
    the things returned by the annotation packages may be screwy.

    pfunc -- annotation name
    flist -- list of per-probe values; modified in place, nothing returned

    NOTE: the name shadows the builtin filter(); it is kept because it is
    this script's existing public name.
    """
    if pfunc in ("GO", "SUMFUNC", "PATH"):
        for i, value in enumerate(flist):
            if value == MissingValue:
                flist[i] = NoInfoString


# these members of the packages are ignored
# mainly non-Probe functions
nonProbeFuncs = ["CHRLENGTHS", "ENZYME2PROBE",
                 "GO2ALLPROBES", "GO2PROBE",
                 "LOCUSID",  # deprecated
                 "MAPCOUNTS", "ORGANISM",
                 "PATH2PROBE", "PFAM",
                 "PMID2PROBE", "PROSITE", "QC", "QCDATA"]

#
####################################


def processPkgFunc(list, pkg, func):
    """Look up every probe ID in the R environment named pkg + func.

    list -- probe IDs (parameter name kept for compatibility although it
            shadows the builtin)
    pkg  -- R annotation package name, e.g. "mouse4302"
    func -- annotation suffix, e.g. "GO"

    Returns a list with one entry per probe ID, in the same order.

    The original built a Python expression as a string and eval()'d it,
    so a probe ID containing a quote could break or inject code.  The same
    R call is now made directly.
    """
    if not list:
        return []
    from rpy import r  # deferred: importing rpy starts the embedded R
    env = getattr(r, pkg + func)
    return [r.get(probe, env=env) for probe in list]


def annoteDesired(all, annotes, func):
    """Return 1 if annotation func was requested, else 0.

    all     -- true when every annotation was requested (-A)
    annotes -- annotation names given with -a; compared case-insensitively
    func    -- annotation name to test
    """
    if all:
        return 1
    wanted = func.lower()
    for name in annotes:
        if name.lower() == wanted:
            return 1
    return 0


def usage():
    """Print the command-line help to stdout."""
    print("Usage: ")
    print("%s  [options] probeListFile(1 ID per line)" % sys.argv[0])
    print("\t does automatic annotations - stores info in dictionary")
    print("\t options:")
    print("\t -P pkgName R annotation package containing the "
          "probe set[mouse4302]")
    print("\t -a annoType print out specific "
          "annotation(case-insensitive) per probe")
    print("\t Example: \"-a genename\" prints the "
          "probeID gene names")
    print("\t NOTE: can be used multiple times in "
          "cmdline")
    print("\t -A print ALL annotations for each probe")
    print("\t -l list available annotations")
    print("\t -v verbose: report each annotation as it is fetched")


def parseOptions(argv):
    """Parse the command-line arguments (without the program name).

    Returns the tuple
        (pkgName, annotes, allAnnotes, listAnnotes, verbose, args)
    where args holds the remaining positional arguments.

    Raises getopt.error on an unknown option or a missing option value.
    """
    opts, args = getopt.getopt(argv, "P:a:Avl")

    pkgName = "mouse4302"
    annotes = []
    allAnnotes = 0
    listAnnotes = 0
    verbose = 0

    for o, a in opts:
        if o == '-P':
            pkgName = a
        elif o == '-a':
            annotes.append(a)
        elif o == '-v':
            # was compared against 'v', which getopt never produces, so
            # verbose mode could not be switched on
            verbose = 1
        elif o == '-A':
            allAnnotes = 1
        elif o == '-l':
            listAnnotes = 1

    return pkgName, annotes, allAnnotes, listAnnotes, verbose, args


def selectProbeFuncs(pkgFuncs, pkgName):
    """Strip the package-name prefix from each name and drop non-probe ones.

    pkgFuncs -- names listed for the R package (each is expected to start
                with pkgName; the first len(pkgName) characters are cut)
    pkgName  -- R annotation package name

    Returns the remaining suffixes in their original order; names with an
    empty suffix and suffixes listed in nonProbeFuncs are skipped.
    """
    prefixLen = len(pkgName)
    suffixes = [name[prefixLen:] for name in pkgFuncs]
    return [s for s in suffixes if s and s not in nonProbeFuncs]


def readProbes(lines):
    """Return the first whitespace-separated token of every non-blank line.

    lines -- any iterable of strings (e.g. an open file)

    Blank lines are skipped; the original raised IndexError on them.
    """
    probes = []
    for line in lines:
        fields = line.split()
        if fields:
            probes.append(fields[0])
    return probes


def main():
    """Run the command-line tool and return (probes, annotations).

    probes      -- list of probe IDs read from the probe list file
    annotations -- dict mapping annotation name to a list holding one value
                   per probe, e.g. annotations["GO"][0] holds the GO info
                   (itself a dict of dicts) for the first probe

    Exits with status -1 on a usage error and 0 after printing the usage
    text or the -l listing.
    """
    try:
        (pkgName, annotes, allAnnotes,
         listAnnotes, verbose, args) = parseOptions(sys.argv[1:])
    except getopt.error as msg:
        sys.stderr.write(sys.argv[0] + ': ' + str(msg) + '\n')
        usage()
        sys.exit(-1)

    if len(sys.argv) == 1:
        usage()
        sys.exit(0)

    if not pkgName:
        print("Error: no R annotation probe set specified - exiting.")
        sys.exit(-1)

    # The original opened sys.argv[-1] unconditionally, which is an option
    # (or an option's value) when no probe list file was given.
    if not listAnnotes and not args:
        sys.stderr.write(sys.argv[0] + ': no probe list file given\n')
        usage()
        sys.exit(-1)

    # Deferred until the command line is known to be usable, as in the
    # original script.
    from rpy import r
    r.library("annotate")
    r.library(pkgName)

    # remove nonProbeFuncs
    probeFuncs = selectProbeFuncs(r.ls("package:" + pkgName), pkgName)

    if listAnnotes:
        for name in probeFuncs:
            print(name)
        sys.exit(0)

    # grab probe list (the file is closed as soon as it has been read)
    pFile = open(args[-1], "r")
    try:
        probes = readProbes(pFile)
    finally:
        pFile.close()

    # build annotation dict
    annotations = {}
    for pfunc in probeFuncs:
        if annoteDesired(allAnnotes, annotes, pfunc):
            if verbose:
                print("getting %s data." % pfunc)
            data = processPkgFunc(probes, pkgName, pfunc)
            filter(pfunc, data)
            annotations[pfunc] = data

    # prints
    if allAnnotes or annotes:
        for i, probe in enumerate(probes):
            print(probe + ":")
            for pfunc in probeFuncs:
                if pfunc in annotations:
                    pprint(pfunc, annotations[pfunc][i])

    return probes, annotations


if __name__ == "__main__":
    #########################################
    #
    # At this point you have a dict filled with the requested
    # annotation data for each probe
    # e.g:
    #   dict["GO"][0]
    # holds GO info (which is itself a dict of dict) for the first probe
    #
    # The names `probes` and `dict` are the ones documented for this
    # script, so they are kept even though `dict` shadows the builtin.
    #
    #########################################
    probes, dict = main()

# -------------------------------------------------------------------------
# Mailing-list footer of the original message:
#
# Take Surveys. Earn Cash. Influence the Future of IT
# Join SourceForge.net's Techsay panel and you'll get the chance to share
# your opinions on IT & business topics through brief surveys - and earn cash
# http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
# _______________________________________________
# rpy-list mailing list
# rpy-list@lists.sourceforge.net
# https://lists.sourceforge.net/lists/listinfo/rpy-list