Hi everyone, I have been developing my first python script and jumped straight into the world of twisted. I like the concept but I seem to be doing something very wrong. The script takes an NZB file and parses it (using HellaNZB's parser) and then makes a specified number of connections to the news server and checks that all articles are there using the STAT command. I also know nothing about newsservers or nntp so this script is a complete shot in the dark. First time with python, first time with twisted and first time with nntp seems to equal disaster!
The problem is that the script is very slow, even with 20 simultaneous connections to the internet I only produce 111k or so of traffic and only achieve 2 article checks per connection per second. On a 24meg connection I can download the whole thing quicker than checking it with my script! Any ideas why it is so slow? I have attached the script. Thanks for any help. Tom Greenwood
#!/usr/bin/python from twisted.internet import reactor from twisted.internet import protocol from NZBHellaParser import NZBParser, parseNZB from twisted.news.nntp import NNTPClient import sys,traceback,time results = None allClients = None class NNTPStatCheck(NNTPClient): "Class to check if the server has all the articles required using as little bandwidth as possible." nextMessageID = None def __init__(self, _username, _password, _newsgroup): NNTPClient.__init__(self) self.username = _username self.password = _password self.newsgroup = _newsgroup self.nextMessageID = 0 self.currMessageID = None #self.lastTime = 0 def getStat(self): #print "Stating for " + self.currMessageID #self.startTime = time.time() self.sendLine('STAT <%s>' % (self.currMessageID, )) self._newState(None, self.getStatFailed, self._stateStat) def connectionLost(self, error): NNTPClient.connectionLost(self) if self.nextMessageID >= len(results): self.factory.readyToStop = True; for client in allClients: if client.readyToStop == False: return reactor.stop() else: print "Unexpected Connection Loss, Should we try again?? " + str(self.nextMessageID) + "/" + str(len(results)) def connectionMade(self): NNTPClient.connectionMade(self) print 'Connection made, logging in..' self.authInfo() def gotGroup(self, group): #print "GOT GROUP " + str(group) + "\n" self.statNext() def gotStat(self, stat): #print "GOT STAT %s\n" % str(stat) results[self.currMessageID][self.newsgroup] = True #FIXME: Don't need to check the rest of the groups if we have it def _stateStat(self, (code, message)): #print str(allClients.index(self.factory)) + " " + str(time.time() - self.startTime) + " " + str(time.time() - self.lastTime) #self.lastTime = time.time() if code == 223: self.gotStat((code, message)) self._endState() self.tryNext() else: self.getStatFailed("%s %s" % (str(code) , str(message))) def statNext(self): if self.nextMessageID < len(results): self.currMessageID = results.keys()[self.nextMessageID] self.nextMessageID = (self.nextMessageID + 1) if results[self.currMessageID] [self.newsgroup] is None: self.getStat() else: self.nextMessageID = (self.nextMessageID + 1) self.statNext() else: # FIXME: Should now be fixed but sometimes the client got into an unknown state print "Finished, disconnecting from server." self.quit() def tryNext(self): if self.nextMessageID < len(results): self.fetchGroup(self.newsgroup) else: print "Finished, disconnecting from server." self.quit() def getStatFailed(self, error): print str(time.time() - self.startTime) try: (code, message) = error.split(" ", 1) except AttributeError: print 'Unknown reply structure: %s\n' % str(error) if code == "430": self.messageDoesNotExist(error) else: print 'Error occured after stat: %s\n' % str(error) results[self.currMessageID][self.newsgroup] = False self.tryNext() def messageDoesNotExist(self, error): print 'Message ' + self.currMessageID + ' does not exist in group ' + self.newsgroup + '\n' def authInfo(self): self.sendLine('AUTHINFO USER ' + self.username) self._newState(None, self.authInfoFailed, self._authInfoUserResponse) def _authInfoUserResponse(self, (code, message)): if code == 381: self.sendLine('AUTHINFO PASS ' + self.password) self._newState(None, self.authInfoFailed, self._authInfoPassResponse) else: self.authInfoFailed('%d %s' % (code, message)) self._endState() def _authInfoPassResponse(self, (code, message)): if code == 281: self.gotauthInfoOk('%d %s' % (code, message)) else: self.authInfoFailed('%d %s' % (code, message)) self._endState() def gotauthInfoOk(self, message): print 'Logged in, starting stat checks..' self.fetchGroup(self.newsgroup) def authInfoFailed(self, error): # FIXME: We use reactor.stop so we dont try and auth incorrectly for ever print 'Error occured whilst attempting auth: %s\n' % error reactor.stop() class NNTPVerifyFactory(protocol.ClientFactory): readyToStop = False def __init__(self, _username, _password, _newsgroup): self.username = _username self.password = _password self.newsgroup = _newsgroup def clientConnectionFailed(self, connector, reason): print 'Failed to connect using connector: ' + str(connector) + " because " + str(reason) reactor.stop() def buildProtocol(self, addr): p = NNTPStatCheck(self.username, self.password, self.newsgroup) p.factory = self return p def checkFoundArticle(groups): "If Article found returns group found, if not found returns false, if failed returns None" for group, value in groups.iteritems(): if value == True: return group elif value == False: return False; return None def createDictionaryFromList(list): retVal = {} for item in list: retVal[item] = None return retVal def calcConnections(numGroups,maxConn): if numGroups == 0: return 0 if maxConn == 0: return 0 if maxConn < numGroups: return 0 else: if (maxConn / numGroups) >= 1: return (maxConn / numGroups) else: return 1 def printUsage(): print "Usage: NZBVerify.py maxConnections nzbFile" print " e.g: NZBVerify.py 20 test.nzb" print "\n" print "NZBVerify will distribute the connections between the groups specified in the NZB" print "Your usenet provider will be able to tell you how many maximum connection you can use" print "The value of maxConnections must be at least equal to the number of groups" print "There is very little error handling so if you think its got stuck, it probably has!" def main(): global results, allClients results = {} allClients = [] if len(sys.argv) < 3: printUsage() return try: maxConns = int(sys.argv[1]) except ValueError: print "Please enter a valid integer for maxConnections" printUsage() return print "Max connections set to " + str(maxConns) print "Loading NZB File from " + sys.argv[2] try: groups, messageIDs = parseNZB(sys.argv[2]) except IOError: print "There was an IO error loading the NZB File. The error follows:\n" print traceback.print_exc() return print "Parsed NZB File with " + str(len(messageIDs)) + " articles from " + str(len(groups)) + " groups." connectionsPerGroup = calcConnections(len(groups),maxConns) print "Using " + str(connectionsPerGroup) + " connections per group." if connectionsPerGroup == 0: print "Finished. Can't do anything with no connections per group!" return for currMessageID in messageIDs: # FIXME: Need to check if we need to create a new Dictionary instance for each message or if python is clever enough to work it out itself. results[currMessageID] = createDictionaryFromList(groups) #FIXME: Can only have as many groups as connections for group in groups: for i in range(connectionsPerGroup): verifier = NNTPVerifyFactory("tgreenw...@pp.newsgroups.user", "tomwap1986", str(group)) allClients.append(verifier) reactor.connectTCP("us.usenet-news.net", 119, verifier) print "Connecting to server.." reactor.run() print "Got required data from server, analysing.." totalArticles = len(results) totalFound = 0 totalNotChecked = 0 for currMessageID, _groups in results.iteritems(): result = checkFoundArticle(_groups) if result is None: print "Article Not Checked: " + currMessageID totalNotChecked = (totalNotChecked + 1) elif result is False: print "Article Not Found: " + currMessageID else: print "Found Article: " + currMessageID + " in " + result totalFound = (totalFound + 1) print "Don't take the groups listed above as gospel, it only means that you newsserver said it had it on the server - it may not have cared that we asked for the article in a particular group!" print "All Finished." print "Found " + str(totalFound) + ", Not Found " + str(totalArticles - (totalFound+totalNotChecked)) + ", Not checked " + str(totalNotChecked) sys.exit(0) if __name__ == '__main__': main()
from xml.sax import make_parser from xml.sax.handler import ContentHandler, feature_external_ges, feature_namespaces def parseNZB(fileName): """ Initialize the queue from the specified nzb file """ # Create a parser parser = make_parser() # No XML namespaces here parser.setFeature(feature_namespaces, 0) parser.setFeature(feature_external_ges, 0) # Create the handler dh = NZBParser() # Tell the parser to use it parser.setContentHandler(dh) # Parse the input parser.parse(fileName) return (dh.groups, dh.queue) class NZBParser(ContentHandler): """ Parse an NZB 1.0 file into a list of msgids http://www.newzbin.com/DTD/nzb/nzb-1.0.dtd """ def __init__(self): # downloading queue to add NZB segments to self.queue = [] # nzb file to parse #self.nzb = nzb self.groups = [] # parsing variables self.file = None self.bytes = None self.number = None self.chars = None self.fileNeedsDownload = None self.fileCount = 0 self.segmentCount = 0 def startElement(self, name, attrs): if name == 'file': subject = self.parseUnicode(attrs.get('subject')) poster = self.parseUnicode(attrs.get('poster')) self.fileCount += 1 elif name == 'group': self.chars = [] elif name == 'segment': self.bytes = int(attrs.get('bytes')) self.number = int(attrs.get('number')) self.chars = [] def characters(self, content): if self.chars is not None: self.chars.append(content) def endElement(self, name): if name == 'file': self.file = None self.fileNeedsDownload = None elif name == 'group': newsgroup = self.parseUnicode(''.join(self.chars)) if newsgroup not in self.groups: self.groups.append(newsgroup) self.chars = None elif name == 'segment': self.segmentCount += 1 messageId = self.parseUnicode(''.join(self.chars)) self.queue.append(messageId) self.chars = None self.number = None self.bytes = None def parseUnicode(self, unicodeOrStr): if isinstance(unicodeOrStr, unicode): return unicodeOrStr.encode('latin-1') return unicodeOrStr
_______________________________________________ Twisted-Python mailing list Twisted-Python@twistedmatrix.com http://twistedmatrix.com/cgi-bin/mailman/listinfo/twisted-python