I wrote a Python script using QtWebKit to retrieve all of a page's content, including content generated by AJAX requests. I run the code below on a CentOS server with the following setup:
> $ Xvfb :100 -screen 0 9000x9000x24 & export DISPLAY=:100 The following code works; however, it only gets *one screen* of the web page's content, that is, the amount of content retrieved varies with the screen resolution. I can only get part of the content of the web page. I have tried using *selenium*, and I can get all of the page content if I set a large screen resolution using *Xvfb*. Please give me some tips on how to solve this problem; any manual for *QtWebKit* would also be appreciated, because I can't find much material about it. Also, the following code cannot exit automatically after the work is done. I can't find where the bug is... Every time, I must use the command *kill* to terminate the script. Thanks anyway. The following is my code: #!/usr/bin/env python > > #coding: utf-8 > > >> import sys > > >> from PyQt4.QtCore import QUrl, SIGNAL, QSize > > from PyQt4.QtGui import QApplication > > from PyQt4.QtWebKit import QWebPage, QWebView > > >> >> class WebPage(QWebPage): > > > > def javaScriptConsoleMessage(self, message, lineNumber, sourceID): > > sys.stderr.write('Javascritp error at line number %d\n' % >> (lineNumber)) > > sys.stderr.write('%s\n' % (message, )) > > sys.stderr.write('Source ID: %s\n' % (sourceID, )) > > >> >> class Crawler(QApplication): > > > > def __init__(self, url): > > super(Crawler, self).__init__(sys.argv) > > > > self.url = url > > self.web_view = QWebView() > > self.web_page = WebPage() > > self.web_view.setPage(self.web_page) > > self.web_frame = self.web_page.currentFrame() > > >> self.qsize = QSize() > > self.qsize.setHeight(9000) > > self.qsize.setWidth(9000) > > >> > > # self.settings.setAttribute(QWebSettings.AutoLoadImages, False) > > # self.setttings.setAttribute(QWebSettings.PluginsEnabled, False) > > >> # self.setMaximumSize(10000, 10000) > > >> # >> self.web_page.setViewportSize(self.web_page.mainFrame().contentsSize()) > > >> print 'Before connecting' > > self.connect(self.web_view, SIGNAL('loadFinished(bool)'), >> 
self.loadFinished) > > print 'After connecting' > > >> print 'Before loading' > > self.web_frame.load(QUrl(self.url)) > > print 'After loading' > > > > def loadFinished(self, ok): > > print 'In callback, before writing' > > with open('jd.txt', 'ab+') as fp: > > fp.write(self.web_page.currentFrame().toHtml().toUtf8()) > > print 'In callback, after writing' > > >> >> if __name__ == '__main__': > > url = 'http://www.360buy.com/product/729487.html' > > crawler = Crawler(url) > > sys.exit(crawler.exec_()) > > > -- 宠辱不惊,闲看庭前花开花落;去留无意,漫随天边云卷云舒。
_______________________________________________ PyQt mailing list PyQt@riverbankcomputing.com http://www.riverbankcomputing.com/mailman/listinfo/pyqt