python core.py Traceback (most recent call last): File "core.py", line 39, in <module> crawler = Crawler(settings) File "/home/hardik/Install/envs/webparse/local/lib/python2.7/site-packages/scrapy/crawler.py", line 32, in __init__ self.spidercls.update_settings(self.settings) AttributeError: 'Settings' object has no attribute 'update_settings'
Please tell me how to set Crawler settings -- You received this message because you are subscribed to the Google Groups "scrapy-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to scrapy-users+unsubscr...@googlegroups.com. To post to this group, send email to scrapy-users@googlegroups.com. Visit this group at http://groups.google.com/group/scrapy-users. For more options, visit https://groups.google.com/d/optout.
import logging import scrapy logger = logging.getLogger('mycustomlogger') # import the spiders you want to run from spiders.faballey import FaballeySpider from spiders.bewakoof import BewakoofSpider # scrapy api imports from scrapy import signals from twisted.internet import reactor from scrapy.crawler import Crawler, CrawlerProcess from scrapy.settings import Settings # list of crawlers TO_CRAWL = [FaballeySpider, BewakoofSpider] # crawlers that are running RUNNING_CRAWLERS = [] def spider_closing(spider): """ Activates on spider closed signal """ log.msg("Spider closed: %s" % spider, level=log.INFO) RUNNING_CRAWLERS.remove(spider) if not RUNNING_CRAWLERS: reactor.stop() # start logger # log.start(loglevel=log.DEBUG) # set up the crawler and start to crawl one spider at a time for spider in TO_CRAWL: settings = Settings() # crawl responsibly settings.set("USER_AGENT","Aniket Jagani (+http://aniketjagani.github.io)", priority='cmdline') crawler = Crawler(settings) crawler_obj = spider() RUNNING_CRAWLERS.append(crawler_obj) # stop reactor when spider closes crawler.signals.connect(spider_closing, signal=signals.spider_closed) crawler.configure() crawler.crawl(crawler_obj) crawler.start() # blocks process; so always keep as the last statement reactor.run()