Hi, I have a behavior I need to understand. My Scrapy script requests 53 URLs (I checked the web pages, and there are indeed 53 URLs matching my request), but it returns only 43 scraped items.
If my code is: allowed_domains = ['vsetkyfirmy.sk'] start_urls = [ 'https://www.vsetkyfirmy.sk/autokempy/', ] rules = [ Rule( LinkExtractor( restrict_xpaths=(u'//*[text()[contains(., "Ďalšie")]]')), callback='parse_start_url', follow = True ) ] page_num = 1 counter = 1 def parse_start_url(self, response): urls = Selector(response).xpath('//td/a[contains(@id, "detaily")]/@href' ).extract() for u in urls: yield {'link' : u} it correctly returns 53 URLs. But if my code is: allowed_domains = ['vsetkyfirmy.sk'] start_urls = [ 'https://www.vsetkyfirmy.sk/autokempy/', ] rules = [ Rule( LinkExtractor( restrict_xpaths=(u'//*[text()[contains(., "Ďalšie")]]')), callback='parse_start_url', follow = True ) ] page_num = 1 counter = 1 def parse_start_url(self, response): urls = Selector(response).xpath('//td/a[contains(@id, "detaily")]/@href' ).extract() for u in urls: yield scrapy.Request(u, callback=self.parse_company) def parse_company(self, response): job = Selector(response).xpath( '//body/div/table[2]/tbody/tr[3]/td[2]/a/text()').extract() name = Selector(response).xpath( '//body/div/table[1]/tbody/tr[1]/td[1]/h1/span/text()').extract() yield { "count" : self.counter, "job" : job, "company page url" : response.url, "company" : name, } self.counter = self.counter + 1 it returns only 43 items. Why? Thanks. -- You received this message because you are subscribed to the Google Groups "scrapy-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to scrapy-users+unsubscr...@googlegroups.com. To post to this group, send email to scrapy-users@googlegroups.com. Visit this group at https://groups.google.com/group/scrapy-users. For more options, visit https://groups.google.com/d/optout.