Hi, you need to define the next request. You only set start_urls, so Scrapy is never told which URL to fetch next.
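A side note on why your loop stops: `return` inside the for loop exits parse() on the first <li>, so only one request or item ever comes out. Scrapy callbacks are generators; yield each request and item, and yield one more request for the next page. Here is a minimal sketch of that pattern, not your full spider; the pagination XPath is an assumption, so point it at the site's real next-page link:

import scrapy
from shinjukuproject.items import HandlingStoreInfo

class FollowLinksSketch(scrapy.Spider):
    name = "follownext"  # hypothetical name, same start URL as yours
    start_urls = ['https://suumo.jp/chintai/tokyo/sc_shinjuku/jnc_000013357603/']

    def parse(self, response):
        for li in response.xpath('/html/body/div[5]/div[6]/ul//li'):
            hsurl = li.xpath('.//div[@class="itemcassette_img-desc"]/a/@href').extract_first()
            if hsurl:
                item = HandlingStoreInfo()
                request = scrapy.Request(response.urljoin(hsurl),
                                         callback=self.parse_storeinfo,
                                         dont_filter=True)
                request.meta['item'] = item
                # yield, not return: return would end parse() right here
                yield request
        # after the loop, schedule the next listing page; this selector
        # is an assumption, adjust it to the real pagination link
        next_url = response.xpath('//a[@rel="next"]/@href').extract_first()
        if next_url:
            yield scrapy.Request(response.urljoin(next_url), callback=self.parse)

    def parse_storeinfo(self, response):
        item = response.meta['item']
        # fill in the store fields as in your spider, then:
        yield item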
Check page 4 in this PDF: https://media.readthedocs.org/pdf/scrapy/1.0/scrapy.pdf

Regards

On Monday, January 30, 2017 at 14:37:51 (UTC-5), bassam...@gmail.com wrote:
>
> Here is the scrapy code I am using. The for loop needs to go through all
> <li> tags and check that specific xpath for the presence of the anchor
> tag, but the loop stops at the first iteration of the for loop.
>
> import scrapy
> from shinjukuproject.items import HandlingStoreInfo
> from w3lib.html import remove_tags
>
> class ShijukuHandlingStores(scrapy.Spider):
>     name = "singlepagestores"
>     start_urls = ['https://suumo.jp/chintai/tokyo/sc_shinjuku/jnc_000013357603/']
>
>     def parse(self, response):
>         for li in response.xpath('/html/body/div[5]/div[6]/ul//li'):
>             hsurl = li.xpath(
>                 './/div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-object"]/div[@class="itemcassette_img"]/div[@class="itemcassette_img-desc"]/a/@href'
>             ).extract_first()
>
>             if hsurl:
>                 item = HandlingStoreInfo()
>                 item['Room_ID'] = response.xpath(
>                     '/html/head/link[@rel="canonical"]/@href').re('\w+\_\d+')
>                 request = scrapy.Request(response.urljoin(hsurl),
>                                          callback=self.parse_storeinfo,
>                                          dont_filter=True)
>                 request.meta['item'] = item
>                 return request
>             else:
>                 item = HandlingStoreInfo()
>                 item['Room_ID'] = response.xpath(
>                     '/html/head/link[@rel="canonical"]/@href').re('\w+\_\d+')
>                 hsn = li.xpath(
>                     '//div[@class="itemcassette"]/div[@class="itemcassette-header"]/span[@class="itemcassette-header-ttl"]/text()'
>                 ).extract_first('Null').strip()
>                 item['Handling_Store_Name'] = remove_tags(hsn)
>                 item['Handling_Store_id'] = item['Room_ID']
>                 item['Location'] = li.xpath(
>                     '//div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-contents"]/div[@class="itemcassette_matrix"]/div[@class="itemcassette_matrix-cell01"]/text()'
>                 ).extract_first('Null').strip()
>                 item['Transportation_Facilities'] = "N/A"
>                 contact = li.xpath(
>                     '//div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-contents"]/div[@class="itemcassette_matrix"]/div[@class="itemcassette_matrix-cell04"]/span/text()'
>                 ).re('\d+\-\d+\-\d+')
>                 item['Contact'] = remove_tags(contact)
>                 item['Fax'] = "N/A"
>                 bh = li.xpath(
>                     '//div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-contents"]/div[@class="itemcassette_matrix"]/div[@class="itemcassette_matrix-cell02"]/text()'
>                 ).extract_first(' ').strip()
>                 item['Buisiness_Hours'] = remove_tags(bh)
>                 rh = li.xpath(
>                     '//div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-contents"]/div[@class="itemcassette_matrix"]/div[@class="itemcassette_matrix-cell03"]/text()'
>                 ).extract_first(' ').strip()
>                 item['Regular_Holidays'] = remove_tags(rh)
>                 item['License_Number'] = "N/A"
>                 item['Store_Characteristics'] = "N/A"
>                 return item
>
>     def parse_storeinfo(self, response):
>         item = response.meta['item']
>         item['Handling_Store_Name'] = response.css(
>             'html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt h1::text'
>         ).extract_first(' ').strip()
>         item['Handling_Store_id'] = response.css(
>             'html head link[rel=canonical]::attr(href)').re('\w+\_\d+\_\d+')
>         item['Location'] = response.css(
>             'div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(1) td:nth-of-type(1)::text'
>         ).extract_first(' ').strip()
>         item['Transportation_Facilities'] = response.css(
>             'div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(1) td:nth-of-type(2) ul li::text'
>         ).extract_first()
>         item['Contact'] = response.css(
>             'html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(2) td:nth-of-type(1) span.col-notice em::text'
>         ).extract_first(' ').strip()
>         item['Fax'] = response.css(
>             'html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(2) td:nth-of-type(2)::text'
>         ).re('\d+\-\d+\-\d+')
>         bh = response.css(
>             'html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(3) td:nth-of-type(1)::text'
>         ).extract_first(' ').strip()
>         item['Buisiness_Hours'] = remove_tags(bh)
>         rh = response.css(
>             'html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(3) td:nth-of-type(2)::text'
>         ).extract_first(' ').strip()
>         item['Regular_Holidays'] = remove_tags(rh)
>         item['License_Number'] = response.css(
>             'html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(4) td:nth-of-type(2)::text'
>         ).extract_first(' ').strip()
>         item['Store_Characteristics'] = response.css(
>             'html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(5) td:nth-of-type(1)::text'
>         ).extract_first(' ').strip()
>         return item