Here is the scrapy code i am using. The for loop needs to go through all <li> tags. And check that specific xpath for the presence of the anchor tag. But the loop stops at the first iteration of the for loop.
import scrapy from shinjukuproject.items import HandlingStoreInfo from w3lib.html import remove_tags class ShijukuHandlingStores(scrapy.Spider): name = "singlepagestores" start_urls = [ 'https://suumo.jp/chintai/tokyo/sc_shinjuku/jnc_000013357603/'] def parse(self, response): for li in response.xpath('/html/body/div[5]/div[6]/ul//li'): hsurl = li.xpath( './/div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-object"]/div[@class="itemcassette_img"]/div[@class="itemcassette_img-desc"]/a/@href' ).extract_first() if(hsurl): item = HandlingStoreInfo() item['Room_ID'] = response.xpath( '/html/head/link[@rel="canonical"]/@href').re('\w+\_\d+') request = scrapy.Request(response.urljoin(hsurl), callback= self.parse_storeinfo, dont_filter = True) request.meta['item'] = item return request else: item = HandlingStoreInfo() item['Room_ID'] = response.xpath( '/html/head/link[@rel="canonical"]/@href').re('\w+\_\d+') hsn = li.xpath( '//div[@class="itemcassette"]/div[@class="itemcassette-header"]/span[@class="itemcassette-header-ttl"]/text()' ).extract_first('Null').strip() item['Handling_Store_Name'] = remove_tags(hsn) item['Handling_Store_id'] = item['Room_ID'] item['Location'] = li.xpath( '//div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-contents"]/div[@class="itemcassette_matrix"]/div[@class="itemcassette_matrix-cell01"]/text()' ).extract_first('Null').strip() item['Transportation_Facilities'] = "N/A" contact = li.xpath( '//div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-contents"]/div[@class="itemcassette_matrix"]/div[@class="itemcassette_matrix-cell04"]/span/text()' ).re('\d+\-\d+\-\d+') item['Contact'] = remove_tags(contact) item['Fax'] = "N/A" bh = li.xpath( '//div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-contents"]/div[@class="itemcassette_matrix"]/div[@class="itemcassette_matrix-cell02"]/text()' ).extract_first(' ').strip() item['Buisiness_Hours'] = remove_tags(bh) rh = li.xpath( '//div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-contents"]/div[@class="itemcassette_matrix"]/div[@class="itemcassette_matrix-cell03"]/text()' ).extract_first(' ').strip() item['Regular_Holidays'] = remove_tags(rh) item['License_Number'] = "N/A" item['Store_Characteristics'] = "N/A" return item def parse_storeinfo(self, response): item = response.meta['item'] item['Handling_Store_Name'] = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt h1::text'). extract_first(' ').strip() item['Handling_Store_id'] = response.css('html head link[rel=canonical]::attr(href)').re('\w+\_\d+\_\d+') item['Location'] = response.css('div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(1) td:nth-of-type(1)::text').extract_first(' ').strip() item['Transportation_Facilities'] = response.css('div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(1) td:nth-of-type(2) ul li::text').extract_first() item['Contact'] = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(2) td:nth-of-type(1) span.col-notice em::text').extract_first (' ').strip() item['Fax'] = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(2) td:nth-of-type(2)::text').re('\d+\-\d+\-\d+') bh = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(3) td:nth-of-type(1)::text').extract_first(' ').strip() item['Buisiness_Hours'] = remove_tags(bh) rh = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(3) td:nth-of-type(2)::text').extract_first(' ').strip() item['Regular_Holidays'] = remove_tags(rh) item['License_Number'] = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(4) td:nth-of-type(2)::text').extract_first(' ').strip() item['Store_Characteristics'] = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(5) td:nth-of-type(1)::text'). extract_first(' ').strip() return item -- You received this message because you are subscribed to the Google Groups "scrapy-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to scrapy-users+unsubscr...@googlegroups.com. To post to this group, send email to scrapy-users@googlegroups.com. Visit this group at https://groups.google.com/group/scrapy-users. For more options, visit https://groups.google.com/d/optout.