In the following code I need to loop through all the <li> elements under the <ul> and, for each one, check whether an <a> anchor tag is present in that <li> as per the XPath mentioned. But the spider stops crawling after only one <li> tag, and I don't understand what's wrong.
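In outline, what I want is: for each <li>, if it contains the store-detail <a> link, follow that link and finish the item in a second callback; otherwise take the store details straight from the <li>. Here is a stripped-down sketch of that flow (the selectors and item fields are heavily simplified; the full XPaths and the HandlingStoreInfo item are in the real spider below):

import scrapy


class SketchSpider(scrapy.Spider):
    # Simplified illustration of the intended per-<li> flow, not the real spider below.
    name = "sketch"
    start_urls = ['https://suumo.jp/chintai/tokyo/sc_shinjuku/jnc_000013357603/']

    def parse(self, response):
        # Every <li> under the <ul> should produce either a follow-up request or an item.
        for li in response.xpath('/html/body/div[5]/div[6]/ul//li'):
            # Is the store-detail <a> link present in this <li>?
            href = li.xpath('.//div[@class="itemcassette_img-desc"]/a/@href').extract_first()
            if href:
                # Link present: follow it and build the item in the second callback.
                yield scrapy.Request(response.urljoin(href),
                                     callback=self.parse_storeinfo,
                                     dont_filter=True)
            else:
                # No link: scrape the store info directly from this <li>.
                yield {
                    'Handling_Store_Name': li.xpath(
                        './/span[@class="itemcassette-header-ttl"]/text()'
                    ).extract_first('Null').strip(),
                }

    def parse_storeinfo(self, response):
        # Placeholder second-level parse; the real one fills in the remaining fields.
        yield {
            'Handling_Store_Name': response.css('h1::text').extract_first(' ').strip(),
        }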
Here is the spider:

import scrapy
from shinjukuproject.items import HandlingStoreInfo
from w3lib.html import remove_tags


class ShijukuHandlingStores(scrapy.Spider):
    name = "singlepagestores"
    start_urls = [
        'https://suumo.jp/chintai/tokyo/sc_shinjuku/jnc_000013357603/']

    def parse(self, response):
        for li in response.xpath('/html/body/div[5]/div[6]/ul//li'):
            hsurl = li.xpath(
                './/div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-object"]/div[@class="itemcassette_img"]/div[@class="itemcassette_img-desc"]/a/@href'
            ).extract_first()
            if(hsurl):
                item = HandlingStoreInfo()
                item['Room_ID'] = response.xpath(
                    '/html/head/link[@rel="canonical"]/@href').re('\w+\_\d+')
                request = scrapy.Request(response.urljoin(hsurl),
                                         callback=self.parse_storeinfo,
                                         dont_filter=True)
                request.meta['item'] = item
                return request
            else:
                item = HandlingStoreInfo()
                item['Room_ID'] = response.xpath(
                    '/html/head/link[@rel="canonical"]/@href').re('\w+\_\d+')
                hsn = li.xpath(
                    '//div[@class="itemcassette"]/div[@class="itemcassette-header"]/span[@class="itemcassette-header-ttl"]/text()'
                ).extract_first('Null').strip()
                item['Handling_Store_Name'] = remove_tags(hsn)
                item['Handling_Store_id'] = item['Room_ID']
                item['Location'] = li.xpath(
                    '//div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-contents"]/div[@class="itemcassette_matrix"]/div[@class="itemcassette_matrix-cell01"]/text()'
                ).extract_first('Null').strip()
                item['Transportation_Facilities'] = "N/A"
                contact = li.xpath(
                    '//div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-contents"]/div[@class="itemcassette_matrix"]/div[@class="itemcassette_matrix-cell04"]/span/text()'
                ).re('\d+\-\d+\-\d+')
                item['Contact'] = remove_tags(contact)
                item['Fax'] = "N/A"
                bh = li.xpath(
                    '//div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-contents"]/div[@class="itemcassette_matrix"]/div[@class="itemcassette_matrix-cell02"]/text()'
                ).extract_first(' ').strip()
                item['Buisiness_Hours'] = remove_tags(bh)
                rh = li.xpath(
                    '//div[@class="itemcassette"]/div[@class="itemcassette-body"]/div[@class="itemcassette-body-contents"]/div[@class="itemcassette_matrix"]/div[@class="itemcassette_matrix-cell03"]/text()'
                ).extract_first(' ').strip()
                item['Regular_Holidays'] = remove_tags(rh)
                item['License_Number'] = "N/A"
                item['Store_Characteristics'] = "N/A"
                return item

    def parse_storeinfo(self, response):
        item = response.meta['item']
        item['Handling_Store_Name'] = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt h1::text').extract_first(' ').strip()
        item['Handling_Store_id'] = response.css('html head link[rel=canonical]::attr(href)').re('\w+\_\d+\_\d+')
        item['Location'] = response.css('div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(1) td:nth-of-type(1)::text').extract_first(' ').strip()
        item['Transportation_Facilities'] = response.css('div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(1) td:nth-of-type(2) ul li::text').extract_first()
        item['Contact'] = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(2) td:nth-of-type(1) span.col-notice em::text').extract_first(' ').strip()
        item['Fax'] = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(2) td:nth-of-type(2)::text').re('\d+\-\d+\-\d+')
        bh = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(3) td:nth-of-type(1)::text').extract_first(' ').strip()
        item['Buisiness_Hours'] = remove_tags(bh)
        rh = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(3) td:nth-of-type(2)::text').extract_first(' ').strip()
        item['Regular_Holidays'] = remove_tags(rh)
        item['License_Number'] = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(4) td:nth-of-type(2)::text').extract_first(' ').strip()
        item['Store_Characteristics'] = response.css('html body.chintai.ch_leaf div#wrapper div#contents.ch-shdt div.section table.data_table.table_gaiyou tr:nth-of-type(5) td:nth-of-type(1)::text').extract_first(' ').strip()
        return item
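One way to sanity-check the outer XPath (to rule out the selector itself only matching a single <li>) would be scrapy shell, e.g.:

$ scrapy shell 'https://suumo.jp/chintai/tokyo/sc_shinjuku/jnc_000013357603/'
>>> len(response.xpath('/html/body/div[5]/div[6]/ul//li'))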