I am having a problem while trying to click "Next" on a hyperlink using Selenium WebDriver. I tried using a CSS selector as well as XPath, and nothing seems to work. It's not looping as expected. Could anyone please guide me on how to crawl the jobs on all pages?
I have posted my spider code and output from this link : http://ge.tt/4nr7Q372 Platform : Scrapy + selenium + Python Please let me know is there any correction needed? Thanks Advance. HTML code: <div id="win0divHRS_APPL_WRK_HRS_LST_NEXT"> <span class="PSHYPERLINK" title="Next In List"> <a id="HRS_APPL_WRK_HRS_LST_NEXT" class="PSHYPERLINK" href="javascript:submitAction_win0(document.win0,'HRS_APPL_WRK_HRS_LST_NEXT');" tabindex="74" ptlinktgt="pt_replace" name="HRS_APPL_WRK_HRS_LST_NEXT">Next</a> </span> </div> Spider code: driver = webdriver.Firefox() def parse(self,response): self.driver.get('https://eapplicant.northshore.org/psc/psapp/EMPLOYEE/HRMS/c/HRS_HRAM.HRS_CE.GBL') done = False while not done: selector = Selector(text=self.driver.page_source) try: next = self.driver.find_element_by_xpath('//*[@id="HRS_APPL_WRK_HRS_LST_NEXT"]') except: done = True try: links = [] for link in selector.css('span.PSEDITBOX_DISPONLY').re('.*>(\d+)<.*'): abc = 'https://eapplicant.northshore.org/psp/psapp/EMPLOYEE/HRMS/c/HRS_HRAM.HRS_CE.GBL?Page=HRS_CE_JOB_DTL&Action=A&JobOpeningId='+link+'&SiteId=1&PostingSeq=1' yield Request(abc,callback=self.PS_Form, headers={"X-Requested-With": "XMLHttpRequest"}, dont_filter=True) if not done: next.click() except: break #self.driver.close() def PS_Form(self,response): selector = Selector(response) url = selector.xpath('//*[@id="ptifrmtgtframe"]/@src').extract()[0] yield Request(url,callback=self.parse_listing_page, headers={"X-Requested-With": "XMLHttpRequest"}, dont_filter=True) def parse_listing_page(self,response): selector = Selector(response) item=northshoreSpiderItem() print response item['CompanyName'] = "NorthShore University Health System" item ['JobDetailUrl'] = response.url item ['Title'] = selector.xpath('//*[@id="HRS_JO_WRK_POSTING_TITLE$0"]/text()').extract() item ['Internaljobid'] = selector.xpath(".//*[@id='HRS_JO_WRK_HRS_JOB_OPENING_ID$0']/text()").extract() item ['City'] = 
selector.xpath(".//*[@id='HRS_CE_WRK2_HRS_CE_JO_LCTNS$0']/text()").re('(.*?)\,.*') item ['State'] = selector.xpath(".//*[@id='HRS_CE_WRK2_HRS_CE_JO_LCTNS$0']/text()").re('.*\, (.*?) .*') item ['PositionType'] = selector.xpath(".//*[@id='HRS_CE_WRK2_HRS_FULL_PART_TIME$0']/text()").extract() item ['Country'] = "US" item ['Country'] = "US" item['Zipcode'] = "00000" Description = selector.xpath('//div//p//span//text()').extract() item['Description'] = [d.encode('UTF-8') for d in Description] yield item Thanks Advance -- You received this message because you are subscribed to the Google Groups "scrapy-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To post to this group, send email to [email protected]. Visit this group at http://groups.google.com/group/scrapy-users. For more options, visit https://groups.google.com/d/optout.
