http://stackoverflow.com/questions/27815829/python-connection-refused-error-in-crawl-scrapy-with-seleniumrc

I am new to Scrapy and Python. The pages I am crawling load their content
through AJAX calls, so I am driving them with Selenium inside my crawl
spider. The code I wrote is:

import scrapy
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException

from streetdirectory.items import StreetdirectoryItem


class StdallurlsSpider(scrapy.Spider):
    name = "stdallurls"
    # allowed_domains must contain domains only, not URL paths
    allowed_domains = ["streetdirectory.com"]
    start_urls = [
        "http://www.streetdirectory.com/businessfinder/company/All/All/A/",
    ]

    def __init__(self, *args, **kwargs):
        super(StdallurlsSpider, self).__init__(*args, **kwargs)
        # Connect to a Selenium server expected at 127.0.0.1:4444
        self.driver = webdriver.Remote(
            "http://127.0.0.1:4444/wd/hub",
            webdriver.DesiredCapabilities.HTMLUNITWITHJS)

    def parse(self, response):
        self.driver.get(response.url)
        self.driver.implicitly_wait(10)

        item = StreetdirectoryItem()  # the item class must be instantiated
        item['page'] = response.url
        finalurls = []

        while True:
            try:
                # "View more" link injected by the AJAX call
                next_link = self.driver.find_element_by_xpath(
                    './/span[@class="ver_11 viewLink"]/a')
                print "-------------next------------", next_link
                next_link.click()

                urls = self.driver.find_elements_by_xpath(
                    './/h3[@class="fleft"]/a')
                print "===============urls============", urls

                for url in urls:
                    href = url.get_attribute("href")
                    print "...................url.......................", href
                    finalurls.append(href)
            except (NoSuchElementException, WebDriverException):
                # No next link left (or the click failed): stop paginating
                break

        item['urls'] = finalurls
        self.driver.close()
        return item

My items.py for this is:

import scrapy
from scrapy.item import Field


class StreetdirectoryItem(scrapy.Item):
    page = Field()
    urls = Field()
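
For reference, here is a minimal sketch of how the item is meant to be
populated (placeholder values, just to show the dict-style access that
scrapy.Item supports). Note the parentheses: the class has to be
instantiated first, which was one of the bugs in my original spider:

from streetdirectory.items import StreetdirectoryItem

item = StreetdirectoryItem()  # create an instance, not a reference to the class
item['page'] = "http://www.streetdirectory.com/businessfinder/company/All/All/A/"
item['urls'] = ["http://example.com/a", "http://example.com/b"]  # placeholder URLs
print item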

When I try to crawl, I get this error:

resp = opener.open(request)
  File "/usr/lib/python2.7/urllib2.py", line 404, in open
    response = self._open(req, data)
  File "/usr/lib/python2.7/urllib2.py", line 422, in _open
    '_open', req)
  File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
    result = func(*args)
  File "/usr/lib/python2.7/urllib2.py", line 1214, in http_open
    return self.do_open(httplib.HTTPConnection, req)
  File "/usr/lib/python2.7/urllib2.py", line 1184, in do_open
    raise URLError(err)
urllib2.URLError: <urlopen error [Errno 111] Connection refused>
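
My guess is that "Connection refused" means nothing was listening at
127.0.0.1:4444 when webdriver.Remote tried to connect, i.e. the Selenium
server may not be running. A quick way to check whether the hub answers at
all (a sketch, assuming the default hub address; /status is the standard
status endpoint of the Selenium server):

import urllib2

# Probe the hub's status endpoint before running the spider.
try:
    resp = urllib2.urlopen("http://127.0.0.1:4444/wd/hub/status", timeout=5)
    print "Selenium server is up, HTTP", resp.getcode()
except urllib2.URLError as e:
    print "Nothing listening at 127.0.0.1:4444:", e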

If anybody knows the solution, please let me know.
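
In case it helps narrow things down: would swapping in a local driver be a
reasonable workaround? A sketch, assuming Firefox is installed locally
(this bypasses the remote hub on port 4444 entirely):

from selenium import webdriver

# Local driver: no Selenium server / remote hub required.
driver = webdriver.Firefox()
driver.get("http://www.streetdirectory.com/businessfinder/company/All/All/A/")
print driver.title
driver.quit()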

#python #scrapy #ajax #seleniumrc #web-crawler



