Hello everyone, I am new to Python and have been stuck on this error for a long time. My spider and items files are attached below; please go through them.

The full traceback is:

File "/usr/bin/scrapy", line 9, in <module>
    load_entry_point('Scrapy==0.24.4', 'console_scripts', 'scrapy')()
  File "/usr/lib/pymodules/python2.7/scrapy/cmdline.py", line 143, in 
execute
    _run_print_help(parser, _run_command, cmd, args, opts)
  File "/usr/lib/pymodules/python2.7/scrapy/cmdline.py", line 89, in 
_run_print_help
    func(*a, **kw)
  File "/usr/lib/pymodules/python2.7/scrapy/cmdline.py", line 150, in 
_run_command
    cmd.run(args, opts)
  File "/usr/lib/pymodules/python2.7/scrapy/commands/crawl.py", line 58, in 
run
    spider = crawler.spiders.create(spname, **opts.spargs)
  File "/usr/lib/pymodules/python2.7/scrapy/spidermanager.py", line 48, in 
create
    return spcls(**spider_kwargs)
  File 
"/home/ubuntu/Desktop/python/timesdirectory/timesdirectory/spiders/seleniumtdurls.py",
 
line 22, in __init__
    self.driver = webdriver.Remote("http://127.0.0.1:4444/wd/hub";, 
webdriver.DesiredCapabilities.HTMLUNITWITHJS)
  File 
"/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py",
 
line 73, in __init__
    self.start_session(desired_capabilities, browser_profile)
  File 
"/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py",
 
line 121, in start_session
    'desiredCapabilities': desired_capabilities,
  File 
"/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py",
 
line 171, in execute
    response = self.command_executor.execute(driver_command, params)
  File 
"/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/remote_connection.py",
 
line 349, in execute
    return self._request(command_info[0], url, body=data)
  File 
"/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/remote_connection.py",
 
line 417, in _request
    resp = opener.open(request)
  File "/usr/lib/python2.7/urllib2.py", line 404, in open
    response = self._open(req, data)
  File "/usr/lib/python2.7/urllib2.py", line 422, in _open
    '_open', req)
  File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
    result = func(*args)
  File "/usr/lib/python2.7/urllib2.py", line 1214, in http_open
    return self.do_open(httplib.HTTPConnection, req)
  File "/usr/lib/python2.7/urllib2.py", line 1184, in do_open
    raise URLError(err)
urllib2.URLError: <urlopen error [Errno 111] Connection refused>
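
From what I can tell, the last line is the actual failure: "[Errno 111] Connection refused" means nothing is accepting connections at 127.0.0.1:4444, which normally means the standalone Selenium server is not running (it has to be started separately before the spider can open a remote session). A quick way to check reachability from Python (a sketch for Python 2, matching the traceback; /wd/hub/status is the remote server's status endpoint):

import urllib2

# Probe the Selenium hub; "Connection refused" here confirms that no
# Selenium server is listening on port 4444.
try:
    print urllib2.urlopen("http://127.0.0.1:4444/wd/hub/status").read()
except urllib2.URLError as e:
    print "Selenium server unreachable:", e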


Hoping for a helpful response.
Thanks,
Charu

seleniumtdurls.py:

# -*- coding: utf-8 -*-
import scrapy

from timesdirectory.items import TimesdirectoryItem

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException


class SeleniumtdurlsSpider(scrapy.Spider):
    name = "tdurls"
    allowed_domains = ["timesbusinessdirectory.com"]
    start_urls = ['http://www.timesbusinessdirectory.com/CompanyListings_MG.aspx?DirID=187&name=Company+Listings&mid=1276']

    def __init__(self, *args, **kwargs):
        super(SeleniumtdurlsSpider, self).__init__(*args, **kwargs)
        # Needs a Selenium standalone server already listening on port 4444;
        # if none is running, this line raises the Connection refused error.
        self.driver = webdriver.Remote("http://127.0.0.1:4444/wd/hub",
                                       webdriver.DesiredCapabilities.HTMLUNITWITHJS)

    def parse(self, response):
        self.driver.get(response.url)
        self.driver.implicitly_wait(10)

        item = TimesdirectoryItem()
        item['page'] = response.url
        finalurls = []

        while True:
            # Collect the company links on the current page before paginating,
            # so the first page is not skipped.
            anchors = self.driver.find_elements_by_xpath(
                './/table[@id="dgrdCompany"]/tr/td/table/tr/td[1]/table/tr/td/a')
            for anchor in anchors:
                url = anchor.get_attribute("href")
                self.log("found url: %s" % url)
                finalurls.append(url)

            # Advance to the next page; stop when the pager link is gone.
            try:
                next_link = self.driver.find_element_by_xpath(
                    './/table[@id="dgrdCompany"]/tr[12]/td/a')
                next_link.click()
            except NoSuchElementException:
                break

        item['urls'] = finalurls
        self.driver.quit()
        return item
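
For reference, a more defensive version of the driver setup in __init__ is sketched below (the hub URL and capabilities are the ones used above); it turns the low-level urllib2 error into an actionable message:

import urllib2
from selenium import webdriver

def make_remote_driver(hub_url="http://127.0.0.1:4444/wd/hub"):
    # Sketch only: the Selenium server must already be running; if it is
    # not, surface a clear hint instead of the raw URLError traceback.
    try:
        return webdriver.Remote(hub_url,
                                webdriver.DesiredCapabilities.HTMLUNITWITHJS)
    except urllib2.URLError:
        raise RuntimeError("Cannot reach the Selenium server at %s -- "
                           "start it before running the spider." % hub_url)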
items.py:

# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class TimesdirectoryItem(scrapy.Item):
    # Fields filled in by the tdurls spider.
    page = scrapy.Field()
    urls = scrapy.Field()

    # Additional fields, defined but not yet used by the spider above.
    pageurls = scrapy.Field()
    title = scrapy.Field()
    basic_info = scrapy.Field()
    content_info = scrapy.Field()
    categories = scrapy.Field()
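
As an aside, if a remote HtmlUnit session is not a hard requirement, a local browser avoids the hub entirely, so no Selenium server needs to be running; a minimal sketch, assuming Firefox is installed on the machine running the spider:

from selenium import webdriver

# Sketch: a local Firefox driver as a drop-in replacement for the Remote
# driver in __init__; no Selenium server is involved.
driver = webdriver.Firefox()
driver.get("http://www.timesbusinessdirectory.com/")
print driver.title
driver.quit()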
