Hi All,
I have spider code in which I want to scrape the data from all pages.
But the data is only extracted starting from the second page; the first page's data is skipped.
Please help me solve this problem.
My spider code is:


import re
import sys
from string import join
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlXPathSelector
from scrapy.http import Request
from ebay.items import EbayItem
from urlparse import urlparse
class InfojobsSpider(CrawlSpider):
    """Crawl eBay seller listing pages and collect one ``EbayItem`` per row.

    The crawl starts at ``start_urls`` and follows every link selected by
    the rule below (anchors with class ``"gspr next"`` -- presumably the
    pagination "next" link; verify against the live page markup).

    ``CrawlSpider`` applies rule callbacks only to responses reached by
    following extracted links. The responses for ``start_urls`` themselves
    are handed to ``parse_start_url``, so that hook is aliased to
    ``parse_item`` to have the first page scraped as well.
    """

    name = "ebaytest"
    allowed_domains = ["www.ebay.com"]
    start_urls = [
        "http://www.ebay.com/sch/m.html?_ssn=lugnutguys&_ipg=200&rt=nc",
    ]
    rules = (
        Rule(
            SgmlLinkExtractor(
                allow=(r'sch\/m\.html'),
                restrict_xpaths=('//a[@class="gspr next"]'),
            ),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Extract listing rows from one results page.

        Args:
            response: the downloaded results page.

        Returns:
            A list of ``EbayItem`` objects, one per ``<tr>`` found inside
            tables whose class contains ``"li rsittlref"``. Each item has
            ``titlename``, ``titleurl`` and ``price`` set to the lists
            returned by ``extract()``; ``upc`` is set only when a title URL
            matches the pattern below.
        """
        items = []
        hxs = HtmlXPathSelector(response)
        rows = hxs.select('//table[contains(@class,"li rsittlref")]//tr')
        for row in rows:
            item = EbayItem()
            item['titlename'] = row.select(
                'td[@class="dtl dtlsp"]//h3/a/text()').extract()
            item['titleurl'] = row.select(
                'td[@class="dtl dtlsp"]//h3/a/@href').extract()
            for url in item['titleurl']:
                # Capture the text between a '/' and the '?' that follows it
                # (presumably the listing id in the URL path -- confirm).
                match = re.search(r'.+\/(.*?)\?', url)
                if match:
                    item['upc'] = match.group(1)
                else:
                    print("not found")
            item['price'] = row.select(
                'td[@class="prc"]//div[@class="g-b"]/span/text()').extract()
            items.append(item)
        return items

    # The hook name is singular: ``parse_start_url``. With the misspelled
    # ``parse_start_urls`` the start page was never passed to parse_item.
    parse_start_url = parse_item

-- 
You received this message because you are subscribed to the Google Groups 
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to scrapy-users+unsubscr...@googlegroups.com.
To post to this group, send email to scrapy-users@googlegroups.com.
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.

Reply via email to