I have the following code, which scraped the data perfectly: class DmozSpider(Spider): name = "dmoz" allowed_domains = ["reuters.com"] a = datetime.date(2014, 04, 01) b = datetime.date(2014, 04, 02) articles=ET.Element("articles")
urls=["http://www.reuters.com/resources/archive/us/" + dt.strftime("%Y") + dt.strftime("%m") + dt.strftime("%d")+".html" for dt in rrule(DAILY, dtstart=a, until=b)] def start_requests(self): date=" " for url in self.urls: yield Request(url=url,meta={'date':str(url)[-10:-4]},callback=self.parse) def parse(self, response): sel = Selector(response) sites = sel.xpath('//*[@id="content"]/div[2]/div/div/div[1]') passed_date=response.meta.get('date') items=[] for site in sites: item = DmozItem() item['title'] = site.xpath('.//div/a/text()').extract() item['link'] = site.xpath('.//a/@href').extract() item['time'] = site.xpath('.//div/text()').extract() item['date'] = passed_date items.append(item) return items I would like to store the items in an XML file with the following structure: <root> <article_date>passed_date <article_time>item['time'] <article_name>item['title']</article_name> <article_link>item['link']</article_link> </article_time> </article_date></root> This is what I have tried so far (nothing gets written to the selected file): import xml.etree.cElementTree as ET class TutorialPipeline(object): def __init__(self): dispatcher.connect(self.spider_opened, signals.spider_opened) dispatcher.connect(self.spider_closed, signals.spider_closed) self.files = {} self.exporters = {} def spider_opened(self, spider): file = open('~/Documents/test.xml', 'w+b') self.files[spider] = file self.exporters[spider] = XmlItemExporter(file) self.exporters[spider].start_exporting() def spider_closed(self, spider): self.exporters[spider].finish_exporting() file = self.files.pop(spider) file.close() def process_item(self, item, spider): self.exporters[spider].export_item(item) return item -- You received this message because you are subscribed to the Google Groups "scrapy-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to scrapy-users+unsubscr...@googlegroups.com. To post to this group, send email to scrapy-users@googlegroups.com. 
Visit this group at http://groups.google.com/group/scrapy-users. For more options, visit https://groups.google.com/d/optout.