Hi Group,

I've managed to get a pipeline working that is INSERTING my scraped data 
into a MySQL database but I am unable to update records as it appears the 
update logic is not working correctly... any suggestions as to why its not 
triggering? I have tested the SQL statement etc directly which is working 
fine.

Pipeline.py:
import sys
import MySQLdb
import hashlib
from datetime import datetime
from scrapy.exceptions import DropItem
from scrapy.http import Request

class EmptyItemPipeline(object):

    def process_item(self, item, spider):
        if item['handicap']:
            return item
        else:
            item['handicap'] = '99'
            return item

class MySQLStorePipeline(object):

    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        dbargs = dict(
            host=settings['localhost'],
            db=settings['database'],
            user=settings['username'],
            passwd=settings['password'],
            charset='utf8',
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)        
        return cls(dbpool)

    def process_item(self, item, spider):
        # run db query in the thread pool
        d = self.dbpool.runInteraction(self._do_upsert, item, spider)
        d.addErrback(self._handle_error, item, spider)
        # at the end return the item in case of success or failure
        d.addBoth(lambda _: item)
        # return the deferred instead the item. This makes the engine to
        # process next item (according to CONCURRENT_ITEMS setting) after 
this
        # operation (deferred) has finished.
        return d

    def _do_upsert(self, conn, item, spider):

        conn.execute("""SELECT EXISTS(
                SELECT 1 FROM tblHandicaps WHERE Player = %s)""", 
(item['name'][0]))
        ret = conn.fetchone()[0]

        if ret:
            conn.execute("""
                UPDATE tblHandicaps
                SET Player=%s, Handicap=%s, Exact=%s, Category=%s, Updated
                WHERE Player=%s
            """, (item['name'][0], item['handicap'][0], item['exact'][0], 
item['category'][0], datetime.now(), item['name'][0]))
            spider.log("Item updated in db: %s %r" % (item['name'][0], 
item))
        else:
            conn.execute("""INSERT INTO tblHandicaps (Player, Handicap, 
Exact, Category, Sex, Updated) VALUES (%s, %s, %s, %s, 'M', %s)""",
                (item['name'][0],
                 item['handicap'][0],
                 item['exact'][0],
                 item['category'][0],
                 datetime.now()))

            spider.log("Item stored in db: %s %r" % (item['name'][0], item))

    def _handle_error(self, failure, item, spider):
        """Handle occurred on db interaction."""
        # do nothing, just log
        log.err(failure)



I don't see a specific error in the console log other than an SQL primary 
key error as its trying to INSERT rather than UPDATE.

Cheers


Shaun

-- 
You received this message because you are subscribed to the Google Groups 
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to