Hi Group,
I've managed to get a pipeline working that is INSERTING my scraped data
into a MySQL database but I am unable to update records as it appears the
update logic is not working correctly... any suggestions as to why its not
triggering? I have tested the SQL statement etc directly which is working
fine.
Pipeline.py:
import sys
import MySQLdb
import hashlib
from datetime import datetime
from scrapy.exceptions import DropItem
from scrapy.http import Request
class EmptyItemPipeline(object):
def process_item(self, item, spider):
if item['handicap']:
return item
else:
item['handicap'] = '99'
return item
class MySQLStorePipeline(object):
def __init__(self, dbpool):
self.dbpool = dbpool
@classmethod
def from_settings(cls, settings):
dbargs = dict(
host=settings['localhost'],
db=settings['database'],
user=settings['username'],
passwd=settings['password'],
charset='utf8',
use_unicode=True,
)
dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
return cls(dbpool)
def process_item(self, item, spider):
# run db query in the thread pool
d = self.dbpool.runInteraction(self._do_upsert, item, spider)
d.addErrback(self._handle_error, item, spider)
# at the end return the item in case of success or failure
d.addBoth(lambda _: item)
# return the deferred instead the item. This makes the engine to
# process next item (according to CONCURRENT_ITEMS setting) after
this
# operation (deferred) has finished.
return d
def _do_upsert(self, conn, item, spider):
conn.execute("""SELECT EXISTS(
SELECT 1 FROM tblHandicaps WHERE Player = %s)""",
(item['name'][0]))
ret = conn.fetchone()[0]
if ret:
conn.execute("""
UPDATE tblHandicaps
SET Player=%s, Handicap=%s, Exact=%s, Category=%s, Updated
WHERE Player=%s
""", (item['name'][0], item['handicap'][0], item['exact'][0],
item['category'][0], datetime.now(), item['name'][0]))
spider.log("Item updated in db: %s %r" % (item['name'][0],
item))
else:
conn.execute("""INSERT INTO tblHandicaps (Player, Handicap,
Exact, Category, Sex, Updated) VALUES (%s, %s, %s, %s, 'M', %s)""",
(item['name'][0],
item['handicap'][0],
item['exact'][0],
item['category'][0],
datetime.now()))
spider.log("Item stored in db: %s %r" % (item['name'][0], item))
def _handle_error(self, failure, item, spider):
"""Handle occurred on db interaction."""
# do nothing, just log
log.err(failure)
I don't see a specific error in the console log other than an SQL primary
key error as its trying to INSERT rather than UPDATE.
Cheers
Shaun
--
You received this message because you are subscribed to the Google Groups
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/groups/opt_out.