Xqt has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/103252


Change subject: [PEP8] changes, code improvements, insert __version__ string
......................................................................

[PEP8] changes, code improvements, insert __version__ string

Change-Id: Icff281c4d659d40a527eeecac12de17afaef8201
---
M data_ingestion.py
1 file changed, 80 insertions(+), 51 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/compat refs/changes/52/103252/1

diff --git a/data_ingestion.py b/data_ingestion.py
index f5c8f3d..4098399 100644
--- a/data_ingestion.py
+++ b/data_ingestion.py
@@ -1,69 +1,85 @@
 #!/usr/bin/python
 # -*- coding: utf-8  -*-
-'''
+"""
 A generic bot to do data ingestion (batch uploading) to Commons
 
-'''
-import pywikibot, upload
-import posixpath, urlparse
+"""
+#
+# (C) Pywikibot team, 2011-2013
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
+import posixpath
+import urlparse
 import urllib
-import hashlib, base64
+import hashlib
+import base64
 import StringIO
 try:
     import json
 except ImportError:
     import simplejson as json
+import pywikibot
+import upload
+
 
 class Photo(object):
-    '''
+    """
     Represents a Photo (or other file), with metadata, to upload to Commons.
 
     The constructor takes two parameters: URL (string) and metadata (dict with str:str key:value pairs)
     that can be referred to from the title & template generation.
 
-
-    '''
+    """
     def __init__(self, URL, metadata):
         self.URL = URL
         self.metadata = metadata
         self.metadata["_url"] = URL
-        self.metadata["_filename"] = filename = posixpath.split(urlparse.urlparse(URL)[2])[1]
+        self.metadata["_filename"] = filename = posixpath.split(
+            urlparse.urlparse(URL)[2])[1]
         self.metadata["_ext"] = ext = filename.split(".")[-1]
         if ext == filename:
             self.metadata["_ext"] = ext = None
         self.contents = None
 
     def downloadPhoto(self):
-        '''
+        """
         Download the photo and store it in a StringIO.StringIO object.
 
         TODO: Add exception handling
-        '''
+
+        """
         if not self.contents:
-            imageFile=urllib.urlopen(self.URL).read()
+            imageFile = urllib.urlopen(self.URL).read()
             self.contents = StringIO.StringIO(imageFile)
         return self.contents
 
-    def findDuplicateImages(self, site = pywikibot.getSite(u'commons', u'commons')):
-        '''
-        Takes the photo, calculates the SHA1 hash and asks the mediawiki api for a list of duplicates.
+    def findDuplicateImages(self,
+                            site=pywikibot.getSite(u'commons', u'commons')):
+        """
+        Takes the photo, calculates the SHA1 hash and asks the mediawiki api
+        for a list of duplicates.
 
         TODO: Add exception handling, fix site thing
-        '''
+
+        """
         hashObject = hashlib.sha1()
         hashObject.update(self.downloadPhoto().getvalue())
         return site.getFilesFromAnHash(base64.b16encode(hashObject.digest()))
 
     def getTitle(self, fmt):
         """
-        Given a format string with %(name)s entries, returns the string formatted with metadata
+        Given a format string with %(name)s entries, returns the string
+        formatted with metadata
+
         """
         return fmt % self.metadata
 
     def getDescription(self, template, extraparams={}):
-        '''
-        Generate a description for a file
-        '''
+        """ Generate a description for a file """
 
         params = {}
         params.update(self.metadata)
@@ -72,13 +88,15 @@
         for key in sorted(params.keys()):
             value = params[key]
             if not key.startswith("_"):
-                description = description + (u'|%s=%s' % (key, self._safeTemplateValue(value))) + "\n"
-        description = description + u'}}'
+                description += (u'|%s=%s'
+                                % (key, self._safeTemplateValue(value))) + "\n"
+        description += u'}}'
 
         return description
 
     def _safeTemplateValue(self, value):
         return value.replace("|", "{{!}}")
+
 
 def CSVReader(fileobj, urlcolumn, *args, **kwargs):
     import csv
@@ -88,30 +106,35 @@
         yield Photo(line[urlcolumn], line)
 
 
-def JSONReader(baseurl, start=0, end=100, JSONBase=None, metadataFunction=None, fileurl=u'fileurl'):
-    '''
+def JSONReader(baseurl, start=0, end=100, JSONBase=None, metadataFunction=None,
+               fileurl=u'fileurl'):
+    """
     Loops over a bunch of json page and process them with processJSONPage().
 
     Will yield Photo objects with metadata
-    '''
+
+    """
     if baseurl:
-        for i in range(start , end):
+        for i in range(start, end):
             url = baseurl % (i,)
-            photo = processJSONPage(url, JSONBase=JSONBase, metadataFunction=metadataFunction, fileurl=u'fileurl')
+            photo = processJSONPage(url, JSONBase=JSONBase,
+                                    metadataFunction=metadataFunction,
+                                    fileurl=u'fileurl')
             if photo:
                 yield photo
-                
 
 
-def processJSONPage(url, JSONBase=None, metadataFunction=None, fileurl=u'fileurl'):
-    '''
+def processJSONPage(url, JSONBase=None, metadataFunction=None,
+                    fileurl=u'fileurl'):
+    """
     Process a single JSON page.
     For the JSON page you can rebase it to not get all the crap
     You can apply a custom metadata function to do some modification on the metadata and checking
     By default the field 'fileurl' is expected in the metadata to contain the file. You can change this.
 
     Will a return Photo object with metadata or None if something is wrong
-    '''
+
+    """
     JSONPage = urllib.urlopen(url)
     JSONData = json.load(JSONPage)
     JSONPage.close()
@@ -130,17 +153,20 @@
         if metadataFunction:
             metadata = metadataFunction(metadata)
 
-        # If the metadataFunction didn't return none (something was wrong). Return the photo
+        # If the metadataFunction didn't return none (something was wrong).
+        # Return the photo
         if metadata:
             return Photo(metadata.get(fileurl), metadata)
-
     return False
 
+
 def JSONRebase(JSONData, JSONBase):
-    '''
+    """
     Moves the base of the JSON object to the part you're intrested in.
-    JSONBase is a list to crawl the tree. If one of the steps is not found, return None
-    '''
+    JSONBase is a list to crawl the tree. If one of the steps is not found,
+    return None
+
+    """
     for step in JSONBase:
         if JSONData:
             if type(JSONData) == dict:
@@ -148,21 +174,20 @@
             elif type(JSONData) == list:
                 # FIXME: Needs error, length etc checking
                 JSONData = JSONData[step]
-
     return JSONData
 
 
 def JSONTree(metadata, fieldlist, record):
-    '''
+    """
     metadata: Dict with end result
     key: The key we encountered
     record: Record to work on
-    '''
+    """
     if type(record) == list:
         for r in record:
             metadata = JSONTree(metadata, fieldlist, r)
     elif type(record) == dict:
-        for k,v in record.items():
+        for k, v in record.items():
             metadata = JSONTree(metadata, fieldlist + [k], v)
     elif type(record) == unicode:
         key = u'_'.join(fieldlist)
@@ -172,11 +197,13 @@
             newkey = key + u'_2'
             if not newkey in metadata:
                 metadata[newkey] = record
-        
     return metadata
 
+
 class DataIngestionBot:
-    def __init__(self, reader, titlefmt, pagefmt, site=pywikibot.getSite(u'commons', u'commons')):
+
+    def __init__(self, reader, titlefmt, pagefmt,
+                 site=pywikibot.getSite(u'commons', u'commons')):
         self.reader = reader
         self.titlefmt = titlefmt
         self.pagefmt = pagefmt
@@ -190,17 +217,16 @@
         title = photo.getTitle(self.titlefmt)
         description = photo.getDescription(self.pagefmt)
 
-        bot = upload.UploadRobot(url = photo.URL,
-                                 description = description,
-                                 useFilename = title,
-                                 keepFilename = True,
-                                 verifyDescription = False,
+        bot = upload.UploadRobot(url=photo.URL,
+                                 description=description,
+                                 useFilename=title,
+                                 keepFilename=True,
+                                 verifyDescription=False,
                                  ignoreWarning=True,
-                                 targetSite = self.site)
+                                 targetSite=self.site)
         bot._contents = photo.downloadPhoto().getvalue()
         bot._retrieved = True
         bot.run()
-
         return title
 
     def doSingle(self):
@@ -210,9 +236,12 @@
         for photo in self.reader:
             self._doUpload(photo)
 
-if __name__=="__main__":
+
+if __name__ == "__main__":
     reader = CSVReader(open('tests/data/csv_ingestion.csv'), 'url')
-    bot = DataIngestionBot(reader, "%(name)s - %(set)s.%(_ext)s", ":user:valhallasw/test_template", pywikibot.getSite('test', 'test'))
+    bot = DataIngestionBot(reader, "%(name)s - %(set)s.%(_ext)s",
+                           ":user:valhallasw/test_template",
+                           pywikibot.getSite('test', 'test'))
     bot.run()
 
 """

-- 
To view, visit https://gerrit.wikimedia.org/r/103252
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Icff281c4d659d40a527eeecac12de17afaef8201
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to