This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kibble-scanners.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e67be0  prefer full name over username, if available
2e67be0 is described below

commit 2e67be04a9c5ecb4cdb87247d7ee9891e5030dd1
Author: Daniel Gruno <humbed...@apache.org>
AuthorDate: Fri Mar 2 19:51:32 2018 +0100

    prefer full name over username, if available
    
    - only store shortened bio if it's new
    - prefer full name over username if we find it.
---
 src/plugins/scanners/discourse.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/plugins/scanners/discourse.py b/src/plugins/scanners/discourse.py
index fc1f769..d160baa 100644
--- a/src/plugins/scanners/discourse.py
+++ b/src/plugins/scanners/discourse.py
@@ -87,8 +87,11 @@ def scanJob(KibbleBit, source, cat, creds):
                 # Store user-ID-to-username mapping for later
                 allUsers[user['id']] = userDoc
                 
-                # Store it (or, queue storage)
-                KibbleBit.append('person', userDoc)
+                # Store it (or, queue storage) unless it exists.
+                # We don't wanna override better data, so we check if
+                # it's there first.
+                if not KibbleBit.exists('person', dhash):
+                    KibbleBit.append('person', userDoc)
             
             # Now, for each topic, we'll store a topic document
             for topic in catjson['topic_list']['topics']:
@@ -146,10 +149,12 @@ def scanJob(KibbleBit, source, cat, creds):
                 KibbleBit.pprint("%s has %u posts" % (pURL, len(posts)))
                 for post in posts:
                     phash = hashlib.sha224( ("%s-%s-post-%s" % (source['organisation'], source['sourceURL'], post['id']) ).encode('ascii', errors='replace')).hexdigest()
+                    uname = post.get('name', post['username']) or post['username'] # Hack to get longest non-zero value
                     
                     # Find the hash of the person who posted it
-                    # We may know them, or we may have to store them
-                    if post['user_id'] in allUsers:
+                    # We may know them, or we may have to store them.
+                    # If we have better info now (full name), re-store
+                    if post['user_id'] in allUsers and allUsers[post['user_id']]['name'] == uname:
                         uhash = allUsers[post['user_id']]['id']
                     else:
                         # Same as before, fake email, store...
@@ -160,7 +165,7 @@ def scanJob(KibbleBit, source, cat, creds):
                         userDoc = {
                             'id': uhash,
                             'organisation': source['organisation'],
-                            'name': post['username'],
+                            'name': uname,
                             'email': email,
                         }
                         

-- 
To stop receiving notification emails like this one, please contact
humbed...@apache.org.

Reply via email to