Nilesh has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/77060


Change subject: Modified MapReduce keys to output different kinds of pairs into different files
......................................................................

Modified MapReduce keys to output different kinds of pairs into different files

Change-Id: Ie27f486f6f979f0ae2a9017f95613e1fdcc56d48
---
M wikiparser/wikiparser.py
M wikiparser/wikiparser_r.py
2 files changed, 14 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikidataEntitySuggester refs/changes/60/77060/1

diff --git a/wikiparser/wikiparser.py b/wikiparser/wikiparser.py
index c7e3731..7fc047f 100644
--- a/wikiparser/wikiparser.py
+++ b/wikiparser/wikiparser.py
@@ -41,10 +41,10 @@
                             toyield1 = str(statement['value'])
                             value = 
str(statement['wikibase-entityid']['numeric-id']) if 'wikibase-entityid' in 
statement else statement['string']
                             toyield2 = str(statement['value']) + "----" + value
-                            sys.stdout.write("$$\t" + toyield1.encode("utf-8", 
'ignore').strip() + "\n")
-                            sys.stdout.write("$$\t" + toyield2.encode("utf-8", 
'ignore').strip() + "\n")
-                            sys.stdout.write("@@\t" + str(title) + "," + 
toyield1.encode("utf-8", 'ignore').strip() + "\n")
-                            sys.stdout.write("@@\t" + str(title) + "," + 
toyield2.encode("utf-8", 'ignore').strip() + "\n")
+                            sys.stdout.write("$$P_LIST$$\t" + 
toyield1.encode("utf-8", 'ignore').strip() + "\n")
+                            sys.stdout.write("$$PVE_LIST$$\t" + 
toyield2.encode("utf-8", 'ignore').strip() + "\n")
+                            sys.stdout.write("$$IP_PAIRS$$\t" + str(title) + 
"," + toyield1.encode("utf-8", 'ignore').strip() + "\n")
+                            sys.stdout.write("$$IPV_PAIRS$$\t" + str(title) + 
"," + toyield2.encode("utf-8", 'ignore').strip() + "\n")
                         except KeyError:
                             pass
     except (KeyError, ValueError, TypeError) as e:
diff --git a/wikiparser/wikiparser_r.py b/wikiparser/wikiparser_r.py
index 0593c0e..4528d22 100644
--- a/wikiparser/wikiparser_r.py
+++ b/wikiparser/wikiparser_r.py
@@ -3,14 +3,18 @@
 import sys
 
 def main():
-    listout = open(sys.argv[1], "w")
+    for i in range(1..4):
+        outputFiles[i] = open(sys.argv[i], "w")
+    fileMap = { "$$P_LIST$$": outputFiles[1],
+                "$$P_LIST$$": outputFiles[2],
+                "$$P_LIST$$": outputFiles[3],
+                "$$P_LIST$$": outputFiles[4] }
     for i in sys.stdin:
         (key, value) = i.split("\t")
-        if key == "@@":
-            sys.stdout.write(value)
-        else:
-            listout.write(value)
-    listout.close()
+        fileMap.get(key).write(value)
+        
+    for f in outputFiles:
+        f.close()
         
 if __name__ == '__main__':
     main()

-- 
To view, visit https://gerrit.wikimedia.org/r/77060
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie27f486f6f979f0ae2a9017f95613e1fdcc56d48
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/WikidataEntitySuggester
Gerrit-Branch: master
Gerrit-Owner: Nilesh <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to