Nilesh has uploaded a new change for review.
https://gerrit.wikimedia.org/r/77060
Change subject: Modified MapReduce keys to output different kinds of pairs into different files
......................................................................
Modified MapReduce keys to output different kinds of pairs into different files
Change-Id: Ie27f486f6f979f0ae2a9017f95613e1fdcc56d48
---
M wikiparser/wikiparser.py
M wikiparser/wikiparser_r.py
2 files changed, 14 insertions(+), 10 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikidataEntitySuggester refs/changes/60/77060/1
diff --git a/wikiparser/wikiparser.py b/wikiparser/wikiparser.py
index c7e3731..7fc047f 100644
--- a/wikiparser/wikiparser.py
+++ b/wikiparser/wikiparser.py
@@ -41,10 +41,10 @@
toyield1 = str(statement['value'])
value =
str(statement['wikibase-entityid']['numeric-id']) if 'wikibase-entityid' in
statement else statement['string']
toyield2 = str(statement['value']) + "----" + value
- sys.stdout.write("$$\t" + toyield1.encode("utf-8",
'ignore').strip() + "\n")
- sys.stdout.write("$$\t" + toyield2.encode("utf-8",
'ignore').strip() + "\n")
- sys.stdout.write("@@\t" + str(title) + "," +
toyield1.encode("utf-8", 'ignore').strip() + "\n")
- sys.stdout.write("@@\t" + str(title) + "," +
toyield2.encode("utf-8", 'ignore').strip() + "\n")
+ sys.stdout.write("$$P_LIST$$\t" +
toyield1.encode("utf-8", 'ignore').strip() + "\n")
+ sys.stdout.write("$$PVE_LIST$$\t" +
toyield2.encode("utf-8", 'ignore').strip() + "\n")
+ sys.stdout.write("$$IP_PAIRS$$\t" + str(title) +
"," + toyield1.encode("utf-8", 'ignore').strip() + "\n")
+ sys.stdout.write("$$IPV_PAIRS$$\t" + str(title) +
"," + toyield2.encode("utf-8", 'ignore').strip() + "\n")
except KeyError:
pass
except (KeyError, ValueError, TypeError) as e:
diff --git a/wikiparser/wikiparser_r.py b/wikiparser/wikiparser_r.py
index 0593c0e..4528d22 100644
--- a/wikiparser/wikiparser_r.py
+++ b/wikiparser/wikiparser_r.py
@@ -3,14 +3,18 @@
import sys
def main():
- listout = open(sys.argv[1], "w")
+ for i in range(1..4):
+ outputFiles[i] = open(sys.argv[i], "w")
+ fileMap = { "$$P_LIST$$": outputFiles[1],
+ "$$P_LIST$$": outputFiles[2],
+ "$$P_LIST$$": outputFiles[3],
+ "$$P_LIST$$": outputFiles[4] }
for i in sys.stdin:
(key, value) = i.split("\t")
- if key == "@@":
- sys.stdout.write(value)
- else:
- listout.write(value)
- listout.close()
+ fileMap.get(key).write(value)
+
+ for f in outputFiles:
+ f.close()
if __name__ == '__main__':
main()
--
To view, visit https://gerrit.wikimedia.org/r/77060
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie27f486f6f979f0ae2a9017f95613e1fdcc56d48
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/WikidataEntitySuggester
Gerrit-Branch: master
Gerrit-Owner: Nilesh <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits