Nilesh has uploaded a new change for review.
https://gerrit.wikimedia.org/r/72626
Change subject: Made corrections to wikiparser scripts and added reducer
......................................................................
Made corrections to wikiparser scripts and added reducer
Change-Id: Iaa9c8ca99cab30fe5ebd7724ef35fecdc8797b93
---
M wikiparser/wikiparser.py
M wikiparser/wikiparser_db.py
A wikiparser/wikiparser_r.py
3 files changed, 20 insertions(+), 7 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikidataEntitySuggester refs/changes/26/72626/1
diff --git a/wikiparser/wikiparser.py b/wikiparser/wikiparser.py
index 02653d6..aab0f53 100644
--- a/wikiparser/wikiparser.py
+++ b/wikiparser/wikiparser.py
@@ -5,7 +5,6 @@
import json
import sys
-count = 0
page = ''
def main():
@@ -42,14 +41,15 @@
toyield1 = str(statement['value'])
value =
str(statement['wikibase-entityid']['numeric-id']) if 'wikibase-entityid' in
statement else statement['string']
toyield2 = str(statement['value']) + "----" + value
- sys.stdout.write(toyield1.encode("utf-8",
'ignore') + "\n")
- sys.stdout.write(toyield2.encode("utf-8",
'ignore') + "\n")
+ sys.stdout.write("||\t" + str(title) + "," +
toyield1.encode("utf-8", 'ignore') + "\n")
+ sys.stdout.write("||\t" + str(title) + "," +
toyield2.encode("utf-8", 'ignore') + "\n")
+ sys.stdout.write("$$\t" + toyield1.encode("utf-8",
'ignore') + "\n")
+ sys.stdout.write("$$\t" + toyield2.encode("utf-8",
'ignore') + "\n")
except KeyError:
pass
except (KeyError, ValueError, TypeError) as e:
sys.stderr.write("Error occurred for page : " + str(title) + ", ns = "
+ str(page['ns']) + "\n")
sys.stderr.write(traceback.format_exc() + "\n")
-
if __name__ == '__main__':
main()
diff --git a/wikiparser/wikiparser_db.py b/wikiparser/wikiparser_db.py
index be0c6b0..953cda1 100644
--- a/wikiparser/wikiparser_db.py
+++ b/wikiparser/wikiparser_db.py
@@ -14,7 +14,7 @@
con = None
cur = None
try:
- con = mdb.connect('localhost', 'root', 'orangetail', 'wikidatawiki');
+ con = mdb.connect('localhost', 'root', 'password', 'wikidatawiki');
cur = con.cursor()
cur.execute("SET FOREIGN_KEY_CHECKS = 0")
cur.execute("SET UNIQUE_CHECKS = 0")
@@ -66,8 +66,10 @@
toyield1 = str(statement['value'])
value =
str(statement['wikibase-entityid']['numeric-id']) if 'wikibase-entityid' in
statement else statement['string']
toyield2 = str(statement['value']) + "----" + value
- sys.stdout.write(toyield1.encode("utf-8",
'ignore') + "\n")
- sys.stdout.write(toyield2.encode("utf-8",
'ignore') + "\n")
+ sys.stdout.write("||\t" + str(title) + "," +
toyield1.encode("utf-8", 'ignore') + "\n")
+ sys.stdout.write("||\t" + str(title) + "," +
toyield2.encode("utf-8", 'ignore') + "\n")
+ sys.stdout.write("$$\t" + toyield1.encode("utf-8",
'ignore') + "\n")
+ sys.stdout.write("$$\t" + toyield2.encode("utf-8",
'ignore') + "\n")
except KeyError:
pass
elif page['ns'] == '120':
diff --git a/wikiparser/wikiparser_r.py b/wikiparser/wikiparser_r.py
new file mode 100644
index 0000000..52604d4
--- /dev/null
+++ b/wikiparser/wikiparser_r.py
@@ -0,0 +1,11 @@
+#!/usr/bin/python
+
+import sys
+
+def main():
+ for i in sys.stdin:
+ (key, value) = i.split("\t")
+ sys.stdout.write(value + "\n")
+
+if __name__ == '__main__':
+ main()
--
To view, visit https://gerrit.wikimedia.org/r/72626
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Iaa9c8ca99cab30fe5ebd7724ef35fecdc8797b93
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/WikidataEntitySuggester
Gerrit-Branch: master
Gerrit-Owner: Nilesh <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits