Ottomata has uploaded a new change for review.
https://gerrit.wikimedia.org/r/90553
Change subject: Using tempfile script for large Hive add partition queries.
......................................................................
Using tempfile script for large Hive add partition queries.
Change-Id: Ib7de34fd718e7c01fb47f37a1427667b97323fdd
---
M kraken-etl/util.py
1 file changed, 19 insertions(+), 1 deletion(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/kraken refs/changes/53/90553/1
diff --git a/kraken-etl/util.py b/kraken-etl/util.py
index edca75a..186c2b0 100644
--- a/kraken-etl/util.py
+++ b/kraken-etl/util.py
@@ -3,6 +3,7 @@
import os
import re
import subprocess
+import tempfile
logger = logging.getLogger('kraken-etl-util')
@@ -187,7 +188,14 @@
"""
if partition_datetimes:
q = self.add_partitions_ddl(table, partition_datetimes)
- return self.query(q, True)
+
+ # If there are a lot of partitions to add,
+ # use a tempfile Hive script to add the partitions,
+ # rather than passing the whole query on the command line.
+ if (len(partition_datetimes) > 20):
+ return self.query_through_script(q)
+ else:
+ return self.query(q)
else:
logger.info("Not creating any partitions for table %s. No partition datetimes were given." % table)
@@ -252,6 +260,16 @@
return self.tables[table]['interval']
+ def query_through_script(self, query, check_return_code=True):
+ """Writes the query to a tempfile and runs it as a Hive script."""
+ f = tempfile.NamedTemporaryFile()
+ logger.debug('Writing Hive query to tempfile %s.' % f.name)
+ f.write(query)
+ f.flush()
+ out = self.script(f.name)
+ # NamedTemporaryFile will be deleted on close().
+ f.close()
+ return out
def query(self, query, check_return_code=True):
"""Runs the given hive query and returns stdout"""
--
To view, visit https://gerrit.wikimedia.org/r/90553
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib7de34fd718e7c01fb47f37a1427667b97323fdd
Gerrit-PatchSet: 1
Gerrit-Project: analytics/kraken
Gerrit-Branch: master
Gerrit-Owner: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits