Ottomata has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/90553


Change subject: Using tempfile script for large Hive add partition queries.
......................................................................

Using tempfile script for large Hive add partition queries.

Change-Id: Ib7de34fd718e7c01fb47f37a1427667b97323fdd
---
M kraken-etl/util.py
1 file changed, 19 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/kraken refs/changes/53/90553/1

diff --git a/kraken-etl/util.py b/kraken-etl/util.py
index edca75a..186c2b0 100644
--- a/kraken-etl/util.py
+++ b/kraken-etl/util.py
@@ -3,6 +3,7 @@
 import os
 import re
 import subprocess
+import tempfile
 
 
 logger = logging.getLogger('kraken-etl-util')
@@ -187,7 +188,14 @@
         """
         if partition_datetimes:
             q = self.add_partitions_ddl(table, partition_datetimes)
-            return self.query(q, True)
+
+            # If there are a lot of partitions to add, 
+            # use a tempfile Hive script to add the partitions,
+            # rather than passing the whole query on the command line.
+            if (len(partition_datetimes) > 20):
+                return self.query_through_script(q)
+            else:
+                return self.query(q)
         else:
             logger.info("Not creating any partitions for table %s.  No partition datetimes were given." % table)
 
@@ -252,6 +260,16 @@
 
         return self.tables[table]['interval']
 
+    def query_through_script(self, query, check_return_code=True):
+        """Writes the query to a tempfile and runs it as a Hive script."""
+        f = tempfile.NamedTemporaryFile()
+        logger.debug('Writing Hive query to tempfile %s.' % f.name)
+        f.write(query)
+        f.flush()
+        out = self.script(f.name)
+        # NamedTemporaryFile will be deleted on close().
+        f.close()
+        return out
 
     def query(self, query, check_return_code=True):
         """Runs the given hive query and returns stdout"""

-- 
To view, visit https://gerrit.wikimedia.org/r/90553
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib7de34fd718e7c01fb47f37a1427667b97323fdd
Gerrit-PatchSet: 1
Gerrit-Project: analytics/kraken
Gerrit-Branch: master
Gerrit-Owner: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to