QChris has submitted this change and it was merged.
Change subject: Catch ValueError raised by hive.partition_datetime_from_path
......................................................................
Catch ValueError raised by hive.partition_datetime_from_path
We just saw a case where the data directory contained a subdirectory
whose name could not be parsed by dateutil.parser.parse.
This change keeps the whole script from dying when that happens.
Change-Id: I4e53a25a0519826128982fe94427b2bba66fda6f
---
M bin/refinery-drop-webrequest-partitions
M python/refinery/util.py
2 files changed, 20 insertions(+), 7 deletions(-)
Approvals:
QChris: Verified; Looks good to me, approved
diff --git a/bin/refinery-drop-webrequest-partitions b/bin/refinery-drop-webrequest-partitions
index ddb871a..542bef6 100755
--- a/bin/refinery-drop-webrequest-partitions
+++ b/bin/refinery-drop-webrequest-partitions
@@ -119,13 +119,22 @@
# Loop through all the partition directory paths for this table
# and check if any of them are old enough for deletion.
for partition_path in HdfsUtils.ls(partition_glob, include_children=False):
- partition_datetime = hive.partition_datetime_from_path(
- partition_path,
- webrequest_date_regex
- )
- if partition_datetime < old_partition_datetime_threshold:
+ try:
+ partition_datetime = hive.partition_datetime_from_path(
+ partition_path,
+ webrequest_date_regex
+ )
+ except ValueError:
+ logging.error(
+ 'hive.partition_datetime_from_path could not parse date found in {0} using pattern {1}. Skipping.'
+ .format(partition_path, webrequest_date_regex.pattern)
+ )
+ continue
+
+ if partition_datetime and partition_datetime < old_partition_datetime_threshold:
partition_paths_to_delete.append(partition_path)
+
# Drop any old Hive partitions
if partition_specs_to_drop:
if dry_run:
diff --git a/python/refinery/util.py b/python/refinery/util.py
index 9092a37..e7816c5 100755
--- a/python/refinery/util.py
+++ b/python/refinery/util.py
@@ -356,8 +356,12 @@
if isinstance(regex, basestring):
regex = re.compile(regex)
- return dateutil_parse(regex.search(path).group(1))
-
+ match = regex.search(path)
+ if match:
+ return dateutil_parse(match.group(1))
+ else:
+ logger.debug('No path matching {0} was found in {1}.'.format(regex.pattern, path))
+ return None
def query(self, query, check_return_code=True, use_tempfile=False):
"""
--
To view, visit https://gerrit.wikimedia.org/r/180305
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I4e53a25a0519826128982fe94427b2bba66fda6f
Gerrit-PatchSet: 2
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Ottomata <[email protected]>
Gerrit-Reviewer: QChris <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits