Repository: incubator-airflow
Updated Branches:
  refs/heads/master 1359d8735 -> 8d2f43073
[AIRFLOW-1883] Get File Size for objects in Google Cloud Storage

Closes #2840 from kaxil/Get_File_Size

Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/8d2f4307
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/8d2f4307
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/8d2f4307

Branch: refs/heads/master
Commit: 8d2f430732331c003d2c82f9a0c435e013281fe9
Parents: 1359d87
Author: Kaxil Naik <[email protected]>
Authored: Mon Dec 4 14:10:31 2017 -0800
Committer: Chris Riccomini <[email protected]>
Committed: Mon Dec 4 14:10:37 2017 -0800

----------------------------------------------------------------------
 airflow/contrib/hooks/gcs_hook.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/8d2f4307/airflow/contrib/hooks/gcs_hook.py
----------------------------------------------------------------------
diff --git a/airflow/contrib/hooks/gcs_hook.py b/airflow/contrib/hooks/gcs_hook.py
index f6ad39f..3103a5a 100644
--- a/airflow/contrib/hooks/gcs_hook.py
+++ b/airflow/contrib/hooks/gcs_hook.py
@@ -269,3 +269,31 @@ class GoogleCloudStorageHook(GoogleCloudBaseHook):
                 # empty next page token
                 break
         return ids
+
+    def get_size(self, bucket, object):
+        """
+        Gets the size of a file in Google Cloud Storage.
+        :param bucket: The Google cloud storage bucket where the object is.
+        :type bucket: string
+        :param object: The name of the object to check in the Google cloud
+            storage bucket.
+        :type object: string
+        """
+        self.log.info('Checking the file size of object: %s in bucket: %s', object, bucket)
+        service = self.get_conn()
+        try:
+            response = service.objects().get(
+                bucket=bucket,
+                object=object
+            ).execute()
+
+            if 'name' in response and response['name'][-1] != '/':
+                # Remove Directories & Just check size of files
+                size = response['size']
+                self.log.info('The file size of %s is %s', object, size)
+                return size
+            else:
+                raise ValueError('Object is not a file')
+        except errors.HttpError as ex:
+            if ex.resp['status'] == '404':
+                raise ValueError('Object Not Found')
