kevgeo commented on code in PR #37237:
URL: https://github.com/apache/airflow/pull/37237#discussion_r1485205691
##########
airflow/providers/google/cloud/hooks/gcs.py:
##########
@@ -1295,37 +1300,45 @@ def _prepare_sync_plan(
destination_object: str | None,
recursive: bool,
) -> tuple[set[storage.Blob], set[storage.Blob], set[storage.Blob]]:
- # Calculate the number of characters that remove from the name,
because they contain information
+ # Calculate the number of characters that are removed from the name,
because they contain information
# about the parent's path
source_object_prefix_len = len(source_object) if source_object else 0
destination_object_prefix_len = len(destination_object) if
destination_object else 0
delimiter = "/" if not recursive else None
+
# Fetch blobs list
source_blobs = list(source_bucket.list_blobs(prefix=source_object,
delimiter=delimiter))
destination_blobs = list(
destination_bucket.list_blobs(prefix=destination_object,
delimiter=delimiter)
)
+
# Create indexes that allow you to identify blobs based on their name
source_names_index = {a.name[source_object_prefix_len:]: a for a in
source_blobs}
destination_names_index = {a.name[destination_object_prefix_len:]: a
for a in destination_blobs}
+
# Create sets with names without parent object name
source_names = set(source_names_index.keys())
+ # Discards empty string that creates an empty source subdirectory
Review Comment:
@dirrao Could you let me know whether the comment above is clearer now?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@airflow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org