pitrou commented on code in PR #12763:
URL: https://github.com/apache/arrow/pull/12763#discussion_r893265103


##########
cpp/src/arrow/filesystem/gcsfs.h:
##########
@@ -22,22 +22,56 @@
 #include <vector>
 
 #include "arrow/filesystem/filesystem.h"
+#include "arrow/util/optional.h"
 #include "arrow/util/uri.h"
 
 namespace arrow {
 namespace fs {
 
-struct GcsCredentials;
+// Opaque wrapper for GCS's library credentials to avoid exposing in Arrow 
headers.
+struct GcsCredentialsHolder;
+class GcsFileSystem;
+
+/// \brief Container for GCS Credentials and information necessary to recreate 
them.
+class ARROW_EXPORT GcsCredentials {
+ public:
+  bool Equals(const GcsCredentials& other) const;
+  bool anonymous() const { return anonymous_; }
+  const std::string& access_token() const { return access_token_; }
+  TimePoint expiration() const { return expiration_; }
+  const std::string& target_service_account() const { return 
target_service_account_; }
+  const std::string& json_credentials() const { return json_credentials_; }
+  const std::shared_ptr<GcsCredentialsHolder>& holder() const { return 
holder_; }
+
+ private:
+  GcsCredentials() = default;
+  bool anonymous_ = false;
+  std::string access_token_;
+  TimePoint expiration_;
+  std::string target_service_account_;
+  std::string json_credentials_;
+  std::shared_ptr<GcsCredentialsHolder> holder_;
+  friend class GcsFileSystem;
+  friend struct GcsOptions;
+};
 
 /// Options for the GcsFileSystem implementation.
 struct ARROW_EXPORT GcsOptions {
-  std::shared_ptr<GcsCredentials> credentials;
+  /// \brief Equivelant to the GcsOptions::Defaults() that returns.
+  GcsOptions();
+  GcsCredentials credentials;
 
   std::string endpoint_override;
   std::string scheme;
   /// \brief Location to use for creating buckets.
   std::string default_bucket_location;
 
+  /// \brief If set used to control total time allowed for retrying underlying
+  /// errors.
+  ///
+  /// The default policy is to retry for up to 15 minutes.
+  arrow::util::optional<int> retry_limit_seconds;

Review Comment:
   Can we make this a double instead? There is no reason to limit ourselves to 
second precision AFAICT (the `LimitedTimeRetryPolicy` seems to use milliseconds 
internally).



##########
cpp/src/arrow/filesystem/gcsfs.h:
##########
@@ -22,22 +22,56 @@
 #include <vector>
 
 #include "arrow/filesystem/filesystem.h"
+#include "arrow/util/optional.h"
 #include "arrow/util/uri.h"
 
 namespace arrow {
 namespace fs {
 
-struct GcsCredentials;
+// Opaque wrapper for GCS's library credentials to avoid exposing in Arrow 
headers.
+struct GcsCredentialsHolder;
+class GcsFileSystem;
+
+/// \brief Container for GCS Credentials and information necessary to recreate 
them.
+class ARROW_EXPORT GcsCredentials {
+ public:
+  bool Equals(const GcsCredentials& other) const;
+  bool anonymous() const { return anonymous_; }
+  const std::string& access_token() const { return access_token_; }
+  TimePoint expiration() const { return expiration_; }
+  const std::string& target_service_account() const { return 
target_service_account_; }
+  const std::string& json_credentials() const { return json_credentials_; }
+  const std::shared_ptr<GcsCredentialsHolder>& holder() const { return 
holder_; }
+
+ private:
+  GcsCredentials() = default;
+  bool anonymous_ = false;
+  std::string access_token_;
+  TimePoint expiration_;
+  std::string target_service_account_;
+  std::string json_credentials_;
+  std::shared_ptr<GcsCredentialsHolder> holder_;
+  friend class GcsFileSystem;
+  friend struct GcsOptions;
+};
 
 /// Options for the GcsFileSystem implementation.
 struct ARROW_EXPORT GcsOptions {
-  std::shared_ptr<GcsCredentials> credentials;
+  /// \brief Equivelant to the GcsOptions::Defaults() that returns.

Review Comment:
   ```suggestion
     /// \brief Equivalent to GcsOptions::Defaults().
   ```



##########
dev/tasks/python-wheels/github.osx.arm64.yml:
##########
@@ -134,6 +134,8 @@ jobs:
           $PYTHON -m venv test-arm64-env
           source test-arm64-env/bin/activate
           pip install --upgrade pip wheel
+          arch -arm64 pip install -r arrow/python/requirements-wheel-test.txt
+          PYTHON=python arch -arm64 arrow/ci/scripts/install_gcs_testbench.sh 
default

Review Comment:
   Same here, I'm curious why the `arch` call is necessary.



##########
cpp/src/arrow/filesystem/gcsfs.cc:
##########
@@ -698,18 +783,30 @@ Result<GcsOptions> GcsOptions::FromUri(const 
arrow::internal::Uri& uri,
     options_map.emplace(kv.first, kv.second);
   }
 
-  if (!uri.password().empty() || !uri.username().empty()) {
-    return Status::Invalid("GCS does not accept username or password.");
+  const std::string& username = uri.username();
+  bool anonymous = username == "anonymous";
+  if (!username.empty() && !anonymous) {
+    return Status::Invalid("GCS URIs do not accept username except 
\"anonymous\".");
+  }
+  if (!uri.password().empty()) {
+    return Status::Invalid("GCS URIs do not accept password.");
   }
+  auto options = anonymous ? GcsOptions::Anonymous() : GcsOptions::Defaults();
 
-  auto options = GcsOptions::Defaults();
   for (const auto& kv : options_map) {
     if (kv.first == "location") {
       options.default_bucket_location = kv.second;
     } else if (kv.first == "scheme") {
       options.scheme = kv.second;
     } else if (kv.first == "endpoint_override") {
       options.endpoint_override = kv.second;
+    } else if (kv.first == "retry_limit_seconds") {
+      int parsed_seconds = atoi(kv.second.c_str());
+      if (parsed_seconds <= 0) {
+        return Status::Invalid(
+            "retry_limit_seconds  must be a positive integer.  found '", 
kv.second, "'");

Review Comment:
   Nit: error formatting
   ```suggestion
               "retry_limit_seconds must be a positive integer, got '", 
kv.second, "'");
   ```



##########
cpp/src/arrow/filesystem/gcsfs_test.cc:
##########
@@ -727,6 +793,13 @@ TEST_F(GcsIntegrationTest, CreateDirUri) {
   ASSERT_RAISES(Invalid, fs->CreateDir("gs://" + RandomBucketName(), true));
 }
 
+TEST_F(GcsIntegrationTest, DeleteBucketDirSuccess) {
+  auto fs = GcsFileSystem::Make(TestGcsOptions());
+  ASSERT_OK(fs->CreateDir("pyarrow-filesystem/", /*recursive=*/true));
+  EXPECT_FALSE(fs->CreateDir("/", false).ok());

Review Comment:
   Use `ASSERT_RAISES` with the expected error here.



##########
dev/tasks/python-wheels/github.osx.arm64.yml:
##########
@@ -134,6 +134,8 @@ jobs:
           $PYTHON -m venv test-arm64-env
           source test-arm64-env/bin/activate
           pip install --upgrade pip wheel
+          arch -arm64 pip install -r arrow/python/requirements-wheel-test.txt

Review Comment:
   Is the `arch` call necessary here? `pip` should point to the 
`test-arm64-env` pip install which already should have the right 
architecture... (same below for x86)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to