coryan commented on a change in pull request #11375:
URL: https://github.com/apache/arrow/pull/11375#discussion_r726160001



##########
File path: cpp/src/arrow/filesystem/gcsfs_test.cc
##########
@@ -30,10 +34,58 @@ namespace arrow {
 namespace fs {
 namespace {
 
+namespace bp = boost::process;
+namespace gc = google::cloud;
+namespace gcs = google::cloud::storage;
+
 using ::testing::IsEmpty;
 using ::testing::Not;
 using ::testing::NotNull;
 
+auto const* kPreexistingBucket = "test-bucket-name";
+
+class GcsIntegrationTest : public ::testing::Test {
+ public:
+  ~GcsIntegrationTest() override {
+    if (server_process_.valid()) {
+      // Brutal shutdown
+      server_process_.terminate();
+      server_process_.wait();
+    }
+  }
+
+ protected:
+  void SetUp() override {
+    port_ = std::to_string(GetListenPort());
+    auto exe_path = bp::search_path("python3");
+    ASSERT_THAT(exe_path, Not(IsEmpty()));
+
+    server_process_ = bp::child(boost::this_process::environment(), exe_path, 
"-m",
+                                "testbench", "--port", port_);
+
+    // Create a bucket in the testbench so additional
+    auto client = gcs::Client(
+        google::cloud::Options{}
+            .set<gcs::RestEndpointOption>("http://127.0.0.1:"; + port_)
+            .set<gc::UnifiedCredentialsOption>(gc::MakeInsecureCredentials()));
+    auto metadata = client.CreateBucketForProject(
+        kPreexistingBucket, "ignored-by-testbench", gcs::BucketMetadata{});
+    EXPECT_TRUE(metadata.ok()) << "Failed to create bucket <" << 
kPreexistingBucket

Review comment:
       Done.

##########
File path: cpp/src/arrow/filesystem/gcsfs.cc
##########
@@ -35,19 +68,85 @@ google::cloud::Options AsGoogleCloudOptions(const 
GcsOptions& o) {
   if (!o.endpoint_override.empty()) {
     std::string scheme = o.scheme;
     if (scheme.empty()) scheme = "https";
+    if (scheme == "https") {
+      options.set<google::cloud::UnifiedCredentialsOption>(
+          google::cloud::MakeGoogleDefaultCredentials());
+    } else {
+      options.set<google::cloud::UnifiedCredentialsOption>(
+          google::cloud::MakeInsecureCredentials());
+    }
     options.set<gcs::RestEndpointOption>(scheme + "://" + o.endpoint_override);
   }
   return options;
 }
 
+Status ToArrowStatus(google::cloud::Status const& s) {

Review comment:
       > there should probably be more detailed unit tests for this function?
   
   Done.  I tried to make it as little of a "change detection test" as 
possible, but it is hard for these things.
   
   > Do you think it will be important to retrieve the original status? Arrow 
status has an opaque shared pointer that can be used for this.
   
   I doubt it.  The client library automatically retries (subject to policies 
set by the application), by the time it returns the most common action is to 
log the error.  We can always add this later in any case.
   

##########
File path: cpp/src/arrow/filesystem/gcsfs.cc
##########
@@ -35,19 +68,85 @@ google::cloud::Options AsGoogleCloudOptions(const 
GcsOptions& o) {
   if (!o.endpoint_override.empty()) {
     std::string scheme = o.scheme;
     if (scheme.empty()) scheme = "https";
+    if (scheme == "https") {
+      options.set<google::cloud::UnifiedCredentialsOption>(
+          google::cloud::MakeGoogleDefaultCredentials());
+    } else {
+      options.set<google::cloud::UnifiedCredentialsOption>(
+          google::cloud::MakeInsecureCredentials());
+    }
     options.set<gcs::RestEndpointOption>(scheme + "://" + o.endpoint_override);
   }
   return options;
 }
 
+Status ToArrowStatus(google::cloud::Status const& s) {
+  std::ostringstream os;
+  os << "google::cloud::Status(" << s << ")";
+  switch (s.code()) {
+    case google::cloud::StatusCode::kOk:
+      break;
+    case google::cloud::StatusCode::kCancelled:
+      return Status::Cancelled(os.str());
+    case google::cloud::StatusCode::kUnknown:
+      return Status::UnknownError(os.str());
+    case google::cloud::StatusCode::kInvalidArgument:
+      return Status::Invalid(os.str());
+    case google::cloud::StatusCode::kDeadlineExceeded:
+    case google::cloud::StatusCode::kNotFound:
+      // TODO: it is unclear if a better mapping would be possible.
+      return Status::UnknownError(os.str());
+    case google::cloud::StatusCode::kAlreadyExists:
+      return Status::AlreadyExists(os.str());
+    case google::cloud::StatusCode::kPermissionDenied:
+    case google::cloud::StatusCode::kUnauthenticated:
+      return Status::UnknownError(os.str());
+    case google::cloud::StatusCode::kResourceExhausted:
+      return Status::CapacityError(os.str());
+    case google::cloud::StatusCode::kFailedPrecondition:
+    case google::cloud::StatusCode::kAborted:
+      return Status::UnknownError(os.str());
+    case google::cloud::StatusCode::kOutOfRange:
+      return Status::Invalid(os.str());
+    case google::cloud::StatusCode::kUnimplemented:
+      return Status::NotImplemented(os.str());
+    case google::cloud::StatusCode::kInternal:
+    case google::cloud::StatusCode::kUnavailable:
+    case google::cloud::StatusCode::kDataLoss:
+      return Status::UnknownError(os.str());
+  }
+  return Status::OK();
+}
+
 class GcsFileSystem::Impl {
  public:
   explicit Impl(GcsOptions o)
       : options_(std::move(o)), client_(AsGoogleCloudOptions(options_)) {}
 
   GcsOptions const& options() const { return options_; }
 
+  Result<FileInfo> GetFileInfo(GcsPath const& path) {
+    if (!path.object.empty()) {
+      auto meta = client_.GetObjectMetadata(path.bucket, path.object);
+      return GetFileInfoImpl(path, std::move(meta).status(), FileType::File);
+    }
+    auto meta = client_.GetBucketMetadata(path.bucket);
+    return GetFileInfoImpl(path, std::move(meta).status(), 
FileType::Directory);
+  }
+
  private:
+  Result<FileInfo> GetFileInfoImpl(GcsPath const& path, google::cloud::Status 
status,

Review comment:
       Done.

##########
File path: cpp/src/arrow/filesystem/gcsfs.cc
##########
@@ -35,19 +68,85 @@ google::cloud::Options AsGoogleCloudOptions(const 
GcsOptions& o) {
   if (!o.endpoint_override.empty()) {
     std::string scheme = o.scheme;
     if (scheme.empty()) scheme = "https";
+    if (scheme == "https") {
+      options.set<google::cloud::UnifiedCredentialsOption>(
+          google::cloud::MakeGoogleDefaultCredentials());
+    } else {
+      options.set<google::cloud::UnifiedCredentialsOption>(
+          google::cloud::MakeInsecureCredentials());
+    }
     options.set<gcs::RestEndpointOption>(scheme + "://" + o.endpoint_override);
   }
   return options;
 }
 
+Status ToArrowStatus(google::cloud::Status const& s) {
+  std::ostringstream os;
+  os << "google::cloud::Status(" << s << ")";
+  switch (s.code()) {
+    case google::cloud::StatusCode::kOk:
+      break;
+    case google::cloud::StatusCode::kCancelled:
+      return Status::Cancelled(os.str());
+    case google::cloud::StatusCode::kUnknown:
+      return Status::UnknownError(os.str());
+    case google::cloud::StatusCode::kInvalidArgument:
+      return Status::Invalid(os.str());
+    case google::cloud::StatusCode::kDeadlineExceeded:
+    case google::cloud::StatusCode::kNotFound:
+      // TODO: it is unclear if a better mapping would be possible.
+      return Status::UnknownError(os.str());
+    case google::cloud::StatusCode::kAlreadyExists:
+      return Status::AlreadyExists(os.str());
+    case google::cloud::StatusCode::kPermissionDenied:
+    case google::cloud::StatusCode::kUnauthenticated:
+      return Status::UnknownError(os.str());
+    case google::cloud::StatusCode::kResourceExhausted:
+      return Status::CapacityError(os.str());
+    case google::cloud::StatusCode::kFailedPrecondition:
+    case google::cloud::StatusCode::kAborted:
+      return Status::UnknownError(os.str());
+    case google::cloud::StatusCode::kOutOfRange:
+      return Status::Invalid(os.str());
+    case google::cloud::StatusCode::kUnimplemented:
+      return Status::NotImplemented(os.str());
+    case google::cloud::StatusCode::kInternal:
+    case google::cloud::StatusCode::kUnavailable:
+    case google::cloud::StatusCode::kDataLoss:
+      return Status::UnknownError(os.str());
+  }
+  return Status::OK();
+}
+
 class GcsFileSystem::Impl {
  public:
   explicit Impl(GcsOptions o)
       : options_(std::move(o)), client_(AsGoogleCloudOptions(options_)) {}
 
   GcsOptions const& options() const { return options_; }
 
+  Result<FileInfo> GetFileInfo(GcsPath const& path) {

Review comment:
       Fixed here and elsewhere.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to