This is an automated email from the ASF dual-hosted git repository.

cnauroth pushed a commit to branch HADOOP-19343
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/HADOOP-19343 by this push:
     new 237d082f803 HADOOP-19343: Add hadoop-gcp configuration to core-default.xml and ServiceLoader file.
237d082f803 is described below

commit 237d082f8030cb22b3b6d095265e92c5d0207051
Author: Chris Nauroth <cnaur...@apache.org>
AuthorDate: Fri Aug 29 16:22:58 2025 +0000

    HADOOP-19343: Add hadoop-gcp configuration to core-default.xml and ServiceLoader file.

    Closes #7916

    Signed-off-by: Shilun Fan <slfan1...@apache.org>
---
 .../src/main/resources/core-default.xml         | 200 ++++++++++++++++++++-
 .../services/org.apache.hadoop.fs.FileSystem    |  16 ++
 .../hadoop-gcp/src/test/resources/core-site.xml |   8 -
 3 files changed, 209 insertions(+), 15 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index a0e7e9520ba..76664ea72d2 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -1285,7 +1285,7 @@
 <property>
   <name>fs.viewfs.overload.scheme.target.gs.impl</name>
-  <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
+  <value>org.apache.hadoop.fs.gs.GoogleHadoopFileSystem</value>
   <description>The GoogleHadoopFS/Google Cloud Storage file system for view
     file system overload scheme when child file system and ViewFSOverloadScheme's
     schemes are gs.
@@ -2373,12 +2373,6 @@ The switch to turn S3A auditing on or off.
     otherwise fall back to hadoop.tmp.dir
   </description>
 </property>
-<property>
-  <name>fs.AbstractFileSystem.gs.impl</name>
-  <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
-  <description>The AbstractFileSystem for gs: uris.</description>
-</property>
-
 <property>
   <name>fs.azure.enable.readahead</name>
   <value>true</value>
@@ -4509,4 +4503,196 @@ The switch to turn S3A auditing on or off.
     If the value is less than or equal to 0, the cache is disabled entirely.
   </description>
 </property>
+
+<property>
+  <name>fs.gs.impl</name>
+  <value>org.apache.hadoop.fs.gs.GoogleHadoopFileSystem</value>
+  <description>The FileSystem for gs: uris.</description>
+</property>
+
+<property>
+  <name>fs.AbstractFileSystem.gs.impl</name>
+  <value>org.apache.hadoop.fs.gs.Gs</value>
+  <description>The AbstractFileSystem for gs: uris.</description>
+</property>
+
+<property>
+  <name>fs.gs.project.id</name>
+  <description>
+    Google Cloud Project ID with access to Google Cloud Storage buckets.
+    Required only for list buckets and create bucket operations.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.working.dir</name>
+  <value>/</value>
+  <description>
+    The directory within the default bucket that relative gs: uris resolve
+    against.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.rewrite.max.chunk.size</name>
+  <value>512m</value>
+  <description>
+    Maximum size of an object chunk that will be rewritten in a single rewrite
+    request when fs.gs.copy.with.rewrite.enable is set to true.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.bucket.delete.enable</name>
+  <value>false</value>
+  <description>
+    If true, recursive delete on a path that refers to a Cloud Storage bucket
+    itself, or delete on that path when it is empty, will result in deletion of
+    the bucket itself. If false, any operation that normally would have deleted
+    the bucket will be ignored. Setting to false preserves the typical behavior
+    of "rm -rf /", which translates to deleting everything inside of root
+    without clobbering the filesystem authority corresponding to that root path
+    in the process.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.block.size</name>
+  <value>64m</value>
+  <description>
+    The reported block size of the file system. This does not change any
+    behavior of the connector or the underlying Google Cloud Storage objects.
+    However, it will affect the number of splits Hadoop MapReduce uses for a
+    given input.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.create.items.conflict.check.enable</name>
+  <value>true</value>
+  <description>
+    Enables a check that ensures that conflicting directories do not exist when
+    creating files and conflicting files do not exist when creating
+    directories.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.marker.file.pattern</name>
+  <description>
+    If set, files that match the specified pattern are copied last during a
+    folder rename operation.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.auth.type</name>
+  <value>COMPUTE_ENGINE</value>
+  <description>
+    The type of authentication mechanism to use for Google Cloud Storage
+    access. Valid values: APPLICATION_DEFAULT, COMPUTE_ENGINE,
+    SERVICE_ACCOUNT_JSON_KEYFILE, UNAUTHENTICATED, USER_CREDENTIALS.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.auth.service.account.json.keyfile</name>
+  <description>
+    The path to the JSON keyfile for the service account when the
+    fs.gs.auth.type property is set to SERVICE_ACCOUNT_JSON_KEYFILE. The file
+    must exist at the same path on all nodes.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.auth.client.id</name>
+  <description>
+    The OAuth2 client ID.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.auth.client.secret</name>
+  <description>
+    The OAuth2 client secret.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.auth.refresh.token</name>
+  <description>
+    The refresh token.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.inputstream.support.gzip.encoding.enable</name>
+  <value>false</value>
+  <description>
+    If set to false, reading files with GZIP content encoding (HTTP header
+    Content-Encoding: gzip) will result in failure (an IOException is thrown).
+
+    This feature is disabled by default because processing of GZIP encoded
+    files is inefficient and error-prone in Hadoop and Spark.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.outputstream.buffer.size</name>
+  <value>8m</value>
+  <description>
+    Write buffer size used by the file system API to send data to the Cloud
+    Storage upload thread via pipes. The various pipe types are documented
+    below.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.outputstream.sync.min.interval</name>
+  <value>0</value>
+  <description>
+    Output stream configuration that controls the minimum interval between
+    consecutive syncs. This helps avoid being rate-limited by Google Cloud
+    Storage. The default is 0: no wait between syncs. Note that hflush() is a
+    no-op if called more frequently than the minimum sync interval, and
+    hsync() blocks until the end of a minimum sync interval.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.inputstream.fadvise</name>
+  <value>AUTO</value>
+  <description>
+    Tunes object read behavior to optimize HTTP GET requests for various use
+    cases. Valid values: SEQUENTIAL, RANDOM, AUTO, AUTO_RANDOM.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.fadvise.request.track.count</name>
+  <value>3</value>
+  <description>
+    Self-adaptive fadvise mode uses the distance between served requests to
+    decide the access pattern. This property controls how many such requests
+    are tracked. It is used when AUTO_RANDOM is selected.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.inputstream.inplace.seek.limit</name>
+  <value>8m</value>
+  <description>
+    If forward seeks are within this many bytes of the current position, seeks
+    are performed by reading and discarding bytes in-place rather than opening
+    a new underlying stream.
+  </description>
+</property>
+
+<property>
+  <name>fs.gs.inputstream.min.range.request.size</name>
+  <value>2m</value>
+  <description>
+    Minimum size in bytes of the read range for a Cloud Storage request when
+    opening a new stream to read an object.
+  </description>
+</property>
 </configuration>
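With fs.gs.impl and fs.AbstractFileSystem.gs.impl now carried in core-default.xml, a
client can reach Cloud Storage through the standard FileSystem API without any per-site
scheme wiring. A minimal sketch, assuming hadoop-gcp and its dependencies are on the
classpath; the bucket name, keyfile path, and the class name GcsListingExample are
illustrative placeholders, not values from this commit:

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class GcsListingExample {
      public static void main(String[] args) throws Exception {
        // fs.gs.impl now defaults to org.apache.hadoop.fs.gs.GoogleHadoopFileSystem
        // via core-default.xml, so no explicit scheme mapping is set here.
        Configuration conf = new Configuration();

        // Hypothetical auth overrides for illustration only; with the
        // COMPUTE_ENGINE default these are unnecessary on GCE VMs.
        conf.set("fs.gs.auth.type", "SERVICE_ACCOUNT_JSON_KEYFILE");
        conf.set("fs.gs.auth.service.account.json.keyfile", "/etc/security/gcs-key.json");

        FileSystem fs = FileSystem.get(URI.create("gs://example-bucket/"), conf);
        for (FileStatus status : fs.listStatus(new Path("gs://example-bucket/data/"))) {
          System.out.println(status.getPath() + " " + status.getLen());
        }
      }
    }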
diff --git a/hadoop-tools/hadoop-gcp/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/hadoop-tools/hadoop-gcp/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem
new file mode 100644
index 00000000000..a727523db3c
--- /dev/null
+++ b/hadoop-tools/hadoop-gcp/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.hadoop.fs.gs.GoogleHadoopFileSystem
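Hadoop discovers FileSystem implementations through java.util.ServiceLoader, which reads
every META-INF/services/org.apache.hadoop.fs.FileSystem file on the classpath; that is
what the new service file above provides for hadoop-gcp. Either mechanism, the fs.gs.impl
default or the ServiceLoader entry, lets FileSystem.get resolve the gs scheme. A minimal
sketch of the discovery path, assuming hadoop-gcp is on the classpath; the class name
ListFileSystemProviders is a placeholder:

    import java.util.ServiceLoader;

    import org.apache.hadoop.fs.FileSystem;

    public class ListFileSystemProviders {
      public static void main(String[] args) {
        // Iterating the loader instantiates each registered FileSystem provider.
        // With hadoop-gcp present, org.apache.hadoop.fs.gs.GoogleHadoopFileSystem
        // appears in this listing thanks to the service file added above.
        for (FileSystem fs : ServiceLoader.load(FileSystem.class)) {
          System.out.println(fs.getClass().getName());
        }
      }
    }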
diff --git a/hadoop-tools/hadoop-gcp/src/test/resources/core-site.xml b/hadoop-tools/hadoop-gcp/src/test/resources/core-site.xml
index eb12fe132f4..25109f98e7d 100644
--- a/hadoop-tools/hadoop-gcp/src/test/resources/core-site.xml
+++ b/hadoop-tools/hadoop-gcp/src/test/resources/core-site.xml
@@ -33,14 +33,6 @@
     <name>hadoop.security.authentication</name>
     <value>simple</value>
   </property>
-  <property>
-    <name>fs.gs.impl</name>
-    <value>org.apache.hadoop.fs.gs.GoogleHadoopFileSystem</value>
-  </property>
-  <property>
-    <name>fs.AbstractFileSystem.gs.impl</name>
-    <value>org.apache.hadoop.fs.gs.Gs</value>
-  </property>

   <!-- To run these tests.

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org
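Postscript on the test core-site.xml change in the last hunk: the two overrides could be
removed because Configuration loads core-default.xml automatically, so the gs: bindings
added by this commit are present by default. A minimal check, assuming a hadoop-common
build that includes this change; the class name GsDefaultBindings is a placeholder:

    import org.apache.hadoop.conf.Configuration;

    public class GsDefaultBindings {
      public static void main(String[] args) {
        // core-default.xml is on every Configuration's default resource list,
        // so the gs bindings resolve without any core-site.xml entries.
        Configuration conf = new Configuration();
        System.out.println("fs.gs.impl = " + conf.get("fs.gs.impl"));
        System.out.println("fs.AbstractFileSystem.gs.impl = "
            + conf.get("fs.AbstractFileSystem.gs.impl"));
      }
    }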