This is an automated email from the ASF dual-hosted git repository.

jinglun pushed a commit to branch HADOOP-19236
in repository https://gitbox.apache.org/repos/asf/hadoop.git

commit a9c765a6ba71d5eb090a8545e498439b1720c8a5
Author: lijinglun <lijing...@bytedance.com>
AuthorDate: Wed Aug 21 17:49:40 2024 +0800

    Integration of TOS: Add TosKeys and ConfKeys.
---
 .../org/apache/hadoop/fs/tosfs/conf/ConfKeys.java  |  17 ++
 .../org/apache/hadoop/fs/tosfs/conf/TosKeys.java   | 203 +++++++++++++++++++++
 2 files changed, 220 insertions(+)

diff --git 
a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/ConfKeys.java
 
b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/ConfKeys.java
index 8bf122686cd..a059baaf11e 100644
--- 
a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/ConfKeys.java
+++ 
b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/ConfKeys.java
@@ -20,8 +20,18 @@ package org.apache.hadoop.fs.tosfs.conf;
 
 public class ConfKeys {
 
+  /**
+   * Object storage endpoint to connect to, which should include both region 
and object domain name.
+   * e.g. 'fs.tos.endpoint'='tos-cn-beijing.volces.com'.
+   */
   public static final ArgumentKey FS_TOS_ENDPOINT = new 
ArgumentKey("fs.%s.endpoint");
 
+  /**
+   * The region of the object storage, e.g. fs.tos.region. Parsing template 
"fs.%s.endpoint" to
+   * know the region.
+   */
+  public static final ArgumentKey FS_TOS_REGION = new 
ArgumentKey("fs.%s.region");
+
   /**
    * The object storage implementation for the defined scheme. For example, we 
can delegate the
    * scheme 'abc' to TOS (or other object storage),and access the TOS object 
storage as
@@ -29,4 +39,11 @@ public class ConfKeys {
    */
   public static final ArgumentKey FS_OBJECT_STORAGE_IMPL =
       new ArgumentKey("fs.objectstorage.%s.impl");
+
+  /**
+   * The batch size of deleting multiple objects per request for the given 
object storage.
+   * e.g. fs.tos.delete.batch-size
+   */
+  public static final ArgumentKey FS_BATCH_DELETE_SIZE = new 
ArgumentKey("fs.%s.delete.batch-size");
+  public static final int FS_BATCH_DELETE_SIZE_DEFAULT = 250;
 }
diff --git 
a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/TosKeys.java
 
b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/TosKeys.java
index ca4e863654c..075f3a2d09e 100644
--- 
a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/TosKeys.java
+++ 
b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/TosKeys.java
@@ -18,6 +18,9 @@
 
 package org.apache.hadoop.fs.tosfs.conf;
 
+import org.apache.hadoop.fs.tosfs.object.ChecksumType;
+import org.apache.hadoop.fs.tosfs.object.tos.TOSErrorCodes;
+
 public class TosKeys {
 
   /**
@@ -72,4 +75,204 @@ public class TosKeys {
   public static final String[] 
FS_TOS_CUSTOM_CREDENTIAL_PROVIDER_CLASSES_DEFAULT =
       new String[] { 
"org.apache.hadoop.fs.tosfs.object.tos.auth.EnvironmentCredentialsProvider",
           
"org.apache.hadoop.fs.tosfs.object.tos.auth.SimpleCredentialsProvider" };
+
+  /**
+   * Set a canned ACL for newly created and copied objects. Value may be 
'private', 'public-read',
+   * 'public-read-write', 'authenticated-read', 'bucket-owner-read', 
'bucket-owner-full-control',
+   * 'bucket-owner-entrusted'. If set, caller IAM role must have 
'tos:PutObjectAcl' permission on
+   * the bucket.
+   */
+  public static final String FS_TOS_ACL_DEFAULT = "fs.tos.acl.default";
+
+  // TOS http client.
+  /**
+   * The maximum number of connections to the TOS service that a client can 
create.
+   */
+  public static final String FS_TOS_HTTP_MAX_CONNECTIONS = 
"fs.tos.http.maxConnections";
+  public static final int FS_TOS_HTTP_MAX_CONNECTIONS_DEFAULT = 1024;
+
+  /**
+   * The maximum time that a connection thread can stay idle; beyond this 
the thread will be
+   * terminated.
+   */
+  public static final String FS_TOS_HTTP_IDLE_CONNECTION_TIME_MILLS =
+      "fs.tos.http.idleConnectionTimeMills";
+  public static final int FS_TOS_HTTP_IDLE_CONNECTION_TIME_MILLS_DEFAULT = 
60000;
+
+  /**
+   * The timeout for the tos client to establish a connection to the TOS 
service.
+   */
+  public static final String FS_TOS_HTTP_CONNECT_TIMEOUT_MILLS = 
"fs.tos.http.connectTimeoutMills";
+  public static final int FS_TOS_HTTP_CONNECT_TIMEOUT_MILLS_DEFAULT = 10000;
+
+  /**
+   * The reading timeout when reading data from tos. Note that it is 
configured for the tos client,
+   * not proton.
+   */
+  public static final String FS_TOS_HTTP_READ_TIMEOUT_MILLS = 
"fs.tos.http.readTimeoutMills";
+  public static final int FS_TOS_HTTP_READ_TIMEOUT_MILLS_DEFAULT = 30000;
+
+  /**
+   * The writing timeout when uploading data to tos. Note that it is 
configured for the tos client,
+   * not proton.
+   */
+  public static final String FS_TOS_HTTP_WRITE_TIMEOUT_MILLS = 
"fs.tos.http.writeTimeoutMills";
+  public static final int FS_TOS_HTTP_WRITE_TIMEOUT_MILLS_DEFAULT = 30000;
+
+  /**
+   * Whether to enable SSL verification for connections to TOS.
+   */
+  public static final String FS_TOS_HTTP_ENABLE_VERIFY_SSL = 
"fs.tos.http.enableVerifySSL";
+  public static final boolean FS_TOS_HTTP_ENABLE_VERIFY_SSL_DEFAULT = true;
+
+  /**
+   * The timeout (in minutes) of the dns cache used in tos client.
+   */
+  public static final String FS_TOS_HTTP_DNS_CACHE_TIME_MINUTES = 
"fs.tos.http.dnsCacheTimeMinutes";
+  public static final int FS_TOS_HTTP_DNS_CACHE_TIME_MINUTES_DEFAULT = 0;
+
+  /**
+   * True to create the missing parent dir asynchronously when deleting or 
renaming a file or dir.
+   */
+  public static final String FS_ASYNC_CREATE_MISSED_PARENT = 
"fs.tos.missed.parent.dir.async-create";
+  public static final boolean FS_ASYNC_CREATE_MISSED_PARENT_DEFAULT = false;
+
+  /**
+   * Whether to enable the tos getFileStatus API, which returns the object 
info directly in one RPC
+   * request; otherwise, three RPC requests might be needed to get the object 
info.
+   * For example, if a key 'a/b/c' exists in TOS and we want to get the 
object status of 'a/b',
+   * the GetFileStatus('a/b') will return the prefix 'a/b/' as a directory 
object directly. If this
+   * property is disabled, we need to head('a/b') at first, and then 
head('a/b/'), and last call
+   * list('a/b/', limit=1) to get object info. Using GetFileStatus API can 
reduce the RPC call
+   * times.
+   */
+  public static final String FS_TOS_GET_FILE_STATUS_ENABLED = 
"fs.tos.get-file-status.enabled";
+  public static final boolean FS_TOS_GET_FILE_STATUS_ENABLED_DEFAULT = true;
+
+  /**
+   * Used for directory bucket: whether to enable the recursive delete capability of 
the TOS server, which will
+   * atomically delete all objects under the given dir (inclusive); otherwise the 
client will list all sub
+   * objects, and then send batch delete requests to TOS to delete the dir.
+   */
+  public static final String FS_TOS_RMR_SERVER_ENABLED = 
"fs.tos.rmr.server.enabled";
+  public static final boolean FS_FS_TOS_RMR_SERVER_ENABLED_DEFAULT = false;
+
+  /**
+   * If fs.tos.rmr.client.enabled is true, client will list all objects under 
the given dir and
+   * delete them by batch. Setting the value to true will use the recursive delete 
capability of TOS SDK,
+   * otherwise objects will be deleted one by one via preorder tree walk.
+   */
+  public static final String FS_TOS_RMR_CLIENT_ENABLE = 
"fs.tos.rmr.client.enabled";
+  public static final boolean FS_TOS_RMR_CLIENT_ENABLE_DEFAULT = true;
+
+  /**
+   * The prefix will be used as the product name in TOS SDK. The final user 
agent pattern is
+   * '{prefix}/Proton/{proton version}'.
+   * TODO: review it.
+   */
+  public static final String FS_TOS_USER_AGENT_PREFIX = 
"fs.tos.user.agent.prefix";
+  public static final String FS_TOS_USER_AGENT_PREFIX_DEFAULT = "EMR";
+
+  /**
+   * The key indicates the name of the tos checksum algorithm. Specify the 
algorithm name to compare
+   * checksums between different storage systems. For example to compare 
checksums between hdfs and
+   * tos, we need to configure the algorithm name to COMPOSITE-CRC32C.
+   */
+  public static final String FS_TOS_CHECKSUM_ALGORITHM = 
"fs.tos.checksum-algorithm";
+  public static final String FS_TOS_CHECKSUM_ALGORITHM_DEFAULT = 
"PROTON-CHECKSUM";
+
+  /**
+   * The key indicates how to retrieve file checksum from tos, error will be 
thrown if the
+   * configured checksum type is not supported by tos. The supported checksum 
types are:
+   * CRC32C, CRC64ECMA.
+   */
+  public static final String FS_TOS_CHECKSUM_TYPE = "fs.tos.checksum-type";
+  public static final String FS_TOS_CHECKSUM_TYPE_DEFAULT = 
ChecksumType.CRC64ECMA.name();
+
+  // TOS common keys.
+  /**
+   * The threshold indicates whether to reuse the socket connection to optimize 
read performance when
+   * closing the tos object inputstream of get object. If the number of remaining bytes is 
less than max drain
+   * bytes when closing the inputstream, the remaining bytes are just skipped instead of 
closing the socket
+   * connection.
+   */
+  public static final String FS_TOS_MAX_DRAIN_BYTES = "fs.tos.max-drain-bytes";
+  public static final long FS_TOS_MAX_DRAIN_BYTES_DEFAULT = 1024 * 1024L;
+
+  /**
+   * Whether to disable the tos http client cache in the current JVM.
+   */
+  public static final String FS_TOS_DISABLE_CLIENT_CACHE = 
"fs.tos.client.disable.cache";
+  public static final boolean FS_TOS_DISABLE_CLIENT_CACHE_DEFAULT = false;
+
+  /**
+   * The batch size when deleting the objects in batches.
+   */
+  public static final String FS_TOS_DELETE_OBJECTS_COUNT = 
"fs.tos.batch.delete.objects-count";
+  public static final int FS_TOS_DELETE_OBJECTS_COUNT_DEFAULT = 1000;
+
+  /**
+   * The maximum retry times when deleting objects in batches failed.
+   */
+  public static final String FS_TOS_BATCH_DELETE_MAX_RETRIES = 
"fs.tos.batch.delete.max-retries";
+  public static final int FS_TOS_BATCH_DELETE_MAX_RETRIES_DEFAULT = 20;
+
+  /**
+   * The codes from TOS deleteMultiObjects response, Proton will resend the 
batch delete request to
+   * delete the failed keys again if the response only contains these codes, 
otherwise won't send
+   * request anymore.
+   */
+  public static final String FS_TOS_BATCH_DELETE_RETRY_CODES = 
"fs.tos.batch.delete.retry-codes";
+  public static final String[] FS_TOS_BATCH_DELETE_RETRY_CODES_DEFAULT =
+      new String[] { "ExceedAccountQPSLimit", "ExceedAccountRateLimit", 
"ExceedBucketQPSLimit",
+          "ExceedBucketRateLimit", "InternalError", "ServiceUnavailable", 
"SlowDown",
+          "TooManyRequests" };
+
+  /**
+   * The retry interval (in milliseconds) when deleting objects in batches 
failed.
+   */
+  public static final String FS_TOS_BATCH_DELETE_RETRY_INTERVAL =
+      "fs.tos.batch.delete.retry.interval";
+  public static final long FS_TOS_BATCH_DELETE_RETRY_INTERVAL_DEFAULT = 1000L;
+
+  /**
+   * The batch size of listing object per request for the given object 
storage, such as listing a
+   * directory, searching for all objects whose path starts with the directory 
path, and returning
+   * them as a list.
+   */
+  public static final String FS_TOS_LIST_OBJECTS_COUNT = 
"fs.tos.list.objects-count";
+  public static final int FS_TOS_LIST_OBJECTS_COUNT_DEFAULT = 1000;
+
+  /**
+   * The maximum retry times of sending request via TOS client. Proton will 
resend the request if
+   * it gets retryable exceptions, e.g. SocketException, UnknownHostException, 
SSLException,
+   * InterruptedException, SocketTimeoutException, or gets TOO_MANY_REQUESTS, 
INTERNAL_SERVER_ERROR
+   * http codes.
+   */
+  public static final String FS_TOS_REQUEST_MAX_RETRY_TIMES = 
"fs.tos.request.max.retry.times";
+  public static final int FS_TOS_REQUEST_MAX_RETRY_TIMES_DEFAULT = 20;
+
+  /**
+   * The fast-fail error codes mean the error cannot be solved by retrying 
the request. TOS client
+   * won't retry the request if receiving a 409 http status code and if the 
error code is in the
+   * configured non-retryable error code list.
+   */
+  public static final String FS_TOS_FAST_FAILURE_409_ERROR_CODES =
+      "fs.tos.fast-fail-409-error-codes";
+  public static final String FS_TOS_FAST_FAILURE_409_ERROR_CODES_DEFAULT =
+      TOSErrorCodes.FAST_FAILURE_CONFLICT_ERROR_CODES;
+
+  /**
+   * The maximum retry times of reading object content via TOS client, Proton 
will resend the
+   * request to create a new input stream if getting unexpected end of stream 
error during reading
+   * the input stream.
+   */
+  public static final String FS_TOS_MAX_READ_OBJECT_RETRIES = 
"fs.tos.inputstream.max.retry.times";
+  public static final int FS_TOS_MAX_READ_OBJECT_RETRIES_DEFAULT = 5;
+
+  /**
+   * Whether to enable the crc check when uploading files to tos.
+   */
+  public static final String FS_TOS_CRC_CHECK_ENABLED = 
"fs.tos.crc.check.enable";
+  public static final boolean FS_TOS_CRC_CHECK_ENABLED_DEFAULT = true;
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to