This is an automated email from the ASF dual-hosted git repository. jinglun pushed a commit to branch HADOOP-19236 in repository https://gitbox.apache.org/repos/asf/hadoop.git
commit a9c765a6ba71d5eb090a8545e498439b1720c8a5 Author: lijinglun <lijing...@bytedance.com> AuthorDate: Wed Aug 21 17:49:40 2024 +0800 Integration of TOS: Add TosKeys and ConfKeys. --- .../org/apache/hadoop/fs/tosfs/conf/ConfKeys.java | 17 ++ .../org/apache/hadoop/fs/tosfs/conf/TosKeys.java | 203 +++++++++++++++++++++ 2 files changed, 220 insertions(+) diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/ConfKeys.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/ConfKeys.java index 8bf122686cd..a059baaf11e 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/ConfKeys.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/ConfKeys.java @@ -20,8 +20,18 @@ package org.apache.hadoop.fs.tosfs.conf; public class ConfKeys { + /** + * Object storage endpoint to connect to, which should include both region and object domain name. + * e.g. 'fs.tos.endpoint'='tos-cn-beijing.volces.com'. + */ public static final ArgumentKey FS_TOS_ENDPOINT = new ArgumentKey("fs.%s.endpoint"); + /** + * The region of the object storage, e.g. fs.tos.region. Parsing template "fs.%s.endpoint" to + * know the region. + */ + public static final ArgumentKey FS_TOS_REGION = new ArgumentKey("fs.%s.region"); + /** * The object storage implementation for the defined scheme. For example, we can delegate the * scheme 'abc' to TOS (or other object storage),and access the TOS object storage as @@ -29,4 +39,11 @@ public class ConfKeys { */ public static final ArgumentKey FS_OBJECT_STORAGE_IMPL = new ArgumentKey("fs.objectstorage.%s.impl"); + + /** + * The batch size of deleting multiple objects per request for the given object storage. + * e.g. 
fs.tos.delete.batch-size + */ + public static final ArgumentKey FS_BATCH_DELETE_SIZE = new ArgumentKey("fs.%s.delete.batch-size"); + public static final int FS_BATCH_DELETE_SIZE_DEFAULT = 250; } diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/TosKeys.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/TosKeys.java index ca4e863654c..075f3a2d09e 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/TosKeys.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/conf/TosKeys.java @@ -18,6 +18,9 @@ package org.apache.hadoop.fs.tosfs.conf; +import org.apache.hadoop.fs.tosfs.object.ChecksumType; +import org.apache.hadoop.fs.tosfs.object.tos.TOSErrorCodes; + public class TosKeys { /** @@ -72,4 +75,204 @@ public class TosKeys { public static final String[] FS_TOS_CUSTOM_CREDENTIAL_PROVIDER_CLASSES_DEFAULT = new String[] { "org.apache.hadoop.fs.tosfs.object.tos.auth.EnvironmentCredentialsProvider", "org.apache.hadoop.fs.tosfs.object.tos.auth.SimpleCredentialsProvider" }; + + /** + * Set a canned ACL for newly created and copied objects. Value may be 'private', 'public-read', + * 'public-read-write', 'authenticated-read', 'bucket-owner-read', 'bucket-owner-full-control', + * 'bucket-owner-entrusted'. If set, caller IAM role must have 'tos:PutObjectAcl' permission on + * the bucket. + */ + public static final String FS_TOS_ACL_DEFAULT = "fs.tos.acl.default"; + + // TOS http client. + /** + * The maximum number of connections to the TOS service that a client can create. + */ + public static final String FS_TOS_HTTP_MAX_CONNECTIONS = "fs.tos.http.maxConnections"; + public static final int FS_TOS_HTTP_MAX_CONNECTIONS_DEFAULT = 1024; + + /** + * The time that a connection thread can be in idle state, larger than which the thread will be + * terminated. 
+ */ + public static final String FS_TOS_HTTP_IDLE_CONNECTION_TIME_MILLS = + "fs.tos.http.idleConnectionTimeMills"; + public static final int FS_TOS_HTTP_IDLE_CONNECTION_TIME_MILLS_DEFAULT = 60000; + + /** + * The connect timeout that the tos client tries to connect to the TOS service. + */ + public static final String FS_TOS_HTTP_CONNECT_TIMEOUT_MILLS = "fs.tos.http.connectTimeoutMills"; + public static final int FS_TOS_HTTP_CONNECT_TIMEOUT_MILLS_DEFAULT = 10000; + + /** + * The reading timeout when reading data from tos. Note that it is configured for the tos client, + * not proton. + */ + public static final String FS_TOS_HTTP_READ_TIMEOUT_MILLS = "fs.tos.http.readTimeoutMills"; + public static final int FS_TOS_HTTP_READ_TIMEOUT_MILLS_DEFAULT = 30000; + + /** + * The writing timeout when uploading data to tos. Note that it is configured for the tos client, + * not proton. + */ + public static final String FS_TOS_HTTP_WRITE_TIMEOUT_MILLS = "fs.tos.http.writeTimeoutMills"; + public static final int FS_TOS_HTTP_WRITE_TIMEOUT_MILLS_DEFAULT = 30000; + + /** + * Enables SSL connections to TOS or not. + */ + public static final String FS_TOS_HTTP_ENABLE_VERIFY_SSL = "fs.tos.http.enableVerifySSL"; + public static final boolean FS_TOS_HTTP_ENABLE_VERIFY_SSL_DEFAULT = true; + + /** + * The timeout (in minutes) of the dns cache used in tos client. + */ + public static final String FS_TOS_HTTP_DNS_CACHE_TIME_MINUTES = "fs.tos.http.dnsCacheTimeMinutes"; + public static final int FS_TOS_HTTP_DNS_CACHE_TIME_MINUTES_DEFAULT = 0; + + /** + * True to create the missed parent dir asynchronously during deleting or renaming a file or dir. 
+ */ + public static final String FS_ASYNC_CREATE_MISSED_PARENT = "fs.tos.missed.parent.dir.async-create"; + public static final boolean FS_ASYNC_CREATE_MISSED_PARENT_DEFAULT = false; + + /** + * Whether to enable the tos getFileStatus API, which returns the object info directly in one RPC + * request; otherwise, three RPC requests might be needed to get the object info. + * For example, if a key 'a/b/c' exists in TOS and we want to get the object status of 'a/b', + * the GetFileStatus('a/b') will return the prefix 'a/b/' as a directory object directly. If this + * property is disabled, we need to head('a/b') at first, and then head('a/b/'), and lastly call + * list('a/b/', limit=1) to get the object info. Using the GetFileStatus API can reduce the + * number of RPC calls. + */ + public static final String FS_TOS_GET_FILE_STATUS_ENABLED = "fs.tos.get-file-status.enabled"; + public static final boolean FS_TOS_GET_FILE_STATUS_ENABLED_DEFAULT = true; + + /** + * Used for directory bucket: whether to enable the recursive delete capability in TOS server, + * which will atomically delete all objects under the given dir (inclusive); otherwise the client + * will list all sub objects, and then send batch delete requests to TOS to delete the dir. + */ + public static final String FS_TOS_RMR_SERVER_ENABLED = "fs.tos.rmr.server.enabled"; + public static final boolean FS_FS_TOS_RMR_SERVER_ENABLED_DEFAULT = false; + + /** + * If fs.tos.rmr.client.enabled is true, client will list all objects under the given dir and + * delete them by batch. Setting the value to true will use the recursive delete capability of + * TOS SDK, otherwise objects will be deleted one by one via preorder tree walk. + */ + public static final String FS_TOS_RMR_CLIENT_ENABLE = "fs.tos.rmr.client.enabled"; + public static final boolean FS_TOS_RMR_CLIENT_ENABLE_DEFAULT = true; + + /** + * The prefix will be used as the product name in TOS SDK. The final user agent pattern is + * '{prefix}/Proton/{proton version}'. + * TODO: review it. 
+ */ + public static final String FS_TOS_USER_AGENT_PREFIX = "fs.tos.user.agent.prefix"; + public static final String FS_TOS_USER_AGENT_PREFIX_DEFAULT = "EMR"; + + /** + * The key indicates the name of the tos checksum algorithm. Specify the algorithm name to compare + * checksums between different storage systems. For example, to compare checksums between hdfs + * and tos, we need to configure the algorithm name to COMPOSITE-CRC32C. + */ + public static final String FS_TOS_CHECKSUM_ALGORITHM = "fs.tos.checksum-algorithm"; + public static final String FS_TOS_CHECKSUM_ALGORITHM_DEFAULT = "PROTON-CHECKSUM"; + + /** + * The key indicates how to retrieve the file checksum from tos; an error will be thrown if the + * configured checksum type is not supported by tos. The supported checksum types are: + * CRC32C, CRC64ECMA. + */ + public static final String FS_TOS_CHECKSUM_TYPE = "fs.tos.checksum-type"; + public static final String FS_TOS_CHECKSUM_TYPE_DEFAULT = ChecksumType.CRC64ECMA.name(); + + // TOS common keys. + /** + * The threshold that decides whether to reuse the socket connection to optimize read performance + * when closing the tos object inputstream of get object. If the remaining bytes are less than + * max drain bytes when closing the inputstream, the bytes will just be skipped instead of + * closing the socket connection. + */ + public static final String FS_TOS_MAX_DRAIN_BYTES = "fs.tos.max-drain-bytes"; + public static final long FS_TOS_MAX_DRAIN_BYTES_DEFAULT = 1024 * 1024L; + + /** + * Whether to disable the tos http client cache in the current JVM. + */ + public static final String FS_TOS_DISABLE_CLIENT_CACHE = "fs.tos.client.disable.cache"; + public static final boolean FS_TOS_DISABLE_CLIENT_CACHE_DEFAULT = false; + + /** + * The batch size when deleting the objects in batches. 
+ */ + public static final String FS_TOS_DELETE_OBJECTS_COUNT = "fs.tos.batch.delete.objects-count"; + public static final int FS_TOS_DELETE_OBJECTS_COUNT_DEFAULT = 1000; + + /** + * The maximum number of retries when deleting objects in batches fails. + */ + public static final String FS_TOS_BATCH_DELETE_MAX_RETRIES = "fs.tos.batch.delete.max-retries"; + public static final int FS_TOS_BATCH_DELETE_MAX_RETRIES_DEFAULT = 20; + + /** + * The codes from the TOS deleteMultiObjects response. Proton will resend the batch delete + * request to delete the failed keys again if the response only contains these codes; otherwise + * it won't send the request anymore. + */ + public static final String FS_TOS_BATCH_DELETE_RETRY_CODES = "fs.tos.batch.delete.retry-codes"; + public static final String[] FS_TOS_BATCH_DELETE_RETRY_CODES_DEFAULT = + new String[] { "ExceedAccountQPSLimit", "ExceedAccountRateLimit", "ExceedBucketQPSLimit", + "ExceedBucketRateLimit", "InternalError", "ServiceUnavailable", "SlowDown", + "TooManyRequests" }; + + /** + * The retry interval (in milliseconds) when deleting objects in batches fails. + */ + public static final String FS_TOS_BATCH_DELETE_RETRY_INTERVAL = + "fs.tos.batch.delete.retry.interval"; + public static final long FS_TOS_BATCH_DELETE_RETRY_INTERVAL_DEFAULT = 1000L; + + /** + * The batch size of listing objects per request for the given object storage, such as listing a + * directory, searching for all objects whose path starts with the directory path, and returning + * them as a list. + */ + public static final String FS_TOS_LIST_OBJECTS_COUNT = "fs.tos.list.objects-count"; + public static final int FS_TOS_LIST_OBJECTS_COUNT_DEFAULT = 1000; + + /** + * The maximum number of retries when sending a request via TOS client. Proton will resend the + * request if it gets retryable exceptions, e.g. SocketException, UnknownHostException, + * SSLException, InterruptedException, SocketTimeoutException, or gets TOO_MANY_REQUESTS, + * INTERNAL_SERVER_ERROR http codes. 
+ */ + public static final String FS_TOS_REQUEST_MAX_RETRY_TIMES = "fs.tos.request.max.retry.times"; + public static final int FS_TOS_REQUEST_MAX_RETRY_TIMES_DEFAULT = 20; + + /** + * A fast-fail error code means the error cannot be solved by retrying the request. The TOS + * client won't retry the request if it receives a 409 http status code and the error code is in + * the configured non-retryable error code list. + */ + public static final String FS_TOS_FAST_FAILURE_409_ERROR_CODES = + "fs.tos.fast-fail-409-error-codes"; + public static final String FS_TOS_FAST_FAILURE_409_ERROR_CODES_DEFAULT = + TOSErrorCodes.FAST_FAILURE_CONFLICT_ERROR_CODES; + + /** + * The maximum number of retries when reading object content via TOS client. Proton will resend + * the request to create a new input stream if it gets an unexpected end of stream error while + * reading the input stream. + */ + public static final String FS_TOS_MAX_READ_OBJECT_RETRIES = "fs.tos.inputstream.max.retry.times"; + public static final int FS_TOS_MAX_READ_OBJECT_RETRIES_DEFAULT = 5; + + /** + * Whether to enable the crc check when uploading files to tos. + */ + public static final String FS_TOS_CRC_CHECK_ENABLED = "fs.tos.crc.check.enable"; + public static final boolean FS_TOS_CRC_CHECK_ENABLED_DEFAULT = true; } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org