[
https://issues.apache.org/jira/browse/HADOOP-18637?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17704175#comment-17704175
]
ASF GitHub Bot commented on HADOOP-18637:
-----------------------------------------
mukund-thakur commented on code in PR #5481:
URL: https://github.com/apache/hadoop/pull/5481#discussion_r1146257205
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java:
##########
@@ -1255,4 +1255,8 @@ private Constants() {
*/
public static final String PREFETCH_BLOCK_COUNT_KEY =
"fs.s3a.prefetch.block.count";
public static final int PREFETCH_BLOCK_DEFAULT_COUNT = 8;
+
+ public static final String ALLOW_MULTIPART_UPLOADS =
"fs.s3a.allow.multipart.uploads";
+
+ public static final boolean IS_ALLOWED_MULTIPART_UPLOADS_DEFAULT = true;
Review Comment:
change to MULTIPART_UPLOAD_ENABLED_DEFAULT;
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java:
##########
@@ -564,11 +564,12 @@ class ByteBufferBlock extends DataBlock {
* @param statistics statistics to update
*/
ByteBufferBlock(long index,
- int bufferSize,
+ long bufferSize,
BlockOutputStreamStatistics statistics) {
super(index, statistics);
- this.bufferSize = bufferSize;
- blockBuffer = requestBuffer(bufferSize);
+ this.bufferSize = bufferSize > Integer.MAX_VALUE ?
+ Integer.MAX_VALUE : (int) bufferSize;
+ blockBuffer = requestBuffer((int) bufferSize);
Review Comment:
use this.bufferSize rather than casting again.
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java:
##########
@@ -169,6 +169,9 @@ class S3ABlockOutputStream extends OutputStream implements
/** Thread level IOStatistics Aggregator. */
private final IOStatisticsAggregator threadIOStatisticsAggregator;
+ /**Is multipart upload allowed? */
+ private final boolean isMultipartAllowed;
Review Comment:
isMultipartEnabled
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java:
##########
@@ -369,6 +373,9 @@ private synchronized void uploadCurrentBlock(boolean isLast)
*/
@Retries.RetryTranslated
private void initMultipartUpload() throws IOException {
+ if (!isMultipartAllowed){
+ return;
Review Comment:
Throw an exception here instead of returning silently.
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java:
##########
@@ -1255,4 +1255,8 @@ private Constants() {
*/
public static final String PREFETCH_BLOCK_COUNT_KEY =
"fs.s3a.prefetch.block.count";
public static final int PREFETCH_BLOCK_DEFAULT_COUNT = 8;
+
+ public static final String ALLOW_MULTIPART_UPLOADS =
"fs.s3a.allow.multipart.uploads";
Review Comment:
Change to
MULTIPART_UPLOADS_ENABLED = "fs.s3a.multipart.uploads.enabled";
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java:
##########
@@ -516,6 +516,7 @@ public void initialize(URI name, Configuration originalConf)
maxKeys = intOption(conf, MAX_PAGING_KEYS, DEFAULT_MAX_PAGING_KEYS, 1);
partSize = getMultipartSizeProperty(conf,
MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
+ LOG.warn("Patcchhhh: The part size is : {}", partSize);
Review Comment:
delete
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java:
##########
@@ -1831,6 +1832,11 @@ private FSDataOutputStream innerCreateFile(
final PutObjectOptions putOptions =
new PutObjectOptions(keep, null, options.getHeaders());
+ if(!checkDiskBuffer(getConf())){
Review Comment:
Just add a method validateOutputStreamConfiguration() and throw an exception
in the implementation only.
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java:
##########
@@ -1031,6 +1031,19 @@ public static long
getMultipartSizeProperty(Configuration conf,
return partSize;
}
+ public static boolean checkDiskBuffer(Configuration conf){
+ boolean isAllowedMultipart = conf.getBoolean(ALLOW_MULTIPART_UPLOADS,
+ IS_ALLOWED_MULTIPART_UPLOADS_DEFAULT);
+ if (isAllowedMultipart) {
Review Comment:
This looks wrong to me. If isAllowedMultipart is enabled, then
FAST_UPLOAD_BUFFER must be disk; otherwise we throw an error — right?
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java:
##########
@@ -269,8 +269,8 @@ public PutObjectRequest createPutObjectRequest(
String dest,
File sourceFile,
final PutObjectOptions options) {
- Preconditions.checkState(sourceFile.length() < Integer.MAX_VALUE,
- "File length is too big for a single PUT upload");
+ //Preconditions.checkState(sourceFile.length() < Integer.MAX_VALUE,
Review Comment:
Remove it; don't leave commented-out code behind.
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java:
##########
@@ -436,11 +436,11 @@ static class ByteArrayBlock extends DataBlock {
private Integer dataSize;
ByteArrayBlock(long index,
- int limit,
+ long limit,
BlockOutputStreamStatistics statistics) {
super(index, statistics);
- this.limit = limit;
- buffer = new S3AByteArrayOutputStream(limit);
+ this.limit = (limit > Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int)
limit;
+ buffer = new S3AByteArrayOutputStream((int) limit);
Review Comment:
use this.limit.
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java:
##########
@@ -595,7 +596,7 @@ public void initialize(URI name, Configuration originalConf)
}
blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER,
DEFAULT_FAST_UPLOAD_BUFFER);
- partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize);
+ //partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize);
Review Comment:
cut
> S3A to support upload of files greater than 2 GB using DiskBlocks
> -----------------------------------------------------------------
>
> Key: HADOOP-18637
> URL: https://issues.apache.org/jira/browse/HADOOP-18637
> Project: Hadoop Common
> Issue Type: Improvement
> Components: fs/s3
> Reporter: Harshit Gupta
> Assignee: Harshit Gupta
> Priority: Major
> Labels: pull-request-available
>
> Use S3A Diskblocks to support the upload of files greater than 2 GB using
> DiskBlocks. Currently, the max upload size of a single block is ~2GB.
> cc: [~mthakur] [[email protected]] [~mehakmeet]
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]