[ 
https://issues.apache.org/jira/browse/HADOOP-18637?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17704175#comment-17704175
 ] 

ASF GitHub Bot commented on HADOOP-18637:
-----------------------------------------

mukund-thakur commented on code in PR #5481:
URL: https://github.com/apache/hadoop/pull/5481#discussion_r1146257205


##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java:
##########
@@ -1255,4 +1255,8 @@ private Constants() {
    */
   public static final String PREFETCH_BLOCK_COUNT_KEY = 
"fs.s3a.prefetch.block.count";
   public static final int PREFETCH_BLOCK_DEFAULT_COUNT = 8;
+
+  public static final String ALLOW_MULTIPART_UPLOADS = 
"fs.s3a.allow.multipart.uploads";
+
+  public static final boolean IS_ALLOWED_MULTIPART_UPLOADS_DEFAULT = true;

Review Comment:
   change to MULTIPART_UPLOAD_ENABLED_DEFAULT;



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java:
##########
@@ -564,11 +564,12 @@ class ByteBufferBlock extends DataBlock {
        * @param statistics statistics to update
        */
       ByteBufferBlock(long index,
-          int bufferSize,
+          long bufferSize,
           BlockOutputStreamStatistics statistics) {
         super(index, statistics);
-        this.bufferSize = bufferSize;
-        blockBuffer = requestBuffer(bufferSize);
+        this.bufferSize = bufferSize > Integer.MAX_VALUE ?
+            Integer.MAX_VALUE : (int) bufferSize;
+        blockBuffer = requestBuffer((int) bufferSize);

Review Comment:
   use this.bufferSize rather than casting again. 



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java:
##########
@@ -169,6 +169,9 @@ class S3ABlockOutputStream extends OutputStream implements
   /** Thread level IOStatistics Aggregator. */
   private final IOStatisticsAggregator threadIOStatisticsAggregator;
 
+  /**Is multipart upload allowed? */
+  private final boolean isMultipartAllowed;

Review Comment:
   isMultipartEnabled



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java:
##########
@@ -369,6 +373,9 @@ private synchronized void uploadCurrentBlock(boolean isLast)
    */
   @Retries.RetryTranslated
   private void initMultipartUpload() throws IOException {
+    if (!isMultipartAllowed){
+      return;

Review Comment:
   Throw an exception here instead of silently returning. 



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java:
##########
@@ -1255,4 +1255,8 @@ private Constants() {
    */
   public static final String PREFETCH_BLOCK_COUNT_KEY = 
"fs.s3a.prefetch.block.count";
   public static final int PREFETCH_BLOCK_DEFAULT_COUNT = 8;
+
+  public static final String ALLOW_MULTIPART_UPLOADS = 
"fs.s3a.allow.multipart.uploads";

Review Comment:
   Change to 
   MULTIPART_UPLOADS_ENABLED = "fs.s3a.multipart.uploads.enabled";



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java:
##########
@@ -516,6 +516,7 @@ public void initialize(URI name, Configuration originalConf)
       maxKeys = intOption(conf, MAX_PAGING_KEYS, DEFAULT_MAX_PAGING_KEYS, 1);
       partSize = getMultipartSizeProperty(conf,
           MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
+      LOG.warn("Patcchhhh: The part size is : {}", partSize);

Review Comment:
   delete



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java:
##########
@@ -1831,6 +1832,11 @@ private FSDataOutputStream innerCreateFile(
     final PutObjectOptions putOptions =
         new PutObjectOptions(keep, null, options.getHeaders());
 
+    if(!checkDiskBuffer(getConf())){

Review Comment:
   Just add a method validateOutputStreamConfiguration() and throw the exception 
inside that implementation only. 



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java:
##########
@@ -1031,6 +1031,19 @@ public static long 
getMultipartSizeProperty(Configuration conf,
     return partSize;
   }
 
+  public static boolean checkDiskBuffer(Configuration conf){
+    boolean isAllowedMultipart = conf.getBoolean(ALLOW_MULTIPART_UPLOADS,
+        IS_ALLOWED_MULTIPART_UPLOADS_DEFAULT);
+    if (isAllowedMultipart) {

Review Comment:
   I think this logic is inverted. If multipart upload is enabled then 
FAST_UPLOAD_BUFFER must be disk; otherwise we should throw an error, right?



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java:
##########
@@ -269,8 +269,8 @@ public PutObjectRequest createPutObjectRequest(
       String dest,
       File sourceFile,
       final PutObjectOptions options) {
-    Preconditions.checkState(sourceFile.length() < Integer.MAX_VALUE,
-        "File length is too big for a single PUT upload");
+    //Preconditions.checkState(sourceFile.length() < Integer.MAX_VALUE,

Review Comment:
   Remove this — don't leave commented-out code behind. 



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java:
##########
@@ -436,11 +436,11 @@ static class ByteArrayBlock extends DataBlock {
     private Integer dataSize;
 
     ByteArrayBlock(long index,
-        int limit,
+        long limit,
         BlockOutputStreamStatistics statistics) {
       super(index, statistics);
-      this.limit = limit;
-      buffer = new S3AByteArrayOutputStream(limit);
+      this.limit = (limit > Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int) 
limit;
+      buffer = new S3AByteArrayOutputStream((int) limit);

Review Comment:
   use this.limit.



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java:
##########
@@ -595,7 +596,7 @@ public void initialize(URI name, Configuration originalConf)
       }
       blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER,
           DEFAULT_FAST_UPLOAD_BUFFER);
-      partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize);
+      //partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize);

Review Comment:
   cut





> S3A to support upload of files greater than 2 GB using DiskBlocks
> -----------------------------------------------------------------
>
>                 Key: HADOOP-18637
>                 URL: https://issues.apache.org/jira/browse/HADOOP-18637
>             Project: Hadoop Common
>          Issue Type: Improvement
>          Components: fs/s3
>            Reporter: Harshit Gupta
>            Assignee: Harshit Gupta
>            Priority: Major
>              Labels: pull-request-available
>
> Use S3A DiskBlocks to support the upload of files greater than 2 GB. 
> Currently, the max upload size of a single block is ~2GB. 
> cc: [~mthakur] [[email protected]] [~mehakmeet] 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to