eskabetxe commented on a change in pull request #11307: [FLINK-16371] [BulkWriter] Fix Hadoop Compression BulkWriter
URL: https://github.com/apache/flink/pull/11307#discussion_r388523946
 
 

 ##########
 File path: flink-formats/flink-compress/src/main/java/org/apache/flink/formats/compress/CompressWriterFactory.java
 ##########
 @@ -42,33 +47,41 @@
 
        private Extractor<IN> extractor;
        private CompressionCodec hadoopCodec;
+       private String hadoopCodecName;
+       private Map<String, String> hadoopConfigurationMap;
 
        public CompressWriterFactory(Extractor<IN> extractor) {
                this.extractor = Preconditions.checkNotNull(extractor, "extractor cannot be null");
+               this.hadoopConfigurationMap = new HashMap<>();
        }
 
        public CompressWriterFactory<IN> withHadoopCompression(String hadoopCodecName) {
                return withHadoopCompression(hadoopCodecName, new Configuration());
        }
 
        public CompressWriterFactory<IN> withHadoopCompression(String hadoopCodecName, Configuration hadoopConfiguration) {
-               return withHadoopCompression(new CompressionCodecFactory(hadoopConfiguration).getCodecByName(hadoopCodecName));
-       }
+               this.hadoopCodecName = hadoopCodecName;
+
+               for (Map.Entry<String, String> entry : hadoopConfiguration) {
+                       hadoopConfigurationMap.put(entry.getKey(), entry.getValue());
+               }
 
-       public CompressWriterFactory<IN> withHadoopCompression(CompressionCodec hadoopCodec) {
-               this.hadoopCodec = Preconditions.checkNotNull(hadoopCodec, "hadoopCodec cannot be null");
                return this;
        }
 
        @Override
        public BulkWriter<IN> create(FSDataOutputStream out) throws IOException {
-               try {
-                       return (hadoopCodec != null)
-                               ? new HadoopCompressionBulkWriter<>(out, extractor, hadoopCodec)
-                               : new NoCompressionBulkWriter<>(out, extractor);
-               } catch (Exception e) {
-                       throw new IOException(e.getLocalizedMessage(), e);
+               if (StringUtils.isEmpty(hadoopCodecName)) {
+                       return new NoCompressionBulkWriter<>(out, extractor);
+               }
+
+               initializeCompressionCodec();
+
+               if (hadoopCodec == null) {
+                       throw new RuntimeException("Unable to load the provided compression codec [" + hadoopCodecName + "]");
                }
+
+               return new HadoopCompressionBulkWriter<>(hadoopCodec.createOutputStream(out), extractor);
        }
 
        public String codecExtension() {
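
 For context, initializeCompressionCodec() is called in create() above but its body is not part of this hunk; the following is a minimal sketch of what it presumably does, assuming it rebuilds the Hadoop Configuration from the serializable map and resolves the codec by name:

        // Sketch only (assumed behavior; the real helper is outside this hunk):
        // rebuild the Hadoop Configuration from the serializable String map,
        // then resolve the codec lazily on the task side, after deserialization.
        private void initializeCompressionCodec() {
                if (hadoopCodec == null) {
                        Configuration conf = new Configuration();
                        for (Map.Entry<String, String> entry : hadoopConfigurationMap.entrySet()) {
                                conf.set(entry.getKey(), entry.getValue());
                        }
                        hadoopCodec = new CompressionCodecFactory(conf).getCodecByName(hadoopCodecName);
                }
        }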
 
 Review comment:
   this will be useless, as it returns "" until create() is called.
   
   we added this to allow, for example, configuring OutputFileConfig.partSuffix with the codec extension (a sketch of that usage follows below)
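   
   A minimal sketch of that usage (codec name, path, and element type are illustrative), showing why the lazy initialization breaks it:
   
        // Illustrative only: configure the part-file suffix from codecExtension().
        // With the lazy initialization in this change, codecExtension() is read
        // before create() has run, so it would return "" instead of e.g. ".gz".
        CompressWriterFactory<String> factory = CompressWriters
                .forExtractor(new DefaultExtractor<String>())
                .withHadoopCompression("Gzip");
   
        OutputFileConfig fileConfig = OutputFileConfig.builder()
                .withPartSuffix(factory.codecExtension())
                .build();
   
        StreamingFileSink<String> sink = StreamingFileSink
                .forBulkFormat(new Path("/tmp/output"), factory)
                .withOutputFileConfig(fileConfig)
                .build();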

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services
