Github user sureshsubbiah commented on a diff in the pull request: https://github.com/apache/trafodion/pull/1417#discussion_r164280205 --- Diff: core/sql/src/main/java/org/trafodion/sql/HDFSClient.java --- @@ -0,0 +1,319 @@ +// @@@ START COPYRIGHT @@@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// +// @@@ END COPYRIGHT @@@ + +package org.trafodion.sql; + +import org.apache.log4j.PropertyConfigurator; +import org.apache.log4j.Logger; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.conf.Configuration; +import java.nio.ByteBuffer; +import java.io.IOException; +import java.io.OutputStream; +import java.util.concurrent.Callable; +import java.util.concurrent.Future; +import java.util.concurrent.Executors; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.io.compress.CodecPool; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.io.SequenceFile.CompressionType; +import org.apache.hadoop.util.ReflectionUtils; + +public class HDFSClient +{ + static Logger logger_ = Logger.getLogger(HDFSClient.class.getName()); + private static Configuration config_ = null; + private static ExecutorService executorService_ = null; + private static FileSystem defaultFs_ = null; + private FileSystem fs_ = null; + private int bufNo_; + private FSDataInputStream fsdis_; + private OutputStream outStream_; + private String filename_; + private ByteBuffer buf_; + private int bufLen_; + private int bufOffset_ = 0; + private long pos_ = 0; + private int len_ = 0; + private int lenRemain_ = 0; + private int blockSize_; + private int bytesRead_; + private Future future_ = null; + + static { + String confFile = System.getProperty("trafodion.log4j.configFile"); + System.setProperty("trafodion.root", System.getenv("TRAF_HOME")); + if (confFile == null) { + confFile = System.getenv("TRAF_CONF") + "/log4j.sql.config"; + } + PropertyConfigurator.configure(confFile); + 
config_ = TrafConfiguration.create(TrafConfiguration.HDFS_CONF); + executorService_ = Executors.newCachedThreadPool(); + try { + defaultFs_ = FileSystem.get(config_); + } + catch (IOException ioe) { + throw new RuntimeException("Exception in HDFSClient static block", ioe); + } + } + + class HDFSRead implements Callable --- End diff -- Could you please explain how the classes HdfsClient, HdfsClient.HDFSRead and HdfsScan are related? Thank you for the nice comments in HdfsScan.java. I took HdfsClient to be the class that contains all the methods that we removed from SequenceFileWriter. If that is true, why do we need an HDFSRead nested class? Is this for the future, or is there some functionality that I missed? For error row logging, do we need read?
---