Github user jackylk commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/104#discussion_r76595559
--- Diff:
processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java
---
@@ -112,25 +116,29 @@ private void initializeReader() throws IOException {
// if already one input stream is open first we need to close and then
// open new stream
close();
- // get the block offset
- long startOffset =
this.csvParserVo.getBlockDetailsList().get(blockCounter).getBlockOffset();
- FileType fileType = FileFactory
-
.getFileType(this.csvParserVo.getBlockDetailsList().get(blockCounter).getFilePath());
- // calculate the end offset the block
- long endOffset =
-
this.csvParserVo.getBlockDetailsList().get(blockCounter).getBlockLength() +
startOffset;
-
- // create a input stream for the block
- DataInputStream dataInputStream = FileFactory
-
.getDataInputStream(this.csvParserVo.getBlockDetailsList().get(blockCounter).getFilePath(),
- fileType, bufferSize, startOffset);
- // if start offset is not 0 then reading then reading and ignoring the
extra line
- if (startOffset != 0) {
- LineReader lineReader = new LineReader(dataInputStream, 1);
- startOffset += lineReader.readLine(new Text(), 0);
+
+ String path =
this.csvParserVo.getBlockDetailsList().get(blockCounter).getFilePath();
+ FileType fileType = FileFactory.getFileType(path);
+
+ if (path.endsWith(".gz")) {
+ DataInputStream dataInputStream =
+ FileFactory.getCompressedDataInputStream(path, fileType,
bufferSize);
+ inputStreamReader = new BufferedReader(new
InputStreamReader(dataInputStream));
+ } else {
+ long startOffset =
this.csvParserVo.getBlockDetailsList().get(blockCounter).getBlockOffset();
+ long blockLength =
this.csvParserVo.getBlockDetailsList().get(blockCounter).getBlockLength();
+ long endOffset = blockLength + startOffset;
+
+ DataInputStream dataInputStream =
FileFactory.getDataInputStream(path, fileType, bufferSize);
+
+ // if start offset is not 0 then reading then reading and ignoring
the extra line
+ if (startOffset != 0) {
+ LineReader lineReader = new LineReader(dataInputStream, 1);
+ startOffset += lineReader.readLine(new Text(), 0);
+ }
+ inputStreamReader = new BufferedReader(new InputStreamReader(
+ new BoundedDataStream(dataInputStream, endOffset -
startOffset)));
--- End diff --
I forgot to add this.
It has been added now.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like to enable it, or if the feature is enabled but not
working, please contact infrastructure at [email protected] or file a JIRA
ticket with INFRA.
---