[1/2] trafodion git commit: [TRAFODION-3171] Refactor Hive sequence file reading to use the new implementation
Repository: trafodion Updated Branches: refs/heads/master 6dbea7e87 -> 1650c784e [TRAFODION-3171] Refactor Hive sequence file reading to use the new implementation Fix for hive/TEST006 failure in daily build Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/02ad26b8 Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/02ad26b8 Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/02ad26b8 Branch: refs/heads/master Commit: 02ad26b8e1384c156ac1d877b28b52d0af6b8931 Parents: a9bbe12 Author: selvaganesang Authored: Mon Aug 27 18:23:14 2018 + Committer: selvaganesang Committed: Mon Aug 27 18:23:14 2018 + -- core/sql/src/main/java/org/trafodion/sql/HDFSClient.java | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/trafodion/blob/02ad26b8/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java -- diff --git a/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java b/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java index 39e4c9a..4f06e7b 100644 --- a/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java +++ b/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java @@ -264,6 +264,7 @@ public class HDFSClient int sequenceFileRead(int readLenRemain) throws IOException { boolean eof = false; + boolean nextValue; byte[] byteArray; int readLen; int totalReadLen = 0; @@ -273,7 +274,11 @@ public class HDFSClient while (!eof && lenRemain > 0) { try { tempPos = reader_.getPosition(); -eof = reader_.next(key_, value_); +nextValue = reader_.next(key_, value_); +if (!nextValue) { + eof = true; + break; +} } catch (java.io.EOFException e) {
[1/2] trafodion git commit: [TRAFODION-3171] Refactor Hive sequence file reading to use the new implementation
Repository: trafodion Updated Branches: refs/heads/master 08e0ab09e -> a9bbe12aa [TRAFODION-3171] Refactor Hive sequence file reading to use the new implementation Fix for the following exception seen while accessing hive sequence file with the new implementation *** ERROR[8447] An error occurred during hdfs access. Error Detail: SETUP_HDFS_SCAN java.util.concurrent.ExecutionException: java.nio.BufferOverflowException java.util.concurrent.FutureTask.report(FutureTask.java:122) java.util.concurrent.FutureTask.get(FutureTask.java:192) org.trafodion.sql.HDFSClient.trafHdfsReadBuffer(HDFSClient.java:424) org.trafodion.sql.HdfsScan.trafHdfsRead(HdfsScan.java:215) Caused by java.nio.BufferOverflowException java.nio.DirectByteBuffer.put(DirectByteBuffer.java:363) org.trafodion.sql.HDFSClient.sequenceFileRead(HDFSClient.java:301) org.trafodion.sql.HDFSClient$HDFSRead.call(HDFSClient.java:217) java.util.concurrent.FutureTask.run(FutureTask.java:266) java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) java.lang.Thread.run(Thread.java:748) [2018-08-21 15:56:00] Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/75dfb43c Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/75dfb43c Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/75dfb43c Branch: refs/heads/master Commit: 75dfb43cccb9562e20a23cd1ec38250f31759e5a Parents: 31cab90 Author: selvaganesang Authored: Fri Aug 24 19:58:47 2018 + Committer: selvaganesang Committed: Fri Aug 24 19:58:47 2018 + -- core/sql/src/main/java/org/trafodion/sql/HDFSClient.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/trafodion/blob/75dfb43c/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java -- diff --git a/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java 
b/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java index 6b7f051..81b9129 100644 --- a/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java +++ b/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java @@ -286,7 +286,7 @@ public class HDFSClient buf_.put(byteArray, 0, readLen); buf_.put(recDelimiter_); - lenRemain_ -= (readLen+1); + lenRemain -= (readLen+1); totalReadLen += (readLen+1); } else { // Reset the position because the row can't be copied to buffer
[1/2] trafodion git commit: [TRAFODION-3171] Refactor Hive sequence file reading to use the new implementation
Repository: trafodion Updated Branches: refs/heads/master 5c55e68f8 -> 0550932b4 [TRAFODION-3171] Refactor Hive sequence file reading to use the new implementation Added a known diff file until TRAFODION-3185 is taken care of. Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/7fbee467 Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/7fbee467 Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/7fbee467 Branch: refs/heads/master Commit: 7fbee467beb280a62e65e96244c61458c17df3e9 Parents: 6151b59 Author: selvaganesang Authored: Tue Aug 21 17:49:16 2018 + Committer: selvaganesang Committed: Tue Aug 21 17:49:16 2018 + -- core/sql/regress/hive/DIFF006.KNOWN | 114 +++ 1 file changed, 114 insertions(+) -- http://git-wip-us.apache.org/repos/asf/trafodion/blob/7fbee467/core/sql/regress/hive/DIFF006.KNOWN -- diff --git a/core/sql/regress/hive/DIFF006.KNOWN b/core/sql/regress/hive/DIFF006.KNOWN new file mode 100644 index 000..c2f7977 --- /dev/null +++ b/core/sql/regress/hive/DIFF006.KNOWN @@ -0,0 +1,114 @@ +30a31,143 +> 3,4c3,7 +> < P_PROMO_SK P_PROMO_ID P_START_DATE_SK P_END_DATE_SK P_ITEM_SK P_COST P_RESPONSE_TARGET P_PROMO_NAME P_CHANNEL_DMAIL P_CHANNEL_EMAIL P_CHANNEL_CATALOG P_CHANNEL_TV P_CHANNEL_RADIO P_CHANNEL_PRESS P_CHANNEL_EVENT P_CHANNEL_DEMO P_CHANNEL_DETAILS P_PURPOSE P_DISCOUNT_ACTIVE +> < -- +> --- +> > *** ERROR[8447] An error occurred during hdfs access. Error Detail: SETUP_HDFS_SCAN java.io.IOException: Spliting of compressed sequence file is not supported +> > org.TRAFODION.sql.HDFSClient.initSequenceFileRead(HDFSClient.java:386) +> > org.TRAFODION.sql.HDFSClient.(HDFSClient.java:365) +> > org.TRAFODION.sql.HdfsScan.scheduleHdfsScanRange(HdfsScan.java:173) +> > org.TRAFODION.sql.HdfsScan.trafHdfsRead(HdfsScan.java:260) +> 6,106c9 +> < 1 BAAA 2450164 2450185 10022 1.000E+003 1 ought Y N N N N N N N Men will not say merely. 
Unknown N +> < 2 CAAA 2450118 2450150 2410 1.000E+003 1 able Y N N N N N N N So willing buildings coul Unknown N +> < 3 DAAA 2450675 2450712 10843 1.000E+003 1 pri Y N N N N N N N Companies shall not pr Unknown N +> < 4 EAAA 2450633 2450646 9794 1.000E+003 1 ese N N N N N N N N High, good shoulders can Unknown N +> < 5 FAAA 2450347 2450357 6655 1.000E+003 1 anti N N N N N N N N Huge, competent structure Unknown N +> < 6 GAAA 2450516 2450561 9464 1.000E+003 1 cally N N N N N N N N Boards might not reverse Unknown N +> < 7 HAAA 2450123 2450132 14527 1.000E+003 1 ation N N N N N N N N Effects used to prefer ho Unknown N +> < 8 IAAA 2450350 2450409 304 1.000E+003 1 eing Y N N N N N N N Offences feel only on a f Unknown N +> < 9 JAAA 2450192 2450248 3439 1.000E+003 1 n st Y N N N N N N N External forces shall com Unknown N +> < 10 KAAA 2450324 2450365 3314 1.000E+003 1 bar N N N N N N N N Only local achievements u Unknown N +> < 11 LAAA ? ? ? 1.000E+003 ? ought N N N Teachers shall not make u Unknown N +> < 12 MAAA 2450153 2450169 6688 1.000E+003 1 able N N N N N N N N Also only times would com Unknown N +> < 13 NAAA 2450316 2450365 11354 1.000E+003 1 pri N N N N N N N N Years shall not go later Unknown N +> < 14 OAAA 2450236 2450282 14480 1.000E+003 1 ese N N N N N N N N Adults would not delay ra Unknown N +> < 15 PAAA ? ? ? 1.000E+003 ? anti Y N N N N N Old elements would shake Unknown N +> < 16 ABAA 2450342 2450351 11899 1.000E+003 1 cally Y N N N N N N N Sudden, wooden theories w Unknown N +> < 17 BBAA 2450360 2450406 15529 1.000E+003 1 ation Y N N N N N N N Almost old churches ought Unknown N +> < 18 CBAA 2450581 2450592 8599 1.000E+003 1 eing Y N N N N N N N National communities use Unknown N +> < 19 DBAA 2450623 2450640 5185 1.000E+003 1 n st N N N N N N N N Young families act. 
Most Unknown N +> < 20 EBAA 2450531 2450545 17860 1.000E+003 1 bar Y N N N N N N N Usually common courses fi Unknown N +> < 21 FBAA 2450895 2450904 3632 1.000E+003 1 ought N N N N N N N N British tensions should n Unknown N +> < 22 GBAA 2450596 2450629 8044 1.000E+003 1 able Y N N N N N N N Powers ought to allow to Unknown N +> < 23 HBAA 2450478 2450493 4321 1.000E+003 1 pri N N N N N N N N As main privileges cannot Unknown N +> < 24 IBAA 2450719 2450773 12004 1.000E+003 1 ese Y N N N N N N N Words appear technical, c Unknown N +> < 25
[1/2] trafodion git commit: [TRAFODION-3171] Refactor Hive sequence file reading to use the new implementation
Repository: trafodion Updated Branches: refs/heads/master 5e8bfc70d -> 7fba1c662 [TRAFODION-3171] Refactor Hive sequence file reading to use the new implementation Ensured that splitting of a non-compressed sequence file works. In the case of a compressed sequence file, an exception is thrown when the file is split. Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/1c8c81d7 Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/1c8c81d7 Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/1c8c81d7 Branch: refs/heads/master Commit: 1c8c81d708af0520e61b8c6a20ff69135a6b8f5c Parents: 39d7110 Author: selvaganesang Authored: Mon Aug 13 23:24:47 2018 + Committer: selvaganesang Committed: Mon Aug 13 23:24:47 2018 + -- .../main/java/org/trafodion/sql/HDFSClient.java | 45 ++-- 1 file changed, 32 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/trafodion/blob/1c8c81d7/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java -- diff --git a/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java b/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java index 1995851..6b7f051 100644 --- a/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java +++ b/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java @@ -124,6 +124,8 @@ public class HDFSClient private Writable key_; private Writable value_; private SequenceFile.Reader reader_; + private SequenceFile.CompressionType seqCompressionType_; + static { String confFile = System.getProperty("trafodion.log4j.configFile"); System.setProperty("trafodion.root", System.getenv("TRAF_HOME")); @@ -171,7 +173,8 @@ public class HDFSClient int bytesRead; int totalBytesRead = 0; if (sequenceFile_) { -reader_.sync(pos_); +// do nothing +; } else if (compressed_) { bufArray_ = new byte[ioByteArraySizeInKB_ * 1024]; } @@ -196,11 +199,10 @@ public class HDFSClient } do { -if (compressed_) { - bytesRead = compressedFileRead(lenRemain_); -} else 
if (sequenceFile_) { +if (sequenceFile_) bytesRead = sequenceFileRead(lenRemain_); -} +else if (compressed_) + bytesRead = compressedFileRead(lenRemain_); else { if (buf_.hasArray()) bytesRead = fsdis_.read(pos_, buf_.array(), bufOffset_, lenRemain_); @@ -269,8 +271,8 @@ public class HDFSClient int lenRemain = readLenRemain; while (!eof && lenRemain > 0) { - tempPos = reader_.getPosition(); try { +tempPos = reader_.getPosition(); eof = reader_.next(key_, value_); } catch (java.io.EOFException e) @@ -288,7 +290,11 @@ public class HDFSClient totalReadLen += (readLen+1); } else { // Reset the position because the row can't be copied to buffer - reader_.sync(tempPos); + try { +reader_.sync(tempPos); + } + catch (java.io.EOFException e1) + {} break; } } @@ -323,8 +329,10 @@ public class HDFSClient inStream_ = inStream; sequenceFile_ = sequenceFile; recDelimiter_ = recDelimiter; - if (sequenceFile_) - fsdis_ = fs_.open(filepath_); + if (sequenceFile_) { + fsdis_ = null; + inStream_ = null; + } else { codec_ = codecFactory_.getCodec(filepath_); if (codec_ != null) { @@ -366,10 +374,21 @@ public class HDFSClient public void initSequenceFileRead() throws IOException, EOFException { - SequenceFile.Reader.Option seqPos = SequenceFile.Reader.start(pos_); SequenceFile.Reader.Option seqLen = SequenceFile.Reader.length(lenRemain_); - SequenceFile.Reader.Option seqInputStream = SequenceFile.Reader.stream(fsdis_); - reader_ = new SequenceFile.Reader(config_, seqPos, seqLen, seqInputStream); + SequenceFile.Reader.Option seqFileName = SequenceFile.Reader.file(filepath_); + reader_ = new SequenceFile.Reader(config_, seqLen, seqFileName); + seqCompressionType_ = reader_.getCompressionType(); + if (seqCompressionType_ == SequenceFile.CompressionType.NONE) + compressed_ = false; + else + compressed_ = true; + if (compressed_ && pos_ != 0) + throw new IOException("Spliting of compressed sequence file is not supported"); + try { +reader_.sync(pos_); + } catch (EOFException e)
[1/2] trafodion git commit: [TRAFODION-3171] Refactor Hive sequence file reading to use the new implementation
Repository: trafodion Updated Branches: refs/heads/master f38a1006b -> c1aa0e6f4 [TRAFODION-3171] Refactor Hive sequence file reading to use the new implementation Changes possibly fix the hive/TEST006 failure in daily build. Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/39d71103 Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/39d71103 Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/39d71103 Branch: refs/heads/master Commit: 39d71103aaed3ce10a518533f296c35201cf3bea Parents: affc9db Author: selvaganesang Authored: Wed Aug 8 23:06:58 2018 + Committer: selvaganesang Committed: Wed Aug 8 23:06:58 2018 + -- core/sql/src/main/java/org/trafodion/sql/HdfsScan.java | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/trafodion/blob/39d71103/core/sql/src/main/java/org/trafodion/sql/HdfsScan.java -- diff --git a/core/sql/src/main/java/org/trafodion/sql/HdfsScan.java b/core/sql/src/main/java/org/trafodion/sql/HdfsScan.java index 4d7b90d..7d813eb 100644 --- a/core/sql/src/main/java/org/trafodion/sql/HdfsScan.java +++ b/core/sql/src/main/java/org/trafodion/sql/HdfsScan.java @@ -177,8 +177,12 @@ public class HdfsScan } catch (EOFException e) { // Skip this range -currRange_++; -scheduleHdfsScanRange(bufNo, 0); +if (currRange_ == (hdfsScanRanges_.length-1)) +scanCompleted_ = true; +else { + currRange_++; + scheduleHdfsScanRange(bufNo, 0); +} } } }