[
https://issues.apache.org/jira/browse/HIVE-29297?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Marta Kuczora updated HIVE-29297:
---------------------------------
Description:
In HIVE-27536 the value of the "tmpPrefix" variable in the Utilities class was
changed to "-tmp." from "_tmp.". This prefix was used to create the temporary
directory for the manifest files. As a side effect of this change, if an insert
and a read query are running at the same time on the same table, the manifest
files are read by the read query. This leads to an exception like this:
{code:java}
ERROR : Failed with exception java.io.IOException:java.lang.RuntimeException:
ORC split generation failed with exception: org.apache.orc.FileFormatException:
Malformed ORC file
hdfs://ccycloud-1.kuczoram731.root.comops.site:8020/warehouse/tablespace/managed/hive/acid_test/-tmp.delta_0000002_0000002_0000/000000_0.manifest.
Invalid postscript.
java.io.IOException: java.lang.RuntimeException: ORC split generation failed
with exception: org.apache.orc.FileFormatException: Malformed ORC file
hdfs://ccycloud-1.kuczoram731.root.comops.site:8020/warehouse/tablespace/managed/hive/acid_test/-tmp.delta_0000002_0000002_0000/000000_0.manifest.
Invalid postscript.
at
org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:646)
at
org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:553)
at
org.apache.hadoop.hive.ql.exec.FetchTask.executeInner(FetchTask.java:217)
at org.apache.hadoop.hive.ql.exec.FetchTask.execute(FetchTask.java:114)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:819)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:547)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:541)
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:190)
at
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:236)
at
org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:92)
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:341)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1910)
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:361)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.RuntimeException: ORC split generation failed with
exception: org.apache.orc.FileFormatException: Malformed ORC file
hdfs://ccycloud-1.kuczoram731.root.comops.site:8020/warehouse/tablespace/managed/hive/acid_test/-tmp.delta_0000002_0000002_0000/000000_0.manifest.
Invalid postscript.
at
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.generateSplitsInfo(OrcInputFormat.java:1891)
at
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getSplits(OrcInputFormat.java:1980)
at
org.apache.hadoop.hive.ql.exec.FetchOperator.generateWrappedSplits(FetchOperator.java:457)
at
org.apache.hadoop.hive.ql.exec.FetchOperator.getNextSplits(FetchOperator.java:424)
at
org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:328)
at
org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:584)
... 21 more
Caused by: java.util.concurrent.ExecutionException:
org.apache.orc.FileFormatException: Malformed ORC file
hdfs://ccycloud-1.kuczoram731.root.comops.site:8020/warehouse/tablespace/managed/hive/acid_test/-tmp.delta_0000002_0000002_0000/000000_0.manifest.
Invalid postscript.
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:192)
at
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.generateSplitsInfo(OrcInputFormat.java:1885)
... 26 more
Caused by: org.apache.orc.FileFormatException: Malformed ORC file
hdfs://ccycloud-1.kuczoram731.root.comops.site:8020/warehouse/tablespace/managed/hive/acid_test/-tmp.delta_0000002_0000002_0000/000000_0.manifest.
Invalid postscript.
at org.apache.orc.impl.ReaderImpl.ensureOrcFooter(ReaderImpl.java:464)
at org.apache.orc.impl.ReaderImpl.extractFileTail(ReaderImpl.java:812)
at org.apache.orc.impl.ReaderImpl.<init>(ReaderImpl.java:567)
at
org.apache.hadoop.hive.ql.io.orc.ReaderImpl.<init>(ReaderImpl.java:61)
at
org.apache.hadoop.hive.ql.io.orc.OrcFile.createReader(OrcFile.java:112)
at
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator.populateAndCacheStripeDetails(OrcInputFormat.java:1686)
at
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator.callInternal(OrcInputFormat.java:1574)
at
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator.access$2900(OrcInputFormat.java:1357)
at
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator$1.run(OrcInputFormat.java:1546)
at
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator$1.run(OrcInputFormat.java:1543)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1910)
at
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator.call(OrcInputFormat.java:1543)
at
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator.call(OrcInputFormat.java:1357)
... 4 more {code}
> The directory of the direct insert manifest files should be hidden from read
> queries
> ------------------------------------------------------------------------------------
>
> Key: HIVE-29297
> URL: https://issues.apache.org/jira/browse/HIVE-29297
> Project: Hive
> Issue Type: Bug
> Affects Versions: 4.0.0
> Reporter: Marta Kuczora
> Assignee: Marta Kuczora
> Priority: Major
>
> In HIVE-27536 the value of the "tmpPrefix" variable in the Utilities class
> was changed to "-tmp." from "_tmp.". This prefix was used to create the
> temporary directory for the manifest files. As a side effect of this change,
> if an insert and a read query are running at the same time on the same
> table, the manifest files are read by the read query. This leads to an
> exception like this:
> {code:java}
> ERROR : Failed with exception java.io.IOException:java.lang.RuntimeException:
> ORC split generation failed with exception:
> org.apache.orc.FileFormatException: Malformed ORC file
> hdfs://ccycloud-1.kuczoram731.root.comops.site:8020/warehouse/tablespace/managed/hive/acid_test/-tmp.delta_0000002_0000002_0000/000000_0.manifest.
> Invalid postscript.
> java.io.IOException: java.lang.RuntimeException: ORC split generation failed
> with exception: org.apache.orc.FileFormatException: Malformed ORC file
> hdfs://ccycloud-1.kuczoram731.root.comops.site:8020/warehouse/tablespace/managed/hive/acid_test/-tmp.delta_0000002_0000002_0000/000000_0.manifest.
> Invalid postscript.
> at
> org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:646)
> at
> org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:553)
> at
> org.apache.hadoop.hive.ql.exec.FetchTask.executeInner(FetchTask.java:217)
> at org.apache.hadoop.hive.ql.exec.FetchTask.execute(FetchTask.java:114)
> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:819)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:547)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:541)
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:190)
> at
> org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:236)
> at
> org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:92)
> at
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:341)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1910)
> at
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:361)
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Caused by: java.lang.RuntimeException: ORC split generation failed with
> exception: org.apache.orc.FileFormatException: Malformed ORC file
> hdfs://ccycloud-1.kuczoram731.root.comops.site:8020/warehouse/tablespace/managed/hive/acid_test/-tmp.delta_0000002_0000002_0000/000000_0.manifest.
> Invalid postscript.
> at
> org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.generateSplitsInfo(OrcInputFormat.java:1891)
> at
> org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getSplits(OrcInputFormat.java:1980)
> at
> org.apache.hadoop.hive.ql.exec.FetchOperator.generateWrappedSplits(FetchOperator.java:457)
> at
> org.apache.hadoop.hive.ql.exec.FetchOperator.getNextSplits(FetchOperator.java:424)
> at
> org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:328)
> at
> org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:584)
> ... 21 more
> Caused by: java.util.concurrent.ExecutionException:
> org.apache.orc.FileFormatException: Malformed ORC file
> hdfs://ccycloud-1.kuczoram731.root.comops.site:8020/warehouse/tablespace/managed/hive/acid_test/-tmp.delta_0000002_0000002_0000/000000_0.manifest.
> Invalid postscript.
> at java.util.concurrent.FutureTask.report(FutureTask.java:122)
> at java.util.concurrent.FutureTask.get(FutureTask.java:192)
> at
> org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.generateSplitsInfo(OrcInputFormat.java:1885)
> ... 26 more
> Caused by: org.apache.orc.FileFormatException: Malformed ORC file
> hdfs://ccycloud-1.kuczoram731.root.comops.site:8020/warehouse/tablespace/managed/hive/acid_test/-tmp.delta_0000002_0000002_0000/000000_0.manifest.
> Invalid postscript.
> at org.apache.orc.impl.ReaderImpl.ensureOrcFooter(ReaderImpl.java:464)
> at org.apache.orc.impl.ReaderImpl.extractFileTail(ReaderImpl.java:812)
> at org.apache.orc.impl.ReaderImpl.<init>(ReaderImpl.java:567)
> at
> org.apache.hadoop.hive.ql.io.orc.ReaderImpl.<init>(ReaderImpl.java:61)
> at
> org.apache.hadoop.hive.ql.io.orc.OrcFile.createReader(OrcFile.java:112)
> at
> org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator.populateAndCacheStripeDetails(OrcInputFormat.java:1686)
> at
> org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator.callInternal(OrcInputFormat.java:1574)
> at
> org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator.access$2900(OrcInputFormat.java:1357)
> at
> org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator$1.run(OrcInputFormat.java:1546)
> at
> org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator$1.run(OrcInputFormat.java:1543)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1910)
> at
> org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator.call(OrcInputFormat.java:1543)
> at
> org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$SplitGenerator.call(OrcInputFormat.java:1357)
> ... 4 more {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)