[
https://issues.apache.org/jira/browse/HIVE-17505?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Eugene Koifman updated HIVE-17505:
----------------------------------
Description:
add this to TestTxnNoBuckets (not related to Acid - just a repro)
{noformat}
@Test
public void testToAcidConversionMultiBucket() throws Exception {
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE, true);
hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "none");
d.close();
d = new Driver(hiveConf);
int[][] values = {{1,2},{3,4},{5,6},{7,8},{9,10}};
runStatementOnDriver("insert into " + Table.ACIDTBL +
makeValuesClause(values));
runStatementOnDriver("drop table if exists T");
runStatementOnDriver("create table T (a int, b int) stored as ORC
TBLPROPERTIES ('transactional'='false')");//todo: try with T bucketed
// runStatementOnDriver("insert into T select a,b from (" + "select a, b
from " + Table.ACIDTBL + " where a <= 5 union all select a, b from " +
Table.ACIDTBL + " where a >= 5" + ") S order by a, b");
runStatementOnDriver("insert into T(a,b) select a, b from " + Table.ACIDTBL
+ " where a between 1 and 3 group by a, b union all select a, b from " +
Table.ACIDTBL + " where a between 5 and 7 union all select a, b from " +
Table.ACIDTBL + " where a >= 9");
List<String> rs = runStatementOnDriver("select a, b, INPUT__FILE__NAME from
T order by a, b, INPUT__FILE__NAME");
LOG.warn("before converting to acid");
for(String s : rs) {
LOG.warn(s);
}
{noformat}
this creates
{noformat}
ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree
~/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/
/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/
├── -ext-10002
│ ├── 19
│ │ └── 000000_0
│ ├── 20
│ │ └── 000000_0
│ └── 21
│ └── 000000_0
└── _tmp.-ext-10000
5 directories, 3 files
{noformat}
but
_Hive.copyFiles(HiveConf conf, Path srcf, Path destf, FileSystem fs, boolean
isSrcLocal, boolean isAcid, List<Path> newFiles)_
bails out at
{noformat}
if (srcs == null) {
LOG.info("No sources specified to move: " + srcf);
return;
// srcs = new FileStatus[0]; Why is this needed?
}
{noformat}
and so the table T ends up empty, because srcf is
file:/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505156503971/warehouse/t/.hive-staging_hive_2017-09-11_12-02-47_021_1458754468823875082-1/-ext-10000
(not -ext-10002, where the files were actually written), so srcs is null.
{noformat}
ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ ./bin/hive --orcfiledump -d
-j
~/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in
[jar:file:/Users/ekoifman/dev/hiverwcommit/packaging/target/apache-hive-3.0.0-SNAPSHOT-bin/apache-hive-3.0.0-SNAPSHOT-bin/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in
[jar:file:/Users/ekoifman/dev/hwxhadoop/hadoop-dist/target/hadoop-2.7.3.2.6.0.0-SNAPSHOT/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Processing data file
file:/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/-ext-10002/19/000000_0
[length: 242]
{"a":1,"b":2}
{"a":3,"b":4}
________________________________________________________________________________________________________________________
Processing data file
file:/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/-ext-10002/20/000000_0
[length: 243]
{"a":7,"b":8}
{"a":5,"b":6}
________________________________________________________________________________________________________________________
Processing data file
file:/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/-ext-10002/21/000000_0
[length: 242]
{"a":9,"b":10}
________________________________________________________________________________________________________________________
{noformat}
cc [~ashutoshc], [~jcamachorodriguez]
was:
add this to TestTxnNoBuckets (not related to Acid - just a repro)
{noformat}
@Test
public void testToAcidConversionMultiBucket() throws Exception {
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE, true);
hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "none");
d.close();
d = new Driver(hiveConf);
int[][] values = {{1,2},{3,4},{5,6},{7,8},{9,10}};
runStatementOnDriver("insert into " + Table.ACIDTBL +
makeValuesClause(values));
runStatementOnDriver("drop table if exists T");
runStatementOnDriver("create table T (a int, b int) stored as ORC
TBLPROPERTIES ('transactional'='false')");//todo: try with T bucketd
// runStatementOnDriver("insert into T select a,b from (" + "select a, b
from " + Table.ACIDTBL + " where a <= 5 union all select a, b from " +
Table.ACIDTBL + " where a >= 5" + ") S order by a, b");
runStatementOnDriver("insert into T(a,b) select a, b from " + Table.ACIDTBL
+ " where a between 1 and 3 group by a, b union all select a, b from " +
Table.ACIDTBL + " where a between 5 and 7 union all select a, b from " +
Table.ACIDTBL + " where a >= 9");
List<String> rs = runStatementOnDriver("select a, b, INPUT__FILE__NAME from
T order by a, b, INPUT__FILE__NAME");
LOG.warn("before converting to acid");
for(String s : rs) {
LOG.warn(s);
}
{noformat}
this creates
{noformat}
ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree
~/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/
/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/
├── -ext-10002
│ ├── 19
│ │ └── 000000_0
│ ├── 20
│ │ └── 000000_0
│ └── 21
│ └── 000000_0
└── _tmp.-ext-10000
5 directories, 3 files
{noformat}
but
_Hive.copyFiles(HiveConf conf, Path srcf, Path destf, FileSystem fs, boolean
isSrcLocal, boolean isAcid, List<Path> newFiles)_
bails out at
{noformat}
if (srcs == null) {
LOG.info("No sources specified to move: " + srcf);
return;
// srcs = new FileStatus[0]; Why is this needed?
}
{noformat}
and so the table T ends up empty.
{noformat}
ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ ./bin/hive --orcfiledump -d
-j
~/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in
[jar:file:/Users/ekoifman/dev/hiverwcommit/packaging/target/apache-hive-3.0.0-SNAPSHOT-bin/apache-hive-3.0.0-SNAPSHOT-bin/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in
[jar:file:/Users/ekoifman/dev/hwxhadoop/hadoop-dist/target/hadoop-2.7.3.2.6.0.0-SNAPSHOT/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Processing data file
file:/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/-ext-10002/19/000000_0
[length: 242]
{"a":1,"b":2}
{"a":3,"b":4}
________________________________________________________________________________________________________________________
Processing data file
file:/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/-ext-10002/20/000000_0
[length: 243]
{"a":7,"b":8}
{"a":5,"b":6}
________________________________________________________________________________________________________________________
Processing data file
file:/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/-ext-10002/21/000000_0
[length: 242]
{"a":9,"b":10}
________________________________________________________________________________________________________________________
{noformat}
cc [~ashutoshc], [~jcamachorodriguez]
> hive.optimize.union.remove=true doesn't work with insert into
> -------------------------------------------------------------
>
> Key: HIVE-17505
> URL: https://issues.apache.org/jira/browse/HIVE-17505
> Project: Hive
> Issue Type: Bug
> Reporter: Eugene Koifman
>
> add this to TestTxnNoBuckets (not related to Acid - just a repro)
> {noformat}
> @Test
> public void testToAcidConversionMultiBucket() throws Exception {
> hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE, true);
> hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "none");
> d.close();
> d = new Driver(hiveConf);
> int[][] values = {{1,2},{3,4},{5,6},{7,8},{9,10}};
> runStatementOnDriver("insert into " + Table.ACIDTBL +
> makeValuesClause(values));
> runStatementOnDriver("drop table if exists T");
> runStatementOnDriver("create table T (a int, b int) stored as ORC
> TBLPROPERTIES ('transactional'='false')");//todo: try with T bucketed
> // runStatementOnDriver("insert into T select a,b from (" + "select a, b
> from " + Table.ACIDTBL + " where a <= 5 union all select a, b from " +
> Table.ACIDTBL + " where a >= 5" + ") S order by a, b");
> runStatementOnDriver("insert into T(a,b) select a, b from " +
> Table.ACIDTBL + " where a between 1 and 3 group by a, b union all select a, b
> from " + Table.ACIDTBL + " where a between 5 and 7 union all select a, b from
> " + Table.ACIDTBL + " where a >= 9");
> List<String> rs = runStatementOnDriver("select a, b, INPUT__FILE__NAME
> from T order by a, b, INPUT__FILE__NAME");
> LOG.warn("before converting to acid");
> for(String s : rs) {
> LOG.warn(s);
> }
> {noformat}
> this creates
> {noformat}
> ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree
> ~/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/
> /Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/
> ├── -ext-10002
> │ ├── 19
> │ │ └── 000000_0
> │ ├── 20
> │ │ └── 000000_0
> │ └── 21
> │ └── 000000_0
> └── _tmp.-ext-10000
> 5 directories, 3 files
> {noformat}
> but
> _Hive.copyFiles(HiveConf conf, Path srcf, Path destf, FileSystem fs, boolean
> isSrcLocal, boolean isAcid, List<Path> newFiles)_
> bails out at
> {noformat}
> if (srcs == null) {
> LOG.info("No sources specified to move: " + srcf);
> return;
> // srcs = new FileStatus[0]; Why is this needed?
> }
> {noformat}
> and so the table T ends up empty, because srcf is
> file:/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505156503971/warehouse/t/.hive-staging_hive_2017-09-11_12-02-47_021_1458754468823875082-1/-ext-10000
> (not -ext-10002, where the files were actually written), so srcs is null.
> {noformat}
> ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ ./bin/hive --orcfiledump -d
> -j
> ~/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/
> SLF4J: Class path contains multiple SLF4J bindings.
> SLF4J: Found binding in
> [jar:file:/Users/ekoifman/dev/hiverwcommit/packaging/target/apache-hive-3.0.0-SNAPSHOT-bin/apache-hive-3.0.0-SNAPSHOT-bin/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
> SLF4J: Found binding in
> [jar:file:/Users/ekoifman/dev/hwxhadoop/hadoop-dist/target/hadoop-2.7.3.2.6.0.0-SNAPSHOT/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
> SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an
> explanation.
> SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
> Processing data file
> file:/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/-ext-10002/19/000000_0
> [length: 242]
> {"a":1,"b":2}
> {"a":3,"b":4}
> ________________________________________________________________________________________________________________________
> Processing data file
> file:/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/-ext-10002/20/000000_0
> [length: 243]
> {"a":7,"b":8}
> {"a":5,"b":6}
> ________________________________________________________________________________________________________________________
> Processing data file
> file:/Users/ekoifman/dev/hiverwgit/ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1505153866252/warehouse/t/.hive-staging_hive_2017-09-11_11-18-48_614_1924461543400304640-1/-ext-10002/21/000000_0
> [length: 242]
> {"a":9,"b":10}
> ________________________________________________________________________________________________________________________
> {noformat}
> cc [~ashutoshc], [~jcamachorodriguez]
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)