HIVE-19311 : Partition and bucketing support for "load data" statement (Deepak Jaiswal, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/46c5580b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/46c5580b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/46c5580b Branch: refs/heads/master Commit: 46c5580b735f9b3b4f3e482d8f7a524dd88d4771 Parents: e8651cb Author: Deepak Jaiswal <djais...@apache.org> Authored: Wed May 2 01:27:02 2018 -0700 Committer: Deepak Jaiswal <djais...@apache.org> Committed: Wed May 2 01:27:02 2018 -0700 ---------------------------------------------------------------------- data/files/load_data_job/bucketing.txt | 118 + .../load_data_job/load_data_1_partition.txt | 118 + .../partitions/load_data_1_partition.txt | 118 + .../partitions/load_data_2_partitions.txt | 118 + .../partitions/subdir/load_data_1_partition.txt | 118 + .../subdir/load_data_2_partitions.txt | 118 + .../test/resources/testconfiguration.properties | 1 + .../java/org/apache/hadoop/hive/ql/Context.java | 12 +- .../org/apache/hadoop/hive/ql/ErrorMsg.java | 2 + .../apache/hadoop/hive/ql/metadata/Table.java | 6 +- .../hive/ql/parse/LoadSemanticAnalyzer.java | 172 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 43 +- .../ql/parse/UpdateDeleteSemanticAnalyzer.java | 43 +- .../apache/hadoop/hive/ql/TestTxnLoadData.java | 5 +- .../queries/clientnegative/load_part_nospec.q | 2 - .../test/queries/clientnegative/nopart_load.q | 5 - .../clientpositive/load_data_using_job.q | 83 + .../clientnegative/load_part_nospec.q.out | 9 - .../results/clientnegative/nopart_load.q.out | 9 - .../llap/load_data_using_job.q.out | 2769 ++++++++++++++++++ 20 files changed, 3747 insertions(+), 122 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/data/files/load_data_job/bucketing.txt ---------------------------------------------------------------------- diff --git a/data/files/load_data_job/bucketing.txt 
b/data/files/load_data_job/bucketing.txt new file mode 100644 index 0000000..8334415 --- /dev/null +++ b/data/files/load_data_job/bucketing.txt @@ -0,0 +1,118 @@ +165val_165 +484val_484 +150val_150 +224val_224 +66val_66 +213val_213 +374val_374 +495val_495 +37val_37 +327val_327 +15val_15 +338val_338 +459val_459 +466val_466 +396val_396 +309val_309 +367val_367 +0val_0 +455val_455 +316val_316 +345val_345 +129val_129 +378val_378 +4val_4 +356val_356 +169val_169 +125val_125 +437val_437 +286val_286 +187val_187 +176val_176 +459val_459 +51val_51 +103val_103 +239val_239 +213val_213 +176val_176 +275val_275 +260val_260 +404val_404 +217val_217 +84val_84 +466val_466 +8val_8 +411val_411 +172val_172 +129val_129 +158val_158 +0val_0 +26val_26 +165val_165 +327val_327 +51val_51 +404val_404 +95val_95 +282val_282 +187val_187 +316val_316 +169val_169 +77val_77 +0val_0 +118val_118 +282val_282 +419val_419 +15val_15 +118val_118 +19val_19 +224val_224 +309val_309 +389val_389 +327val_327 +242val_242 +392val_392 +242val_242 +396val_396 +95val_95 +11val_11 +143val_143 +228val_228 +33val_33 +103val_103 +367val_367 +239val_239 +480val_480 +202val_202 +316val_316 +235val_235 +80val_80 +44val_44 +466val_466 +257val_257 +190val_190 +114val_114 +396val_396 +217val_217 +125val_125 +187val_187 +480val_480 +491val_491 +305val_305 +444val_444 +169val_169 +323val_323 +480val_480 +136val_136 +172val_172 +462val_462 +26val_26 +462val_462 +341val_341 +183val_183 +84val_84 +37val_37 +448val_448 +194val_194 +477val_477 +169val_169 +400val_400 http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/data/files/load_data_job/load_data_1_partition.txt ---------------------------------------------------------------------- diff --git a/data/files/load_data_job/load_data_1_partition.txt b/data/files/load_data_job/load_data_1_partition.txt new file mode 100644 index 0000000..9f34604 --- /dev/null +++ b/data/files/load_data_job/load_data_1_partition.txt @@ -0,0 +1,118 @@ +165val_1652008-04-08 +484val_4842008-04-08 
+150val_1502008-04-08 +224val_2242008-04-08 +66val_662008-04-08 +213val_2132008-04-08 +374val_3742008-04-08 +495val_4952008-04-08 +37val_372008-04-08 +327val_3272008-04-08 +15val_152008-04-08 +338val_3382008-04-08 +459val_4592008-04-08 +466val_4662008-04-08 +396val_3962008-04-08 +309val_3092008-04-08 +367val_3672008-04-08 +0val_02008-04-08 +455val_4552008-04-08 +316val_3162008-04-08 +345val_3452008-04-08 +129val_1292008-04-08 +378val_3782008-04-08 +4val_42008-04-08 +356val_3562008-04-08 +169val_1692008-04-08 +125val_1252008-04-08 +437val_4372008-04-08 +286val_2862008-04-08 +187val_1872008-04-08 +176val_1762008-04-08 +459val_4592008-04-08 +51val_512008-04-08 +103val_1032008-04-08 +239val_2392008-04-08 +213val_2132008-04-08 +176val_1762008-04-08 +275val_2752008-04-08 +260val_2602008-04-08 +404val_4042008-04-08 +217val_2172008-04-08 +84val_842008-04-08 +466val_4662008-04-08 +8val_82008-04-08 +411val_4112008-04-08 +172val_1722008-04-08 +129val_1292008-04-08 +158val_1582008-04-08 +0val_02008-04-08 +26val_262008-04-08 +165val_1652008-04-08 +327val_3272008-04-08 +51val_512008-04-08 +404val_4042008-04-08 +95val_952008-04-08 +282val_2822008-04-08 +187val_1872008-04-08 +316val_3162008-04-08 +169val_1692008-04-08 +77val_772008-04-08 +0val_02008-04-08 +118val_1182008-04-08 +282val_2822008-04-08 +419val_4192008-04-08 +15val_152008-04-08 +118val_1182008-04-08 +19val_192008-04-08 +224val_2242008-04-08 +309val_3092008-04-08 +389val_3892008-04-08 +327val_3272008-04-08 +242val_2422008-04-08 +392val_3922008-04-08 +242val_2422008-04-08 +396val_3962008-04-08 +95val_952008-04-08 +11val_112008-04-08 +143val_1432008-04-08 +228val_2282008-04-08 +33val_332008-04-08 +103val_1032008-04-08 +367val_3672008-04-08 +239val_2392008-04-08 +480val_4802008-04-08 +202val_2022008-04-08 +316val_3162008-04-08 +235val_2352008-04-08 +80val_802008-04-08 +44val_442008-04-08 +466val_4662008-04-08 +257val_2572008-04-08 +190val_1902008-04-08 +114val_1142008-04-08 +396val_3962008-04-08 +217val_2172008-04-08 
+125val_1252008-04-08 +187val_1872008-04-08 +480val_4802008-04-08 +491val_4912008-04-08 +305val_3052008-04-08 +444val_4442008-04-08 +169val_1692008-04-08 +323val_3232008-04-08 +480val_4802008-04-08 +136val_1362008-04-08 +172val_1722008-04-08 +462val_4622008-04-08 +26val_262008-04-08 +462val_4622008-04-08 +341val_3412008-04-08 +183val_1832008-04-08 +84val_842008-04-08 +37val_372008-04-08 +448val_4482008-04-08 +194val_1942008-04-08 +477val_4772008-04-08 +169val_1692008-04-08 +400val_4002008-04-08 http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/data/files/load_data_job/partitions/load_data_1_partition.txt ---------------------------------------------------------------------- diff --git a/data/files/load_data_job/partitions/load_data_1_partition.txt b/data/files/load_data_job/partitions/load_data_1_partition.txt new file mode 100644 index 0000000..50ea6d5 --- /dev/null +++ b/data/files/load_data_job/partitions/load_data_1_partition.txt @@ -0,0 +1,118 @@ +165val_1652008-04-081 +484val_4842008-04-080 +150val_1502008-04-081 +224val_2242008-04-080 +66val_662008-04-081 +213val_2132008-04-080 +374val_3742008-04-081 +495val_4952008-04-080 +37val_372008-04-081 +327val_3272008-04-080 +15val_152008-04-081 +338val_3382008-04-080 +459val_4592008-04-081 +466val_4662008-04-080 +396val_3962008-04-081 +309val_3092008-04-080 +367val_3672008-04-081 +0val_02008-04-080 +455val_4552008-04-081 +316val_3162008-04-080 +345val_3452008-04-081 +129val_1292008-04-080 +378val_3782008-04-081 +4val_42008-04-080 +356val_3562008-04-081 +169val_1692008-04-080 +125val_1252008-04-081 +437val_4372008-04-080 +286val_2862008-04-081 +187val_1872008-04-080 +176val_1762008-04-081 +459val_4592008-04-080 +51val_512008-04-081 +103val_1032008-04-080 +239val_2392008-04-081 +213val_2132008-04-080 +176val_1762008-04-081 +275val_2752008-04-080 +260val_2602008-04-081 +404val_4042008-04-080 +217val_2172008-04-081 +84val_842008-04-080 +466val_4662008-04-081 +8val_82008-04-080 +411val_4112008-04-081 
+172val_1722008-04-080 +129val_1292008-04-081 +158val_1582008-04-080 +0val_02008-04-081 +26val_262008-04-080 +165val_1652008-04-081 +327val_3272008-04-080 +51val_512008-04-081 +404val_4042008-04-080 +95val_952008-04-081 +282val_2822008-04-080 +187val_1872008-04-081 +316val_3162008-04-080 +169val_1692008-04-081 +77val_772008-04-080 +0val_02008-04-081 +118val_1182008-04-080 +282val_2822008-04-081 +419val_4192008-04-080 +15val_152008-04-081 +118val_1182008-04-080 +19val_192008-04-081 +224val_2242008-04-080 +309val_3092008-04-081 +389val_3892008-04-080 +327val_3272008-04-081 +242val_2422008-04-080 +392val_3922008-04-081 +242val_2422008-04-080 +396val_3962008-04-081 +95val_952008-04-080 +11val_112008-04-081 +143val_1432008-04-080 +228val_2282008-04-081 +33val_332008-04-080 +103val_1032008-04-081 +367val_3672008-04-080 +239val_2392008-04-081 +480val_4802008-04-080 +202val_2022008-04-081 +316val_3162008-04-080 +235val_2352008-04-081 +80val_802008-04-080 +44val_442008-04-081 +466val_4662008-04-080 +257val_2572008-04-081 +190val_1902008-04-080 +114val_1142008-04-081 +396val_3962008-04-080 +217val_2172008-04-081 +125val_1252008-04-080 +187val_1872008-04-081 +480val_4802008-04-080 +491val_4912008-04-081 +305val_3052008-04-080 +444val_4442008-04-081 +169val_1692008-04-080 +323val_3232008-04-081 +480val_4802008-04-080 +136val_1362008-04-081 +172val_1722008-04-080 +462val_4622008-04-081 +26val_262008-04-080 +462val_4622008-04-081 +341val_3412008-04-080 +183val_1832008-04-081 +84val_842008-04-080 +37val_372008-04-081 +448val_4482008-04-080 +194val_1942008-04-081 +477val_4772008-04-080 +169val_1692008-04-081 +400val_4002008-04-080 http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/data/files/load_data_job/partitions/load_data_2_partitions.txt ---------------------------------------------------------------------- diff --git a/data/files/load_data_job/partitions/load_data_2_partitions.txt b/data/files/load_data_job/partitions/load_data_2_partitions.txt new file mode 100644 
index 0000000..50ea6d5 --- /dev/null +++ b/data/files/load_data_job/partitions/load_data_2_partitions.txt @@ -0,0 +1,118 @@ +165val_1652008-04-081 +484val_4842008-04-080 +150val_1502008-04-081 +224val_2242008-04-080 +66val_662008-04-081 +213val_2132008-04-080 +374val_3742008-04-081 +495val_4952008-04-080 +37val_372008-04-081 +327val_3272008-04-080 +15val_152008-04-081 +338val_3382008-04-080 +459val_4592008-04-081 +466val_4662008-04-080 +396val_3962008-04-081 +309val_3092008-04-080 +367val_3672008-04-081 +0val_02008-04-080 +455val_4552008-04-081 +316val_3162008-04-080 +345val_3452008-04-081 +129val_1292008-04-080 +378val_3782008-04-081 +4val_42008-04-080 +356val_3562008-04-081 +169val_1692008-04-080 +125val_1252008-04-081 +437val_4372008-04-080 +286val_2862008-04-081 +187val_1872008-04-080 +176val_1762008-04-081 +459val_4592008-04-080 +51val_512008-04-081 +103val_1032008-04-080 +239val_2392008-04-081 +213val_2132008-04-080 +176val_1762008-04-081 +275val_2752008-04-080 +260val_2602008-04-081 +404val_4042008-04-080 +217val_2172008-04-081 +84val_842008-04-080 +466val_4662008-04-081 +8val_82008-04-080 +411val_4112008-04-081 +172val_1722008-04-080 +129val_1292008-04-081 +158val_1582008-04-080 +0val_02008-04-081 +26val_262008-04-080 +165val_1652008-04-081 +327val_3272008-04-080 +51val_512008-04-081 +404val_4042008-04-080 +95val_952008-04-081 +282val_2822008-04-080 +187val_1872008-04-081 +316val_3162008-04-080 +169val_1692008-04-081 +77val_772008-04-080 +0val_02008-04-081 +118val_1182008-04-080 +282val_2822008-04-081 +419val_4192008-04-080 +15val_152008-04-081 +118val_1182008-04-080 +19val_192008-04-081 +224val_2242008-04-080 +309val_3092008-04-081 +389val_3892008-04-080 +327val_3272008-04-081 +242val_2422008-04-080 +392val_3922008-04-081 +242val_2422008-04-080 +396val_3962008-04-081 +95val_952008-04-080 +11val_112008-04-081 +143val_1432008-04-080 +228val_2282008-04-081 +33val_332008-04-080 +103val_1032008-04-081 +367val_3672008-04-080 +239val_2392008-04-081 
+480val_4802008-04-080 +202val_2022008-04-081 +316val_3162008-04-080 +235val_2352008-04-081 +80val_802008-04-080 +44val_442008-04-081 +466val_4662008-04-080 +257val_2572008-04-081 +190val_1902008-04-080 +114val_1142008-04-081 +396val_3962008-04-080 +217val_2172008-04-081 +125val_1252008-04-080 +187val_1872008-04-081 +480val_4802008-04-080 +491val_4912008-04-081 +305val_3052008-04-080 +444val_4442008-04-081 +169val_1692008-04-080 +323val_3232008-04-081 +480val_4802008-04-080 +136val_1362008-04-081 +172val_1722008-04-080 +462val_4622008-04-081 +26val_262008-04-080 +462val_4622008-04-081 +341val_3412008-04-080 +183val_1832008-04-081 +84val_842008-04-080 +37val_372008-04-081 +448val_4482008-04-080 +194val_1942008-04-081 +477val_4772008-04-080 +169val_1692008-04-081 +400val_4002008-04-080 http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/data/files/load_data_job/partitions/subdir/load_data_1_partition.txt ---------------------------------------------------------------------- diff --git a/data/files/load_data_job/partitions/subdir/load_data_1_partition.txt b/data/files/load_data_job/partitions/subdir/load_data_1_partition.txt new file mode 100644 index 0000000..50ea6d5 --- /dev/null +++ b/data/files/load_data_job/partitions/subdir/load_data_1_partition.txt @@ -0,0 +1,118 @@ +165val_1652008-04-081 +484val_4842008-04-080 +150val_1502008-04-081 +224val_2242008-04-080 +66val_662008-04-081 +213val_2132008-04-080 +374val_3742008-04-081 +495val_4952008-04-080 +37val_372008-04-081 +327val_3272008-04-080 +15val_152008-04-081 +338val_3382008-04-080 +459val_4592008-04-081 +466val_4662008-04-080 +396val_3962008-04-081 +309val_3092008-04-080 +367val_3672008-04-081 +0val_02008-04-080 +455val_4552008-04-081 +316val_3162008-04-080 +345val_3452008-04-081 +129val_1292008-04-080 +378val_3782008-04-081 +4val_42008-04-080 +356val_3562008-04-081 +169val_1692008-04-080 +125val_1252008-04-081 +437val_4372008-04-080 +286val_2862008-04-081 +187val_1872008-04-080 +176val_1762008-04-081 
+459val_4592008-04-080 +51val_512008-04-081 +103val_1032008-04-080 +239val_2392008-04-081 +213val_2132008-04-080 +176val_1762008-04-081 +275val_2752008-04-080 +260val_2602008-04-081 +404val_4042008-04-080 +217val_2172008-04-081 +84val_842008-04-080 +466val_4662008-04-081 +8val_82008-04-080 +411val_4112008-04-081 +172val_1722008-04-080 +129val_1292008-04-081 +158val_1582008-04-080 +0val_02008-04-081 +26val_262008-04-080 +165val_1652008-04-081 +327val_3272008-04-080 +51val_512008-04-081 +404val_4042008-04-080 +95val_952008-04-081 +282val_2822008-04-080 +187val_1872008-04-081 +316val_3162008-04-080 +169val_1692008-04-081 +77val_772008-04-080 +0val_02008-04-081 +118val_1182008-04-080 +282val_2822008-04-081 +419val_4192008-04-080 +15val_152008-04-081 +118val_1182008-04-080 +19val_192008-04-081 +224val_2242008-04-080 +309val_3092008-04-081 +389val_3892008-04-080 +327val_3272008-04-081 +242val_2422008-04-080 +392val_3922008-04-081 +242val_2422008-04-080 +396val_3962008-04-081 +95val_952008-04-080 +11val_112008-04-081 +143val_1432008-04-080 +228val_2282008-04-081 +33val_332008-04-080 +103val_1032008-04-081 +367val_3672008-04-080 +239val_2392008-04-081 +480val_4802008-04-080 +202val_2022008-04-081 +316val_3162008-04-080 +235val_2352008-04-081 +80val_802008-04-080 +44val_442008-04-081 +466val_4662008-04-080 +257val_2572008-04-081 +190val_1902008-04-080 +114val_1142008-04-081 +396val_3962008-04-080 +217val_2172008-04-081 +125val_1252008-04-080 +187val_1872008-04-081 +480val_4802008-04-080 +491val_4912008-04-081 +305val_3052008-04-080 +444val_4442008-04-081 +169val_1692008-04-080 +323val_3232008-04-081 +480val_4802008-04-080 +136val_1362008-04-081 +172val_1722008-04-080 +462val_4622008-04-081 +26val_262008-04-080 +462val_4622008-04-081 +341val_3412008-04-080 +183val_1832008-04-081 +84val_842008-04-080 +37val_372008-04-081 +448val_4482008-04-080 +194val_1942008-04-081 +477val_4772008-04-080 +169val_1692008-04-081 +400val_4002008-04-080 
http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/data/files/load_data_job/partitions/subdir/load_data_2_partitions.txt ---------------------------------------------------------------------- diff --git a/data/files/load_data_job/partitions/subdir/load_data_2_partitions.txt b/data/files/load_data_job/partitions/subdir/load_data_2_partitions.txt new file mode 100644 index 0000000..50ea6d5 --- /dev/null +++ b/data/files/load_data_job/partitions/subdir/load_data_2_partitions.txt @@ -0,0 +1,118 @@ +165val_1652008-04-081 +484val_4842008-04-080 +150val_1502008-04-081 +224val_2242008-04-080 +66val_662008-04-081 +213val_2132008-04-080 +374val_3742008-04-081 +495val_4952008-04-080 +37val_372008-04-081 +327val_3272008-04-080 +15val_152008-04-081 +338val_3382008-04-080 +459val_4592008-04-081 +466val_4662008-04-080 +396val_3962008-04-081 +309val_3092008-04-080 +367val_3672008-04-081 +0val_02008-04-080 +455val_4552008-04-081 +316val_3162008-04-080 +345val_3452008-04-081 +129val_1292008-04-080 +378val_3782008-04-081 +4val_42008-04-080 +356val_3562008-04-081 +169val_1692008-04-080 +125val_1252008-04-081 +437val_4372008-04-080 +286val_2862008-04-081 +187val_1872008-04-080 +176val_1762008-04-081 +459val_4592008-04-080 +51val_512008-04-081 +103val_1032008-04-080 +239val_2392008-04-081 +213val_2132008-04-080 +176val_1762008-04-081 +275val_2752008-04-080 +260val_2602008-04-081 +404val_4042008-04-080 +217val_2172008-04-081 +84val_842008-04-080 +466val_4662008-04-081 +8val_82008-04-080 +411val_4112008-04-081 +172val_1722008-04-080 +129val_1292008-04-081 +158val_1582008-04-080 +0val_02008-04-081 +26val_262008-04-080 +165val_1652008-04-081 +327val_3272008-04-080 +51val_512008-04-081 +404val_4042008-04-080 +95val_952008-04-081 +282val_2822008-04-080 +187val_1872008-04-081 +316val_3162008-04-080 +169val_1692008-04-081 +77val_772008-04-080 +0val_02008-04-081 +118val_1182008-04-080 +282val_2822008-04-081 +419val_4192008-04-080 +15val_152008-04-081 +118val_1182008-04-080 
+19val_192008-04-081 +224val_2242008-04-080 +309val_3092008-04-081 +389val_3892008-04-080 +327val_3272008-04-081 +242val_2422008-04-080 +392val_3922008-04-081 +242val_2422008-04-080 +396val_3962008-04-081 +95val_952008-04-080 +11val_112008-04-081 +143val_1432008-04-080 +228val_2282008-04-081 +33val_332008-04-080 +103val_1032008-04-081 +367val_3672008-04-080 +239val_2392008-04-081 +480val_4802008-04-080 +202val_2022008-04-081 +316val_3162008-04-080 +235val_2352008-04-081 +80val_802008-04-080 +44val_442008-04-081 +466val_4662008-04-080 +257val_2572008-04-081 +190val_1902008-04-080 +114val_1142008-04-081 +396val_3962008-04-080 +217val_2172008-04-081 +125val_1252008-04-080 +187val_1872008-04-081 +480val_4802008-04-080 +491val_4912008-04-081 +305val_3052008-04-080 +444val_4442008-04-081 +169val_1692008-04-080 +323val_3232008-04-081 +480val_4802008-04-080 +136val_1362008-04-081 +172val_1722008-04-080 +462val_4622008-04-081 +26val_262008-04-080 +462val_4622008-04-081 +341val_3412008-04-080 +183val_1832008-04-081 +84val_842008-04-080 +37val_372008-04-081 +448val_4482008-04-080 +194val_1942008-04-081 +477val_4772008-04-080 +169val_1692008-04-081 +400val_4002008-04-080 http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 0e4e706..6b22afc 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -547,6 +547,7 @@ minillaplocal.query.files=\ llap_smb.q,\ llap_vector_nohybridgrace.q,\ load_data_acid_rename.q,\ + load_data_using_job.q,\ load_dyn_part5.q,\ lvj_mapjoin.q,\ materialized_view_create.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/ql/src/java/org/apache/hadoop/hive/ql/Context.java 
---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index 0fedf0e..1921ea7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -162,6 +162,9 @@ public class Context { private StatsSource statsSource; private int executionIndex; + // Load data rewrite + private Table tempTableForLoad; + public void setOperation(Operation operation) { this.operation = operation; } @@ -516,7 +519,6 @@ public class Context { * - If path is on HDFS, then create a staging directory inside the path * * @param path Path used to verify the Filesystem to use for temporary directory - * @param isFinalJob true if the required {@link Path} will be used for the final job (e.g. the final FSOP) * * @return A path to the new temporary directory */ @@ -1071,4 +1073,12 @@ public class Context { public void setConf(HiveConf conf) { this.conf = conf; } + + public Table getTempTableForLoad() { + return tempTableForLoad; + } + + public void setTempTableForLoad(Table tempTableForLoad) { + this.tempTableForLoad = tempTableForLoad; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 94dd636..99df967 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -462,6 +462,8 @@ public enum ErrorMsg { true), ACID_OP_ON_INSERTONLYTRAN_TABLE(10414, "Attempt to do update or delete on table {0} that is " + "insert-only transactional", true), + LOAD_DATA_LAUNCH_JOB_IO_ERROR(10415, "Encountered I/O error while parsing rewritten load data into insert query"), + 
LOAD_DATA_LAUNCH_JOB_PARSE_ERROR(10416, "Encountered parse error while parsing rewritten load data into insert query"), //========================== 20000 range starts here ========================// http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index abd678b..e11965e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -530,7 +530,7 @@ public class Table implements Serializable { } public boolean isPartitionKey(String colName) { - return getPartColByName(colName) == null ? false : true; + return getPartColByName(colName) != null; } // TODO merge this with getBucketCols function @@ -1020,8 +1020,8 @@ public class Table implements Serializable { public static void validateColumns(List<FieldSchema> columns, List<FieldSchema> partCols) throws HiveException { Set<String> colNames = new HashSet<>(); - for (FieldSchema partCol: columns) { - String colName = normalize(partCol.getName()); + for (FieldSchema col: columns) { + String colName = normalize(col.getName()); if (colNames.contains(colName)) { throw new HiveException("Duplicate column name " + colName + " in the table definition."); http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index 8332bcc..550fe50 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ 
-27,6 +27,8 @@ import java.util.Arrays; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.ArrayList; +import java.util.HashSet; import org.antlr.runtime.tree.Tree; import org.apache.commons.httpclient.util.URIUtil; @@ -38,6 +40,7 @@ import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.Task; @@ -56,16 +59,25 @@ import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.BasicStatsWork; -import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.mapred.InputFormat; import com.google.common.collect.Lists; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * LoadSemanticAnalyzer. * */ -public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer { +public class LoadSemanticAnalyzer extends SemanticAnalyzer { + private static final Logger LOG = LoggerFactory.getLogger(LoadSemanticAnalyzer.class); + private boolean queryReWritten = false; + + private final String tempTblNameSuffix = "__TEMP_TABLE_FOR_LOAD_DATA__"; + + // AST specific data + private Tree fromTree, tableTree; + private boolean isLocal = false, isOverWrite = false; public LoadSemanticAnalyzer(QueryState queryState) throws SemanticException { super(queryState); @@ -77,7 +89,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer { @Override public boolean accept(Path p) { String name = p.getName(); - return name.equals(EximUtil.METADATA_NAME) ? 
true : !name.startsWith("_") && !name.startsWith("."); + return name.equals(EximUtil.METADATA_NAME) || (!name.startsWith("_") && !name.startsWith(".")); } }); if ((srcs != null) && srcs.length == 1) { @@ -137,15 +149,14 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer { return new URI(fromScheme, fromAuthority, path, null, null); } - private List<FileStatus> applyConstraintsAndGetFiles(URI fromURI, Tree ast, - boolean isLocal, Table table) throws SemanticException { + private List<FileStatus> applyConstraintsAndGetFiles(URI fromURI, Table table) throws SemanticException { FileStatus[] srcs = null; // local mode implies that scheme should be "file" // we can change this going forward if (isLocal && !fromURI.getScheme().equals("file")) { - throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, + throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(fromTree, "Source file system should be \"file\" if \"local\" is specified")); } @@ -153,14 +164,16 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer { FileSystem fileSystem = FileSystem.get(fromURI, conf); srcs = matchFilesOrDir(fileSystem, new Path(fromURI)); if (srcs == null || srcs.length == 0) { - throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, "No files matching path " + fromURI)); } for (FileStatus oneSrc : srcs) { if (oneSrc.isDir()) { - throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, - "source contains directory: " + oneSrc.getPath().toString())); + reparseAndSuperAnalyze(table, fromURI); + return null; +/* throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, + "source contains directory: " + oneSrc.getPath().toString()));*/ } } validateAcidFiles(table, srcs, fileSystem); @@ -184,44 +197,17 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer { int bucketId = Utilities.getBucketIdFromFile(bucketIdStr); LOG.debug("bucket ID for file " + 
oneSrc.getPath() + " = " + bucketId + " for table " + table.getFullyQualifiedName()); - if (bucketId == -1) { - throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg( - "The file name is invalid : " - + oneSrc.getPath().toString() + " for table " - + table.getFullyQualifiedName())); - } - if (bucketId >= numBuckets) { - throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg( - "The file name corresponds to invalid bucketId : " - + oneSrc.getPath().toString()) - + ". Maximum number of buckets can be " + numBuckets - + " for table " + table.getFullyQualifiedName()); - } - if (bucketArray[bucketId]) { - throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg( - "Multiple files for same bucket : " + bucketId - + ". Only 1 file per bucket allowed in single load command. To load " + - "multiple files for same bucket, use multiple statements for table " - + table.getFullyQualifiedName())); + if (bucketId == -1 || bucketId >= numBuckets || bucketArray[bucketId]) { + reparseAndSuperAnalyze(table, fromURI); + return null; } bucketArray[bucketId] = true; } } - else { - /** - * for loading into un-bucketed acid table, files can be named arbitrarily but they will - * be renamed during load. 
- * {@link Hive#mvFile(HiveConf, FileSystem, Path, FileSystem, Path, boolean, boolean, - * boolean, int)} - * and - * {@link Hive#copyFiles(HiveConf, FileSystem, FileStatus[], FileSystem, Path, boolean, - * boolean, List, boolean)} - */ - } } catch (IOException e) { // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils - throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e); + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree), e); } return Lists.newArrayList(srcs); @@ -250,11 +236,27 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer { } @Override + public void init(boolean clearPartsCache) { + Table tempTable = ctx.getTempTableForLoad(); + if (tempTable != null) { + // tempTable is only set when load is rewritten. + super.init(clearPartsCache); + tabNameToTabObject.put(tempTable.getTableName().toLowerCase(), tempTable); + } + } + + @Override public void analyzeInternal(ASTNode ast) throws SemanticException { - boolean isLocal = false; - boolean isOverWrite = false; - Tree fromTree = ast.getChild(0); - Tree tableTree = ast.getChild(1); + if (ctx.getTempTableForLoad() != null) { + super.analyzeInternal(ast); + } else { + analyzeLoad(ast); + } + } + + private void analyzeLoad(ASTNode ast) throws SemanticException { + fromTree = ast.getChild(0); + tableTree = ast.getChild(1); if (ast.getChildCount() == 4) { isLocal = true; @@ -274,10 +276,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer { try { String fromPath = stripQuotes(fromTree.getText()); fromURI = initializeFromURI(fromPath, isLocal); - } catch (IOException e) { - throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e - .getMessage()), e); - } catch (URISyntaxException e) { + } catch (IOException | URISyntaxException e) { throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e .getMessage()), e); } @@ -298,20 +297,24 @@ public class LoadSemanticAnalyzer extends 
BaseSemanticAnalyzer { List<FieldSchema> parts = ts.tableHandle.getPartitionKeys(); if ((parts != null && parts.size() > 0) && (ts.partSpec == null || ts.partSpec.size() == 0)) { - throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg()); + // launch a tez job + reparseAndSuperAnalyze(ts.tableHandle, fromURI); + return; } List<String> bucketCols = ts.tableHandle.getBucketCols(); if (bucketCols != null && !bucketCols.isEmpty()) { String error = StrictChecks.checkBucketing(conf); if (error != null) { - throw new SemanticException("Please load into an intermediate table" - + " and use 'insert... select' to allow Hive to enforce bucketing. " + error); + // launch a tez job + reparseAndSuperAnalyze(ts.tableHandle, fromURI); + return; } } // make sure the arguments make sense - List<FileStatus> files = applyConstraintsAndGetFiles(fromURI, fromTree, isLocal, ts.tableHandle); + List<FileStatus> files = applyConstraintsAndGetFiles(fromURI, ts.tableHandle); + if (queryReWritten) return; // for managed tables, make sure the file formats match if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType()) @@ -430,4 +433,69 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer { " Error: " + e.getMessage()); } } + + // Rewrite the load to launch an insert job. 
+ private void reparseAndSuperAnalyze(Table table, URI fromURI) throws SemanticException { + LOG.info("Load data triggered a Tez job instead of usual file operation"); + // Step 1 : Create a temp table object + // Create a Table object + Table tempTableObj = new Table(new org.apache.hadoop.hive.metastore.api.Table(table.getTTable())); + // Construct a temp table name + String tempTblName = table.getTableName() + tempTblNameSuffix; + tempTableObj.setTableName(tempTblName); + + // Move all the partition columns at the end of table columns + tempTableObj.setFields(table.getAllCols()); + // wipe out partition columns + tempTableObj.setPartCols(new ArrayList<>()); + + // Set data location + tempTableObj.setDataLocation(new Path(fromURI)); + + // Step 2 : create the Insert query + StringBuilder rewrittenQueryStr = new StringBuilder(); + + rewrittenQueryStr.append("insert into table "); + rewrittenQueryStr.append(getFullTableNameForSQL((ASTNode)(tableTree.getChild(0)))); + addPartitionColsToInsert(table.getPartCols(), rewrittenQueryStr); + rewrittenQueryStr.append(" select * from "); + rewrittenQueryStr.append(tempTblName); + + // Step 3 : parse the query + // Set dynamic partitioning to nonstrict so that queries do not need any partition + // references. 
+ HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); + // Parse the rewritten query string + Context rewrittenCtx; + try { + rewrittenCtx = new Context(conf); + // We keep track of all the contexts that are created by this query + // so we can clear them when we finish execution + ctx.addRewrittenStatementContext(rewrittenCtx); + } catch (IOException e) { + throw new SemanticException(ErrorMsg.LOAD_DATA_LAUNCH_JOB_IO_ERROR.getMsg()); + } + rewrittenCtx.setExplainConfig(ctx.getExplainConfig()); + rewrittenCtx.setExplainPlan(ctx.isExplainPlan()); + rewrittenCtx.setCmd(rewrittenQueryStr.toString()); + rewrittenCtx.setTempTableForLoad(tempTableObj); + + ASTNode rewrittenTree; + try { + LOG.info("Going to reparse <" + ctx.getCmd() + "> as \n<" + rewrittenQueryStr.toString() + ">"); + rewrittenTree = ParseUtils.parse(rewrittenQueryStr.toString(), rewrittenCtx); + } catch (ParseException e) { + throw new SemanticException(ErrorMsg.LOAD_DATA_LAUNCH_JOB_PARSE_ERROR.getMsg(), e); + } + + // Step 4 : Reanalyze + super.analyze(rewrittenTree, rewrittenCtx); + + queryReWritten = true; + } + + @Override + public HashSet<WriteEntity> getAllOutputs() { + return outputs; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index fad0e5c..9e49bfa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -421,7 +421,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { listMapJoinOpsNoReducer = new ArrayList<AbstractMapJoinOperator<? 
extends MapJoinDesc>>(); groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>(); prunedPartitions = new HashMap<String, PrunedPartitionList>(); - tabNameToTabObject = new HashMap<String, Table>(); unparseTranslator = new UnparseTranslator(conf); autogenColAliasPrfxLbl = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL); @@ -14779,4 +14778,46 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { AGGREGATE_REBUILD, NO_AGGREGATE_REBUILD } + + /** + * @return table name in db.table form with proper quoting/escaping to be used in a SQL statement + */ + protected String getFullTableNameForSQL(ASTNode n) throws SemanticException { + switch (n.getType()) { + case HiveParser.TOK_TABNAME: + String[] tableName = getQualifiedTableName(n); + return getDotName(new String[] { + HiveUtils.unparseIdentifier(tableName[0], this.conf), + HiveUtils.unparseIdentifier(tableName[1], this.conf) }); + case HiveParser.TOK_TABREF: + return getFullTableNameForSQL((ASTNode) n.getChild(0)); + default: + throw raiseWrongType("TOK_TABNAME", n); + } + } + + protected static IllegalArgumentException raiseWrongType(String expectedTokName, ASTNode n) { + return new IllegalArgumentException("Expected " + expectedTokName + "; got " + n.getType()); + } + + /** + * Append list of partition columns to Insert statement, i.e. the 1st set of partCol1,partCol2 + * INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2... 
+ */ + protected void addPartitionColsToInsert(List<FieldSchema> partCols, StringBuilder rewrittenQueryStr) { + // If the table is partitioned we have to put the partition() clause in + if (partCols != null && partCols.size() > 0) { + rewrittenQueryStr.append(" partition ("); + boolean first = true; + for (FieldSchema fschema : partCols) { + if (first) + first = false; + else + rewrittenQueryStr.append(", "); + //would be nice if there was a way to determine if quotes are needed + rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf)); + } + rewrittenQueryStr.append(")"); + } + } } http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java index 2f3b07f..512f1ff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -350,26 +350,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer { currentOperation = Context.Operation.DELETE; reparseAndSuperAnalyze(tree); } - /** - * Append list of partition columns to Insert statement, i.e. the 1st set of partCol1,partCol2 - * INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2... 
- */ - private void addPartitionColsToInsert(List<FieldSchema> partCols, StringBuilder rewrittenQueryStr) { - // If the table is partitioned we have to put the partition() clause in - if (partCols != null && partCols.size() > 0) { - rewrittenQueryStr.append(" partition ("); - boolean first = true; - for (FieldSchema fschema : partCols) { - if (first) - first = false; - else - rewrittenQueryStr.append(", "); - //would be nice if there was a way to determine if quotes are needed - rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf)); - } - rewrittenQueryStr.append(")"); - } - } + /** * Append list of partition columns to Insert statement, i.e. the 2nd set of partCol1,partCol2 * INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2... @@ -1261,22 +1242,8 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer { throw raiseWrongType("TOK_TABREF|TOK_TABNAME|TOK_SUBQUERY", n); } } - /** - * @return table name in db.table form with proper quoting/escaping to be used in a SQL statement - */ - private String getFullTableNameForSQL(ASTNode n) throws SemanticException { - switch (n.getType()) { - case HiveParser.TOK_TABNAME: - String[] tableName = getQualifiedTableName(n); - return getDotName(new String[] { - HiveUtils.unparseIdentifier(tableName[0], this.conf), - HiveUtils.unparseIdentifier(tableName[1], this.conf) }); - case HiveParser.TOK_TABREF: - return getFullTableNameForSQL((ASTNode) n.getChild(0)); - default: - throw raiseWrongType("TOK_TABNAME", n); - } - } private static final class ReparseResult { + + private static final class ReparseResult { private final ASTNode rewrittenTree; private final Context rewrittenCtx; ReparseResult(ASTNode n, Context c) { @@ -1284,9 +1251,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer { rewrittenCtx = c; } } - private static IllegalArgumentException raiseWrongType(String expectedTokName, ASTNode n) { - return new 
IllegalArgumentException("Expected " + expectedTokName + "; got " + n.getType()); - } + private boolean isAliased(ASTNode n) { switch (n.getType()) { case HiveParser.TOK_TABREF: http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java index ec8c150..11c5930 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java @@ -352,6 +352,8 @@ public class TestTxnLoadData extends TxnCommandsBaseForTests { * if loaded data is not bucketed properly. This test is to capture that this is still the default. * If the default is changed, Load Data should probably do more validation to ensure data is * properly distributed into files and files are named correctly. + * With the availability of new feature to rewrite such "load data" commands into insert-as-select, + * the test should let the load data pass. */ @Test public void testValidations() throws Exception { @@ -364,7 +366,8 @@ public class TestTxnLoadData extends TxnCommandsBaseForTests { //this creates an ORC data file with correct schema under table root runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); CommandProcessorResponse cpr = runStatementOnDriverNegative("load data local inpath '" + getWarehouseDir() + "' into table T"); - Assert.assertTrue(cpr.getErrorMessage().contains("Load into bucketed tables are disabled")); + // This condition should not occur with the new support of rewriting load into IAS. 
+ Assert.assertFalse(cpr.getErrorMessage().contains("Load into bucketed tables are disabled")); } private void checkExpected(List<String> rs, String[][] expected, String msg) { super.checkExpected(rs, expected, msg, LOG, true); http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/ql/src/test/queries/clientnegative/load_part_nospec.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/load_part_nospec.q b/ql/src/test/queries/clientnegative/load_part_nospec.q deleted file mode 100644 index 8151799..0000000 --- a/ql/src/test/queries/clientnegative/load_part_nospec.q +++ /dev/null @@ -1,2 +0,0 @@ -create table hive_test_src ( col1 string ) partitioned by (pcol1 string) stored as textfile; -load data local inpath '../../data/files/test.dat' into table hive_test_src; http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/ql/src/test/queries/clientnegative/nopart_load.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/nopart_load.q b/ql/src/test/queries/clientnegative/nopart_load.q deleted file mode 100644 index 966982f..0000000 --- a/ql/src/test/queries/clientnegative/nopart_load.q +++ /dev/null @@ -1,5 +0,0 @@ - -CREATE TABLE nopart_load(a STRING, b STRING) PARTITIONED BY (ds STRING); - -load data local inpath '../../data/files/kv1.txt' overwrite into table nopart_load ; - http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/ql/src/test/queries/clientpositive/load_data_using_job.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/load_data_using_job.q b/ql/src/test/queries/clientpositive/load_data_using_job.q new file mode 100644 index 0000000..dceca48 --- /dev/null +++ b/ql/src/test/queries/clientpositive/load_data_using_job.q @@ -0,0 +1,83 @@ +set hive.stats.column.autogather=false; +set hive.strict.checks.bucketing=false; + +set 
hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.auto.convert.join=true; +set hive.join.emit.interval=2; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; + +-- Single partition +-- Regular load happens. +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) STORED AS TEXTFILE; +explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); +select * from srcbucket_mapjoin; + +drop table srcbucket_mapjoin; + +-- Triggers a Tez job as partition info is missing from load data. +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) STORED AS TEXTFILE; +explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin; +select * from srcbucket_mapjoin; +drop table srcbucket_mapjoin; + +-- Multi partitions +-- Triggers a Tez job as partition info is missing from load data. 
+CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string, hr int) STORED AS TEXTFILE; +explain load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin; +select * from srcbucket_mapjoin; +drop table srcbucket_mapjoin; + +-- Multi partitions and directory with files (no sub dirs) +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string, hr int) STORED AS TEXTFILE; +explain load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin; +load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin; +select * from srcbucket_mapjoin; +drop table srcbucket_mapjoin; + +-- Bucketing +CREATE TABLE srcbucket_mapjoin(key int, value string) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE; +explain load data local inpath '../../data/files/load_data_job/bucketing.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../../data/files/load_data_job/bucketing.txt' INTO TABLE srcbucket_mapjoin; +select * from srcbucket_mapjoin; +drop table srcbucket_mapjoin; + +-- Single partition and bucketing +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE; +explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin; +select * from srcbucket_mapjoin; +drop table srcbucket_mapjoin; + +-- Multiple partitions and bucketing +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string, hr int) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE; 
+explain load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin; +select * from srcbucket_mapjoin; +drop table srcbucket_mapjoin; + +-- Multiple partitions, bucketing, and directory with files (no sub dirs) +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string, hr int) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE; +explain load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin; +load data local inpath '../../data/files/load_data_job/partitions/subdir' INTO TABLE srcbucket_mapjoin; +select * from srcbucket_mapjoin; +drop table srcbucket_mapjoin; + +-- Multiple partitions, bucketing, and directory with files and sub dirs +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string, hr int) clustered by (key) sorted by (key) into 5 buckets STORED AS TEXTFILE; +explain load data local inpath '../../data/files/load_data_job/partitions' INTO TABLE srcbucket_mapjoin; +load data local inpath '../../data/files/load_data_job/partitions' INTO TABLE srcbucket_mapjoin; +select * from srcbucket_mapjoin; +drop table srcbucket_mapjoin; + +-- Single partition, multiple buckets +CREATE TABLE srcbucket_mapjoin(key int, value string, ds string) partitioned by (hr int) clustered by (key, value) sorted by (key, value) into 5 buckets STORED AS TEXTFILE; +explain load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin; +select * from srcbucket_mapjoin; +drop table srcbucket_mapjoin; 
http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/ql/src/test/results/clientnegative/load_part_nospec.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/load_part_nospec.q.out b/ql/src/test/results/clientnegative/load_part_nospec.q.out deleted file mode 100644 index bebaf92..0000000 --- a/ql/src/test/results/clientnegative/load_part_nospec.q.out +++ /dev/null @@ -1,9 +0,0 @@ -PREHOOK: query: create table hive_test_src ( col1 string ) partitioned by (pcol1 string) stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@hive_test_src -POSTHOOK: query: create table hive_test_src ( col1 string ) partitioned by (pcol1 string) stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@hive_test_src -FAILED: SemanticException [Error 10062]: Need to specify partition columns because the destination table is partitioned http://git-wip-us.apache.org/repos/asf/hive/blob/46c5580b/ql/src/test/results/clientnegative/nopart_load.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/nopart_load.q.out b/ql/src/test/results/clientnegative/nopart_load.q.out deleted file mode 100644 index 8815146..0000000 --- a/ql/src/test/results/clientnegative/nopart_load.q.out +++ /dev/null @@ -1,9 +0,0 @@ -PREHOOK: query: CREATE TABLE nopart_load(a STRING, b STRING) PARTITIONED BY (ds STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@nopart_load -POSTHOOK: query: CREATE TABLE nopart_load(a STRING, b STRING) PARTITIONED BY (ds STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@nopart_load -FAILED: SemanticException [Error 10062]: Need to specify partition columns because the destination table is partitioned