http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/test/resources/orc-file-dump.out ---------------------------------------------------------------------- diff --git a/orc/src/test/resources/orc-file-dump.out b/orc/src/test/resources/orc-file-dump.out deleted file mode 100644 index 70f7fbd..0000000 --- a/orc/src/test/resources/orc-file-dump.out +++ /dev/null @@ -1,195 +0,0 @@ -Structure for TestFileDump.testDump.orc -File Version: 0.12 with HIVE_13083 -Rows: 21000 -Compression: ZLIB -Compression size: 4096 -Type: struct<i:int,l:bigint,s:string> - -Stripe Statistics: - Stripe 1: - Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826 - Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280 - Stripe 2: - Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427 - Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504 - Stripe 3: - Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551 - Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641 - Stripe 4: - Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 8533549236 - Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470 - Stripe 5: - Column 0: count: 1000 hasNull: false - Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 - Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 - Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 - -File Statistics: - Column 0: count: 21000 hasNull: false - Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 193017464403 - Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266 - Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 - -Stripes: - Stripe: offset: 3 data: 63786 rows: 5000 tail: 79 index: 439 - Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 166 - Stream: column 2 section ROW_INDEX start: 186 length 169 - Stream: column 3 section ROW_INDEX start: 355 length 87 - Stream: column 1 section DATA start: 442 length 20035 - Stream: column 2 section DATA start: 20477 length 40050 - Stream: column 3 section DATA start: 60527 length 3543 - Stream: column 3 section LENGTH start: 64070 length 25 - Stream: column 3 section DICTIONARY_DATA start: 64095 length 133 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DIRECT_V2 - Encoding column 3: DICTIONARY_V2[35] - Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2145365268 max: 2135491313 sum: 7521792925 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -2139452528 max: 2147223299 sum: -12923774313 positions: 0,2050,488 - Entry 2: count: 1000 hasNull: false min: -2142420586 max: 2143898386 sum: -25521983511 positions: 4099,2054,464 - Entry 3: count: 1000 hasNull: 
false min: -2137233441 max: 2144267163 sum: 40993386199 positions: 8198,2058,440 - Entry 4: count: 1000 hasNull: false min: -2146021688 max: 2146838901 sum: -9553628474 positions: 12297,2062,416 - Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 28693,14,416 - Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149 - Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3 - Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32 - Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45 - Stripe: offset: 64307 data: 63775 rows: 5000 tail: 79 index: 432 - Stream: column 0 section ROW_INDEX start: 64307 length 17 - Stream: column 1 section ROW_INDEX start: 64324 length 164 - Stream: column 2 section ROW_INDEX start: 64488 length 168 - Stream: column 3 section ROW_INDEX start: 64656 length 83 - Stream: column 1 section DATA start: 64739 length 20035 - Stream: column 2 section DATA start: 84774 length 40050 - Stream: column 3 section DATA start: 124824 length 3532 - Stream: column 3 section LENGTH start: 128356 length 25 - Stream: column 3 section DICTIONARY_DATA start: 128381 length 133 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DIRECT_V2 - Encoding column 3: DICTIONARY_V2[35] - Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2143799121 max: 2145249879 sum: -6966266181 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -2146733128 max: 2147001622 sum: -35930106333 positions: 0,2050,488 - Entry 2: count: 1000 hasNull: false min: -2144302712 max: 2146299933 sum: 6944230435 positions: 4099,2054,464 - Entry 3: count: 1000 hasNull: false min: -2145172948 max: 2144335014 sum: -29624404959 positions: 8198,2058,440 - Entry 4: count: 1000 hasNull: false min: -2146428427 max: 2144067253 sum: 65584220465 positions: 12297,2062,416 - Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 28693,14,416 - Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12 - Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70 - Entry 3: count: 1000 hasNull: 
false min: Darkness, max: worst sum: 3931 positions: 0,2250,43 - Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88 - Stripe: offset: 128593 data: 63787 rows: 5000 tail: 79 index: 438 - Stream: column 0 section ROW_INDEX start: 128593 length 17 - Stream: column 1 section ROW_INDEX start: 128610 length 163 - Stream: column 2 section ROW_INDEX start: 128773 length 168 - Stream: column 3 section ROW_INDEX start: 128941 length 90 - Stream: column 1 section DATA start: 129031 length 20035 - Stream: column 2 section DATA start: 149066 length 40050 - Stream: column 3 section DATA start: 189116 length 3544 - Stream: column 3 section LENGTH start: 192660 length 25 - Stream: column 3 section DICTIONARY_DATA start: 192685 length 133 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DIRECT_V2 - Encoding column 3: DICTIONARY_V2[35] - Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2146993718 max: 2144179881 sum: -7829543271 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -2144095505 max: 2144883384 sum: 51623839692 positions: 0,2050,488 - Entry 2: count: 1000 hasNull: false min: -2144113995 max: 2143773575 sum: 56574412741 positions: 4099,2054,464 - Entry 3: count: 1000 hasNull: false min: -2146954065 max: 2146794873 sum: 4336083432 positions: 8198,2058,440 - Entry 4: count: 1000 hasNull: false min: -2135511523 max: 2147378179 sum: 27955949957 positions: 12297,2062,416 - Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 28693,14,416 - Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174 - Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69 - Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194 - Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43 - Stripe: offset: 192897 data: 63817 rows: 5000 tail: 79 index: 440 - Stream: column 0 section ROW_INDEX start: 192897 length 17 - Stream: column 1 section ROW_INDEX start: 192914 length 165 - Stream: column 2 section ROW_INDEX start: 193079 length 167 - Stream: column 3 section ROW_INDEX start: 193246 length 91 - Stream: column 1 section DATA start: 193337 length 20035 - Stream: column 2 section DATA start: 213372 length 40050 - Stream: column 3 section DATA start: 253422 length 3574 - Stream: column 3 section LENGTH start: 256996 length 25 - Stream: column 3 section DICTIONARY_DATA start: 257021 length 133 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DIRECT_V2 - Encoding column 3: DICTIONARY_V2[35] - Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2141355639 max: 2145520931 sum: 2726719912 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -2138324170 max: 2140167376 sum: 
-23606674002 positions: 0,2050,488 - Entry 2: count: 1000 hasNull: false min: -2146658006 max: 2144329742 sum: -41530109703 positions: 4099,2054,464 - Entry 3: count: 1000 hasNull: false min: -2144207593 max: 2139456355 sum: 13559842458 positions: 8198,2058,440 - Entry 4: count: 1000 hasNull: false min: -2145744719 max: 2145417153 sum: 57383770571 positions: 12297,2062,416 - Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 4099,2,488 - Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 12297,6,464 - Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20495,10,440 - Entry 4: count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 28693,14,416 - Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431 - Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52 - Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104 - Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131 - Stripe: offset: 257233 data: 12943 rows: 1000 tail: 71 index: 131 - Stream: column 0 section ROW_INDEX start: 257233 length 12 - Stream: column 1 section ROW_INDEX start: 257245 length 38 - Stream: column 2 section ROW_INDEX start: 257283 length 41 - Stream: column 3 section ROW_INDEX start: 257324 length 40 - Stream: column 1 section DATA start: 257364 length 4007 - Stream: column 2 section DATA start: 261371 length 8010 - Stream: column 3 section DATA start: 269381 length 768 - Stream: column 3 section LENGTH start: 270149 length 25 - Stream: column 3 section DICTIONARY_DATA start: 270174 length 133 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DIRECT_V2 - Encoding column 3: DICTIONARY_V2[35] - Row group indices for column 1: - Entry 0: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 positions: 0,0,0 - Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0 - Row group indices for column 3: - Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0 - -File length: 270923 bytes -Padding length: 0 bytes -Padding ratio: 0% -________________________________________________________________________________________________________________________ -
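[Editor's note, not part of the commit] The deleted orc-file-dump.out fixture above is the expected output of Hive's ORC file-dump tool; the same file-level and stripe-level metadata is exposed by the standalone org.apache.orc reader API that this commit switches Hive to. The following is a minimal sketch showing how a similar summary could be printed with that API — the class name OrcMetaSketch, the argument handling, and the fallback path "example.orc" are illustrative assumptions, and the exact accessor set should be checked against the ORC release in use (orc.version is set to 1.3.1 later in this diff).

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.orc.ColumnStatistics;
    import org.apache.orc.OrcFile;
    import org.apache.orc.Reader;
    import org.apache.orc.StripeInformation;
    import org.apache.orc.StripeStatistics;

    public class OrcMetaSketch {
      public static void main(String[] args) throws Exception {
        // Path to an ORC file; "example.orc" is a placeholder, not a file from this commit.
        Path path = new Path(args.length > 0 ? args[0] : "example.orc");
        Configuration conf = new Configuration();

        // Open the file through the standalone ORC reader API (org.apache.orc).
        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));

        // File-level metadata, roughly the header of the dump above.
        System.out.println("Rows: " + reader.getNumberOfRows());
        System.out.println("Compression: " + reader.getCompressionKind());
        System.out.println("Compression size: " + reader.getCompressionSize());
        System.out.println("Type: " + reader.getSchema());

        // Per-stripe column statistics (count / hasNull), as in the "Stripe Statistics:" section.
        int stripeIx = 0;
        for (StripeStatistics stats : reader.getStripeStatistics()) {
          System.out.println("  Stripe " + (++stripeIx) + ":");
          ColumnStatistics[] cols = stats.getColumnStatistics();
          for (int c = 0; c < cols.length; c++) {
            System.out.println("    Column " + c + ": count: " + cols[c].getNumberOfValues()
                + " hasNull: " + cols[c].hasNull());
          }
        }

        // Stripe layout (offset, data/index/footer lengths, row count), as in the "Stripes:" section.
        for (StripeInformation stripe : reader.getStripes()) {
          System.out.println("  Stripe: offset: " + stripe.getOffset()
              + " data: " + stripe.getDataLength()
              + " rows: " + stripe.getNumberOfRows()
              + " tail: " + stripe.getFooterLength()
              + " index: " + stripe.getIndexLength());
        }
      }
    }

For the dump format itself, the orc-tools artifact that ql/pom.xml pulls in further down in this diff ships a FileDump utility (org.apache.orc.tools.FileDump) that produces output in essentially this layout, so the deleted fixtures' format lives on in the upstream ORC project rather than in Hive's source tree.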
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/test/resources/orc-file-has-null.out ---------------------------------------------------------------------- diff --git a/orc/src/test/resources/orc-file-has-null.out b/orc/src/test/resources/orc-file-has-null.out deleted file mode 100644 index e98a73f..0000000 --- a/orc/src/test/resources/orc-file-has-null.out +++ /dev/null @@ -1,112 +0,0 @@ -Structure for TestOrcFile.testHasNull.orc -File Version: 0.12 with HIVE_13083 -Rows: 20000 -Compression: ZLIB -Compression size: 4096 -Type: struct<bytes1:binary,string1:string> - -Stripe Statistics: - Stripe 1: - Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false sum: 15000 - Column 2: count: 2000 hasNull: true min: RG1 max: RG3 sum: 6000 - Stripe 2: - Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false sum: 15000 - Column 2: count: 0 hasNull: true - Stripe 3: - Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false sum: 15000 - Column 2: count: 5000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 40000 - Stripe 4: - Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false sum: 15000 - Column 2: count: 0 hasNull: true - -File Statistics: - Column 0: count: 20000 hasNull: false - Column 1: count: 20000 hasNull: false sum: 60000 - Column 2: count: 7000 hasNull: true min: RG1 max: STRIPE-3 sum: 46000 - -Stripes: - Stripe: offset: 3 data: 220 rows: 5000 tail: 65 index: 154 - Stream: column 0 section ROW_INDEX start: 3 length 17 - Stream: column 1 section ROW_INDEX start: 20 length 60 - Stream: column 2 section ROW_INDEX start: 80 length 77 - Stream: column 1 section DATA start: 157 length 159 - Stream: column 1 section LENGTH start: 316 length 15 - Stream: column 2 section PRESENT start: 331 length 13 - Stream: column 2 section DATA start: 344 length 18 - Stream: column 2 section LENGTH start: 362 length 6 - Stream: column 2 section DICTIONARY_DATA start: 368 length 9 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DICTIONARY_V2[2] - Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: RG1 max: RG1 sum: 3000 positions: 0,0,0,0,0,0,0 - Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,4,488 - Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,4,488 - Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,12,488 - Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,12,488 - Stripe: offset: 442 data: 185 rows: 5000 tail: 64 index: 116 - Stream: column 0 section ROW_INDEX start: 442 length 17 - Stream: column 1 section ROW_INDEX start: 459 length 60 - Stream: column 2 section ROW_INDEX start: 519 length 39 - Stream: column 1 section DATA start: 558 length 159 - Stream: column 1 section LENGTH start: 717 length 15 - Stream: column 2 section PRESENT start: 732 length 11 - Stream: column 2 section DATA start: 743 length 0 - Stream: column 2 section LENGTH start: 743 length 0 - Stream: column 2 section DICTIONARY_DATA start: 743 length 0 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DICTIONARY_V2[0] - Row group indices for column 2: - Entry 0: count: 0 hasNull: true positions: 0,0,0,0,0,0,0 - Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,0,0 - Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0 - Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 - Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 - Stripe: offset: 807 data: 206 rows: 5000 tail: 60 
index: 137 - Stream: column 0 section ROW_INDEX start: 807 length 17 - Stream: column 1 section ROW_INDEX start: 824 length 60 - Stream: column 2 section ROW_INDEX start: 884 length 60 - Stream: column 1 section DATA start: 944 length 159 - Stream: column 1 section LENGTH start: 1103 length 15 - Stream: column 2 section DATA start: 1118 length 15 - Stream: column 2 section LENGTH start: 1133 length 6 - Stream: column 2 section DICTIONARY_DATA start: 1139 length 11 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DICTIONARY_V2[1] - Row group indices for column 2: - Entry 0: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,0,0 - Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,4,488 - Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,12,464 - Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,20,440 - Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,28,416 - Stripe: offset: 1210 data: 185 rows: 5000 tail: 64 index: 116 - Stream: column 0 section ROW_INDEX start: 1210 length 17 - Stream: column 1 section ROW_INDEX start: 1227 length 60 - Stream: column 2 section ROW_INDEX start: 1287 length 39 - Stream: column 1 section DATA start: 1326 length 159 - Stream: column 1 section LENGTH start: 1485 length 15 - Stream: column 2 section PRESENT start: 1500 length 11 - Stream: column 2 section DATA start: 1511 length 0 - Stream: column 2 section LENGTH start: 1511 length 0 - Stream: column 2 section DICTIONARY_DATA start: 1511 length 0 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DICTIONARY_V2[0] - Row group indices for column 2: - Entry 0: count: 0 hasNull: true positions: 0,0,0,0,0,0,0 - Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,0,0 - Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0 - Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 - Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 - -File length: 1823 bytes -Padding length: 0 bytes -Padding ratio: 0% -________________________________________________________________________________________________________________________ - http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/packaging/pom.xml ---------------------------------------------------------------------- diff --git a/packaging/pom.xml b/packaging/pom.xml index bfe3637..2439e19 100644 --- a/packaging/pom.xml +++ b/packaging/pom.xml @@ -235,11 +235,6 @@ </dependency> <dependency> <groupId>org.apache.hive</groupId> - <artifactId>hive-orc</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>org.apache.hive</groupId> <artifactId>hive-storage-api</artifactId> </dependency> <dependency> http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index b4c0b81..5121770 100644 --- a/pom.xml +++ b/pom.xml @@ -43,7 +43,6 @@ <module>hplsql</module> <module>jdbc</module> <module>metastore</module> - <module>orc</module> <module>ql</module> <module>serde</module> <module>service-rpc</module> @@ -173,6 +172,7 @@ <libthrift.version>0.9.3</libthrift.version> <log4j2.version>2.6.2</log4j2.version> <opencsv.version>2.3</opencsv.version> + <orc.version>1.3.1</orc.version> <mockito-all.version>1.9.5</mockito-all.version> <mina.version>2.0.0-M5</mina.version> 
<netty.version>4.0.29.Final</netty.version> @@ -507,6 +507,21 @@ <artifactId>stax-api</artifactId> <version>${stax.version}</version> </dependency> + <dependency> + <groupId>org.apache.orc</groupId> + <artifactId>orc-core</artifactId> + <version>${orc.version}</version> + <exclusions> + <exclusion> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.hive</groupId> + <artifactId>hive-storage-api</artifactId> + </exclusion> + </exclusions> + </dependency> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-storage-api</artifactId> @@ -620,11 +635,6 @@ <version>${datanucleus-jdo.version}</version> </dependency> <dependency> - <groupId>org.iq80.snappy</groupId> - <artifactId>snappy</artifactId> - <version>${snappy.version}</version> - </dependency> - <dependency> <groupId>org.json</groupId> <artifactId>json</artifactId> <version>${json.version}</version> http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/pom.xml ---------------------------------------------------------------------- diff --git a/ql/pom.xml b/ql/pom.xml index b17288c..84e83ee 100644 --- a/ql/pom.xml +++ b/ql/pom.xml @@ -140,11 +140,6 @@ <version>${commons-lang3.version}</version> </dependency> <dependency> - <groupId>org.iq80.snappy</groupId> - <artifactId>snappy</artifactId> - <version>${snappy.version}</version> - </dependency> - <dependency> <groupId>commons-lang</groupId> <artifactId>commons-lang</artifactId> <version>${commons-lang.version}</version> @@ -291,6 +286,11 @@ <version>${hadoop.version}</version> <optional>true</optional> </dependency> + <dependency> + <groupId>org.apache.orc</groupId> + <artifactId>orc-tools</artifactId> + <version>${orc.version}</version> + </dependency> <dependency> <groupId>org.apache.ivy</groupId> @@ -861,14 +861,15 @@ <include>com.googlecode.javaewah:JavaEWAH</include> <include>javolution:javolution</include> <include>com.google.protobuf:protobuf-java</include> - <include>org.iq80.snappy:snappy</include> + <include>io.airlift:aircompressor</include> <include>org.codehaus.jackson:jackson-core-asl</include> <include>org.codehaus.jackson:jackson-mapper-asl</include> <include>com.google.guava:guava</include> <include>net.sf.opencsv:opencsv</include> <include>org.apache.hive:spark-client</include> <include>org.apache.hive:hive-storage-api</include> - <include>org.apache.hive:hive-orc</include> + <include>org.apache.orc:orc-core</include> + <include>org.apache.orc:orc-tools</include> <include>joda-time:joda-time</include> </includes> </artifactSet> http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java index 5366020..96ca736 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.orc.FileMetadata; +import org.apache.orc.PhysicalWriter; import org.apache.orc.impl.MemoryManager; import org.apache.orc.TypeDescription; import org.apache.orc.impl.OrcTail; @@ -267,6 +268,11 @@ public final class OrcFile extends org.apache.orc.OrcFile { 
return this; } + public WriterOptions physicalWriter(PhysicalWriter writer) { + super.physicalWriter(writer); + return this; + } + ObjectInspector getInspector() { return inspector; } http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 6281edd..99cc506 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -1490,14 +1490,15 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, stripeStats = orcTail.getStripeStatistics(); fileTypes = orcTail.getTypes(); TypeDescription fileSchema = OrcUtils.convertTypeFromProtobuf(fileTypes, 0); + Reader.Options readerOptions = new Reader.Options(context.conf); if (readerTypes == null) { readerIncluded = genIncludedColumns(fileSchema, context.conf); - evolution = new SchemaEvolution(fileSchema, readerIncluded); + evolution = new SchemaEvolution(fileSchema, readerOptions.include(readerIncluded)); } else { // The reader schema always comes in without ACID columns. TypeDescription readerSchema = OrcUtils.convertTypeFromProtobuf(readerTypes, 0); readerIncluded = genIncludedColumns(readerSchema, context.conf); - evolution = new SchemaEvolution(fileSchema, readerSchema, readerIncluded); + evolution = new SchemaEvolution(fileSchema, readerSchema, readerOptions.include(readerIncluded)); if (!isOriginal) { // The SchemaEvolution class has added the ACID metadata columns. Let's update our // readerTypes so PPD code will work correctly. http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java index 492c64c..65f4a24 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java @@ -85,7 +85,8 @@ public class OrcRecordUpdater implements RecordUpdater { private Path deleteEventPath; private final FileSystem fs; private OrcFile.WriterOptions writerOptions; - private Writer writer; + private Writer writer = null; + private boolean writerClosed = false; private Writer deleteEventWriter = null; private final FSDataOutputStream flushLengths; private final OrcStruct item; @@ -247,6 +248,14 @@ public class OrcRecordUpdater implements RecordUpdater { writerOptions = OrcFile.writerOptions(optionsCloneForDelta.getTableProperties(), optionsCloneForDelta.getConfiguration()); } + if (this.acidOperationalProperties.isSplitUpdate()) { + // If this is a split-update, we initialize a delete delta file path in anticipation that + // they would write update/delete events to that separate file. + // This writes to a file in directory which starts with "delete_delta_..." + // The actual initialization of a writer only happens if any delete events are written. 
+ this.deleteEventPath = AcidUtils.createFilename(path, + optionsCloneForDelta.writingDeleteDelta(true)); + } // get buffer size and stripe size for base writer int baseBufferSizeValue = writerOptions.getBufferSize(); @@ -262,14 +271,6 @@ public class OrcRecordUpdater implements RecordUpdater { rowInspector = (StructObjectInspector)options.getInspector(); writerOptions.inspector(createEventSchema(findRecId(options.getInspector(), options.getRecordIdColumn()))); - this.writer = OrcFile.createWriter(this.path, writerOptions); - if (this.acidOperationalProperties.isSplitUpdate()) { - // If this is a split-update, we initialize a delete delta file path in anticipation that - // they would write update/delete events to that separate file. - // This writes to a file in directory which starts with "delete_delta_..." - // The actual initialization of a writer only happens if any delete events are written. - this.deleteEventPath = AcidUtils.createFilename(path, options.writingDeleteDelta(true)); - } item = new OrcStruct(FIELDS); item.setFieldValue(OPERATION, operation); item.setFieldValue(CURRENT_TRANSACTION, currentTransaction); @@ -367,6 +368,9 @@ public class OrcRecordUpdater implements RecordUpdater { item.setFieldValue(OrcRecordUpdater.OPERATION, new IntWritable(operation)); item.setFieldValue(OrcRecordUpdater.ROW, (operation == DELETE_OPERATION ? null : row)); indexBuilder.addKey(operation, originalTransaction, bucket.get(), rowId); + if (writer == null) { + writer = OrcFile.createWriter(path, writerOptions); + } writer.addRow(item); } @@ -469,6 +473,9 @@ public class OrcRecordUpdater implements RecordUpdater { throw new IllegalStateException("Attempting to flush a RecordUpdater on " + path + " with a single transaction."); } + if (writer == null) { + writer = OrcFile.createWriter(path, writerOptions); + } long len = writer.writeIntermediateFooter(); flushLengths.writeLong(len); OrcInputFormat.SHIMS.hflush(flushLengths); @@ -480,21 +487,19 @@ public class OrcRecordUpdater implements RecordUpdater { if (flushLengths == null) { fs.delete(path, false); } - } else { - if (writer != null) { - if (acidOperationalProperties.isSplitUpdate()) { - // When split-update is enabled, we can choose not to write - // any delta files when there are no inserts. In such cases only the delete_deltas - // would be written & they are closed separately below. - if (indexBuilder.acidStats.inserts > 0) { - writer.close(); // normal close, when there are inserts. - } else { - // Just remove insert delta paths, when there are no insert events. - fs.delete(path, false); - } - } else { - writer.close(); // normal close. + } else if (!writerClosed) { + if (acidOperationalProperties.isSplitUpdate()) { + // When split-update is enabled, we can choose not to write + // any delta files when there are no inserts. In such cases only the delete_deltas + // would be written & they are closed separately below. + if (writer != null && indexBuilder.acidStats.inserts > 0) { + writer.close(); // normal close, when there are inserts. + } + } else { + if (writer == null) { + writer = OrcFile.createWriter(path, writerOptions); } + writer.close(); // normal close. 
} if (deleteEventWriter != null) { if (deleteEventIndexBuilder.acidStats.deletes > 0) { @@ -505,7 +510,6 @@ public class OrcRecordUpdater implements RecordUpdater { fs.delete(deleteEventPath, false); } } - } if (flushLengths != null) { flushLengths.close(); @@ -513,6 +517,7 @@ public class OrcRecordUpdater implements RecordUpdater { } writer = null; deleteEventWriter = null; + writerClosed = true; } @Override @@ -524,11 +529,6 @@ public class OrcRecordUpdater implements RecordUpdater { return stats; } - @VisibleForTesting - Writer getWriter() { - return writer; - } - private static final Charset utf8 = Charset.forName("UTF-8"); private static final CharsetDecoder utf8Decoder = utf8.newDecoder(); http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java index 3e4ec2e..dcefada 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java @@ -60,9 +60,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspe import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; -import org.apache.orc.impl.PhysicalWriter; import com.google.common.annotations.VisibleForTesting; +import org.apache.orc.PhysicalWriter; /** * An ORC file writer. The file is divided into stripes, which is the natural @@ -97,15 +97,6 @@ public class WriterImpl extends org.apache.orc.impl.WriterImpl implements Writer this.fields = initializeFieldsFromOi(inspector); } - public WriterImpl(PhysicalWriter writer, - Path pathForMem, - OrcFile.WriterOptions opts) throws IOException { - super(writer, pathForMem, opts); - this.inspector = opts.getInspector(); - this.internalBatch = opts.getSchema().createRowBatch(opts.getBatchSize()); - this.fields = initializeFieldsFromOi(inspector); - } - private static StructField[] initializeFieldsFromOi(ObjectInspector inspector) { if (inspector instanceof StructObjectInspector) { List<? 
extends StructField> fieldList = @@ -329,9 +320,4 @@ public class WriterImpl extends org.apache.orc.impl.WriterImpl implements Writer flushInternalBatch(); super.close(); } - - @VisibleForTesting - PhysicalWriter getPhysicalWriter() { - return physWriter; - } } http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java index 0dba1a0..a434763 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.io.orc.encoded; import org.apache.orc.impl.RunLengthByteReader; +import org.apache.orc.impl.SchemaEvolution; import org.apache.orc.impl.StreamName; import java.io.IOException; @@ -57,8 +58,8 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private TimestampStreamReader(int columnId, SettableUncompressedStream present, SettableUncompressedStream data, SettableUncompressedStream nanos, boolean isFileCompressed, OrcProto.ColumnEncoding encoding, - boolean skipCorrupt, String writerTimezoneId) throws IOException { - super(columnId, present, data, nanos, encoding, skipCorrupt, writerTimezoneId); + TreeReaderFactory.Context context) throws IOException { + super(columnId, present, data, nanos, encoding, context); this.isFileCompressed = isFileCompressed; this._presentStream = present; this._secondsStream = data; @@ -117,8 +118,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private ColumnStreamData nanosStream; private CompressionCodec compressionCodec; private OrcProto.ColumnEncoding columnEncoding; - private boolean skipCorrupt; - private String writerTimezone; + private TreeReaderFactory.Context context; public StreamReaderBuilder setColumnIndex(int columnIndex) { this.columnIndex = columnIndex; @@ -150,13 +150,8 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } - public StreamReaderBuilder setWriterTimezone(String writerTimezoneId) { - this.writerTimezone = writerTimezoneId; - return this; - } - - public StreamReaderBuilder skipCorrupt(boolean skipCorrupt) { - this.skipCorrupt = skipCorrupt; + public StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; return this; } @@ -175,7 +170,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { boolean isFileCompressed = compressionCodec != null; return new TimestampStreamReader(columnIndex, present, data, nanos, - isFileCompressed, columnEncoding, skipCorrupt, writerTimezone); + isFileCompressed, columnEncoding, context); } } @@ -196,8 +191,9 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private StringStreamReader(int columnId, SettableUncompressedStream present, SettableUncompressedStream data, SettableUncompressedStream length, SettableUncompressedStream dictionary, - boolean isFileCompressed, OrcProto.ColumnEncoding encoding) throws IOException { - super(columnId, present, data, length, dictionary, encoding); + boolean isFileCompressed, OrcProto.ColumnEncoding encoding, + TreeReaderFactory.Context context) throws IOException { + super(columnId, present, data, length, dictionary, encoding, 
context); this._isDictionaryEncoding = dictionary != null; this._isFileCompressed = isFileCompressed; this._presentStream = present; @@ -288,6 +284,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private ColumnStreamData lengthStream; private CompressionCodec compressionCodec; private OrcProto.ColumnEncoding columnEncoding; + private TreeReaderFactory.Context context; public StreamReaderBuilder setColumnIndex(int columnIndex) { @@ -325,6 +322,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } + public StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; + return this; + } + public StringStreamReader build() throws IOException { SettableUncompressedStream present = StreamUtils .createSettableUncompressedStream(OrcProto.Stream.Kind.PRESENT.name(), @@ -343,7 +345,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { boolean isFileCompressed = compressionCodec != null; return new StringStreamReader(columnIndex, present, data, length, dictionary, - isFileCompressed, columnEncoding); + isFileCompressed, columnEncoding, context); } } @@ -360,8 +362,9 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private ShortStreamReader(int columnId, SettableUncompressedStream present, SettableUncompressedStream data, boolean isFileCompressed, - OrcProto.ColumnEncoding encoding) throws IOException { - super(columnId, present, data, encoding); + OrcProto.ColumnEncoding encoding, + TreeReaderFactory.Context context) throws IOException { + super(columnId, present, data, encoding, context); this.isFileCompressed = isFileCompressed; this._presentStream = present; this._dataStream = data; @@ -404,7 +407,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private ColumnStreamData dataStream; private CompressionCodec compressionCodec; private OrcProto.ColumnEncoding columnEncoding; - + private TreeReaderFactory.Context context; public StreamReaderBuilder setColumnIndex(int columnIndex) { this.columnIndex = columnIndex; @@ -431,6 +434,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } + public StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; + return this; + } + public ShortStreamReader build() throws IOException { SettableUncompressedStream present = StreamUtils .createSettableUncompressedStream(OrcProto.Stream.Kind.PRESENT.name(), @@ -442,7 +450,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { boolean isFileCompressed = compressionCodec != null; return new ShortStreamReader(columnIndex, present, data, isFileCompressed, - columnEncoding); + columnEncoding, context); } } @@ -458,8 +466,9 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private LongStreamReader(int columnId, SettableUncompressedStream present, SettableUncompressedStream data, boolean isFileCompressed, - OrcProto.ColumnEncoding encoding, boolean skipCorrupt) throws IOException { - super(columnId, present, data, encoding, skipCorrupt); + OrcProto.ColumnEncoding encoding, TreeReaderFactory.Context context + ) throws IOException { + super(columnId, present, data, encoding, context); this._isFileCompressed = isFileCompressed; this._presentStream = present; this._dataStream = data; @@ -502,7 +511,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private ColumnStreamData dataStream; private CompressionCodec compressionCodec; private OrcProto.ColumnEncoding 
columnEncoding; - private boolean skipCorrupt; + private TreeReaderFactory.Context context; public StreamReaderBuilder setColumnIndex(int columnIndex) { @@ -530,8 +539,8 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } - public StreamReaderBuilder skipCorrupt(boolean skipCorrupt) { - this.skipCorrupt = skipCorrupt; + public StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; return this; } @@ -546,7 +555,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { boolean isFileCompressed = compressionCodec != null; return new LongStreamReader(columnIndex, present, data, isFileCompressed, - columnEncoding, skipCorrupt); + columnEncoding, context); } } @@ -562,8 +571,9 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private IntStreamReader(int columnId, SettableUncompressedStream present, SettableUncompressedStream data, boolean isFileCompressed, - OrcProto.ColumnEncoding encoding) throws IOException { - super(columnId, present, data, encoding); + OrcProto.ColumnEncoding encoding, TreeReaderFactory.Context context + ) throws IOException { + super(columnId, present, data, encoding, context); this._isFileCompressed = isFileCompressed; this._dataStream = data; this._presentStream = present; @@ -606,7 +616,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private ColumnStreamData dataStream; private CompressionCodec compressionCodec; private OrcProto.ColumnEncoding columnEncoding; - + private TreeReaderFactory.Context context; public StreamReaderBuilder setColumnIndex(int columnIndex) { this.columnIndex = columnIndex; @@ -633,6 +643,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } + public StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; + return this; + } + public IntStreamReader build() throws IOException { SettableUncompressedStream present = StreamUtils .createSettableUncompressedStream(OrcProto.Stream.Kind.PRESENT.name(), @@ -644,7 +659,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { boolean isFileCompressed = compressionCodec != null; return new IntStreamReader(columnIndex, present, data, isFileCompressed, - columnEncoding); + columnEncoding, context); } } @@ -794,7 +809,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private ColumnStreamData presentStream; private ColumnStreamData dataStream; private CompressionCodec compressionCodec; - + private TreeReaderFactory.Context context; public StreamReaderBuilder setColumnIndex(int columnIndex) { this.columnIndex = columnIndex; @@ -816,6 +831,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } + public StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; + return this; + } + public DoubleStreamReader build() throws IOException { SettableUncompressedStream present = StreamUtils .createSettableUncompressedStream(OrcProto.Stream.Kind.PRESENT.name(), @@ -845,8 +865,9 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { SettableUncompressedStream presentStream, SettableUncompressedStream valueStream, SettableUncompressedStream scaleStream, boolean isFileCompressed, - OrcProto.ColumnEncoding encoding) throws IOException { - super(columnId, precision, scale, presentStream, valueStream, scaleStream, encoding); + OrcProto.ColumnEncoding encoding, TreeReaderFactory.Context context + ) throws IOException { + 
super(columnId, presentStream, valueStream, scaleStream, encoding, context); this._isFileCompressed = isFileCompressed; this._presentStream = presentStream; this._valueStream = valueStream; @@ -903,6 +924,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private int precision; private CompressionCodec compressionCodec; private OrcProto.ColumnEncoding columnEncoding; + private TreeReaderFactory.Context context; public StreamReaderBuilder setColumnIndex(int columnIndex) { @@ -920,6 +942,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } + public StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; + return this; + } + public StreamReaderBuilder setPresentStream(ColumnStreamData presentStream) { this.presentStream = presentStream; return this; @@ -958,7 +985,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { boolean isFileCompressed = compressionCodec != null; return new DecimalStreamReader(columnIndex, precision, scale, presentInStream, valueInStream, - scaleInStream, isFileCompressed, columnEncoding); + scaleInStream, isFileCompressed, columnEncoding, context); } } @@ -974,8 +1001,9 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private DateStreamReader(int columnId, SettableUncompressedStream present, SettableUncompressedStream data, boolean isFileCompressed, - OrcProto.ColumnEncoding encoding) throws IOException { - super(columnId, present, data, encoding); + OrcProto.ColumnEncoding encoding, TreeReaderFactory.Context context + ) throws IOException { + super(columnId, present, data, encoding, context); this.isFileCompressed = isFileCompressed; this._presentStream = present; this._dataStream = data; @@ -1018,6 +1046,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private ColumnStreamData dataStream; private CompressionCodec compressionCodec; private OrcProto.ColumnEncoding columnEncoding; + private TreeReaderFactory.Context context; public StreamReaderBuilder setColumnIndex(int columnIndex) { this.columnIndex = columnIndex; @@ -1039,6 +1068,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } + public StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; + return this; + } + public StreamReaderBuilder setColumnEncoding(OrcProto.ColumnEncoding encoding) { this.columnEncoding = encoding; return this; @@ -1056,7 +1090,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { boolean isFileCompressed = compressionCodec != null; return new DateStreamReader(columnIndex, present, data, isFileCompressed, - columnEncoding); + columnEncoding, context); } } @@ -1514,8 +1548,8 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private BinaryStreamReader(int columnId, SettableUncompressedStream present, SettableUncompressedStream data, SettableUncompressedStream length, boolean isFileCompressed, - OrcProto.ColumnEncoding encoding) throws IOException { - super(columnId, present, data, length, encoding); + OrcProto.ColumnEncoding encoding, TreeReaderFactory.Context context) throws IOException { + super(columnId, present, data, length, encoding, context); this._isFileCompressed = isFileCompressed; this._presentStream = present; this._dataStream = data; @@ -1570,7 +1604,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private ColumnStreamData lengthStream; private CompressionCodec compressionCodec; private 
OrcProto.ColumnEncoding columnEncoding; - + private TreeReaderFactory.Context context; public StreamReaderBuilder setColumnIndex(int columnIndex) { this.columnIndex = columnIndex; @@ -1602,6 +1636,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } + public StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; + return this; + } + public BinaryStreamReader build() throws IOException { SettableUncompressedStream present = StreamUtils.createSettableUncompressedStream( OrcProto.Stream.Kind.PRESENT.name(), presentStream); @@ -1614,7 +1653,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { boolean isFileCompressed = compressionCodec != null; return new BinaryStreamReader(columnIndex, present, data, length, isFileCompressed, - columnEncoding); + columnEncoding, context); } } @@ -1715,7 +1754,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { public static StructTreeReader createRootTreeReader(TypeDescription schema, List<OrcProto.ColumnEncoding> encodings, EncodedColumnBatch<OrcBatchKey> batch, - CompressionCodec codec, boolean skipCorrupt, String tz, int[] columnMapping) + CompressionCodec codec, TreeReaderFactory.Context context, int[] columnMapping) throws IOException { if (schema.getCategory() != Category.STRUCT) { throw new AssertionError("Schema is not a struct: " + schema); @@ -1737,7 +1776,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { for (int schemaChildIx = 0, inclChildIx = -1; schemaChildIx < childCount; ++schemaChildIx) { if (!batch.hasData(children.get(schemaChildIx).getId())) continue; childReaders[++inclChildIx] = createEncodedTreeReader( - schema.getChildren().get(schemaChildIx), encodings, batch, codec, skipCorrupt, tz); + schema.getChildren().get(schemaChildIx), encodings, batch, codec, context); columnMapping[inclChildIx] = schemaChildIx; } return StructStreamReader.builder() @@ -1745,13 +1784,14 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setCompressionCodec(codec) .setColumnEncoding(encodings.get(0)) .setChildReaders(childReaders) + .setContext(context) .build(); } private static TreeReader createEncodedTreeReader(TypeDescription schema, List<OrcProto.ColumnEncoding> encodings, EncodedColumnBatch<OrcBatchKey> batch, - CompressionCodec codec, boolean skipCorrupt, String tz) throws IOException { + CompressionCodec codec, TreeReaderFactory.Context context) throws IOException { int columnIndex = schema.getId(); ColumnStreamData[] streamBuffers = batch.getColumnData(columnIndex); @@ -1775,7 +1815,8 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { LOG.debug("columnIndex: {} columnType: {} streamBuffers.length: {} columnEncoding: {}" + " present: {} data: {} dictionary: {} lengths: {} secondary: {} tz: {}", columnIndex, schema, streamBuffers.length, columnEncoding, present != null, - data, dictionary != null, lengths != null, secondary != null, tz); + data, dictionary != null, lengths != null, secondary != null, + context.getWriterTimezone()); } switch (schema.getCategory()) { case BINARY: @@ -1793,11 +1834,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { case TIMESTAMP: case DATE: return getPrimitiveTreeReaders(columnIndex, schema, codec, columnEncoding, - present, data, dictionary, lengths, secondary, skipCorrupt, tz); + present, data, dictionary, lengths, secondary, context); case LIST: TypeDescription elementType = schema.getChildren().get(0); TreeReader elementReader = 
createEncodedTreeReader( - elementType, encodings, batch, codec, skipCorrupt, tz); + elementType, encodings, batch, codec, context); return ListStreamReader.builder() .setColumnIndex(columnIndex) .setColumnEncoding(columnEncoding) @@ -1805,14 +1846,15 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setPresentStream(present) .setLengthStream(lengths) .setElementReader(elementReader) + .setContext(context) .build(); case MAP: TypeDescription keyType = schema.getChildren().get(0); TypeDescription valueType = schema.getChildren().get(1); TreeReader keyReader = createEncodedTreeReader( - keyType, encodings, batch, codec, skipCorrupt, tz); + keyType, encodings, batch, codec, context); TreeReader valueReader = createEncodedTreeReader( - valueType, encodings, batch, codec, skipCorrupt, tz); + valueType, encodings, batch, codec, context); return MapStreamReader.builder() .setColumnIndex(columnIndex) .setColumnEncoding(columnEncoding) @@ -1821,6 +1863,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setLengthStream(lengths) .setKeyReader(keyReader) .setValueReader(valueReader) + .setContext(context) .build(); case STRUCT: { int childCount = schema.getChildren().size(); @@ -1828,7 +1871,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { for (int i = 0; i < childCount; i++) { TypeDescription childType = schema.getChildren().get(i); childReaders[i] = createEncodedTreeReader( - childType, encodings, batch, codec, skipCorrupt, tz); + childType, encodings, batch, codec, context); } return StructStreamReader.builder() .setColumnIndex(columnIndex) @@ -1836,6 +1879,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setColumnEncoding(columnEncoding) .setPresentStream(present) .setChildReaders(childReaders) + .setContext(context) .build(); } case UNION: { @@ -1844,7 +1888,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { for (int i = 0; i < childCount; i++) { TypeDescription childType = schema.getChildren().get(i); childReaders[i] = createEncodedTreeReader( - childType, encodings, batch, codec, skipCorrupt, tz); + childType, encodings, batch, codec, context); } return UnionStreamReader.builder() .setColumnIndex(columnIndex) @@ -1853,6 +1897,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setPresentStream(present) .setDataStream(data) .setChildReaders(childReaders) + .setContext(context) .build(); } default: @@ -1863,7 +1908,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private static TreeReader getPrimitiveTreeReaders(final int columnIndex, TypeDescription columnType, CompressionCodec codec, OrcProto.ColumnEncoding columnEncoding, ColumnStreamData present, ColumnStreamData data, ColumnStreamData dictionary, - ColumnStreamData lengths, ColumnStreamData secondary, boolean skipCorrupt, String tz) + ColumnStreamData lengths, ColumnStreamData secondary, TreeReaderFactory.Context context) throws IOException { switch (columnType.getCategory()) { case BINARY: @@ -1874,6 +1919,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setLengthStream(lengths) .setCompressionCodec(codec) .setColumnEncoding(columnEncoding) + .setContext(context) .build(); case BOOLEAN: return BooleanStreamReader.builder() @@ -1896,6 +1942,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setDataStream(data) .setCompressionCodec(codec) .setColumnEncoding(columnEncoding) + .setContext(context) .build(); case INT: return IntStreamReader.builder() @@ 
-1904,6 +1951,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setDataStream(data) .setCompressionCodec(codec) .setColumnEncoding(columnEncoding) + .setContext(context) .build(); case LONG: return LongStreamReader.builder() @@ -1912,7 +1960,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setDataStream(data) .setCompressionCodec(codec) .setColumnEncoding(columnEncoding) - .skipCorrupt(skipCorrupt) + .setContext(context) .build(); case FLOAT: return FloatStreamReader.builder() @@ -1970,6 +2018,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setScaleStream(secondary) .setCompressionCodec(codec) .setColumnEncoding(columnEncoding) + .setContext(context) .build(); case TIMESTAMP: return TimestampStreamReader.builder() @@ -1979,8 +2028,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setNanosStream(secondary) .setCompressionCodec(codec) .setColumnEncoding(columnEncoding) - .setWriterTimezone(tz) - .skipCorrupt(skipCorrupt) + .setContext(context) .build(); case DATE: return DateStreamReader.builder() @@ -1989,6 +2037,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { .setDataStream(data) .setCompressionCodec(codec) .setColumnEncoding(columnEncoding) + .setContext(context) .build(); default: throw new AssertionError("Not a primitive category: " + columnType.getCategory()); @@ -2003,8 +2052,9 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { public ListStreamReader(final int columnIndex, final SettableUncompressedStream present, final SettableUncompressedStream lengthStream, final OrcProto.ColumnEncoding columnEncoding, final boolean isFileCompressed, - final TreeReader elementReader) throws IOException { - super(columnIndex, present, lengthStream, columnEncoding, elementReader); + final TreeReader elementReader, + TreeReaderFactory.Context context) throws IOException { + super(columnIndex, present, context, lengthStream, columnEncoding, elementReader); this._isFileCompressed = isFileCompressed; this._presentStream = present; this._lengthStream = lengthStream; @@ -2062,7 +2112,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private CompressionCodec compressionCodec; private OrcProto.ColumnEncoding columnEncoding; private TreeReader elementReader; - + private TreeReaderFactory.Context context; public ListStreamReader.StreamReaderBuilder setColumnIndex(int columnIndex) { this.columnIndex = columnIndex; @@ -2094,6 +2144,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } + public ListStreamReader.StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; + return this; + } + public ListStreamReader build() throws IOException { SettableUncompressedStream present = StreamUtils .createSettableUncompressedStream(OrcProto.Stream.Kind.PRESENT.name(), @@ -2105,7 +2160,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { boolean isFileCompressed = compressionCodec != null; return new ListStreamReader(columnIndex, present, length, columnEncoding, isFileCompressed, - elementReader); + elementReader, context); } } @@ -2122,8 +2177,9 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { public MapStreamReader(final int columnIndex, final SettableUncompressedStream present, final SettableUncompressedStream lengthStream, final OrcProto.ColumnEncoding columnEncoding, final boolean isFileCompressed, - final TreeReader keyReader, final TreeReader valueReader) 
throws IOException { - super(columnIndex, present, lengthStream, columnEncoding, keyReader, valueReader); + final TreeReader keyReader, final TreeReader valueReader, + TreeReaderFactory.Context context) throws IOException { + super(columnIndex, present, context, lengthStream, columnEncoding, keyReader, valueReader); this._isFileCompressed = isFileCompressed; this._presentStream = present; this._lengthStream = lengthStream; @@ -2188,7 +2244,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private OrcProto.ColumnEncoding columnEncoding; private TreeReader keyReader; private TreeReader valueReader; - + private TreeReaderFactory.Context context; public MapStreamReader.StreamReaderBuilder setColumnIndex(int columnIndex) { this.columnIndex = columnIndex; @@ -2225,6 +2281,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } + public MapStreamReader.StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; + return this; + } + public MapStreamReader build() throws IOException { SettableUncompressedStream present = StreamUtils .createSettableUncompressedStream(OrcProto.Stream.Kind.PRESENT.name(), @@ -2236,7 +2297,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { boolean isFileCompressed = compressionCodec != null; return new MapStreamReader(columnIndex, present, length, columnEncoding, isFileCompressed, - keyReader, valueReader); + keyReader, valueReader, context); } } @@ -2253,8 +2314,8 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { public StructStreamReader(final int columnIndex, final SettableUncompressedStream present, final OrcProto.ColumnEncoding columnEncoding, final boolean isFileCompressed, - final TreeReader[] childReaders) throws IOException { - super(columnIndex, present, columnEncoding, childReaders); + final TreeReader[] childReaders, TreeReaderFactory.Context context) throws IOException { + super(columnIndex, present, context, columnEncoding, childReaders); this._isFileCompressed = isFileCompressed; this._presentStream = present; } @@ -2303,7 +2364,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private CompressionCodec compressionCodec; private OrcProto.ColumnEncoding columnEncoding; private TreeReader[] childReaders; - + private TreeReaderFactory.Context context; public StructStreamReader.StreamReaderBuilder setColumnIndex(int columnIndex) { this.columnIndex = columnIndex; @@ -2330,6 +2391,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } + public StructStreamReader.StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; + return this; + } + public StructStreamReader build() throws IOException { SettableUncompressedStream present = StreamUtils .createSettableUncompressedStream(OrcProto.Stream.Kind.PRESENT.name(), @@ -2337,7 +2403,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { boolean isFileCompressed = compressionCodec != null; return new StructStreamReader(columnIndex, present, columnEncoding, isFileCompressed, - childReaders); + childReaders, context); } } @@ -2354,8 +2420,8 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { public UnionStreamReader(final int columnIndex, final SettableUncompressedStream present, final SettableUncompressedStream dataStream, final OrcProto.ColumnEncoding columnEncoding, final boolean isFileCompressed, - final TreeReader[] childReaders) throws IOException { - super(columnIndex, 
present, columnEncoding, childReaders); + final TreeReader[] childReaders, TreeReaderFactory.Context context) throws IOException { + super(columnIndex, present, context, columnEncoding, childReaders); this._isFileCompressed = isFileCompressed; this._presentStream = present; this._dataStream = dataStream; @@ -2420,7 +2486,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { private CompressionCodec compressionCodec; private OrcProto.ColumnEncoding columnEncoding; private TreeReader[] childReaders; - + private TreeReaderFactory.Context context; public UnionStreamReader.StreamReaderBuilder setColumnIndex(int columnIndex) { this.columnIndex = columnIndex; @@ -2452,6 +2518,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { return this; } + public UnionStreamReader.StreamReaderBuilder setContext(TreeReaderFactory.Context context) { + this.context = context; + return this; + } + public UnionStreamReader build() throws IOException { SettableUncompressedStream present = StreamUtils.createSettableUncompressedStream( OrcProto.Stream.Kind.PRESENT.name(), presentStream); @@ -2461,7 +2532,7 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory { boolean isFileCompressed = compressionCodec != null; return new UnionStreamReader(columnIndex, present, data, - columnEncoding, isFileCompressed, childReaders); + columnEncoding, isFileCompressed, childReaders, context); } } http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java index eab1886..0ffa182 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Schema; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.orc.OrcConf; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,7 +53,19 @@ public class SetProcessor implements CommandProcessor { private static final Logger LOG = LoggerFactory.getLogger(SetProcessor.class); private static final String prefix = "set: "; - private static final Set<String> removedConfigs = Sets.newHashSet("hive.mapred.supports.subdirectories","hive.enforce.sorting","hive.enforce.bucketing", "hive.outerjoin.supports.filters"); + private static final Set<String> removedConfigs = + Sets.newHashSet("hive.mapred.supports.subdirectories", + "hive.enforce.sorting","hive.enforce.bucketing", + "hive.outerjoin.supports.filters"); + // Allow the user to set the ORC properties without getting an error. 
+ static { + for(OrcConf var: OrcConf.values()) { + String name = var.getHiveConfName(); + if (name != null && name.startsWith("hive.")) { + removedConfigs.add(name); + } + } + } private static final String[] PASSWORD_STRINGS = new String[] {"password", "paswd", "pswd"}; http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index aa23df8..4fa0651 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -121,8 +121,9 @@ import org.apache.hadoop.mapred.RecordWriter; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Progressable; -import org.apache.orc.*; -import org.apache.orc.impl.PhysicalFsWriter; +import org.apache.orc.OrcConf; +import org.apache.orc.OrcProto; +import org.apache.orc.TypeDescription; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -2116,6 +2117,26 @@ public class TestInputOutputFormat { } /** + * Set the mockblocks for a file after it has been written + * @param path the path to modify + * @param conf the configuration + * @param blocks the blocks to uses + * @throws IOException + */ + static void setBlocks(Path path, Configuration conf, + MockBlock... blocks) throws IOException { + FileSystem mockFs = path.getFileSystem(conf); + MockOutputStream stream = (MockOutputStream) mockFs.create(path); + stream.setBlocks(blocks); + } + + static int getLength(Path path, Configuration conf) throws IOException { + FileSystem mockFs = path.getFileSystem(conf); + FileStatus stat = mockFs.getFileStatus(path); + return (int) stat.getLen(); + } + + /** * Test vectorization, non-acid, non-combine. * @throws Exception */ @@ -2132,15 +2153,16 @@ public class TestInputOutputFormat { "vectorization", inspector, true, 1); // write the orc file to the mock file system + Path path = new Path(conf.get("mapred.input.dir") + "/0_0"); Writer writer = - OrcFile.createWriter(new Path(conf.get("mapred.input.dir") + "/0_0"), + OrcFile.createWriter(path, OrcFile.writerOptions(conf).blockPadding(false) .bufferSize(1024).inspector(inspector)); for(int i=0; i < 10; ++i) { writer.addRow(new MyRow(i, 2*i)); } writer.close(); - getStreamFromWriter(writer).setBlocks(new MockBlock("host0", "host1")); + setBlocks(path, conf, new MockBlock("host0", "host1")); // call getsplits HiveInputFormat<?,?> inputFormat = @@ -2161,11 +2183,6 @@ public class TestInputOutputFormat { assertEquals(false, reader.next(key, value)); } - private MockOutputStream getStreamFromWriter(Writer writer) throws IOException { - PhysicalFsWriter pfr = (PhysicalFsWriter)((WriterImpl) writer).getPhysicalWriter(); - return (MockOutputStream)pfr.getStream(); - } - /** * Test vectorization, non-acid, non-combine. 
* @throws Exception @@ -2183,15 +2200,16 @@ public class TestInputOutputFormat { "vectorBuckets", inspector, true, 1); // write the orc file to the mock file system + Path path = new Path(conf.get("mapred.input.dir") + "/0_0"); Writer writer = - OrcFile.createWriter(new Path(conf.get("mapred.input.dir") + "/0_0"), + OrcFile.createWriter(path, OrcFile.writerOptions(conf).blockPadding(false) .bufferSize(1024).inspector(inspector)); for(int i=0; i < 10; ++i) { writer.addRow(new MyRow(i, 2*i)); } writer.close(); - getStreamFromWriter(writer).setBlocks(new MockBlock("host0", "host1")); + setBlocks(path, conf, new MockBlock("host0", "host1")); // call getsplits conf.setInt(hive_metastoreConstants.BUCKET_COUNT, 3); @@ -2229,9 +2247,9 @@ public class TestInputOutputFormat { BigRow row = new BigRow(i); writer.insert(10, row); } - WriterImpl baseWriter = (WriterImpl) writer.getWriter(); writer.close(false); - getStreamFromWriter(baseWriter).setBlocks(new MockBlock("host0", "host1")); + Path path = new Path("mock:/vectorizationAcid/p=0/base_0000010/bucket_00000"); + setBlocks(path, conf, new MockBlock("host0", "host1")); // call getsplits HiveInputFormat<?, ?> inputFormat = @@ -2309,9 +2327,10 @@ public class TestInputOutputFormat { writer.addRow(new MyRow(i, 2*i)); } writer.close(); - MockOutputStream outputStream = getStreamFromWriter(writer); - outputStream.setBlocks(new MockBlock("host0", "host1")); - int length0 = outputStream.file.length; + Path path = new Path("mock:/combination/p=0/0_0"); + setBlocks(path, conf, new MockBlock("host0", "host1")); + MockFileSystem mockFs = (MockFileSystem) partDir.getFileSystem(conf); + int length0 = getLength(path, conf); writer = OrcFile.createWriter(new Path(partDir, "1_0"), OrcFile.writerOptions(conf).blockPadding(false) @@ -2320,8 +2339,8 @@ public class TestInputOutputFormat { writer.addRow(new MyRow(i, 2*i)); } writer.close(); - outputStream = getStreamFromWriter(writer); - outputStream.setBlocks(new MockBlock("host1", "host2")); + Path path1 = new Path("mock:/combination/p=0/1_0"); + setBlocks(path1, conf, new MockBlock("host1", "host2")); // call getsplits HiveInputFormat<?,?> inputFormat = @@ -2336,7 +2355,7 @@ public class TestInputOutputFormat { assertEquals(partDir.toString() + "/0_0", split.getPath(0).toString()); assertEquals(partDir.toString() + "/1_0", split.getPath(1).toString()); assertEquals(length0, split.getLength(0)); - assertEquals(outputStream.file.length, split.getLength(1)); + assertEquals(getLength(path1, conf), split.getLength(1)); assertEquals(0, split.getOffset(0)); assertEquals(0, split.getOffset(1)); // hadoop-1 gets 3 and hadoop-2 gets 0. 
*sigh* @@ -2384,11 +2403,11 @@ public class TestInputOutputFormat { for(int i=0; i < 10; ++i) { writer.insert(10, new MyRow(i, 2 * i)); } - WriterImpl baseWriter = (WriterImpl) writer.getWriter(); writer.close(false); - MockOutputStream outputStream = getStreamFromWriter(baseWriter); - outputStream.setBlocks(new MockBlock("host1", "host2")); + // base file + Path base0 = new Path("mock:/combinationAcid/p=0/base_0000010/bucket_00000"); + setBlocks(base0, conf, new MockBlock("host1", "host2")); // write a delta file in partition 0 writer = new OrcRecordUpdater(partDir[0], @@ -2397,23 +2416,22 @@ public class TestInputOutputFormat { for(int i=10; i < 20; ++i) { writer.insert(10, new MyRow(i, 2*i)); } - WriterImpl deltaWriter = (WriterImpl) writer.getWriter(); - outputStream = getStreamFromWriter(deltaWriter); writer.close(false); - outputStream.setBlocks(new MockBlock("host1", "host2")); + Path base1 = new Path("mock:/combinationAcid/p=0/base_0000010/bucket_00001"); + setBlocks(base1, conf, new MockBlock("host1", "host2")); // write three files in partition 1 for(int bucket=0; bucket < BUCKETS; ++bucket) { + Path path = new Path(partDir[1], "00000" + bucket + "_0"); Writer orc = OrcFile.createWriter( - new Path(partDir[1], "00000" + bucket + "_0"), + path, OrcFile.writerOptions(conf) .blockPadding(false) .bufferSize(1024) .inspector(inspector)); orc.addRow(new MyRow(1, 2)); - outputStream = getStreamFromWriter(orc); orc.close(); - outputStream.setBlocks(new MockBlock("host3", "host4")); + setBlocks(path, conf, new MockBlock("host3", "host4")); } // call getsplits @@ -3633,13 +3651,13 @@ public class TestInputOutputFormat { } // Save the conf variable values so that they can be restored later. - long oldDefaultStripeSize = conf.getLong(HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE.varname, -1L); + long oldDefaultStripeSize = conf.getLong(OrcConf.STRIPE_SIZE.getHiveConfName(), -1L); long oldMaxSplitSize = conf.getLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, -1L); // Set the conf variable values for this test. long newStripeSize = 10000L; // 10000 bytes per stripe long newMaxSplitSize = 100L; // 1024 bytes per split - conf.setLong(HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE.varname, newStripeSize); + conf.setLong(OrcConf.STRIPE_SIZE.getHiveConfName(), newStripeSize); conf.setLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, newMaxSplitSize); AbstractSerDe serde = new OrcSerde(); @@ -3681,10 +3699,10 @@ public class TestInputOutputFormat { // Reset the conf variable values that we changed for this test. if (oldDefaultStripeSize != -1L) { - conf.setLong(HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE.varname, oldDefaultStripeSize); + conf.setLong(OrcConf.STRIPE_SIZE.getHiveConfName(), oldDefaultStripeSize); } else { // this means that nothing was set for default stripe size previously, so we should unset it. 
- conf.unset(HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE.varname); + conf.unset(OrcConf.STRIPE_SIZE.getHiveConfName()); } if (oldMaxSplitSize != -1L) { conf.setLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, oldMaxSplitSize); http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java index c7c2c9d..84e83df 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java @@ -85,6 +85,7 @@ import org.apache.orc.ColumnStatistics; import org.apache.orc.DecimalColumnStatistics; import org.apache.orc.DoubleColumnStatistics; import org.apache.orc.IntegerColumnStatistics; +import org.apache.orc.OrcConf; import org.apache.orc.impl.MemoryManager; import org.apache.orc.OrcProto; @@ -247,7 +248,7 @@ public class TestOrcFile { public void openFileSystem () throws Exception { conf = new Configuration(); if(zeroCopy) { - conf.setBoolean(HiveConf.ConfVars.HIVE_ORC_ZEROCOPY.varname, zeroCopy); + conf.setBoolean(OrcConf.USE_ZEROCOPY.getHiveConfName(), zeroCopy); } fs = FileSystem.getLocal(conf); testFilePath = new Path(workDir, "TestOrcFile." + @@ -1817,7 +1818,7 @@ public class TestOrcFile { assertEquals(COUNT, reader.getNumberOfRows()); /* enable zero copy record reader */ Configuration conf = new Configuration(); - HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_ORC_ZEROCOPY, true); + conf.setBoolean(OrcConf.USE_ZEROCOPY.getHiveConfName(), true); RecordReader rows = reader.rows(); /* all tests are identical to the other seek() tests */ OrcStruct row = null; http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java index 4656ab2..6bf1312 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java @@ -76,7 +76,7 @@ public class TestVectorizedOrcAcidRowBatchReader { } static String getColumnNamesProperty() { - return "x"; + return "field"; } static String getColumnTypesProperty() { return "bigint"; http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/src/test/queries/clientpositive/orc_remove_cols.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_remove_cols.q b/ql/src/test/queries/clientpositive/orc_remove_cols.q index fdae064..c3c95f3 100644 --- a/ql/src/test/queries/clientpositive/orc_remove_cols.q +++ b/ql/src/test/queries/clientpositive/orc_remove_cols.q @@ -9,7 +9,7 @@ insert into table orc_partitioned partition (ds = 'tomorrow') select cint, cstri -- Use the old change the SERDE trick to avoid ORC DDL checks... and remove a column on the end. 
ALTER TABLE orc_partitioned SET SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'; -ALTER TABLE orc_partitioned REPLACE COLUMNS (cint int); +ALTER TABLE orc_partitioned REPLACE COLUMNS (a int); ALTER TABLE orc_partitioned SET SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'; SELECT * FROM orc_partitioned WHERE ds = 'today'; http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/src/test/queries/clientpositive/orc_schema_evolution.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_schema_evolution.q b/ql/src/test/queries/clientpositive/orc_schema_evolution.q index c78cfe8..aa6fc58 100644 --- a/ql/src/test/queries/clientpositive/orc_schema_evolution.q +++ b/ql/src/test/queries/clientpositive/orc_schema_evolution.q @@ -21,21 +21,21 @@ select sum(hash(*)) from src_orc; insert overwrite table src_orc2 select * from src; select sum(hash(*)) from src_orc2; -alter table src_orc2 replace columns (k smallint, v string); +alter table src_orc2 replace columns (key smallint, val string); select sum(hash(*)) from src_orc2; -alter table src_orc2 replace columns (k int, v string); +alter table src_orc2 replace columns (key int, val string); select sum(hash(*)) from src_orc2; -alter table src_orc2 replace columns (k bigint, v string); +alter table src_orc2 replace columns (key bigint, val string); select sum(hash(*)) from src_orc2; -alter table src_orc2 replace columns (k bigint, v string, z int); +alter table src_orc2 replace columns (key bigint, val string, z int); select sum(hash(*)) from src_orc2; -alter table src_orc2 replace columns (k bigint, v string, z bigint); +alter table src_orc2 replace columns (key bigint, val string, z bigint); select sum(hash(*)) from src_orc2; -alter table src_orc2 replace columns (k bigint, v string, z bigint, y float); +alter table src_orc2 replace columns (key bigint, val string, z bigint, y float); select sum(hash(*)) from src_orc2; http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out b/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out index b798e82..7d01c69 100644 --- a/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out @@ -99,7 +99,7 @@ STAGE PLANS: serialization.ddl struct src_orc { string key, string value, string ds, string hr} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - totalSize 633 + totalSize 626 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.NullStructSerDe @@ -120,7 +120,7 @@ STAGE PLANS: serialization.ddl struct src_orc { string key, string value, string ds, string hr} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 633 + totalSize 626 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.src_orc
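Editor's note on the EncodedTreeReaderFactory hunks above: the recurring change is that the loose skipCorrupt and writer-timezone (tz) arguments are folded into a single TreeReaderFactory.Context, which createEncodedTreeReader passes to every nested child reader and which each stream-reader builder now accepts through setContext(...). The sketch below is not the Hive code from this patch; it is a minimal, self-contained illustration of that refactoring pattern, using hypothetical Context, Reader, ListReader and LongReader names.

    // Illustrative only: hypothetical names, not the Hive/ORC classes from the patch.
    // Shows the pattern of replacing loose per-reader flags (skipCorrupt, writer timezone)
    // with one context object threaded through nested builders.
    public class ContextRefactorSketch {

      /** Bundles options that used to be passed as separate parameters. */
      static final class Context {
        final boolean skipCorrupt;
        final String writerTimezone;
        Context(boolean skipCorrupt, String writerTimezone) {
          this.skipCorrupt = skipCorrupt;
          this.writerTimezone = writerTimezone;
        }
      }

      interface Reader { String describe(); }

      /** A compound reader whose child readers are built from the same context. */
      static final class ListReader implements Reader {
        private final Reader element;
        private final Context context;
        private ListReader(Reader element, Context context) {
          this.element = element;
          this.context = context;
        }
        @Override public String describe() {
          return "list<" + element.describe() + "> skipCorrupt=" + context.skipCorrupt;
        }

        static Builder builder() { return new Builder(); }

        static final class Builder {
          private Reader element;
          private Context context;
          Builder setElementReader(Reader element) { this.element = element; return this; }
          // One setter replaces the old skipCorrupt(...) / setWriterTimezone(...) pair.
          Builder setContext(Context context) { this.context = context; return this; }
          ListReader build() { return new ListReader(element, context); }
        }
      }

      static final class LongReader implements Reader {
        private final Context context;
        LongReader(Context context) { this.context = context; }
        @Override public String describe() { return "long tz=" + context.writerTimezone; }
      }

      /** Mirrors createEncodedTreeReader: the same context flows to every child. */
      static Reader createReader(String type, Context context) {
        if ("list".equals(type)) {
          Reader element = createReader("long", context);
          return ListReader.builder()
              .setElementReader(element)
              .setContext(context)
              .build();
        }
        return new LongReader(context);
      }

      public static void main(String[] args) {
        Context context = new Context(true, "UTC");
        System.out.println(createReader("list", context).describe());
      }
    }

The practical upside of this shape, as the diff suggests, is that adding another per-reader option later touches only the Context class rather than every builder, constructor and recursive call site.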

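Editor's note on the configuration hunks (SetProcessor, TestInputOutputFormat, TestOrcFile): the patch addresses ORC settings through org.apache.orc.OrcConf and its Hive-facing key names instead of HiveConf.ConfVars, and SetProcessor enumerates OrcConf to stop treating those "hive.*" keys as removed configs. The snippet below is a sketch of those two idioms, not part of the patch itself; it only uses the OrcConf calls already shown above (values(), getHiveConfName(), STRIPE_SIZE, USE_ZEROCOPY) plus the standard Hadoop Configuration API.

    import java.util.HashSet;
    import java.util.Set;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.orc.OrcConf;

    // Sketch: set ORC options by their Hive config names, and build the
    // allow-list of "hive.*" ORC keys the way the new SetProcessor static
    // block populates removedConfigs.
    public class OrcConfSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Same style as the updated tests: address the option via its Hive name.
        conf.setLong(OrcConf.STRIPE_SIZE.getHiveConfName(), 10000L);
        conf.setBoolean(OrcConf.USE_ZEROCOPY.getHiveConfName(), true);

        // Collect every ORC option exposed under a "hive."-prefixed name, so a
        // `set hive.exec.orc.*` command is accepted rather than rejected as a
        // removed configuration.
        Set<String> allowedOrcKeys = new HashSet<>();
        for (OrcConf var : OrcConf.values()) {
          String name = var.getHiveConfName();
          if (name != null && name.startsWith("hive.")) {
            allowedOrcKeys.add(name);
          }
        }
        System.out.println("ORC options settable via hive.* names: " + allowedOrcKeys.size());
      }
    }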