[GitHub] carbondata issue #2654: [CARBONDATA-2896] Adaptive Encoding for Primitive da...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2654 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/96/ ---
[GitHub] carbondata issue #2654: [CARBONDATA-2896] Adaptive Encoding for Primitive da...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2654 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8167/ ---
[GitHub] carbondata issue #2654: [CARBONDATA-2896] Adaptive Encoding for Primitive da...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2654 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6478/ ---
[GitHub] carbondata issue #2673: [WIP] Test Carbonstore
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2673 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/95/ ---
[GitHub] carbondata pull request #2642: [CARBONDATA-2532][Integration] Carbon to supp...
Github user sujith71955 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2642#discussion_r213905161 --- Diff: integration/spark-datasource/src/main/spark2.3/org/apache/spark/sql/CarbonVectorProxy.java --- @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql; + +import java.math.BigInteger; + +import org.apache.spark.memory.MemoryMode; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.execution.vectorized.Dictionary; +import org.apache.spark.sql.execution.vectorized.WritableColumnVector; +import org.apache.spark.sql.types.*; +import org.apache.spark.sql.vectorized.ColumnarBatch; +import org.apache.spark.unsafe.types.CalendarInterval; +import org.apache.spark.unsafe.types.UTF8String; + +/** + * Adapter class which handles the columnar vector reading of the carbondata + * based on the spark ColumnVector and ColumnarBatch API. This proxy class + * handles the complexity of spark 2.3 version related api changes since + * spark ColumnVector and ColumnarBatch interfaces are still evolving. 
+ */ +public class CarbonVectorProxy { + +private ColumnarBatch columnarBatch; +private WritableColumnVector[] columnVectors; + +/** + * Adapter class which handles the columnar vector reading of the carbondata + * based on the spark ColumnVector and ColumnarBatch API. This proxy class + * handles the complexity of spark 2.3 version related api changes since + * spark ColumnVector and ColumnarBatch interfaces are still evolving. + * + * @param memMode which represent the type onheap or offheap vector. + * @param rowNumrows number for vector reading + * @param structFileds, metadata related to current schema of table. + */ +public CarbonVectorProxy(MemoryMode memMode, int rowNum, StructField[] structFileds) { +columnVectors = ColumnVectorFactory +.getColumnVector(memMode, new StructType(structFileds), rowNum); +columnarBatch = new ColumnarBatch(columnVectors); +columnarBatch.setNumRows(rowNum); +} + +public CarbonVectorProxy(MemoryMode memMode, StructType outputSchema, int rowNum) { +columnVectors = ColumnVectorFactory +.getColumnVector(memMode, outputSchema, rowNum); +columnarBatch = new ColumnarBatch(columnVectors); +columnarBatch.setNumRows(rowNum); +} + +/** + * Returns the number of rows for read, including filtered rows. + */ +public int numRows() { +return columnarBatch.numRows(); +} + +public Object reserveDictionaryIds(int capacity, int ordinal) { +return columnVectors[ordinal].reserveDictionaryIds(capacity); +} + +/** + * This API will return a columnvector from a batch of column vector rows + * based on the ordinal + * + * @param ordinal + * @return + */ +public WritableColumnVector column(int ordinal) { +return (WritableColumnVector) columnarBatch.column(ordinal); +} + +public WritableColumnVector getColumnVector(int ordinal) { +return columnVectors[ordinal]; +} + +/** + * Resets this column for writing. The currently stored values are no longer accessible. 
+ */ +public void reset() { +for (WritableColumnVector col : columnVectors) { +col.reset(); +} +} + +public void resetDictionaryIds(int ordinal) { +columnVectors[ordinal].getDictionaryIds().reset(); +} + +/** + * Returns the row in this batch at `rowId`. Returned row is reused across calls. + */ +public InternalRow getRow(int rowId) { +return columnarBatch.getRow(rowId); +} + + +/**
[GitHub] carbondata issue #2635: [CARBONDATA-2856][BloomDataMap] Fix bug in bloom ind...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2635 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/92/ ---
[GitHub] carbondata issue #2635: [CARBONDATA-2856][BloomDataMap] Fix bug in bloom ind...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2635 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8163/ ---
[GitHub] carbondata issue #2628: [CARBONDATA-2851][CARBONDATA-2852] Support zstd as c...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2628 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8165/ ---
[GitHub] carbondata pull request #2642: [CARBONDATA-2532][Integration] Carbon to supp...
Github user sandeep-katta commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2642#discussion_r213903286 --- Diff: integration/spark2/src/main/spark2.2/org/apache/spark/sql/CustomDeterministicExpression.scala --- @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.types.{DataType, StringType} + +/** + * Custom expression to override the deterministic property . + */ +case class CustomDeterministicExpression(nonDt: Expression ) extends Expression with Serializable{ --- End diff -- in 2.1 and 2.2 override def deterministic: Boolean = true in 2.3 override lazy val deterministic: Boolean = true ---
[GitHub] carbondata issue #2673: [WIP] Test Carbonstore
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2673 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6477/ ---
[GitHub] carbondata pull request #2642: [CARBONDATA-2532][Integration] Carbon to supp...
Github user sandeep-katta commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2642#discussion_r213902360 --- Diff: integration/spark-common/src/main/scala/org/apache/spark/sql/execution/streaming/CarbonAppendableStreamSink.scala --- @@ -122,7 +122,7 @@ class CarbonAppendableStreamSink( className = sparkSession.sessionState.conf.streamingFileCommitProtocolClass, jobId = batchId.toString, outputPath = fileLogPath, -isAppend = false) +false) --- End diff -- in 2.3 isAppend name is changed to dynamicPartitionOverwrite ---
[GitHub] carbondata issue #2628: [CARBONDATA-2851][CARBONDATA-2852] Support zstd as c...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2628 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6476/ ---
[GitHub] carbondata issue #2673: [WIP] Test Carbonstore
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2673 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8166/ ---
[GitHub] carbondata issue #2673: [WIP] Test Carbonstore
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2673 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6475/ ---
[GitHub] carbondata issue #2673: [WIP] Test Carbonstore
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2673 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/93/ ---
[GitHub] carbondata issue #2673: [WIP] Test Carbonstore
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2673 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8164/ ---
[GitHub] carbondata issue #2628: [CARBONDATA-2851][CARBONDATA-2852] Support zstd as c...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2628 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/94/ ---
[GitHub] carbondata issue #2635: [CARBONDATA-2856][BloomDataMap] Fix bug in bloom ind...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2635 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6474/ ---
[GitHub] carbondata pull request #2635: [CARBONDATA-2856][BloomDataMap] Fix bug in bl...
Github user xuchuanyin commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2635#discussion_r213897347 --- Diff: datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapWriter.java --- @@ -91,30 +91,28 @@ @Override protected byte[] convertDictionaryValue(int indexColIdx, Object value) { // input value from onPageAdded in load process is byte[] -byte[] fakeMdkBytes; -// this means that we need to pad some fake bytes -// to get the whole MDK in corresponding position -if (columnarSplitter.getBlockKeySize().length > indexCol2MdkIdx.size()) { - int totalSize = 0; - for (int size : columnarSplitter.getBlockKeySize()) { -totalSize += size; - } - fakeMdkBytes = new byte[totalSize]; - // put this bytes to corresponding position - int thisKeyIdx = indexCol2MdkIdx.get(indexColumns.get(indexColIdx).getColName()); - int destPos = 0; - for (int keyIdx = 0; keyIdx < columnarSplitter.getBlockKeySize().length; keyIdx++) { -if (thisKeyIdx == keyIdx) { - System.arraycopy(value, 0, - fakeMdkBytes, destPos, columnarSplitter.getBlockKeySize()[thisKeyIdx]); - break; -} -destPos += columnarSplitter.getBlockKeySize()[keyIdx]; +// This is used to deal with the multiple global dictionary column as index columns. +// The KeyGenerator works with the whole MDK while the value here only represent part of it, +// so we need to pad fake bytes to it in corresponding position. +int totalSize = 0; +for (int size : columnarSplitter.getBlockKeySize()) { + totalSize += size; +} +byte[] fakeMdkBytes = new byte[totalSize]; + +// put this bytes to corresponding position +int thisKeyIdx = indexCol2MdkIdx.get(indexColumns.get(indexColIdx).getColName()); +int destPos = 0; +for (int keyIdx = 0; keyIdx < columnarSplitter.getBlockKeySize().length; keyIdx++) { + if (thisKeyIdx == keyIdx) { +System.arraycopy(value, 0, fakeMdkBytes, destPos, --- End diff -- nice, it works. :+1: ---
[GitHub] carbondata issue #2657: [CARBONDATA-2884] Rename the methods of ByteUtil cla...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2657 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/90/ ---
[jira] [Resolved] (CARBONDATA-2899) Add MV modules to assembly JAR
[ https://issues.apache.org/jira/browse/CARBONDATA-2899?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Liang Chen resolved CARBONDATA-2899. Resolution: Fixed > Add MV modules to assembly JAR > -- > > Key: CARBONDATA-2899 > URL: https://issues.apache.org/jira/browse/CARBONDATA-2899 > Project: CarbonData > Issue Type: Improvement >Reporter: Jacky Li >Assignee: Jacky Li >Priority: Minor > Fix For: 1.5.0, 1.4.2 > > Time Spent: 1h > Remaining Estimate: 0h > > When compiling the project with profile -Pmv, the MV module classes should be added > into the assembly JAR -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Updated] (CARBONDATA-2899) Add MV modules to assembly JAR
[ https://issues.apache.org/jira/browse/CARBONDATA-2899?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Liang Chen updated CARBONDATA-2899: --- Priority: Minor (was: Major) > Add MV modules to assembly JAR > -- > > Key: CARBONDATA-2899 > URL: https://issues.apache.org/jira/browse/CARBONDATA-2899 > Project: CarbonData > Issue Type: Improvement >Reporter: Jacky Li >Assignee: Jacky Li >Priority: Minor > Fix For: 1.5.0, 1.4.2 > > Time Spent: 1h > Remaining Estimate: 0h > > When compiling the project with profile -Pmv, the MV module classes should be added > into the assembly JAR -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[GitHub] carbondata pull request #2668: [CARBONDATA-2899] Add MV module class to asse...
Github user asfgit closed the pull request at: https://github.com/apache/carbondata/pull/2668 ---
[GitHub] carbondata issue #2668: [CARBONDATA-2899] Add MV module class to assembly JA...
Github user chenliang613 commented on the issue: https://github.com/apache/carbondata/pull/2668 LGTM ---
[GitHub] carbondata issue #2607: [CARBONDATA-2818] Presto Upgrade to 0.206
Github user chenliang613 commented on the issue: https://github.com/apache/carbondata/pull/2607 @bhavya411 any new progress ? ---
[GitHub] carbondata issue #2657: [CARBONDATA-2884] Rename the methods of ByteUtil cla...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2657 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8161/ ---
[GitHub] carbondata issue #2673: [WIP] Test Carbonstore
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2673 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6473/ ---
[GitHub] carbondata issue #2657: [CARBONDATA-2884] Rename the methods of ByteUtil cla...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2657 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6472/ ---
[GitHub] carbondata issue #2673: [WIP] Test Carbonstore
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2673 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/91/ ---
[GitHub] carbondata issue #2673: [WIP] Test Carbonstore
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2673 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8162/ ---
[GitHub] carbondata issue #2627: [CARBONDATA-2835] [MVDataMap] Block MV datamap on st...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2627 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6471/ ---
[GitHub] carbondata pull request #2673: [WIP] Test Carbonstore
GitHub user xubo245 opened a pull request: https://github.com/apache/carbondata/pull/2673 [WIP] Test Carbonstore Be sure to do all of the following checklist to help us incorporate your contribution quickly and easily: - [ ] Any interfaces changed? - [ ] Any backward compatibility impacted? - [ ] Document update required? - [ ] Testing done Please provide details on - Whether new unit test cases have been added or why no new tests are required? - How it is tested? Please attach test report. - Is it a performance related change? Please attach the performance test report. - Any additional information to help reviewers in testing this change. - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. You can merge this pull request into a Git repository by running: $ git pull https://github.com/xubo245/carbondata carbonstoreCS Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/2673.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #2673 commit e80126a208a173225c9899df8e4a9f177a07636d Author: Jacky Li Date: 2018-06-13T15:57:00Z [CARBONDATA-2609] Change RPC implementation to Hadoop RPC framework This closes #2372 commit 437556bb8d44380836956cc341718bb56dce5cdf Author: QiangCai Date: 2018-07-03T12:21:18Z [CARBONDATA-2690][CarbonStore] implement RESTful API: create table, load data and select This PR adds: 1.basic framework rewrite the carbon store's Master, Worker and Scheduler code in Java 2.RESTful API support create a table by using file meta store support load data to a table in single work support select data with a filter This closes #2440 commit 01c623d67dc0384674b2e489a88d819f687d6ac6 Author: xuchuanyin Date: 2018-06-13T01:03:28Z [CARBONDATA-2613] Support csv based carbon table 1. 
create csv based carbon table using CREATE TABLE fact_table (col1 bigint, col2 string, ..., col100 string) STORED BY 'CarbonData' TBLPROPERTIES( 'format'='csv', 'csv.delimiter'=',', 'csv.header'='col1,col2,col100') 2. Load data to this table using ALTER TABLE fact_table ADD SEGMENT LOCATION 'path/to/data1' This closes #2374 commit 464e948d55f43a71d82c5fb654f4e1ea66c1ef65 Author: Jacky Li Date: 2018-07-09T04:23:49Z [CARBONDATA-2705][CarbonStore] CarbonStore Java API and Implementation Support two implementations: 1.LocalCarbonStore for usage in local mode 2.DistributedCarbonStore leveraging multiple server (Master and Workers) via RPC This closes #2473 commit 4cd7f5881b5595cf2b321dcb295a270162e53623 Author: Jacky Li Date: 2018-07-10T13:20:45Z [CARBONDATA-2688][CarbonStore] Support SQL in REST API Support SQL interface in Horizon service. Support REST client for SQL This closes #2481 commit 72bdef53c6582e2b8127430675c1324c73ddc825 Author: Jacky Li Date: 2018-07-18T02:14:43Z [REBASE] Rebasing with master branch and Fixing rebase conflict commit a2d5b098d0764c61044039cc519601995347ae6d Author: Ajith Date: 2018-07-12T03:47:22Z [CARBONDATA-2736][CARBONSTORE] Kafka integration with Carbon StreamSQL Modification in this PR: 1.Pass source table properties to streamReader.load() 2.Do not pass schema when sparkSession.readStream 3.Remove querySchema validation against sink as dataFrame made from kafka source will not have schema ( it is written in value column of schema ) 4.Extract the dataframe from kafka source which contain actual data schema @ writeStream This closes #2495 commit 8a63df595a10e2dbf03f08b108859f339ea4fe7a Author: Ajith Date: 2018-07-18T11:18:54Z [CARBONDATA-2752][CARBONSTORE] Carbon provide Zeppelin support This closes #2522 commit 65249c0aaa5ff50452f5df24477d0f26e06a70b4 Author: QiangCai Date: 2018-07-19T06:50:38Z [CARBONDATA-2767][CarbonStore] Fix task locality issue If the Spark cluster and the Hadoop cluster are two different machine cluster, the Spark
tasks will run in RACK_LOCAL mode. This closes #2528 commit f75a5c861ce00df9e00c4dbe50433d8fcf07ad0f Author: QiangCai Date: 2018-07-24T03:18:59Z [CARBONDATA-2776][CarbonStore] Support ingesting data from Kafka service This closes #2544 commit 7443cd4da0d0d4d457456490a80995cdea7f7019 Author: Jacky Li Date: 2018-07-31T18:16:26Z [CARBONDATA-2825][CARBONDATA-2828] CarbonStore and InternalCarbonStore API This closes #2589 commit 87706e04887383b58eca42a4477abf14969a63d8 Author: Ajith Date: 2018-08-12T11:24:27Z [CARBONDATA-2826] support select using distributed carbon store
[GitHub] carbondata pull request #2657: [CARBONDATA-2884] Rename the methods of ByteU...
Github user QiangCai commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2657#discussion_r213886786 --- Diff: core/src/test/java/org/apache/carbondata/core/util/ByteUtilTest.java --- @@ -17,156 +17,246 @@ package org.apache.carbondata.core.util; import junit.framework.TestCase; + import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.util.ByteUtil.UnsafeComparer; + import org.junit.Before; import org.junit.Test; import java.nio.ByteBuffer; import java.nio.charset.Charset; - /** * This test will test the functionality of the Byte Util * for the comparision of 2 byte buffers */ public class ByteUtilTest extends TestCase { -String dimensionValue1 = "1235"; -String dimensionValue2 = "1234"; -private ByteBuffer buff1; -private ByteBuffer buff2; - -/** - * This method will form one single byte [] for all the high card dims. - * - * @param byteBufferArr - * @return - */ -public static byte[] packByteBufferIntoSingleByteArray( -ByteBuffer[] byteBufferArr) { -// for empty array means there is no data to remove dictionary. -if (null == byteBufferArr || byteBufferArr.length == 0) { -return null; -} -int noOfCol = byteBufferArr.length; -short toDetermineLengthOfByteArr = 2; -short offsetLen = (short) (noOfCol * 2 + toDetermineLengthOfByteArr); -int totalBytes = calculateTotalBytes(byteBufferArr) + offsetLen; - -ByteBuffer buffer = ByteBuffer.allocate(totalBytes); - -// write the length of the byte [] as first short -buffer.putShort((short) (totalBytes - toDetermineLengthOfByteArr)); -// writing the offset of the first element. 
-buffer.putShort(offsetLen); - -// prepare index for byte [] -for (int index = 0; index < byteBufferArr.length - 1; index++) { -ByteBuffer individualCol = byteBufferArr[index]; -// short lengthOfbytes = individualCol.getShort(); -int noOfBytes = individualCol.capacity(); - -buffer.putShort((short) (offsetLen + noOfBytes)); -offsetLen += noOfBytes; -individualCol.rewind(); -} - -// put actual data. -for (int index = 0; index < byteBufferArr.length; index++) { -ByteBuffer individualCol = byteBufferArr[index]; -buffer.put(individualCol.array()); -} - -buffer.rewind(); -return buffer.array(); + String dimensionValue1 = "1235"; + String dimensionValue2 = "1234"; + private ByteBuffer buff1; + private ByteBuffer buff2; + /** + * This method will form one single byte [] for all the high card dims. + * + * @param byteBufferArr + * @return + */ + public static byte[] packByteBufferIntoSingleByteArray(ByteBuffer[] byteBufferArr) { +// for empty array means there is no data to remove dictionary. +if (null == byteBufferArr || byteBufferArr.length == 0) { + return null; } +int noOfCol = byteBufferArr.length; +short toDetermineLengthOfByteArr = 2; +short offsetLen = (short) (noOfCol * 2 + toDetermineLengthOfByteArr); +int totalBytes = calculateTotalBytes(byteBufferArr) + offsetLen; -/** - * To calculate the total bytes in byte Buffer[]. - * - * @param byteBufferArr - * @return - */ -private static int calculateTotalBytes(ByteBuffer[] byteBufferArr) { -int total = 0; -for (int index = 0; index < byteBufferArr.length; index++) { -total += byteBufferArr[index].capacity(); -} -return total; -} +ByteBuffer buffer = ByteBuffer.allocate(totalBytes); -/** - * @throws Exception - */ -@Before -public void setUp() throws Exception { +// write the length of the byte [] as first short +buffer.putShort((short) (totalBytes - toDetermineLengthOfByteArr)); +// writing the offset of the first element. 
+buffer.putShort(offsetLen); -} - -@Test -public void testLessThan() { -dimensionValue1 = "a6aa1235"; -dimensionValue2 = "a5aa1234"; +// prepare index for byte [] +for (int index = 0; index < byteBufferArr.length - 1; index++) { + ByteBuffer individualCol = byteBufferArr[index]; + // short lengthOfbytes = individualCol.getShort(); + int
[GitHub] carbondata pull request #2657: [CARBONDATA-2884] Rename the methods of ByteU...
Github user QiangCai commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2657#discussion_r213886769 --- Diff: core/src/test/java/org/apache/carbondata/core/util/ByteUtilTest.java --- @@ -17,156 +17,246 @@ package org.apache.carbondata.core.util; import junit.framework.TestCase; + import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.util.ByteUtil.UnsafeComparer; + import org.junit.Before; import org.junit.Test; import java.nio.ByteBuffer; import java.nio.charset.Charset; - /** * This test will test the functionality of the Byte Util * for the comparision of 2 byte buffers */ public class ByteUtilTest extends TestCase { -String dimensionValue1 = "1235"; -String dimensionValue2 = "1234"; -private ByteBuffer buff1; -private ByteBuffer buff2; - -/** - * This method will form one single byte [] for all the high card dims. - * - * @param byteBufferArr - * @return - */ -public static byte[] packByteBufferIntoSingleByteArray( -ByteBuffer[] byteBufferArr) { -// for empty array means there is no data to remove dictionary. -if (null == byteBufferArr || byteBufferArr.length == 0) { -return null; -} -int noOfCol = byteBufferArr.length; -short toDetermineLengthOfByteArr = 2; -short offsetLen = (short) (noOfCol * 2 + toDetermineLengthOfByteArr); -int totalBytes = calculateTotalBytes(byteBufferArr) + offsetLen; - -ByteBuffer buffer = ByteBuffer.allocate(totalBytes); - -// write the length of the byte [] as first short -buffer.putShort((short) (totalBytes - toDetermineLengthOfByteArr)); -// writing the offset of the first element. 
-buffer.putShort(offsetLen); - -// prepare index for byte [] -for (int index = 0; index < byteBufferArr.length - 1; index++) { -ByteBuffer individualCol = byteBufferArr[index]; -// short lengthOfbytes = individualCol.getShort(); -int noOfBytes = individualCol.capacity(); - -buffer.putShort((short) (offsetLen + noOfBytes)); -offsetLen += noOfBytes; -individualCol.rewind(); -} - -// put actual data. -for (int index = 0; index < byteBufferArr.length; index++) { -ByteBuffer individualCol = byteBufferArr[index]; -buffer.put(individualCol.array()); -} - -buffer.rewind(); -return buffer.array(); + String dimensionValue1 = "1235"; + String dimensionValue2 = "1234"; + private ByteBuffer buff1; + private ByteBuffer buff2; + /** + * This method will form one single byte [] for all the high card dims. + * + * @param byteBufferArr + * @return --- End diff -- fixed ---
[GitHub] carbondata issue #2657: [CARBONDATA-2884] Rename the methods of ByteUtil cla...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2657 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6470/ ---
[GitHub] carbondata issue #2628: [CARBONDATA-2851][CARBONDATA-2852] Support zstd as c...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2628 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/88/ ---
[GitHub] carbondata issue #2627: [CARBONDATA-2835] [MVDataMap] Block MV datamap on st...
Github user ndwangsen commented on the issue: https://github.com/apache/carbondata/pull/2627 retest sdv please ---
[GitHub] carbondata issue #2598: [CARBONDATA-2811][BloomDataMap] Add query test case ...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2598 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/87/ ---
[GitHub] carbondata issue #2598: [CARBONDATA-2811][BloomDataMap] Add query test case ...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2598 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8158/ ---
[GitHub] carbondata issue #2628: [CARBONDATA-2851][CARBONDATA-2852] Support zstd as c...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2628 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8159/ ---
[GitHub] carbondata issue #2664: [CARBONDATA-2895] Fix Query result mismatch with Bat...
Github user xuchuanyin commented on the issue: https://github.com/apache/carbondata/pull/2664 Hi @ajantha-bhat , after this change, have you tried data loading with sort_column_bounds and batch_sort? Does it work fine? ---
[GitHub] carbondata issue #2628: [CARBONDATA-2851][CARBONDATA-2852] Support zstd as c...
Github user xuchuanyin commented on the issue: https://github.com/apache/carbondata/pull/2628 retest this please ---
[GitHub] carbondata issue #2598: [CARBONDATA-2811][BloomDataMap] Add query test case ...
Github user kevinjmh commented on the issue: https://github.com/apache/carbondata/pull/2598 retest this please ---
[GitHub] carbondata issue #2654: [CARBONDATA-2896] Adaptive Encoding for Primitive da...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2654 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6469/ ---
[GitHub] carbondata issue #2654: [CARBONDATA-2896] Adaptive Encoding for Primitive da...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2654 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/86/ ---
[GitHub] carbondata issue #2654: [CARBONDATA-2896] Adaptive Encoding for Primitive da...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2654 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8157/ ---
[GitHub] carbondata issue #2654: [CARBONDATA-2896] Adaptive Encoding for Primitive da...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2654 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6468/ ---
[GitHub] carbondata issue #2672: [HOTFIX] improve sdk multi-thread performance
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2672 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/83/ ---
[GitHub] carbondata issue #2664: [CARBONDATA-2895] Fix Query result mismatch with Bat...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2664 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8155/ ---
[GitHub] carbondata issue #2672: [HOTFIX] improve sdk multi-thread performance
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2672 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8154/ ---
[GitHub] carbondata issue #2654: [CARBONDATA-2896] Adaptive Encoding for Primitive da...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2654 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8156/ ---
[GitHub] carbondata issue #2671: [WIP]AVRO datatype support through SDK
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2671 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/82/ ---
[GitHub] carbondata issue #2671: [WIP]AVRO datatype support through SDK
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2671 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8153/ ---
[GitHub] carbondata issue #2654: [CARBONDATA-2896] Adaptive Encoding for Primitive da...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2654 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/85/ ---
[GitHub] carbondata issue #2672: [HOTFIX] improve sdk multi-thread performance
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2672 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6467/ ---
[GitHub] carbondata issue #2664: [CARBONDATA-2895] Fix Query result mismatch with Bat...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2664 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/84/ ---
[GitHub] carbondata issue #2664: [CARBONDATA-2895] Fix Query result mismatch with Bat...
Github user ajantha-bhat commented on the issue: https://github.com/apache/carbondata/pull/2664 retest this please ---
[GitHub] carbondata issue #2672: [HOTFIX] improve sdk multi-thread performance
Github user ajantha-bhat commented on the issue: https://github.com/apache/carbondata/pull/2672 @gvramana , @ravipesala please review ---
[GitHub] carbondata pull request #2672: [HOTFIX] improve sdk multi-thread performance
GitHub user ajantha-bhat opened a pull request: https://github.com/apache/carbondata/pull/2672 [HOTFIX] improve sdk multi-thread performance Changes in this PR: currently, writing rows to each writer iterator does not happen concurrently; this can also be made concurrent. Also, for Avro, sdkUserCore can be used in the input processor step. Be sure to do all of the following checklist to help us incorporate your contribution quickly and easily: - [ ] Any interfaces changed? NA - [ ] Any backward compatibility impacted? NA - [ ] Document update required? NA - [ ] Testing done done. UT already added. - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. NA You can merge this pull request into a Git repository by running: $ git pull https://github.com/ajantha-bhat/carbondata unmanaged_table Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/2672.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #2672 commit 8d786f3f1b1221bae77cd93256c7dd03a24e5acc Author: ajantha-bhat Date: 2018-08-29T17:41:09Z [HOTFIX] improve sdk multi-thread performance ---
[GitHub] carbondata issue #2671: [WIP]AVRO datatype support through SDK
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2671 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6466/ ---
[GitHub] carbondata issue #2662: [WIP][CARBONDATA-2889]Add decoder based fallback mec...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2662 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8152/ ---
[GitHub] carbondata issue #2663: [CARBONDATA-2894] Add support for complex map type t...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2663 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/79/ ---
[GitHub] carbondata issue #2663: [CARBONDATA-2894] Add support for complex map type t...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2663 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8150/ ---
[GitHub] carbondata issue #2662: [WIP][CARBONDATA-2889]Add decoder based fallback mec...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2662 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6465/ ---
[GitHub] carbondata issue #2662: [WIP][CARBONDATA-2889]Add decoder based fallback mec...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2662 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/81/ ---
[GitHub] carbondata issue #2662: [WIP][CARBONDATA-2889]Add decoder based fallback mec...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2662 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6464/ ---
[GitHub] carbondata issue #2663: [CARBONDATA-2894] Add support for complex map type t...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2663 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6463/ ---
[GitHub] carbondata issue #2671: [WIP]AVRO datatype support through SDK
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2671 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/78/ ---
[GitHub] carbondata issue #2671: [WIP]AVRO datatype support through SDK
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2671 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8149/ ---
[GitHub] carbondata issue #2642: [CARBONDATA-2532][Integration] Carbon to support spa...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2642 Please check the build failures of 2.3 CI http://136.243.101.176:8080/job/ManualApacheCarbonPRBuilder2.1/172/ ---
[GitHub] carbondata issue #2642: [CARBONDATA-2532][Integration] Carbon to support spa...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2642 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/77/ ---
[GitHub] carbondata issue #2671: [WIP]AVRO datatype support through SDK
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2671 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6462/ ---
[GitHub] carbondata pull request #2642: [CARBONDATA-2532][Integration] Carbon to supp...
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2642#discussion_r213700919 --- Diff: integration/spark2/src/main/spark2.2/org/apache/spark/sql/CustomDeterministicExpression.scala --- @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.types.{DataType, StringType} + +/** + * Custom expression to override the deterministic property . + */ +case class CustomDeterministicExpression(nonDt: Expression ) extends Expression with Serializable{ --- End diff -- I don't see any differences , why it is copied 3 times? ---
[GitHub] carbondata issue #2642: [CARBONDATA-2532][Integration] Carbon to support spa...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2642 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8148/ ---
[GitHub] carbondata pull request #2671: [WIP]AVRO datatype support through SDK
GitHub user Indhumathi27 opened a pull request: https://github.com/apache/carbondata/pull/2671 [WIP]AVRO datatype support through SDK This PR supports following Avro DataTypes to carbon format through SDK. Avro datatypes include, 1. Avro Union 2. Avro Enum 3. Avro Logical type Decimal Please refer JIRA CARBONDATA-2876 for further detail. - [ ] Any interfaces changed? - [ ] Any backward compatibility impacted? - [ ] Document update required? - [x] Testing done Test file has been added - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. You can merge this pull request into a Git repository by running: $ git pull https://github.com/Indhumathi27/carbondata avro_support_sdk Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/2671.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #2671 commit eae87521707fa86337a45d5677cc7d0f1f1fbfbc Author: Indhumathi27 Date: 2018-08-29T14:18:21Z Support Avro datatype conversion through SDK ---
[GitHub] carbondata pull request #2642: [CARBONDATA-2532][Integration] Carbon to supp...
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2642#discussion_r213695426 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala --- @@ -700,3 +724,7 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { } } } + +class CarbonPhysicalPlanException extends Exception { + +} --- End diff -- remove the empty braces ---
[GitHub] carbondata issue #2656: [CARBONDATA-2883][ExternalFormat] block some operati...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2656 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/75/ ---
[GitHub] carbondata pull request #2642: [CARBONDATA-2532][Integration] Carbon to supp...
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2642#discussion_r213689912 --- Diff: integration/spark2/pom.xml --- @@ -276,6 +312,8 @@ src/main/spark2.1 +src/main/spark2.3 +src/main/commonTo2.2And2.3 --- End diff -- I think here no need to include ---
[GitHub] carbondata pull request #2642: [CARBONDATA-2532][Integration] Carbon to supp...
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2642#discussion_r213689426 --- Diff: integration/spark-datasource/src/main/spark2.3/org/apache/spark/sql/CarbonVectorProxy.java --- @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql; + +import java.math.BigInteger; + +import org.apache.spark.memory.MemoryMode; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.execution.vectorized.Dictionary; +import org.apache.spark.sql.execution.vectorized.WritableColumnVector; +import org.apache.spark.sql.types.*; +import org.apache.spark.sql.vectorized.ColumnarBatch; +import org.apache.spark.unsafe.types.CalendarInterval; +import org.apache.spark.unsafe.types.UTF8String; + +/** + * Adapter class which handles the columnar vector reading of the carbondata + * based on the spark ColumnVector and ColumnarBatch API. This proxy class + * handles the complexity of spark 2.3 version related api changes since + * spark ColumnVector and ColumnarBatch interfaces are still evolving. 
+ */ +public class CarbonVectorProxy { + +private ColumnarBatch columnarBatch; +private WritableColumnVector[] columnVectors; + +/** + * Adapter class which handles the columnar vector reading of the carbondata + * based on the spark ColumnVector and ColumnarBatch API. This proxy class + * handles the complexity of spark 2.3 version related api changes since + * spark ColumnVector and ColumnarBatch interfaces are still evolving. + * + * @param memMode which represent the type onheap or offheap vector. + * @param rowNumrows number for vector reading + * @param structFileds, metadata related to current schema of table. + */ +public CarbonVectorProxy(MemoryMode memMode, int rowNum, StructField[] structFileds) { +columnVectors = ColumnVectorFactory +.getColumnVector(memMode, new StructType(structFileds), rowNum); +columnarBatch = new ColumnarBatch(columnVectors); +columnarBatch.setNumRows(rowNum); +} + +public CarbonVectorProxy(MemoryMode memMode, StructType outputSchema, int rowNum) { +columnVectors = ColumnVectorFactory +.getColumnVector(memMode, outputSchema, rowNum); +columnarBatch = new ColumnarBatch(columnVectors); +columnarBatch.setNumRows(rowNum); +} + +/** + * Returns the number of rows for read, including filtered rows. + */ +public int numRows() { +return columnarBatch.numRows(); +} + +public Object reserveDictionaryIds(int capacity, int ordinal) { +return columnVectors[ordinal].reserveDictionaryIds(capacity); +} + +/** + * This API will return a columnvector from a batch of column vector rows + * based on the ordinal + * + * @param ordinal + * @return + */ +public WritableColumnVector column(int ordinal) { +return (WritableColumnVector) columnarBatch.column(ordinal); +} + +public WritableColumnVector getColumnVector(int ordinal) { +return columnVectors[ordinal]; +} + +/** + * Resets this column for writing. The currently stored values are no longer accessible. 
+ */ +public void reset() { +for (WritableColumnVector col : columnVectors) { +col.reset(); +} +} + +public void resetDictionaryIds(int ordinal) { +columnVectors[ordinal].getDictionaryIds().reset(); +} + +/** + * Returns the row in this batch at `rowId`. Returned row is reused across calls. + */ +public InternalRow getRow(int rowId) { +return columnarBatch.getRow(rowId); +} + + +/**
[GitHub] carbondata pull request #2642: [CARBONDATA-2532][Integration] Carbon to supp...
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2642#discussion_r213688091 --- Diff: integration/spark-datasource/src/main/spark2.1/org/apache/spark/sql/CarbonVectorProxy.java --- @@ -0,0 +1,272 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql; + +import java.math.BigInteger; + +import org.apache.parquet.column.Dictionary; +import org.apache.spark.memory.MemoryMode; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.execution.vectorized.ColumnarBatch; +import org.apache.spark.sql.execution.vectorized.ColumnVector; +import org.apache.spark.sql.types.CalendarIntervalType; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.Decimal; +import org.apache.spark.sql.types.DecimalType; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.unsafe.types.CalendarInterval; +import org.apache.spark.unsafe.types.UTF8String; + +/** + * Adapter class which handles the columnar vector reading of the carbondata + * based on the spark ColumnVector and ColumnarBatch API. 
This proxy class + * handles the complexity of spark 2.1 version related api changes since + * spark ColumnVector and ColumnarBatch interfaces are still evolving. + */ +public class CarbonVectorProxy { --- End diff -- 2.1 and 2.2 should have same ---
[GitHub] carbondata pull request #2642: [CARBONDATA-2532][Integration] Carbon to supp...
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2642#discussion_r213686265 --- Diff: integration/spark-datasource/pom.xml --- @@ -192,5 +192,132 @@ true + + spark-2.1 + +2.1.0 +2.11 +2.11.8 + + + + +org.apache.maven.plugins +maven-compiler-plugin + + +src/main/spark2.2 +src/main/spark2.3 +src/main/commonTo2.2And2.3 --- End diff -- I think here we only require 2 packages 1. spark2.1andspark2.2 2. spark 2.3plus ---
[GitHub] carbondata issue #2656: [CARBONDATA-2883][ExternalFormat] block some operati...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2656 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8146/ ---
[GitHub] carbondata pull request #2642: [CARBONDATA-2532][Integration] Carbon to supp...
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2642#discussion_r213681553 --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala --- @@ -421,13 +421,13 @@ class CarbonScanRDD[T: ClassTag]( // create record reader for row format DataTypeUtil.setDataTypeConverter(dataTypeConverterClz.newInstance()) val inputFormat = new CarbonStreamInputFormat - val streamReader = inputFormat.createRecordReader(inputSplit, attemptContext) -.asInstanceOf[CarbonStreamRecordReader] - streamReader.setVectorReader(vectorReader) - streamReader.setInputMetricsStats(inputMetricsStats) + inputFormat.setVectorReader(vectorReader) --- End diff -- Create method with name `setIsVectorReader` ---
[GitHub] carbondata pull request #2642: [CARBONDATA-2532][Integration] Carbon to supp...
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2642#discussion_r213681240 --- Diff: integration/spark-common/src/main/scala/org/apache/spark/sql/execution/streaming/CarbonAppendableStreamSink.scala --- @@ -122,7 +122,7 @@ class CarbonAppendableStreamSink( className = sparkSession.sessionState.conf.streamingFileCommitProtocolClass, jobId = batchId.toString, outputPath = fileLogPath, -isAppend = false) +false) --- End diff -- Keep back the old code ---
[GitHub] carbondata issue #2666: [CARBONDATA-2898] Fix double boundary condition and ...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2666 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/74/ ---
[GitHub] carbondata issue #2666: [CARBONDATA-2898] Fix double boundary condition and ...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2666 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8145/ ---
[GitHub] carbondata issue #2628: [CARBONDATA-2851][CARBONDATA-2852] Support zstd as c...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2628 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/76/ ---
[jira] [Resolved] (CARBONDATA-2869) SDK support for Map DataType
[ https://issues.apache.org/jira/browse/CARBONDATA-2869?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ravindra Pesala resolved CARBONDATA-2869. - Resolution: Fixed Fix Version/s: 1.5.0 > SDK support for Map DataType > > > Key: CARBONDATA-2869 > URL: https://issues.apache.org/jira/browse/CARBONDATA-2869 > Project: CarbonData > Issue Type: Sub-task >Reporter: Indhumathi Muthumurugesh >Assignee: Manish Gupta >Priority: Major > Fix For: 1.5.0 > > Time Spent: 3h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[GitHub] carbondata issue #2642: [CARBONDATA-2532][Integration] Carbon to support spa...
Github user sujith71955 commented on the issue: https://github.com/apache/carbondata/pull/2642 @jackylk Now that Spark 2.3.2 is about to be released, can this PR work with all Spark 2.3 branches, including 2.3.2? As I said before, there should not be much of a problem rebasing onto Spark 2.3.2 either; since it is a minor version, the interfaces are intact so far. The rebasing effort for Spark 2.3.2 should be very minimal. ---
[GitHub] carbondata pull request #2649: [CARBONDATA-2869] Add support for Avro Map da...
Github user asfgit closed the pull request at: https://github.com/apache/carbondata/pull/2649 ---
[GitHub] carbondata issue #2666: [CARBONDATA-2898] Fix double boundary condition and ...
Github user KanakaKumar commented on the issue: https://github.com/apache/carbondata/pull/2666 LGTM ---
[GitHub] carbondata issue #2649: [CARBONDATA-2869] Add support for Avro Map data type...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2649 LGTM ---
[GitHub] carbondata pull request #2669: [Documentation] Added the missing links for o...
Github user asfgit closed the pull request at: https://github.com/apache/carbondata/pull/2669 ---
[GitHub] carbondata issue #2642: [CARBONDATA-2532][Integration] Carbon to support spa...
Github user sujith71955 commented on the issue: https://github.com/apache/carbondata/pull/2642 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6461/ This is already failing in other builds as well; it seems to be a build environment issue. ---
[GitHub] carbondata issue #2669: [Documentation] Added the missing links for online d...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2669 LGTM ---
[GitHub] carbondata issue #2642: [CARBONDATA-2532][Integration] Carbon to support spa...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2642 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6461/ ---
[GitHub] carbondata issue #2628: [CARBONDATA-2851][CARBONDATA-2852] Support zstd as c...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2628 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8147/ ---
[GitHub] carbondata issue #2628: [CARBONDATA-2851][CARBONDATA-2852] Support zstd as c...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2628 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/6460/ ---
[GitHub] carbondata issue #2670: [WIP] Support binary datatype
Github user xuchuanyin commented on the issue: https://github.com/apache/carbondata/pull/2670 Problems that may be overlooked during loading: 1. We use a buffer to store one row, and the row is 2MB for now. 2. For a column page, we compress it as a byte array and its length is of 'INT' datatype, which means that a page is at most 2GB. So do we have any restrictions on the binary data type? ---
[GitHub] carbondata pull request #2670: [WIP] Support binary datatype
Github user xuchuanyin commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2670#discussion_r213660172 --- Diff: processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/BinaryFieldConverterImpl.java --- @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.processing.loading.converter.impl; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.datastore.row.CarbonRow; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure; +import org.apache.carbondata.processing.loading.DataField; +import org.apache.carbondata.processing.loading.converter.BadRecordLogHolder; +import org.apache.carbondata.processing.loading.converter.FieldConverter; +import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException; +import org.apache.carbondata.processing.util.CarbonDataProcessorUtil; + +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.binary.Hex; +/** + * Converter for binary + */ +public class BinaryFieldConverterImpl implements FieldConverter { + private static final LogService LOGGER = + LogServiceFactory.getLogService(BinaryFieldConverterImpl.class.getName()); + private int index; + private DataType dataType; + private CarbonMeasure measure; + private String nullformat; + private boolean isEmptyBadRecord; + private DataField dataField; + public BinaryFieldConverterImpl(DataField dataField, String nullformat, int index, + boolean isEmptyBadRecord) { +this.dataType = dataField.getColumn().getDataType(); +this.measure = (CarbonMeasure) dataField.getColumn(); +this.nullformat = nullformat; +this.index = index; +this.isEmptyBadRecord = isEmptyBadRecord; +this.dataField = dataField; + } + @Override public void convert(CarbonRow row, BadRecordLogHolder logHolder) + throws CarbonDataLoadingException { +String value = row.getString(index); +if (value != null) { + try { +byte[] bytes = Hex.decodeHex(value.toCharArray()); +row.update(bytes, index); + } catch (DecoderException e) { +if (LOGGER.isDebugEnabled()) { + LOGGER.debug("Can not convert value to Binary type 
value. Value considered as null."); +} +logHolder.setReason( + CarbonDataProcessorUtil.prepareFailureReason(measure.getColName(), dataType)); +row.update(null, index); + } +} + } + + @Override public Object convert(Object value, BadRecordLogHolder logHolder) --- End diff -- Move 'Override' to the previous line ---