[GitHub] incubator-carbondata pull request #805: [CARBONDATA-934] Cast Filter Express...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/805#discussion_r112639983 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/CarbonBoundReference.scala --- @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.spark --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1152: [CARBONDATA-1288] Secure Dictionary Server Im...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1152#discussion_r126936641 --- Diff: integration/spark2/src/main/java/org/apache/carbondata/spark/dictionary/client/SecureDictionaryClientHandler.java --- @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.spark.dictionary.client; + +import java.nio.ByteBuffer; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.dictionary.generator.key.DictionaryMessage; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.Unpooled; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelFutureListener; +import io.netty.channel.ChannelHandlerContext; +import org.apache.spark.network.client.RpcResponseCallback; +import org.apache.spark.network.client.TransportClient; +import org.apache.spark.network.server.OneForOneStreamManager; +import org.apache.spark.network.server.RpcHandler; +import org.apache.spark.network.server.StreamManager; + +/** + * Client handler to get data. + */ +public class SecureDictionaryClientHandler extends RpcHandler { + + private static final LogService LOGGER = + LogServiceFactory.getLogService(SecureDictionaryClientHandler.class.getName()); + + private final BlockingQueue responseMsgQueue = new LinkedBlockingQueue<>(); + + private ChannelHandlerContext ctx; + + private DictionaryChannelFutureListener channelFutureListener; + + /** + * client send request to server + * + * @param key DictionaryMessage + * @return DictionaryMessage + */ + public DictionaryMessage getDictionary(DictionaryMessage key, TransportClient client) { +DictionaryMessage dictionaryMessage; +ByteBuffer resp = null; +try { + + ByteBuf buffer = ByteBufAllocator.DEFAULT.heapBuffer(); + key.writeData(buffer); + resp = client.sendRpcSync(buffer.nioBuffer(), 10); +} catch (Exception e) { + LOGGER.error(e, "Error while send request to server "); +} +try { + ByteBuf data = Unpooled.wrappedBuffer(resp); + DictionaryMessage key1 = new DictionaryMessage(); + if (data == null) { +StringBuilder message = 
new StringBuilder(); +message.append("DictionaryMessage { ColumnName: ").append(key1.getColumnName()) +.append(", DictionaryValue: ").append(key1.getDictionaryValue()).append(", type: ") +.append(key1.getType()); +throw new RuntimeException("Request timed out for key : " + message); + } + key1.readSecureData(data); + data.release(); + return key1; +} catch (Exception e) { + LOGGER.error(e); + throw new RuntimeException(e); +} + } + + @Override public void receive(TransportClient transportClient, ByteBuffer byteBuffer, + RpcResponseCallback rpcResponseCallback) { +try { + ByteBuf data = Unpooled.wrappedBuffer(byteBuffer); + DictionaryMessage key = new DictionaryMessage(); + key.readSecureData(data); + data.release(); + responseMsgQueue.offer(key); +} catch (Exception e) { + LOGGER.error(e); + throw e; +} + } + + @Override public StreamManager getStreamManager() { +return new OneForOneStreamManager(); + } + + private static class DictionaryChannelFutureListener implements ChannelFutureListener { --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitH
[GitHub] carbondata pull request #1152: [CARBONDATA-1288] Secure Dictionary Server Im...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1152#discussion_r126904475 --- Diff: core/src/main/java/org/apache/carbondata/core/dictionary/server/DictionaryServer.java --- @@ -14,153 +14,28 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.carbondata.core.dictionary.server; - -import org.apache.carbondata.common.logging.LogService; -import org.apache.carbondata.common.logging.LogServiceFactory; -import org.apache.carbondata.core.constants.CarbonCommonConstants; -import org.apache.carbondata.core.dictionary.generator.key.DictionaryMessage; -import org.apache.carbondata.core.dictionary.generator.key.DictionaryMessageType; -import org.apache.carbondata.core.util.CarbonProperties; - -import io.netty.bootstrap.ServerBootstrap; -import io.netty.channel.ChannelInitializer; -import io.netty.channel.ChannelOption; -import io.netty.channel.ChannelPipeline; -import io.netty.channel.EventLoopGroup; -import io.netty.channel.nio.NioEventLoopGroup; -import io.netty.channel.socket.SocketChannel; -import io.netty.channel.socket.nio.NioServerSocketChannel; -import io.netty.handler.codec.LengthFieldBasedFrameDecoder; - -/** - * Dictionary Server to generate dictionary keys. 
- */ -public class DictionaryServer { - - private static final LogService LOGGER = - LogServiceFactory.getLogService(DictionaryServer.class.getName()); - private DictionaryServerHandler dictionaryServerHandler; - - private EventLoopGroup boss; - private EventLoopGroup worker; - private int port; - private static Object lock = new Object(); - private static DictionaryServer INSTANCE = null; +package org.apache.carbondata.core.dictionary.server; - private DictionaryServer(int port) { -startServer(port); - } +import org.apache.spark.SparkConf; - public static DictionaryServer getInstance(int port) { -if (INSTANCE == null) { - synchronized (lock) { -if (INSTANCE == null) { - INSTANCE = new DictionaryServer(port); -} - } -} -return INSTANCE; - } +public interface DictionaryServer { - /** - * start dictionary server - * - * @param port - */ - private void startServer(int port) { -dictionaryServerHandler = new DictionaryServerHandler(); -String workerThreads = CarbonProperties.getInstance() -.getProperty(CarbonCommonConstants.DICTIONARY_WORKER_THREADS, -CarbonCommonConstants.DICTIONARY_WORKER_THREADS_DEFAULT); -boss = new NioEventLoopGroup(1); -worker = new NioEventLoopGroup(Integer.parseInt(workerThreads)); -// Configure the server. -bindToPort(port); - } + public void startServer(SparkConf conf, String host, int port); --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1152: [CARBONDATA-1288] Secure Dictionary Server Im...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1152#discussion_r126904515 --- Diff: core/src/main/java/org/apache/carbondata/core/dictionary/server/DictionaryServer.java --- @@ -14,153 +14,28 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.carbondata.core.dictionary.server; - -import org.apache.carbondata.common.logging.LogService; -import org.apache.carbondata.common.logging.LogServiceFactory; -import org.apache.carbondata.core.constants.CarbonCommonConstants; -import org.apache.carbondata.core.dictionary.generator.key.DictionaryMessage; -import org.apache.carbondata.core.dictionary.generator.key.DictionaryMessageType; -import org.apache.carbondata.core.util.CarbonProperties; - -import io.netty.bootstrap.ServerBootstrap; -import io.netty.channel.ChannelInitializer; -import io.netty.channel.ChannelOption; -import io.netty.channel.ChannelPipeline; -import io.netty.channel.EventLoopGroup; -import io.netty.channel.nio.NioEventLoopGroup; -import io.netty.channel.socket.SocketChannel; -import io.netty.channel.socket.nio.NioServerSocketChannel; -import io.netty.handler.codec.LengthFieldBasedFrameDecoder; - -/** - * Dictionary Server to generate dictionary keys. 
- */ -public class DictionaryServer { - - private static final LogService LOGGER = - LogServiceFactory.getLogService(DictionaryServer.class.getName()); - private DictionaryServerHandler dictionaryServerHandler; - - private EventLoopGroup boss; - private EventLoopGroup worker; - private int port; - private static Object lock = new Object(); - private static DictionaryServer INSTANCE = null; +package org.apache.carbondata.core.dictionary.server; - private DictionaryServer(int port) { -startServer(port); - } +import org.apache.spark.SparkConf; - public static DictionaryServer getInstance(int port) { -if (INSTANCE == null) { - synchronized (lock) { -if (INSTANCE == null) { - INSTANCE = new DictionaryServer(port); -} - } -} -return INSTANCE; - } +public interface DictionaryServer { - /** - * start dictionary server - * - * @param port - */ - private void startServer(int port) { -dictionaryServerHandler = new DictionaryServerHandler(); -String workerThreads = CarbonProperties.getInstance() -.getProperty(CarbonCommonConstants.DICTIONARY_WORKER_THREADS, -CarbonCommonConstants.DICTIONARY_WORKER_THREADS_DEFAULT); -boss = new NioEventLoopGroup(1); -worker = new NioEventLoopGroup(Integer.parseInt(workerThreads)); -// Configure the server. -bindToPort(port); - } + public void startServer(SparkConf conf, String host, int port); - /** - * Binds dictionary server to an available port. - * - * @param port - */ - private void bindToPort(int port) { -long start = System.currentTimeMillis(); -// Configure the server. 
-int i = 0; -while (i < 10) { - int newPort = port + i; - try { -ServerBootstrap bootstrap = new ServerBootstrap(); -bootstrap.group(boss, worker); -bootstrap.channel(NioServerSocketChannel.class); -bootstrap.childHandler(new ChannelInitializer() { - @Override public void initChannel(SocketChannel ch) throws Exception { -ChannelPipeline pipeline = ch.pipeline(); -pipeline -.addLast("LengthDecoder", -new LengthFieldBasedFrameDecoder(1048576, 0, -2, 0, 2)); -pipeline.addLast("DictionaryServerHandler", dictionaryServerHandler); - } -}); -bootstrap.childOption(ChannelOption.SO_KEEPALIVE, true); -bootstrap.bind(newPort).sync(); -LOGGER.audit("Dictionary Server started, Time spent " + (System.currentTimeMillis() - start) -+ " Listening on port " + newPort); -this.port = newPort; -break; - } catch (Exception e) { -LOGGER.error(e, "Dictionary Server Failed to bind to port:"); -if (i == 9) { - throw new RuntimeException("Dictionary Server Could not bind to any port"); -} - } - i++; -} - } + public void bindToPort(SparkConf orgConf, String host, int port); --- End diff -- Done --- If your project is set up for it, you can reply to this email and have you
[GitHub] carbondata pull request #1152: [CARBONDATA-1288] Secure Dictionary Server Im...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1152#discussion_r126935953 --- Diff: integration/spark2/src/main/java/org/apache/carbondata/spark/dictionary/client/SecureDictionaryClientHandler.java --- @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.spark.dictionary.client; + +import java.nio.ByteBuffer; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.dictionary.generator.key.DictionaryMessage; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.Unpooled; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelFutureListener; +import io.netty.channel.ChannelHandlerContext; +import org.apache.spark.network.client.RpcResponseCallback; +import org.apache.spark.network.client.TransportClient; +import org.apache.spark.network.server.OneForOneStreamManager; +import org.apache.spark.network.server.RpcHandler; +import org.apache.spark.network.server.StreamManager; + +/** + * Client handler to get data. + */ +public class SecureDictionaryClientHandler extends RpcHandler { + + private static final LogService LOGGER = + LogServiceFactory.getLogService(SecureDictionaryClientHandler.class.getName()); + + private final BlockingQueue responseMsgQueue = new LinkedBlockingQueue<>(); + + private ChannelHandlerContext ctx; + + private DictionaryChannelFutureListener channelFutureListener; + + /** + * client send request to server + * + * @param key DictionaryMessage + * @return DictionaryMessage + */ + public DictionaryMessage getDictionary(DictionaryMessage key, TransportClient client) { +DictionaryMessage dictionaryMessage; +ByteBuffer resp = null; +try { + --- End diff -- Handles. If response is Null the data will be handled. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1152: [CARBONDATA-1288] Secure Dictionary Server Im...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1152#discussion_r126904754 --- Diff: core/src/main/java/org/apache/carbondata/core/dictionary/server/DictionaryServer.java --- @@ -14,153 +14,28 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.carbondata.core.dictionary.server; - -import org.apache.carbondata.common.logging.LogService; -import org.apache.carbondata.common.logging.LogServiceFactory; -import org.apache.carbondata.core.constants.CarbonCommonConstants; -import org.apache.carbondata.core.dictionary.generator.key.DictionaryMessage; -import org.apache.carbondata.core.dictionary.generator.key.DictionaryMessageType; -import org.apache.carbondata.core.util.CarbonProperties; - -import io.netty.bootstrap.ServerBootstrap; -import io.netty.channel.ChannelInitializer; -import io.netty.channel.ChannelOption; -import io.netty.channel.ChannelPipeline; -import io.netty.channel.EventLoopGroup; -import io.netty.channel.nio.NioEventLoopGroup; -import io.netty.channel.socket.SocketChannel; -import io.netty.channel.socket.nio.NioServerSocketChannel; -import io.netty.handler.codec.LengthFieldBasedFrameDecoder; - -/** - * Dictionary Server to generate dictionary keys. 
- */ -public class DictionaryServer { - - private static final LogService LOGGER = - LogServiceFactory.getLogService(DictionaryServer.class.getName()); - private DictionaryServerHandler dictionaryServerHandler; - - private EventLoopGroup boss; - private EventLoopGroup worker; - private int port; - private static Object lock = new Object(); - private static DictionaryServer INSTANCE = null; +package org.apache.carbondata.core.dictionary.server; - private DictionaryServer(int port) { -startServer(port); - } +import org.apache.spark.SparkConf; - public static DictionaryServer getInstance(int port) { -if (INSTANCE == null) { - synchronized (lock) { -if (INSTANCE == null) { - INSTANCE = new DictionaryServer(port); -} - } -} -return INSTANCE; - } +public interface DictionaryServer { - /** - * start dictionary server - * - * @param port - */ - private void startServer(int port) { -dictionaryServerHandler = new DictionaryServerHandler(); -String workerThreads = CarbonProperties.getInstance() -.getProperty(CarbonCommonConstants.DICTIONARY_WORKER_THREADS, -CarbonCommonConstants.DICTIONARY_WORKER_THREADS_DEFAULT); -boss = new NioEventLoopGroup(1); -worker = new NioEventLoopGroup(Integer.parseInt(workerThreads)); -// Configure the server. -bindToPort(port); - } + public void startServer(SparkConf conf, String host, int port); - /** - * Binds dictionary server to an available port. - * - * @param port - */ - private void bindToPort(int port) { -long start = System.currentTimeMillis(); -// Configure the server. 
-int i = 0; -while (i < 10) { - int newPort = port + i; - try { -ServerBootstrap bootstrap = new ServerBootstrap(); -bootstrap.group(boss, worker); -bootstrap.channel(NioServerSocketChannel.class); -bootstrap.childHandler(new ChannelInitializer() { - @Override public void initChannel(SocketChannel ch) throws Exception { -ChannelPipeline pipeline = ch.pipeline(); -pipeline -.addLast("LengthDecoder", -new LengthFieldBasedFrameDecoder(1048576, 0, -2, 0, 2)); -pipeline.addLast("DictionaryServerHandler", dictionaryServerHandler); - } -}); -bootstrap.childOption(ChannelOption.SO_KEEPALIVE, true); -bootstrap.bind(newPort).sync(); -LOGGER.audit("Dictionary Server started, Time spent " + (System.currentTimeMillis() - start) -+ " Listening on port " + newPort); -this.port = newPort; -break; - } catch (Exception e) { -LOGGER.error(e, "Dictionary Server Failed to bind to port:"); -if (i == 9) { - throw new RuntimeException("Dictionary Server Could not bind to any port"); -} - } - i++; -} - } + public void bindToPort(SparkConf orgConf, String host, int port); - /** - * - * @return Port on which the DictionaryServer has started. - */ - publ
[GitHub] carbondata pull request #1152: [CARBONDATA-1288] Secure Dictionary Server Im...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1152#discussion_r126906879 --- Diff: core/src/main/java/org/apache/carbondata/core/dictionary/server/NonSecureDictionaryServerHandler.java --- @@ -76,8 +75,7 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception * @param ctx * @param cause */ - @Override - public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { + @Override public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1152: [CARBONDATA-1288] Secure Dictionary Server Im...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1152#discussion_r126900457 --- Diff: core/src/main/java/org/apache/carbondata/core/dictionary/generator/ServerDictionaryGenerator.java --- @@ -26,6 +26,7 @@ import org.apache.carbondata.core.metadata.schema.table.CarbonTable; import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; + --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1152: [CARBONDATA-1288] Secure Dictionary Server Im...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1152#discussion_r126900846 --- Diff: core/src/main/java/org/apache/carbondata/core/dictionary/generator/key/DictionaryMessage.java --- @@ -50,7 +50,32 @@ */ private DictionaryMessageType type; - public void readData(ByteBuf byteBuf) { + public void readNonSecureData(ByteBuf byteBuf) { +byteBuf.resetReaderIndex(); --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1152: [CARBONDATA-1288] Secure Dictionary Server Im...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1152#discussion_r126930980 --- Diff: core/src/main/java/org/apache/carbondata/core/dictionary/service/NonSecureDictionaryServiceProvider.java --- @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.dictionary.service; + +import org.apache.carbondata.core.dictionary.client.DictionaryClient; +import org.apache.carbondata.core.dictionary.client.NonSecureDictionaryClient; + +public class NonSecureDictionaryServiceProvider implements DictionaryServiceProvider { + private int port = 0; + + public NonSecureDictionaryServiceProvider(int port) { +this.port = port; + } + + // @Override public DictionaryServer getDictionaryServer() { --- End diff -- Sending the server instance in provider is not needed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1152: [WIP] Secure Dictionary Server Implementation
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/1152 [WIP] Secure Dictionary Server Implementation Secure Dictionary Implementation Along with Non Secure. You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata Secure_Dictionry Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1152.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1152 commit d0aca3a94f07e3dc4d456e7e263dfe905dcafd8b Author: sounak <sounak.chakrabo...@huawei.com> Date: 2017-07-06T15:18:54Z Secure Dictionary Server Implementation Secure Dictionary Implementation Along with Non Secure. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata issue #1152: [WIP] Secure Dictionary Server Implementation
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1152 retest this please --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1152: [CARBONDATA-1288] Secure Dictionary Server Im...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1152#discussion_r126932824 --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala --- @@ -59,7 +59,7 @@ class SerializableConfiguration(@transient var value: Configuration) extends Ser @transient private val LOGGER = LogServiceFactory.getLogService(this.getClass.getCanonicalName) - private def writeObject(out: ObjectOutputStream): Unit = + private def writeObject(out: ObjectOutputStream): Unit = { --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1152: [CARBONDATA-1288] Secure Dictionary Server Im...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1152#discussion_r126973284 --- Diff: integration/spark2/src/main/java/org/apache/carbondata/spark/dictionary/client/SecureDictionaryClientHandler.java --- @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.spark.dictionary.client; + +import java.nio.ByteBuffer; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.dictionary.generator.key.DictionaryMessage; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.Unpooled; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelFutureListener; +import io.netty.channel.ChannelHandlerContext; +import org.apache.spark.network.client.RpcResponseCallback; +import org.apache.spark.network.client.TransportClient; +import org.apache.spark.network.server.OneForOneStreamManager; +import org.apache.spark.network.server.RpcHandler; +import org.apache.spark.network.server.StreamManager; + +/** + * Client handler to get data. + */ +public class SecureDictionaryClientHandler extends RpcHandler { + + private static final LogService LOGGER = + LogServiceFactory.getLogService(SecureDictionaryClientHandler.class.getName()); + + private final BlockingQueue responseMsgQueue = new LinkedBlockingQueue<>(); + + private ChannelHandlerContext ctx; + + private DictionaryChannelFutureListener channelFutureListener; + + /** + * client send request to server + * + * @param key DictionaryMessage + * @return DictionaryMessage + */ + public DictionaryMessage getDictionary(DictionaryMessage key, TransportClient client) { +DictionaryMessage dictionaryMessage; +ByteBuffer resp = null; +try { + + ByteBuf buffer = ByteBufAllocator.DEFAULT.heapBuffer(); + key.writeData(buffer); + resp = client.sendRpcSync(buffer.nioBuffer(), 10); +} catch (Exception e) { --- End diff -- aLready Handled --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata issue #1152: [WIP] Secure Dictionary Server Implementation
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1152 retest this please --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata issue #1152: [CARBONDATA-1288] Secure Dictionary Server Implement...
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1152 retest this please --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1111: Rectify Vector Buffer Overflow Calculation
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/1111 Rectify Vector Buffer Overflow Calculation Rectify Vector Buffer Overflow calculation. Previously we are keeping track of all the deleted rows from the buffer which is not needed as deleted rows are not physically removed from buffer. Better to make all calculations with total number of rows which is being filled in the buffer. You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata Dictionary_Based_vector_reader Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1111.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1111 commit 319dde19753dbf8cd47143f4e64e4f6d42acf93b Author: sounakr <soun...@gmail.com> Date: 2017-06-28T19:45:21Z Rectify Vector Buffer Calculation --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata issue #1079: [WIP]Measure Filter implementation
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1079 retest this please --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1124: [CARBONDATA-1257] Measure Filter implementati...
Github user sounakr closed the pull request at: https://github.com/apache/carbondata/pull/1124 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125190753 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/ColumnPageStatsVO.java --- @@ -56,9 +65,7 @@ public ColumnPageStatsVO(DataType dataType) { nonExistValue = Double.MIN_VALUE; break; case DECIMAL: -max = new BigDecimal(Double.MIN_VALUE); -min = new BigDecimal(Double.MAX_VALUE); -nonExistValue = new BigDecimal(Double.MIN_VALUE); +this.zeroDecimal = new BigDecimal(0); --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125199276 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java --- @@ -17,65 +17,174 @@ package org.apache.carbondata.core.scan.filter.executer; import java.io.IOException; +import java.math.BigDecimal; +import java.nio.ByteBuffer; import java.util.BitSet; +import java.util.Comparator; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; +import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.partition.PartitionFilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; import org.apache.carbondata.core.util.ByteUtil; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; public class IncludeFilterExecuterImpl implements FilterExecuter { protected DimColumnResolvedFilterInfo dimColumnEvaluatorInfo; protected DimColumnExecuterFilterInfo dimColumnExecuterInfo; + protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo; + protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo; protected SegmentProperties segmentProperties; + protected boolean isDimensionPresentInCurrentBlock = false; + protected boolean isMeasurePresentInCurrentBlock = 
false; /** * is dimension column data is natural sorted */ - private boolean isNaturalSorted; + private boolean isNaturalSorted = false; --- End diff -- Done. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125199254 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java --- @@ -18,56 +18,152 @@ import java.io.IOException; import java.util.BitSet; +import java.util.Comparator; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; +import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.partition.PartitionFilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; public class ExcludeFilterExecuterImpl implements FilterExecuter { protected DimColumnResolvedFilterInfo dimColEvaluatorInfo; protected DimColumnExecuterFilterInfo dimColumnExecuterInfo; + protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo; + protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo; protected SegmentProperties segmentProperties; + protected boolean isDimensionPresentInCurrentBlock = false; + protected boolean isMeasurePresentInCurrentBlock = false; /** * is dimension column data is natural sorted */ - private boolean isNaturalSorted; + private boolean isNaturalSorted = false; + public 
ExcludeFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, - SegmentProperties segmentProperties) { -this.dimColEvaluatorInfo = dimColEvaluatorInfo; -dimColumnExecuterInfo = new DimColumnExecuterFilterInfo(); + MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo, SegmentProperties segmentProperties, + boolean isMeasure) { this.segmentProperties = segmentProperties; - FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(), segmentProperties, -dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo); -isNaturalSorted = dimColEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColEvaluatorInfo -.getDimension().isSortColumn(); +if (isMeasure == false) { + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + dimColumnExecuterInfo = new DimColumnExecuterFilterInfo(); + + FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(), segmentProperties, + dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo, null, null); + isDimensionPresentInCurrentBlock = true; + isNaturalSorted = + dimColEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColEvaluatorInfo + .getDimension().isSortColumn(); +} else { + this.msrColumnEvaluatorInfo = msrColumnEvaluatorInfo; + msrColumnExecutorInfo = new MeasureColumnExecuterFilterInfo(); + FilterUtil + .prepareKeysFromSurrogates(msrColumnEvaluatorInfo.getFilterValues(), segmentProperties, + null, null, msrColumnEvaluatorInfo.getMeasure(), msrColumnExecutorInfo); + isMeasurePresentInCurrentBlock = true; +} + } @Override public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) throws IOException { -int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() -.get(dimColEvaluatorInfo.getColumnIndex()); -if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { - blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() - .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); +if 
(isDimensionPresentInCurrentBlock == true) { + int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimColEvaluatorInfo.getColumnIndex()); + if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { +blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() +.getDimensionChunk(blockChunkHolder.getFileReade
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125199288 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java --- @@ -17,65 +17,174 @@ package org.apache.carbondata.core.scan.filter.executer; import java.io.IOException; +import java.math.BigDecimal; +import java.nio.ByteBuffer; import java.util.BitSet; +import java.util.Comparator; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; +import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.partition.PartitionFilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; import org.apache.carbondata.core.util.ByteUtil; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; public class IncludeFilterExecuterImpl implements FilterExecuter { protected DimColumnResolvedFilterInfo dimColumnEvaluatorInfo; protected DimColumnExecuterFilterInfo dimColumnExecuterInfo; + protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo; + protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo; protected SegmentProperties segmentProperties; + protected boolean isDimensionPresentInCurrentBlock = false; + protected boolean isMeasurePresentInCurrentBlock = 
false; --- End diff -- Done. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125191772 --- Diff: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java --- @@ -113,6 +115,143 @@ public static Object getMeasureValueBasedOnDataType(String msrValue, DataType da } } + public static Object getMeasureObjectFromDataType(byte[] data, DataType dataType) { +ByteBuffer bb = ByteBuffer.wrap(data); +switch (dataType) { + case SHORT: + case INT: + case LONG: +return bb.getLong(); + case DECIMAL: +return byteToBigDecimal(data); + default: +return bb.getDouble(); +} + } + + /** + * This method will convert a given ByteArray to its specific type + * + * @param msrValue + * @param dataType + * @param carbonMeasure + * @return + */ + // public static byte[] getMeasureByteArrayBasedOnDataType(String msrValue, DataType dataType, + // CarbonMeasure carbonMeasure) { + //switch (dataType) { + // case DECIMAL: + //BigDecimal bigDecimal = + //new BigDecimal(msrValue).setScale(carbonMeasure.getScale(), RoundingMode.HALF_UP); + // return ByteUtil.toBytes(normalizeDecimalValue(bigDecimal, carbonMeasure.getPrecision())); + // case SHORT: + //return ByteUtil.toBytes((Short.parseShort(msrValue))); + // case INT: + //return ByteUtil.toBytes(Integer.parseInt(msrValue)); + // case LONG: + //return ByteUtil.toBytes(Long.valueOf(msrValue)); + // default: + //Double parsedValue = Double.valueOf(msrValue); + //if (Double.isInfinite(parsedValue) || Double.isNaN(parsedValue)) { + // return null; + //} + //return ByteUtil.toBytes(parsedValue); + //} + // } + public static byte[] getMeasureByteArrayBasedOnDataTypes(String msrValue, DataType dataType, + CarbonMeasure carbonMeasure) { +ByteBuffer b; +switch (dataType) { + case BYTE: + case SHORT: + case INT: + case LONG: +b = ByteBuffer.allocate(8); +b.putLong(Long.valueOf(msrValue)); +b.flip(); +return b.array(); + case DOUBLE: +b = ByteBuffer.allocate(8); +b.putDouble(Double.valueOf(msrValue)); 
+b.flip(); +return b.array(); + case DECIMAL: +BigDecimal bigDecimal = +new BigDecimal(msrValue).setScale(carbonMeasure.getScale(), RoundingMode.HALF_UP); +return DataTypeUtil +.bigDecimalToByte(normalizeDecimalValue(bigDecimal, carbonMeasure.getPrecision())); + default: +throw new IllegalArgumentException("Invalid data type: " + dataType); +} + } + + /** + * This method will convert a given ByteArray to its specific type + * + * @param msrValue + * @param dataType + * @param carbonMeasure + * @return + */ + public static byte[] getMeasureByteArrayBasedOnDataType(ColumnPage measurePage, int index, --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125191770 --- Diff: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java --- @@ -113,6 +115,143 @@ public static Object getMeasureValueBasedOnDataType(String msrValue, DataType da } } + public static Object getMeasureObjectFromDataType(byte[] data, DataType dataType) { +ByteBuffer bb = ByteBuffer.wrap(data); +switch (dataType) { + case SHORT: + case INT: + case LONG: +return bb.getLong(); + case DECIMAL: +return byteToBigDecimal(data); + default: +return bb.getDouble(); +} + } + + /** + * This method will convert a given ByteArray to its specific type + * + * @param msrValue + * @param dataType + * @param carbonMeasure + * @return + */ + // public static byte[] getMeasureByteArrayBasedOnDataType(String msrValue, DataType dataType, + // CarbonMeasure carbonMeasure) { + //switch (dataType) { + // case DECIMAL: + //BigDecimal bigDecimal = + //new BigDecimal(msrValue).setScale(carbonMeasure.getScale(), RoundingMode.HALF_UP); + // return ByteUtil.toBytes(normalizeDecimalValue(bigDecimal, carbonMeasure.getPrecision())); + // case SHORT: + //return ByteUtil.toBytes((Short.parseShort(msrValue))); + // case INT: + //return ByteUtil.toBytes(Integer.parseInt(msrValue)); + // case LONG: + //return ByteUtil.toBytes(Long.valueOf(msrValue)); + // default: + //Double parsedValue = Double.valueOf(msrValue); + //if (Double.isInfinite(parsedValue) || Double.isNaN(parsedValue)) { + // return null; + //} + //return ByteUtil.toBytes(parsedValue); + //} + // } --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125191810 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java --- @@ -395,6 +440,58 @@ public static DimColumnFilterInfo getNoDictionaryValKeyMemberForFilter( } /** + * This method will get the no dictionary data based on filters and same + * will be in ColumnFilterInfo + * + * @param evaluateResultListFinal + * @param isIncludeFilter + * @return ColumnFilterInfo + */ + public static ColumnFilterInfo getMeasureValKeyMemberForFilter( + List evaluateResultListFinal, boolean isIncludeFilter, DataType dataType, + CarbonMeasure carbonMeasure) throws FilterUnsupportedException { +List<byte[]> filterValuesList = new ArrayList<byte[]>(20); +String result = null; +try { + int length = evaluateResultListFinal.size(); + for (int i = 0; i < length; i++) { +result = evaluateResultListFinal.get(i); +if (CarbonCommonConstants.MEMBER_DEFAULT_VAL.equals(result)) { + filterValuesList.add(new byte[0]); + continue; +} +// TODO have to understand what method to be used for measures. +// filterValuesList +// .add(DataTypeUtil.getBytesBasedOnDataTypeForNoDictionaryColumn(result, dataType)); + +filterValuesList +.add(DataTypeUtil.getMeasureByteArrayBasedOnDataTypes(result, dataType, carbonMeasure)); --- End diff -- Currently we are storing filter keys of measures in byte array format. In next optimization phase will change to Object array of respective type to avoid conversion. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125198925 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java --- @@ -18,56 +18,152 @@ import java.io.IOException; import java.util.BitSet; +import java.util.Comparator; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; +import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.partition.PartitionFilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; public class ExcludeFilterExecuterImpl implements FilterExecuter { protected DimColumnResolvedFilterInfo dimColEvaluatorInfo; protected DimColumnExecuterFilterInfo dimColumnExecuterInfo; + protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo; + protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo; protected SegmentProperties segmentProperties; + protected boolean isDimensionPresentInCurrentBlock = false; + protected boolean isMeasurePresentInCurrentBlock = false; /** * is dimension column data is natural sorted */ - private boolean isNaturalSorted; + private boolean isNaturalSorted = false; + public 
ExcludeFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, - SegmentProperties segmentProperties) { -this.dimColEvaluatorInfo = dimColEvaluatorInfo; -dimColumnExecuterInfo = new DimColumnExecuterFilterInfo(); + MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo, SegmentProperties segmentProperties, + boolean isMeasure) { this.segmentProperties = segmentProperties; - FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(), segmentProperties, -dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo); -isNaturalSorted = dimColEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColEvaluatorInfo -.getDimension().isSortColumn(); +if (isMeasure == false) { --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125190940 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java --- @@ -395,6 +440,58 @@ public static DimColumnFilterInfo getNoDictionaryValKeyMemberForFilter( } /** + * This method will get the no dictionary data based on filters and same + * will be in ColumnFilterInfo + * + * @param evaluateResultListFinal + * @param isIncludeFilter + * @return ColumnFilterInfo + */ + public static ColumnFilterInfo getMeasureValKeyMemberForFilter( + List evaluateResultListFinal, boolean isIncludeFilter, DataType dataType, + CarbonMeasure carbonMeasure) throws FilterUnsupportedException { +List<byte[]> filterValuesList = new ArrayList<byte[]>(20); +String result = null; +try { + int length = evaluateResultListFinal.size(); + for (int i = 0; i < length; i++) { +result = evaluateResultListFinal.get(i); +if (CarbonCommonConstants.MEMBER_DEFAULT_VAL.equals(result)) { + filterValuesList.add(new byte[0]); + continue; +} +// TODO have to understand what method to be used for measures. +// filterValuesList +// .add(DataTypeUtil.getBytesBasedOnDataTypeForNoDictionaryColumn(result, dataType)); --- End diff -- Done. Removed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125190884 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java --- @@ -180,9 +185,27 @@ private static FilterExecuter createFilterExecuterTree( * @return */ private static FilterExecuter getIncludeFilterExecuter( - DimColumnResolvedFilterInfo dimColResolvedFilterInfo, SegmentProperties segmentProperties) { - -if (dimColResolvedFilterInfo.getDimension().isColumnar()) { + DimColumnResolvedFilterInfo dimColResolvedFilterInfo, + MeasureColumnResolvedFilterInfo msrColResolvedFilterInfo, + SegmentProperties segmentProperties) { +if (null != msrColResolvedFilterInfo && msrColResolvedFilterInfo.getMeasure().isColumnar()) { + CarbonMeasure measuresFromCurrentBlock = segmentProperties + .getMeasureFromCurrentBlock(msrColResolvedFilterInfo.getMeasure().getColumnId()); + if (null != measuresFromCurrentBlock) { +// update dimension and column index according to the dimension position in current block +MeasureColumnResolvedFilterInfo msrColResolvedFilterInfoCopyObject = +msrColResolvedFilterInfo.getCopyObject(); + msrColResolvedFilterInfoCopyObject.setMeasure(measuresFromCurrentBlock); + msrColResolvedFilterInfoCopyObject.setColumnIndex(measuresFromCurrentBlock.getOrdinal()); + msrColResolvedFilterInfoCopyObject.setType(measuresFromCurrentBlock.getDataType()); +return new IncludeFilterExecuterImpl(null, msrColResolvedFilterInfoCopyObject, +segmentProperties, true); + } else { +return new RestructureIncludeFilterExecutorImpl(dimColResolvedFilterInfo, +msrColResolvedFilterInfo, segmentProperties, true); + } +} --- End diff -- No, this whole block belongs to Measure. Line 220 points to Restructure in the case of dimensions. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125191734 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java --- @@ -74,80 +87,205 @@ private void ifDefaultValueMatchesFilter() { } } } +} else if (!msrColEvalutorInfoList.isEmpty() && !isMeasurePresentInCurrentBlock[0]) { + CarbonMeasure measure = this.msrColEvalutorInfoList.get(0).getMeasure(); + byte[] defaultValue = measure.getDefaultValue(); + if (null != defaultValue) { +for (int k = 0; k < filterRangeValues.length; k++) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangeValues[k], defaultValue); + if (maxCompare < 0) { +isDefaultValuePresentInFilter = true; +break; + } +} + } } } @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { BitSet bitSet = new BitSet(1); -boolean isScanRequired = -isScanRequired(blockMaxValue[dimensionBlocksIndex[0]], filterRangeValues); +boolean isScanRequired = false; +byte[] maxValue = null; +if (isMeasurePresentInCurrentBlock[0] || isDimensionPresentInCurrentBlock[0]) { + if (isMeasurePresentInCurrentBlock[0]) { +maxValue = blockMaxValue[measureBlocksIndex[0] + lastDimensionColOrdinal]; +isScanRequired = +isScanRequired(maxValue, filterRangeValues, msrColEvalutorInfoList.get(0).getType()); + } else { +maxValue = blockMaxValue[dimensionBlocksIndex[0]]; +isScanRequired = isScanRequired(maxValue, filterRangeValues); + } +} else { + isScanRequired = isDefaultValuePresentInFilter; +} + if (isScanRequired) { bitSet.set(0); } return bitSet; } + private boolean isScanRequired(byte[] blockMaxValue, byte[][] filterValues) { boolean isScanRequired = false; -if (isDimensionPresentInCurrentBlock[0]) { - for (int k = 0; k < filterValues.length; k++) { -// filter value should be in range of max and min value i.e -// max>filtervalue>min -// so filter-max should be negative -int maxCompare = 
ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[k], blockMaxValue); -// if any filter value is in range than this block needs to be -// scanned means always less than block max range. -if (maxCompare < 0) { - isScanRequired = true; - break; -} +for (int k = 0; k < filterValues.length; k++) { + // filter value should be in range of max and min value i.e + // max>filtervalue>min + // so filter-max should be negative + int maxCompare = ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[k], blockMaxValue); + // if any filter value is in range than this block needs to be + // scanned less than equal to max range. + if (maxCompare < 0) { +isScanRequired = true; +break; } -} else { - isScanRequired = isDefaultValuePresentInFilter; } return isScanRequired; } + private boolean isScanRequired(byte[] maxValue, byte[][] filterValue, + DataType dataType) { +for (int i = 0; i < filterValue.length; i++) { + if (filterValue[i].length == 0 || maxValue.length == 0) { +return isScanRequired(maxValue, filterValue); + } + switch (dataType) { --- End diff -- In Line 150 is a special Null Value case, rest of the cases comparator is being used. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125199504 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java --- @@ -152,12 +261,31 @@ private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnD public BitSet isScanRequired(byte[][] blkMaxVal, byte[][] blkMinVal) { BitSet bitSet = new BitSet(1); -byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys(); -int columnIndex = dimColumnEvaluatorInfo.getColumnIndex(); -int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping().get(columnIndex); +byte[][] filterValues = null; +int columnIndex = 0; +int blockIndex = 0; +boolean isScanRequired = false; + +if (isDimensionPresentInCurrentBlock == true) { + filterValues = dimColumnExecuterInfo.getFilterKeys(); + columnIndex = dimColumnEvaluatorInfo.getColumnIndex(); + blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping().get(columnIndex); + isScanRequired = + isScanRequired(blkMaxVal[blockIndex], blkMinVal[blockIndex], filterValues); + +} else if (isMeasurePresentInCurrentBlock) { + filterValues = msrColumnExecutorInfo.getFilterKeys(); + columnIndex = msrColumnEvaluatorInfo.getColumnIndex(); + // blockIndex = + // segmentProperties.getDimensionOrdinalToBlockMapping().get(columnIndex) + segmentProperties + // .getLastDimensionColOrdinal(); --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125191402 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/partition/PartitionFilterUtil.java --- @@ -107,6 +131,12 @@ public static Comparator getComparatorByDataType(DataType dataType) { } } + static class DecimalComparator implements Comparator { --- End diff -- Done. Removed --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125191432 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/partition/PartitionFilterUtil.java --- @@ -76,24 +99,25 @@ public static Comparator getComparatorByDataType(DataType dataType) { static class DoubleComparator implements Comparator { @Override public int compare(Object key1, Object key2) { - double result = (double) key1 - (double) key2; - if (result < 0) { + double key1Double1 = (double)key1; --- End diff -- There is a scenario: if a variable is a negative one, Key1 - Key2 won't give proper output. Better to check with the greater-than or less-than operator. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125191629 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java --- @@ -74,80 +87,205 @@ private void ifDefaultValueMatchesFilter() { } } } +} else if (!msrColEvalutorInfoList.isEmpty() && !isMeasurePresentInCurrentBlock[0]) { + CarbonMeasure measure = this.msrColEvalutorInfoList.get(0).getMeasure(); + byte[] defaultValue = measure.getDefaultValue(); + if (null != defaultValue) { +for (int k = 0; k < filterRangeValues.length; k++) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangeValues[k], defaultValue); --- End diff -- Currently Filter Keys are in ByteArray and values for restructuring is also same. Is this still required. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125191929 --- Diff: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java --- @@ -113,6 +115,143 @@ public static Object getMeasureValueBasedOnDataType(String msrValue, DataType da } } + public static Object getMeasureObjectFromDataType(byte[] data, DataType dataType) { +ByteBuffer bb = ByteBuffer.wrap(data); --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125190853 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java --- @@ -180,9 +185,27 @@ private static FilterExecuter createFilterExecuterTree( * @return */ private static FilterExecuter getIncludeFilterExecuter( - DimColumnResolvedFilterInfo dimColResolvedFilterInfo, SegmentProperties segmentProperties) { - -if (dimColResolvedFilterInfo.getDimension().isColumnar()) { + DimColumnResolvedFilterInfo dimColResolvedFilterInfo, + MeasureColumnResolvedFilterInfo msrColResolvedFilterInfo, + SegmentProperties segmentProperties) { +if (null != msrColResolvedFilterInfo && msrColResolvedFilterInfo.getMeasure().isColumnar()) { --- End diff -- Done. Removed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125198825 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java --- @@ -1042,12 +1144,17 @@ public static FilterExecuter getFilterExecuterTree( * @param dimension * @param dimColumnExecuterInfo */ - public static void prepareKeysFromSurrogates(DimColumnFilterInfo filterValues, + public static void prepareKeysFromSurrogates(ColumnFilterInfo filterValues, SegmentProperties segmentProperties, CarbonDimension dimension, - DimColumnExecuterFilterInfo dimColumnExecuterInfo) { -byte[][] keysBasedOnFilter = getKeyArray(filterValues, dimension, segmentProperties); -dimColumnExecuterInfo.setFilterKeys(keysBasedOnFilter); - + DimColumnExecuterFilterInfo dimColumnExecuterInfo, CarbonMeasure measures, + MeasureColumnExecuterFilterInfo msrColumnExecuterInfo) { +if (null != measures) { --- End diff -- This if check is required in order to setFilterKeys in respective measures or dimensions. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125199168 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java --- @@ -18,56 +18,152 @@ import java.io.IOException; import java.util.BitSet; +import java.util.Comparator; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; +import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.partition.PartitionFilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; public class ExcludeFilterExecuterImpl implements FilterExecuter { protected DimColumnResolvedFilterInfo dimColEvaluatorInfo; protected DimColumnExecuterFilterInfo dimColumnExecuterInfo; + protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo; + protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo; protected SegmentProperties segmentProperties; + protected boolean isDimensionPresentInCurrentBlock = false; + protected boolean isMeasurePresentInCurrentBlock = false; --- End diff -- Done, removed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1124: Measure Filter implementation For Branch 1.1
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/1124 Measure Filter implementation For Branch 1.1 Problem : Measure filter evaluation goes through RowLevelEvaluation. But after implementation of Min and Max for measures in block level, include exclude, greater than less than filter evaluators can do block pruning and can handle evaluation. Solution: Push measure evaluation to Include, exclude, greater than less than evaluators. Commits: Measure Implementation for Include and Exclude Filter RowLevel Measure Implementation RowLevel Less LessThan Greater GreaterThan Implementation for measure Rectify Datatype Conversion Measure Restructure Changes for Measure Rebase to Branch-1.1 Handling DataType Comparision and Rebase Error Rectify Review Comments Implementation You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata measure_branch-1.1 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1124.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1124 commit e78a34473499b2c421accb99f23d5257dafff517 Author: sounakr <soun...@gmail.com> Date: 2017-06-20T17:22:36Z Measure Filter implementation Measure Implementation for Include and Exclude Filter RowLevel Measure Implementation RowLevel Less LessThan Greater GreaterThan Implementation for measure Rectify Datatype Conversion Measure Restructure Changes for Measure Rebase to Branch-1.1 Handling DataType Comparision and Rebase Error Rectify Review Comments Implementation --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125190787 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java --- @@ -137,6 +137,8 @@ */ private int numberOfNoDictSortColumns; + private int lastDimensionColumnOrdinal; --- End diff -- Removed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125191362 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/ConditionalFilterResolverImpl.java --- @@ -198,21 +237,31 @@ public AbsoluteTableIdentifier getTableIdentifier() { */ public void getStartKey(SegmentProperties segmentProperties, long[] startKey, SortedMap<Integer, byte[]> setOfStartKeyByteArray, List<long[]> startKeyList) { - FilterUtil.getStartKey(dimColResolvedFilterInfo.getDimensionResolvedFilterInstance(), -segmentProperties, startKey, startKeyList); - FilterUtil.getStartKeyForNoDictionaryDimension(dimColResolvedFilterInfo, -segmentProperties, setOfStartKeyByteArray); +if (null != dimColResolvedFilterInfo) { + FilterUtil.getStartKey(dimColResolvedFilterInfo.getDimensionResolvedFilterInstance(), + segmentProperties, startKey, startKeyList); + FilterUtil.getStartKeyForNoDictionaryDimension(dimColResolvedFilterInfo, segmentProperties, + setOfStartKeyByteArray); +} +// else { +// FilterUtil.getStartKey(dimColResolvedFilterInfo.getDimensionResolvedFilterInstance(), +// segmentProperties, startKey, startKeyList); +// FilterUtil.getStartKeyForNoDictionaryDimension(dimColResolvedFilterInfo, segmentProperties, +// setOfStartKeyByteArray); +//} --- End diff -- Done. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125191345 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java --- @@ -395,6 +440,58 @@ public static DimColumnFilterInfo getNoDictionaryValKeyMemberForFilter( } /** + * This method will get the no dictionary data based on filters and same + * will be in ColumnFilterInfo + * + * @param evaluateResultListFinal + * @param isIncludeFilter + * @return ColumnFilterInfo + */ + public static ColumnFilterInfo getMeasureValKeyMemberForFilter( + List evaluateResultListFinal, boolean isIncludeFilter, DataType dataType, + CarbonMeasure carbonMeasure) throws FilterUnsupportedException { +List<byte[]> filterValuesList = new ArrayList<byte[]>(20); +String result = null; +try { + int length = evaluateResultListFinal.size(); + for (int i = 0; i < length; i++) { +result = evaluateResultListFinal.get(i); +if (CarbonCommonConstants.MEMBER_DEFAULT_VAL.equals(result)) { + filterValuesList.add(new byte[0]); + continue; +} +// TODO have to understand what method to be used for measures. +// filterValuesList +// .add(DataTypeUtil.getBytesBasedOnDataTypeForNoDictionaryColumn(result, dataType)); + +filterValuesList +.add(DataTypeUtil.getMeasureByteArrayBasedOnDataTypes(result, dataType, carbonMeasure)); + + } +} catch (Throwable ex) { + throw new FilterUnsupportedException("Unsupported Filter condition: " + result, ex); +} + +Comparator<byte[]> filterMeasureComaparator = new Comparator<byte[]>() { + + @Override public int compare(byte[] filterMember1, byte[] filterMember2) { +// TODO Auto-generated method stub +return ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterMember1, filterMember2); --- End diff -- We are converting String dataTypes into Bytes array and then saving into filterValueList. From populateFilterResolvedInfo itself we convert all dataType to strings and then pass it along. 
We may have to rectify and pass on the actual datatype from populateFilterResolvedInfo. But all filterValue comparisons in measures are currently sequential, so there is no chance of getting a wrong result, i.e. in Include, RowLevelLessThan, RowLevelLessThanEqual, RowLevelGrtThanEqual, RowLevelGrtThan. Also Range is not implemented yet for measures, where 2 filter values should be in ascending order. In the next stage of optimization we can hold the filter values in the actual datatype and have a comparator for each. This way the comparison will be proper and we don't have to convert the datatype to bytes and again back to an object while doing the actual comparison; we can carry the object all along. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125191383 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/ConditionalFilterResolverImpl.java --- @@ -44,16 +44,22 @@ protected boolean isExpressionResolve; protected boolean isIncludeFilter; private DimColumnResolvedFilterInfo dimColResolvedFilterInfo; + private MeasureColumnResolvedFilterInfo msrColResolvedFilterInfo; private AbsoluteTableIdentifier tableIdentifier; + private boolean isMeasure; --- End diff -- Done. Removed --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125191588 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/MeasureColumnExecuterFilterInfo.java --- @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.scan.filter.executer; + +public class MeasureColumnExecuterFilterInfo { + + byte[][] filterKeys; --- End diff -- In the current implementation the filter keys are kept as a byte array to keep it simple and in sync with the dimension array. During the actual comparison the filter keys are converted back to objects and compared. In the next phase of optimization we will change the filter keys to hold objects. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1013: [CARBONDATA-1154] IUD Performance Changes
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1013#discussion_r121572395 --- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java --- @@ -340,23 +340,37 @@ private static AbsoluteTableIdentifier getAbsoluteTableIdentifier(Configuration List result = new LinkedList(); FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor(); +UpdateVO invalidBlockVOForSegmentId = null; +Boolean IUDTable = false; --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1013: [CARBONDATA-1154] IUD Performance Changes
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1013#discussion_r121572267 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java --- @@ -184,22 +184,29 @@ private SegmentTaskIndexWrapper loadAndGetTaskIdToSegmentsMap( SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier); String segmentId = null; +UpdateVO updateVO = null; TaskBucketHolder taskBucketHolder = null; try { while (iteratorOverSegmentBlocksInfos.hasNext()) { +// Initialize the UpdateVO to Null for each segment. +updateVO = null; --- End diff -- Done. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1072: Implicit Column Fix
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/1072 Implicit Column Fix Projection on implicit column is failing with garbage values when vector read is enabled. Select getTupleId as tupleId from table; You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata Implit_Column Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1072.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1072 commit 54a5501d5c3b26a704f759d0567663353b8a12e6 Author: sounakr <soun...@gmail.com> Date: 2017-06-21T11:31:29Z Implicit Column Fix --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1069: [WIP] Measure Filter implementation
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/1069 [WIP] Measure Filter implementation Measure Filter Implementation --- You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata master Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1069.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1069 commit 718a96326167caa0a29942938de708634ca202e1 Author: sounakr <soun...@gmail.com> Date: 2017-06-20T17:22:36Z Measure Filter implementation --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1069: [WIP] Measure Filter implementation
Github user sounakr closed the pull request at: https://github.com/apache/carbondata/pull/1069 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/1079 [WIP]Measure Filter implementation Measure Filter Implementation You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata measure_filter Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1079.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1079 commit b3fa1780ae0e26fa379d812f9aec1c1c6274b8c6 Author: sounakr <soun...@gmail.com> Date: 2017-06-20T17:22:36Z Measure Filter implementation --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #929: [CARBONDATA-1070]Not In Filter Expression Null...
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/929 [CARBONDATA-1070]Not In Filter Expression Null Value Handling Problem : Filter Test Case failure. a) Nullpointer Handling in Not Expression. b) LessThan Filter Expression : Wrong calculation of StartKey for setting the Bits. You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata filter_fix Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/929.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #929 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #955: [WIP] Inset Filter Pushdown in 2.1
Github user sounakr closed the pull request at: https://github.com/apache/carbondata/pull/955 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #955: [WIP] Inset Filter Pushdown in 2.1
GitHub user sounakr reopened a pull request: https://github.com/apache/carbondata/pull/955 [WIP] Inset Filter Pushdown in 2.1 Inset Filter PushDown. You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata inset Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/955.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #955 commit 0619ed47ce29846e5af224efb468bbbe610f38db Author: sounakr <soun...@gmail.com> Date: 2017-05-26T11:29:44Z Inset Filter --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #955: [WIP] Inset Filter Pushdown in 2.1
Github user sounakr closed the pull request at: https://github.com/apache/carbondata/pull/955 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1013: [WIP]IUD Performance Changes
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/1013 [WIP]IUD Performance Changes IUD Performance Changes 1. Get invalid blocks ony when there is a Update Performed in the Table. 2. As UpdateVO is per segment basis no need to call it for each blocks. --- You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata IUD_Performance Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1013.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1013 commit a67ed7b89f321cb14e29d828fe2bd6f2554dc38a Author: sounakr <soun...@gmail.com> Date: 2017-06-08T14:58:50Z IUD Performance Changes --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] incubator-carbondata pull request #888: Cast Optimization Case Statement Rec...
GitHub user sounakr opened a pull request: https://github.com/apache/incubator-carbondata/pull/888 Cast Optimization Case Statement Rectify Problem : Cast Expression Optimization code was not working as expected due to wrong case statement. Fix: Fixed the case statements in Cast Expression optimization. You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata cast_opt_rectify Alternatively you can review and apply these changes as the patch at: https://github.com/apache/incubator-carbondata/pull/888.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #888 commit 90deaea92cc51a7e273f6e3a3f6f36a35a85c86d Author: sounakr <soun...@gmail.com> Date: 2017-05-05T12:47:05Z Cast Optimization Case Statement Rectify --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max Index Example for Da...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1359#discussion_r139123481 --- Diff: examples/spark2/src/main/scala/org/apache/carbondata/examples/MinMaxDataMapFactory.java --- @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.examples; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.core.cache.Cache; +import org.apache.carbondata.core.cache.CacheProvider; +import org.apache.carbondata.core.cache.CacheType; +import org.apache.carbondata.core.datamap.DataMapDistributable; +import org.apache.carbondata.core.datamap.DataMapMeta; +import org.apache.carbondata.core.datamap.TableDataMap; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datamap.dev.DataMapFactory; +import org.apache.carbondata.core.datamap.dev.DataMapWriter; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.events.ChangeEvent; +import org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier; +import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap; +import org.apache.carbondata.core.indexstore.schema.FilterType; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; + + +/** + * Table map for blocklet + */ +public class MinMaxDataMapFactory implements DataMapFactory { + + private AbsoluteTableIdentifier identifier; + + // segmentId -> list of index file + private Map<String, List> segmentMap = new HashMap<>(); + + private Cache<TableBlockIndexUniqueIdentifier, DataMap> cache; + + @Override + public void init(AbsoluteTableIdentifier identifier, String dataMapName) { +this.identifier = identifier; +cache = CacheProvider.getInstance() --- End diff -- Removed. ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max Index Example for Da...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1359#discussion_r139123481 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapWriter.java --- @@ -32,7 +32,12 @@ /** * End of block notification */ - void onBlockEnd(String blockId); + void onBlockEnd(String blockId, String directoryPath); + + /** + * End of block notification when index got created. + */ + void onBlockEndWithIndex(String blockId, String directoryPath); --- End diff -- But during onBlockEnd, as the carbonIndex is not yet written, we won't be able to access the carbonIndex files. In the example I am gathering information from CarbonIndex files too. Better to keep a hook after writing the index files also. In future we may need some more hooks at different points. ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max Index Example for Da...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1359#discussion_r139124564 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java --- @@ -31,7 +31,8 @@ /** * It is called to load the data map to memory or to initialize it. */ - void init(String filePath) throws MemoryException, IOException; + void init(String blockletIndexPath, String customIndexPath, String segmentId) --- End diff -- In this example, along with the Min and Max information, I am keeping a few more pieces of information for building the BlockLet. Both indexes are independent, but with the current example implementation I read the Min and Max index and then read the carbonindex index also in order to get the column cardinality and segment properties. These values are used to form the blocklet used for pruning. ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max Index Example for Da...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1359#discussion_r139068734 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapWriter.java --- @@ -32,7 +32,12 @@ /** * End of block notification */ - void onBlockEnd(String blockId); + void onBlockEnd(String blockId, String directoryPath); + + /** + * End of block notification when index got created. + */ + void onBlockEndWithIndex(String blockId, String directoryPath); --- End diff -- onBlockEnd Method is called once the block is written. onBlockEndWithIndex is called once the index is also written after the carbondata is written out. ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max Index Example for Da...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1359#discussion_r139092331 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java --- @@ -31,7 +31,8 @@ /** * It is called to load the data map to memory or to initialize it. */ - void init(String filePath) throws MemoryException, IOException; + void init(String blockletIndexPath, String customIndexPath, String segmentId) --- End diff -- For Min Max Index creation, for things like segment properties, I am taking input from the regular carbonindex file too. So by design we can have one parameter as the primitive index path and the other can be the new custom index file path. ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max Index Example for Da...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1359#discussion_r139890369 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java --- @@ -42,6 +44,15 @@ */ List prune(FilterResolverIntf filterExp); + /** + * Prune the datamap with blockletId. It returns the list of + * blocklets where these filters can exist. + * + * @param filterExp + * @param blockletId + * @return + */ + List pruneBlockletFromBlockId(FilterResolverIntf filterExp, int blockletId); --- End diff -- BlockletId is the output of Min Max DataMap and the same is passed to BlockletDataMap in order to form the complete blocklet. Instead of declaring the method pruneBlockletFromBlockId in DataMap, the same can be made a local function to blockletId. ---
[GitHub] carbondata issue #1359: [CARBONDATA-1480]Min Max Index Example for DataMap
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1359 @ravipesala and @jackylk, sure, will make it simple. Will check if some more interfaces need to be opened. ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max DataMap
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/1359 [CARBONDATA-1480]Min Max DataMap Datamap Example. Implementation of Min Max Index through Datamap, and using the index while pruning. --- You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata minmax Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1359.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1359 commit a46e3b7c609e070f052017edabef9355668cf00a Author: sounakr <soun...@gmail.com> Date: 2017-09-13T11:57:23Z Min Max DataMap ---
[GitHub] carbondata pull request #1329: [WIP] Min Max DataMap Example
Github user sounakr closed the pull request at: https://github.com/apache/carbondata/pull/1329 ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max Index Example for Da...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1359#discussion_r143714171 --- Diff: examples/spark2/src/main/scala/org/apache/carbondata/examples/MinMaxBlockletComparator.java --- @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.examples; + +import java.nio.ByteBuffer; +import java.util.Comparator; + +import org.apache.carbondata.core.util.ByteUtil; + + +/** + * Data map comparator + */ +public class MinMaxBlockletComparator implements Comparator<byte[]> { --- End diff -- Removed ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max Index Example for Da...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1359#discussion_r143741467 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java --- @@ -219,4 +225,27 @@ public DataMapMeta getMeta() { // TODO: pass SORT_COLUMNS into this class return null; } + + @Override public SegmentProperties getSegmentProperties(String segmentId) throws IOException { +SegmentProperties segmentProperties = null; --- End diff -- Done. ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max Index Example for Da...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1359#discussion_r143714831 --- Diff: examples/spark2/src/main/scala/org/apache/carbondata/examples/MinMaxDataMap.java --- @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.examples; + +import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.List; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.cache.Cacheable; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datastore.IndexKey; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.indexstore.Blocklet; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.carbondata.core.util.CarbonUtil; + +import com.google.gson.Gson; + +/** + * Datamap implementation for min max blocklet. + */ +public class MinMaxDataMap implements DataMap, Cacheable { --- End diff -- Done ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max Index Example for Da...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1359#discussion_r143714764 --- Diff: examples/spark2/src/main/scala/org/apache/carbondata/examples/MinMaxDataMap.java --- @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.examples; + +import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.List; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.cache.Cacheable; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datastore.IndexKey; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.indexstore.Blocklet; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.carbondata.core.util.CarbonUtil; + +import com.google.gson.Gson; + +/** + * Datamap implementation for min max blocklet. 
+ */ +public class MinMaxDataMap implements DataMap, Cacheable { + + private static final LogService LOGGER = + LogServiceFactory.getLogService(MinMaxDataMap.class.getName()); + + public static final String NAME = "clustered.minmax.btree.blocklet"; + + private String filePath; + + private MinMaxIndexBlockDetails[] readMinMaxDataMap; + + @Override public void init(String filePath) throws MemoryException, IOException { +this.filePath = filePath; +CarbonFile[] listFiles = getCarbonIndexFiles(filePath, "0"); +for (int i = 0; i < listFiles.length; i++) { + readMinMaxDataMap = readJson(listFiles[i].getPath()); +} + } + + private CarbonFile[] getCarbonIndexFiles(String filePath, String segmentId) { --- End diff -- Done. ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max Index Example for Da...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1359#discussion_r143721417 --- Diff: examples/spark2/src/main/scala/org/apache/carbondata/examples/MinMaxDataMapFactory.java --- @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.examples; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.core.cache.Cache; +import org.apache.carbondata.core.cache.CacheProvider; +import org.apache.carbondata.core.cache.CacheType; +import org.apache.carbondata.core.datamap.DataMapDistributable; +import org.apache.carbondata.core.datamap.DataMapMeta; +import org.apache.carbondata.core.datamap.TableDataMap; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datamap.dev.DataMapFactory; +import org.apache.carbondata.core.datamap.dev.DataMapWriter; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.events.ChangeEvent; +import org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier; +import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap; +import org.apache.carbondata.core.indexstore.schema.FilterType; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; + + +/** + * Min Max DataMap Factory + */ +public class MinMaxDataMapFactory implements DataMapFactory { + + private AbsoluteTableIdentifier identifier; + + // segmentId -> list of index file + private Map<String, List> segmentMap = new HashMap<>(); + + @Override + public void init(AbsoluteTableIdentifier identifier, String dataMapName) { +this.identifier = identifier; + } + + /** + * createWriter will return the MinMaxDataWriter. + * @param segmentId + * @return + */ + @Override + public DataMapWriter createWriter(String segmentId) { +return new MinMaxDataWriter(); + } + + /** + * getDataMaps Factory method Initializes the Min Max Data Map and returns. 
+ * @param segmentId + * @return + * @throws IOException + */ + @Override + public List getDataMaps(String segmentId) throws IOException { +List tableBlockIndexUniqueIdentifiers = +segmentMap.get(segmentId); +List dataMapList = new ArrayList<>(); +if (tableBlockIndexUniqueIdentifiers == null) { + tableBlockIndexUniqueIdentifiers = new ArrayList<>(); + CarbonFile[] listFiles = getCarbonIndexFiles(segmentId); + for (int i = 0; i < listFiles.length; i++) { +tableBlockIndexUniqueIdentifiers.add( +new TableBlockIndexUniqueIdentifier(identifier, segmentId, listFiles[i].getName())); + } +} +// Form a dataMap of Type MinMaxDataMap. +MinMaxDataMap dataMap = new MinMaxDataMap(); +try { + dataMap.init(tableBlockIndexUniqueIdentifiers.get(0).getFilePath()); +} catch (MemoryException ex) { + +} +dataMapList.add(dataMap); +return dataMapList; + } + + /** + * Routine to retrieve the carbonIndex. + * @param segmentId + * @return + */ + private CarbonFile[] getCarbonIndexFiles(String segmentId) { --- End diff -- removed ---
[GitHub] carbondata issue #1359: [CARBONDATA-1480]Min Max Index Example for DataMap
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1359 Retest this please. ---
[GitHub] carbondata issue #1359: [CARBONDATA-1480]Min Max Index Example for DataMap
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1359 Retest this please. ---
[GitHub] carbondata issue #1359: [CARBONDATA-1480]Min Max Index Example for DataMap
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1359 Retest this please ---
[GitHub] carbondata pull request #1359: [CARBONDATA-1480]Min Max Index Example for Da...
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1359#discussion_r144249578 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapWriter.java --- @@ -45,14 +45,13 @@ * @param blockletId sequence number of blocklet in the block */ void onBlockletEnd(int blockletId); - /** * Add the column pages row to the datamap, order of pages is same as `indexColumns` in * DataMapMeta returned in DataMapFactory. * * Implementation should copy the content of `pages` as needed, because `pages` memory * may be freed after this method returns, if using unsafe column page. */ - void onPageAdded(int blockletId, int pageId, ColumnPage[] pages); + void onPageAdded(int blockletId, int pageId, ColumnPage[] pages, String directoryPath); --- End diff -- This is not a directory path, but instead the full path to the block. So this is constant for a block. So removed it from OnPageAdded and passed as an extra parameter to OnBlockStart, as this blockPath has to be initialized on every block start. ---
[GitHub] carbondata issue #1359: [CARBONDATA-1480]Min Max Index Example for DataMap
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1359 Retest this please. ---
[GitHub] carbondata issue #1399: [CARBONDATA-1570] [WIP] Lucene Datamap Implementatio...
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1399 Retest this please. ---
[GitHub] carbondata pull request #1329: [WIP] Min Max DataMap Example
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/1329 [WIP] Min Max DataMap Example Min Max DataMap Example You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata MinMaxDataMap Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1329.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1329 commit c01d4cd6dfa172564eff2cf53cadab5cb3d2b46a Author: sounakr <soun...@gmail.com> Date: 2017-09-06T04:45:25Z Min Max DataMap 1st Phase ---
[GitHub] carbondata issue #1359: [CARBONDATA-1480]Min Max Index Example for DataMap
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1359 Retest this please ---
[GitHub] carbondata pull request #1399: [WIP] Lucene Datamap Example Implementation.
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/1399 [WIP] Lucene Datamap Example Implementation. Be sure to do all of the following to help us incorporate your contribution quickly and easily: - [ ] Make sure the PR title is formatted like: `[CARBONDATA-] Description of pull request` - [ ] Make sure tests pass via `mvn clean verify`. (Even better, enable Travis-CI on your fork and ensure the whole test matrix passes). - [ ] Replace `` in the title with the actual Jira issue number, if there is one. - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.txt). - [ ] Testing done Please provide details on - Whether new unit test cases have been added or why no new tests are required? - What manual testing you have done? - Any additional information to help reviewers in testing this change. - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. --- You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata lucene Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1399.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1399 commit a604b027a58ee1b4b2f7e4465b934159ddc0b016 Author: sounak chakraborty <sounak@sounaks-macbook-pro.local> Date: 2017-10-02T01:58:11Z Lucene Index ---
[GitHub] carbondata issue #1399: [WIP] Lucene Datamap Example Implementation.
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1399 Retest this please. ---
[GitHub] carbondata issue #1399: [CARBONDATA-1570] [WIP] Lucene Datamap Implementatio...
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1399 Retest this please. ---
[GitHub] carbondata issue #1359: [CARBONDATA-1480]Min Max Index Example for DataMap
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1359 Retest this please. ---
[GitHub] carbondata pull request #1431: [WIP] DataMap Access Path Optimization
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/1431 [WIP] DataMap Access Path Optimization Optimization for DataMap AccessPath decision and execution based on coverage and stats. You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata datamap_accesspath Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1431.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1431 commit 93b0864ad11ffbe4b5e6695b472fe7a8cc544e30 Author: sounakr <soun...@gmail.com> Date: 2017-10-24T04:52:52Z DataMap Access Path ---
[GitHub] carbondata issue #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1469 Retest this please ---
[GitHub] carbondata issue #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on the issue: https://github.com/apache/carbondata/pull/1469 Retest this please ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076436 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/schema/AlterTableDropColumnCommand.scala --- @@ -20,9 +20,10 @@ package org.apache.spark.sql.execution.command.schema import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer +import org.apache.hadoop.hive.ql.session.SessionState --- End diff -- Done ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076449 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/schema/AlterTableDropColumnCommand.scala --- @@ -120,10 +121,7 @@ private[sql] case class AlterTableDropColumnCommand( val schemaEvolutionEntry = new SchemaEvolutionEntry(timeStamp) schemaEvolutionEntry.setRemoved(deletedColumnSchema.toList.asJava) AlterTableUtil -.updateSchemaInfo(carbonTable, - schemaEvolutionEntry, - tableInfo)(sparkSession, - sparkSession.sessionState.asInstanceOf[CarbonSessionState]) + .updateSchemaInfo(carbonTable, schemaEvolutionEntry, tableInfo)(sparkSession) --- End diff -- Done ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076559 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonAnalysisRules.scala --- @@ -24,17 +24,15 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, NamedExpress import org.apache.spark.sql.catalyst.plans.Inner import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.execution.command.mutation.ProjectForDeleteCommand import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.CarbonExpressions.CarbonUnresolvedRelation import org.apache.carbondata.core.constants.CarbonCommonConstants -/** - * Insert into carbon table from other source - */ --- End diff -- Moved Back. ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076325 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/CarbonExpressions.scala --- @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Expression} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} +import org.apache.spark.sql.execution.command.DescribeTableCommand +import org.apache.spark.sql.types.DataType + +/** + * This class contains the wrappers of all the case classes which are common + * across spark version 2.1 and 2.2 but have change in parameter list. + * Below are the overriden unapply methods in order to make it work + * across both the version of spark2.1 and spark 2.2 + */ +object CarbonExpressions { + + /** + * unapply method of Cast class. 
+ */ + object MatchCast { +def unapply(expr: Expression): Option[(Attribute, DataType)] = { + if (expr.isInstanceOf[Cast] && expr.asInstanceOf[Cast].child.isInstanceOf[Attribute]) { +Some((expr.asInstanceOf[Cast].child.asInstanceOf[Attribute], expr.asInstanceOf[Cast].child + .dataType)) + } else { +None + } +} + } + + /** + * unapply method of Describe Table format. + */ + object CarbonDescribeTable { +def unapply(plan: LogicalPlan): Option[(TableIdentifier, TablePartitionSpec, Boolean)] = { + if (plan.isInstanceOf[DescribeTableCommand]) { +val describeTableCommand = plan.asInstanceOf[DescribeTableCommand] +if (describeTableCommand.table.isInstanceOf[TableIdentifier] && + describeTableCommand.partitionSpec.isInstanceOf[TablePartitionSpec] && +describeTableCommand.isExtended.isInstanceOf[Boolean]) { + Some(describeTableCommand.table, +describeTableCommand.partitionSpec, +describeTableCommand.isExtended) +} else { + None +} + } else { +None + } +} + } + + /** + * unapply method of SubqueryAlias. + */ + object CarbonSubqueryAlias { +def unapply(plan: LogicalPlan): Option[(String, LogicalPlan)] = { + if (plan.isInstanceOf[SubqueryAlias]) { --- End diff -- Done ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076329 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/CarbonExpressions.scala --- @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Expression} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} +import org.apache.spark.sql.execution.command.DescribeTableCommand +import org.apache.spark.sql.types.DataType + +/** + * This class contains the wrappers of all the case classes which are common + * across spark version 2.1 and 2.2 but have change in parameter list. + * Below are the overriden unapply methods in order to make it work + * across both the version of spark2.1 and spark 2.2 + */ +object CarbonExpressions { + + /** + * unapply method of Cast class. 
+ */ + object MatchCast { +def unapply(expr: Expression): Option[(Attribute, DataType)] = { + if (expr.isInstanceOf[Cast] && expr.asInstanceOf[Cast].child.isInstanceOf[Attribute]) { +Some((expr.asInstanceOf[Cast].child.asInstanceOf[Attribute], expr.asInstanceOf[Cast].child + .dataType)) + } else { +None + } +} + } + + /** + * unapply method of Describe Table format. + */ + object CarbonDescribeTable { +def unapply(plan: LogicalPlan): Option[(TableIdentifier, TablePartitionSpec, Boolean)] = { + if (plan.isInstanceOf[DescribeTableCommand]) { +val describeTableCommand = plan.asInstanceOf[DescribeTableCommand] +if (describeTableCommand.table.isInstanceOf[TableIdentifier] && + describeTableCommand.partitionSpec.isInstanceOf[TablePartitionSpec] && +describeTableCommand.isExtended.isInstanceOf[Boolean]) { + Some(describeTableCommand.table, +describeTableCommand.partitionSpec, +describeTableCommand.isExtended) +} else { + None +} + } else { +None + } +} + } + + /** + * unapply method of SubqueryAlias. + */ + object CarbonSubqueryAlias { +def unapply(plan: LogicalPlan): Option[(String, LogicalPlan)] = { + if (plan.isInstanceOf[SubqueryAlias]) { +val subqueryAlias = plan.asInstanceOf[SubqueryAlias] +if (subqueryAlias.alias.isInstanceOf[String] && +subqueryAlias.child.isInstanceOf[LogicalPlan]) { + Some(subqueryAlias.alias, +subqueryAlias.child) +} else { + None +} + } else { +None + } +} + } + + /** + * uapply method of UnresolvedRelation + */ + object CarbonUnresolvedRelation { +def unapply(plan: LogicalPlan): Option[(TableIdentifier)] = { + if (plan.isInstanceOf[UnresolvedRelation]) { --- End diff -- Done ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076321 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/CarbonExpressions.scala --- @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Expression} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} +import org.apache.spark.sql.execution.command.DescribeTableCommand +import org.apache.spark.sql.types.DataType + +/** + * This class contains the wrappers of all the case classes which are common + * across spark version 2.1 and 2.2 but have change in parameter list. + * Below are the overriden unapply methods in order to make it work + * across both the version of spark2.1 and spark 2.2 + */ +object CarbonExpressions { + + /** + * unapply method of Cast class. 
+ */ + object MatchCast { +def unapply(expr: Expression): Option[(Attribute, DataType)] = { + if (expr.isInstanceOf[Cast] && expr.asInstanceOf[Cast].child.isInstanceOf[Attribute]) { +Some((expr.asInstanceOf[Cast].child.asInstanceOf[Attribute], expr.asInstanceOf[Cast].child + .dataType)) + } else { +None + } +} + } + + /** + * unapply method of Describe Table format. + */ + object CarbonDescribeTable { +def unapply(plan: LogicalPlan): Option[(TableIdentifier, TablePartitionSpec, Boolean)] = { + if (plan.isInstanceOf[DescribeTableCommand]) { --- End diff -- Done ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076314 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/CarbonExpressions.scala --- @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Expression} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} +import org.apache.spark.sql.execution.command.DescribeTableCommand +import org.apache.spark.sql.types.DataType + +/** + * This class contains the wrappers of all the case classes which are common + * across spark version 2.1 and 2.2 but have change in parameter list. + * Below are the overriden unapply methods in order to make it work + * across both the version of spark2.1 and spark 2.2 + */ +object CarbonExpressions { + + /** + * unapply method of Cast class. 
+ */ + object MatchCast { +def unapply(expr: Expression): Option[(Attribute, DataType)] = { + if (expr.isInstanceOf[Cast] && expr.asInstanceOf[Cast].child.isInstanceOf[Attribute]) { --- End diff -- Done. ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076491 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/DDLStrategy.scala --- @@ -26,13 +26,19 @@ import org.apache.spark.sql.execution.command.management.{AlterTableCompactionCo import org.apache.spark.sql.execution.command.partition.ShowCarbonPartitionsCommand import org.apache.spark.sql.execution.command.schema.{AlterTableAddColumnCommand, AlterTableDataTypeChangeCommand, AlterTableDropColumnCommand, AlterTableRenameTableCommand} import org.apache.spark.sql.hive.execution.command.{CarbonDropDatabaseCommand, CarbonResetCommand, CarbonSetCommand} +import org.apache.spark.sql.CarbonExpressions.{CarbonDescribeTable => DescribeTableCommand} +import org.apache.spark.sql.catalyst.catalog.CatalogTypes import org.apache.carbondata.core.util.CarbonUtil import org.apache.carbondata.spark.exception.MalformedCarbonCommandException /** * Carbon strategies for ddl commands */ +case class CarbonDescribeTableCommand ( --- End diff -- Done ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076471 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/DDLStrategy.scala --- @@ -26,13 +26,19 @@ import org.apache.spark.sql.execution.command.management.{AlterTableCompactionCo import org.apache.spark.sql.execution.command.partition.ShowCarbonPartitionsCommand import org.apache.spark.sql.execution.command.schema.{AlterTableAddColumnCommand, AlterTableDataTypeChangeCommand, AlterTableDropColumnCommand, AlterTableRenameTableCommand} import org.apache.spark.sql.hive.execution.command.{CarbonDropDatabaseCommand, CarbonResetCommand, CarbonSetCommand} +import org.apache.spark.sql.CarbonExpressions.{CarbonDescribeTable => DescribeTableCommand} +import org.apache.spark.sql.catalyst.catalog.CatalogTypes --- End diff -- Done ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076462 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala --- @@ -42,6 +43,7 @@ import org.apache.carbondata.spark.CarbonAliasDecoderRelation import org.apache.carbondata.spark.rdd.CarbonScanRDD import org.apache.carbondata.spark.util.CarbonScalaUtil + --- End diff -- Done ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076600 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonAnalysisRules.scala --- @@ -58,6 +57,8 @@ object CarbonPreInsertionCasts extends Rule[LogicalPlan] { ) } if (child.output.size >= relation.carbonRelation.output.size) { + sparkVersion21 = !CarbonClassReflectionUtils.hasField("query", InsertIntoCarbonTable) --- End diff -- In some places the SparkContext or SparkSession is not directly available. Therefore we used the approach of checking for the field via reflection; if the field is not present, we assume the other Spark version. ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076517 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/DDLStrategy.scala --- @@ -155,6 +161,14 @@ class DDLStrategy(sparkSession: SparkSession) extends SparkStrategy { val cmd = CreateDataSourceTableCommand(updatedCatalog, ignoreIfExists = mode == SaveMode.Ignore) ExecutedCommandExec(cmd) :: Nil + case CreateDataSourceTableCommand(table, ignoreIfExists) +if table.provider.get != DDLUtils.HIVE_PROVIDER + && table.provider.get.equals("org.apache.spark.sql.CarbonSource") => +val updatedCatalog = + CarbonSource.updateCatalogTableWithCarbonSchema(table, sparkSession) --- End diff -- Done ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153076529 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/DDLStrategy.scala --- @@ -155,6 +161,14 @@ class DDLStrategy(sparkSession: SparkSession) extends SparkStrategy { val cmd = CreateDataSourceTableCommand(updatedCatalog, ignoreIfExists = mode == SaveMode.Ignore) ExecutedCommandExec(cmd) :: Nil + case CreateDataSourceTableCommand(table, ignoreIfExists) +if table.provider.get != DDLUtils.HIVE_PROVIDER + && table.provider.get.equals("org.apache.spark.sql.CarbonSource") => +val updatedCatalog = + CarbonSource.updateCatalogTableWithCarbonSchema(table, sparkSession) +val cmd = + CreateDataSourceTableCommand(updatedCatalog, ignoreIfExists) --- End diff -- Done ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153094267 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonAnalysisRules.scala --- @@ -95,12 +107,23 @@ case class CarbonIUDAnalysisRule(sparkSession: SparkSession) extends Rule[Logica def prepareTargetReleation(relation: UnresolvedRelation): SubqueryAlias = { val tupleId = UnresolvedAlias(Alias(UnresolvedFunction("getTupleId", Seq.empty, isDistinct = false), "tupleId")()) + + val localAlias = alias match { +case Some(a) => Some(alias.toSeq) +case _ => None + } --- End diff -- Done ---
[GitHub] carbondata pull request #1469: [WIP] Spark-2.2 Carbon Integration - Phase 1
Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1469#discussion_r153094337 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonAnalysisRules.scala --- @@ -95,12 +107,23 @@ case class CarbonIUDAnalysisRule(sparkSession: SparkSession) extends Rule[Logica def prepareTargetReleation(relation: UnresolvedRelation): SubqueryAlias = { val tupleId = UnresolvedAlias(Alias(UnresolvedFunction("getTupleId", Seq.empty, isDistinct = false), "tupleId")()) + + val localAlias = alias match { +case Some(a) => Some(alias.toSeq) +case _ => None + } val projList = Seq( -UnresolvedAlias(UnresolvedStar(Option(table.alias.toSeq))), tupleId) - // include tuple id and rest of the required columns in subqury - SubqueryAlias(table.alias.getOrElse(""), -Project(projList, relation), Option(table.tableIdentifier)) +UnresolvedAlias(UnresolvedStar(localAlias)), tupleId) + + val subqueryAlias = +CarbonClassReflectionUtils + .getSubqueryAlias(sparkSession, +alias, +Project(projList, relation), +Some(table.tableIdentifier)) + subqueryAlias --- End diff -- Done ---