[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102393352
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/AbstractServerConnection.java
 ---
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc;
+
+import io.netty.channel.socket.SocketChannel;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.hadoop.security.HadoopKerberosName;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.slf4j.Logger;
+
+import javax.security.auth.login.LoginException;
+import javax.security.sasl.SaslException;
+import javax.security.sasl.SaslServer;
+import java.io.IOException;
+
+public abstract class AbstractServerConnection
--- End diff --

Fixed all the warnings, thanks for the catch.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102393799
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/ServerAuthenticationHandler.java
 ---
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc.security;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.Internal.EnumLite;
+import com.google.protobuf.InvalidProtocolBufferException;
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.ByteBufInputStream;
+import org.apache.drill.exec.proto.UserBitShared.SaslMessage;
+import org.apache.drill.exec.proto.UserBitShared.SaslStatus;
+import org.apache.drill.exec.rpc.RequestHandler;
+import org.apache.drill.exec.rpc.Response;
+import org.apache.drill.exec.rpc.ResponseSender;
+import org.apache.drill.exec.rpc.RpcException;
+import org.apache.drill.exec.rpc.ServerConnection;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import javax.security.sasl.SaslException;
+import javax.security.sasl.SaslServer;
+import java.io.IOException;
+import java.lang.reflect.UndeclaredThrowableException;
+import java.security.PrivilegedExceptionAction;
+import java.util.EnumMap;
+import java.util.Map;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+public class ServerAuthenticationHandler implements RequestHandler {
+  private static final org.slf4j.Logger logger =
+  org.slf4j.LoggerFactory.getLogger(ServerAuthenticationHandler.class);
+
+  private static final ImmutableMap 
RESPONSE_PROCESSORS;
+
+  static {
+final Map map = new 
EnumMap<>(SaslStatus.class);
+map.put(SaslStatus.SASL_START, new SaslStartProcessor());
+map.put(SaslStatus.SASL_IN_PROGRESS, new SaslInProgressProcessor());
+map.put(SaslStatus.SASL_SUCCESS, new SaslSuccessProcessor());
+map.put(SaslStatus.SASL_FAILED, new SaslFailedProcessor());
+RESPONSE_PROCESSORS = Maps.immutableEnumMap(map);
+  }
+
+  private final RequestHandler requestHandler;
+  private final int saslRequestTypeValue;
+  private final T saslResponseType;
+
+  public ServerAuthenticationHandler(final RequestHandler 
requestHandler, final int saslRequestTypeValue,
+ final T saslResponseType) {
+this.requestHandler = requestHandler;
+this.saslRequestTypeValue = saslRequestTypeValue;
+this.saslResponseType = saslResponseType;
+  }
+
+  @Override
+  public void handle(C connection, int rpcType, ByteBuf pBody, ByteBuf 
dBody, ResponseSender sender)
+  throws RpcException {
+final String remoteAddress = connection.getRemoteAddress().toString();
+
+// exchange involves server "challenges" and client "responses" 
(initiated by client)
+if (saslRequestTypeValue == rpcType) {
+  final SaslMessage saslResponse;
+  try {
+saslResponse = SaslMessage.PARSER.parseFrom(new 
ByteBufInputStream(pBody));
+  } catch (final InvalidProtocolBufferException e) {
+handleAuthFailure(connection, remoteAddress, sender, e, 
saslResponseType);
+return;
+  }
+
+  logger.trace("Received SASL message {} from {}", 
saslResponse.getStatus(), remoteAddress);
+  final SaslResponseProcessor processor = 
RESPONSE_PROCESSORS.get(saslResponse.getStatus());
+  if (processor == null) {
+logger.info("Unknown message type from client from {}. Will stop 
authentication.", remoteAddress);
+handleAuthFailure(connection, remoteAddress, sender, new 
SaslException("Received unexpected message"),
+saslResponseType);
+return;
+  }
+
+  final 

[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102395784
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserClient.java ---
@@ -88,22 +129,183 @@ public void submitQuery(UserResultsListener 
resultsListener, RunQuery query) {
 send(queryResultHandler.getWrappedListener(resultsListener), 
RpcType.RUN_QUERY, query, QueryId.class);
   }
 
-  public void connect(RpcConnectionHandler handler, 
DrillbitEndpoint endpoint,
-  UserProperties props, UserBitShared.UserCredentials 
credentials) {
+  public CheckedFuture connect(DrillbitEndpoint 
endpoint, DrillProperties parameters,
+   UserCredentials 
credentials) {
+final FutureHandler handler = new FutureHandler();
 UserToBitHandshake.Builder hsBuilder = UserToBitHandshake.newBuilder()
 .setRpcVersion(UserRpcConfig.RPC_VERSION)
 .setSupportListening(true)
 .setSupportComplexTypes(supportComplexTypes)
 .setSupportTimeout(true)
 .setCredentials(credentials)
-.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName));
+.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName))
+.setSaslSupport(SaslSupport.SASL_AUTH)
+.setProperties(parameters.serializeForServer());
+this.properties = parameters;
+
+
connectAsClient(queryResultHandler.getWrappedConnectionHandler(handler),
+hsBuilder.build(), endpoint.getAddress(), endpoint.getUserPort());
+return handler;
+  }
+
+  /**
+   * Check (after {@link #connect connecting}) if server requires 
authentication.
+   *
+   * @return true if server requires authentication
+   */
+  public boolean serverRequiresAuthentication() {
+return supportedAuthMechs != null;
+  }
+
+  /**
+   * Returns a list of supported authentication mechanism. If called 
before {@link #connect connecting},
+   * returns null. If called after {@link #connect connecting}, returns a 
list of supported mechanisms
+   * iff authentication is required.
+   *
+   * @return list of supported authentication mechanisms
+   */
+  public List getSupportedAuthenticationMechanisms() {
--- End diff --

Applications (maybe in future) can choose a mechanism. Example:
```
userClient.connect(props);
list = userClient.getSupportedAuthenticationMechanisms();
// pick one from 'list', maybe through callback to user
userClient.authenticate(props2);
```


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102396422
  
--- Diff: 
exec/rpc/src/main/java/org/apache/drill/exec/rpc/ClientConnection.java ---
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc;
+
+import javax.security.sasl.SaslClient;
+
+public interface ClientConnection extends RemoteConnection {
--- End diff --

Same comment as one of the above (saslClient will be used for future 
messages on the connection).


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102391399
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlConnectionConfig.java
 ---
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc.control;
+
+import org.apache.drill.exec.exception.DrillbitStartupException;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint;
+import org.apache.drill.exec.rpc.BitConnectionConfig;
+import org.apache.drill.exec.server.BootStrapContext;
+import org.apache.drill.exec.work.batch.ControlMessageHandler;
+
+// package private
+class ControlConnectionConfig extends BitConnectionConfig {
+//  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(ControlConnectionConfig.class);
+
+  private final ControlMessageHandler handler;
+
+  private DrillbitEndpoint localEndpoint;
+
+  ControlConnectionConfig(BufferAllocator allocator, BootStrapContext 
context, ControlMessageHandler handler)
+  throws DrillbitStartupException {
+super(allocator, context);
+this.handler = handler;
+  }
+
+  @Override
+  public String getName() {
+return "control"; // unused
+  }
+
+  ControlMessageHandler getMessageHandler() {
+return handler;
+  }
+
+  void setLocalEndpoint(DrillbitEndpoint endpoint) {
--- End diff --

Hmm what was the previous approach?

I only moved this code from another class. Immutability would be nice, but 
I am not sure what the change entails (maybe a little involved).


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102389973
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlClient.java
 ---
@@ -89,14 +90,36 @@ public MessageLite getResponseDefaultInstance(int 
rpcType) throws RpcException {
   }
 
   @Override
-  protected Response handle(ControlConnection connection, int rpcType, 
ByteBuf pBody, ByteBuf dBody) throws RpcException {
-return handler.handle(connection, rpcType, pBody, dBody);
+  protected void handle(ControlConnection connection, int rpcType, ByteBuf 
pBody, ByteBuf dBody,
+ResponseSender sender) throws RpcException {
+connection.getCurrentHandler().handle(connection, rpcType, pBody, 
dBody, sender);
   }
 
   @Override
   protected void validateHandshake(BitControlHandshake handshake) throws 
RpcException {
 if (handshake.getRpcVersion() != ControlRpcConfig.RPC_VERSION) {
-  throw new RpcException(String.format("Invalid rpc version.  Expected 
%d, actual %d.", handshake.getRpcVersion(), ControlRpcConfig.RPC_VERSION));
+  throw new RpcException(String.format("Invalid rpc version.  Expected 
%d, actual %d.",
+  handshake.getRpcVersion(), ControlRpcConfig.RPC_VERSION));
+}
+
+if (handshake.getAuthenticationMechanismsCount() != 0) { // remote 
requires authentication
+  final SaslClient saslClient;
--- End diff --

The saslClient is instantiated differently in user->bit comm. and bit->bit 
comm.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102396712
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlClient.java
 ---
@@ -105,8 +128,83 @@ protected void finalizeConnection(BitControlHandshake 
handshake, ControlConnecti
 connection.setEndpoint(handshake.getEndpoint());
   }
 
-  public ControlConnection getConnection() {
-return this.connection;
+  @Override
+  protected  RpcCommand
+  getInitialCommand(final RpcCommand command) {
+if (config.getAuthMechanismToUse() == null) {
+  return super.getInitialCommand(command);
+} else {
+  return new AuthenticationCommand<>(command);
--- End diff --

Thanks for the catch.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102396769
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/data/DataClient.java ---
@@ -75,27 +87,118 @@ public MessageLite getResponseDefaultInstance(int 
rpcType) throws RpcException {
   }
 
   @Override
-  protected Response handle(DataClientConnection connection, int rpcType, 
ByteBuf pBody, ByteBuf dBody) throws RpcException {
+  protected void handle(DataClientConnection connection, int rpcType, 
ByteBuf pBody, ByteBuf dBody,
+ResponseSender sender) throws RpcException {
 throw new UnsupportedOperationException("DataClient is unidirectional 
by design.");
   }
 
   BufferAllocator getAllocator() {
-return allocator;
+return config.getAllocator();
   }
 
   @Override
   protected void validateHandshake(BitServerHandshake handshake) throws 
RpcException {
 if (handshake.getRpcVersion() != DataRpcConfig.RPC_VERSION) {
-  throw new RpcException(String.format("Invalid rpc version.  Expected 
%d, actual %d.", handshake.getRpcVersion(), DataRpcConfig.RPC_VERSION));
+  throw new RpcException(String.format("Invalid rpc version.  Expected 
%d, actual %d.",
+  handshake.getRpcVersion(), DataRpcConfig.RPC_VERSION));
+}
+
+if (handshake.getAuthenticationMechanismsCount() != 0) { // remote 
requires authentication
+  final SaslClient saslClient;
+  try {
+saslClient = 
config.getAuthFactory(handshake.getAuthenticationMechanismsList())
+.createSaslClient(UserGroupInformation.getLoginUser(),
+config.getSaslClientProperties(remoteEndpoint));
+  } catch (final IOException e) {
+throw new RpcException(String.format("Failed to initiate 
authenticate to %s", remoteEndpoint.getAddress()), e);
+  }
+  if (saslClient == null) {
+throw new RpcException("Unexpected failure. Could not initiate 
SASL exchange.");
+  }
+  connection.setSaslClient(saslClient);
+} else {
+  if (config.getAuthMechanismToUse() != null) {
+throw new RpcException(String.format("Drillbit (%s) does not 
require auth, but auth is enabled.",
+remoteEndpoint.getAddress()));
+  }
 }
   }
 
   @Override
-  protected void finalizeConnection(BitServerHandshake handshake, 
DataClientConnection connection) {
+  protected  RpcCommand
+  getInitialCommand(final RpcCommand command) {
+if (config.getAuthMechanismToUse() == null) {
+  return super.getInitialCommand(command);
+} else {
+  return new AuthenticationCommand<>(command);
--- End diff --

Yes.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102396526
  
--- Diff: 
exec/rpc/src/main/java/org/apache/drill/exec/rpc/ServerConnection.java ---
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc;
+
+import javax.security.sasl.SaslServer;
+import java.io.IOException;
+
+public interface ServerConnection extends 
RemoteConnection {
+
+  // init only once
+  void initSaslServer(String mechanismName) throws IOException;
--- End diff --

Same comment as one of the above (saslServer will be used for future 
messages on the connection).


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102391859
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/AuthenticationOutcomeListener.java
 ---
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc.security;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.Internal.EnumLite;
+import com.google.protobuf.MessageLite;
+import io.netty.buffer.ByteBuf;
+import org.apache.drill.exec.proto.UserBitShared.SaslMessage;
+import org.apache.drill.exec.proto.UserBitShared.SaslStatus;
+import org.apache.drill.exec.rpc.BasicClient;
+import org.apache.drill.exec.rpc.ClientConnection;
+import org.apache.drill.exec.rpc.RpcException;
+import org.apache.drill.exec.rpc.RpcOutcomeListener;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import javax.security.sasl.SaslClient;
+import javax.security.sasl.SaslException;
+import java.io.IOException;
+import java.lang.reflect.UndeclaredThrowableException;
+import java.security.PrivilegedExceptionAction;
+import java.util.EnumMap;
+import java.util.Map;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+public class AuthenticationOutcomeListener
+implements RpcOutcomeListener {
+  private static final org.slf4j.Logger logger =
+  
org.slf4j.LoggerFactory.getLogger(AuthenticationOutcomeListener.class);
+
+  private static final ImmutableMap 
CHALLENGE_PROCESSORS;
+  static {
+final Map map = new 
EnumMap<>(SaslStatus.class);
+map.put(SaslStatus.SASL_IN_PROGRESS, new SaslInProgressProcessor());
+map.put(SaslStatus.SASL_SUCCESS, new SaslSuccessProcessor());
+map.put(SaslStatus.SASL_FAILED, new SaslFailedProcessor());
+CHALLENGE_PROCESSORS = Maps.immutableEnumMap(map);
+  }
+
+  private final BasicClient 
client;
+  private final R connection;
+  private final T saslRpcType;
+  private final UserGroupInformation ugi;
+  private final RpcOutcomeListener rpcOutcomeListener;
+
+  public AuthenticationOutcomeListener(BasicClient client,
+   R connection, T saslRpcType, 
UserGroupInformation ugi,
+   RpcOutcomeListener 
rpcOutcomeListener) {
+this.client = client;
+this.connection = connection;
+this.saslRpcType = saslRpcType;
+this.ugi = ugi;
+this.rpcOutcomeListener = rpcOutcomeListener;
+  }
+
+  public void initiate(final String mechanismName) {
+logger.trace("Initiating SASL exchange.");
+try {
+  final ByteString responseData;
+  final SaslClient saslClient = connection.getSaslClient();
+  if (saslClient.hasInitialResponse()) {
+responseData = ByteString.copyFrom(evaluateChallenge(ugi, 
saslClient, new byte[0]));
+  } else {
+responseData = ByteString.EMPTY;
+  }
+  client.send(new AuthenticationOutcomeListener<>(client, connection, 
saslRpcType, ugi, rpcOutcomeListener),
+  connection,
+  saslRpcType,
+  SaslMessage.newBuilder()
+  .setMechanism(mechanismName)
+  .setStatus(SaslStatus.SASL_START)
+  .setData(responseData)
+  .build(),
+  SaslMessage.class,
+  true /** the connection will not be backed up at this point */);
+  logger.trace("Initiated SASL exchange.");
+} catch (final Exception e) {
+  rpcOutcomeListener.failed(RpcException.mapException(e));
+}
+  }
+
+  @Override
+  public void 

[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102390094
  
--- Diff: common/src/main/java/org/apache/drill/common/KerberosUtil.java ---
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.common;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.base.Preconditions.checkState;
+
+public final class KerberosUtil {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(KerberosUtil.class);
+
+  public static final String KERBEROS_SASL_NAME = "GSSAPI";
+
+  public static final String KERBEROS_SIMPLE_NAME = "KERBEROS";
--- End diff --

+ For a user, _auth=KERBEROS_ seems easier than _auth=GSSAPI_, and so I 
thought why not for admin as well (just like other Hadoop projects).
+ [This 
link](http://docs.oracle.com/javase/jndi/tutorial/ldap/security/gssapi.html) 
says _GSS-API SASL mechanism was retrofitted to mean only Kerberos v5._


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #754: DRILL-5275: Sort spill is slow due to repeated allo...

2017-02-21 Thread paul-rogers
Github user paul-rogers commented on a diff in the pull request:

https://github.com/apache/drill/pull/754#discussion_r102382826
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/cache/VectorAccessibleSerializable.java
 ---
@@ -57,6 +57,12 @@
   private BatchSchema.SelectionVectorMode svMode = 
BatchSchema.SelectionVectorMode.NONE;
   private SelectionVector2 sv2;
 
+  /**
+   * Disk I/O buffer used for all reads and writes of DrillBufs.
+   */
+
+  private byte buffer[] = new byte[32*1024];
--- End diff --

The key issues with the location of the buffer are:

* Want to reuse the buffer as much as possible. (Hence, putting it on the 
operator is a good idea.)
* Want to keep interfaces simple (passing the buffer from the operator to 
everything that needs it is awkward.)
* Can only be shared by a single thread, obviously.

After playing around, it turns out we can move the read & write methods 
onto the {{BufferAllocator}} class. This makes them available to anything that 
uses Drillbufs. And, it allows the allocator to hold on to the (shared) I/O 
buffer.

Reflecting on the sort, it becomes clear that such a change is necessary. 
In the merge phase, the sort will have many spill runs open; each will have its 
own {{VectorAccessibleSerializable}}, each with its own buffer. Moving the 
buffer to the allocator reduces the needs to a single, shared, buffer.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #747: DRILL-5257: Run-time control of query profiles

2017-02-21 Thread ppadma
Github user ppadma commented on a diff in the pull request:

https://github.com/apache/drill/pull/747#discussion_r102378871
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/Foreman.java ---
@@ -178,6 +182,19 @@ public Foreman(final WorkerBee bee, final 
DrillbitContext drillbitContext,
 final QueryState initialState = queuingEnabled ? QueryState.ENQUEUED : 
QueryState.STARTING;
 recordNewState(initialState);
 enqueuedQueries.inc();
+
+profileOption = setProfileOption(queryContext.getOptions());
+  }
+
+  private ProfileOption setProfileOption(OptionManager options) {
+if (! options.getOption(ExecConstants.ENABLE_QUERY_PROFILE_VALIDATOR)) 
{
--- End diff --

nit: extra space after !


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #747: DRILL-5257: Run-time control of query profiles

2017-02-21 Thread ppadma
Github user ppadma commented on a diff in the pull request:

https://github.com/apache/drill/pull/747#discussion_r102378651
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java ---
@@ -413,4 +413,15 @@
 
   String DYNAMIC_UDF_SUPPORT_ENABLED = "exec.udf.enable_dynamic_support";
   BooleanValidator DYNAMIC_UDF_SUPPORT_ENABLED_VALIDATOR = new 
BooleanValidator(DYNAMIC_UDF_SUPPORT_ENABLED, true, true);
+
+  /**
+   * Option to save query profiles. If false, no query profile will be 
saved
+   * for any query.
+   */
+  String ENABLE_QUERY_PROFILE_OPTION = "exec.query_profile.enable";
--- End diff --

After reading your comment, it feels like exec.query_profile.save would be 
a better choice. 


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #754: DRILL-5275: Sort spill is slow due to repeated allo...

2017-02-21 Thread paul-rogers
Github user paul-rogers commented on a diff in the pull request:

https://github.com/apache/drill/pull/754#discussion_r102378905
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/cache/VectorAccessibleSerializable.java
 ---
@@ -57,6 +57,12 @@
   private BatchSchema.SelectionVectorMode svMode = 
BatchSchema.SelectionVectorMode.NONE;
   private SelectionVector2 sv2;
 
+  /**
+   * Disk I/O buffer used for all reads and writes of DrillBufs.
+   */
+
+  private byte buffer[] = new byte[32*1024];
--- End diff --

Read a 18 GB file on disk using just a test program that uses various size 
buffers.

{code}
32K buffer: Rate: 799 MB/s
64K buffer: Rate: 766 MB/s
{code}

So, seems no advantage of a larger buffer. (Tests with a smaller buffer do 
slow things down, hence the 32K size.)

On direct memory: can't use direct memory as the fundamental problem is 
that data is in a direct memory DrillBuf, and must be copied to heap memory for 
writing. The original code does the copy by allocating a heap buffer the same 
size as the vector (16 MB, 32 MB or larger.) This code does the copying by 
reusing the same buffer over and over.

No need to hold the buffer on the operator. This class is used for an 
entire spill/read session.

What may be an issue, however, is the merge phase of a sort when many files 
are open and so many buffers are created. The reads are synchronous, so they 
could share a buffer.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #747: DRILL-5257: Run-time control of query profiles

2017-02-21 Thread paul-rogers
Github user paul-rogers commented on a diff in the pull request:

https://github.com/apache/drill/pull/747#discussion_r102378009
  
--- Diff: exec/java-exec/src/main/resources/drill-module.conf ---
@@ -162,6 +162,9 @@ drill.exec: {
   size: 1
 }
   },
+  foreman : {
--- End diff --

Done


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #747: DRILL-5257: Run-time control of query profiles

2017-02-21 Thread paul-rogers
Github user paul-rogers commented on a diff in the pull request:

https://github.com/apache/drill/pull/747#discussion_r102377957
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/Foreman.java ---
@@ -853,7 +875,9 @@ public void close() throws Exception {
   // storage write; query completion occurs in parallel with profile
   // persistence.
 
-  queryManager.writeFinalProfile(uex);
+  if (profileOption == ProfileOption.ASYNC) {
+writeProfile(uex);
--- End diff --

I dislike redundant code, but sure, it's just one line, I can paste it 
twice.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #747: DRILL-5257: Run-time control of query profiles

2017-02-21 Thread paul-rogers
Github user paul-rogers commented on a diff in the pull request:

https://github.com/apache/drill/pull/747#discussion_r10236
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java ---
@@ -413,4 +413,19 @@
 
   String DYNAMIC_UDF_SUPPORT_ENABLED = "exec.udf.enable_dynamic_support";
   BooleanValidator DYNAMIC_UDF_SUPPORT_ENABLED_VALIDATOR = new 
BooleanValidator(DYNAMIC_UDF_SUPPORT_ENABLED, true, true);
+
+  /**
+   * Option to save query profiles.
+   * 
+   * async (default): Write query profile after last response
+   * to the client.
+   * sync: Write the query profile before the last response to
+   * the client. Very useful for testing to avoid race conditions.
+   * none: Don't write the query profile at all. Useful when running
+   * many production jobs that do not need to be reviewed.
+   * 
+   */
+  String QUERY_PROFILE_OPTION = "exec.profile";
--- End diff --

Revised to be two boolean flags.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #750: DRILL-5273: CompliantTextReader excessive memory us...

2017-02-21 Thread chunhui-shi
Github user chunhui-shi commented on a diff in the pull request:

https://github.com/apache/drill/pull/750#discussion_r102377194
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
 ---
@@ -118,12 +118,21 @@ public boolean apply(@Nullable SchemaPath path) {
* @param outputMutator  Used to create the schema in the output record 
batch
* @throws ExecutionSetupException
*/
+  @SuppressWarnings("resource")
   @Override
   public void setup(OperatorContext context, OutputMutator outputMutator) 
throws ExecutionSetupException {
 
 oContext = context;
-readBuffer = context.getManagedBuffer(READ_BUFFER);
-whitespaceBuffer = context.getManagedBuffer(WHITE_SPACE_BUFFER);
+// Note: DO NOT use managed buffers here. They remain in existence
+// until the fragment is shut down. The buffers here are large.
--- End diff --

I think the reason you chose to use context.getAllocator() was you don't 
want to fragmentize managed buffer?
Otherwise you might just call readBuffer.close()? Was there any problem 
with managed buffer's release? Just curious about the "DO NOT use managed 
buffer here" part. Besides that, +1.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill issue #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on the issue:

https://github.com/apache/drill/pull/729
  
I have addressed the comments from the earlier pull request. Please take a 
look


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill issue #755: DRILL-5270: Improve loading of profiles listing in the Web...

2017-02-21 Thread kkhatua
Github user kkhatua commented on the issue:

https://github.com/apache/drill/pull/755
  
For 8266 profiles, when measured from Chrome browser's Network tool:
```
Load First Time: 2.43s 
Load Second Time (no new profiles): 829ms
```


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #613: DRILL-4730: Update JDBC DatabaseMetaData implementa...

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/613#discussion_r102364337
  
--- Diff: 
exec/jdbc/src/main/java/org/apache/drill/jdbc/impl/DrillMetaImpl.java ---
@@ -78,19 +111,237 @@ private MetaResultSet s(String s) {
 }
   }
 
+  /** Information about type mapping. */
+  private static class TypeInfo {
+private static Map MAPPING = 
ImmutableMap. builder()
+.put(boolean.class, of(Types.BOOLEAN, "BOOLEAN"))
+.put(Boolean.class, of(Types.BOOLEAN, "BOOLEAN"))
+.put(Byte.TYPE, of(Types.TINYINT, "TINYINT"))
+.put(Byte.class, of(Types.TINYINT, "TINYINT"))
+.put(Short.TYPE, of(Types.SMALLINT, "SMALLINT"))
+.put(Short.class, of(Types.SMALLINT, "SMALLINT"))
+.put(Integer.TYPE, of(Types.INTEGER, "INTEGER"))
+.put(Integer.class, of(Types.INTEGER, "INTEGER"))
+.put(Long.TYPE,  of(Types.BIGINT, "BIGINT"))
+.put(Long.class, of(Types.BIGINT, "BIGINT"))
+.put(Float.TYPE, of(Types.FLOAT, "FLOAT"))
+.put(Float.class,  of(Types.FLOAT, "FLOAT"))
+.put(Double.TYPE,  of(Types.DOUBLE, "DOUBLE"))
+.put(Double.class, of(Types.DOUBLE, "DOUBLE"))
+.put(String.class, of(Types.VARCHAR, "CHARACTER VARYING"))
+.put(java.sql.Date.class, of(Types.DATE, "DATE"))
+.put(Time.class, of(Types.TIME, "TIME"))
+.put(Timestamp.class, of(Types.TIMESTAMP, "TIMESTAMP"))
+.build();
+
+private final int sqlType;
+private final String sqlTypeName;
+
+public TypeInfo(int sqlType, String sqlTypeName) {
+  this.sqlType = sqlType;
+  this.sqlTypeName = sqlTypeName;
+}
 
+private static TypeInfo of(int sqlType, String sqlTypeName) {
+  return new TypeInfo(sqlType, sqlTypeName);
+}
 
-  @Override
-  protected  MetaResultSet createEmptyResultSet(Class clazz) {
-return s(
-"SELECT '' AS `Interim zero-row result set` "  // dummy row type
-+ "FROM INFORMATION_SCHEMA.CATALOGS "  // any table
-+ "LIMIT 0"// zero rows
-);
+public static TypeInfo get(Class clazz) {
+  return MAPPING.get(clazz);
+}
   }
 
-  @Override
-  public MetaResultSet getTables(String catalog, final Pat schemaPattern, 
final Pat tableNamePattern,
+  /** Metadata describing a column.
+   * Copied from Avatica with several fixes
+   * */
+  public static class MetaColumn implements Named {
+public final String tableCat;
+public final String tableSchem;
+public final String tableName;
+public final String columnName;
+public final int dataType;
+public final String typeName;
+public final Integer columnSize;
+public final Integer bufferLength = null;
+public final Integer decimalDigits;
+public final Integer numPrecRadix;
+public final int nullable;
+public final String remarks = null;
+public final String columnDef = null;
+public final Integer sqlDataType = null;
+public final Integer sqlDatetimeSub = null;
+public final Integer charOctetLength;
+public final int ordinalPosition;
+@NotNull
+public final String isNullable;
+public final String scopeCatalog = null;
+public final String scopeSchema = null;
+public final String scopeTable = null;
+public final Short sourceDataType = null;
+@NotNull
+public final String isAutoincrement = "";
+@NotNull
+public final String isGeneratedcolumn = "";
+
+public MetaColumn(
+String tableCat,
+String tableSchem,
+String tableName,
+String columnName,
+int dataType,
+String typeName,
+Integer columnSize,
+Integer decimalDigits,
+Integer numPrecRadix,
+int nullable,
+Integer charOctetLength,
+int ordinalPosition,
+String isNullable) {
+  this.tableCat = tableCat;
+  this.tableSchem = tableSchem;
+  this.tableName = tableName;
+  this.columnName = columnName;
+  this.dataType = dataType;
+  this.typeName = typeName;
+  this.columnSize = columnSize;
+  this.decimalDigits = decimalDigits;
+  this.numPrecRadix = numPrecRadix;
+  this.nullable = nullable;
+  this.charOctetLength = charOctetLength;
+  this.ordinalPosition = ordinalPosition;
+  this.isNullable = isNullable;
+}
+
+@Override
+public String getName() {
+  return columnName;
+}
  

[GitHub] drill issue #755: DRILL-5270: Improve loading of profiles listing in the Web...

2017-02-21 Thread kkhatua
Github user kkhatua commented on the issue:

https://github.com/apache/drill/pull/755
  
A summary of the performance is available in this 
[comment](https://issues.apache.org/jira/browse/DRILL-5270?focusedCommentId=15877119=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-15877119)
 on the JIRA (DRILL-5270)



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #613: DRILL-4730: Update JDBC DatabaseMetaData implementa...

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/613#discussion_r102363803
  
--- Diff: 
exec/jdbc/src/main/java/org/apache/drill/jdbc/impl/DrillCursor.java ---
@@ -24,33 +24,260 @@
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.LinkedBlockingDeque;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.calcite.avatica.AvaticaResultSet;
+import org.apache.calcite.avatica.AvaticaStatement;
 import org.apache.calcite.avatica.ColumnMetaData;
+import org.apache.calcite.avatica.Meta;
+import org.apache.calcite.avatica.Meta.Signature;
 import org.apache.calcite.avatica.util.ArrayImpl.Factory;
 import org.apache.calcite.avatica.util.Cursor;
 import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.exec.client.DrillClient;
 import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.proto.UserBitShared.QueryId;
+import org.apache.drill.exec.proto.UserBitShared.QueryResult.QueryState;
+import org.apache.drill.exec.proto.UserBitShared.QueryType;
+import org.apache.drill.exec.proto.helper.QueryIdHelper;
 import org.apache.drill.exec.record.BatchSchema;
 import org.apache.drill.exec.record.RecordBatchLoader;
+import org.apache.drill.exec.rpc.ConnectionThrottle;
 import org.apache.drill.exec.rpc.user.QueryDataBatch;
+import org.apache.drill.exec.rpc.user.UserResultsListener;
 import org.apache.drill.exec.store.ischema.InfoSchemaConstants;
+import org.apache.drill.jdbc.SchemaChangeListener;
 import org.slf4j.Logger;
 
+import com.google.common.collect.Queues;
+
 
 class DrillCursor implements Cursor {
+
+  
+  // ResultsListener:
+  static class ResultsListener implements UserResultsListener {
+private static final org.slf4j.Logger logger =
+org.slf4j.LoggerFactory.getLogger(ResultsListener.class);
+
+private static volatile int nextInstanceId = 1;
+
+/** (Just for logging.) */
+private final int instanceId;
+
+private final int batchQueueThrottlingThreshold;
+
+/** (Just for logging.) */
+private volatile QueryId queryId;
+
+/** (Just for logging.) */
+private int lastReceivedBatchNumber;
+/** (Just for logging.) */
+private int lastDequeuedBatchNumber;
+
+private volatile UserException executionFailureException;
+
+// TODO:  Revisit "completed".  Determine and document exactly what it
+// means.  Some uses imply that it means that incoming messages 
indicate
+// that the _query_ has _terminated_ (not necessarily _completing_
+// normally), while some uses imply that it's some other state of the
+// ResultListener.  Some uses seem redundant.)
+volatile boolean completed = false;
+
+/** Whether throttling of incoming data is active. */
+private final AtomicBoolean throttled = new AtomicBoolean( false );
+private volatile ConnectionThrottle throttle;
+
+private volatile boolean closed = false;
+
+private final CountDownLatch firstMessageReceived = new 
CountDownLatch(1);
+
+final LinkedBlockingDeque batchQueue =
+Queues.newLinkedBlockingDeque();
+
+
+/**
+ * ...
+ * @param  batchQueueThrottlingThreshold
+ * queue size threshold for throttling server
+ */
+ResultsListener( int batchQueueThrottlingThreshold ) {
+  instanceId = nextInstanceId++;
+  this.batchQueueThrottlingThreshold = batchQueueThrottlingThreshold;
+  logger.debug( "[#{}] Query listener created.", instanceId );
+}
+
+/**
+ * Starts throttling if not currently throttling.
+ * @param  throttle  the "throttlable" object to throttle
+ * @return  true if actually started (wasn't throttling already)
+ */
+private boolean startThrottlingIfNot( ConnectionThrottle throttle ) {
+  final boolean started = throttled.compareAndSet( false, true );
+  if ( started ) {
+this.throttle = throttle;
+throttle.setAutoRead(false);
+  }
+  return started;
+}
+
+/**
+ * Stops throttling if currently throttling.
+ * @return  true if actually stopped (was throttling)
+ */
+private boolean stopThrottlingIfSo() {
+  final boolean stopped = throttled.compareAndSet( true, false );
+  if ( stopped ) {
+throttle.setAutoRead(true);

[GitHub] drill pull request #755: DRILL-5270: Improve loading of profiles listing in ...

2017-02-21 Thread kkhatua
GitHub user kkhatua opened a pull request:

https://github.com/apache/drill/pull/755

DRILL-5270: Improve loading of profiles listing in the WebUI

Using Hadoop API to filter and reduce profile list load time
Using an in-memory treeSet-based cache, maintain the list of most recent
profiles.

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/kkhatua/drill DRILL-5270

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/drill/pull/755.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #755


commit a5f20643850ad399622e5df9a6f37713545dc7a6
Author: Kunal Khatua 
Date:   2017-02-22T01:20:48Z

DRILL-5270: Improve loading of profiles listing in the WebUI

Using Hadoop API to filter and reduce profile list load time
Using an in-memory treeSet-based cache, maintain the list of most recent
profiles.




---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL

2017-02-21 Thread sohami
Github user sohami commented on a diff in the pull request:

https://github.com/apache/drill/pull/752#discussion_r102362622
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java
 ---
@@ -109,7 +121,37 @@ public MockSchema(MockStorageEngine engine) {
 
 @Override
 public Table getTable(String name) {
-  Pattern p = Pattern.compile("(\\w+)_(\\d+)(k|m)?", 
Pattern.CASE_INSENSITIVE);
+  if (name.toLowerCase().endsWith(".json") ) {
+return getConfigFile(name);
+  } else {
+return getDirectTable(name);
+  }
+}
+
+private Table getConfigFile(String name) {
+  final URL url = Resources.getResource(name);
+  if (url == null) {
+throw new IllegalArgumentException(
+"Unable to find mock table config file " + name);
+  }
+  MockTableDef mockTableDefn;
+  try {
+String json = Resources.toString(url, Charsets.UTF_8);
+final ObjectMapper mapper = new ObjectMapper();
+mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, 
true);
+mockTableDefn = mapper.readValue(json, MockTableDef.class);
+  } catch (JsonParseException e) {
+throw new IllegalArgumentException( "Unable to parse mock table 
definition file: " + name, e );
+  } catch (JsonMappingException e) {
+throw new IllegalArgumentException( "Unable to Jackson deserialize 
mock table definition file: " + name, e );
+  } catch (IOException e) {
+throw new IllegalArgumentException( "Unable to read mock table 
definition file: " + name, e );
+  }
+  return new DynamicDrillTable(engine, this.name, 
mockTableDefn.getEntries() );
--- End diff --

Please remove extra space before last` )`. there are few other places too 
like line 169 in this file, line 199 (MockTableDef.java), etc.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL

2017-02-21 Thread sohami
Github user sohami commented on a diff in the pull request:

https://github.com/apache/drill/pull/752#discussion_r102336171
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/package-info.java 
---
@@ -60,14 +62,26 @@
  * The mode is one of the supported Drill
  * {@link DataMode} names: usually OPTIONAL or 
REQUIRED.
  * 
+ * 
+ * Recent extensions include:
+ * 
+ * repeat in either the "entry" or "record" elements allow
--- End diff --

I just found repeat definition in `MockColumn` but not in `MockScanEntry` 
whereas here in comment and example `example-mock.json` we are showing repeat 
property at entry level. Is this work in progress ?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL

2017-02-21 Thread sohami
Github user sohami commented on a diff in the pull request:

https://github.com/apache/drill/pull/752#discussion_r102320948
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java
 ---
@@ -89,14 +85,30 @@ public boolean supportsRead() {
 return true;
   }
 
-//  public static class ImplicitTable extends DynamicDrillTable {
-//
-//public ImplicitTable(StoragePlugin plugin, String storageEngineName,
-//Object selection) {
-//  super(plugin, storageEngineName, selection);
-//}
-//
-//  }
+  /**
+   * Resolves table names within the mock data source. Tables can be of 
two forms:
+   * 
+   * _
+   * 
+   * Where the "name" can be anything, "n" is the number of rows, and 
"unit" is
+   * the units for the row count: non, K (thousand) or M (million).
+   * 
+   * The above form generates a table directly with no other information 
needed.
+   * Column names must be provided, and must be of the form:
+   * 
+   * _
+   * 
+   * Where the name can be anything, the type must be i (integer), d 
(double)
+   * or s (string, AKA VarChar). The length is needed only for string 
fields.
--- End diff --

how about boolean (b) as a type ?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL

2017-02-21 Thread sohami
Github user sohami commented on a diff in the pull request:

https://github.com/apache/drill/pull/752#discussion_r102294277
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/BooleanGen.java 
---
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.util.Random;
+
+import org.apache.drill.exec.vector.BitVector;
+import org.apache.drill.exec.vector.ValueVector;
+
+public class BooleanGen implements FieldGen {
+
+  Random rand = new Random( );
+
+  @Override
+  public void setup(ColumnDef colDef) { }
+
+  public int value( ) {
--- End diff --

Extra space between `()`


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL

2017-02-21 Thread sohami
Github user sohami commented on a diff in the pull request:

https://github.com/apache/drill/pull/752#discussion_r102298318
  
--- Diff: 
exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java
 ---
@@ -124,7 +125,7 @@ public void interpreterDateTest() throws Exception {
 final BitControl.PlanFragment planFragment = 
BitControl.PlanFragment.getDefaultInstance();
 final QueryContextInformation queryContextInfo = 
planFragment.getContext();
 final inttimeZoneIndex = 
queryContextInfo.getTimeZone();
-final org.joda.time.DateTimeZone timeZone = 
org.joda.time.DateTimeZone.forID(org.apache.drill.exec.expr.fn.impl.DateUtility.getTimeZone(timeZoneIndex));
+final DateTimeZone timeZone =
DateTimeZone.forID(org.apache.drill.exec.expr.fn.impl.DateUtility.getTimeZone(timeZoneIndex));
--- End diff --

Please remove extra space after `=`


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL

2017-02-21 Thread sohami
Github user sohami commented on a diff in the pull request:

https://github.com/apache/drill/pull/752#discussion_r102360734
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java
 ---
@@ -75,20 +76,50 @@
*/
 
   private boolean extended;
+  private ScanStats scanStats = ScanStats.TRIVIAL_TABLE;
 
   @JsonCreator
   public MockGroupScanPOP(@JsonProperty("url") String url,
-  @JsonProperty("extended") Boolean extended,
   @JsonProperty("entries") List readEntries) {
 super((String) null);
 this.readEntries = readEntries;
 this.url = url;
-this.extended = extended == null ? false : extended;
+
+// Compute decent row-count stats for this mock data source so that
+// the planner is "fooled" into thinking that this operator wil do
+// disk I/O.
+
+int rowCount = 0;
+int rowWidth = 0;
+for (MockScanEntry entry : readEntries) {
+  rowCount += entry.getRecords();
+  int width = 0;
+  if (entry.getTypes() == null) {
+width = 50;
+  } else {
+for (MockColumn col : entry.getTypes()) {
+  int colWidth = 0;
+  if (col.getWidthValue() == 0) {
+colWidth = TypeHelper.getSize(col.getMajorType());
+  } else {
+colWidth = col.getWidthValue();
+  }
+  colWidth *= col.getRepeatCount();
+  width += colWidth;
+}
+  }
+  rowWidth = Math.max(rowWidth, width);
--- End diff --

`rowWidth` seems to be `maxRowWidth` and `width` is `rowWidth`. Can we 
please rename these ?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #613: DRILL-4730: Update JDBC DatabaseMetaData implementa...

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/613#discussion_r102363018
  
--- Diff: 
exec/jdbc/src/test/java/org/apache/drill/jdbc/DatabaseMetaDataGetColumnsTest.java
 ---
@@ -2712,22 +2712,18 @@ public void 
test_SOURCE_DATA_TYPE_hasSameNameAndLabel() throws SQLException {
 
   @Test
   public void test_SOURCE_DATA_TYPE_hasRightTypeString() throws 
SQLException {
-// TODO(DRILL-2135):  Resolve workaround:
-//assertThat( rsMetadata.getColumnTypeName( 22 ), equalTo( "SMALLINT" 
) );
-assertThat( rowsMetadata.getColumnTypeName( 22 ), equalTo( "INTEGER" ) 
);
+assertThat( rowsMetadata.getColumnTypeName( 22 ), equalTo( "SMALLINT" 
) );
--- End diff --

no, it isn't unfortunately


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102304062
  
--- Diff: 
exec/rpc/src/main/java/org/apache/drill/exec/rpc/ClientConnection.java ---
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc;
+
+import javax.security.sasl.SaslClient;
+
+public interface ClientConnection extends RemoteConnection {
--- End diff --

Is `ClientConnection` the right place to pass along the saslClient vs 
`AuthenticationCommand`?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102344639
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/ServerAuthenticationHandler.java
 ---
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc.security;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.Internal.EnumLite;
+import com.google.protobuf.InvalidProtocolBufferException;
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.ByteBufInputStream;
+import org.apache.drill.exec.proto.UserBitShared.SaslMessage;
+import org.apache.drill.exec.proto.UserBitShared.SaslStatus;
+import org.apache.drill.exec.rpc.RequestHandler;
+import org.apache.drill.exec.rpc.Response;
+import org.apache.drill.exec.rpc.ResponseSender;
+import org.apache.drill.exec.rpc.RpcException;
+import org.apache.drill.exec.rpc.ServerConnection;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import javax.security.sasl.SaslException;
+import javax.security.sasl.SaslServer;
+import java.io.IOException;
+import java.lang.reflect.UndeclaredThrowableException;
+import java.security.PrivilegedExceptionAction;
+import java.util.EnumMap;
+import java.util.Map;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+public class ServerAuthenticationHandler implements RequestHandler {
+  private static final org.slf4j.Logger logger =
+  org.slf4j.LoggerFactory.getLogger(ServerAuthenticationHandler.class);
+
+  private static final ImmutableMap 
RESPONSE_PROCESSORS;
+
+  static {
+final Map map = new 
EnumMap<>(SaslStatus.class);
+map.put(SaslStatus.SASL_START, new SaslStartProcessor());
+map.put(SaslStatus.SASL_IN_PROGRESS, new SaslInProgressProcessor());
+map.put(SaslStatus.SASL_SUCCESS, new SaslSuccessProcessor());
+map.put(SaslStatus.SASL_FAILED, new SaslFailedProcessor());
+RESPONSE_PROCESSORS = Maps.immutableEnumMap(map);
+  }
+
+  private final RequestHandler requestHandler;
+  private final int saslRequestTypeValue;
+  private final T saslResponseType;
+
+  public ServerAuthenticationHandler(final RequestHandler 
requestHandler, final int saslRequestTypeValue,
+ final T saslResponseType) {
+this.requestHandler = requestHandler;
+this.saslRequestTypeValue = saslRequestTypeValue;
+this.saslResponseType = saslResponseType;
+  }
+
+  @Override
+  public void handle(C connection, int rpcType, ByteBuf pBody, ByteBuf 
dBody, ResponseSender sender)
+  throws RpcException {
+final String remoteAddress = connection.getRemoteAddress().toString();
+
+// exchange involves server "challenges" and client "responses" 
(initiated by client)
+if (saslRequestTypeValue == rpcType) {
+  final SaslMessage saslResponse;
+  try {
+saslResponse = SaslMessage.PARSER.parseFrom(new 
ByteBufInputStream(pBody));
+  } catch (final InvalidProtocolBufferException e) {
+handleAuthFailure(connection, remoteAddress, sender, e, 
saslResponseType);
+return;
+  }
+
+  logger.trace("Received SASL message {} from {}", 
saslResponse.getStatus(), remoteAddress);
+  final SaslResponseProcessor processor = 
RESPONSE_PROCESSORS.get(saslResponse.getStatus());
+  if (processor == null) {
+logger.info("Unknown message type from client from {}. Will stop 
authentication.", remoteAddress);
+handleAuthFailure(connection, remoteAddress, sender, new 
SaslException("Received unexpected message"),
+saslResponseType);
+return;
+  }
+
+  final 

[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102295865
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/data/DataClient.java ---
@@ -75,27 +87,118 @@ public MessageLite getResponseDefaultInstance(int 
rpcType) throws RpcException {
   }
 
   @Override
-  protected Response handle(DataClientConnection connection, int rpcType, 
ByteBuf pBody, ByteBuf dBody) throws RpcException {
+  protected void handle(DataClientConnection connection, int rpcType, 
ByteBuf pBody, ByteBuf dBody,
+ResponseSender sender) throws RpcException {
 throw new UnsupportedOperationException("DataClient is unidirectional 
by design.");
   }
 
   BufferAllocator getAllocator() {
-return allocator;
+return config.getAllocator();
   }
 
   @Override
   protected void validateHandshake(BitServerHandshake handshake) throws 
RpcException {
 if (handshake.getRpcVersion() != DataRpcConfig.RPC_VERSION) {
-  throw new RpcException(String.format("Invalid rpc version.  Expected 
%d, actual %d.", handshake.getRpcVersion(), DataRpcConfig.RPC_VERSION));
+  throw new RpcException(String.format("Invalid rpc version.  Expected 
%d, actual %d.",
+  handshake.getRpcVersion(), DataRpcConfig.RPC_VERSION));
+}
+
+if (handshake.getAuthenticationMechanismsCount() != 0) { // remote 
requires authentication
+  final SaslClient saslClient;
+  try {
+saslClient = 
config.getAuthFactory(handshake.getAuthenticationMechanismsList())
+.createSaslClient(UserGroupInformation.getLoginUser(),
+config.getSaslClientProperties(remoteEndpoint));
+  } catch (final IOException e) {
+throw new RpcException(String.format("Failed to initiate 
authenticate to %s", remoteEndpoint.getAddress()), e);
+  }
+  if (saslClient == null) {
+throw new RpcException("Unexpected failure. Could not initiate 
SASL exchange.");
+  }
+  connection.setSaslClient(saslClient);
+} else {
+  if (config.getAuthMechanismToUse() != null) {
+throw new RpcException(String.format("Drillbit (%s) does not 
require auth, but auth is enabled.",
+remoteEndpoint.getAddress()));
+  }
 }
   }
 
   @Override
-  protected void finalizeConnection(BitServerHandshake handshake, 
DataClientConnection connection) {
+  protected  RpcCommand
+  getInitialCommand(final RpcCommand command) {
+if (config.getAuthMechanismToUse() == null) {
+  return super.getInitialCommand(command);
+} else {
+  return new AuthenticationCommand<>(command);
--- End diff --

shouldn't we use `super.getInitialCommand(command)` here too?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r101598598
  
--- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp ---
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include "saslAuthenticatorImpl.hpp"
+
+#include "drillClientImpl.hpp"
+#include "logger.hpp"
+
+namespace Drill {
+
+#define DEFAULT_SERVICE_NAME "drill"
+
+#define KERBEROS_SIMPLE_NAME "kerberos"
+#define KERBEROS_SASL_NAME "gssapi"
+#define PLAIN_NAME "plain"
+
+const std::map 
SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of
+(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME)
+(PLAIN_NAME, PLAIN_NAME)
+;
+
+boost::mutex SaslAuthenticatorImpl::s_mutex;
+bool SaslAuthenticatorImpl::s_initialized = false;
+
+SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* 
const properties) :
+m_properties(properties), m_pConnection(NULL), m_pwd_secret(NULL) {
+
+if (!s_initialized) {
+boost::lock_guard 
lock(SaslAuthenticatorImpl::s_mutex);
+if (!s_initialized) {
+// set plugin path if provided
+if (DrillClientConfig::getSaslPluginPath()) {
+char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath());
+sasl_set_path(0, saslPluginPath);
+}
+
+// loads all the available mechanism and factories in the 
sasl_lib referenced by the path
+const int err = sasl_client_init(NULL);
+if (0 != err) {
+std::stringstream errMsg;
+errMsg << "Failed to load authentication libraries. code: 
" << err;
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << errMsg << std::endl;)
+throw std::runtime_error(errMsg.str().c_str());
+}
+{ // for debugging purposes
+const char **mechanisms = sasl_global_listmech();
+int i = 0;
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "SASL mechanisms 
available on client: " << std::endl;)
+while (mechanisms[i] != NULL) {
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << i << " : " << 
mechanisms[i] << std::endl;)
+i++;
+}
+}
+s_initialized = true;
+}
+}
+}
+
+SaslAuthenticatorImpl::~SaslAuthenticatorImpl() {
+if (m_pwd_secret) {
+free(m_pwd_secret);
+}
+// may be used to negotiated security layers before disposing in the 
future
+if (m_pConnection) {
+sasl_dispose(_pConnection);
+}
+m_pConnection = NULL;
+}
+
+typedef int (*sasl_callback_proc_t)(void); // see sasl_callback_ft
+
+int SaslAuthenticatorImpl::userNameCallback(void *context, int id, const 
char **result, unsigned *len) {
+const std::string* const username = static_cast(context);
+
+if ((SASL_CB_USER == id || SASL_CB_AUTHNAME == id)
+&& username != NULL) {
+*result = username->c_str();
+// *len = (unsigned int) username->length();
+}
+return SASL_OK;
+}
+
+int SaslAuthenticatorImpl::passwordCallback(sasl_conn_t *conn, void 
*context, int id, sasl_secret_t **psecret) {
+const SaslAuthenticatorImpl* const authenticator = static_cast(context);
+
+if (SASL_CB_PASS == id) {
+*psecret = authenticator->m_pwd_secret;
+}
+return SASL_OK;
+}
+
+int SaslAuthenticatorImpl::init(const std::vector& 
mechanisms, exec::shared::SaslMessage& response) {
+// find and set parameters
+std::string authMechanismToUse;
+std::string serviceName;
+std::string serviceHost;
+for (size_t i = 0; i < m_properties->size(); 

[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r101116414
  
--- Diff: 
common/src/main/java/org/apache/drill/common/map/CaseInsensitiveMap.java ---
@@ -55,6 +55,18 @@
   }
 
   /**
+   * Returns a new instance of {@link java.util.HashMap}, with key 
case-insensitivity, of expected size.
--- End diff --

okay :)


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102347409
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserClient.java ---
@@ -88,22 +124,178 @@ public void submitQuery(UserResultsListener 
resultsListener, RunQuery query) {
 send(queryResultHandler.getWrappedListener(resultsListener), 
RpcType.RUN_QUERY, query, QueryId.class);
   }
 
-  public void connect(RpcConnectionHandler handler, 
DrillbitEndpoint endpoint,
-  UserProperties props, UserBitShared.UserCredentials 
credentials) {
+  public CheckedFuture connect(DrillbitEndpoint 
endpoint, DrillProperties parameters,
+   UserCredentials 
credentials) {
+final FutureHandler handler = new FutureHandler();
 UserToBitHandshake.Builder hsBuilder = UserToBitHandshake.newBuilder()
 .setRpcVersion(UserRpcConfig.RPC_VERSION)
 .setSupportListening(true)
 .setSupportComplexTypes(supportComplexTypes)
 .setSupportTimeout(true)
 .setCredentials(credentials)
-.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName));
+.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName))
+.setSaslSupport(SaslSupport.SASL_AUTH)
+.setProperties(parameters.serializeForServer());
+this.properties = parameters;
+
+
connectAsClient(queryResultHandler.getWrappedConnectionHandler(handler),
+hsBuilder.build(), endpoint.getAddress(), endpoint.getUserPort());
+return handler;
+  }
 
-if (props != null) {
-  hsBuilder.setProperties(props);
+  /**
+   * Check (after {@link #connect connecting}) if server requires 
authentication.
+   *
+   * @return true if server requires authentication
+   */
+  public boolean serverRequiresAuthentication() {
+return serverAuthMechanisms != null;
+  }
+
+  /**
+   * Returns a list of supported authentication mechanism. If called 
before {@link #connect connecting},
+   * returns null. If called after {@link #connect connecting}, returns a 
list of supported mechanisms
+   * iff authentication is required.
+   *
+   * @return list of supported authentication mechanisms
+   */
+  public List getSupportedAuthenticationMechanisms() {
+return serverAuthMechanisms;
+  }
+
+  /**
+   * Authenticate to the server asynchronously. Returns a future that 
{@link CheckedFuture#checkedGet results}
+   * in null if authentication succeeds, or throws a {@link SaslException} 
with relevant message if
+   * authentication fails.
+   *
+   * This method uses properties provided at {@link #connect connection 
time} and override them with the
+   * given properties, if any.
+   *
+   * @param overrides parameter overrides
+   * @return result of authentication request
+   */
+  public CheckedFuture authenticate(final 
DrillProperties overrides) {
--- End diff --

is there any need (other than for testing?) to not include authentication 
in the connection process?

From my point of view, it should be included since the user already 
provided all the needed properties (overrides is NULL in DrillClient), and the 
user cannot do anything until authenticated anyway...


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r101601121
  
--- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp ---
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include "saslAuthenticatorImpl.hpp"
+
+#include "drillClientImpl.hpp"
+#include "logger.hpp"
+
+namespace Drill {
+
+#define DEFAULT_SERVICE_NAME "drill"
+
+#define KERBEROS_SIMPLE_NAME "kerberos"
+#define KERBEROS_SASL_NAME "gssapi"
+#define PLAIN_NAME "plain"
+
+const std::map 
SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of
+(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME)
+(PLAIN_NAME, PLAIN_NAME)
+;
+
+boost::mutex SaslAuthenticatorImpl::s_mutex;
+bool SaslAuthenticatorImpl::s_initialized = false;
+
+SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* 
const properties) :
+m_properties(properties), m_pConnection(NULL), m_pwd_secret(NULL) {
+
+if (!s_initialized) {
+boost::lock_guard 
lock(SaslAuthenticatorImpl::s_mutex);
+if (!s_initialized) {
+// set plugin path if provided
+if (DrillClientConfig::getSaslPluginPath()) {
+char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath());
+sasl_set_path(0, saslPluginPath);
+}
+
+// loads all the available mechanism and factories in the 
sasl_lib referenced by the path
+const int err = sasl_client_init(NULL);
+if (0 != err) {
+std::stringstream errMsg;
+errMsg << "Failed to load authentication libraries. code: 
" << err;
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << errMsg << std::endl;)
+throw std::runtime_error(errMsg.str().c_str());
+}
+{ // for debugging purposes
+const char **mechanisms = sasl_global_listmech();
+int i = 0;
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "SASL mechanisms 
available on client: " << std::endl;)
+while (mechanisms[i] != NULL) {
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << i << " : " << 
mechanisms[i] << std::endl;)
+i++;
+}
+}
+s_initialized = true;
+}
+}
+}
+
+SaslAuthenticatorImpl::~SaslAuthenticatorImpl() {
+if (m_pwd_secret) {
+free(m_pwd_secret);
+}
+// may be used to negotiated security layers before disposing in the 
future
+if (m_pConnection) {
+sasl_dispose(_pConnection);
+}
+m_pConnection = NULL;
+}
+
+typedef int (*sasl_callback_proc_t)(void); // see sasl_callback_ft
+
+int SaslAuthenticatorImpl::userNameCallback(void *context, int id, const 
char **result, unsigned *len) {
+const std::string* const username = static_cast(context);
+
+if ((SASL_CB_USER == id || SASL_CB_AUTHNAME == id)
+&& username != NULL) {
+*result = username->c_str();
+// *len = (unsigned int) username->length();
+}
+return SASL_OK;
+}
+
+int SaslAuthenticatorImpl::passwordCallback(sasl_conn_t *conn, void 
*context, int id, sasl_secret_t **psecret) {
+const SaslAuthenticatorImpl* const authenticator = static_cast(context);
+
+if (SASL_CB_PASS == id) {
+*psecret = authenticator->m_pwd_secret;
+}
+return SASL_OK;
+}
+
+int SaslAuthenticatorImpl::init(const std::vector& 
mechanisms, exec::shared::SaslMessage& response) {
+// find and set parameters
+std::string authMechanismToUse;
+std::string serviceName;
+std::string serviceHost;
+for (size_t i = 0; i < m_properties->size(); 

[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102330939
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/plain/PlainFactory.java
 ---
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc.security.plain;
+
+import org.apache.drill.common.config.DrillProperties;
+import org.apache.drill.exec.rpc.security.AuthenticatorFactory;
+import org.apache.drill.exec.rpc.security.FastSaslServerFactory;
+import org.apache.drill.exec.rpc.security.FastSaslClientFactory;
+import org.apache.drill.exec.rpc.user.security.UserAuthenticationException;
+import org.apache.drill.exec.rpc.user.security.UserAuthenticator;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import javax.security.auth.callback.Callback;
+import javax.security.auth.callback.CallbackHandler;
+import javax.security.auth.callback.NameCallback;
+import javax.security.auth.callback.PasswordCallback;
+import javax.security.auth.callback.UnsupportedCallbackException;
+import javax.security.auth.login.LoginException;
+import javax.security.sasl.AuthorizeCallback;
+import javax.security.sasl.SaslClient;
+import javax.security.sasl.SaslException;
+import javax.security.sasl.SaslServer;
+import java.io.IOException;
+import java.security.Security;
+import java.util.Map;
+
+public class PlainFactory implements AuthenticatorFactory {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(PlainFactory.class);
+
+  public static final String SIMPLE_NAME = PlainServer.MECHANISM_NAME;
+
+  static {
+Security.addProvider(new PlainServer.PlainServerProvider());
+  }
+
+  private final UserAuthenticator authenticator;
+
+  public PlainFactory() {
+this.authenticator = null;
+  }
+
+  public PlainFactory(final UserAuthenticator authenticator) {
+this.authenticator = authenticator;
+  }
+
+  @Override
+  public String getSimpleName() {
+return SIMPLE_NAME;
+  }
+
+  @Override
+  public UserGroupInformation createAndLoginUser(Map 
properties) throws IOException {
+final Configuration conf = new Configuration();
+UserGroupInformation.setConfiguration(conf);
+try {
+  return UserGroupInformation.getCurrentUser();
+} catch (final IOException e) {
+  logger.debug("Login failed.", e);
+  final Throwable cause = e.getCause();
+  if (cause instanceof LoginException) {
+throw new SaslException("Failed to login.", cause);
+  }
+  throw new SaslException("Unexpected failure trying to login. ", 
cause);
+}
+  }
+
+  @Override
+  public SaslServer createSaslServer(final UserGroupInformation ugi, final 
Map properties)
+  throws SaslException {
+return 
FastSaslServerFactory.getInstance().createSaslServer(SIMPLE_NAME, null /** 
protocol */,
+null /** serverName */, properties, new 
PlainServerCallbackHandler());
+  }
+
+  @Override
+  public SaslClient createSaslClient(final UserGroupInformation ugi, final 
Map properties)
+  throws SaslException {
+final String userName = (String) properties.get(DrillProperties.USER);
+final String password = (String) 
properties.get(DrillProperties.PASSWORD);
+
+return FastSaslClientFactory.getInstance().createSaslClient(new 
String[]{SIMPLE_NAME},
+null /** authorization ID */, null, null, properties, new 
CallbackHandler() {
+  @Override
+  public void handle(final Callback[] callbacks) throws 
IOException, UnsupportedCallbackException {
+for (final Callback callback : callbacks) {
+  if (callback instanceof NameCallback) {
+

[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102346707
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserClient.java ---
@@ -88,22 +129,183 @@ public void submitQuery(UserResultsListener 
resultsListener, RunQuery query) {
 send(queryResultHandler.getWrappedListener(resultsListener), 
RpcType.RUN_QUERY, query, QueryId.class);
   }
 
-  public void connect(RpcConnectionHandler handler, 
DrillbitEndpoint endpoint,
-  UserProperties props, UserBitShared.UserCredentials 
credentials) {
+  public CheckedFuture connect(DrillbitEndpoint 
endpoint, DrillProperties parameters,
+   UserCredentials 
credentials) {
+final FutureHandler handler = new FutureHandler();
 UserToBitHandshake.Builder hsBuilder = UserToBitHandshake.newBuilder()
 .setRpcVersion(UserRpcConfig.RPC_VERSION)
 .setSupportListening(true)
 .setSupportComplexTypes(supportComplexTypes)
 .setSupportTimeout(true)
 .setCredentials(credentials)
-.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName));
+.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName))
+.setSaslSupport(SaslSupport.SASL_AUTH)
+.setProperties(parameters.serializeForServer());
+this.properties = parameters;
+
+
connectAsClient(queryResultHandler.getWrappedConnectionHandler(handler),
+hsBuilder.build(), endpoint.getAddress(), endpoint.getUserPort());
+return handler;
+  }
+
+  /**
+   * Check (after {@link #connect connecting}) if server requires 
authentication.
+   *
+   * @return true if server requires authentication
+   */
+  public boolean serverRequiresAuthentication() {
+return supportedAuthMechs != null;
+  }
+
+  /**
+   * Returns a list of supported authentication mechanism. If called 
before {@link #connect connecting},
+   * returns null. If called after {@link #connect connecting}, returns a 
list of supported mechanisms
+   * iff authentication is required.
+   *
+   * @return list of supported authentication mechanisms
+   */
+  public List getSupportedAuthenticationMechanisms() {
--- End diff --

what's the use-case? debugging?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r101121400
  
--- Diff: contrib/native/client/src/clientlib/drillClientImpl.cpp ---
@@ -412,37 +427,155 @@ connectionStatus_t 
DrillClientImpl::validateHandshake(DrillUserProperties* prope
 if(ret!=CONN_SUCCESS){
 return ret;
 }
-if(this->m_handshakeStatus != exec::user::SUCCESS){
-switch(this->m_handshakeStatus){
-case exec::user::RPC_VERSION_MISMATCH:
-DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Invalid rpc version. 
 Expected "
-<< DRILL_RPC_VERSION << ", actual "<< 
m_handshakeVersion << "." << std::endl;)
-return handleConnError(CONN_BAD_RPC_VER,
-getMessage(ERR_CONN_BAD_RPC_VER, DRILL_RPC_VERSION,
-m_handshakeVersion,
-this->m_handshakeErrorId.c_str(),
-this->m_handshakeErrorMsg.c_str()));
-case exec::user::AUTH_FAILED:
-DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Authentication 
failed." << std::endl;)
-return handleConnError(CONN_AUTH_FAILED,
-getMessage(ERR_CONN_AUTHFAIL,
-this->m_handshakeErrorId.c_str(),
-this->m_handshakeErrorMsg.c_str()));
-case exec::user::UNKNOWN_FAILURE:
-DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Unknown error during 
handshake." << std::endl;)
-return handleConnError(CONN_HANDSHAKE_FAILED,
-getMessage(ERR_CONN_UNKNOWN_ERR,
-this->m_handshakeErrorId.c_str(),
-this->m_handshakeErrorMsg.c_str()));
-default:
-break;
+
+switch(this->m_handshakeStatus) {
+case exec::user::SUCCESS:
+// reset io_service after handshake is validated before 
running queries
+m_io_service.reset();
+return CONN_SUCCESS;
+case exec::user::RPC_VERSION_MISMATCH:
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Invalid rpc version.  
Expected "
+<< DRILL_RPC_VERSION << ", actual "<< m_handshakeVersion 
<< "." << std::endl;)
+return handleConnError(CONN_BAD_RPC_VER, 
getMessage(ERR_CONN_BAD_RPC_VER, DRILL_RPC_VERSION,
+
m_handshakeVersion,
+
this->m_handshakeErrorId.c_str(),
+
this->m_handshakeErrorMsg.c_str()));
+case exec::user::AUTH_FAILED:
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Authentication failed." 
<< std::endl;)
+return handleConnError(CONN_AUTH_FAILED, 
getMessage(ERR_CONN_AUTHFAIL,
+
this->m_handshakeErrorId.c_str(),
+
this->m_handshakeErrorMsg.c_str()));
+case exec::user::UNKNOWN_FAILURE:
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Unknown error during 
handshake." << std::endl;)
+return handleConnError(CONN_HANDSHAKE_FAILED, 
getMessage(ERR_CONN_UNKNOWN_ERR,
+ 
this->m_handshakeErrorId.c_str(),
+ 
this->m_handshakeErrorMsg.c_str()));
+case exec::user::AUTH_REQUIRED:
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Server requires SASL 
authentication." << std::endl;)
+return handleAuthentication(properties);
+default:
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Unknown return status." 
<< std::endl;)
+return handleConnError(CONN_HANDSHAKE_FAILED, 
getMessage(ERR_CONN_UNKNOWN_ERR,
+ 
this->m_handshakeErrorId.c_str(),
+ 
this->m_handshakeErrorMsg.c_str()));
+}
+}
+
+connectionStatus_t DrillClientImpl::handleAuthentication(const 
DrillUserProperties *userProperties) {
+try {
+m_saslAuthenticator = new SaslAuthenticatorImpl(userProperties);
--- End diff --

- maybe we should use smart pointers
- I'm not strongly opinionated against exceptions in C++, but this is not a 
common pattern in drill code base


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled 

[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r101599452
  
--- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp ---
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include "saslAuthenticatorImpl.hpp"
+
+#include "drillClientImpl.hpp"
+#include "logger.hpp"
+
+namespace Drill {
+
+#define DEFAULT_SERVICE_NAME "drill"
+
+#define KERBEROS_SIMPLE_NAME "kerberos"
--- End diff --

Why not keeping the SASL names since they are well-defined? (and also 
uppercased)


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102307826
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserServer.java ---
@@ -358,25 +291,59 @@ public BitToUserHandshake 
getHandshakeResponse(UserToBitHandshake inbound) throw
 return handleFailure(respBuilder, 
HandshakeStatus.RPC_VERSION_MISMATCH, errMsg, null);
   }
 
-  if (authenticator != null) {
+  connection.setHandshake(inbound);
+
+  if (!config.isAuthEnabled()) {
+
connection.finalizeSession(inbound.getCredentials().getUserName());
+respBuilder.setStatus(HandshakeStatus.SUCCESS);
+return respBuilder.build();
+  }
+
+  final boolean clientSupportsSasl = inbound.hasSaslSupport() &&
+  (inbound.getSaslSupport().ordinal() >= 
SaslSupport.SASL_AUTH.ordinal());
+  if (!clientSupportsSasl) { // for backward compatibility < 1.10
+final String userName = inbound.getCredentials().getUserName();
+if (logger.isTraceEnabled()) {
+  logger.trace("User {} on connection {} is likely using an 
older client.",
+  userName, connection.getRemoteAddress());
+}
 try {
   String password = "";
   final UserProperties props = inbound.getProperties();
   for (int i = 0; i < props.getPropertiesCount(); i++) {
 Property prop = props.getProperties(i);
-if (UserSession.PASSWORD.equalsIgnoreCase(prop.getKey())) {
+if 
(DrillProperties.PASSWORD.equalsIgnoreCase(prop.getKey())) {
   password = prop.getValue();
   break;
 }
   }
-  
authenticator.authenticate(inbound.getCredentials().getUserName(), password);
+  final PlainFactory plainFactory = 
config.getAuthProvider().getPlainFactory();
--- End diff --

instead of keeping a deprecated methods, why not looking for `PLAIN` auth 
mechanism, and instead of calling authenticate, do an internal sasl session?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102278730
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/AbstractClientConnection.java
 ---
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc;
+
+import io.netty.channel.socket.SocketChannel;
+import org.slf4j.Logger;
+
+import javax.security.sasl.SaslClient;
+import javax.security.sasl.SaslException;
+
+public abstract class AbstractClientConnection extends 
AbstractRemoteConnection implements ClientConnection {
+//  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(AbstractClientConnection.class);
+
+  private SaslClient saslClient;
+
+  public AbstractClientConnection(SocketChannel channel, String name) {
+super(channel, name);
+  }
+
+  protected abstract Logger getLogger();
+
+  @Override
+  public void setSaslClient(final SaslClient saslClient) {
+assert this.saslClient == null;
+this.saslClient = saslClient;
--- End diff --

According to the interface comment, it's supposed to be only set one, but 
this is not checked.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102279444
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/AbstractServerConnection.java
 ---
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc;
+
+import io.netty.channel.socket.SocketChannel;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.hadoop.security.HadoopKerberosName;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.slf4j.Logger;
+
+import javax.security.auth.login.LoginException;
+import javax.security.sasl.SaslException;
+import javax.security.sasl.SaslServer;
+import java.io.IOException;
+
+public abstract class AbstractServerConnection
--- End diff --

Your generic type is not fully defined:
`AbstractServerConnection>`
(it happens all over the place btw. I suspect this is an IntelliJ thing, as 
Eclipse is printing out lots of warnings. If you can change your IDE settings, 
and get all the rawtypes warning introduced by your patches, it would be nice!)

Although the type definition looks correct, I also wonder if the type to be 
used shouldn't be `AbstractServerConnection>` 
instead (as we should rely on the interface instead of the abstract class)?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r101123168
  
--- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp ---
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include "saslAuthenticatorImpl.hpp"
+
+#include "drillClientImpl.hpp"
+#include "logger.hpp"
+
+namespace Drill {
+
+#define DEFAULT_SERVICE_NAME "drill"
+
+#define KERBEROS_SIMPLE_NAME "kerberos"
+#define KERBEROS_SASL_NAME "gssapi"
+#define PLAIN_NAME "plain"
+
+const std::map 
SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of
+(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME)
+(PLAIN_NAME, PLAIN_NAME)
+;
+
+boost::mutex SaslAuthenticatorImpl::s_mutex;
+bool SaslAuthenticatorImpl::s_initialized = false;
+
+SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* 
const properties) :
+m_properties(properties), m_pConnection(NULL), m_secret(NULL) {
+
+if (!s_initialized) {
+boost::lock_guard 
lock(SaslAuthenticatorImpl::s_mutex);
+if (!s_initialized) {
+s_initialized = true;
+
+// set plugin path if provided
+if (DrillClientConfig::getSaslPluginPath()) {
+char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath());
+sasl_set_path(0, saslPluginPath);
+}
+
+sasl_client_init(NULL);
+{ // for debugging purposes
+const char **mechanisms = sasl_global_listmech();
+int i = 1;
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "SASL mechanisms 
available on client: " << std::endl;)
+while (mechanisms[i] != NULL) {
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << i << " : " << 
mechanisms[i] << std::endl;)
+i++;
+}
+}
+}
+}
+}
+
+SaslAuthenticatorImpl::~SaslAuthenticatorImpl() {
+if (m_secret) {
+free(m_secret);
+}
+// may be used to negotiated security layers before disposing in the 
future
+if (m_pConnection) {
+sasl_dispose(_pConnection);
+}
+m_pConnection = NULL;
+}
+
+typedef int (*sasl_callback_proc_t)(void); // see sasl_callback_ft
+
+int SaslAuthenticatorImpl::userNameCallback(void *context, int id, const 
char **result, unsigned *len) {
--- End diff --

smart pointers can be used too here


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r101600048
  
--- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.hpp ---
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DRILLCLIENT_SASLAUTHENTICATORIMPL_HPP
+#define DRILLCLIENT_SASLAUTHENTICATORIMPL_HPP
+
+#include 
+#include 
+#include 
+#include "drill/drillClient.hpp"
+#include "UserBitShared.pb.h"
+
+#include "sasl/sasl.h"
+#include "sasl/saslplug.h"
+
+namespace Drill {
+
+class SaslAuthenticatorImpl {
+
+public:
+
+static const std::map MECHANISM_MAPPING;
--- End diff --

not sure why this is public, isn't this something internal?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r101602793
  
--- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp ---
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include "saslAuthenticatorImpl.hpp"
+
+#include "drillClientImpl.hpp"
+#include "logger.hpp"
+
+namespace Drill {
+
+#define DEFAULT_SERVICE_NAME "drill"
+
+#define KERBEROS_SIMPLE_NAME "kerberos"
+#define KERBEROS_SASL_NAME "gssapi"
+#define PLAIN_NAME "plain"
+
+const std::map 
SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of
+(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME)
+(PLAIN_NAME, PLAIN_NAME)
+;
+
+boost::mutex SaslAuthenticatorImpl::s_mutex;
+bool SaslAuthenticatorImpl::s_initialized = false;
+
+SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* 
const properties) :
+m_properties(properties), m_pConnection(NULL), m_pwd_secret(NULL) {
+
+if (!s_initialized) {
+boost::lock_guard 
lock(SaslAuthenticatorImpl::s_mutex);
+if (!s_initialized) {
+// set plugin path if provided
+if (DrillClientConfig::getSaslPluginPath()) {
+char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath());
+sasl_set_path(0, saslPluginPath);
+}
+
+// loads all the available mechanism and factories in the 
sasl_lib referenced by the path
+const int err = sasl_client_init(NULL);
+if (0 != err) {
+std::stringstream errMsg;
+errMsg << "Failed to load authentication libraries. code: 
" << err;
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << errMsg << std::endl;)
+throw std::runtime_error(errMsg.str().c_str());
+}
+{ // for debugging purposes
+const char **mechanisms = sasl_global_listmech();
+int i = 0;
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "SASL mechanisms 
available on client: " << std::endl;)
+while (mechanisms[i] != NULL) {
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << i << " : " << 
mechanisms[i] << std::endl;)
+i++;
+}
+}
+s_initialized = true;
+}
+}
+}
+
+SaslAuthenticatorImpl::~SaslAuthenticatorImpl() {
+if (m_pwd_secret) {
+free(m_pwd_secret);
+}
+// may be used to negotiated security layers before disposing in the 
future
+if (m_pConnection) {
+sasl_dispose(_pConnection);
+}
+m_pConnection = NULL;
+}
+
+typedef int (*sasl_callback_proc_t)(void); // see sasl_callback_ft
+
+int SaslAuthenticatorImpl::userNameCallback(void *context, int id, const 
char **result, unsigned *len) {
+const std::string* const username = static_cast(context);
+
+if ((SASL_CB_USER == id || SASL_CB_AUTHNAME == id)
+&& username != NULL) {
+*result = username->c_str();
+// *len = (unsigned int) username->length();
+}
+return SASL_OK;
+}
+
+int SaslAuthenticatorImpl::passwordCallback(sasl_conn_t *conn, void 
*context, int id, sasl_secret_t **psecret) {
+const SaslAuthenticatorImpl* const authenticator = static_cast(context);
+
+if (SASL_CB_PASS == id) {
+*psecret = authenticator->m_pwd_secret;
+}
+return SASL_OK;
+}
+
+int SaslAuthenticatorImpl::init(const std::vector& 
mechanisms, exec::shared::SaslMessage& response) {
+// find and set parameters
+std::string authMechanismToUse;
+std::string serviceName;
+std::string serviceHost;
+for (size_t i = 0; i < m_properties->size(); 

[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r101631177
  
--- Diff: common/src/main/java/org/apache/drill/common/KerberosUtil.java ---
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.common;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.base.Preconditions.checkState;
+
+public final class KerberosUtil {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(KerberosUtil.class);
+
+  public static final String KERBEROS_SASL_NAME = "GSSAPI";
+
+  public static final String KERBEROS_SIMPLE_NAME = "KERBEROS";
+
+  public static final String HOSTNAME_PATTERN = "_HOST";
+
+  /**
+   * Returns principal of format primary/instance@REALM.
+   *
+   * @param primary non-null primary component
+   * @param instance non-null instance component
+   * @param realm non-null realm component
+   * @return principal of format primary/instance@REALM
+   */
+  public static String getPrincipalFromParts(final String primary, final 
String instance, final String realm) {
+return checkNotNull(primary) + "/" +
+checkNotNull(instance) + "@" +
+checkNotNull(realm);
+  }
+
+  /**
+   * Expects principal of the format primary/instance@REALM.
+   *
+   * @param principal principal
+   * @return components
+   */
+  public static String[] splitPrincipalIntoParts(final String principal) {
+final String[] components = principal.split("[/@]");
+checkState(components.length == 3);
+checkNotNull(components[0]);
+checkNotNull(components[1]);
+checkNotNull(components[2]);
+return components;
+  }
+
+  public static String canonicalizedInstanceName(String instanceName, 
final String canonicalName) {
--- End diff --

(style) for consistency, use verb present tense for methods 
(canonicalizeInstanceName)


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r101598438
  
--- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp ---
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include "saslAuthenticatorImpl.hpp"
+
+#include "drillClientImpl.hpp"
+#include "logger.hpp"
+
+namespace Drill {
+
+#define DEFAULT_SERVICE_NAME "drill"
+
+#define KERBEROS_SIMPLE_NAME "kerberos"
+#define KERBEROS_SASL_NAME "gssapi"
+#define PLAIN_NAME "plain"
+
+const std::map 
SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of
+(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME)
+(PLAIN_NAME, PLAIN_NAME)
+;
+
+boost::mutex SaslAuthenticatorImpl::s_mutex;
+bool SaslAuthenticatorImpl::s_initialized = false;
+
+SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* 
const properties) :
+m_properties(properties), m_pConnection(NULL), m_pwd_secret(NULL) {
+
+if (!s_initialized) {
+boost::lock_guard 
lock(SaslAuthenticatorImpl::s_mutex);
+if (!s_initialized) {
+// set plugin path if provided
+if (DrillClientConfig::getSaslPluginPath()) {
+char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath());
+sasl_set_path(0, saslPluginPath);
+}
+
+// loads all the available mechanism and factories in the 
sasl_lib referenced by the path
+const int err = sasl_client_init(NULL);
+if (0 != err) {
+std::stringstream errMsg;
+errMsg << "Failed to load authentication libraries. code: 
" << err;
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << errMsg << std::endl;)
+throw std::runtime_error(errMsg.str().c_str());
+}
+{ // for debugging purposes
+const char **mechanisms = sasl_global_listmech();
+int i = 0;
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "SASL mechanisms 
available on client: " << std::endl;)
+while (mechanisms[i] != NULL) {
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << i << " : " << 
mechanisms[i] << std::endl;)
+i++;
+}
+}
+s_initialized = true;
+}
+}
+}
+
+SaslAuthenticatorImpl::~SaslAuthenticatorImpl() {
+if (m_pwd_secret) {
+free(m_pwd_secret);
+}
+// may be used to negotiated security layers before disposing in the 
future
+if (m_pConnection) {
+sasl_dispose(_pConnection);
+}
+m_pConnection = NULL;
+}
+
+typedef int (*sasl_callback_proc_t)(void); // see sasl_callback_ft
+
+int SaslAuthenticatorImpl::userNameCallback(void *context, int id, const 
char **result, unsigned *len) {
+const std::string* const username = static_cast(context);
+
+if ((SASL_CB_USER == id || SASL_CB_AUTHNAME == id)
+&& username != NULL) {
+*result = username->c_str();
+// *len = (unsigned int) username->length();
+}
+return SASL_OK;
+}
+
+int SaslAuthenticatorImpl::passwordCallback(sasl_conn_t *conn, void 
*context, int id, sasl_secret_t **psecret) {
+const SaslAuthenticatorImpl* const authenticator = static_cast(context);
+
+if (SASL_CB_PASS == id) {
+*psecret = authenticator->m_pwd_secret;
+}
+return SASL_OK;
+}
+
+int SaslAuthenticatorImpl::init(const std::vector& 
mechanisms, exec::shared::SaslMessage& response) {
+// find and set parameters
+std::string authMechanismToUse;
+std::string serviceName;
+std::string serviceHost;
+for (size_t i = 0; i < m_properties->size(); 

[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r101600545
  
--- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp ---
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include "saslAuthenticatorImpl.hpp"
+
+#include "drillClientImpl.hpp"
+#include "logger.hpp"
+
+namespace Drill {
+
+#define DEFAULT_SERVICE_NAME "drill"
+
+#define KERBEROS_SIMPLE_NAME "kerberos"
+#define KERBEROS_SASL_NAME "gssapi"
+#define PLAIN_NAME "plain"
+
+const std::map 
SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of
+(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME)
+(PLAIN_NAME, PLAIN_NAME)
+;
+
+boost::mutex SaslAuthenticatorImpl::s_mutex;
+bool SaslAuthenticatorImpl::s_initialized = false;
+
+SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* 
const properties) :
+m_properties(properties), m_pConnection(NULL), m_pwd_secret(NULL) {
+
+if (!s_initialized) {
+boost::lock_guard 
lock(SaslAuthenticatorImpl::s_mutex);
+if (!s_initialized) {
+// set plugin path if provided
+if (DrillClientConfig::getSaslPluginPath()) {
+char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath());
+sasl_set_path(0, saslPluginPath);
+}
+
+// loads all the available mechanism and factories in the 
sasl_lib referenced by the path
+const int err = sasl_client_init(NULL);
+if (0 != err) {
+std::stringstream errMsg;
+errMsg << "Failed to load authentication libraries. code: 
" << err;
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << errMsg << std::endl;)
--- End diff --

or inline errMsg directly into the log statement?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102336062
  
--- Diff: common/src/main/java/org/apache/drill/common/KerberosUtil.java ---
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.common;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.base.Preconditions.checkState;
+
+public final class KerberosUtil {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(KerberosUtil.class);
+
+  public static final String KERBEROS_SASL_NAME = "GSSAPI";
+
+  public static final String KERBEROS_SIMPLE_NAME = "KERBEROS";
--- End diff --

why not using the SASL name? (in theory, gssapi can work with other systems 
than kerberos)


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102348994
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserServer.java ---
@@ -358,25 +291,59 @@ public BitToUserHandshake 
getHandshakeResponse(UserToBitHandshake inbound) throw
 return handleFailure(respBuilder, 
HandshakeStatus.RPC_VERSION_MISMATCH, errMsg, null);
   }
 
-  if (authenticator != null) {
+  connection.setHandshake(inbound);
+
+  if (!config.isAuthEnabled()) {
+
connection.finalizeSession(inbound.getCredentials().getUserName());
+respBuilder.setStatus(HandshakeStatus.SUCCESS);
+return respBuilder.build();
+  }
+
+  final boolean clientSupportsSasl = inbound.hasSaslSupport() &&
+  (inbound.getSaslSupport().ordinal() >= 
SaslSupport.SASL_AUTH.ordinal());
+  if (!clientSupportsSasl) { // for backward compatibility < 1.10
+final String userName = inbound.getCredentials().getUserName();
+if (logger.isTraceEnabled()) {
+  logger.trace("User {} on connection {} is likely using an 
older client.",
+  userName, connection.getRemoteAddress());
+}
 try {
   String password = "";
   final UserProperties props = inbound.getProperties();
   for (int i = 0; i < props.getPropertiesCount(); i++) {
 Property prop = props.getProperties(i);
-if (UserSession.PASSWORD.equalsIgnoreCase(prop.getKey())) {
+if 
(DrillProperties.PASSWORD.equalsIgnoreCase(prop.getKey())) {
   password = prop.getValue();
   break;
 }
   }
-  
authenticator.authenticate(inbound.getCredentials().getUserName(), password);
+  final PlainFactory plainFactory = 
config.getAuthProvider().getPlainFactory();
+  if (plainFactory == null) {
+throw new UserAuthenticationException("The server no 
longer supports username/password" +
+" based authentication. Please talk to your system 
administrator.");
+  }
+  plainFactory.getAuthenticator()
+  .authenticate(userName, password);
+  connection.changeHandlerTo(config.getMessageHandler());
+  connection.finalizeSession(userName);
+  respBuilder.setStatus(HandshakeStatus.SUCCESS);
+  if (logger.isTraceEnabled()) {
+logger.trace("Authenticated {} successfully using PLAIN 
from {}", userName,
+connection.getRemoteAddress());
+  }
+  return respBuilder.build();
 } catch (UserAuthenticationException ex) {
   return handleFailure(respBuilder, 
HandshakeStatus.AUTH_FAILED, ex.getMessage(), ex);
 }
   }
 
-  connection.setUser(inbound);
-  return respBuilder.setStatus(HandshakeStatus.SUCCESS).build();
+  // mention server's authentication capabilities
+  
respBuilder.addAllAuthenticationMechanisms(config.getAuthProvider().getAllFactoryNames());
+
+  // for now, this means PLAIN credentials will be sent over twice
--- End diff --

just a thought if this is something you're worried about. What prevents the 
server to detect the plain credentials from the user, and try to perform the 
authentication directly (if plain is supported)?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102303605
  
--- Diff: 
exec/rpc/src/main/java/org/apache/drill/exec/rpc/ServerConnection.java ---
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc;
+
+import javax.security.sasl.SaslServer;
+import java.io.IOException;
+
+public interface ServerConnection extends 
RemoteConnection {
+
+  // init only once
+  void initSaslServer(String mechanismName) throws IOException;
--- End diff --

should the server connection exposes SASL protocol directly? isn't 
ServerAuthenticationHandler a better place to manage the saslServer instance?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102342473
  
--- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp ---
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include "saslAuthenticatorImpl.hpp"
+
+#include "drillClientImpl.hpp"
+#include "logger.hpp"
+
+namespace Drill {
+
+#define DEFAULT_SERVICE_NAME "drill"
--- End diff --

these defines should probably be (static) constant instead.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102344797
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/ClientAuthenticatorProvider.java
 ---
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc.security;
+
+import org.apache.drill.common.AutoCloseables;
+import org.apache.drill.common.exceptions.DrillRuntimeException;
+import org.apache.drill.common.map.CaseInsensitiveMap;
+import org.apache.drill.exec.rpc.security.kerberos.KerberosFactory;
+import org.apache.drill.exec.rpc.security.plain.PlainFactory;
+
+import javax.security.sasl.SaslException;
+import java.util.Map;
+import java.util.Set;
+
+public class ClientAuthenticatorProvider implements AuthenticatorProvider {
+  private static final org.slf4j.Logger logger =
+  org.slf4j.LoggerFactory.getLogger(ClientAuthenticatorProvider.class);
+
+  private static final String customFactories = 
System.getProperty("customAuthFactories");
--- End diff --

shouldn't this property be prefixed with `drill.`?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102294263
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlConnectionConfig.java
 ---
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc.control;
+
+import org.apache.drill.exec.exception.DrillbitStartupException;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint;
+import org.apache.drill.exec.rpc.BitConnectionConfig;
+import org.apache.drill.exec.server.BootStrapContext;
+import org.apache.drill.exec.work.batch.ControlMessageHandler;
+
+// package private
+class ControlConnectionConfig extends BitConnectionConfig {
+//  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(ControlConnectionConfig.class);
+
+  private final ControlMessageHandler handler;
+
+  private DrillbitEndpoint localEndpoint;
+
+  ControlConnectionConfig(BufferAllocator allocator, BootStrapContext 
context, ControlMessageHandler handler)
+  throws DrillbitStartupException {
+super(allocator, context);
+this.handler = handler;
+  }
+
+  @Override
+  public String getName() {
+return "control"; // unused
+  }
+
+  ControlMessageHandler getMessageHandler() {
+return handler;
+  }
+
+  void setLocalEndpoint(DrillbitEndpoint endpoint) {
--- End diff --

Can we avoid modifying the original config to return the endpoint created 
by the server? it seems to me that the previous approach sounded safer as 
config is usually considered immutable.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102292416
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlClient.java
 ---
@@ -105,8 +128,83 @@ protected void finalizeConnection(BitControlHandshake 
handshake, ControlConnecti
 connection.setEndpoint(handshake.getEndpoint());
   }
 
-  public ControlConnection getConnection() {
-return this.connection;
+  @Override
+  protected  RpcCommand
+  getInitialCommand(final RpcCommand command) {
+if (config.getAuthMechanismToUse() == null) {
+  return super.getInitialCommand(command);
+} else {
+  return new AuthenticationCommand<>(command);
--- End diff --

shouldn't we use `super.getInitialCommand(command)` too?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102344235
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/AuthenticationOutcomeListener.java
 ---
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.rpc.security;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.Internal.EnumLite;
+import com.google.protobuf.MessageLite;
+import io.netty.buffer.ByteBuf;
+import org.apache.drill.exec.proto.UserBitShared.SaslMessage;
+import org.apache.drill.exec.proto.UserBitShared.SaslStatus;
+import org.apache.drill.exec.rpc.BasicClient;
+import org.apache.drill.exec.rpc.ClientConnection;
+import org.apache.drill.exec.rpc.RpcException;
+import org.apache.drill.exec.rpc.RpcOutcomeListener;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import javax.security.sasl.SaslClient;
+import javax.security.sasl.SaslException;
+import java.io.IOException;
+import java.lang.reflect.UndeclaredThrowableException;
+import java.security.PrivilegedExceptionAction;
+import java.util.EnumMap;
+import java.util.Map;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+public class AuthenticationOutcomeListener
+implements RpcOutcomeListener {
+  private static final org.slf4j.Logger logger =
+  
org.slf4j.LoggerFactory.getLogger(AuthenticationOutcomeListener.class);
+
+  private static final ImmutableMap 
CHALLENGE_PROCESSORS;
+  static {
+final Map map = new 
EnumMap<>(SaslStatus.class);
+map.put(SaslStatus.SASL_IN_PROGRESS, new SaslInProgressProcessor());
+map.put(SaslStatus.SASL_SUCCESS, new SaslSuccessProcessor());
+map.put(SaslStatus.SASL_FAILED, new SaslFailedProcessor());
+CHALLENGE_PROCESSORS = Maps.immutableEnumMap(map);
+  }
+
+  private final BasicClient 
client;
+  private final R connection;
+  private final T saslRpcType;
+  private final UserGroupInformation ugi;
+  private final RpcOutcomeListener rpcOutcomeListener;
+
+  public AuthenticationOutcomeListener(BasicClient client,
+   R connection, T saslRpcType, 
UserGroupInformation ugi,
+   RpcOutcomeListener 
rpcOutcomeListener) {
+this.client = client;
+this.connection = connection;
+this.saslRpcType = saslRpcType;
+this.ugi = ugi;
+this.rpcOutcomeListener = rpcOutcomeListener;
+  }
+
+  public void initiate(final String mechanismName) {
+logger.trace("Initiating SASL exchange.");
+try {
+  final ByteString responseData;
+  final SaslClient saslClient = connection.getSaslClient();
+  if (saslClient.hasInitialResponse()) {
+responseData = ByteString.copyFrom(evaluateChallenge(ugi, 
saslClient, new byte[0]));
+  } else {
+responseData = ByteString.EMPTY;
+  }
+  client.send(new AuthenticationOutcomeListener<>(client, connection, 
saslRpcType, ugi, rpcOutcomeListener),
+  connection,
+  saslRpcType,
+  SaslMessage.newBuilder()
+  .setMechanism(mechanismName)
+  .setStatus(SaslStatus.SASL_START)
+  .setData(responseData)
+  .build(),
+  SaslMessage.class,
+  true /** the connection will not be backed up at this point */);
+  logger.trace("Initiated SASL exchange.");
+} catch (final Exception e) {
+  rpcOutcomeListener.failed(RpcException.mapException(e));
+}
+  }
+
+  @Override
+  public void failed(RpcException 

[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102358541
  
--- Diff: contrib/native/client/cmakeModules/FindSASL.cmake ---
@@ -0,0 +1,55 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# - Try to find Cyrus SASL
+
+if (MSVC)
--- End diff --

Please also update the readme txt files on how to get/install SASL.

On Mac 10.11 and higher, it looks like using Apple SASL implementation is 
deprecated (I get deprecation warnings from the compiler)


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r101605131
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/client/DrillClient.java ---
@@ -435,9 +427,14 @@ public synchronized boolean reconnect() {
   }
 
   private void connect(DrillbitEndpoint endpoint) throws RpcException {
-final FutureHandler f = new FutureHandler();
-client.connect(f, endpoint, props, getUserCredentials());
-f.checkedGet();
+client.connect(endpoint, properties, 
getUserCredentials()).checkedGet();
+if (client.serverRequiresAuthentication()) {
+  try {
+client.authenticate(null).checkedGet();
--- End diff --

shouldn't this be done directly by the rpc client?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication

2017-02-21 Thread laurentgo
Github user laurentgo commented on a diff in the pull request:

https://github.com/apache/drill/pull/578#discussion_r102299705
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlClient.java
 ---
@@ -89,14 +90,36 @@ public MessageLite getResponseDefaultInstance(int 
rpcType) throws RpcException {
   }
 
   @Override
-  protected Response handle(ControlConnection connection, int rpcType, 
ByteBuf pBody, ByteBuf dBody) throws RpcException {
-return handler.handle(connection, rpcType, pBody, dBody);
+  protected void handle(ControlConnection connection, int rpcType, ByteBuf 
pBody, ByteBuf dBody,
+ResponseSender sender) throws RpcException {
+connection.getCurrentHandler().handle(connection, rpcType, pBody, 
dBody, sender);
   }
 
   @Override
   protected void validateHandshake(BitControlHandshake handshake) throws 
RpcException {
 if (handshake.getRpcVersion() != ControlRpcConfig.RPC_VERSION) {
-  throw new RpcException(String.format("Invalid rpc version.  Expected 
%d, actual %d.", handshake.getRpcVersion(), ControlRpcConfig.RPC_VERSION));
+  throw new RpcException(String.format("Invalid rpc version.  Expected 
%d, actual %d.",
+  handshake.getRpcVersion(), ControlRpcConfig.RPC_VERSION));
+}
+
+if (handshake.getAuthenticationMechanismsCount() != 0) { // remote 
requires authentication
+  final SaslClient saslClient;
--- End diff --

shouldn't the SASL client instantiation happens in `AuthenticationCommand` 
(which in turns provide it to `AuthenticationOutcomeRpcListener`). It looks 
like it would be more contained.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102345408
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdSelectivity.java
 ---
@@ -0,0 +1,219 @@

+/***
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ 
**/
+package org.apache.drill.exec.planner.cost;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.SingleRel;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdSelectivity;
+import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.rex.RexVisitor;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.Util;
+import org.apache.drill.exec.planner.common.DrillJoinRelBase;
+import org.apache.drill.exec.planner.common.DrillRelOptUtil;
+import org.apache.drill.exec.planner.logical.DrillTable;
+import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class DrillRelMdSelectivity extends RelMdSelectivity {
+  private static final org.slf4j.Logger logger =
+  org.slf4j.LoggerFactory.getLogger(RelMdSelectivity.class);
+
+  private static final DrillRelMdSelectivity INSTANCE =
+  new DrillRelMdSelectivity();
+
+  public static final RelMetadataProvider SOURCE =
+  ReflectiveRelMetadataProvider.reflectiveSource(
+  BuiltInMethod.SELECTIVITY.method, INSTANCE);
+
+  @Override
+  public Double getSelectivity(RelNode rel, RexNode predicate) {
+if (rel instanceof TableScan) {
+  return getScanSelectivity((TableScan) rel, predicate);
+} else if (rel instanceof DrillJoinRelBase) {
+  return getJoinSelectivity(((DrillJoinRelBase) rel), predicate);
+} else if (rel instanceof SingleRel && 
!DrillRelOptUtil.guessRows(rel)) {
+return RelMetadataQuery.getSelectivity(((SingleRel) 
rel).getInput(), predicate);
+} else if (rel instanceof RelSubset && 
!DrillRelOptUtil.guessRows(rel)) {
+  if (((RelSubset) rel).getBest() != null) {
+return RelMetadataQuery.getSelectivity(((RelSubset)rel).getBest(), 
predicate);
+  } else if (((RelSubset)rel).getOriginal() != null) {
+return 
RelMetadataQuery.getSelectivity(((RelSubset)rel).getOriginal(), predicate);
+  } else {
+return super.getSelectivity(rel, predicate);
+  }
+} else {
+  return super.getSelectivity(rel, predicate);
+}
+  }
+
+  private Double getJoinSelectivity(DrillJoinRelBase rel, RexNode 
predicate) {
+double sel = 1.0;
+// determine which filters apply to the left vs right
+RexNode leftPred = null;
+RexNode rightPred = null;
+JoinRelType joinType = rel.getJoinType();
+final RexBuilder rexBuilder = rel.getCluster().getRexBuilder();
+int[] adjustments = new int[rel.getRowType().getFieldCount()];
+
+if (DrillRelOptUtil.guessRows(rel)) {
+  return super.getSelectivity(rel, predicate);
+}
+
+if (predicate != 

[jira] [Resolved] (DRILL-5103) External Sort pop shadows variable "initialAllocation" from AbstractBase

2017-02-21 Thread Paul Rogers (JIRA)

 [ 
https://issues.apache.org/jira/browse/DRILL-5103?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Paul Rogers resolved DRILL-5103.

Resolution: Fixed

> External Sort pop shadows variable "initialAllocation" from AbstractBase
> 
>
> Key: DRILL-5103
> URL: https://issues.apache.org/jira/browse/DRILL-5103
> Project: Apache Drill
>  Issue Type: Sub-task
>Affects Versions: 1.8.0
>Reporter: Paul Rogers
>Assignee: Paul Rogers
>
> The Physical Operator (pop) definition for External sort contains the 
> following:
> {code}
> public class ExternalSort extends Sort {
>   private long initialAllocation = 2000;
> {code}
> But,
> {code}
> public abstract class AbstractBase implements PhysicalOperator{
>   protected long initialAllocation = 100L;
> {code}
> The result is that the variable in {{ExternalSort}} shadows the one in 
> {{AbstractBase}}. Jackson deserialization sets the one in {{AbstractBase}}, 
> but the method {{getInitialAllocation()}} returns the shadowed one in 
> {{ExternalSort}}.
> Remove the shadowed {{initialAllocation}} from {{ExternalSort}} and move the 
> initializer into the constructor.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102327621
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsAggBatch.java
 ---
@@ -0,0 +1,256 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.sun.codemodel.JExpr;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.exec.exception.ClassTransformationException;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ClassGenerator;
+import org.apache.drill.exec.expr.CodeGenerator;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.ValueVectorWriteExpression;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsAggregate;
+import org.apache.drill.exec.physical.impl.aggregate.StreamingAggBatch;
+import org.apache.drill.exec.physical.impl.aggregate.StreamingAggTemplate;
+import org.apache.drill.exec.physical.impl.aggregate.StreamingAggregator;
+import org.apache.drill.exec.planner.physical.StatsAggPrel.OperatorPhase;
+import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TypedFieldId;
+import org.apache.drill.exec.store.ImplicitColumnExplorer;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.complex.FieldIdUtil;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.IOException;
+import java.util.GregorianCalendar;
+import java.util.List;
+import java.util.TimeZone;
+
+/**
+ * TODO: This needs cleanup. Currently the key values are constants and we 
compare the constants
+ * for every record. Seems unnecessary.
+ *
+ * Example input and output:
+ * Schema of incoming batch: region_id (VARCHAR), sales_city (VARCHAR), 
cnt (BIGINT)
+ * Schema of output:
+ *"schema" : BIGINT - Schema number. For each schema change this 
number is incremented.
+ *"computed" : BIGINT - What time is it computed?
+ *"columns"   : MAP - Column names
+ *   "region_id"  : VARCHAR
+ *   "sales_city" : VARCHAR
+ *   "cnt": VARCHAR
+ *"statscount" : MAP
+ *   "region_id"  : BIGINT - statscount(region_id) - aggregation over 
all values of region_id
+ *  in incoming batch
+ *   "sales_city" : BIGINT - statscount(sales_city)
+ *   "cnt": BIGINT - statscount(cnt)
+ *"nonnullstatcount" : MAP
+ *   "region_id"  : BIGINT - nonnullstatcount(region_id)
+ *   "sales_city" : BIGINT - nonnullstatcount(sales_city)
+ *   "cnt": BIGINT - nonnullstatcount(cnt)
+ *    another map for next stats function 
+ */
+public class StatisticsAggBatch extends StreamingAggBatch {
+  private List functions;
+  private int schema = 0;
+
+  public StatisticsAggBatch(StatisticsAggregate popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws OutOfMemoryException {
+super(popConfig, incoming, context);
+this.functions = popConfig.getFunctions();
+  }
+
+  private void createKeyColumn(String name, LogicalExpression expr, 
List keyExprs,
+  List 

[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102326907
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[jira] [Created] (DRILL-5287) Provide option to skip updates of ephemeral state changes in Zookeeper

2017-02-21 Thread Padma Penumarthy (JIRA)
Padma Penumarthy created DRILL-5287:
---

 Summary: Provide option to skip updates of ephemeral state changes 
in Zookeeper
 Key: DRILL-5287
 URL: https://issues.apache.org/jira/browse/DRILL-5287
 Project: Apache Drill
  Issue Type: Bug
Affects Versions: 1.9.0
Reporter: Padma Penumarthy
Assignee: Padma Penumarthy
 Fix For: 1.10


We put transient profiles in zookeeper and update state as query progresses and 
changes states. It is observed that this adds latency of ~45msec for each 
update in the query execution path. This gets even worse when high number of 
concurrent queries are in progress. For concurrency=100, the average query 
response time even for short queries  is 8 sec vs 0.2 sec with these updates 
disabled. For short lived queries in a high-throughput scenario, it is of no 
value to update state changes in zookeeper. We need an option to disable these 
updates for short running operational queries.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102325216
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102324705
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102324596
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102323849
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102322555
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102320614
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102319897
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102316247
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/config/UnpivotMaps.java
 ---
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.config;
+
+import java.util.List;
+
+import org.apache.drill.exec.physical.base.AbstractSingle;
+import org.apache.drill.exec.physical.base.PhysicalOperator;
+import org.apache.drill.exec.physical.base.PhysicalVisitor;
+import org.apache.drill.exec.proto.UserBitShared.CoreOperatorType;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+
+@JsonTypeName("unpivot-maps")
+public class UnpivotMaps extends AbstractSingle {
--- End diff --

This is generic in the sense that it can unpivot any given set of maps 
given that they are of
the same size and contain the same keys(columns).


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102314383
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillStatsTable.java
 ---
@@ -0,0 +1,347 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.common;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonGetter;
+import com.fasterxml.jackson.annotation.JsonSetter;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonSubTypes;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Stopwatch;
+import com.google.common.collect.Maps;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelVisitor;
+import org.apache.calcite.rel.core.TableScan;
+import org.apache.drill.common.exceptions.DrillRuntimeException;
+import org.apache.drill.exec.ops.QueryContext;
+import org.apache.drill.exec.planner.logical.DrillTable;
+import org.apache.drill.exec.util.ImpersonationUtil;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.joda.time.DateTime;
+
+/**
+ * Wraps the stats table info including schema and tableName. Also 
materializes stats from storage
+ * and keeps them in memory.
+ */
+public class DrillStatsTable {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(DrillStatsTable.class);
+  private final FileSystem fs;
+  private final Path tablePath;
+
+  /**
+   * List of columns in stats table.
+   */
+  public static final String COL_COLUMN = "column";
+  public static final String COL_COMPUTED = "computed";
+  public static final String COL_STATCOUNT = "statcount";
+  public static final String COL_NDV = "ndv";
+
+  private final String schemaName;
+  private final String tableName;
+
+  private final Map ndv = Maps.newHashMap();
+  private double rowCount = -1;
+
+  private boolean materialized = false;
+
+  private TableStatistics statistics = null;
+
+  public DrillStatsTable(String schemaName, String tableName, Path 
tablePath, FileSystem fs) {
+this.schemaName = schemaName;
+this.tableName = tableName;
+this.tablePath = tablePath;
+this.fs = 
ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), 
fs.getConf());
+  }
+
+  public String getSchemaName() {
+return schemaName;
+  }
+
+  public String getTableName() {
+return tableName;
+  }
+  /**
+   * Get number of distinct values of given column. If stats are not 
present for the given column,
+   * a null is returned.
+   *
+   * Note: returned data may not be accurate. Accuracy depends on whether 
the table data has changed after the
+   * stats are computed.
+   *
+   * @param col
+   * @return
+   */
+  public Double getNdv(String col) {
+// Stats might not have materialized because of errors.
+if (!materialized) {
+  return null;
+}
+final String upperCol = col.toUpperCase();
+final Long ndvCol = ndv.get(upperCol);
+// Ndv estimation techniques like HLL may over-estimate, hence cap it 
at rowCount
+if (ndvCol != null) {
+  return Math.min(ndvCol, rowCount);
--- End diff --

Histograms would help with the data skew. When we have histograms, the NDV 
would be obtained from the Histograms. Stats will be off by default (so not as 

[jira] [Created] (DRILL-5286) When rel and target candidate set is the same, planner should not need to do convert for the relNode since it must have been done

2017-02-21 Thread Chunhui Shi (JIRA)
Chunhui Shi created DRILL-5286:
--

 Summary: When rel and target candidate set is the same, planner 
should not need to do convert for the relNode since it must have been done
 Key: DRILL-5286
 URL: https://issues.apache.org/jira/browse/DRILL-5286
 Project: Apache Drill
  Issue Type: Bug
Reporter: Chunhui Shi
Assignee: Chunhui Shi






--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102310795
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102306890
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[GitHub] drill issue #738: DRILL-5190: Display planning and queued time for a query i...

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on the issue:

https://github.com/apache/drill/pull/738
  
+1


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill issue #751: DRILL-5259: Allow listing a user-defined number of profile...

2017-02-21 Thread sudheeshkatkam
Github user sudheeshkatkam commented on the issue:

https://github.com/apache/drill/pull/751
  
+1


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[jira] [Resolved] (DRILL-5203) Provide per-test control over logging for unit tests

2017-02-21 Thread Paul Rogers (JIRA)

 [ 
https://issues.apache.org/jira/browse/DRILL-5203?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Paul Rogers resolved DRILL-5203.

Resolution: Fixed

Added as part of the new "cluster fixture" unit test framework.

> Provide per-test control over logging for unit tests
> 
>
> Key: DRILL-5203
> URL: https://issues.apache.org/jira/browse/DRILL-5203
> Project: Apache Drill
>  Issue Type: Improvement
>  Components: Tools, Build & Test
>Affects Versions: 1.9.0
>Reporter: Paul Rogers
>Assignee: Paul Rogers
>Priority: Minor
>
> Drill provides extensive logging. In production, users typically turn on all 
> logging to some useful level, say WARN for normal operation or DEBUG when 
> problems occur.
> Drill has a wide variety of unit tests, each of which covers some particular 
> part of the product. When working in that area, we wish to turn on logging 
> just for that one area, and often just for the test being used to drive 
> development (in Test-Driven Development fashion.)
> Today, we control logging only via the single, shared {{logback.xml}} file in 
> {{$DRILL_HOME/conf}} in production, and the {{logback-test.xml}} file in the 
> {{src/resources}} directory during development. This is a very blunt tool: it 
> affects all tests and is cumbersome to use on a per-test basis.
> This, then is the motivation for a "log fixture": a simple piece of code that 
> lets us turn on very targeted logging for the duration of a single test, then 
> restore the defaults afterwards.
> Example:
> {code}
>   @Test
>   public void testSomething() throws Exception {
> LogFixtureBuilder logBuilder = LogFixture.builder()
> .toConsole()
> .disable() // Turn off all logging...
> // Except for this one logger
> .logger(ExternalSortBatch.class, Level.DEBUG);
> try (LogFixture logs = logBuilder.build()) {
>   // Do the test here: the one logger is set to debug to the console
> }
> // Logging back to configured settings
>   }
> {code}
> Alternatively, the settings can affect an entire test class:
> {code}
>   private static LogFixture logFixture;
>   @BeforeClass
>   public static void setup() {
> logFixture = LogFixture.builder()
> .toConsole()
> .disable()
> .logger(ExternalSortBatch.class, Level.DEBUG)
> .build();
>   }
>   @AfterClass
>   public void tearDown() {
> logFixture.close();
>   }
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] drill pull request #754: DRILL-5275: Sort spill is slow due to repeated allo...

2017-02-21 Thread Ben-Zvi
Github user Ben-Zvi commented on a diff in the pull request:

https://github.com/apache/drill/pull/754#discussion_r102295524
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/cache/VectorAccessibleSerializable.java
 ---
@@ -57,6 +57,12 @@
   private BatchSchema.SelectionVectorMode svMode = 
BatchSchema.SelectionVectorMode.NONE;
   private SelectionVector2 sv2;
 
+  /**
+   * Disk I/O buffer used for all reads and writes of DrillBufs.
+   */
+
+  private byte buffer[] = new byte[32*1024];
--- End diff --

(1) Is 32K too small ? (because it is heap?) Does a higher size perform 
better ?
(2) Would it work better if the operator would allocate this serialization 
buffer from the direct memory (and only when spilling), and then free it at the 
end ?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102295174
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102295106
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[GitHub] drill pull request #742: DRILL-5242: The UI breaks when rendering profiles h...

2017-02-21 Thread kkhatua
Github user kkhatua closed the pull request at:

https://github.com/apache/drill/pull/742


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill issue #742: DRILL-5242: The UI breaks when rendering profiles having u...

2017-02-21 Thread kkhatua
Github user kkhatua commented on the issue:

https://github.com/apache/drill/pull/742
  
This closes PR #742 


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill issue #739: DRILL-5230: Translation of millisecond duration into hours...

2017-02-21 Thread kkhatua
Github user kkhatua commented on the issue:

https://github.com/apache/drill/pull/739
  
This closes PR #739 


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #739: DRILL-5230: Translation of millisecond duration int...

2017-02-21 Thread kkhatua
Github user kkhatua closed the pull request at:

https://github.com/apache/drill/pull/739


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102294356
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[jira] [Created] (DRILL-5285) Provide detailed, accurate estimate of size consumed by a record batch

2017-02-21 Thread Paul Rogers (JIRA)
Paul Rogers created DRILL-5285:
--

 Summary: Provide detailed, accurate estimate of size consumed by a 
record batch
 Key: DRILL-5285
 URL: https://issues.apache.org/jira/browse/DRILL-5285
 Project: Apache Drill
  Issue Type: Sub-task
Affects Versions: 1.10.0
Reporter: Paul Rogers
Assignee: Paul Rogers
 Fix For: 1.10.0


DRILL-5080 introduced a {{RecordBatchSizer}} that estimates the space taken by 
a record batch and determines batch "density."

Drill provides a large variety of vectors, each with their own internal 
structure and collections of vectors. For example, fixed vectors use just a 
data vector. Nullable vectors add an "is set" vector. Variable length vectors 
add an offset vector. Repeated vectors add a second offset vector.

The original {{RecordBatchSizer}} attempted to compute sizes for all these 
vector types. But, the complexity got to be out of hand. This ticket requests 
to simply bite the bullet and move the calculations into each vector type so 
that the {{RecordBatchSizer}} can simply use the results of the calculations.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102292075
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[jira] [Created] (DRILL-5284) Roll-up of final fixes for managed sort

2017-02-21 Thread Paul Rogers (JIRA)
Paul Rogers created DRILL-5284:
--

 Summary: Roll-up of final fixes for managed sort
 Key: DRILL-5284
 URL: https://issues.apache.org/jira/browse/DRILL-5284
 Project: Apache Drill
  Issue Type: Bug
Affects Versions: 1.10.0
Reporter: Paul Rogers
Assignee: Paul Rogers
 Fix For: 1.10.0


The managed external sort was introduced in DRILL-5080. Since that time, 
extensive testing has identified a number of minor fixes and improvements. 
Given the long PR cycles, it is not practical to spend a week or two to do a PR 
for each fix individually. This ticket represents a roll-up of a combination of 
a number of fixes. Small fixes are listed here, larger items appear as 
sub-tasks.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102291655
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillFilterRelBase.java
 ---
@@ -113,6 +114,6 @@ public double getRows() {
 selectivity = filterMaxSelectivityEstimateFactor;
   }
 }
-return selectivity*RelMetadataQuery.getRowCount(getInput());
+return NumberUtil.multiply(selectivity, 
RelMetadataQuery.getRowCount(getInput()));
--- End diff --

Done


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102291099
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/unpivot/UnpivotMapsRecordBatch.java
 ---
@@ -0,0 +1,276 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.unpivot;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.UnpivotMaps;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+/**
+ * Unpivot maps. Assumptions are:
+ *  1) all child vectors in a map are of same type.
+ *  2) Each map contains the same number of fields and field names are 
also same (types could be different).
+ *
+ * Example input and output:
+ * Schema of input:
+ *"schema": BIGINT - Schema number. For each schema change 
this number is incremented.
+ *"computed"  : BIGINT - What time is it computed?
+ *"columns" : MAP - Column names
+ *   "region_id"  : VARCHAR
+ *   "sales_city" : VARCHAR
+ *   "cnt": VARCHAR
+ *"statscount" : MAP
+ *   "region_id"  : BIGINT - statscount(region_id) - aggregation over 
all values of region_id
+ *  in incoming batch
+ *   "sales_city" : BIGINT - statscount(sales_city)
+ *   "cnt": BIGINT - statscount(cnt)
+ *"nonnullstatcount" : MAP
+ *   "region_id"  : BIGINT - nonnullstatcount(region_id)
+ *   "sales_city" : BIGINT - nonnullstatcount(sales_city)
+ *   "cnt": BIGINT - nonnullstatcount(cnt)
+ *    another map for next stats function 
+ *
+ * Schema of output:
--- End diff --

For now, we stick to the original reviewed design. We can revisit the 
design later, if required. 


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102290203
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatchCreator.java
 ---
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.base.Preconditions;
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.physical.impl.BatchCreator;
+import org.apache.drill.exec.record.CloseableRecordBatch;
+import org.apache.drill.exec.record.RecordBatch;
+
+import java.util.List;
+
+@SuppressWarnings("unused")
--- End diff --

Copy-paste artifact. Removed it.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102290224
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/unpivot/UnpivotMapsBatchCreator.java
 ---
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.unpivot;
+
+import java.util.List;
+
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.UnpivotMaps;
+import org.apache.drill.exec.physical.impl.BatchCreator;
+import org.apache.drill.exec.record.CloseableRecordBatch;
+import org.apache.drill.exec.record.RecordBatch;
+
+import com.google.common.base.Preconditions;
+
+@SuppressWarnings("unused")
--- End diff --

Copy-paste artifact. Removed it.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102289330
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet

2017-02-21 Thread gparai
Github user gparai commented on a diff in the pull request:

https://github.com/apache/drill/pull/729#discussion_r102286549
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java
 ---
@@ -0,0 +1,532 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.ErrorCollector;
+import org.apache.drill.common.expression.ErrorCollectorImpl;
+import org.apache.drill.common.expression.FunctionCallFactory;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.NullableFloat8Holder;
+import org.apache.drill.exec.expr.holders.ObjectHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.record.AbstractSingleRecordBatch;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.exec.record.TransferPair;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.DateVector;
+import org.apache.drill.exec.vector.BigIntVector;
+import org.apache.drill.exec.vector.NullableBigIntVector;
+import org.apache.drill.exec.vector.NullableFloat8Vector;
+import org.apache.drill.exec.vector.NullableVarBinaryVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+
+public class StatisticsMergeBatch extends 
AbstractSingleRecordBatch {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class);
+  private Map functions;
+  private boolean first = true;
+  private boolean finished = false;
+  private int schema = 0;
+  private int recordCount = 0;
+  private List keyList = null;
+  private Map dataSrcVecMap = null;
+  // Map of non-map fields to VV in the incoming schema
+  private Map copySrcVecMap = null;
+  private Map> aggregationMap = null;
+  public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch 
incoming,
+  FragmentContext context) throws 
OutOfMemoryException {
+super(popConfig, context, incoming);
+this.functions = new HashMap<>();
+this.aggregationMap = new HashMap<>();
+
+/*for (String key : popConfig.getFunctions()) {
+  aggregationMap.put(key, new HashMap());
+  if (key.equalsIgnoreCase("statcount") || 
key.equalsIgnoreCase("nonnullstatcount")) {
+functions.put(key, "sum");
+  } else if (key.equalsIgnoreCase("hll")) {
+

  1   2   >