[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94696573 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/plain/PlainServer.java --- @@ -0,0 +1,175 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.security.plain; + +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.sasl.AuthorizeCallback; +import javax.security.sasl.Sasl; +import javax.security.sasl.SaslException; +import javax.security.sasl.SaslServer; +import javax.security.sasl.SaslServerFactory; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.security.Provider; +import java.util.Map; + +/** + * Plain SaslServer implementation. 
See https://tools.ietf.org/html/rfc4616 + */ +public class PlainServer implements SaslServer { +// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(PlainServer.class); + + private static final String UTF_8_NULL = "\u0000"; + + public static final String MECHANISM_NAME = "PLAIN"; + + public static class PlainServerFactory implements SaslServerFactory { + +@Override +public SaslServer createSaslServer(final String mechanism, final String protocol, final String serverName, + final Map props, final CallbackHandler cbh) +throws SaslException { + return MECHANISM_NAME.equals(mechanism) ? --- End diff -- please indent the ternary operator properly to make it readable. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94879981 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" + +#define KERBEROS_SIMPLE_NAME "kerberos" +#define KERBEROS_SASL_NAME "gssapi" +#define PLAIN_NAME "plain" + +const std::map SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of +(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME) +(PLAIN_NAME, PLAIN_NAME) +; + +boost::mutex SaslAuthenticatorImpl::s_mutex; +bool SaslAuthenticatorImpl::s_initialized = false; + +SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* const properties) : +m_properties(properties), m_pConnection(NULL), m_secret(NULL) { + +if (!s_initialized) { +boost::lock_guard lock(SaslAuthenticatorImpl::s_mutex); +if (!s_initialized) { +s_initialized = true; + +// set plugin path if provided +if (DrillClientConfig::getSaslPluginPath()) { +char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath()); +sasl_set_path(0, saslPluginPath); +} + +sasl_client_init(NULL); --- End diff -- It will help if we can put a comment here that sasl_client_init loads all the available mechanism and factories in the sasl_lib referenced by the path --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94849762 --- Diff: protocol/src/main/protobuf/User.proto --- @@ -111,6 +115,21 @@ message BitToUserHandshake { optional string errorId = 4; optional string errorMessage = 5; optional RpcEndpointInfos server_infos = 6; + repeated string authenticationMechanisms = 7; --- End diff -- shouldn't this be optional? I am not sure, when a new server builds this message and sends it back to an older client, how this field is ignored on the client side without being optional. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94867052 --- Diff: exec/java-exec/src/test/java/org/apache/drill/exec/rpc/user/security/TestKerberosSaslAuthentication.java --- @@ -0,0 +1,239 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.rpc.user.security; + +import com.google.common.collect.Lists; +import com.typesafe.config.ConfigValueFactory; +import org.apache.drill.BaseTestQuery; +import org.apache.drill.common.config.ConnectionParameters; +import org.apache.drill.common.config.DrillConfig; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.rpc.user.security.testing.UserAuthenticatorTestImpl; +import org.apache.drill.exec.security.impl.LoginManagerImpl; +import org.apache.hadoop.security.authentication.util.KerberosName; +import org.apache.hadoop.security.authentication.util.KerberosUtil; +import org.apache.kerby.kerberos.kerb.KrbException; +import org.apache.kerby.kerberos.kerb.client.JaasKrbUtil; +import org.apache.kerby.kerberos.kerb.server.SimpleKdcServer; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; +import sun.security.krb5.Config; + +import javax.security.auth.Subject; +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Field; +import java.net.ServerSocket; +import java.nio.file.Files; +import java.security.PrivilegedExceptionAction; +import java.util.Properties; + +@Ignore("Expects users to exist. 
Set SERVER_SHORT_NAME to current user name to run the tests.") +public class TestKerberosSaslAuthentication extends BaseTestQuery { + private static final org.slf4j.Logger logger = + org.slf4j.LoggerFactory.getLogger(TestKerberosSaslAuthentication.class); + + private static File workspace; + + private static File kdcDir; + private static SimpleKdcServer kdc; + private static int kdcPort; + + private static final String HOSTNAME = "localhost"; + private static final String REALM = "EXAMPLE.COM"; + + private static final String CLIENT_SHORT_NAME = "client"; + private static final String CLIENT_PRINCIPAL = CLIENT_SHORT_NAME + "@" + REALM; + private static final String SERVER_SHORT_NAME = "server"; + private static final String SERVER_PRINCIPAL = SERVER_SHORT_NAME + "/" + HOSTNAME + "@" + REALM; + + private static File keytabDir; + private static File clientKeytab; + private static File serverKeytab; + + private static boolean kdcStarted; + + @BeforeClass + public static void setupKdc() throws Exception { +kdc = new SimpleKdcServer(); +workspace = new File(getTempDir("kerberos_target")); + +kdcDir = new File(workspace, TestKerberosSaslAuthentication.class.getSimpleName()); +kdcDir.mkdirs(); +kdc.setWorkDir(kdcDir); + +kdc.setKdcHost(HOSTNAME); +kdcPort = getFreePort(); +kdc.setAllowTcp(true); +kdc.setAllowUdp(false); +kdc.setKdcTcpPort(kdcPort); + +logger.debug("Starting KDC server at {}:{}", HOSTNAME, kdcPort); + +kdc.init(); +kdc.start(); +kdcStarted = true; + + +keytabDir = new File(workspace, TestKerberosSaslAuthentication.class.getSimpleName() ++ "_keytabs"); +keytabDir.mkdirs(); +setupUsers(keytabDir); + +// Kerby sets "java.security.krb5.conf" for us! +System.clearProperty("java.security.auth.login.config"); +System.setProperty("javax.security.auth.useSubjectCredsOnly", "false"); +// Uncomment the following lines for debugging. +// System.setProperty("sun.security.spnego.debug", &q
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94701486 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserServer.java --- @@ -246,28 +163,78 @@ protected void handle(UserClientConnectionImpl connection, int rpcType, ByteBuf public class UserClientConnectionImpl extends RemoteConnection implements UserClientConnection { private UserSession session; +private SaslServer saslServer; +private RequestHandler currentHandler; +private UserToBitHandshake inbound; public UserClientConnectionImpl(SocketChannel channel) { super(channel, "user client"); + currentHandler = authFactory == null ? handler : new UserServerAuthenticationHandler(handler, loginManager); } void disableReadTimeout() { getChannel().pipeline().remove(BasicServer.TIMEOUT_HANDLER); } -void setUser(final UserToBitHandshake inbound) throws IOException { +void setHandshake(final UserToBitHandshake inbound) { + this.inbound = inbound; +} + +void initSaslServer(final String mechanismName, final Map properties) +throws IllegalStateException, IllegalArgumentException, SaslException { + if (saslServer != null) { +throw new IllegalStateException("SASL server already initialized."); + } + + this.saslServer = authFactory.getMechanism(mechanismName) + .createSaslServer(properties); + if (saslServer == null) { +throw new SaslException("Server could not initiate authentication. Insufficient parameters?"); --- End diff -- Shouldn't the exception message here be "Failed to initialize Sasl Server", since that's what the function is doing ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94700422 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserAuthenticationUtil.java --- @@ -0,0 +1,255 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.rpc.user; + +import com.google.common.base.Function; +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterators; +import org.apache.drill.common.KerberosUtil; +import org.apache.drill.common.config.ConnectionParameters; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.security.UserGroupInformation; + +import javax.annotation.Nullable; +import javax.security.auth.Subject; +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.auth.login.LoginException; +import javax.security.sasl.Sasl; +import javax.security.sasl.SaslClient; +import javax.security.sasl.SaslException; +import java.io.IOException; +import java.lang.reflect.UndeclaredThrowableException; +import java.security.AccessController; +import java.security.PrivilegedExceptionAction; +import java.util.List; +import java.util.Set; + +public final class UserAuthenticationUtil { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(UserAuthenticationUtil.class); + + private static final String PLAIN_MECHANISM = "PLAIN"; + + private static final String DEFAULT_SERVICE_NAME = System.getProperty("service.name.primary", "drill"); + + private static final String DEFAULT_REALM_NAME = System.getProperty("service.name.realm", "default"); + + public enum ClientAuthenticationProvider { + +KERBEROS { + @Override + public UserGroupInformation login(final ConnectionParameters parameters) throws SaslException { +final Configuration conf = new Configuration(); +conf.set(CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION, 
+UserGroupInformation.AuthenticationMethod.KERBEROS.toString()); +UserGroupInformation.setConfiguration(conf); + +final String keytab = parameters.getParameter(ConnectionParameters.KEYTAB); +final boolean assumeSubject = parameters.getParameter(ConnectionParameters.KERBEROS_FROM_SUBJECT) != null && + Boolean.parseBoolean(parameters.getParameter(ConnectionParameters.KERBEROS_FROM_SUBJECT)); +try { + final UserGroupInformation ugi; + if (assumeSubject) { +ugi = UserGroupInformation.getUGIFromSubject(Subject.getSubject(AccessController.getContext())); +logger.debug("Assuming subject for {}.", ugi.getShortUserName()); + } else { +if (keytab != null) { + ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI( + parameters.getParameter(ConnectionParameters.USER), keytab); + logger.debug("Logged in {} using keytab.", ugi.getShortUserName()); +} else { + // includes Kerberos ticket login + ugi = UserGroupInformation.getCurrentUser(); + logger.debug("Logged in {} using ticket.", ugi.getShortUserName()); +} + } + return ugi; +} catch (final IOException e) { + logger.debug("Login failed.", e); + final Throwable cause = e.getCause(); + if (cause instanceof LoginExcep
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94880411 --- Diff: contrib/native/client/src/clientlib/drillClientImpl.hpp --- @@ -522,6 +534,13 @@ class DrillClientImpl : public DrillClientImplBase{ exec::user::RpcEndpointInfos m_serverInfos; bool m_bIsConnected; +std::vector m_mechanisms; --- End diff -- Please change it to be "m_supportedMechanisms" as in Java client or "m_serverMechanisms" since these are list of mechanisms supported by server not client. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94881310 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" + +#define KERBEROS_SIMPLE_NAME "kerberos" +#define KERBEROS_SASL_NAME "gssapi" +#define PLAIN_NAME "plain" + +const std::map SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of +(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME) +(PLAIN_NAME, PLAIN_NAME) +; + +boost::mutex SaslAuthenticatorImpl::s_mutex; +bool SaslAuthenticatorImpl::s_initialized = false; + +SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* const properties) : +m_properties(properties), m_pConnection(NULL), m_secret(NULL) { + +if (!s_initialized) { +boost::lock_guard lock(SaslAuthenticatorImpl::s_mutex); +if (!s_initialized) { +s_initialized = true; + +// set plugin path if provided +if (DrillClientConfig::getSaslPluginPath()) { +char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath()); +sasl_set_path(0, saslPluginPath); +} + +sasl_client_init(NULL); +{ // for debugging purposes +const char **mechanisms = sasl_global_listmech(); +int i = 1; +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "SASL mechanisms available on client: " << std::endl;) +while (mechanisms[i] != NULL) { +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << i << " : " << mechanisms[i] << std::endl;) +i++; +} +} +} +} +} + +SaslAuthenticatorImpl::~SaslAuthenticatorImpl() { +if (m_secret) { +free(m_secret); +} +// may be used to negotiated security layers before disposing in the future +if (m_pConnection) { +sasl_dispose(&m_pConnection); +} +m_pConnection = NULL; +} + +typedef int (*sasl_callback_proc_t)(void); // see sasl_callback_ft + +int SaslAuthenticatorImpl::userNameCallback(void *context, int id, const char **result, unsigned *len) { --- End diff -- Is memory for "context" object released during "sasl_dispose" of m_pConnection ? 
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94866946 --- Diff: exec/java-exec/src/test/java/org/apache/drill/exec/server/TestOptionsAuthEnabled.java --- @@ -17,7 +17,10 @@ */ package org.apache.drill.exec.server; +import com.google.common.collect.Lists; +import com.typesafe.config.ConfigValueFactory; import org.apache.drill.BaseTestQuery; +import org.apache.drill.common.config.ConnectionParameters; import org.apache.drill.common.config.DrillConfig; --- End diff -- It would be great to add negative tests. Some that I can think of are as below. Not sure if they are already covered. 1) Server not configured for any mechanism. 2) Server configured for mechanism but client requesting some other mechanism for authentication 3) Client getting service ticket for one host but sending message to different host. 4) Client trying to authenticate with kdc using wrong creds. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94825267 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserClientAuthenticationHandler.java --- @@ -0,0 +1,229 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.rpc.user; + +import com.google.common.collect.ImmutableMap; +import com.google.common.util.concurrent.SettableFuture; +import com.google.protobuf.ByteString; +import io.netty.buffer.ByteBuf; +import org.apache.drill.exec.proto.UserProtos.RpcType; +import org.apache.drill.exec.proto.UserProtos.SaslMessage; +import org.apache.drill.exec.proto.UserProtos.SaslStatus; +import org.apache.drill.exec.rpc.RpcException; +import org.apache.drill.exec.rpc.RpcOutcomeListener; +import org.apache.hadoop.security.UserGroupInformation; + +import javax.security.sasl.SaslClient; +import javax.security.sasl.SaslException; +import java.io.IOException; +import java.lang.reflect.UndeclaredThrowableException; +import java.security.PrivilegedExceptionAction; + +import static com.google.common.base.Preconditions.checkNotNull; + +// package private +class UserClientAuthenticationHandler implements RpcOutcomeListener { + private static final org.slf4j.Logger logger = + org.slf4j.LoggerFactory.getLogger(UserClientAuthenticationHandler.class); + + private static final ImmutableMap CHALLENGE_PROCESSORS = + ImmutableMap.builder() + .put(SaslStatus.SASL_IN_PROGRESS, new SaslInProgressProcessor()) + .put(SaslStatus.SASL_SUCCESS, new SaslSuccessProcessor()) + .put(SaslStatus.SASL_FAILED, new SaslFailedProcessor()) + .build(); + + private final UserClient client; + private final UserGroupInformation ugi; + private final SettableFuture settableFuture; + + public UserClientAuthenticationHandler(UserClient client, UserGroupInformation ugi, + SettableFuture settableFuture) { +this.client = client; +this.ugi = ugi; +this.settableFuture = settableFuture; + } + + public void initiate(final String mechanismName) { +try { + final ByteString responseData; + final SaslClient saslClient = client.getSaslClient(); + if (saslClient.hasInitialResponse()) { +responseData = ByteString.copyFrom(evaluateChallenge(ugi, saslClient, new byte[0])); + } else { +responseData = 
ByteString.EMPTY; + } + client.send(new UserClientAuthenticationHandler(client, ugi, settableFuture), + RpcType.SASL_MESSAGE, + SaslMessage.newBuilder() + .setMechanism(mechanismName) + .setStatus(SaslStatus.SASL_START) + .setData(responseData) + .build(), + SaslMessage.class); + logger.trace("Initiated SASL exchange."); +} catch (final Exception e) { + settableFuture.setException(e); +} + } + + @Override + public void failed(RpcException ex) { +settableFuture.setException(new SaslException("Unexpected failure", ex)); + } + + @Override + public void success(SaslMessage value, ByteBuf buffer) { +logger.trace("Server responded with message of type: {}", value.getStatus()); +final SaslChallengeProcessor processor = CHALLENGE_PROCESSORS.get(value.getStatus()); +if (processor == null) { + settableFuture.setException(new SaslException("Server sent a corrupt message.")); +} else { + try { +final SaslChallengeContext context = +new SaslChallengeContext(value, client.getSaslClient(), ugi, settableFuture); + +final SaslMessage saslResponse = processor.process(context); + +if (saslResponse != null) { +
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94689336 --- Diff: common/src/main/java/org/apache/drill/common/KerberosUtil.java --- @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.common; + +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; + +public final class KerberosUtil { +// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(KerberosUtil.class); --- End diff -- Please remove the commented line. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94880079 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" + +#define KERBEROS_SIMPLE_NAME "kerberos" +#define KERBEROS_SASL_NAME "gssapi" +#define PLAIN_NAME "plain" + +const std::map SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of +(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME) +(PLAIN_NAME, PLAIN_NAME) +; + +boost::mutex SaslAuthenticatorImpl::s_mutex; +bool SaslAuthenticatorImpl::s_initialized = false; + +SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* const properties) : +m_properties(properties), m_pConnection(NULL), m_secret(NULL) { + +if (!s_initialized) { +boost::lock_guard lock(SaslAuthenticatorImpl::s_mutex); +if (!s_initialized) { +s_initialized = true; + +// set plugin path if provided +if (DrillClientConfig::getSaslPluginPath()) { --- End diff -- What happens if sasl plugin path is not provided ? As per sasl_client_init doc it will return the result code like below. We should handle the return code properly. /* initialize the SASL client drivers * callbacks -- base callbacks for all client connections * returns: * SASL_OK-- Success * SASL_NOMEM -- Not enough memory * SASL_BADVERS -- Mechanism version mismatch * SASL_BADPARAM -- error in config file * SASL_NOMECH-- No mechanisms available * ... */ int sasl_client_init(const sasl_callback_t *callbacks) { --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94864855 --- Diff: exec/java-exec/src/test/java/org/apache/drill/exec/rpc/user/security/TestCustomUserAuthenticator.java --- @@ -56,16 +68,27 @@ public void positiveUserAuth() throws Exception { runTest(TEST_USER_2, TEST_USER_2_PASSWORD); } - @Test public void negativeUserAuth() throws Exception { negativeAuthHelper(TEST_USER_1, "blah.. blah.."); negativeAuthHelper(TEST_USER_2, "blah.. blah.."); -negativeAuthHelper(TEST_USER_2, ""); negativeAuthHelper("invalidUserName", "blah.. blah.."); } @Test + public void emptyPassword() throws Exception { +try { + runTest(TEST_USER_2, ""); + fail("Expected an exception."); +} catch (RpcException e) { + final String exMsg = e.getMessage(); + assertThat(exMsg, containsString("Insufficient credentials")); --- End diff -- We should check the "cause" rather than message string since that can change over time ? Same in negativeAuthHelper. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94695872 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/plain/PlainServer.java --- @@ -0,0 +1,175 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.security.plain; + +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.sasl.AuthorizeCallback; +import javax.security.sasl.Sasl; +import javax.security.sasl.SaslException; +import javax.security.sasl.SaslServer; +import javax.security.sasl.SaslServerFactory; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.security.Provider; +import java.util.Map; + +/** + * Plain SaslServer implementation. 
See https://tools.ietf.org/html/rfc4616 + */ +public class PlainServer implements SaslServer { +// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(PlainServer.class); --- End diff -- Remove this --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94862522 --- Diff: exec/java-exec/src/test/java/org/apache/drill/exec/rpc/security/TestSaslExchange.java --- @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.rpc.security; + +import com.google.common.collect.Lists; +import com.typesafe.config.ConfigValueFactory; +import org.apache.drill.BaseTestQuery; +import org.apache.drill.common.config.ConnectionParameters; +import org.apache.drill.common.config.DrillConfig; +import org.apache.drill.exec.ExecConstants; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.security.Security; +import java.util.Properties; + +public class TestSaslExchange extends BaseTestQuery { + + @BeforeClass + public static void setup() { +Security.addProvider(new SimpleProvider()); +FastSaslServerFactory.reload(); + +final Properties props = cloneDefaultTestConfigProperties(); +final DrillConfig newConfig = new DrillConfig(DrillConfig.create(props) +.withValue(ExecConstants.USER_AUTHENTICATION_ENABLED, +ConfigValueFactory.fromAnyRef("true")) +.withValue(ExecConstants.AUTHENTICATION_MECHANISMS, + ConfigValueFactory.fromIterable(Lists.newArrayList(SimpleMechanism.MECHANISM_NAME))), +false); + +final Properties connectionProps = new Properties(); +connectionProps.setProperty(ConnectionParameters.PASSWORD, "anything works!"); +updateTestCluster(3, newConfig, connectionProps); --- End diff -- How does client knows to instantiate "SimpleMechanism" in this case? I don't see getClientAuthenticationHandler generating authHandler for "SimpleMechanism". Also we are not setting "auth" in connection string. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r94700671 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserAuthenticationUtil.java --- @@ -0,0 +1,255 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.rpc.user; + +import com.google.common.base.Function; +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterators; +import org.apache.drill.common.KerberosUtil; +import org.apache.drill.common.config.ConnectionParameters; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.security.UserGroupInformation; + +import javax.annotation.Nullable; +import javax.security.auth.Subject; +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.auth.login.LoginException; +import javax.security.sasl.Sasl; +import javax.security.sasl.SaslClient; +import javax.security.sasl.SaslException; +import java.io.IOException; +import java.lang.reflect.UndeclaredThrowableException; +import java.security.AccessController; +import java.security.PrivilegedExceptionAction; +import java.util.List; +import java.util.Set; + +public final class UserAuthenticationUtil { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(UserAuthenticationUtil.class); + + private static final String PLAIN_MECHANISM = "PLAIN"; + + private static final String DEFAULT_SERVICE_NAME = System.getProperty("service.name.primary", "drill"); + + private static final String DEFAULT_REALM_NAME = System.getProperty("service.name.realm", "default"); + + public enum ClientAuthenticationProvider { + +KERBEROS { + @Override + public UserGroupInformation login(final ConnectionParameters parameters) throws SaslException { +final Configuration conf = new Configuration(); +conf.set(CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION, 
+UserGroupInformation.AuthenticationMethod.KERBEROS.toString()); +UserGroupInformation.setConfiguration(conf); + +final String keytab = parameters.getParameter(ConnectionParameters.KEYTAB); +final boolean assumeSubject = parameters.getParameter(ConnectionParameters.KERBEROS_FROM_SUBJECT) != null && + Boolean.parseBoolean(parameters.getParameter(ConnectionParameters.KERBEROS_FROM_SUBJECT)); +try { + final UserGroupInformation ugi; + if (assumeSubject) { +ugi = UserGroupInformation.getUGIFromSubject(Subject.getSubject(AccessController.getContext())); +logger.debug("Assuming subject for {}.", ugi.getShortUserName()); + } else { +if (keytab != null) { + ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI( + parameters.getParameter(ConnectionParameters.USER), keytab); + logger.debug("Logged in {} using keytab.", ugi.getShortUserName()); +} else { + // includes Kerberos ticket login + ugi = UserGroupInformation.getCurrentUser(); + logger.debug("Logged in {} using ticket.", ugi.getShortUserName()); +} + } + return ugi; +} catch (final IOException e) { + logger.debug("Login failed.", e); + final Throwable cause = e.getCause(); + if (cause instanceof LoginExcep
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r95032760 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java --- @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ **/ +package org.apache.drill.test; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import org.apache.commons.io.FileUtils; +import org.apache.drill.DrillTestWrapper.TestServices; +import org.apache.drill.QueryTestUtil; +import org.apache.drill.common.config.DrillConfig; +import org.apache.drill.common.exceptions.ExecutionSetupException; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.ZookeeperHelper; +import org.apache.drill.exec.client.DrillClient; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.drill.exec.memory.RootAllocatorFactory; +import org.apache.drill.exec.proto.UserBitShared.QueryType; +import org.apache.drill.exec.rpc.user.QueryDataBatch; +import org.apache.drill.exec.server.Drillbit; +import org.apache.drill.exec.server.RemoteServiceSet; +import org.apache.drill.exec.store.StoragePluginRegistry; +import org.apache.drill.exec.store.StoragePluginRegistryImpl; +import org.apache.drill.exec.store.dfs.FileSystemConfig; +import org.apache.drill.exec.store.dfs.FileSystemPlugin; +import org.apache.drill.exec.store.dfs.WorkspaceConfig; +import org.apache.drill.exec.store.mock.MockStorageEngine; +import org.apache.drill.exec.store.mock.MockStorageEngineConfig; +import org.apache.drill.exec.util.TestUtilities; + +import com.google.common.base.Charsets; +import com.google.common.base.Preconditions; +import com.google.common.io.Files; +import com.google.common.io.Resources; + +/** + * Test fixture to start a Drillbit with provide options, create a client, + * and execute queries. Can be used in JUnit tests, or in ad-hoc programs. + * Provides a builder to set the necessary embedded Drillbit and client + * options, then creates the requested Drillbit and client. 
+ */ + +public class ClusterFixture implements AutoCloseable { +// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ClientFixture.class); + public static final String ENABLE_FULL_CACHE = "drill.exec.test.use-full-cache"; + public static final int MAX_WIDTH_PER_NODE = 2; + + @SuppressWarnings("serial") + public static final Properties TEST_CONFIGURATIONS = new Properties() { +{ + // Properties here mimic those in drill-root/pom.xml, Surefire plugin + // configuration. They allow tests to run successfully in Eclipse. + + put(ExecConstants.SYS_STORE_PROVIDER_LOCAL_ENABLE_WRITE, false); + put(ExecConstants.HTTP_ENABLE, false); + put(Drillbit.SYSTEM_OPTIONS_NAME, "org.apache.drill.exec.compile.ClassTransformer.scalar_replacement=on"); + put(QueryTestUtil.TEST_QUERY_PRINTING_SILENT, true); + put("drill.catastrophic_to_standard_out", true); + + // See Drillbit.close. The Drillbit normally waits a specified amount + // of time for ZK registration to drop. But, embedded Drillbits normally + // don't use ZK, so no need to wait. + + put(ExecConstants.ZK_REFRESH, 0); + + // This is just a test, no need to be heavy-duty on threads. + // This is the number of server and client RPC threads. The + // production default is DEFAULT_SERVER_RPC_THREADS. + + put(ExecConstants.BIT_SERVER_RPC_THREADS, 2); + +
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r95036224 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java --- @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ **/ +package org.apache.drill.test; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import org.apache.commons.io.FileUtils; +import org.apache.drill.DrillTestWrapper.TestServices; +import org.apache.drill.QueryTestUtil; +import org.apache.drill.common.config.DrillConfig; +import org.apache.drill.common.exceptions.ExecutionSetupException; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.ZookeeperHelper; +import org.apache.drill.exec.client.DrillClient; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.drill.exec.memory.RootAllocatorFactory; +import org.apache.drill.exec.proto.UserBitShared.QueryType; +import org.apache.drill.exec.rpc.user.QueryDataBatch; +import org.apache.drill.exec.server.Drillbit; +import org.apache.drill.exec.server.RemoteServiceSet; +import org.apache.drill.exec.store.StoragePluginRegistry; +import org.apache.drill.exec.store.StoragePluginRegistryImpl; +import org.apache.drill.exec.store.dfs.FileSystemConfig; +import org.apache.drill.exec.store.dfs.FileSystemPlugin; +import org.apache.drill.exec.store.dfs.WorkspaceConfig; +import org.apache.drill.exec.store.mock.MockStorageEngine; +import org.apache.drill.exec.store.mock.MockStorageEngineConfig; +import org.apache.drill.exec.util.TestUtilities; + +import com.google.common.base.Charsets; +import com.google.common.base.Preconditions; +import com.google.common.io.Files; +import com.google.common.io.Resources; + +/** + * Test fixture to start a Drillbit with provide options, create a client, + * and execute queries. Can be used in JUnit tests, or in ad-hoc programs. + * Provides a builder to set the necessary embedded Drillbit and client + * options, then creates the requested Drillbit and client. 
+ */ + +public class ClusterFixture implements AutoCloseable { +// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ClientFixture.class); + public static final String ENABLE_FULL_CACHE = "drill.exec.test.use-full-cache"; + public static final int MAX_WIDTH_PER_NODE = 2; + + @SuppressWarnings("serial") + public static final Properties TEST_CONFIGURATIONS = new Properties() { +{ + // Properties here mimic those in drill-root/pom.xml, Surefire plugin + // configuration. They allow tests to run successfully in Eclipse. + + put(ExecConstants.SYS_STORE_PROVIDER_LOCAL_ENABLE_WRITE, false); + put(ExecConstants.HTTP_ENABLE, false); + put(Drillbit.SYSTEM_OPTIONS_NAME, "org.apache.drill.exec.compile.ClassTransformer.scalar_replacement=on"); + put(QueryTestUtil.TEST_QUERY_PRINTING_SILENT, true); + put("drill.catastrophic_to_standard_out", true); + + // See Drillbit.close. The Drillbit normally waits a specified amount + // of time for ZK registration to drop. But, embedded Drillbits normally + // don't use ZK, so no need to wait. + + put(ExecConstants.ZK_REFRESH, 0); + + // This is just a test, no need to be heavy-duty on threads. + // This is the number of server and client RPC threads. The + // production default is DEFAULT_SERVER_RPC_THREADS. + + put(ExecConstants.BIT_SERVER_RPC_THREADS, 2); + +
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r95038015 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java --- @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ **/ +package org.apache.drill.test; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import org.apache.commons.io.FileUtils; +import org.apache.drill.DrillTestWrapper.TestServices; +import org.apache.drill.QueryTestUtil; +import org.apache.drill.common.config.DrillConfig; +import org.apache.drill.common.exceptions.ExecutionSetupException; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.ZookeeperHelper; +import org.apache.drill.exec.client.DrillClient; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.drill.exec.memory.RootAllocatorFactory; +import org.apache.drill.exec.proto.UserBitShared.QueryType; +import org.apache.drill.exec.rpc.user.QueryDataBatch; +import org.apache.drill.exec.server.Drillbit; +import org.apache.drill.exec.server.RemoteServiceSet; +import org.apache.drill.exec.store.StoragePluginRegistry; +import org.apache.drill.exec.store.StoragePluginRegistryImpl; +import org.apache.drill.exec.store.dfs.FileSystemConfig; +import org.apache.drill.exec.store.dfs.FileSystemPlugin; +import org.apache.drill.exec.store.dfs.WorkspaceConfig; +import org.apache.drill.exec.store.mock.MockStorageEngine; +import org.apache.drill.exec.store.mock.MockStorageEngineConfig; +import org.apache.drill.exec.util.TestUtilities; + +import com.google.common.base.Charsets; +import com.google.common.base.Preconditions; +import com.google.common.io.Files; +import com.google.common.io.Resources; + +/** + * Test fixture to start a Drillbit with provide options, create a client, + * and execute queries. Can be used in JUnit tests, or in ad-hoc programs. + * Provides a builder to set the necessary embedded Drillbit and client + * options, then creates the requested Drillbit and client. 
+ */ + +public class ClusterFixture implements AutoCloseable { +// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ClientFixture.class); + public static final String ENABLE_FULL_CACHE = "drill.exec.test.use-full-cache"; + public static final int MAX_WIDTH_PER_NODE = 2; + + @SuppressWarnings("serial") + public static final Properties TEST_CONFIGURATIONS = new Properties() { +{ + // Properties here mimic those in drill-root/pom.xml, Surefire plugin + // configuration. They allow tests to run successfully in Eclipse. + + put(ExecConstants.SYS_STORE_PROVIDER_LOCAL_ENABLE_WRITE, false); + put(ExecConstants.HTTP_ENABLE, false); + put(Drillbit.SYSTEM_OPTIONS_NAME, "org.apache.drill.exec.compile.ClassTransformer.scalar_replacement=on"); + put(QueryTestUtil.TEST_QUERY_PRINTING_SILENT, true); + put("drill.catastrophic_to_standard_out", true); + + // See Drillbit.close. The Drillbit normally waits a specified amount + // of time for ZK registration to drop. But, embedded Drillbits normally + // don't use ZK, so no need to wait. + + put(ExecConstants.ZK_REFRESH, 0); + + // This is just a test, no need to be heavy-duty on threads. + // This is the number of server and client RPC threads. The + // production default is DEFAULT_SERVER_RPC_THREADS. + + put(ExecConstants.BIT_SERVER_RPC_THREADS, 2); + +
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r95026609 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/FixtureBuilder.java --- @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.drill.test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.ZookeeperHelper; + +/** + * Build a Drillbit and client with the options provided. The simplest + * builder starts an embedded Drillbit, with the "dfs_test" name space, + * a max width (parallelization) of 2. + */ + +public class FixtureBuilder { + + public static class RuntimeOption { +public String key; +public Object value; + +public RuntimeOption(String key, Object value) { + this.key = key; + this.value = value; +} + } + + // Values in the drill-module.conf file for values that are customized + // in the defaults. 
+ + public static final int DEFAULT_ZK_REFRESH = 500; // ms + public static final int DEFAULT_SERVER_RPC_THREADS = 10; + public static final int DEFAULT_SCAN_THREADS = 8; + + public static Properties defaultProps() { +Properties props = new Properties(); +props.putAll(ClusterFixture.TEST_CONFIGURATIONS); +return props; + } + + String configResource; + Properties configProps; + boolean enableFullCache; + List sessionOptions; + List systemOptions; + int bitCount = 1; + String bitNames[]; + int zkCount; + ZookeeperHelper zkHelper; + + /** + * Use the given configuration properties to start the embedded Drillbit. + * @param configProps a collection of config properties + * @return this builder + * @see {@link #configProperty(String, Object)} + */ + + public FixtureBuilder configProps(Properties configProps) { +this.configProps = configProps; +return this; + } + + /** + * Use the given configuration file, stored as a resource, to start the + * embedded Drillbit. Note that the resource file should have the two + * following settings to work as a test: + * + * drill.exec.sys.store.provider.local.write : false, + * drill.exec.http.enabled : false + * + * It may be more convenient to add your settings to the default + * config settings with {@link #configProperty(String, Object)}. + * @param configResource path to the file that contains the + * config file to be read + * @return this builder + * @see {@link #configProperty(String, Object)} + */ + + public FixtureBuilder configResource(String configResource) { + +// TypeSafe gets unhappy about a leading slash, but other functions +// require it. Silently discard the leading slash if given to +// preserve the test writer's sanity. + +this.configResource = ClusterFixture.trimSlash(configResource); +return this; + } + + /** + * Add an additional boot-time property for the embedded Drillbit. 
+ * @param key config property name + * @param value property value + * @return this builder + */ + + public FixtureBuilder configProperty(String key, Object value) { +if (configProps == null) { + configProps = defaultProps(); +} +configProps.put(key, value.toString()); +return this; + } + + /** + * Provide a session option to be set once the Drillbit + * is started. + * + * @param key the name of the session option + * @param value the value of the session option + * @return this builder + * @see {@link ClusterFixture#alterSession(String, Obje
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r95019120 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/FixtureBuilder.java --- @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.drill.test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.ZookeeperHelper; + +/** + * Build a Drillbit and client with the options provided. The simplest + * builder starts an embedded Drillbit, with the "dfs_test" name space, + * a max width (parallelization) of 2. + */ + +public class FixtureBuilder { + + public static class RuntimeOption { +public String key; +public Object value; + +public RuntimeOption(String key, Object value) { + this.key = key; + this.value = value; +} + } + + // Values in the drill-module.conf file for values that are customized + // in the defaults. 
+ + public static final int DEFAULT_ZK_REFRESH = 500; // ms + public static final int DEFAULT_SERVER_RPC_THREADS = 10; + public static final int DEFAULT_SCAN_THREADS = 8; + + public static Properties defaultProps() { +Properties props = new Properties(); +props.putAll(ClusterFixture.TEST_CONFIGURATIONS); +return props; + } + + String configResource; + Properties configProps; + boolean enableFullCache; + List sessionOptions; + List systemOptions; + int bitCount = 1; + String bitNames[]; + int zkCount; + ZookeeperHelper zkHelper; + + /** + * Use the given configuration properties to start the embedded Drillbit. + * @param configProps a collection of config properties + * @return this builder + * @see {@link #configProperty(String, Object)} + */ + + public FixtureBuilder configProps(Properties configProps) { +this.configProps = configProps; +return this; + } + + /** + * Use the given configuration file, stored as a resource, to start the + * embedded Drillbit. Note that the resource file should have the two + * following settings to work as a test: + * + * drill.exec.sys.store.provider.local.write : false, + * drill.exec.http.enabled : false + * + * It may be more convenient to add your settings to the default + * config settings with {@link #configProperty(String, Object)}. + * @param configResource path to the file that contains the + * config file to be read + * @return this builder + * @see {@link #configProperty(String, Object)} + */ + + public FixtureBuilder configResource(String configResource) { + +// TypeSafe gets unhappy about a leading slash, but other functions +// require it. Silently discard the leading slash if given to +// preserve the test writer's sanity. + +this.configResource = ClusterFixture.trimSlash(configResource); +return this; + } + + /** + * Add an additional boot-time property for the embedded Drillbit. 
+ * @param key config property name + * @param value property value + * @return this builder + */ + + public FixtureBuilder configProperty(String key, Object value) { +if (configProps == null) { + configProps = defaultProps(); +} +configProps.put(key, value.toString()); +return this; + } + + /** + * Provide a session option to be set once the Drillbit + * is started. + * + * @param key the name of the session option + * @param value the value of the session option + * @return this builder + * @see {@link ClusterFixture#alterSession(String, Obje
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r95047142 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ProfileParser.java --- @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.drill.test; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonNumber; +import javax.json.JsonObject; +import javax.json.JsonReader; +import javax.json.JsonValue; + +/** + * Parses a query profile and provides access to various bits of the profile + * for diagnostic purposes during tests. 
+ */ + +public class ProfileParser { + + JsonObject profile; + List plans; + + public ProfileParser( File file ) throws IOException { +try (FileReader fileReader = new FileReader(file); + JsonReader reader = Json.createReader(fileReader)) { + profile = (JsonObject) reader.read(); +} + } + + public String getQuery( ) { +return profile.get("query").toString(); + } + + public String getPlan() { +return profile.get("plan").toString(); + } + + public List getPlans() { +if ( plans != null ) { + return plans; } +String plan = getPlan( ); +Pattern p = Pattern.compile( "(\\d\\d-\\d+[^]*)n", Pattern.MULTILINE ); +Matcher m = p.matcher(plan); +plans = new ArrayList<>( ); +while ( m.find() ) { + plans.add(m.group(1)); +} +return plans; + } + + public String getScan( ) { +int n = getPlans( ).size(); +Pattern p = Pattern.compile( "\\d+-\\d+\\s+(\\w+)\\(" ); +for ( int i = n-1; i >= 0; i-- ) { + String plan = plans.get( i ); + Matcher m = p.matcher( plan ); + if ( ! m.find() ) { continue; } + if ( m.group(1).equals( "Scan" ) ) { +return plan; } +} +return null; + } + + public List getColumns( String plan ) { +Pattern p = Pattern.compile( "RecordType\\((.*)\\):" ); +Matcher m = p.matcher(plan); +if ( ! m.find() ) { return null; } +String frag = m.group(1); +String parts[] = frag.split( ", " ); +List fields = new ArrayList<>( ); +for ( String part : parts ) { + String halves[] = part.split( " " ); + fields.add( new FieldDef( halves[1], halves[0] ) ); +} +return fields; + } + + public Map getOperators( ) { +Map ops = new HashMap<>(); +int n = getPlans( ).size(); +Pattern p = Pattern.compile( "\\d+-(\\d+)\\s+(\\w+)" ); --- End diff -- Verified all the regex with a sample plan from a query profile. Looks good! --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r95035750 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java --- @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ **/ +package org.apache.drill.test; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import org.apache.commons.io.FileUtils; +import org.apache.drill.DrillTestWrapper.TestServices; +import org.apache.drill.QueryTestUtil; +import org.apache.drill.common.config.DrillConfig; +import org.apache.drill.common.exceptions.ExecutionSetupException; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.ZookeeperHelper; +import org.apache.drill.exec.client.DrillClient; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.drill.exec.memory.RootAllocatorFactory; +import org.apache.drill.exec.proto.UserBitShared.QueryType; +import org.apache.drill.exec.rpc.user.QueryDataBatch; +import org.apache.drill.exec.server.Drillbit; +import org.apache.drill.exec.server.RemoteServiceSet; +import org.apache.drill.exec.store.StoragePluginRegistry; +import org.apache.drill.exec.store.StoragePluginRegistryImpl; +import org.apache.drill.exec.store.dfs.FileSystemConfig; +import org.apache.drill.exec.store.dfs.FileSystemPlugin; +import org.apache.drill.exec.store.dfs.WorkspaceConfig; +import org.apache.drill.exec.store.mock.MockStorageEngine; +import org.apache.drill.exec.store.mock.MockStorageEngineConfig; +import org.apache.drill.exec.util.TestUtilities; + +import com.google.common.base.Charsets; +import com.google.common.base.Preconditions; +import com.google.common.io.Files; +import com.google.common.io.Resources; + +/** + * Test fixture to start a Drillbit with provide options, create a client, + * and execute queries. Can be used in JUnit tests, or in ad-hoc programs. + * Provides a builder to set the necessary embedded Drillbit and client + * options, then creates the requested Drillbit and client. 
+ */ + +public class ClusterFixture implements AutoCloseable { +// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ClientFixture.class); + public static final String ENABLE_FULL_CACHE = "drill.exec.test.use-full-cache"; + public static final int MAX_WIDTH_PER_NODE = 2; + + @SuppressWarnings("serial") + public static final Properties TEST_CONFIGURATIONS = new Properties() { +{ + // Properties here mimic those in drill-root/pom.xml, Surefire plugin + // configuration. They allow tests to run successfully in Eclipse. + + put(ExecConstants.SYS_STORE_PROVIDER_LOCAL_ENABLE_WRITE, false); + put(ExecConstants.HTTP_ENABLE, false); + put(Drillbit.SYSTEM_OPTIONS_NAME, "org.apache.drill.exec.compile.ClassTransformer.scalar_replacement=on"); + put(QueryTestUtil.TEST_QUERY_PRINTING_SILENT, true); + put("drill.catastrophic_to_standard_out", true); + + // See Drillbit.close. The Drillbit normally waits a specified amount + // of time for ZK registration to drop. But, embedded Drillbits normally + // don't use ZK, so no need to wait. + + put(ExecConstants.ZK_REFRESH, 0); + + // This is just a test, no need to be heavy-duty on threads. + // This is the number of server and client RPC threads. The + // production default is DEFAULT_SERVER_RPC_THREADS. + + put(ExecConstants.BIT_SERVER_RPC_THREADS, 2); + +
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r95046976 --- Diff: exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/TestSimpleExternalSort.java --- @@ -22,220 +22,167 @@ import java.util.List; -import org.apache.drill.BaseTestQuery; -import org.apache.drill.common.config.DrillConfig; import org.apache.drill.common.expression.ExpressionPosition; import org.apache.drill.common.expression.SchemaPath; -import org.apache.drill.common.util.FileUtils; import org.apache.drill.common.util.TestTools; -import org.apache.drill.exec.client.DrillClient; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.record.RecordBatchLoader; import org.apache.drill.exec.rpc.user.QueryDataBatch; -import org.apache.drill.exec.server.Drillbit; -import org.apache.drill.exec.server.RemoteServiceSet; import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.test.ClientFixture; +import org.apache.drill.test.ClusterFixture; +import org.apache.drill.test.ClusterTest; +import org.apache.drill.test.DrillTest; +import org.apache.drill.test.FixtureBuilder; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TestRule; -import com.google.common.base.Charsets; -import com.google.common.io.Files; - -@Ignore -public class TestSimpleExternalSort extends BaseTestQuery { +public class TestSimpleExternalSort extends DrillTest { static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestSimpleExternalSort.class); - DrillConfig c = DrillConfig.create(); - @Rule public final TestRule TIMEOUT = TestTools.getTimeoutRule(8); @Test - public void mergeSortWithSv2() throws Exception { -List results = testPhysicalFromFileWithResults("xsort/one_key_sort_descending_sv2.json"); -int count = 0; -for(QueryDataBatch b : results) { 
- if (b.getHeader().getRowCount() != 0) { -count += b.getHeader().getRowCount(); - } -} -assertEquals(50, count); - -long previousBigInt = Long.MAX_VALUE; - -int recordCount = 0; -int batchCount = 0; - -for (QueryDataBatch b : results) { - if (b.getHeader().getRowCount() == 0) { -break; - } - batchCount++; - RecordBatchLoader loader = new RecordBatchLoader(allocator); - loader.load(b.getHeader().getDef(),b.getData()); - BigIntVector c1 = (BigIntVector) loader.getValueAccessorById(BigIntVector.class, - loader.getValueVectorId(new SchemaPath("blue", ExpressionPosition.UNKNOWN)).getFieldIds()).getValueVector(); - - - BigIntVector.Accessor a1 = c1.getAccessor(); + public void mergeSortWithSv2Legacy() throws Exception { +mergeSortWithSv2(true); + } - for (int i =0; i < c1.getAccessor().getValueCount(); i++) { -recordCount++; -assertTrue(String.format("%d > %d", previousBigInt, a1.get(i)), previousBigInt >= a1.get(i)); -previousBigInt = a1.get(i); - } - loader.clear(); - b.release(); + /** + * Tests the external sort using an in-memory sort. Relies on default memory + * settings to be large enough to do the in-memory sort (there is, + * unfortunately, no way to double-check that no spilling was done.) + * This must be checked manually by setting a breakpoint in the in-memory + * sort routine. 
+ * + * @param testLegacy + * @throws Exception + */ + + private void mergeSortWithSv2(boolean testLegacy) throws Exception { +try (ClusterFixture cluster = ClusterFixture.standardCluster( ); + ClientFixture client = cluster.clientFixture()) { + chooseImpl(client, testLegacy); + List results = client.queryBuilder().physicalResource("xsort/one_key_sort_descending_sv2.json").results(); + assertEquals(50, client.countResults( results )); + validateResults(client.allocator(), results); } + } -System.out.println(String.format("Sorted %,d records in %d batches.", recordCount, batchCount)); + private void chooseImpl(ClientFixture client, boolean testLegacy) throws Exception { --- End diff -- Will the implementation for this be added once the new sort mechanism is checked in ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r95044899 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ProfileParser.java --- @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.drill.test; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonNumber; +import javax.json.JsonObject; +import javax.json.JsonReader; +import javax.json.JsonValue; + +/** + * Parses a query profile and provides access to various bits of the profile + * for diagnostic purposes during tests. 
+ */ + +public class ProfileParser { + + JsonObject profile; + List plans; + + public ProfileParser( File file ) throws IOException { +try (FileReader fileReader = new FileReader(file); + JsonReader reader = Json.createReader(fileReader)) { + profile = (JsonObject) reader.read(); +} + } + + public String getQuery( ) { +return profile.get("query").toString(); + } + + public String getPlan() { +return profile.get("plan").toString(); + } + + public List getPlans() { +if ( plans != null ) { + return plans; } +String plan = getPlan( ); +Pattern p = Pattern.compile( "(\\d\\d-\\d+[^]*)n", Pattern.MULTILINE ); +Matcher m = p.matcher(plan); +plans = new ArrayList<>( ); +while ( m.find() ) { + plans.add(m.group(1)); +} +return plans; + } + + public String getScan( ) { +int n = getPlans( ).size(); +Pattern p = Pattern.compile( "\\d+-\\d+\\s+(\\w+)\\(" ); --- End diff -- So we are just trying to find if this plan has Scan keyword after fragment number. Can't we simply use plan.contains("Scan") ? I guess the scan keyword will only be in plans which are actually Scan's.? Also there can be multiple Scans in the plan right ? In which case we should return list of plans here. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r94505587 --- Diff: exec/java-exec/pom.xml --- @@ -458,6 +458,12 @@ httpdlog-parser 2.4 + --- End diff -- Please fix the indentation --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r95041561 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java --- @@ -0,0 +1,314 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ **/ +package org.apache.drill.test; + +import java.util.List; + +import org.apache.drill.PlanTestBase; +import org.apache.drill.QueryTestUtil; +import org.apache.drill.common.config.DrillConfig; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.exec.client.PrintingResultsListener; +import org.apache.drill.exec.client.QuerySubmitter.Format; +import org.apache.drill.exec.proto.UserBitShared.QueryId; +import org.apache.drill.exec.proto.UserBitShared.QueryType; +import org.apache.drill.exec.proto.helper.QueryIdHelper; +import org.apache.drill.exec.record.RecordBatchLoader; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.rpc.RpcException; +import org.apache.drill.exec.rpc.user.AwaitableUserResultsListener; +import org.apache.drill.exec.rpc.user.QueryDataBatch; +import org.apache.drill.exec.rpc.user.UserResultsListener; +import org.apache.drill.exec.util.VectorUtil; +import org.apache.drill.exec.vector.NullableVarCharVector; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.test.BufferingQueryEventListener.QueryEvent; + +import com.google.common.base.Preconditions; + +/** + * Builder for a Drill query. Provides all types of query formats, + * and a variety of ways to run the query. + */ + +public class QueryBuilder { + + /** + * Summary results of a query: records, batches, run time. 
+ */ + + public static class QuerySummary { +private final QueryId queryId; +private final int records; +private final int batches; +private final long ms; + +public QuerySummary(QueryId queryId, int recordCount, int batchCount, long elapsed) { + this.queryId = queryId; + records = recordCount; + batches = batchCount; + ms = elapsed; +} + +public long recordCount( ) { return records; } +public int batchCount( ) { return batches; } +public long runTimeMs( ) { return ms; } +public QueryId queryId( ) { return queryId; } +public String queryIdString( ) { return QueryIdHelper.getQueryId(queryId); } + + } + + private final ClientFixture client; + private QueryType queryType; + private String queryText; + + QueryBuilder(ClientFixture client) { +this.client = client; + } + + public QueryBuilder query(QueryType type, String text) { +queryType = type; +queryText = text; +return this; + } + + public QueryBuilder sql(String sql) { +return query( QueryType.SQL, sql ); + } + + public QueryBuilder sql(String query, Object... args) { +return sql(String.format(query, args)); + } + + public QueryBuilder physical(String plan) { +return query( QueryType.PHYSICAL, plan); + } + + public QueryBuilder sqlResource(String resource) { +sql(ClusterFixture.loadResource(resource)); +return this; + } + + public QueryBuilder sqlResource(String resource, Object... args) { +sql(ClusterFixture.loadResource(resource), args); +return this; + } + + public QueryBuilder physicalResource(String resource) { +physical(ClusterFixture.loadResource(resource)); +return this; + } + + /** + * Run the query returning just a summary of the results: record count, + * batch count and run time. Handy when doing performance tests when the + * validity of the results is verified in some other test. + * + * @retu
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r95045450 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/FieldDef.java --- @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.drill.test; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Basic representation of a column parsed from a query profile. + * Idea is to use this to generate mock data that represents a + * query obtained from a user. This is a work in progress. + */ + +public class FieldDef { + public enum Type { VARCHAR, DOUBLE }; + public enum TypeHint { DATE, TIME }; + + public final String name; + public final String typeStr; + public final Type type; + public int length; + public TypeHint hint; + + public FieldDef( String name, String typeStr ) { +this.name = name; +this.typeStr = typeStr; +Pattern p = Pattern.compile( "(\\w+)(?:\\((\\d+)\\))?" ); +Matcher m = p.matcher(typeStr); +if ( ! 
m.matches() ) { throw new IllegalStateException( ); } +if ( m.group(2) == null ) { + length = 0; +} else { + length = Integer.parseInt( m.group(2) ); --- End diff -- It would be great to add a comment that group(2) is for VARCHAR types which has field length. e.g.: VARCHAR(2147483647) --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #708: DRILL-5152: Enhance the mock data source: better da...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/708#discussion_r95047869 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java --- @@ -40,12 +39,12 @@ private final String url; protected final List readEntries; -// private final OperatorCost cost; -// private final Size size; - private LinkedList[] mappings; + private boolean extended; @JsonCreator - public MockSubScanPOP(@JsonProperty("url") String url, @JsonProperty("entries") List readEntries) { + public MockSubScanPOP(@JsonProperty("url") String url, --- End diff -- Can we overload the constructor instead ? That way for new parameters default value will be initialized and we don't have to change the older usage. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #708: DRILL-5152: Enhance the mock data source: better da...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/708#discussion_r94893846 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/IntGen.java --- @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.mock; + +import java.util.Random; + +import org.apache.drill.exec.vector.IntVector; +import org.apache.drill.exec.vector.ValueVector; + +public class IntGen implements FieldGen { + + Random rand = new Random( ); + + @Override + public void setup(ColumnDef colDef) { } + + public int value( ) { --- End diff -- shouldn't be private for all the types ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #708: DRILL-5152: Enhance the mock data source: better da...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/708#discussion_r95051723 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ExtendedMockRecordReader.java --- @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.store.mock; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.drill.common.exceptions.ExecutionSetupException; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.ops.OperatorContext; +import org.apache.drill.exec.physical.impl.OutputMutator; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.store.AbstractRecordReader; +import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockColumn; +import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockScanEntry; +import org.apache.drill.exec.vector.AllocationHelper; +import org.apache.drill.exec.vector.ValueVector; + +public class ExtendedMockRecordReader extends AbstractRecordReader { + + private ValueVector[] valueVectors; + private int batchRecordCount; + private int recordsRead; + + private final MockScanEntry config; + private final FragmentContext context; + private final ColumnDef fields[]; + + public ExtendedMockRecordReader(FragmentContext context, MockScanEntry config) { +this.context = context; +this.config = config; + +fields = buildColumnDefs( ); + } + + private ColumnDef[] buildColumnDefs() { +List defs = new ArrayList<>( ); + +// Look for duplicate names. Bad things happen when the sama name +// appears twice. 
+ +Set names = new HashSet<>(); +MockColumn cols[] = config.getTypes(); +for ( int i = 0; i < cols.length; i++ ) { + MockColumn col = cols[i]; + if (names.contains(col.name)) { +throw new IllegalArgumentException("Duplicate column name: " + col.name); + } + names.add(col.name); + int repeat = Math.min( 1, col.getRepeatCount( ) ); + if ( repeat == 1 ) { +defs.add( new ColumnDef(col) ); + } else { +for ( int j = 0; j < repeat; j++ ) { + defs.add( new ColumnDef(col, j+1) ); +} + } +} +ColumnDef[] defArray = new ColumnDef[defs.size()]; +defs.toArray(defArray); +return defArray; + } + + private int getEstimatedRecordSize(MockColumn[] types) { +int size = 0; +for (int i = 0; i < fields.length; i++) { + size += TypeHelper.getSize(fields[i].getConfig().getMajorType()); +} +return size; + } + + private MaterializedField getVector(String name, MajorType type, int length) { --- End diff -- `length` parameter is not used anywhere ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #708: DRILL-5152: Enhance the mock data source: better da...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/708#discussion_r95051644 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ExtendedMockRecordReader.java --- @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.store.mock; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.drill.common.exceptions.ExecutionSetupException; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.ops.OperatorContext; +import org.apache.drill.exec.physical.impl.OutputMutator; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.store.AbstractRecordReader; +import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockColumn; +import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockScanEntry; +import org.apache.drill.exec.vector.AllocationHelper; +import org.apache.drill.exec.vector.ValueVector; + +public class ExtendedMockRecordReader extends AbstractRecordReader { + + private ValueVector[] valueVectors; + private int batchRecordCount; + private int recordsRead; + + private final MockScanEntry config; + private final FragmentContext context; + private final ColumnDef fields[]; + + public ExtendedMockRecordReader(FragmentContext context, MockScanEntry config) { +this.context = context; +this.config = config; + +fields = buildColumnDefs( ); + } + + private ColumnDef[] buildColumnDefs() { +List defs = new ArrayList<>( ); + +// Look for duplicate names. Bad things happen when the sama name --- End diff -- same --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #708: DRILL-5152: Enhance the mock data source: better da...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/708#discussion_r95047914 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java --- @@ -104,44 +102,46 @@ public String toString() { } @JsonInclude(Include.NON_NULL) - public static class MockColumn{ + public static class MockColumn { @JsonProperty("type") public MinorType minorType; public String name; public DataMode mode; public Integer width; public Integer precision; public Integer scale; - +public String generator; +public Integer repeat; @JsonCreator -public MockColumn(@JsonProperty("name") String name, @JsonProperty("type") MinorType minorType, @JsonProperty("mode") DataMode mode, @JsonProperty("width") Integer width, @JsonProperty("precision") Integer precision, @JsonProperty("scale") Integer scale) { --- End diff -- Same here - for overloading constructor. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #708: DRILL-5152: Enhance the mock data source: better da...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/708#discussion_r95051613 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ColumnDef.java --- @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.mock; + +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockColumn; + +/** + * Defines a column for the "enhanced" version of the mock data + * source. This class is built from the column definitions in either + * the physical plan or an SQL statement (which gives rise to a + * physical plan.) + */ + +public class ColumnDef { + public MockColumn mockCol; + public String name; + public int width; + public FieldGen generator; + + public ColumnDef( MockColumn mockCol ) { +this.mockCol = mockCol; +name = mockCol.getName(); +width = TypeHelper.getSize(mockCol.getMajorType()); +makeGenerator( ); + } + + /** + * Create the data generator class for this column. The generator is + * created to match the data type by default. 
Or, the plan can + * specify a generator class (in which case the plan must ensure that + * the generator produces the correct value for the column data type.) + * The generator names a class: either a fully qualified name, or a + * class in this package. + */ + + private void makeGenerator( ) { +String genName = mockCol.getGenerator( ); +if ( genName != null ) { + if ( ! genName.contains(".") ) { +genName = "org.apache.drill.exec.store.mock." + genName; + } + try { +ClassLoader cl = getClass( ).getClassLoader(); +Class genClass = cl.loadClass(genName); +generator = (FieldGen) genClass.newInstance( ); + } catch (ClassNotFoundException | InstantiationException + | IllegalAccessException | ClassCastException e) { +throw new IllegalArgumentException( "Generator " + genName + " is undefined for mock field " + name ); + } + generator.setup( this ); + return; +} + +makeDefaultGenerator( ); + } + + private void makeDefaultGenerator( ) { + +MinorType minorType = mockCol.getMinorType(); +switch ( minorType ) { +case BIGINT: + break; +case BIT: + break; +case DATE: + break; +case DECIMAL18: + break; +case DECIMAL28DENSE: + break; +case DECIMAL28SPARSE: + break; +case DECIMAL38DENSE: + break; +case DECIMAL38SPARSE: + break; +case DECIMAL9: + break; +case FIXED16CHAR: + break; +case FIXEDBINARY: + break; +case FIXEDCHAR: + break; +case FLOAT4: + break; +case FLOAT8: + generator = new DoubleGen( ); + break; +case GENERIC_OBJECT: + break; +case INT: + generator = new IntGen( ); + break; +case INTERVAL: + break; +case INTERVALDAY: + break; +case INTERVALYEAR: + break; +case LATE: + break; +case LIST: + break; +case MAP: + break; +case MONEY: + break; +case NULL: + break; +case SMALLINT: + break; +case TIME: + break; +case TIMESTAMP: + break; +case TIMESTAMPTZ: + break; +case TIMETZ: + break; +case TINYINT: + break; +case UINT1: + break; +case UINT2: + break; +case UINT4: + break; +case UINT8: +
[GitHub] drill pull request #708: DRILL-5152: Enhance the mock data source: better da...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/708#discussion_r94893412 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java --- @@ -40,12 +39,12 @@ private final String url; protected final List readEntries; -// private final OperatorCost cost; -// private final Size size; - private LinkedList[] mappings; + private boolean extended; --- End diff -- final ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #708: DRILL-5152: Enhance the mock data source: better da...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/708#discussion_r95068510 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java --- @@ -346,6 +368,7 @@ public SchemaFactory getSchemaFactory() { public class DrillSchemaFactory implements SchemaFactory { +@SuppressWarnings({ "resource" }) --- End diff -- same here --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #708: DRILL-5152: Enhance the mock data source: better da...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/708#discussion_r95068533 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ColumnDef.java --- @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.mock; + +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockColumn; + +/** + * Defines a column for the "enhanced" version of the mock data + * source. This class is built from the column definitions in either + * the physical plan or an SQL statement (which gives rise to a + * physical plan.) + */ + +public class ColumnDef { + public MockColumn mockCol; + public String name; + public int width; + public FieldGen generator; + + public ColumnDef( MockColumn mockCol ) { --- End diff -- Earlier I thought the extra spaces were intentional, but since you fixed them in some places it looks more like an editor problem? I am still seeing them in multiple places; please fix if needed. E.g. `ColumnDef( )`, `makeGenerator( )` and in all the Gen.java files.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #708: DRILL-5152: Enhance the mock data source: better da...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/708#discussion_r95068660 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java --- @@ -56,11 +79,11 @@ public MockSubScanPOP(@JsonProperty("url") String url, @JsonProperty("entries") //this.cost = cost; //this.size = size; this.url = url; +this.extended = extended == null ? false : extended; } - public String getUrl() { -return url; - } + public String getUrl() { return url; } + public boolean isExtended( ) { return extended; } --- End diff -- formatting --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #708: DRILL-5152: Enhance the mock data source: better da...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/708#discussion_r95068500 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java --- @@ -125,9 +125,10 @@ public void init() throws DrillbitStartupException { availablePlugins = findAvailablePlugins(classpathScan); // create registered plugins defined in "storage-plugins.json" -this.plugins.putAll(createPlugins()); +plugins.putAll(createPlugins()); } + @SuppressWarnings({ "resource" }) --- End diff -- {} array notation not required. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r96785539 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java --- @@ -57,20 +166,36 @@ private final int records; private final int batches; private final long ms; +private final QueryState finalState; +private final Exception error; -public QuerySummary(QueryId queryId, int recordCount, int batchCount, long elapsed) { +public QuerySummary(QueryId queryId, int recordCount, int batchCount, long elapsed, QueryState state) { this.queryId = queryId; records = recordCount; batches = batchCount; ms = elapsed; + finalState = state; + error = null; } -public long recordCount( ) { return records; } -public int batchCount( ) { return batches; } -public long runTimeMs( ) { return ms; } -public QueryId queryId( ) { return queryId; } -public String queryIdString( ) { return QueryIdHelper.getQueryId(queryId); } +public QuerySummary(QueryId queryId, int recordCount, int batchCount, long elapsed, Exception ex) { + this.queryId = queryId; + records = recordCount; + batches = batchCount; + ms = elapsed; + finalState = null; + error = ex; +} +public boolean failed() { return error != null; } +public boolean succeeded() { return error == null; } --- End diff -- formatting here ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r96785470 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java --- @@ -49,6 +57,107 @@ public class QueryBuilder { /** + * Listener used to retrieve the query summary (only) asynchronously + * using a {@link QuerySummaryFuture}. + */ + + public class SummaryOnlyQueryEventListener implements UserResultsListener { + +private final QuerySummaryFuture future; +private QueryId queryId; +private int recordCount; +private int batchCount; +private long startTime; + +public SummaryOnlyQueryEventListener(QuerySummaryFuture future) { + this.future = future; + startTime = System.currentTimeMillis(); +} + +@Override +public void queryIdArrived(QueryId queryId) { + this.queryId = queryId; +} + +@Override +public void submissionFailed(UserException ex) { + future.completed(new QuerySummary(queryId, recordCount, batchCount, + System.currentTimeMillis() - startTime, ex)); +} + +@Override +public void dataArrived(QueryDataBatch result, ConnectionThrottle throttle) { + batchCount++; + recordCount += result.getHeader().getRowCount(); + result.release(); +} + +@Override +public void queryCompleted(QueryState state) { + future.completed(new QuerySummary(queryId, recordCount, batchCount, + System.currentTimeMillis() - startTime, state)); +} + } + + /** + * The future used to wait for the completion of an async query. Returns + * just the summary of the query. + */ + + public class QuerySummaryFuture implements Future { + +/** + * Synchronizes the listener thread and the test thread that + * launched the query. + */ + --- End diff -- please remove extra space here and other places too. Like line 120 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r96785415 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java --- @@ -49,6 +57,107 @@ public class QueryBuilder { /** + * Listener used to retrieve the query summary (only) asynchronously + * using a {@link QuerySummaryFuture}. + */ + + public class SummaryOnlyQueryEventListener implements UserResultsListener { + +private final QuerySummaryFuture future; +private QueryId queryId; +private int recordCount; +private int batchCount; +private long startTime; + +public SummaryOnlyQueryEventListener(QuerySummaryFuture future) { + this.future = future; + startTime = System.currentTimeMillis(); +} + +@Override +public void queryIdArrived(QueryId queryId) { + this.queryId = queryId; +} + +@Override +public void submissionFailed(UserException ex) { + future.completed(new QuerySummary(queryId, recordCount, batchCount, + System.currentTimeMillis() - startTime, ex)); --- End diff -- formatting --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #710: DRILL-5126: Provide simplified, unified "cluster fi...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/710#discussion_r96785327 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/LogFixture.java --- @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test; + +import java.util.ArrayList; +import java.util.List; + +import org.slf4j.LoggerFactory; + +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.Logger; +import ch.qos.logback.classic.LoggerContext; +import ch.qos.logback.classic.encoder.PatternLayoutEncoder; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.ConsoleAppender; + +/** + * Establishes test-specific logging without having to alter the global + * logback-test.xml file. Allows directing output to the console + * (if not already configured) and setting the log level on specific loggers + * of interest in the test. The fixture automatically restores the original + * log configuration on exit. 
+ * + * Typical usage: + * {@literal @}Test + * public void myTest() { + * LogFixtureBuilder logBuilder = LogFixture.builder() + * .toConsole() + * .disable() // Silence all other loggers + * .logger(ExternalSortBatch.class, Level.DEBUG); + * try (LogFixture logs = logBuilder.build()) { + * // Test code here + * } + * } + * + * You can – and should – combine the log fixtue with the + * cluster and client fixtures to have complete control over your test-time + * Drill environment. + */ + +public class LogFixture implements AutoCloseable { + + // Elapsed time in ms, log level, thread, logger, message. + + public static final String DEFAULT_CONSOLE_FORMAT = "%r %level [%thread] [%logger] - %msg%n"; + private static final String DRILL_PACKAGE_NAME = "org.apache.drill"; + + /** + * Memento for a logger name and level. + */ + public static class LogSpec { +String loggerName; +Level logLevel; + +public LogSpec(String loggerName, Level level) { + this.loggerName = loggerName; + this.logLevel = level; +} + } + + /** + * Builds the log settings to be used for a test. The log settings here + * add to those specified in a logback.xml or + * logback-test.xml file on your class path. In particular, if + * the logging configuration already redirects the Drill logger to the + * console, setting console logging here does nothing. + */ + + public static class LogFixtureBuilder { + +private String consoleFormat = DEFAULT_CONSOLE_FORMAT; +private boolean logToConsole; +private List loggers = new ArrayList<>(); + +/** + * Send all enabled logging to the console (if not already configured.) Some + * Drill log configuration files send the root to the console (or file), but + * the Drill loggers to Lilith. In that case, Lilith "hides" the console + * logger. Using this call adds a console logger to the Drill logger so that + * output does, in fact, go to the console regardless of the configuration + * in the Logback configuration file. 
+ * + * @return this builder + */ +public LogFixtureBuilder toConsole() { + logToConsole = true; + return this; +} + +/** + * Send logging to the console using the defined format. + * + * @param format valid Logback log format + * @return this builder + */ + +public LogFixtureBuilder toConsole(String format) { + consoleFormat = format; + return toConsole(); +} + +/** + * Set a specific logger to the given level. + * + * @param loggerName name of the logger (typically used for package-level
[GitHub] drill issue #710: DRILL-5126: Provide simplified, unified "cluster fixture" ...
Github user sohami commented on the issue: https://github.com/apache/drill/pull/710 +1 LGTM --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r99256160 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlClient.java --- @@ -89,14 +89,42 @@ public MessageLite getResponseDefaultInstance(int rpcType) throws RpcException { } @Override - protected Response handle(ControlConnection connection, int rpcType, ByteBuf pBody, ByteBuf dBody) throws RpcException { -return handler.handle(connection, rpcType, pBody, dBody); + protected void handle(ControlConnection connection, int rpcType, ByteBuf pBody, ByteBuf dBody, +ResponseSender sender) throws RpcException { +connection.getCurrentHandler().handle(connection, rpcType, pBody, dBody, sender); } @Override protected void validateHandshake(BitControlHandshake handshake) throws RpcException { if (handshake.getRpcVersion() != ControlRpcConfig.RPC_VERSION) { - throw new RpcException(String.format("Invalid rpc version. Expected %d, actual %d.", handshake.getRpcVersion(), ControlRpcConfig.RPC_VERSION)); + throw new RpcException(String.format("Invalid rpc version. Expected %d, actual %d.", + handshake.getRpcVersion(), ControlRpcConfig.RPC_VERSION)); +} + +if (handshake.getAuthenticationMechanismsCount() != 0) { // remote requires authentication --- End diff -- I think we should also have a check for the case where authentication is enabled on this **client** but, for some reason (maybe a wrong config), the **server** is sending an empty list of mechanisms; in that case we should throw an exception. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r99257662 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/BitConnectionConfigImpl.java --- @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.rpc.control; + +import org.apache.drill.common.KerberosUtil; +import org.apache.drill.common.config.DrillConfig; +import org.apache.drill.common.config.DrillProperties; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint; +import org.apache.drill.exec.rpc.BitConnectionConfig; +import org.apache.drill.exec.rpc.security.AuthenticatorProvider; +import org.apache.drill.exec.server.BootStrapContext; +import org.apache.drill.exec.work.batch.ControlMessageHandler; +import org.apache.hadoop.security.HadoopKerberosName; +import org.apache.hadoop.security.UserGroupInformation; + +import java.io.IOException; +import java.util.Map; + +// package private +class BitConnectionConfigImpl implements BitConnectionConfig { +//private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(BitConnectionConfigImpl.class); + + private final BufferAllocator allocator; + private final BootStrapContext context; + private final ControlMessageHandler handler; + + private final AuthenticatorProvider authProvider; + private final String authMechanismToUse; + private final String clusterId; + + private DrillbitEndpoint localEndpoint; + + BitConnectionConfigImpl(BufferAllocator allocator, BootStrapContext context, ControlMessageHandler handler) { +this.allocator = allocator; +this.context = context; +this.handler = handler; + +final DrillConfig config = context.getConfig(); +this.authProvider = config.getBoolean(ExecConstants.BIT_AUTHENTICATION_ENABLED) +? context.getAuthProvider() +: null; +this.authMechanismToUse = config.getBoolean(ExecConstants.BIT_AUTHENTICATION_ENABLED) +? config.getString(ExecConstants.BIT_AUTHENTICATION_MECHANISM) +: null; +this.clusterId = config.getBoolean(ExecConstants.USE_CLUSTER_ID_AS_KERBEROS_INSTANCE_NAME) --- End diff -- Seems like we are missing this check when authentication is enabled. 
Same for Data implementation ``` if (authProvider.getAllFactoryNames().size() == 0) { throw new DrillbitStartupException("Authentication enabled, but no mechanisms found. Please check " + "authentication configuration."); } ``` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r99257059 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/data/DataClient.java --- @@ -75,27 +85,106 @@ public MessageLite getResponseDefaultInstance(int rpcType) throws RpcException { } @Override - protected Response handle(DataClientConnection connection, int rpcType, ByteBuf pBody, ByteBuf dBody) throws RpcException { + protected void handle(DataClientConnection connection, int rpcType, ByteBuf pBody, ByteBuf dBody, +ResponseSender sender) throws RpcException { throw new UnsupportedOperationException("DataClient is unidirectional by design."); } BufferAllocator getAllocator() { -return allocator; +return config.getAllocator(); } @Override protected void validateHandshake(BitServerHandshake handshake) throws RpcException { if (handshake.getRpcVersion() != DataRpcConfig.RPC_VERSION) { - throw new RpcException(String.format("Invalid rpc version. Expected %d, actual %d.", handshake.getRpcVersion(), DataRpcConfig.RPC_VERSION)); + throw new RpcException(String.format("Invalid rpc version. Expected %d, actual %d.", + handshake.getRpcVersion(), DataRpcConfig.RPC_VERSION)); +} + +if (handshake.getAuthenticationMechanismsCount() != 0) { // remote requires authentication --- End diff -- Same as in ControlClient --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r99261445 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserClient.java --- @@ -88,22 +129,183 @@ public void submitQuery(UserResultsListener resultsListener, RunQuery query) { send(queryResultHandler.getWrappedListener(resultsListener), RpcType.RUN_QUERY, query, QueryId.class); } - public void connect(RpcConnectionHandler handler, DrillbitEndpoint endpoint, - UserProperties props, UserBitShared.UserCredentials credentials) { + public CheckedFuture connect(DrillbitEndpoint endpoint, DrillProperties parameters, + UserCredentials credentials) { +final FutureHandler handler = new FutureHandler(); UserToBitHandshake.Builder hsBuilder = UserToBitHandshake.newBuilder() .setRpcVersion(UserRpcConfig.RPC_VERSION) .setSupportListening(true) .setSupportComplexTypes(supportComplexTypes) .setSupportTimeout(true) .setCredentials(credentials) -.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName)); +.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName)) +.setSaslSupport(SaslSupport.SASL_AUTH) +.setProperties(parameters.serializeForServer()); +this.properties = parameters; + + connectAsClient(queryResultHandler.getWrappedConnectionHandler(handler), +hsBuilder.build(), endpoint.getAddress(), endpoint.getUserPort()); +return handler; + } + + /** + * Check (after {@link #connect connecting}) if server requires authentication. + * + * @return true if server requires authentication + */ + public boolean serverRequiresAuthentication() { +return supportedAuthMechs != null; + } + + /** + * Returns a list of supported authentication mechanism. If called before {@link #connect connecting}, + * returns null. If called after {@link #connect connecting}, returns a list of supported mechanisms + * iff authentication is required. 
+ * + * @return list of supported authentication mechanisms + */ + public List getSupportedAuthenticationMechanisms() { --- End diff -- not used anywhere. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r99265626 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" + +#define KERBEROS_SIMPLE_NAME "kerberos" +#define KERBEROS_SASL_NAME "gssapi" +#define PLAIN_NAME "plain" + +const std::map SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of +(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME) +(PLAIN_NAME, PLAIN_NAME) +; + +boost::mutex SaslAuthenticatorImpl::s_mutex; +bool SaslAuthenticatorImpl::s_initialized = false; + +SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* const properties) : +m_properties(properties), m_pConnection(NULL), m_secret(NULL) { + +if (!s_initialized) { +boost::lock_guard lock(SaslAuthenticatorImpl::s_mutex); +if (!s_initialized) { +// set plugin path if provided +if (DrillClientConfig::getSaslPluginPath()) { +char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath()); +sasl_set_path(0, saslPluginPath); +} + +sasl_client_init(NULL); --- End diff -- Not sure if earlier comment got deleted, re-writing again. `sasl_client_init` can throw below error which we should check and handle. ``` SASL_OK Success SASL_BADVERS Mechanism version mismatch SASL_BADPARAM Error in config file SASL_NOMEM Not enough memory to complete operation ``` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r99260686 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/plain/PlainServer.java --- @@ -0,0 +1,175 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.security.plain; + +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.sasl.AuthorizeCallback; +import javax.security.sasl.Sasl; +import javax.security.sasl.SaslException; +import javax.security.sasl.SaslServer; +import javax.security.sasl.SaslServerFactory; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.security.Provider; +import java.util.Map; + +/** + * Plain SaslServer implementation. 
See https://tools.ietf.org/html/rfc4616 + */ +public class PlainServer implements SaslServer { +// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(PlainServer.class); + + private static final String UTF_8_NULL = "\u"; + + public static final String MECHANISM_NAME = "PLAIN"; + + public static class PlainServerFactory implements SaslServerFactory { + +@Override +public SaslServer createSaslServer(final String mechanism, final String protocol, final String serverName, + final Map props, final CallbackHandler cbh) +throws SaslException { + return MECHANISM_NAME.equals(mechanism) + ? props == null +? new PlainServer(cbh) +: ("true".equals(props.get(Sasl.POLICY_NOPLAINTEXT)) ? null : new PlainServer(cbh)) + : null; +} + +@Override +public String[] getMechanismNames(final Map props) { + return props == null || "false".equals(props.get(Sasl.POLICY_NOPLAINTEXT)) --- End diff -- we should change this check as well like above to be consistent. Since right now if `props` is not null and POLICY_NOPLAINTEXT property is absent then we will return empty string. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r99257914 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/data/DataConnectionManager.java --- @@ -21,30 +21,28 @@ import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint; import org.apache.drill.exec.proto.UserBitShared.RpcChannel; import org.apache.drill.exec.rpc.ReconnectingConnection; -import org.apache.drill.exec.server.BootStrapContext; public class DataConnectionManager extends ReconnectingConnection{ +// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DataConnectionManager.class); --- End diff -- Please remove this. There are few other places too with this commented lines. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r99247679 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ConnectionManagerRegistry.java --- @@ -32,24 +29,19 @@ private final ConcurrentMap registry = Maps.newConcurrentMap(); - private final ControlMessageHandler handler; - private final BootStrapContext context; - private volatile DrillbitEndpoint localEndpoint; - private final BufferAllocator allocator; + private final BitConnectionConfigImpl config; - public ConnectionManagerRegistry(BufferAllocator allocator, ControlMessageHandler handler, BootStrapContext context) { -super(); -this.handler = handler; -this.context = context; -this.allocator = allocator; + public ConnectionManagerRegistry(BitConnectionConfigImpl config) { +this.config = config; } - public ControlConnectionManager getConnectionManager(DrillbitEndpoint endpoint) { -assert localEndpoint != null : "DrillbitEndpoint must be set before a connection manager can be retrieved"; -ControlConnectionManager m = registry.get(endpoint); + public ControlConnectionManager getConnectionManager(DrillbitEndpoint remoteEndpoint) { +assert config.getLocalEndpoint() != null : +"DrillbitEndpoint must be set before a connection manager can be retrieved"; +ControlConnectionManager m = registry.get(remoteEndpoint); if (m == null) { - m = new ControlConnectionManager(allocator, endpoint, localEndpoint, handler, context); - ControlConnectionManager m2 = registry.putIfAbsent(endpoint, m); + m = new ControlConnectionManager(config, remoteEndpoint); + ControlConnectionManager m2 = registry.putIfAbsent(remoteEndpoint, m); --- End diff -- can be final. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r99256832 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControllerImpl.java --- @@ -39,36 +39,33 @@ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ControllerImpl.class); private volatile ControlServer server; - private final ControlMessageHandler handler; private final BootStrapContext context; private final ConnectionManagerRegistry connectionRegistry; - private final boolean allowPortHunting; private final CustomHandlerRegistry handlerRegistry; + private final BitConnectionConfigImpl config; - public ControllerImpl(BootStrapContext context, ControlMessageHandler handler, BufferAllocator allocator, - boolean allowPortHunting) { -super(); -this.handler = handler; + public ControllerImpl(BootStrapContext context, BufferAllocator allocator, ControlMessageHandler handler) { this.context = context; -this.connectionRegistry = new ConnectionManagerRegistry(allocator, handler, context); -this.allowPortHunting = allowPortHunting; +config = new BitConnectionConfigImpl(allocator, context, handler); +this.connectionRegistry = new ConnectionManagerRegistry(config); this.handlerRegistry = handler.getHandlerRegistry(); } @Override - public DrillbitEndpoint start(DrillbitEndpoint partialEndpoint) throws DrillbitStartupException { -server = new ControlServer(handler, context, connectionRegistry); + public DrillbitEndpoint start(DrillbitEndpoint partialEndpoint, final boolean allowPortHunting) + throws DrillbitStartupException { --- End diff -- nothing inside the constructor throws **`DrillbitStartupException`** --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r99261692 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserClient.java --- @@ -88,22 +129,183 @@ public void submitQuery(UserResultsListener resultsListener, RunQuery query) { send(queryResultHandler.getWrappedListener(resultsListener), RpcType.RUN_QUERY, query, QueryId.class); } - public void connect(RpcConnectionHandler handler, DrillbitEndpoint endpoint, - UserProperties props, UserBitShared.UserCredentials credentials) { + public CheckedFuture connect(DrillbitEndpoint endpoint, DrillProperties parameters, + UserCredentials credentials) { +final FutureHandler handler = new FutureHandler(); UserToBitHandshake.Builder hsBuilder = UserToBitHandshake.newBuilder() .setRpcVersion(UserRpcConfig.RPC_VERSION) .setSupportListening(true) .setSupportComplexTypes(supportComplexTypes) .setSupportTimeout(true) .setCredentials(credentials) -.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName)); +.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName)) +.setSaslSupport(SaslSupport.SASL_AUTH) +.setProperties(parameters.serializeForServer()); +this.properties = parameters; + + connectAsClient(queryResultHandler.getWrappedConnectionHandler(handler), +hsBuilder.build(), endpoint.getAddress(), endpoint.getUserPort()); +return handler; + } + + /** + * Check (after {@link #connect connecting}) if server requires authentication. + * + * @return true if server requires authentication + */ + public boolean serverRequiresAuthentication() { +return supportedAuthMechs != null; + } + + /** + * Returns a list of supported authentication mechanism. If called before {@link #connect connecting}, + * returns null. If called after {@link #connect connecting}, returns a list of supported mechanisms + * iff authentication is required. 
+ * + * @return list of supported authentication mechanisms + */ + public List getSupportedAuthenticationMechanisms() { +return supportedAuthMechs; + } -if (props != null) { - hsBuilder.setProperties(props); + /** + * Authenticate to the server asynchronously. Returns a future that {@link CheckedFuture#checkedGet results} + * in null if authentication succeeds, or throws a {@link SaslException} with relevant message if + * authentication fails. + * + * This method uses properties provided at {@link #connect connection time} and override them with the + * given properties, if any. + * + * @param overrides parameter overrides + * @return result of authentication request + */ + public CheckedFuture authenticate(final DrillProperties overrides) { +if (supportedAuthMechs == null) { + throw new IllegalStateException("Server does not require authentication."); } +properties.merge(overrides); +final Map propertiesMap = properties.stringPropertiesAsMap(); - this.connectAsClient(queryResultHandler.getWrappedConnectionHandler(handler), -hsBuilder.build(), endpoint.getAddress(), endpoint.getUserPort()); +final SettableFuture settableFuture = SettableFuture.create(); // future used in SASL exchange +final CheckedFuture future = +new AbstractCheckedFuture(settableFuture) { + + @Override + protected SaslException mapException(Exception e) { +if (connection != null) { + connection.close(); // to ensure connection is dropped +} +if (e instanceof ExecutionException) { + final Throwable cause = e.getCause(); + if (cause instanceof SaslException) { +return new SaslException("Authentication failed: " + cause.getMessage(), cause); + } +} +return new SaslException("Authentication failed unexpectedly.", e); + } +}; + +final AuthenticatorFactory factory; +try { + factory = getAuthenticatorFactory(); +} catch (final SaslException e) { + settableFuture.setException(e); + return future; +} + +final String mechanismName = factory.getSimpleName(); +logger.trace("Will try to login for {} 
mechanism.", mechanismName); +final UserGroupInformation ugi; +try { + ugi = factory.createAndLoginUser(propertiesMap
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r99257539 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/data/DataConnectionCreator.java --- @@ -37,40 +37,28 @@ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DataConnectionCreator.class); private volatile DataServer server; - private final BootStrapContext context; - private final WorkEventBus workBus; - private final WorkerBee bee; - private final boolean allowPortHunting; - private ConcurrentMap connectionManager = Maps.newConcurrentMap(); - private final BufferAllocator dataAllocator; + private final ConcurrentMap connectionManager = Maps.newConcurrentMap(); - public DataConnectionCreator( - BootStrapContext context, - BufferAllocator allocator, - WorkEventBus workBus, - WorkerBee bee, - boolean allowPortHunting) { -super(); -this.context = context; -this.workBus = workBus; -this.bee = bee; -this.allowPortHunting = allowPortHunting; -this.dataAllocator = allocator; + private final BitConnectionConfigImpl config; + + public DataConnectionCreator(BootStrapContext context, BufferAllocator allocator, WorkEventBus workBus, + WorkerBee bee) { +config = new BitConnectionConfigImpl(allocator, context, new DataServerRequestHandler(workBus, bee)); } - public DrillbitEndpoint start(DrillbitEndpoint partialEndpoint) throws DrillbitStartupException { -server = new DataServer(context, dataAllocator, workBus, bee); + public DrillbitEndpoint start(DrillbitEndpoint partialEndpoint, boolean allowPortHunting) + throws DrillbitStartupException { --- End diff -- Same as ControlServer --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r99678863 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" + +#define KERBEROS_SIMPLE_NAME "kerberos" +#define KERBEROS_SASL_NAME "gssapi" +#define PLAIN_NAME "plain" + +const std::map SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of +(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME) +(PLAIN_NAME, PLAIN_NAME) +; + +boost::mutex SaslAuthenticatorImpl::s_mutex; +bool SaslAuthenticatorImpl::s_initialized = false; + +SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* const properties) : +m_properties(properties), m_pConnection(NULL), m_secret(NULL) { + +if (!s_initialized) { +boost::lock_guard lock(SaslAuthenticatorImpl::s_mutex); +if (!s_initialized) { +// set plugin path if provided +if (DrillClientConfig::getSaslPluginPath()) { +char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath()); +sasl_set_path(0, saslPluginPath); +} + +sasl_client_init(NULL); +{ // for debugging purposes +const char **mechanisms = sasl_global_listmech(); +int i = 0; +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "SASL mechanisms available on client: " << std::endl;) +while (mechanisms[i] != NULL) { +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << i << " : " << mechanisms[i] << std::endl;) +i++; +} +} +s_initialized = true; +} +} +} + +SaslAuthenticatorImpl::~SaslAuthenticatorImpl() { +if (m_secret) { +free(m_secret); +} +// may be used to negotiated security layers before disposing in the future +if (m_pConnection) { +sasl_dispose(&m_pConnection); +} +m_pConnection = NULL; +} + +typedef int (*sasl_callback_proc_t)(void); // see sasl_callback_ft + +int SaslAuthenticatorImpl::userNameCallback(void *context, int id, const char **result, unsigned *len) { +const std::string* const username = static_cast(context); + +if ((SASL_CB_USER == id || SASL_CB_AUTHNAME == id) +&& username != NULL) { +*result = username->c_str(); +// *len = 
(unsigned int) username->length(); +} +return SASL_OK; +} + +int SaslAuthenticatorImpl::passwordCallback(sasl_conn_t *conn, void *context, int id, sasl_secret_t **psecret) { +const SaslAuthenticatorImpl* const authenticator = static_cast(context); + +if (SASL_CB_PASS == id) { +const std::string password = authenticator->m_password; +const size_t length = password.length(); +authenticator->m_secret->len = length; +std::memcpy(authenticator->m_secret->data, password.c_str(), length); +*psecret = authenticator->m_secret; +} +return SASL_OK; +} + +int SaslAuthenticatorImpl::init(const std::vector& mechanisms, exec::shared::SaslMessage& response) { +// find and set parameters +std::string authMechanismToUse; +std::string serviceName; +std::string serviceHost; +for (size_t i = 0; i < m_properties->size(); i++) { +const std::string key = m_proper
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r100907447 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/data/BitConnectionConfig.java --- @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.data; --- End diff -- Please move this file under _org.apache.drill.exec.rpc_ package. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill issue #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on the issue: https://github.com/apache/drill/pull/578 Just one last comment regarding moving BitConnectionConfig file. Apart from that LGTM. +1 from my side. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101175920 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/client/DrillClient.java --- @@ -435,9 +427,14 @@ public synchronized boolean reconnect() { } private void connect(DrillbitEndpoint endpoint) throws RpcException { -final FutureHandler f = new FutureHandler(); -client.connect(f, endpoint, props, getUserCredentials()); -f.checkedGet(); +client.connect(endpoint, properties, getUserCredentials()).checkedGet(); +if (client.serverRequiresAuthentication()) { + try { +client.authenticate(null).checkedGet(); + } catch (SaslException e) { +throw new RpcException(e); + } --- End diff -- Originally, if the client fails to connect to the server because of an authentication failure, then it doesn't retry, since `InvalidConnectionInfoException` is thrown and handled separately. But if authentication using SASL fails, then we are always throwing `RpcException` and we will end up retrying. We should have a check for the exception thrown on SASL authentication failure and not retry in that scenario either. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/752#discussion_r102298318 --- Diff: exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java --- @@ -124,7 +125,7 @@ public void interpreterDateTest() throws Exception { final BitControl.PlanFragment planFragment = BitControl.PlanFragment.getDefaultInstance(); final QueryContextInformation queryContextInfo = planFragment.getContext(); final inttimeZoneIndex = queryContextInfo.getTimeZone(); -final org.joda.time.DateTimeZone timeZone = org.joda.time.DateTimeZone.forID(org.apache.drill.exec.expr.fn.impl.DateUtility.getTimeZone(timeZoneIndex)); +final DateTimeZone timeZone = DateTimeZone.forID(org.apache.drill.exec.expr.fn.impl.DateUtility.getTimeZone(timeZoneIndex)); --- End diff -- Please remove extra space after `=` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/752#discussion_r102360734 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java --- @@ -75,20 +76,50 @@ */ private boolean extended; + private ScanStats scanStats = ScanStats.TRIVIAL_TABLE; @JsonCreator public MockGroupScanPOP(@JsonProperty("url") String url, - @JsonProperty("extended") Boolean extended, @JsonProperty("entries") List readEntries) { super((String) null); this.readEntries = readEntries; this.url = url; -this.extended = extended == null ? false : extended; + +// Compute decent row-count stats for this mock data source so that +// the planner is "fooled" into thinking that this operator wil do +// disk I/O. + +int rowCount = 0; +int rowWidth = 0; +for (MockScanEntry entry : readEntries) { + rowCount += entry.getRecords(); + int width = 0; + if (entry.getTypes() == null) { +width = 50; + } else { +for (MockColumn col : entry.getTypes()) { + int colWidth = 0; + if (col.getWidthValue() == 0) { +colWidth = TypeHelper.getSize(col.getMajorType()); + } else { +colWidth = col.getWidthValue(); + } + colWidth *= col.getRepeatCount(); + width += colWidth; +} + } + rowWidth = Math.max(rowWidth, width); --- End diff -- `rowWidth` seems to be `maxRowWidth` and `width` is `rowWidth`. Can we please rename these ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/752#discussion_r102336171 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/package-info.java --- @@ -60,14 +62,26 @@ * The mode is one of the supported Drill * {@link DataMode} names: usually OPTIONAL or REQUIRED. * + * + * Recent extensions include: + * + * repeat in either the "entry" or "record" elements allow --- End diff -- I just found repeat definition in `MockColumn` but not in `MockScanEntry` whereas here in comment and example `example-mock.json` we are showing repeat property at entry level. Is this work in progress ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/752#discussion_r102362622 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java --- @@ -109,7 +121,37 @@ public MockSchema(MockStorageEngine engine) { @Override public Table getTable(String name) { - Pattern p = Pattern.compile("(\\w+)_(\\d+)(k|m)?", Pattern.CASE_INSENSITIVE); + if (name.toLowerCase().endsWith(".json") ) { +return getConfigFile(name); + } else { +return getDirectTable(name); + } +} + +private Table getConfigFile(String name) { + final URL url = Resources.getResource(name); + if (url == null) { +throw new IllegalArgumentException( +"Unable to find mock table config file " + name); + } + MockTableDef mockTableDefn; + try { +String json = Resources.toString(url, Charsets.UTF_8); +final ObjectMapper mapper = new ObjectMapper(); +mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true); +mockTableDefn = mapper.readValue(json, MockTableDef.class); + } catch (JsonParseException e) { +throw new IllegalArgumentException( "Unable to parse mock table definition file: " + name, e ); + } catch (JsonMappingException e) { +throw new IllegalArgumentException( "Unable to Jackson deserialize mock table definition file: " + name, e ); + } catch (IOException e) { +throw new IllegalArgumentException( "Unable to read mock table definition file: " + name, e ); + } + return new DynamicDrillTable(engine, this.name, mockTableDefn.getEntries() ); --- End diff -- Please remove extra space before last` )`. there are few other places too like line 169 in this file, line 199 (MockTableDef.java), etc. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/752#discussion_r102320948 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java --- @@ -89,14 +85,30 @@ public boolean supportsRead() { return true; } -// public static class ImplicitTable extends DynamicDrillTable { -// -//public ImplicitTable(StoragePlugin plugin, String storageEngineName, -//Object selection) { -// super(plugin, storageEngineName, selection); -//} -// -// } + /** + * Resolves table names within the mock data source. Tables can be of two forms: + * + * _ + * + * Where the "name" can be anything, "n" is the number of rows, and "unit" is + * the units for the row count: non, K (thousand) or M (million). + * + * The above form generates a table directly with no other information needed. + * Column names must be provided, and must be of the form: + * + * _ + * + * Where the name can be anything, the type must be i (integer), d (double) + * or s (string, AKA VarChar). The length is needed only for string fields. --- End diff -- how about boolean (b) as a type ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/752#discussion_r102294277 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/BooleanGen.java --- @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.mock; + +import java.util.Random; + +import org.apache.drill.exec.vector.BitVector; +import org.apache.drill.exec.vector.ValueVector; + +public class BooleanGen implements FieldGen { + + Random rand = new Random( ); + + @Override + public void setup(ColumnDef colDef) { } + + public int value( ) { --- End diff -- Extra space between `()` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102585253 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java --- @@ -295,8 +396,96 @@ public void close() throws Exception { if (ex != null) { throw ex; } + +// Delete any local files, if we wrote to the local +// persistent store. But, leave the files if the user wants +// to review them, for debugging, say. Note that, even if the +// files are preserved here, they will be removed when the +// next cluster fixture starts, else the CTTAS initialization +// will fail. + +if (! preserveLocalFiles) { +try { + removeLocalFiles(); +} catch (Exception e) { + ex = ex == null ? e : ex; +} +} + +// Remove temporary directories created for this cluster session. + +try { + removeTempDirs(); +} catch (Exception e) { + ex = ex == null ? e : ex; +} } + /** + * Removes files stored locally in the "local store provider." + * Required because CTTAS setup fails if these files are left from one + * run to the next. + * + * @throws IOException if a directory cannot be deleted + */ + + private void removeLocalFiles() throws IOException { + +// Don't delete if this is not a local Drillbit. + +if (! isLocal) { + return; +} + +// Don't delete if we did not write. + +if (! config.getBoolean(ExecConstants.SYS_STORE_PROVIDER_LOCAL_ENABLE_WRITE)) { + return; +} + +// Remove the local files if they exist. + +String localStoreLocation = config.getString(ExecConstants.SYS_STORE_PROVIDER_LOCAL_PATH); +File storeDir = new File(localStoreLocation); +if (! storeDir.exists()) { + return; +} +if (storeDir.exists()) { --- End diff -- Check not needed since we are already checking above. Also we can directly use the `removeDir` method below. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102607033 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ProfileParser.java --- @@ -138,9 +414,208 @@ public long getMetric(int id) { } } - public Map getOpInfo( ) { + /** + * Information about an operator definition: the plan-time information + * that appears in the plan portion of the profile. Also holds the + * "actuals" from the minor fragment portion of the profile. + * Allows integrating the "planned" vs. "actual" performance of the + * query. + */ + + public static class OpDefInfo { +public String opName; +public boolean isInferred; +public int majorId; +public int stepId; +public String args; +public List columns; +public int globalLevel; +public int localLevel; +public int id; +public int branchId; +public boolean isBranchRoot; +public double estMemoryCost; +public double estNetCost; +public double estIOCost; +public double estCpuCost; +public double estRowCost; +public double estRows; +public String name; +public long actualMemory; +public int actualBatches; +public long actualRows; +public OpDefInfo inferredParent; +public List opExecs = new ArrayList<>( ); +public List children = new ArrayList<>( ); + +// 00-00Screen : rowType = RecordType(VARCHAR(10) Year, VARCHAR(65536) Month, VARCHAR(100) Devices, VARCHAR(100) Tier, VARCHAR(100) LOB, CHAR(10) Gateway, BIGINT Day, BIGINT Hour, INTEGER Week, VARCHAR(100) Week_end_date, BIGINT Usage_Cnt): \ +// rowcount = 100.0, cumulative cost = {7.42124276972414E9 rows, 7.663067406383167E10 cpu, 0.0 io, 2.24645048816E10 network, 2.692766612982188E8 memory}, id = 129302 +// +// 00-01 Project(Year=[$0], Month=[$1], Devices=[$2], Tier=[$3], LOB=[$4], Gateway=[$5], Day=[$6], Hour=[$7], Week=[$8], Week_end_date=[$9], Usage_Cnt=[$10]) : +// rowType = RecordType(VARCHAR(10) Year, VARCHAR(65536) Month, VARCHAR(100) Devices, VARCHAR(100) Tier, VARCHAR(100) LOB, CHAR(10) Gateway, BIGINT Day, BIGINT 
Hour, INTEGER Week, VARCHAR(100) Week_end_date, BIGINT Usage_Cnt): rowcount = 100.0, cumulative cost = {7.42124275972414E9 rows, 7.663067405383167E10 cpu, 0.0 io, 2.24645048816E10 network, 2.692766612982188E8 memory}, id = 129301 + +public OpDefInfo(String plan) { + Pattern p = Pattern.compile( "^(\\d+)-(\\d+)(\\s+)(\\w+)(?:\\((.*)\\))?\\s*:\\s*(.*)$" ); + Matcher m = p.matcher(plan); + if (!m.matches()) { +throw new IllegalStateException( "Could not parse plan: " + plan ); + } + majorId = Integer.parseInt(m.group(1)); + stepId = Integer.parseInt(m.group(2)); + name = m.group(4); + args = m.group(5); + String tail = m.group(6); + String indent = m.group(3); + globalLevel = (indent.length() - 4) / 2; + + p = Pattern.compile("rowType = RecordType\\((.*)\\): (rowcount .*)"); + m = p.matcher(tail); --- End diff -- Looks like this regex will be `rowType = RecordType\((.*)\):(\s*)\\?(\s*)(rowcount .*)` with extra `(\s*)\\?(\s*)` in between. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102610181 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ProfileParser.java --- @@ -42,44 +48,207 @@ public class ProfileParser { JsonObject profile; + String query; List plans; + List operations; + Map fragments = new HashMap<>(); + private List topoOrder; public ProfileParser( File file ) throws IOException { try (FileReader fileReader = new FileReader(file); JsonReader reader = Json.createReader(fileReader)) { profile = (JsonObject) reader.read(); } + +parse(); + } + + private void parse() { +parseQuery(); +parsePlans(); +buildFrags(); +parseFragProfiles(); +mapOpProfiles(); +aggregateOpers(); +buildTree(); + } + + private void parseQuery() { +query = profile.getString("query"); +query = query.replace("//n", "\n"); + } + + /** + * Parse a text version of the plan as it appears in the JSON + * query profile. + */ + + private static class PlanParser { + +List plans = new ArrayList<>(); +List operations = new ArrayList<>(); +List sorted = new ArrayList<>(); + +public void parsePlans(String plan) { + plans = new ArrayList<>( ); + String parts[] = plan.split("\n"); + for (String part : parts) { +plans.add(part); +OpDefInfo opDef = new OpDefInfo( part ); +operations.add(opDef); + } + sortList(); +} + +public void sortList() { --- End diff -- can be private. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102603020 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ProfileParser.java --- @@ -42,44 +48,207 @@ public class ProfileParser { JsonObject profile; + String query; List plans; + List operations; + Map fragments = new HashMap<>(); + private List topoOrder; public ProfileParser( File file ) throws IOException { try (FileReader fileReader = new FileReader(file); JsonReader reader = Json.createReader(fileReader)) { profile = (JsonObject) reader.read(); } + +parse(); + } + + private void parse() { +parseQuery(); +parsePlans(); +buildFrags(); +parseFragProfiles(); +mapOpProfiles(); +aggregateOpers(); +buildTree(); + } + + private void parseQuery() { +query = profile.getString("query"); +query = query.replace("//n", "\n"); + } + + /** + * Parse a text version of the plan as it appears in the JSON + * query profile. + */ + + private static class PlanParser { + +List plans = new ArrayList<>(); +List operations = new ArrayList<>(); +List sorted = new ArrayList<>(); + +public void parsePlans(String plan) { + plans = new ArrayList<>( ); + String parts[] = plan.split("\n"); + for (String part : parts) { +plans.add(part); +OpDefInfo opDef = new OpDefInfo( part ); +operations.add(opDef); + } + sortList(); +} + +public void sortList() { + List raw = new ArrayList<>( ); + raw.addAll( operations ); + Collections.sort( raw, new Comparator() { +@Override +public int compare(OpDefInfo o1, OpDefInfo o2) { + int result = Integer.compare(o1.majorId, o2.majorId); + if ( result == 0 ) { +result = Integer.compare(o1.stepId, o2.stepId); + } + return result; +} + }); + int currentFrag = 0; + int currentStep = 0; + for ( OpDefInfo opDef : raw ) { +if ( currentFrag < opDef.majorId ) { + currentFrag++; + OpDefInfo sender = new OpDefInfo( currentFrag, 0 ); + sender.isInferred = true; + sender.name = "Sender"; + sorted.add(sender); + currentStep = 1; + 
opDef.inferredParent = sender; + sender.children.add( opDef ); +} +if ( opDef.stepId > currentStep ) { + OpDefInfo unknown = new OpDefInfo( currentFrag, currentStep ); + unknown.isInferred = true; + unknown.name = "Unknown"; + sorted.add(unknown); + opDef.inferredParent = unknown; + unknown.children.add( opDef ); +} +sorted.add( opDef ); +currentStep = opDef.stepId + 1; + } +} + } + + /** + * Parse the plan portion of the query profile. + */ + + private void parsePlans() { +PlanParser parser = new PlanParser(); +String plan = getPlan( ); +parser.parsePlans(plan); +plans = parser.plans; +topoOrder = parser.operations; +operations = parser.sorted; + } --- End diff -- wrong assignment ?` topoOrder = parser.sorted` and `operations = parser.operations` ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102589695 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java --- @@ -99,33 +111,83 @@ // behavior. Production default is DEFAULT_SCAN_THREADS put(ExecConstants.SCAN_THREADPOOL_SIZE, 4); + + // Define a useful root location for the ZK persistent + // storage. Profiles will go here when running in distributed + // mode. + + put(ZookeeperPersistentStoreProvider.DRILL_EXEC_SYS_STORE_PROVIDER_ZK_BLOBROOT, "/tmp/drill/log"); } }; public static final String DEFAULT_BIT_NAME = "drillbit"; private DrillConfig config; - private Map bits = new HashMap<>(); + private Map bits = new HashMap<>(); private Drillbit defaultDrillbit; private BufferAllocator allocator; private boolean ownsZK; private ZookeeperHelper zkHelper; private RemoteServiceSet serviceSet; - private String dfsTestTmpSchemaLocation; + private File dfsTestTempDir; protected List clients = new ArrayList<>(); + private boolean usesZk; + private boolean preserveLocalFiles; + private boolean isLocal; + + /** + * Temporary directories created for this test cluster. + * Each is removed when closing the cluster. + */ + + private List tempDirs = new ArrayList<>(); + + ClusterFixture(FixtureBuilder builder) { + +String zkConnect = configureZk(builder); +try { + createConfig(builder, zkConnect); --- End diff -- In createConfig we are calling `getServiceSetWithFullCache(config, allocator)`. But we are creating `allocator` later on. We should assign allocator before calling above method. But allocator creation also takes in config so probably we have to do inside `createConfig` method. On a side note I don't see `getServiceSetWithFullCache(config, allocator)` using allocator or config anyway. Not sure why we are passing in first place. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102584427 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java --- @@ -295,8 +396,96 @@ public void close() throws Exception { if (ex != null) { throw ex; } + +// Delete any local files, if we wrote to the local +// persistent store. But, leave the files if the user wants +// to review them, for debugging, say. Note that, even if the +// files are preserved here, they will be removed when the +// next cluster fixture starts, else the CTTAS initialization +// will fail. + +if (! preserveLocalFiles) { +try { + removeLocalFiles(); +} catch (Exception e) { + ex = ex == null ? e : ex; +} +} + +// Remove temporary directories created for this cluster session. + +try { + removeTempDirs(); +} catch (Exception e) { + ex = ex == null ? e : ex; +} } + /** + * Removes files stored locally in the "local store provider." + * Required because CTTAS setup fails if these files are left from one + * run to the next. + * + * @throws IOException if a directory cannot be deleted + */ + + private void removeLocalFiles() throws IOException { + +// Don't delete if this is not a local Drillbit. + +if (! isLocal) { + return; +} + +// Don't delete if we did not write. + +if (! config.getBoolean(ExecConstants.SYS_STORE_PROVIDER_LOCAL_ENABLE_WRITE)) { + return; --- End diff -- If the previous test wants to preserve the file then we will not delete upon cluster shutdown. But then if next test starts and doesn't want to save profiles this option will be `false` by default and on cluster startup we will still not delete the old files ? which is the intention in `startDrillbits()` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102613259 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ProfileParser.java --- @@ -138,9 +414,208 @@ public long getMetric(int id) { } } - public Map getOpInfo( ) { + /** + * Information about an operator definition: the plan-time information + * that appears in the plan portion of the profile. Also holds the + * "actuals" from the minor fragment portion of the profile. + * Allows integrating the "planned" vs. "actual" performance of the + * query. + */ + + public static class OpDefInfo { +public String opName; +public boolean isInferred; +public int majorId; +public int stepId; +public String args; +public List columns; +public int globalLevel; +public int localLevel; +public int id; +public int branchId; +public boolean isBranchRoot; +public double estMemoryCost; +public double estNetCost; +public double estIOCost; +public double estCpuCost; +public double estRowCost; +public double estRows; +public String name; +public long actualMemory; +public int actualBatches; +public long actualRows; +public OpDefInfo inferredParent; +public List opExecs = new ArrayList<>( ); +public List children = new ArrayList<>( ); + +// 00-00Screen : rowType = RecordType(VARCHAR(10) Year, VARCHAR(65536) Month, VARCHAR(100) Devices, VARCHAR(100) Tier, VARCHAR(100) LOB, CHAR(10) Gateway, BIGINT Day, BIGINT Hour, INTEGER Week, VARCHAR(100) Week_end_date, BIGINT Usage_Cnt): \ +// rowcount = 100.0, cumulative cost = {7.42124276972414E9 rows, 7.663067406383167E10 cpu, 0.0 io, 2.24645048816E10 network, 2.692766612982188E8 memory}, id = 129302 +// +// 00-01 Project(Year=[$0], Month=[$1], Devices=[$2], Tier=[$3], LOB=[$4], Gateway=[$5], Day=[$6], Hour=[$7], Week=[$8], Week_end_date=[$9], Usage_Cnt=[$10]) : +// rowType = RecordType(VARCHAR(10) Year, VARCHAR(65536) Month, VARCHAR(100) Devices, VARCHAR(100) Tier, VARCHAR(100) LOB, CHAR(10) Gateway, BIGINT Day, BIGINT 
Hour, INTEGER Week, VARCHAR(100) Week_end_date, BIGINT Usage_Cnt): rowcount = 100.0, cumulative cost = {7.42124275972414E9 rows, 7.663067405383167E10 cpu, 0.0 io, 2.24645048816E10 network, 2.692766612982188E8 memory}, id = 129301 + +public OpDefInfo(String plan) { + Pattern p = Pattern.compile( "^(\\d+)-(\\d+)(\\s+)(\\w+)(?:\\((.*)\\))?\\s*:\\s*(.*)$" ); + Matcher m = p.matcher(plan); + if (!m.matches()) { +throw new IllegalStateException( "Could not parse plan: " + plan ); + } + majorId = Integer.parseInt(m.group(1)); + stepId = Integer.parseInt(m.group(2)); + name = m.group(4); + args = m.group(5); + String tail = m.group(6); + String indent = m.group(3); + globalLevel = (indent.length() - 4) / 2; --- End diff -- Group 3 is looking for empty spaces. Can indent.length() be < 4 (like empty string) ? In which case globalLevel will be -ve which will cause issue in `buildTree()`. Looks like globalLevel needs to be atleast 1. Better to have a check here ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102618884 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ProfileParser.java --- @@ -138,9 +414,208 @@ public long getMetric(int id) { } } - public Map getOpInfo( ) { + /** + * Information about an operator definition: the plan-time information + * that appears in the plan portion of the profile. Also holds the + * "actuals" from the minor fragment portion of the profile. + * Allows integrating the "planned" vs. "actual" performance of the + * query. + */ + + public static class OpDefInfo { +public String opName; +public boolean isInferred; +public int majorId; +public int stepId; +public String args; +public List columns; +public int globalLevel; +public int localLevel; +public int id; +public int branchId; +public boolean isBranchRoot; +public double estMemoryCost; +public double estNetCost; +public double estIOCost; +public double estCpuCost; +public double estRowCost; +public double estRows; +public String name; +public long actualMemory; +public int actualBatches; +public long actualRows; +public OpDefInfo inferredParent; +public List opExecs = new ArrayList<>( ); +public List children = new ArrayList<>( ); + +// 00-00Screen : rowType = RecordType(VARCHAR(10) Year, VARCHAR(65536) Month, VARCHAR(100) Devices, VARCHAR(100) Tier, VARCHAR(100) LOB, CHAR(10) Gateway, BIGINT Day, BIGINT Hour, INTEGER Week, VARCHAR(100) Week_end_date, BIGINT Usage_Cnt): \ +// rowcount = 100.0, cumulative cost = {7.42124276972414E9 rows, 7.663067406383167E10 cpu, 0.0 io, 2.24645048816E10 network, 2.692766612982188E8 memory}, id = 129302 +// +// 00-01 Project(Year=[$0], Month=[$1], Devices=[$2], Tier=[$3], LOB=[$4], Gateway=[$5], Day=[$6], Hour=[$7], Week=[$8], Week_end_date=[$9], Usage_Cnt=[$10]) : +// rowType = RecordType(VARCHAR(10) Year, VARCHAR(65536) Month, VARCHAR(100) Devices, VARCHAR(100) Tier, VARCHAR(100) LOB, CHAR(10) Gateway, BIGINT Day, BIGINT 
Hour, INTEGER Week, VARCHAR(100) Week_end_date, BIGINT Usage_Cnt): rowcount = 100.0, cumulative cost = {7.42124275972414E9 rows, 7.663067405383167E10 cpu, 0.0 io, 2.24645048816E10 network, 2.692766612982188E8 memory}, id = 129301 + +public OpDefInfo(String plan) { + Pattern p = Pattern.compile( "^(\\d+)-(\\d+)(\\s+)(\\w+)(?:\\((.*)\\))?\\s*:\\s*(.*)$" ); + Matcher m = p.matcher(plan); + if (!m.matches()) { +throw new IllegalStateException( "Could not parse plan: " + plan ); + } + majorId = Integer.parseInt(m.group(1)); + stepId = Integer.parseInt(m.group(2)); + name = m.group(4); + args = m.group(5); + String tail = m.group(6); + String indent = m.group(3); + globalLevel = (indent.length() - 4) / 2; + + p = Pattern.compile("rowType = RecordType\\((.*)\\): (rowcount .*)"); + m = p.matcher(tail); + if ( m.matches() ) { +columns = parseCols(m.group(1)); +tail = m.group(2); + } + + p = Pattern.compile( "rowcount = ([\\d.E]+), cumulative cost = \\{([\\d.E]+) rows, ([\\d.E]+) cpu, ([\\d.E]+) io, ([\\d.E]+) network, ([\\d.E]+) memory\\}, id = (\\d+)"); + m = p.matcher(tail); + if (! m.matches()) { +throw new IllegalStateException("Could not parse costs: " + tail ); + } + estRows = Double.parseDouble(m.group(1)); + estRowCost = Double.parseDouble(m.group(2)); + estCpuCost = Double.parseDouble(m.group(3)); + estIOCost = Double.parseDouble(m.group(4)); + estNetCost = Double.parseDouble(m.group(5)); + estMemoryCost = Double.parseDouble(m.group(6)); + id = Integer.parseInt(m.group(7)); +} + +public void printTree(String indent) { + new TreePrinter().visit(this); +} + +public OpDefInfo(int major, int id) { + majorId = major; + stepId = id; +} + +@Override +public String toString() { + String head = "[OpDefInfo " + majorId + "-" + stepId + ": " + name; + if ( isInferred ) { +head += " (" + opName + ")"; + } + return head + "]"; +} + } + + /** + * Visit a tree of operator definitions to support printing, + * analysis and other tasks. + */ + + public static class TreeV
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102612813 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ProfileParser.java --- @@ -42,44 +48,207 @@ public class ProfileParser { JsonObject profile; + String query; List plans; + List operations; + Map fragments = new HashMap<>(); + private List topoOrder; public ProfileParser( File file ) throws IOException { try (FileReader fileReader = new FileReader(file); JsonReader reader = Json.createReader(fileReader)) { profile = (JsonObject) reader.read(); } + +parse(); + } + + private void parse() { +parseQuery(); +parsePlans(); +buildFrags(); +parseFragProfiles(); +mapOpProfiles(); +aggregateOpers(); +buildTree(); + } + + private void parseQuery() { +query = profile.getString("query"); +query = query.replace("//n", "\n"); + } + + /** + * Parse a text version of the plan as it appears in the JSON + * query profile. + */ + + private static class PlanParser { + +List plans = new ArrayList<>(); +List operations = new ArrayList<>(); +List sorted = new ArrayList<>(); + +public void parsePlans(String plan) { + plans = new ArrayList<>( ); + String parts[] = plan.split("\n"); + for (String part : parts) { +plans.add(part); +OpDefInfo opDef = new OpDefInfo( part ); +operations.add(opDef); + } + sortList(); +} + +public void sortList() { + List raw = new ArrayList<>( ); + raw.addAll( operations ); + Collections.sort( raw, new Comparator() { +@Override +public int compare(OpDefInfo o1, OpDefInfo o2) { + int result = Integer.compare(o1.majorId, o2.majorId); + if ( result == 0 ) { +result = Integer.compare(o1.stepId, o2.stepId); + } + return result; +} + }); + int currentFrag = 0; + int currentStep = 0; + for ( OpDefInfo opDef : raw ) { +if ( currentFrag < opDef.majorId ) { + currentFrag++; + OpDefInfo sender = new OpDefInfo( currentFrag, 0 ); + sender.isInferred = true; + sender.name = "Sender"; + sorted.add(sender); + currentStep = 1; + 
opDef.inferredParent = sender; + sender.children.add( opDef ); +} +if ( opDef.stepId > currentStep ) { + OpDefInfo unknown = new OpDefInfo( currentFrag, currentStep ); + unknown.isInferred = true; + unknown.name = "Unknown"; + sorted.add(unknown); + opDef.inferredParent = unknown; + unknown.children.add( opDef ); +} +sorted.add( opDef ); +currentStep = opDef.stepId + 1; + } +} + } + + /** + * Parse the plan portion of the query profile. + */ + + private void parsePlans() { +PlanParser parser = new PlanParser(); +String plan = getPlan( ); +parser.parsePlans(plan); +plans = parser.plans; +topoOrder = parser.operations; +operations = parser.sorted; + } + + private void buildFrags() { +for (OpDefInfo opDef : operations) { + FragInfo major = fragments.get(opDef.majorId); + if (major == null) { +major = new FragInfo(opDef.majorId); +fragments.put(opDef.majorId, major); + } + major.ops.add(opDef); +} + } + + private static List parseCols(String cols) { +String parts[] = cols.split( ", " ); +List fields = new ArrayList<>( ); +for ( String part : parts ) { + String halves[] = part.split( " " ); + fields.add( new FieldDef( halves[1], halves[0] ) ); +} +return fields; + } + + private void parseFragProfiles() { +JsonArray frags = getFragmentProfile( ); +for (JsonObject fragProfile : frags.getValuesAs(JsonObject.class)) { + int mId = fragProfile.getInt("majorFragmentId"); + FragInfo major = fragments.get(mId); + major.parse(fragProfile); +} + } + + private void mapOpProfiles() { +for (FragInfo major : fragments.values()) { + for (MinorFragInfo minor : major.minors) { +minor.mapOpProfiles(major); + } +} + } + + /** + * A typ
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102581476 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java --- @@ -295,8 +396,96 @@ public void close() throws Exception { if (ex != null) { throw ex; --- End diff -- It looks like we are doing best effort in release or closing all the resources. So this condition should be in the end after removing local files and temp dirs. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102369640 --- Diff: common/src/test/java/org/apache/drill/test/DrillTest.java --- @@ -69,6 +71,25 @@ @Rule public TestName TEST_NAME = new TestName(); + /** + * Option to cause tests to produce verbose output. Many tests provide + * detailed information to stdout when enabled. To enable: + * + * java ... -Dtest.verbose=true ... + */ + public static final String VERBOSE_OUTPUT = "test.verbose"; + + protected static final boolean verbose = Boolean.parseBoolean(System.getProperty(VERBOSE_OUTPUT)); + + /** + * Output destination for verbose tset output. Rather than using --- End diff -- typo `test` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102587851 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java --- @@ -99,33 +111,83 @@ // behavior. Production default is DEFAULT_SCAN_THREADS put(ExecConstants.SCAN_THREADPOOL_SIZE, 4); + + // Define a useful root location for the ZK persistent + // storage. Profiles will go here when running in distributed + // mode. + + put(ZookeeperPersistentStoreProvider.DRILL_EXEC_SYS_STORE_PROVIDER_ZK_BLOBROOT, "/tmp/drill/log"); } }; public static final String DEFAULT_BIT_NAME = "drillbit"; private DrillConfig config; - private Map bits = new HashMap<>(); + private Map bits = new HashMap<>(); private Drillbit defaultDrillbit; private BufferAllocator allocator; private boolean ownsZK; private ZookeeperHelper zkHelper; private RemoteServiceSet serviceSet; - private String dfsTestTmpSchemaLocation; + private File dfsTestTempDir; protected List clients = new ArrayList<>(); + private boolean usesZk; + private boolean preserveLocalFiles; + private boolean isLocal; + + /** + * Temporary directories created for this test cluster. + * Each is removed when closing the cluster. + */ + + private List tempDirs = new ArrayList<>(); + + ClusterFixture(FixtureBuilder builder) { + +String zkConnect = configureZk(builder); +try { + createConfig(builder, zkConnect); + startDrillbits(builder); + applyOptions(builder); +} catch (Exception e) { + // Translate exceptions to unchecked to avoid cluttering + // tests. Failures will simply fail the test itself. + + throw new IllegalStateException( "Cluster fixture setup failed", e ); +} + +// Some operations need an allocator. - protected ClusterFixture(FixtureBuilder builder) throws Exception { +allocator = RootAllocatorFactory.newRoot(config); + } + + private String configureZk(FixtureBuilder builder) { // Start ZK if requested. 
+String zkConnect = null; if (builder.zkHelper != null) { + // Case where the test itself started ZK and we're only using it. + zkHelper = builder.zkHelper; ownsZK = false; -} else if (builder.zkCount > 0) { - zkHelper = new ZookeeperHelper(true); - zkHelper.startZookeeper(builder.zkCount); +} else if (builder.localZkCount > 0) { + // Case where we need a local ZK just for this test cluster. + + zkHelper = new ZookeeperHelper("dummy"); + zkHelper.startZookeeper(builder.localZkCount); ownsZK = true; } +if (zkHelper != null) { + zkConnect = zkHelper.getConnectionString(); + // Forced to disable this, because currently we leak memory which is a known issue for query cancellations. + // Setting this causes unittests to fail. + builder.configProperty(ExecConstants.RETURN_ERROR_FOR_FAILURE_IN_CANCELLED_FRAGMENTS, true); +} +return zkConnect; --- End diff -- We can set the `ZK_CONNECTION` property inside configProperty here itself. That way we don't have to get the connectString from here and pass to createConfig where we are doing `null` check for `zkConnect` string. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r102600191 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ExampleTest.java --- @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test; + +import static org.junit.Assert.*; + --- End diff -- please remove wildcard import since we are only using `assertEquals` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r103052249 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ProfileParser.java --- @@ -138,9 +414,208 @@ public long getMetric(int id) { } } - public Map getOpInfo( ) { + /** + * Information about an operator definition: the plan-time information + * that appears in the plan portion of the profile. Also holds the + * "actuals" from the minor fragment portion of the profile. + * Allows integrating the "planned" vs. "actual" performance of the + * query. + */ + + public static class OpDefInfo { +public String opName; +public boolean isInferred; +public int majorId; +public int stepId; +public String args; +public List columns; +public int globalLevel; +public int localLevel; +public int id; +public int branchId; +public boolean isBranchRoot; +public double estMemoryCost; +public double estNetCost; +public double estIOCost; +public double estCpuCost; +public double estRowCost; +public double estRows; +public String name; +public long actualMemory; +public int actualBatches; +public long actualRows; +public OpDefInfo inferredParent; +public List opExecs = new ArrayList<>( ); +public List children = new ArrayList<>( ); + +// 00-00Screen : rowType = RecordType(VARCHAR(10) Year, VARCHAR(65536) Month, VARCHAR(100) Devices, VARCHAR(100) Tier, VARCHAR(100) LOB, CHAR(10) Gateway, BIGINT Day, BIGINT Hour, INTEGER Week, VARCHAR(100) Week_end_date, BIGINT Usage_Cnt): \ +// rowcount = 100.0, cumulative cost = {7.42124276972414E9 rows, 7.663067406383167E10 cpu, 0.0 io, 2.24645048816E10 network, 2.692766612982188E8 memory}, id = 129302 +// +// 00-01 Project(Year=[$0], Month=[$1], Devices=[$2], Tier=[$3], LOB=[$4], Gateway=[$5], Day=[$6], Hour=[$7], Week=[$8], Week_end_date=[$9], Usage_Cnt=[$10]) : +// rowType = RecordType(VARCHAR(10) Year, VARCHAR(65536) Month, VARCHAR(100) Devices, VARCHAR(100) Tier, VARCHAR(100) LOB, CHAR(10) Gateway, BIGINT Day, BIGINT 
Hour, INTEGER Week, VARCHAR(100) Week_end_date, BIGINT Usage_Cnt): rowcount = 100.0, cumulative cost = {7.42124275972414E9 rows, 7.663067405383167E10 cpu, 0.0 io, 2.24645048816E10 network, 2.692766612982188E8 memory}, id = 129301 + +public OpDefInfo(String plan) { + Pattern p = Pattern.compile( "^(\\d+)-(\\d+)(\\s+)(\\w+)(?:\\((.*)\\))?\\s*:\\s*(.*)$" ); + Matcher m = p.matcher(plan); + if (!m.matches()) { +throw new IllegalStateException( "Could not parse plan: " + plan ); + } + majorId = Integer.parseInt(m.group(1)); + stepId = Integer.parseInt(m.group(2)); + name = m.group(4); + args = m.group(5); + String tail = m.group(6); + String indent = m.group(3); + globalLevel = (indent.length() - 4) / 2; + + p = Pattern.compile("rowType = RecordType\\((.*)\\): (rowcount .*)"); + m = p.matcher(tail); --- End diff -- Discussed offline. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill issue #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on the issue: https://github.com/apache/drill/pull/753 Apart from fixing the regex in ProfileParser.java, the changes look good to me. +1 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #753: DRILL-5260: Extend "Cluster Fixture" test framework
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/753#discussion_r103052237 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ProfileParser.java --- @@ -138,9 +414,208 @@ public long getMetric(int id) { } } - public Map getOpInfo( ) { + /** + * Information about an operator definition: the plan-time information + * that appears in the plan portion of the profile. Also holds the + * "actuals" from the minor fragment portion of the profile. + * Allows integrating the "planned" vs. "actual" performance of the + * query. + */ + + public static class OpDefInfo { +public String opName; +public boolean isInferred; +public int majorId; +public int stepId; +public String args; +public List columns; +public int globalLevel; +public int localLevel; +public int id; +public int branchId; +public boolean isBranchRoot; +public double estMemoryCost; +public double estNetCost; +public double estIOCost; +public double estCpuCost; +public double estRowCost; +public double estRows; +public String name; +public long actualMemory; +public int actualBatches; +public long actualRows; +public OpDefInfo inferredParent; +public List opExecs = new ArrayList<>( ); +public List children = new ArrayList<>( ); + +// 00-00Screen : rowType = RecordType(VARCHAR(10) Year, VARCHAR(65536) Month, VARCHAR(100) Devices, VARCHAR(100) Tier, VARCHAR(100) LOB, CHAR(10) Gateway, BIGINT Day, BIGINT Hour, INTEGER Week, VARCHAR(100) Week_end_date, BIGINT Usage_Cnt): \ +// rowcount = 100.0, cumulative cost = {7.42124276972414E9 rows, 7.663067406383167E10 cpu, 0.0 io, 2.24645048816E10 network, 2.692766612982188E8 memory}, id = 129302 +// +// 00-01 Project(Year=[$0], Month=[$1], Devices=[$2], Tier=[$3], LOB=[$4], Gateway=[$5], Day=[$6], Hour=[$7], Week=[$8], Week_end_date=[$9], Usage_Cnt=[$10]) : +// rowType = RecordType(VARCHAR(10) Year, VARCHAR(65536) Month, VARCHAR(100) Devices, VARCHAR(100) Tier, VARCHAR(100) LOB, CHAR(10) Gateway, BIGINT Day, BIGINT 
Hour, INTEGER Week, VARCHAR(100) Week_end_date, BIGINT Usage_Cnt): rowcount = 100.0, cumulative cost = {7.42124275972414E9 rows, 7.663067405383167E10 cpu, 0.0 io, 2.24645048816E10 network, 2.692766612982188E8 memory}, id = 129301 + +public OpDefInfo(String plan) { + Pattern p = Pattern.compile( "^(\\d+)-(\\d+)(\\s+)(\\w+)(?:\\((.*)\\))?\\s*:\\s*(.*)$" ); + Matcher m = p.matcher(plan); + if (!m.matches()) { +throw new IllegalStateException( "Could not parse plan: " + plan ); + } + majorId = Integer.parseInt(m.group(1)); + stepId = Integer.parseInt(m.group(2)); + name = m.group(4); + args = m.group(5); + String tail = m.group(6); + String indent = m.group(3); + globalLevel = (indent.length() - 4) / 2; + + p = Pattern.compile("rowType = RecordType\\((.*)\\): (rowcount .*)"); + m = p.matcher(tail); + if ( m.matches() ) { +columns = parseCols(m.group(1)); +tail = m.group(2); + } + + p = Pattern.compile( "rowcount = ([\\d.E]+), cumulative cost = \\{([\\d.E]+) rows, ([\\d.E]+) cpu, ([\\d.E]+) io, ([\\d.E]+) network, ([\\d.E]+) memory\\}, id = (\\d+)"); + m = p.matcher(tail); + if (! m.matches()) { +throw new IllegalStateException("Could not parse costs: " + tail ); + } + estRows = Double.parseDouble(m.group(1)); + estRowCost = Double.parseDouble(m.group(2)); + estCpuCost = Double.parseDouble(m.group(3)); + estIOCost = Double.parseDouble(m.group(4)); + estNetCost = Double.parseDouble(m.group(5)); + estMemoryCost = Double.parseDouble(m.group(6)); + id = Integer.parseInt(m.group(7)); +} + +public void printTree(String indent) { + new TreePrinter().visit(this); +} + +public OpDefInfo(int major, int id) { + majorId = major; + stepId = id; +} + +@Override +public String toString() { + String head = "[OpDefInfo " + majorId + "-" + stepId + ": " + name; + if ( isInferred ) { +head += " (" + opName + ")"; + } + return head + "]"; +} + } + + /** + * Visit a tree of operator definitions to support printing, + * analysis and other tasks. + */ + + public static class TreeV
[GitHub] drill issue #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on the issue: https://github.com/apache/drill/pull/752 Thanks for the change. LGTM. +1 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #768: DRILL-5313: Fix build failure in C++ client
GitHub user sohami opened a pull request: https://github.com/apache/drill/pull/768 DRILL-5313: Fix build failure in C++ client You can merge this pull request into a Git repository by running: $ git pull https://github.com/sohami/drill DRILL-5313 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/768.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #768 commit 40e81345d6406cc1ceb38dd1b036332726a40b1a Author: Sorabh Hamirwasia Date: 2017-03-03T03:42:32Z DRILL-5313: Fix build failure in C++ client --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill issue #768: DRILL-5313: Fix build failure in C++ client
Github user sohami commented on the issue: https://github.com/apache/drill/pull/768 Thanks for the actual fix. I will close this pull request. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #768: DRILL-5313: Fix build failure in C++ client
Github user sohami closed the pull request at: https://github.com/apache/drill/pull/768 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #773: DRILL-4335: Apache Drill should support network enc...
GitHub user sohami opened a pull request: https://github.com/apache/drill/pull/773 DRILL-4335: Apache Drill should support network encryption. DRILL-4335: Apache Drill should support network encryption. This pull request adds network encryption capability between the Java Drill Client and Drillbit channel and also between the Drillbit to Drillbit channel. It extends the SASL Authentication framework to support encryption. You can merge this pull request into a Git repository by running: $ git pull https://github.com/sohami/drill DRILL-4335-Java-02282017 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/773.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #773 commit 8d34bc669ac06cdf1cab12169cfc604d3b503ce3 Author: Sorabh Hamirwasia Date: 2017-02-02T02:44:21Z DRILL-4335: Encryption Protocol changes commit 5d0c10efe5142fec1aa256b6c61ff09196c6ca0f Author: Sorabh Hamirwasia Date: 2017-02-15T23:14:41Z DRILL-4335: Remove support to allow older clients to connect to a secure cluster with encryption enabled. 
commit 93e133a4c1998a0718134f75596c702f593a2fe1 Author: Sorabh Hamirwasia Date: 2017-02-27T22:54:22Z DRILL-4335: Adding handlers for chunking/encrypting/decrypting RPC msgs commit 4ca982ed36c6fd95201b80d8e89c7b4202479d35 Author: Sorabh Hamirwasia Date: 2017-02-27T23:04:19Z DRILL-4335: Adding configs and constants commit fc91cf98f1b9814931a8f751249edad695e4ccd7 Author: Sorabh Hamirwasia Date: 2017-02-27T23:21:01Z DRILL-4335: Adding encryption support for client->bit channel commit b3af9eb021095a1f05fbc89d941d6c9530933b40 Author: Sorabh Hamirwasia Date: 2017-02-27T23:24:32Z DRILL-4335: Adding test for client->bit channel encryption commit e66b40765ad2793155b9ccfcff0af17344a0af7a Author: Sorabh Hamirwasia Date: 2017-02-27T23:25:27Z DRILL-4335: Adding encryption support for bit->bit channel commit bad6ccd569762af1146f152f8bb757625bf6bb17 Author: Sorabh Hamirwasia Date: 2017-02-27T23:29:31Z DRILL-4335: UI changes for counters, encryption status commit e983d1a1d3537212b00616df3b46fbd52f8512c3 Author: Sorabh Hamirwasia Date: 2017-02-28T07:27:23Z DRILL-4335: Add test for bit->bit encryption and failure case for old clients commit b0cbbbd9312b67352c9117cd5e15f727faaa2cfb Author: Sorabh Hamirwasia Date: 2017-02-28T07:28:14Z DRILL-4335: Bug fix for missing config exception commit 9d5dc7e364e7a3baf18c58b1fe56399c98f9041b Author: Sorabh Hamirwasia Date: 2017-02-28T08:43:49Z DRILL-4335: Add client connection parameter for encryption. Add new and fix old test for client->bit encryption commit c8e6b8a27aea28a04a73e00c960868b461382ba3 Author: Sorabh Hamirwasia Date: 2017-03-01T23:26:30Z DRILL-4335: Improve logic for connection counter commit 9b7b38f624bd42100316adf4fde7256783bcf183 Author: Sorabh Hamirwasia Date: 2017-03-02T19:24:29Z DRILL-4335: Improving saslClient/saslServer cleanup to handle all the cases --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #772: DRILL-5316: Check drillbitsVector count from zoo_ge...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/772#discussion_r104606833 --- Diff: contrib/native/client/src/clientlib/zookeeperClient.cpp --- @@ -138,6 +138,11 @@ int ZookeeperClient::getAllDrillbits(const std::string& connectStr, std::vector< DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "\t Unshuffled Drillbit id: " << drillbits[i] << std::endl;) } } +else{ --- End diff -- Agreed. Should be handled in caller (i.e. DrillClient). If the returned vector size is zero then we should check that in DrillClient and close client connection with error as `ERR_CONN_ZKNODBIT`. Something like below: `return handleConnError(CONN_INVALID_INPUT, getMessage(ERR_CONN_ZKNODBIT, pathToDrill.c_str()));` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #772: DRILL-5316: Check drillbits size before we attempt ...
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/772#discussion_r105286121 --- Diff: contrib/native/client/src/clientlib/drillClientImpl.cpp --- @@ -2143,6 +2146,9 @@ connectionStatus_t PooledDrillClientImpl::connect(const char* connStr, DrillUser Utils::shuffle(drillbits); // The original shuffled order is maintained if we shuffle first and then add any missing elements Utils::add(m_drillbits, drillbits); +if (m_drillbits.empty()){ +return handleConnError(CONN_FAILURE, getMessage(ERR_CONN_ZKNODBIT)); --- End diff -- Since we are not removing the offline nodes from m_drillbits, I think we should return a connection error before the shuffle. Let's say on the first client connection we get all the active nodes from zookeeper and store them in m_drillbits. Then all the nodes went dead or offline. On the next connection request, zookeeper will return zero drillbits, but since m_drillbits is not empty we will still try to connect and fail later. Instead I think zero drillbits returned from zookeeper is a good indication that we won't be able to connect to any other node already present inside m_drillbits, and we should fail right there? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill issue #772: DRILL-5316: Check drillbits size before we attempt to acce...
Github user sohami commented on the issue: https://github.com/apache/drill/pull/772 Thanks @superbstreak for the change. LGTM. +1 from my side. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #788: DRILL-5318: Sub-operator test fixture
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/788#discussion_r108094926 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/ConfigBuilder.java --- @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.drill.test; + +import java.util.Properties; +import java.util.Map.Entry; + +import org.apache.drill.common.config.DrillConfig; + +/** + * Builds a {@link DrillConfig} for use in tests. Use this when a config + * is needed by itself, separate from an embedded Drillbit. + */ +public class ConfigBuilder { + + protected String configResource; + protected Properties configProps; + + /** + * Use the given configuration properties as overrides. + * @param configProps a collection of config properties + * @return this builder + * @see {@link #configProperty(String, Object)} + */ + + public ConfigBuilder configProps(Properties configProps) { +if (hasResource()) { + // Drill provides no constructor for this use case. 
+ throw new IllegalArgumentException( "Cannot provide both a config resource and config properties."); +} +if (this.configProps == null) { + this.configProps = configProps; +} else { + this.configProps.putAll(configProps); +} +return this; + } + + /** + * Use the given configuration file, stored as a resource, to initialize + * the Drill config. Note that the resource file should have the two + * following settings to work as a config for an embedded Drillbit: + * + * drill.exec.sys.store.provider.local.write : false, + * drill.exec.http.enabled : false + * + * It may be more convenient to add your settings to the default + * config settings with {@link #configProperty(String, Object)}. + * @param configResource path to the file that contains the + * config file to be read + * @return this builder + * @see {@link #configProperty(String, Object)} + */ + + public ConfigBuilder resource(String configResource) { + +if (configProps != null) { + // Drill provides no constructor for this use case. + throw new IllegalArgumentException( "Cannot provide both a config resource and config properties."); +} + +// TypeSafe gets unhappy about a leading slash, but other functions +// require it. Silently discard the leading slash if given to +// preserve the test writer's sanity. + +this.configResource = ClusterFixture.trimSlash(configResource); +return this; + } + + /** + * Add an additional boot-time property for the embedded Drillbit. + * @param key config property name + * @param value property value + * @return this builder + */ + + public ConfigBuilder put(String key, Object value) { +if (hasResource()) { + // Drill provides no constructor for this use case. 
+ throw new IllegalArgumentException( "Cannot provide both a config resource and config properties."); +} +if (configProps == null) { + configProps = new Properties(); +} +configProps.put(key, value.toString()); +return this; + } + + public DrillConfig build() { + +// Create a config +// Because of the way DrillConfig works, we can set the ZK +// connection string only if a property set is provided. + +if (hasResource()) { + return DrillConfig.create(configResource); +} else if (configProps != null) { + return DrillConfig.create(configProperties()); +} else { + return DrillConfig.create(); +} + } + + private Properties configProperties() { --- End diff -- This
[GitHub] drill pull request #788: DRILL-5318: Sub-operator test fixture
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/788#discussion_r108095955 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java --- @@ -271,6 +276,91 @@ public QuerySummary run() throws Exception { } /** + * Run the query and return the first result set as a + * {@link DirectRowSet} object that can be inspected directly + * by the code using a {@link RowSetReader}. + * + * An enhancement is to provide a way to read a series of result + * batches as row sets. + * @return a row set that represents the first batch returned from + * the query + * @throws RpcException if anything goes wrong + */ + + public DirectRowSet rowSet() throws RpcException { + +// Ignore all but the first non-empty batch. + +QueryDataBatch dataBatch = null; +for (QueryDataBatch batch : results()) { + if (dataBatch == null && batch.getHeader().getRowCount() != 0) { +dataBatch = batch; --- End diff -- break ? after first non-empty batch is found ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #788: DRILL-5318: Sub-operator test fixture
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/788#discussion_r108563545 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java --- @@ -271,6 +276,91 @@ public QuerySummary run() throws Exception { } /** + * Run the query and return the first result set as a + * {@link DirectRowSet} object that can be inspected directly + * by the code using a {@link RowSetReader}. + * + * An enhancement is to provide a way to read a series of result + * batches as row sets. + * @return a row set that represents the first batch returned from + * the query + * @throws RpcException if anything goes wrong + */ + + public DirectRowSet rowSet() throws RpcException { + +// Ignore all but the first non-empty batch. + +QueryDataBatch dataBatch = null; +for (QueryDataBatch batch : results()) { + if (dataBatch == null && batch.getHeader().getRowCount() != 0) { +dataBatch = batch; + } else { +batch.release(); + } +} + +// No results? + +if (dataBatch == null) { + return null; +} + +// Unload the batch and convert to a row set. + +final RecordBatchLoader loader = new RecordBatchLoader(client.allocator()); +try { + loader.load(dataBatch.getHeader().getDef(), dataBatch.getData()); + dataBatch.release(); + VectorContainer container = loader.getContainer(); + container.setRecordCount(loader.getRecordCount()); + return new DirectRowSet(client.allocator(), container); +} catch (SchemaChangeException e) { + throw new IllegalStateException(e); +} + } + + /** + * Run the query that is expected to return (at least) one row + * with the only (or first) column returning a long value. 
+ * + * @return the value of the first column of the first row + * @throws RpcException if anything goes wrong + */ + + public long singletonLong() throws RpcException { +RowSet rowSet = rowSet(); +if (rowSet == null) { + throw new IllegalStateException("No rows returned"); +} +RowSetReader reader = rowSet.reader(); +reader.next(); +long value = reader.column(0).getLong(); --- End diff -- Can we please refactor the common portion in singletonLong and singletonInt ? Since I guess in future if we support retrieving more types then again it will be repeated? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #785: DRILL-5323: Test tools for row sets
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/785#discussion_r108814142 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/rowSet/DirectRowSet.java --- @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.test.rowSet; + +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; +import org.apache.drill.exec.record.VectorAccessible; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.record.selection.SelectionVector2; +import org.apache.drill.exec.vector.AllocationHelper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.test.rowSet.AbstractRowSetAccessor.AbstractRowIndex; +import org.apache.drill.test.rowSet.AbstractRowSetAccessor.BoundedRowIndex; +import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; + +public class DirectRowSet extends AbstractSingleRowSet implements ExtendableRowSet { + + private static class DirectRowIndex extends BoundedRowIndex { + +public DirectRowIndex(int rowCount) { + super(rowCount); +} + +@Override +public int index() { return rowIndex; } + +@Override +public int batch() { return 0; } --- End diff -- Can be moved to AbstractRowIndex class to return 0 by default. Override by derived class like HyperRowIndex to have different implementation. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #785: DRILL-5323: Test tools for row sets
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/785#discussion_r108823826 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriterImpl.java --- @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet; + +import java.math.BigDecimal; + +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.accessor.AbstractColumnWriter; +import org.apache.drill.exec.vector.accessor.ColumnAccessorFactory; +import org.apache.drill.exec.vector.accessor.ColumnWriter; +import org.apache.drill.test.rowSet.RowSet.RowSetWriter; +import org.joda.time.Period; + +/** + * Implements a row set writer on top of a {@link RowSet} + * container. 
+ */ + +public class RowSetWriterImpl extends AbstractRowSetAccessor implements RowSetWriter { + + private final AbstractColumnWriter writers[]; + + public RowSetWriterImpl(AbstractSingleRowSet recordSet, AbstractRowIndex rowIndex) { +super(rowIndex, recordSet.schema().access()); +ValueVector[] valueVectors = recordSet.vectors(); +writers = new AbstractColumnWriter[valueVectors.length]; +int posn = 0; +for (int i = 0; i < writers.length; i++) { + writers[posn] = ColumnAccessorFactory.newWriter(valueVectors[i].getField().getType()); --- End diff -- we can use _"i"_ instead of _"posn"_ --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #785: DRILL-5323: Test tools for row sets
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/785#discussion_r108768096 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetUtilities.java --- @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet; + +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.selection.SelectionVector2; +import org.apache.drill.exec.vector.accessor.AccessorUtilities; +import org.apache.drill.exec.vector.accessor.ColumnAccessor.ValueType; +import org.apache.drill.exec.vector.accessor.ColumnWriter; +import org.apache.drill.test.rowSet.RowSet.RowSetWriter; +import org.joda.time.Duration; +import org.joda.time.Period; + +public class RowSetUtilities { + + private RowSetUtilities() { } + + public static void reverse(SelectionVector2 sv2) { +int count = sv2.getCount(); +for (int i = 0; i < count / 2; i++) { + char temp = sv2.getIndex(i); + int dest = count - 1 - i; + sv2.setIndex(i, sv2.getIndex(dest)); + sv2.setIndex(dest, temp); +} + } + + /** + * Set a test data value from an int. Uses the type information of the + * column to handle interval types. 
Else, uses the value type of the + * accessor. The value set here is purely for testing; the mapping + * from ints to intervals has no real meaning. + * + * @param rowWriter + * @param index + * @param value + */ + + public static void setFromInt(RowSetWriter rowWriter, int index, int value) { +ColumnWriter writer = rowWriter.column(index); +if (writer.valueType() == ValueType.PERIOD) { + setPeriodFromInt(writer, rowWriter.schema().column(index).getType().getMinorType(), value); +} else { + AccessorUtilities.setFromInt(writer, value); +} + } + + public static void setPeriodFromInt(ColumnWriter writer, MinorType minorType, + int value) { +switch (minorType) { +case INTERVAL: + writer.setPeriod(Duration.millis(value).toPeriod()); + break; +case INTERVALYEAR: + writer.setPeriod(Period.years(value / 12).withMonths(value % 12)); + break; +case INTERVALDAY: + int sec = value % 60; + value = value / 60; + int min = value % 60; + value = value / 60; + writer.setPeriod(Period.days(value).withMinutes(min).withSeconds(sec)); --- End diff -- Looks like it's missing calculation to hours and then days --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #785: DRILL-5323: Test tools for row sets
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/785#discussion_r108761814 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java --- @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; +import org.apache.drill.exec.record.MaterializedField; + +/** + * Builder of a row set schema expressed as a list of materialized + * fields. Optimized for use when creating schemas by hand in tests. + * + * Example usage to create the following schema: + * (c: INT, a: MAP(c: VARCHAR, d: INT, e: MAP(f: VARCHAR), g: INT), h: BIGINT) --- End diff -- ... a: MAP(_b: VARCHAR_ .. instead of _c:VARCHAR_ --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes it to be, or if the feature is enabled but not working, please contact the infrastructure team at infrastructure@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #785: DRILL-5323: Test tools for row sets
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/785#discussion_r108808598 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java --- @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.test.rowSet; + +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer; +import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.complex.MapVector; +import org.apache.drill.test.rowSet.RowSet.SingleRowSet; +import org.apache.drill.test.rowSet.RowSetSchema.LogicalColumn; +import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema; + +public abstract class AbstractSingleRowSet extends AbstractRowSet implements SingleRowSet { + + public abstract static class StructureBuilder { +protected final PhysicalSchema schema; +protected final BufferAllocator allocator; +protected final ValueVector[] valueVectors; +protected final MapVector[] mapVectors; +protected int vectorIndex; +protected int mapIndex; + +public StructureBuilder(BufferAllocator allocator, RowSetSchema schema) { + this.allocator = allocator; + this.schema = schema.physical(); + valueVectors = new ValueVector[schema.access().count()]; + if (schema.access().mapCount() == 0) { +mapVectors = null; + } else { +mapVectors = new MapVector[schema.access().mapCount()]; + } +} + } + + public static class VectorBuilder extends StructureBuilder { + +public VectorBuilder(BufferAllocator allocator, RowSetSchema schema) { + super(allocator, schema); +} + +public ValueVector[] buildContainer(VectorContainer container) { + for (int i = 0; i < schema.count(); i++) { +LogicalColumn colSchema = schema.column(i); +@SuppressWarnings("resource") +ValueVector v = TypeHelper.getNewVector(colSchema.field, allocator, 
null); +container.add(v); +if (colSchema.field.getType().getMinorType() == MinorType.MAP) { + MapVector mv = (MapVector) v; + mapVectors[mapIndex++] = mv; + buildMap(mv, colSchema.mapSchema); +} else { + valueVectors[vectorIndex++] = v; +} + } + container.buildSchema(SelectionVectorMode.NONE); + return valueVectors; +} + +private void buildMap(MapVector mapVector, PhysicalSchema mapSchema) { + for (int i = 0; i < mapSchema.count(); i++) { +LogicalColumn colSchema = mapSchema.column(i); +MajorType type = colSchema.field.getType(); +Class vectorClass = TypeHelper.getValueVectorClass(type.getMinorType(), type.getMode()); +@SuppressWarnings("resource") +ValueVector v = mapVector.addOrGet(colSchema.field.getName(), type, vectorClass); +if (type.getMinorType() == MinorType.MAP) { + MapVector mv = (MapVector) v; + mapVectors[mapIndex++] = mv; + buildMap(mv, colSchema.mapSchema); +} else { + valueVectors[vectorIndex++] = v; +} + } +} + } + + public static class VectorMapper extends StructureBuilder { + +public VectorMapper(BufferAllocator allocator, RowSetSchema schema) { + super(allocator, schema); +}
[GitHub] drill pull request #785: DRILL-5323: Test tools for row sets
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/785#discussion_r108758552 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/rowSet/HyperRowSetImpl.java --- @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.test.rowSet; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; +import org.apache.drill.exec.record.HyperVectorWrapper; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.record.selection.SelectionVector4; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.accessor.AccessorUtilities; +import org.apache.drill.exec.vector.complex.AbstractMapVector; +import org.apache.drill.test.rowSet.AbstractRowSetAccessor.BoundedRowIndex; +import org.apache.drill.test.rowSet.RowSet.HyperRowSet; +import org.apache.drill.test.rowSet.RowSetSchema.LogicalColumn; +import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema; + +public class HyperRowSetImpl extends AbstractRowSet implements HyperRowSet { + + public static class HyperRowIndex extends BoundedRowIndex { + +private final SelectionVector4 sv4; + +public HyperRowIndex(SelectionVector4 sv4) { + super(sv4.getCount()); + this.sv4 = sv4; +} + +@Override +public int index() { + return AccessorUtilities.sv4Index(sv4.get(rowIndex)); +} + +@Override +public int batch( ) { + return AccessorUtilities.sv4Batch(sv4.get(rowIndex)); +} + } + + /** + * Build a hyper row set by restructuring a hyper vector bundle into a uniform + * shape. Consider this schema: + * { a: 10, b: { c: 20, d: { e: 30 } } } + * + * The hyper container, with two batches, has this structure: + * + * Batchab + * 0Int vectorMap Vector(Int vector, Map Vector(Int vector)) + * 1Int vectorMap Vector(Int vector, Map Vector(Int vector)) + * + * + * The above table shows that top-level scalar vectors (such as the Int Vector for column + * a) appear "end-to-end" as a hyper-vector. Maps also appear end-to-end. 
But, the + * contents of the map (column c) do not appear end-to-end. Instead, they appear as + * contents in the map vector. To get to c, one indexes into the map vector, steps inside + * the map to find c and indexes to the right row. + * + * Similarly, the maps for d do not appear end-to-end, one must step to the right batch + * in b, then step to d. + * + * Finally, to get to e, one must step + * into the hyper vector for b, then steps to the proper batch, steps to d, step to e + * and finally step to the row within e. This is a very complex, costly indexing scheme + * that differs depending on map nesting depth. + * + * To simplify access, this class restructures the maps to flatten the scalar vectors + * into end-to-end hyper vectors. For example, for the above: + * + * + * Batchacd + * 0Int vectorInt vectorInt vector + * 1Int vectorInt vectorInt vector + * + * + * The maps are still available as hyper vectors, but separated into map fields. + * (Scalar access no longer needs to access the maps.) The result is a uniform + * addressing scheme for both top-level and nested vectors. + */ + + public static class HyperVectorBuilder { + +protected final HyperVectorWrapper valueVectors[]; +protected final HyperVectorWrapper mapVectors[]; +private final List nestedScalars[]; +private int vectorIndex;
[GitHub] drill pull request #785: DRILL-5323: Test tools for row sets
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/785#discussion_r108777101 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetPrinter.java --- @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.test.rowSet; + +import java.io.PrintStream; + +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; +import org.apache.drill.test.rowSet.RowSet.RowSetReader; +import org.apache.drill.test.rowSet.RowSetSchema.AccessSchema; + +public class RowSetPrinter { + private RowSet rowSet; + + public RowSetPrinter(RowSet rowSet) { +this.rowSet = rowSet; + } + + public void print() { +print(System.out); + } + + public void print(PrintStream out) { +SelectionVectorMode selectionMode = rowSet.getIndirectionType(); +RowSetReader reader = rowSet.reader(); +int colCount = reader.width(); +printSchema(out, selectionMode); +while (reader.next()) { + printHeader(out, reader, selectionMode); + for (int i = 0; i < colCount; i++) { +if (i > 0) { + out.print(", "); +} +out.print(reader.getAsString(i)); + } + out.println(); +} + } + + private void printSchema(PrintStream out, SelectionVectorMode selectionMode) { +out.print("#"); +switch (selectionMode) { +case FOUR_BYTE: + out.print(" (batch #, row #)"); + break; +case TWO_BYTE: + out.print(" (row #)"); + break; +default: + break; +} +out.print(": "); +AccessSchema schema = rowSet.schema().access(); +for (int i = 0; i < schema.count(); i++) { + if (i > 0) { --- End diff -- How about _maps_ inside AccessSchema ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #788: DRILL-5318: Sub-operator test fixture
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/788#discussion_r109555392 --- Diff: exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java --- @@ -271,6 +276,91 @@ public QuerySummary run() throws Exception { } /** + * Run the query and return the first result set as a + * {@link DirectRowSet} object that can be inspected directly + * by the code using a {@link RowSetReader}. + * + * An enhancement is to provide a way to read a series of result + * batches as row sets. + * @return a row set that represents the first batch returned from + * the query + * @throws RpcException if anything goes wrong + */ + + public DirectRowSet rowSet() throws RpcException { + +// Ignore all but the first non-empty batch. + +QueryDataBatch dataBatch = null; +for (QueryDataBatch batch : results()) { + if (dataBatch == null && batch.getHeader().getRowCount() != 0) { +dataBatch = batch; --- End diff -- Ok makes sense after reading this: _"The reason for the funny code is that we want to release memory for all but the first batch."_ --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---