[GitHub] drill pull request: DRILL-4313: Improve method of picking a random...

2016-03-04 Thread parthchandra
Github user parthchandra commented on a diff in the pull request:

https://github.com/apache/drill/pull/396#discussion_r55117425
  
--- Diff: contrib/native/client/src/clientlib/drillClientImpl.cpp ---
@@ -1392,6 +1387,198 @@ void DrillClientQueryResult::clearAndDestroy(){
 }
 }
 
+
+connectionStatus_t PooledDrillClientImpl::connect(const char* connStr){
+connectionStatus_t stat = CONN_SUCCESS;
+std::string pathToDrill, protocol, hostPortStr;
+std::string host;
+std::string port;
+m_connectStr=connStr;
+Utils::parseConnectStr(connStr, pathToDrill, protocol, hostPortStr);
+if(!strcmp(protocol.c_str(), "zk")){
+// Get a list of drillbits
+ZookeeperImpl zook;
+std::vector drillbits;
+int err = zook.getAllDrillbits(hostPortStr.c_str(), 
pathToDrill.c_str(), drillbits);
+if(!err){
+Utils::shuffle(drillbits);
+// The original shuffled order is maintained if we shuffle 
first and then add any missing elements
+Utils::add(m_drillbits, drillbits);
+exec::DrillbitEndpoint e;
+size_t nextIndex=0;
+boost::lock_guard cLock(m_cMutex);
+m_lastConnection++;
+nextIndex = (m_lastConnection)%(getDrillbitCount());
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Pooled Connection"
+<< "(" << (void*)this << ")"
+<< ": Current counter is: " 
+<< m_lastConnection << std::endl;)
+err=zook.getEndPoint(m_drillbits, nextIndex, e);
+if(!err){
+host=boost::lexical_cast(e.address());
+port=boost::lexical_cast(e.user_port());
+}
+}
+if(err){
+return handleConnError(CONN_ZOOKEEPER_ERROR, 
getMessage(ERR_CONN_ZOOKEEPER, zook.getError().c_str()));
+}
+zook.close();
+m_bIsDirectConnection=false;
+}else if(!strcmp(protocol.c_str(), "local")){
+char tempStr[MAX_CONNECT_STR+1];
+strncpy(tempStr, hostPortStr.c_str(), MAX_CONNECT_STR); 
tempStr[MAX_CONNECT_STR]=0;
+host=strtok(tempStr, ":");
+port=strtok(NULL, "");
+m_bIsDirectConnection=true;
+}else{
+return handleConnError(CONN_INVALID_INPUT, 
getMessage(ERR_CONN_UNKPROTO, protocol.c_str()));
+}
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Connecting to endpoint: (Pooled) 
" << host << ":" << port << std::endl;)
+DrillClientImpl* pDrillClientImpl = new DrillClientImpl();
+stat =  pDrillClientImpl->connect(host.c_str(), port.c_str());
+if(stat == CONN_SUCCESS){
+m_clientConnections.push_back(pDrillClientImpl);
+}else{
+DrillClientError* pErr = pDrillClientImpl->getError();
+handleConnError((connectionStatus_t)pErr->status, pErr->msg);
+delete pDrillClientImpl;
+}
+return stat;
+}
+
+connectionStatus_t 
PooledDrillClientImpl::validateHandshake(DrillUserProperties* props){
+// Assume there is one valid connection to at least one drillbit
+connectionStatus_t stat=CONN_FAILURE;
+// Keep a copy of the user properties
+if(props!=NULL){
+m_pUserProperties = new DrillUserProperties;
+for(size_t i=0; isize(); i++){
+m_pUserProperties->setProperty(
+props->keyAt(i),
+props->valueAt(i)
+);
+}
+}
+DrillClientImpl* pDrillClientImpl = getOneConnection();
+if(pDrillClientImpl != NULL){
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Validating handshake: 
(Pooled) " << pDrillClientImpl->m_connectedHost << std::endl;)
+stat=pDrillClientImpl->validateHandshake(m_pUserProperties);
+}
+else{
+stat =  handleConnError(CONN_NOTCONNECTED, 
getMessage(ERR_CONN_NOCONN));
+}
+return stat;
+}
+
+DrillClientQueryResult* 
PooledDrillClientImpl::SubmitQuery(::exec::shared::QueryType t, const 
std::string& plan, pfnQueryResultsListener listener, void* listenerCtx){
+DrillClientQueryResult* pDrillClientQueryResult = NULL;
+DrillClientImpl* pDrillClientImpl = NULL;
+pDrillClientImpl = getOneConnection();
+if(pDrillClientImpl != NULL){
+
pDrillClientQueryResult=pDrillClientImpl->SubmitQuery(t,plan,listener,listenerCtx);
+m_queriesExecuted++;
+}
+return pDrillClientQueryResult;
+}
+
+void PooledDrillClientImpl::freeQueryResources(DrillClientQueryResult* 
pQryResult){
+// Nothing to do. If this class ever keeps track of executing queries 
then it will need 
+// to

[GitHub] drill pull request: DRILL-4313: Improve method of picking a random...

2016-03-04 Thread parthchandra
Github user parthchandra commented on a diff in the pull request:

https://github.com/apache/drill/pull/396#discussion_r55117339
  
--- Diff: contrib/native/client/example/pooledConnections.cpp ---
@@ -0,0 +1,300 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "drill/drillc.hpp"
+
+int nOptions=5;
+
+struct Option{
+char name[32];
+char desc[128];
+bool required;
+}qsOptions[]= {
+{"query", "Query strings, separated by semicolons", true},
+{"connectStr", "Connect string", true},
+{"logLevel", "Logging level [trace|debug|info|warn|error|fatal]", 
false},
+{"numConnections", "Number of simultaneous connections", true},
+{"numIterations", "Number of iterations to run. Each query is sent to 
each connection this many times", true}
+};
+
+std::map qsOptionValues;
+
+const char* exceptionInject="alter session set 
`drill.exec.testing.controls` = '{ \"injections\" : [{ 
\"type\":\"exception\",\"siteClass\":\"org.apache.drill.exec.work.fragment.FragmentExecutor\",\"desc\":\"fragment-execution\",\"nSkip\":0,\"nFire\":1,\"exceptionClass\":\"java.lang.OutOfMemoryError\"}]}'";
+
+Drill::status_t SchemaListener(void* ctx, Drill::FieldDefPtr fields, 
Drill::DrillClientError* err){
+if(!err){
+printf("SCHEMA CHANGE DETECTED:\n");
+for(size_t i=0; isize(); i++){
+std::string name= fields->at(i)->getName();
+printf("%s\t", name.c_str());
+}
+printf("\n");
+return Drill::QRY_SUCCESS ;
+}else{
+std::cerr<< "ERROR: " << err->msg << std::endl;
+return Drill::QRY_FAILURE;
+}
+}
+
+boost::mutex listenerMutex;
+Drill::status_t QueryResultsListener(void* ctx, Drill::RecordBatch* b, 
Drill::DrillClientError* err){
+boost::lock_guard listenerLock(listenerMutex);
+if(!err){
+if(b!=NULL){
+b->print(std::cout, 0); // print all rows
+std::cout << "DATA RECEIVED ..." << std::endl;
+delete b; // we're done with this batch, we can delete it
+return Drill::QRY_FAILURE;
+}else{
+std::cout << "Query Complete." << std::endl;
+return Drill::QRY_SUCCESS;
+   }
+}else{
+assert(b==NULL);
+switch(err->status) {
+case Drill::QRY_COMPLETED:
+case Drill::QRY_CANCELED:
+std::cerr<< "INFO: " << err->msg << std::endl;
+return Drill::QRY_SUCCESS;
--- End diff --

Yes. Bit of a bad design there. The return value of failure is actually to 
signal that the listener that the query completed. WE added another api to 
check the status explicitly later. Unfortunately, the ODBC driver now uses this 
so hard to change.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request: DRILL-4313: Improve method of picking a random...

2016-03-04 Thread parthchandra
Github user parthchandra commented on a diff in the pull request:

https://github.com/apache/drill/pull/396#discussion_r55117302
  
--- Diff: contrib/native/client/example/pooledConnections.cpp ---
@@ -0,0 +1,300 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "drill/drillc.hpp"
+
+int nOptions=5;
+
+struct Option{
+char name[32];
+char desc[128];
+bool required;
+}qsOptions[]= {
+{"query", "Query strings, separated by semicolons", true},
+{"connectStr", "Connect string", true},
+{"logLevel", "Logging level [trace|debug|info|warn|error|fatal]", 
false},
+{"numConnections", "Number of simultaneous connections", true},
+{"numIterations", "Number of iterations to run. Each query is sent to 
each connection this many times", true}
+};
+
+std::map qsOptionValues;
+
+const char* exceptionInject="alter session set 
`drill.exec.testing.controls` = '{ \"injections\" : [{ 
\"type\":\"exception\",\"siteClass\":\"org.apache.drill.exec.work.fragment.FragmentExecutor\",\"desc\":\"fragment-execution\",\"nSkip\":0,\"nFire\":1,\"exceptionClass\":\"java.lang.OutOfMemoryError\"}]}'";
--- End diff --

What do i need to add?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


Re: Time for the 1.6 Release

2016-03-04 Thread Parth Chandra
Okay here's the list  of  JIRA's pending. It looks like we need to get some
more time to get the PRs still under review merged, so I'll wait over the
weekend.
It looks like the PRs that no reviewers assigned in the list below may not
make it into the release.

Thanks



Committed for 1.6 -

DRILL-4384 - Query profile is missing important information on WebUi -
Merged
DRILL-3488/pr 388 (Java 1.8 support) - Merged.
DRILL-4410/pr 380 (listvector should initiatlize bits...) - Merged
DRILL-4383/pr 375 (Allow custom configs for S3, Kerberos, etc) - Merged
DRILL-4465/pr 401 (Simplify Calcite parsing & planning integration) - Merged
DRILL-4467 - Baselines need updating.
DRILL-4449/pr 389 (Wrong results when metadata cache is used..) Merged
DRILL-4437 (and others)/pr 394 (Operator unit test framework). Waiting to
be merged
DRILL-4449/pr 389 (Wrong results when metadata cache is used..) -  Waiting
to be merged

Need review -
DRILL-4281/pr 400 (Drill should support inbound impersonation) (Jacques to
review)
DRILL-4372/pr 377(?) (Drill Operators and Functions should correctly expose
their types within Calcite.) - (Jinfeng to review)
DRILL-4313/pr 396  (Improved client randomization. Update JIRA with
warnings about using the feature ) (Hanifi/Sudheesh/Paul to review.)
DRILL-4375/pr 402 (Fix the maven release profile) - (Aditya to review)



DRILL-4452/pr 395 (Update Avatica Driver to latest Calcite)
DRILL-4332/pr 389 (Make vector comparison order stable in test framework)
DRILL-4411/pr 381 (hash join over-memory condition)
DRILL-4387/pr 379 (GroupScan should not use star column)
DRILL-4184/pr 372 (support variable length decimal fields in parquet)
DRILL-4120 - dir0 does not work when the directory structure contains Avro
files - Partial patch available.
DRILL-4203/pr 341 (fix dates written into parquet files to conform to
parquet format spec)


On Fri, Mar 4, 2016 at 6:24 PM, Jacques Nadeau  wrote:

> Awesome. Thanks Chun!
> On Mar 4, 2016 5:51 PM, "Chun Chang"  wrote:
>
> > Jacques submitted a PR for fixing the failed baselines. I've merged them
> > into automation master and confirmed the failed tests are all passing
> now.
> > Thanks.
> >
> > -Chun
> >
> >
> > On Thu, Mar 3, 2016 at 10:48 PM, Jacques Nadeau 
> > wrote:
> >
> > > I think we need to include DRILL-4467
> > > . I think it is a
> one
> > > line patch and it provides unpredictable plans at a minimum but may
> also
> > > present invalid result. Still need to think through the second half.
> I've
> > > seen this plan instability in some of my recent test runs (even without
> > > Java 8) when running extended HBase tests.
> > >
> > > --
> > > Jacques Nadeau
> > > CTO and Co-Founder, Dremio
> > >
> > > On Thu, Mar 3, 2016 at 10:02 PM, Parth Chandra 
> > wrote:
> > >
> > > > Updated list  (I'll follow up with the folks named here separately) -
> > > >
> > > > Committed for 1.6 -
> > > >
> > > > DRILL-4384 - Query profile is missing important information on WebUi
> -
> > > > Merged
> > > > DRILL-3488/pr 388 (Java 1.8 support) - Merged.
> > > > DRILL-4410/pr 380 (listvector should initiatlize bits...) - Merged
> > > > DRILL-4383/pr 375 (Allow custom configs for S3, Kerberos, etc) -
> Merged
> > > > DRILL-4465/pr 401 (Simplify Calcite parsing & planning integration) -
> > > > Waiting to be merged
> > > > DRILL-4437 (and others)/pr 394 (Operator unit test framework).
> Waiting
> > to
> > > > be merged.
> > > >
> > > > DRILL-4281/pr 400 (Drill should support inbound impersonation)
> (Jacques
> > > to
> > > > review)
> > > > DRILL-4372/pr 377(?) (Drill Operators and Functions should correctly
> > > expose
> > > > their types within Calcite.) - Waiting for Aman to review. (Owners:
> > > Hsuan,
> > > > Jinfeng, Aman, Sudheesh)
> > > > DRILL-4313/pr 396  (Improved client randomization. Update JIRA with
> > > > warnings about using the feature ) (Sudheesh to review.)
> > > > DRILL-4449/pr 389 (Wrong results when metadata cache is used..) (Aman
> > to
> > > > review)
> > > > DRILL-4069/pr 352 Enable RPC thread offload by default (Owner:
> > Sudheesh)
> > > >
> > > > Need review -
> > > > DRILL-4375/pr 402 (Fix the maven release profile)
> > > > DRILL-4452/pr 395 (Update Avatica Driver to latest Calcite)
> > > > DRILL-4332/pr 389 (Make vector comparison order stable in test
> > framework)
> > > > DRILL-4411/pr 381 (hash join over-memory condition)
> > > > DRILL-4387/pr 379 (GroupScan should not use star column)
> > > > DRILL-4184/pr 372 (support variable length decimal fields in parquet)
> > > > DRILL-4120 - dir0 does not work when the directory structure contains
> > > Avro
> > > > files - Partial patch available.
> > > > DRILL-4203/pr 341 (fix dates written into parquet files to conform to
> > > > parquet format spec)
> > > >
> > > > Not included (yet) -
> > > > DRILL-3149 - No patch available
> > > > DRILL-4441 - IN operator does not work with Avro reader - No patch
> > > > available
> > > > DRILL-3745/pr 399 - Hive cha

Re: Time for the 1.6 Release

2016-03-04 Thread Jacques Nadeau
Awesome. Thanks Chun!
On Mar 4, 2016 5:51 PM, "Chun Chang"  wrote:

> Jacques submitted a PR for fixing the failed baselines. I've merged them
> into automation master and confirmed the failed tests are all passing now.
> Thanks.
>
> -Chun
>
>
> On Thu, Mar 3, 2016 at 10:48 PM, Jacques Nadeau 
> wrote:
>
> > I think we need to include DRILL-4467
> > . I think it is a one
> > line patch and it provides unpredictable plans at a minimum but may also
> > present invalid result. Still need to think through the second half. I've
> > seen this plan instability in some of my recent test runs (even without
> > Java 8) when running extended HBase tests.
> >
> > --
> > Jacques Nadeau
> > CTO and Co-Founder, Dremio
> >
> > On Thu, Mar 3, 2016 at 10:02 PM, Parth Chandra 
> wrote:
> >
> > > Updated list  (I'll follow up with the folks named here separately) -
> > >
> > > Committed for 1.6 -
> > >
> > > DRILL-4384 - Query profile is missing important information on WebUi -
> > > Merged
> > > DRILL-3488/pr 388 (Java 1.8 support) - Merged.
> > > DRILL-4410/pr 380 (listvector should initiatlize bits...) - Merged
> > > DRILL-4383/pr 375 (Allow custom configs for S3, Kerberos, etc) - Merged
> > > DRILL-4465/pr 401 (Simplify Calcite parsing & planning integration) -
> > > Waiting to be merged
> > > DRILL-4437 (and others)/pr 394 (Operator unit test framework). Waiting
> to
> > > be merged.
> > >
> > > DRILL-4281/pr 400 (Drill should support inbound impersonation) (Jacques
> > to
> > > review)
> > > DRILL-4372/pr 377(?) (Drill Operators and Functions should correctly
> > expose
> > > their types within Calcite.) - Waiting for Aman to review. (Owners:
> > Hsuan,
> > > Jinfeng, Aman, Sudheesh)
> > > DRILL-4313/pr 396  (Improved client randomization. Update JIRA with
> > > warnings about using the feature ) (Sudheesh to review.)
> > > DRILL-4449/pr 389 (Wrong results when metadata cache is used..) (Aman
> to
> > > review)
> > > DRILL-4069/pr 352 Enable RPC thread offload by default (Owner:
> Sudheesh)
> > >
> > > Need review -
> > > DRILL-4375/pr 402 (Fix the maven release profile)
> > > DRILL-4452/pr 395 (Update Avatica Driver to latest Calcite)
> > > DRILL-4332/pr 389 (Make vector comparison order stable in test
> framework)
> > > DRILL-4411/pr 381 (hash join over-memory condition)
> > > DRILL-4387/pr 379 (GroupScan should not use star column)
> > > DRILL-4184/pr 372 (support variable length decimal fields in parquet)
> > > DRILL-4120 - dir0 does not work when the directory structure contains
> > Avro
> > > files - Partial patch available.
> > > DRILL-4203/pr 341 (fix dates written into parquet files to conform to
> > > parquet format spec)
> > >
> > > Not included (yet) -
> > > DRILL-3149 - No patch available
> > > DRILL-4441 - IN operator does not work with Avro reader - No patch
> > > available
> > > DRILL-3745/pr 399 - Hive char support - New feature - Needs QA - Not
> > > included in 1.6
> > > DRILL-3623 - Limit 0 should avoid execution when querying a known
> schema.
> > > (Need to add limitations of current impl). Intrusive change; should be
> > > included at beginning of release cycle.
> > > DRILL-4416/pr 385 (quote path separator) (Owner: Hanifi) - Causes leak.
> > >
> > > Others -
> > > DRILL-2517   - Already resolved.
> > > DRILL-3688/pr 382 (skip.header.line.count in hive). - Already merged.
> PR
> > > needs to be closed.
> > >
> > >
> > > On Thu, Mar 3, 2016 at 9:44 PM, Parth Chandra 
> wrote:
> > >
> > > > Right. My mistake. Thanks, Jacques, for reviewing.
> > > >
> > > > On Thu, Mar 3, 2016 at 9:08 PM, Zelaine Fong 
> > wrote:
> > > >
> > > >> DRILL-4281/pr 400 (Drill should support inbound impersonation)
> > (Sudheesh
> > > >> to
> > > >> review)
> > > >>
> > > >> Sudheesh is the fixer of DRILL-4281, so I don't think he can be the
> > > >> reviewer :).
> > > >>
> > > >> -- Zelaine
> > > >>
> > > >> On Thu, Mar 3, 2016 at 6:30 PM, Parth Chandra 
> > > wrote:
> > > >>
> > > >> > Here's an updated list with names of reviewers added. If anyone
> else
> > > is
> > > >> > reviewing the open PRs please let me know. Some PRs have owners
> > names
> > > >> that
> > > >> > I will follow up with.
> > > >> > Jason, I've included your JIRA in the list.
> > > >> >
> > > >> >
> > > >> > Committed for 1.6 -
> > > >> >
> > > >> > DRILL-4384 - Query profile is missing important information on
> > WebUi -
> > > >> > Merged
> > > >> > DRILL-3488/pr 388 (Java 1.8 support) - Merged.
> > > >> > DRILL-4410/pr 380 (listvector should initiatlize bits...) - Merged
> > > >> > DRILL-4383/pr 375 (Allow custom configs for S3, Kerberos, etc) -
> > > Merged
> > > >> > DRILL-4465/pr 401 (Simplify Calcite parsing & planning
> integration)
> > -
> > > >> > Waiting to be merged
> > > >> >
> > > >> > DRILL-4281/pr 400 (Drill should support inbound impersonation)
> > > >> (Sudheesh to
> > > >> > review)
> > > >> > DRILL-4372/pr 377(?) (Drill Operators and Functions should
> correctly
> > > >> expose
> 

Re: Time for the 1.6 Release

2016-03-04 Thread Chun Chang
Jacques submitted a PR for fixing the failed baselines. I've merged them
into automation master and confirmed the failed tests are all passing now.
Thanks.

-Chun


On Thu, Mar 3, 2016 at 10:48 PM, Jacques Nadeau  wrote:

> I think we need to include DRILL-4467
> . I think it is a one
> line patch and it provides unpredictable plans at a minimum but may also
> present invalid result. Still need to think through the second half. I've
> seen this plan instability in some of my recent test runs (even without
> Java 8) when running extended HBase tests.
>
> --
> Jacques Nadeau
> CTO and Co-Founder, Dremio
>
> On Thu, Mar 3, 2016 at 10:02 PM, Parth Chandra  wrote:
>
> > Updated list  (I'll follow up with the folks named here separately) -
> >
> > Committed for 1.6 -
> >
> > DRILL-4384 - Query profile is missing important information on WebUi -
> > Merged
> > DRILL-3488/pr 388 (Java 1.8 support) - Merged.
> > DRILL-4410/pr 380 (listvector should initiatlize bits...) - Merged
> > DRILL-4383/pr 375 (Allow custom configs for S3, Kerberos, etc) - Merged
> > DRILL-4465/pr 401 (Simplify Calcite parsing & planning integration) -
> > Waiting to be merged
> > DRILL-4437 (and others)/pr 394 (Operator unit test framework). Waiting to
> > be merged.
> >
> > DRILL-4281/pr 400 (Drill should support inbound impersonation) (Jacques
> to
> > review)
> > DRILL-4372/pr 377(?) (Drill Operators and Functions should correctly
> expose
> > their types within Calcite.) - Waiting for Aman to review. (Owners:
> Hsuan,
> > Jinfeng, Aman, Sudheesh)
> > DRILL-4313/pr 396  (Improved client randomization. Update JIRA with
> > warnings about using the feature ) (Sudheesh to review.)
> > DRILL-4449/pr 389 (Wrong results when metadata cache is used..) (Aman to
> > review)
> > DRILL-4069/pr 352 Enable RPC thread offload by default (Owner: Sudheesh)
> >
> > Need review -
> > DRILL-4375/pr 402 (Fix the maven release profile)
> > DRILL-4452/pr 395 (Update Avatica Driver to latest Calcite)
> > DRILL-4332/pr 389 (Make vector comparison order stable in test framework)
> > DRILL-4411/pr 381 (hash join over-memory condition)
> > DRILL-4387/pr 379 (GroupScan should not use star column)
> > DRILL-4184/pr 372 (support variable length decimal fields in parquet)
> > DRILL-4120 - dir0 does not work when the directory structure contains
> Avro
> > files - Partial patch available.
> > DRILL-4203/pr 341 (fix dates written into parquet files to conform to
> > parquet format spec)
> >
> > Not included (yet) -
> > DRILL-3149 - No patch available
> > DRILL-4441 - IN operator does not work with Avro reader - No patch
> > available
> > DRILL-3745/pr 399 - Hive char support - New feature - Needs QA - Not
> > included in 1.6
> > DRILL-3623 - Limit 0 should avoid execution when querying a known schema.
> > (Need to add limitations of current impl). Intrusive change; should be
> > included at beginning of release cycle.
> > DRILL-4416/pr 385 (quote path separator) (Owner: Hanifi) - Causes leak.
> >
> > Others -
> > DRILL-2517   - Already resolved.
> > DRILL-3688/pr 382 (skip.header.line.count in hive). - Already merged. PR
> > needs to be closed.
> >
> >
> > On Thu, Mar 3, 2016 at 9:44 PM, Parth Chandra  wrote:
> >
> > > Right. My mistake. Thanks, Jacques, for reviewing.
> > >
> > > On Thu, Mar 3, 2016 at 9:08 PM, Zelaine Fong 
> wrote:
> > >
> > >> DRILL-4281/pr 400 (Drill should support inbound impersonation)
> (Sudheesh
> > >> to
> > >> review)
> > >>
> > >> Sudheesh is the fixer of DRILL-4281, so I don't think he can be the
> > >> reviewer :).
> > >>
> > >> -- Zelaine
> > >>
> > >> On Thu, Mar 3, 2016 at 6:30 PM, Parth Chandra 
> > wrote:
> > >>
> > >> > Here's an updated list with names of reviewers added. If anyone else
> > is
> > >> > reviewing the open PRs please let me know. Some PRs have owners
> names
> > >> that
> > >> > I will follow up with.
> > >> > Jason, I've included your JIRA in the list.
> > >> >
> > >> >
> > >> > Committed for 1.6 -
> > >> >
> > >> > DRILL-4384 - Query profile is missing important information on
> WebUi -
> > >> > Merged
> > >> > DRILL-3488/pr 388 (Java 1.8 support) - Merged.
> > >> > DRILL-4410/pr 380 (listvector should initiatlize bits...) - Merged
> > >> > DRILL-4383/pr 375 (Allow custom configs for S3, Kerberos, etc) -
> > Merged
> > >> > DRILL-4465/pr 401 (Simplify Calcite parsing & planning integration)
> -
> > >> > Waiting to be merged
> > >> >
> > >> > DRILL-4281/pr 400 (Drill should support inbound impersonation)
> > >> (Sudheesh to
> > >> > review)
> > >> > DRILL-4372/pr 377(?) (Drill Operators and Functions should correctly
> > >> expose
> > >> > their types within Calcite.) - Waiting for Aman to review. (Owners:
> > >> Hsuan,
> > >> > Jinfeng, Aman, Sudheesh)
> > >> > DRILL-4313/pr 396  (Improved client randomization. Update JIRA with
> > >> > warnings about using the feature ) (Sudheesh to review.)
> > >> > DRILL-4437 (and others)/pr 394 (Operator unit test framework).
> (P

[jira] [Created] (DRILL-4479) JsonReader should pick a less restrictive type when creating the default column

2016-03-04 Thread Aman Sinha (JIRA)
Aman Sinha created DRILL-4479:
-

 Summary: JsonReader should pick a less restrictive type when 
creating the default column
 Key: DRILL-4479
 URL: https://issues.apache.org/jira/browse/DRILL-4479
 Project: Apache Drill
  Issue Type: Bug
  Components: Storage - JSON
Affects Versions: 1.5.0
Reporter: Aman Sinha


This JIRA is related to DRILL-3806 but has a narrower scope, so I decided to 
create separate one. 

The JsonReader has the method ensureAtLeastOneField() (see 
https://github.com/apache/drill/blob/master/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java#L91)
 that ensures that when no columns are found, create an empty one and it 
chooses to create a nullable int column.  One consequence is that queries of 
the following type fail:

{noformat}
select c1 from dfs.`mostlynulls.json`;
...
...
| null  |
| null  |
Error: DATA_READ ERROR: Error parsing JSON - You tried to write a VarChar type 
when you are using a ValueWriter of type NullableIntWriterImpl.

File  /Users/asinha/data/mostlynulls.json
Record  4097
{noformat}

In this file the first 4096 rows have NULL values for c1 followed by rows that 
have a valid string.  

It would be useful for the Json reader to choose a less restrictive type such 
as varchar in order to allow more types of queries to run.  





--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[GitHub] drill pull request: DRILL-4313: Improve method of picking a random...

2016-03-04 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/396#discussion_r55112200
  
--- Diff: contrib/native/client/example/pooledConnections.cpp ---
@@ -0,0 +1,300 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "drill/drillc.hpp"
+
+int nOptions=5;
+
+struct Option{
+char name[32];
+char desc[128];
+bool required;
+}qsOptions[]= {
+{"query", "Query strings, separated by semicolons", true},
+{"connectStr", "Connect string", true},
+{"logLevel", "Logging level [trace|debug|info|warn|error|fatal]", 
false},
+{"numConnections", "Number of simultaneous connections", true},
+{"numIterations", "Number of iterations to run. Each query is sent to 
each connection this many times", true}
+};
+
+std::map qsOptionValues;
+
+const char* exceptionInject="alter session set 
`drill.exec.testing.controls` = '{ \"injections\" : [{ 
\"type\":\"exception\",\"siteClass\":\"org.apache.drill.exec.work.fragment.FragmentExecutor\",\"desc\":\"fragment-execution\",\"nSkip\":0,\"nFire\":1,\"exceptionClass\":\"java.lang.OutOfMemoryError\"}]}'";
--- End diff --

not valid; missing injection site in FragmentExecutor?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request: DRILL-4313: Improve method of picking a random...

2016-03-04 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/396#discussion_r55112202
  
--- Diff: contrib/native/client/example/pooledConnections.cpp ---
@@ -0,0 +1,300 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "drill/drillc.hpp"
+
+int nOptions=5;
+
+struct Option{
+char name[32];
+char desc[128];
+bool required;
+}qsOptions[]= {
+{"query", "Query strings, separated by semicolons", true},
+{"connectStr", "Connect string", true},
+{"logLevel", "Logging level [trace|debug|info|warn|error|fatal]", 
false},
+{"numConnections", "Number of simultaneous connections", true},
+{"numIterations", "Number of iterations to run. Each query is sent to 
each connection this many times", true}
+};
+
+std::map qsOptionValues;
+
+const char* exceptionInject="alter session set 
`drill.exec.testing.controls` = '{ \"injections\" : [{ 
\"type\":\"exception\",\"siteClass\":\"org.apache.drill.exec.work.fragment.FragmentExecutor\",\"desc\":\"fragment-execution\",\"nSkip\":0,\"nFire\":1,\"exceptionClass\":\"java.lang.OutOfMemoryError\"}]}'";
+
+Drill::status_t SchemaListener(void* ctx, Drill::FieldDefPtr fields, 
Drill::DrillClientError* err){
+if(!err){
+printf("SCHEMA CHANGE DETECTED:\n");
+for(size_t i=0; isize(); i++){
+std::string name= fields->at(i)->getName();
+printf("%s\t", name.c_str());
+}
+printf("\n");
+return Drill::QRY_SUCCESS ;
+}else{
+std::cerr<< "ERROR: " << err->msg << std::endl;
+return Drill::QRY_FAILURE;
+}
+}
+
+boost::mutex listenerMutex;
+Drill::status_t QueryResultsListener(void* ctx, Drill::RecordBatch* b, 
Drill::DrillClientError* err){
+boost::lock_guard listenerLock(listenerMutex);
+if(!err){
+if(b!=NULL){
+b->print(std::cout, 0); // print all rows
+std::cout << "DATA RECEIVED ..." << std::endl;
+delete b; // we're done with this batch, we can delete it
+return Drill::QRY_FAILURE;
+}else{
+std::cout << "Query Complete." << std::endl;
+return Drill::QRY_SUCCESS;
+   }
+}else{
+assert(b==NULL);
+switch(err->status) {
+case Drill::QRY_COMPLETED:
+case Drill::QRY_CANCELED:
+std::cerr<< "INFO: " << err->msg << std::endl;
+return Drill::QRY_SUCCESS;
--- End diff --

Confusing, since query succeeds when error != null ?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request: DRILL-4313: Improve method of picking a random...

2016-03-04 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/396#discussion_r55112213
  
--- Diff: contrib/native/client/src/clientlib/drillClientImpl.cpp ---
@@ -1392,6 +1387,198 @@ void DrillClientQueryResult::clearAndDestroy(){
 }
 }
 
+
+connectionStatus_t PooledDrillClientImpl::connect(const char* connStr){
+connectionStatus_t stat = CONN_SUCCESS;
+std::string pathToDrill, protocol, hostPortStr;
+std::string host;
+std::string port;
+m_connectStr=connStr;
+Utils::parseConnectStr(connStr, pathToDrill, protocol, hostPortStr);
+if(!strcmp(protocol.c_str(), "zk")){
+// Get a list of drillbits
+ZookeeperImpl zook;
+std::vector drillbits;
+int err = zook.getAllDrillbits(hostPortStr.c_str(), 
pathToDrill.c_str(), drillbits);
+if(!err){
+Utils::shuffle(drillbits);
+// The original shuffled order is maintained if we shuffle 
first and then add any missing elements
+Utils::add(m_drillbits, drillbits);
+exec::DrillbitEndpoint e;
+size_t nextIndex=0;
+boost::lock_guard cLock(m_cMutex);
+m_lastConnection++;
+nextIndex = (m_lastConnection)%(getDrillbitCount());
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Pooled Connection"
+<< "(" << (void*)this << ")"
+<< ": Current counter is: " 
+<< m_lastConnection << std::endl;)
+err=zook.getEndPoint(m_drillbits, nextIndex, e);
+if(!err){
+host=boost::lexical_cast(e.address());
+port=boost::lexical_cast(e.user_port());
+}
+}
+if(err){
+return handleConnError(CONN_ZOOKEEPER_ERROR, 
getMessage(ERR_CONN_ZOOKEEPER, zook.getError().c_str()));
+}
+zook.close();
+m_bIsDirectConnection=false;
+}else if(!strcmp(protocol.c_str(), "local")){
+char tempStr[MAX_CONNECT_STR+1];
+strncpy(tempStr, hostPortStr.c_str(), MAX_CONNECT_STR); 
tempStr[MAX_CONNECT_STR]=0;
+host=strtok(tempStr, ":");
+port=strtok(NULL, "");
+m_bIsDirectConnection=true;
+}else{
+return handleConnError(CONN_INVALID_INPUT, 
getMessage(ERR_CONN_UNKPROTO, protocol.c_str()));
+}
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Connecting to endpoint: (Pooled) 
" << host << ":" << port << std::endl;)
+DrillClientImpl* pDrillClientImpl = new DrillClientImpl();
+stat =  pDrillClientImpl->connect(host.c_str(), port.c_str());
+if(stat == CONN_SUCCESS){
+m_clientConnections.push_back(pDrillClientImpl);
+}else{
+DrillClientError* pErr = pDrillClientImpl->getError();
+handleConnError((connectionStatus_t)pErr->status, pErr->msg);
+delete pDrillClientImpl;
+}
+return stat;
+}
+
+connectionStatus_t 
PooledDrillClientImpl::validateHandshake(DrillUserProperties* props){
+// Assume there is one valid connection to at least one drillbit
+connectionStatus_t stat=CONN_FAILURE;
+// Keep a copy of the user properties
+if(props!=NULL){
+m_pUserProperties = new DrillUserProperties;
+for(size_t i=0; isize(); i++){
+m_pUserProperties->setProperty(
+props->keyAt(i),
+props->valueAt(i)
+);
+}
+}
+DrillClientImpl* pDrillClientImpl = getOneConnection();
+if(pDrillClientImpl != NULL){
+DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Validating handshake: 
(Pooled) " << pDrillClientImpl->m_connectedHost << std::endl;)
+stat=pDrillClientImpl->validateHandshake(m_pUserProperties);
+}
+else{
+stat =  handleConnError(CONN_NOTCONNECTED, 
getMessage(ERR_CONN_NOCONN));
+}
+return stat;
+}
+
+DrillClientQueryResult* 
PooledDrillClientImpl::SubmitQuery(::exec::shared::QueryType t, const 
std::string& plan, pfnQueryResultsListener listener, void* listenerCtx){
+DrillClientQueryResult* pDrillClientQueryResult = NULL;
+DrillClientImpl* pDrillClientImpl = NULL;
+pDrillClientImpl = getOneConnection();
+if(pDrillClientImpl != NULL){
+
pDrillClientQueryResult=pDrillClientImpl->SubmitQuery(t,plan,listener,listenerCtx);
+m_queriesExecuted++;
+}
+return pDrillClientQueryResult;
+}
+
+void PooledDrillClientImpl::freeQueryResources(DrillClientQueryResult* 
pQryResult){
+// Nothing to do. If this class ever keeps track of executing queries 
then it will need 
+// 

[jira] [Created] (DRILL-4478) binary_string cannot convert buffer that were not start from 0 correctly

2016-03-04 Thread Chunhui Shi (JIRA)
Chunhui Shi created DRILL-4478:
--

 Summary: binary_string cannot convert buffer that were not start 
from 0 correctly
 Key: DRILL-4478
 URL: https://issues.apache.org/jira/browse/DRILL-4478
 Project: Apache Drill
  Issue Type: Bug
  Components: Execution - Codegen
Reporter: Chunhui Shi


When binary_string was called multiple times, it can only convert the first one 
correctly if the drillbuf start from 0. For the second and afterwards calls, 
because the drillbuf is not starting from 0 thus 
DrillStringUtils.parseBinaryString could not do the work correctly.




--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[GitHub] drill pull request: DRILL-4281: Support authorized proxy users to ...

2016-03-04 Thread sudheeshkatkam
Github user sudheeshkatkam commented on the pull request:

https://github.com/apache/drill/pull/400#issuecomment-192538620
  
@jacques-n can you please review?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request: DRILL-4281: Support authorized users to delega...

2016-03-04 Thread sudheeshkatkam
Github user sudheeshkatkam commented on the pull request:

https://github.com/apache/drill/pull/400#issuecomment-192535617
  
I apologize that my terminology is confusing everyone.

I've updated the [design 
document](https://docs.google.com/document/d/1g0KgugVdRbbIxxZrSCtO1PEHlvwczTLDb38k-npvwjA)
 and the PR to include terms that (hopefully) users and developers can 
understand.

(1) User delegation itself is not a suitable name for this feature, since 
this feature is extending the current impersonation model. So when _user 
impersonation_ is enabled, this _inbound_ or _client_ impersonation feature is 
also enabled.
(2) I am going to use a variant of Jacques' suggestion for _inbound_ 
impersonation policies. I think `proxy_principals` and `target_principals` are 
apt. For example, an admin would setup policies by using:
```
ALTER SYSTEM SET `exec.impersonation.inbound_policies` = '[
  { proxy_principals  : { users : [ "user0_1"] },
target_principals : { users : ["*"] } },
  { proxy_principals  : { groups : ["group5_1"] },
target_principals : { groups : ["group4_2"] } }
]';
```

Overall, when impersonation is enabled, both _inbound_ impersonation and 
_outbound_ impersonation are  allowed.
+ _Inbound_ or _client_ impersonation: an authorized user can impersonate a 
target user, and the session user is set accordingly.
+ _Outbound_ or _storage plugin_ impersonation: Drill itself impersonates 
as the session user when interacting with storage plugins.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[jira] [Created] (DRILL-4477) Wrong Plan (potentially wrong result) if wrapping a query with SELECT * FROM

2016-03-04 Thread Sean Hsuan-Yi Chu (JIRA)
Sean Hsuan-Yi Chu created DRILL-4477:


 Summary: Wrong Plan (potentially wrong result) if wrapping a query 
with SELECT * FROM
 Key: DRILL-4477
 URL: https://issues.apache.org/jira/browse/DRILL-4477
 Project: Apache Drill
  Issue Type: Bug
  Components: Query Planning & Optimization
Reporter: Sean Hsuan-Yi Chu


For example, a query  
{code}
select * from (select s.name, v.name, v.registration from 
cp.`tpch/region.parquet` s left outer join cp.`tpch/nation.parquet` v
on (s.name = v.name) 
where s.age < 30) t 
{code}
gives a plan as below:
{code}
+--+--+
| text | json |
+--+--+
| 00-00Screen
00-01  Project(name=[$0], name0=[$1], registration=[$2])
00-02Project(name=[$0], name0=[$0], registration=[$3])
00-03  Project(name=[$2], age=[$3], name0=[$0], registration=[$1])
00-04HashJoin(condition=[=($2, $0)], joinType=[right])
00-06  Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath 
[path=classpath:/tpch/nation.parquet]], 
selectionRoot=classpath:/tpch/nation.parquet, numFiles=1, 
usedMetadataFile=false, columns=[`name`, `registration`]]])
00-05  Project(name0=[$0], age=[$1])
00-07SelectionVectorRemover
00-08  Filter(condition=[<($1, 30)])
00-09Scan(groupscan=[ParquetGroupScan 
[entries=[ReadEntryWithPath [path=classpath:/tpch/region.parquet]], 
selectionRoot=classpath:/tpch/region.parquet, numFiles=1, 
usedMetadataFile=false, columns=[`name`, `age`]]])
{code}
In the line 00-02, both name and name0 point at the same incoming column 
(probably due to the JOIN CONDITION). 

However. the fact that these two are the JOIN condition does not make a case 
that they must be equal since implicit casting might be invoked to perform the 
JOIN condition.

Interestingly, if the SELECT * FROM wrapper is removed, this bug won't be 
exposed: 
{code}
select s.name, v.name, v.registration from cp.`tpch/region.parquet` s left 
outer join cp.`tpch/nation.parquet` v on (s.name = v.name) 
where s.age < 30
{code}

gives 
{code}
00-00Screen
00-01  Project(name=[$0], name0=[$1], registration=[$2])
00-02Project(name=[$2], name0=[$0], registration=[$1])
00-03  HashJoin(condition=[=($2, $0)], joinType=[right])
00-05Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath 
[path=classpath:/tpch/nation.parquet]], 
selectionRoot=classpath:/tpch/nation.parquet, numFiles=1, 
usedMetadataFile=false, columns=[`name`, `registration`]]])
00-04Project(name0=[$0])
00-06  Project(name=[$0])
00-07SelectionVectorRemover
00-08  Filter(condition=[<($1, 30)])
00-09Scan(groupscan=[ParquetGroupScan 
[entries=[ReadEntryWithPath [path=classpath:/tpch/region.parquet]], 
selectionRoot=classpath:/tpch/region.parquet, numFiles=1, 
usedMetadataFile=false, columns=[`name`, `age`]]])
{code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[GitHub] drill pull request: DRILL-4474: Ensure that ConvertCountToDirectSc...

2016-03-04 Thread amansinha100
Github user amansinha100 commented on the pull request:

https://github.com/apache/drill/pull/406#issuecomment-192531157
  
+1.   Yes, seems like the project was not being checked at all and the 
nullability check did not work for expressions inside the count aggregate. 


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request: DRILL-4281: Support authorized users to delega...

2016-03-04 Thread sudheeshkatkam
Github user sudheeshkatkam commented on a diff in the pull request:

https://github.com/apache/drill/pull/400#discussion_r55106082
  
--- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java ---
@@ -88,6 +89,7 @@
   String USER_AUTHENTICATION_ENABLED = 
"drill.exec.security.user.auth.enabled";
   String USER_AUTHENTICATOR_IMPL = "drill.exec.security.user.auth.impl";
   String PAM_AUTHENTICATOR_PROFILES = 
"drill.exec.security.user.auth.pam_profiles";
+  String USER_DELEGATION_ENABLED = "drill.exec.delegation.enabled";
--- End diff --

Makes sense.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request: DRILL-4449: Wrong results when metadata cache ...

2016-03-04 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/drill/pull/392


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request: DRILL-4476: Allow UnionAllRecordBatch to manag...

2016-03-04 Thread hsuanyi
Github user hsuanyi commented on the pull request:

https://github.com/apache/drill/pull/407#issuecomment-192523111
  
@amansinha100 can you help review this?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request: DRILL-4476: Allow UnionAllRecordBatch to manag...

2016-03-04 Thread hsuanyi
GitHub user hsuanyi opened a pull request:

https://github.com/apache/drill/pull/407

DRILL-4476: Allow UnionAllRecordBatch to manager situations where lef…

…t input side or both sides come(s) from empty source(s).

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/hsuanyi/incubator-drill DRILL-4476

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/drill/pull/407.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #407


commit 5a384a14f23827aee8b20a8ac6529b33bc66ee2e
Author: Hsuan-Yi Chu 
Date:   2016-03-04T21:50:02Z

DRILL-4476: Allow UnionAllRecordBatch to manager situations where left 
input side or both sides come(s) from empty source(s).




---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[jira] [Created] (DRILL-4476) Enhance Union-All operator for dealing with empty left input or empty both inputs

2016-03-04 Thread Sean Hsuan-Yi Chu (JIRA)
Sean Hsuan-Yi Chu created DRILL-4476:


 Summary: Enhance Union-All operator for dealing with empty left 
input or empty both inputs
 Key: DRILL-4476
 URL: https://issues.apache.org/jira/browse/DRILL-4476
 Project: Apache Drill
  Issue Type: Bug
  Components: Execution - Relational Operators
Reporter: Sean Hsuan-Yi Chu
Assignee: Sean Hsuan-Yi Chu


Union-All operator does not deal with the situation where left side comes from 
empty source.

Due to DRILL-2288's enhancement for empty sources, Union-All operator now can 
be allowed to support this scenario.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[GitHub] drill pull request: DRILL-4474: Ensure that ConvertCountToDirectSc...

2016-03-04 Thread jacques-n
GitHub user jacques-n opened a pull request:

https://github.com/apache/drill/pull/406

DRILL-4474: Ensure that ConvertCountToDirectScan only pushes through 
project when project is trivial.

There are probably additional optimizations here but as the code stood, we 
completely ignored whatever was in the project above the scan below the 
count(x).

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/jacques-n/drill DRILL-4474

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/drill/pull/406.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #406


commit 5ab13c68357008c07a86386aa14f6f92d8584e9b
Author: Jacques Nadeau 
Date:   2016-03-04T21:27:26Z

DRILL-4474: Ensure that ConvertCountToDirectScan only pushes through 
project when project is trivial.




---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[jira] [Created] (DRILL-4475) Joins containing join key between varbinary and non-varbinary fails

2016-03-04 Thread Krystal (JIRA)
Krystal created DRILL-4475:
--

 Summary: Joins containing join key between varbinary and 
non-varbinary fails
 Key: DRILL-4475
 URL: https://issues.apache.org/jira/browse/DRILL-4475
 Project: Apache Drill
  Issue Type: Bug
  Components: Query Planning & Optimization
Affects Versions: 1.5.0
Reporter: Krystal
Assignee: Sean Hsuan-Yi Chu


Join queries that contain one join key as varbinary and the other join key as a 
non-varbinary such as varchar fails.  For example:

select s.name, s.age, s.gpa, v.registration from student_csv_v s inner join 
voter_csv_v v on (s.name = v.name);
Error: VALIDATION ERROR: From line 1, column 95 to line 1, column 109: Cannot 
apply '=' to arguments of type ' = '. Supported 
form(s): ' = '

s.name is a varbinary and v.name is char(30).  Drill should implicitly cast the 
join keys to varchar at planning time.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Created] (DRILL-4474) Inconsistent behavior while using COUNT in select (Apache drill 1.2.0)

2016-03-04 Thread Shankar (JIRA)
Shankar created DRILL-4474:
--

 Summary: Inconsistent behavior while using COUNT in select (Apache 
drill 1.2.0)
 Key: DRILL-4474
 URL: https://issues.apache.org/jira/browse/DRILL-4474
 Project: Apache Drill
  Issue Type: Test
Affects Versions: 1.2.0
 Environment: m3.xlarge AWS instances ( 3 nodes)
CentOS6.5 x64
Reporter: Shankar


{quote}

* we are using drill to retrieve the business data from game analytic. 
* we are running below queries on table of size 50GB (parquet)

* We have found some major inconsistency in data when we use COUNT function.
* Below is the case by case queries and their output. {color:blue}*Please 
analyse it carefully, to for clear understanding of behaviour. *{color}

* Please let me know how to resolve this ? (or any earlier JIRA has been 
already created). 
* Hope this may be fixed in later versions. If not please do the needful.

{quote}


--
CASE-1 (Wrong result)
--


{color:red}
{quote}


0: jdbc:drill:> select  
. . . . . . . > count(case when t.id = '/confirmDrop/btnYes/' and t.event = 
'Click' then sessionid end) as cnt
. . . . . . . > from dfs.tmp.a_games_log_visit_base t
. . . . . . . > ; 
+---+
|   count   |
+---+
| 27645752  |
+---+
1 row selected (0.281 seconds)

{quote}


--
CASE-2 (Wrong result)
--
{quote}

0: jdbc:drill:> select  
. . . . . . . > count(sessionid), 
. . . . . . . > count(case when t.id = '/confirmDrop/btnYes/' and t.event = 
'Click' then sessionid end) as cnt
. . . . . . . > from dfs.tmp.a_games_log_visit_base t
. . . . . . . > ; 
+---+---+
|  EXPR$0   |  cnt  |
+---+---+
| 37772844  | 2108  |
+---+---+
1 row selected (12.597 seconds)
{quote}




--
CASE-3 (Wrong result, only first count is correct)
--
{quote}

0: jdbc:drill:> select  
. . . . . . . > count(distinct sessionid), 
. . . . . . . > count(case when t.id = '/confirmDrop/btnYes/' and t.event = 
'Click' then sessionid end) as cnt
. . . . . . . > from dfs.tmp.a_games_log_visit_base t
. . . . . . . > ; 
+-+---+
| EXPR$0  |cnt|
+-+---+
| 201941  | 37772844  |
+-+---+
1 row selected (8.259 seconds)
{quote}

{color}



{color:green}
--
CASE-4 (Correct result)
--
{quote}

0: jdbc:drill:> select  
. . . . . . . > count(distinct case when t.id = '/confirmDrop/btnYes/' and 
t.event = 'Click' then sessionid end) as cnt
. . . . . . . > from dfs.tmp.a_games_log_visit_base t
. . . . . . . > ; 
+--+
| cnt  |
+--+
| 525  |
+--+
1 row selected (14.318 seconds)
{quote}


--
CASE-5 (Correct result)
--

{quote}

0: jdbc:drill:> select  
. . . . . . . > count(sessionid),
. . . . . . . > count(distinct sessionid)
. . . . . . . > from dfs.tmp.a_games_log_visit_base t
. . . . . . . > where ( t.id = '/confirmDrop/btnYes/' and t.event = 'Click')
. . . . . . . > ;
+-+-+
| EXPR$0  | EXPR$1  |
+-+-+
| 2108| 525 |
+-+-+
1 row selected (19.355 seconds)

{quote}
{color}



--
CASE-6
--




{quote}


0: jdbc:drill:> explain plan for 
. . . . . . . > 
. . . . . . . > select  
. . . . .

Re: Heads up on trivial project fix: plan baselines in regression suite need updating

2016-03-04 Thread Jacques Nadeau
I think it is ~40 in our suite. Not sure on yours. If you have a failed run
on your side, share the output and I may be able to propose a patch to your
suite:

It is a one line change.

https://github.com/apache/drill/commit/edea8b1cf4e5476d803e8b87c79e08e8c3263e04#diff-ca259849558f34142f1e17066df42a9fR259

Are you guys convinced that this isn't ever a correctness issue? That would
be my main hesitation to removing this. It is clearly a bug.

--
Jacques Nadeau
CTO and Co-Founder, Dremio

On Fri, Mar 4, 2016 at 10:35 AM, Parth Chandra  wrote:

> I'd be more comfortable if we merged this in after the release. Updating
> the test baselines will delay the release considerably - I would want the
> new baselines to be verified manually which is always time consuming.
> How many tests are affected?
>
>
>
> On Fri, Mar 4, 2016 at 10:03 AM, Jacques Nadeau 
> wrote:
>
> > Do you think we should back out? It seemed like this could likely cause
> > correctness issues although we may be safe with our name based
> resolution.
> > On Mar 4, 2016 9:56 AM, "Aman Sinha"  wrote:
> >
> > > @jacques, thanks for the heads-up, although it comes too close to the
> > > release date :).  I agree that the plan tests should be targeted to a
> > > narrow scope by specifying the sub-pattern it is supposed to test.
>  That
> > > said, it is a lot easier for the tester to capture the entire plan
> since
> > > he/she may miss an important detail if a sub-plan is captured, so this
> > > requires close interaction with the developer (which depending on
> various
> > > factors may take longer while the test needs to be checked-in).
> > > BTW, Calcite unit tests capture entire plan.  I am not sure if similar
> > > issue has been discussed on Calcite dev list in the past.
> > >
> > > -Aman
> > >
> > > On Fri, Mar 4, 2016 at 4:19 AM, Jacques Nadeau 
> > wrote:
> > >
> > > > I just merged a simple fix that Laurent found for DRILL-4467.
> > > >
> > > > This fix ensures consistent column ordering when pushing projection
> > into
> > > a
> > > > scan and invalid plans. This is good and was causing excessive
> > operators
> > > > and pushdown failure in some cases.
> > > >
> > > > However, this fix removes a number of trivial projects (that were
> > > > previously not detected as such) in a large set of queries. This
> means
> > > that
> > > > a number of plan baselines will need to be updated in the extended
> > > > regression suite to avoid consideration of the trivial project. This
> > > > underscores an issue I see in these tests. In virtually all cases
> I've
> > > > seen, the purpose of the test shouldn't care whether the trivial
> > project
> > > is
> > > > part of the plan. However, the baseline is over-reaching in its
> > > definition,
> > > > including a bunch of nodes irrelevant to the purpose of the test. One
> > > > example might be here:
> > > >
> > > >
> > > >
> > >
> >
> https://github.com/mapr/drill-test-framework/blob/master/framework/resources/Functional/filter/pushdown/plan/q23.res
> > > >
> > > > In this baseline, we're testing that the filter is pushed past the
> > > > aggregation. That means what we really need to be testing is a
> > multiline
> > > > plan pattern of
> > > >
> > > > HashAgg.*Filter.*Scan.*
> > > >
> > > > or better
> > > >
> > > > HashAgg.*Filter\(condition=\[=\(\$0, 10\)\]\).*Scan.*
> > > >
> > > > However, you can see that the actual expected result includes the
> > > > entire structure of the plan (but not the pushed down filter
> > > > condition). This causes the plan to fail now that DRILL-4467 is
> > > > merged. As part of the fixes to these plans, we should really make
> > > > sure that the scope of the baseline is only focused on the relevant
> > > > issue to avoid nominal changes from causing testing false positives.
> > > >
> > > >
> > > >
> > > > --
> > > > Jacques Nadeau
> > > > CTO and Co-Founder, Dremio
> > > >
> > >
> >
>


Re: Heads up on trivial project fix: plan baselines in regression suite need updating

2016-03-04 Thread Chun Chang
I think it depends on which cluster. I saw 8 failures on most of the
clusters, but 44 failures on one cluster. I guess all of them need to be
looked at and modified.

On Fri, Mar 4, 2016 at 10:35 AM, Parth Chandra  wrote:

> I'd be more comfortable if we merged this in after the release. Updating
> the test baselines will delay the release considerably - I would want the
> new baselines to be verified manually which is always time consuming.
> How many tests are affected?
>
>
>
> On Fri, Mar 4, 2016 at 10:03 AM, Jacques Nadeau 
> wrote:
>
> > Do you think we should back out? It seemed like this could likely cause
> > correctness issues although we may be safe with our name based
> resolution.
> > On Mar 4, 2016 9:56 AM, "Aman Sinha"  wrote:
> >
> > > @jacques, thanks for the heads-up, although it comes too close to the
> > > release date :).  I agree that the plan tests should be targeted to a
> > > narrow scope by specifying the sub-pattern it is supposed to test.
>  That
> > > said, it is a lot easier for the tester to capture the entire plan
> since
> > > he/she may miss an important detail if a sub-plan is captured, so this
> > > requires close interaction with the developer (which depending on
> various
> > > factors may take longer while the test needs to be checked-in).
> > > BTW, Calcite unit tests capture entire plan.  I am not sure if similar
> > > issue has been discussed on Calcite dev list in the past.
> > >
> > > -Aman
> > >
> > > On Fri, Mar 4, 2016 at 4:19 AM, Jacques Nadeau 
> > wrote:
> > >
> > > > I just merged a simple fix that Laurent found for DRILL-4467.
> > > >
> > > > This fix ensures consistent column ordering when pushing projection
> > into
> > > a
> > > > scan and invalid plans. This is good and was causing excessive
> > operators
> > > > and pushdown failure in some cases.
> > > >
> > > > However, this fix removes a number of trivial projects (that were
> > > > previously not detected as such) in a large set of queries. This
> means
> > > that
> > > > a number of plan baselines will need to be updated in the extended
> > > > regression suite to avoid consideration of the trivial project. This
> > > > underscores an issue I see in these tests. In virtually all cases
> I've
> > > > seen, the purpose of the test shouldn't care whether the trivial
> > project
> > > is
> > > > part of the plan. However, the baseline is over-reaching in its
> > > definition,
> > > > including a bunch of nodes irrelevant to the purpose of the test. One
> > > > example might be here:
> > > >
> > > >
> > > >
> > >
> >
> https://github.com/mapr/drill-test-framework/blob/master/framework/resources/Functional/filter/pushdown/plan/q23.res
> > > >
> > > > In this baseline, we're testing that the filter is pushed past the
> > > > aggregation. That means what we really need to be testing is a
> > multiline
> > > > plan pattern of
> > > >
> > > > HashAgg.*Filter.*Scan.*
> > > >
> > > > or better
> > > >
> > > > HashAgg.*Filter\(condition=\[=\(\$0, 10\)\]\).*Scan.*
> > > >
> > > > However, you can see that the actual expected result includes the
> > > > entire structure of the plan (but not the pushed down filter
> > > > condition). This causes the plan to fail now that DRILL-4467 is
> > > > merged. As part of the fixes to these plans, we should really make
> > > > sure that the scope of the baseline is only focused on the relevant
> > > > issue to avoid nominal changes from causing testing false positives.
> > > >
> > > >
> > > >
> > > > --
> > > > Jacques Nadeau
> > > > CTO and Co-Founder, Dremio
> > > >
> > >
> >
>


Re: Heads up on trivial project fix: plan baselines in regression suite need updating

2016-03-04 Thread Parth Chandra
I'd be more comfortable if we merged this in after the release. Updating
the test baselines will delay the release considerably - I would want the
new baselines to be verified manually which is always time consuming.
How many tests are affected?



On Fri, Mar 4, 2016 at 10:03 AM, Jacques Nadeau  wrote:

> Do you think we should back out? It seemed like this could likely cause
> correctness issues although we may be safe with our name based resolution.
> On Mar 4, 2016 9:56 AM, "Aman Sinha"  wrote:
>
> > @jacques, thanks for the heads-up, although it comes too close to the
> > release date :).  I agree that the plan tests should be targeted to a
> > narrow scope by specifying the sub-pattern it is supposed to test.   That
> > said, it is a lot easier for the tester to capture the entire plan since
> > he/she may miss an important detail if a sub-plan is captured, so this
> > requires close interaction with the developer (which depending on various
> > factors may take longer while the test needs to be checked-in).
> > BTW, Calcite unit tests capture entire plan.  I am not sure if similar
> > issue has been discussed on Calcite dev list in the past.
> >
> > -Aman
> >
> > On Fri, Mar 4, 2016 at 4:19 AM, Jacques Nadeau 
> wrote:
> >
> > > I just merged a simple fix that Laurent found for DRILL-4467.
> > >
> > > This fix ensures consistent column ordering when pushing projection
> into
> > a
> > > scan and invalid plans. This is good and was causing excessive
> operators
> > > and pushdown failure in some cases.
> > >
> > > However, this fix removes a number of trivial projects (that were
> > > previously not detected as such) in a large set of queries. This means
> > that
> > > a number of plan baselines will need to be updated in the extended
> > > regression suite to avoid consideration of the trivial project. This
> > > underscores an issue I see in these tests. In virtually all cases I've
> > > seen, the purpose of the test shouldn't care whether the trivial
> project
> > is
> > > part of the plan. However, the baseline is over-reaching in its
> > definition,
> > > including a bunch of nodes irrelevant to the purpose of the test. One
> > > example might be here:
> > >
> > >
> > >
> >
> https://github.com/mapr/drill-test-framework/blob/master/framework/resources/Functional/filter/pushdown/plan/q23.res
> > >
> > > In this baseline, we're testing that the filter is pushed past the
> > > aggregation. That means what we really need to be testing is a
> multiline
> > > plan pattern of
> > >
> > > HashAgg.*Filter.*Scan.*
> > >
> > > or better
> > >
> > > HashAgg.*Filter\(condition=\[=\(\$0, 10\)\]\).*Scan.*
> > >
> > > However, you can see that the actual expected result includes the
> > > entire structure of the plan (but not the pushed down filter
> > > condition). This causes the plan to fail now that DRILL-4467 is
> > > merged. As part of the fixes to these plans, we should really make
> > > sure that the scope of the baseline is only focused on the relevant
> > > issue to avoid nominal changes from causing testing false positives.
> > >
> > >
> > >
> > > --
> > > Jacques Nadeau
> > > CTO and Co-Founder, Dremio
> > >
> >
>


[GitHub] drill pull request: DRILL-4281: Support authorized users to delega...

2016-03-04 Thread kbotzum
Github user kbotzum commented on the pull request:

https://github.com/apache/drill/pull/400#issuecomment-192389699
  
Sorry to be dense, but what are the Hadoop terms? In my understanding 
Hadoop uses the term impersonation to mean what I'm calling outbound 
impersonation. I'm not aware of a Hadoop term for inbound impersonation - I 
think only HS2 supports inbound impersonation because of the Hue use case.

I think I'm using both the Hadoop term and the more general term I'm just 
trying to disambiguate.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


Re: Heads up on trivial project fix: plan baselines in regression suite need updating

2016-03-04 Thread Jacques Nadeau
Do you think we should back out? It seemed like this could likely cause
correctness issues although we may be safe with our name based resolution.
On Mar 4, 2016 9:56 AM, "Aman Sinha"  wrote:

> @jacques, thanks for the heads-up, although it comes too close to the
> release date :).  I agree that the plan tests should be targeted to a
> narrow scope by specifying the sub-pattern it is supposed to test.   That
> said, it is a lot easier for the tester to capture the entire plan since
> he/she may miss an important detail if a sub-plan is captured, so this
> requires close interaction with the developer (which depending on various
> factors may take longer while the test needs to be checked-in).
> BTW, Calcite unit tests capture entire plan.  I am not sure if similar
> issue has been discussed on Calcite dev list in the past.
>
> -Aman
>
> On Fri, Mar 4, 2016 at 4:19 AM, Jacques Nadeau  wrote:
>
> > I just merged a simple fix that Laurent found for DRILL-4467.
> >
> > This fix ensures consistent column ordering when pushing projection into
> a
> > scan and invalid plans. This is good and was causing excessive operators
> > and pushdown failure in some cases.
> >
> > However, this fix removes a number of trivial projects (that were
> > previously not detected as such) in a large set of queries. This means
> that
> > a number of plan baselines will need to be updated in the extended
> > regression suite to avoid consideration of the trivial project. This
> > underscores an issue I see in these tests. In virtually all cases I've
> > seen, the purpose of the test shouldn't care whether the trivial project
> is
> > part of the plan. However, the baseline is over-reaching in its
> definition,
> > including a bunch of nodes irrelevant to the purpose of the test. One
> > example might be here:
> >
> >
> >
> https://github.com/mapr/drill-test-framework/blob/master/framework/resources/Functional/filter/pushdown/plan/q23.res
> >
> > In this baseline, we're testing that the filter is pushed past the
> > aggregation. That means what we really need to be testing is a multiline
> > plan pattern of
> >
> > HashAgg.*Filter.*Scan.*
> >
> > or better
> >
> > HashAgg.*Filter\(condition=\[=\(\$0, 10\)\]\).*Scan.*
> >
> > However, you can see that the actual expected result includes the
> > entire structure of the plan (but not the pushed down filter
> > condition). This causes the plan to fail now that DRILL-4467 is
> > merged. As part of the fixes to these plans, we should really make
> > sure that the scope of the baseline is only focused on the relevant
> > issue to avoid nominal changes from causing testing false positives.
> >
> >
> >
> > --
> > Jacques Nadeau
> > CTO and Co-Founder, Dremio
> >
>


[GitHub] drill pull request: DRILL-4281: Support authorized users to delega...

2016-03-04 Thread NeerajaRentachintala
Github user NeerajaRentachintala commented on the pull request:

https://github.com/apache/drill/pull/400#issuecomment-192382894
  
Folks
Why do we need to invent new terms here. What Drill is trying to here is a
very common use case for other tools in Hadoop eco system as well.
Can we just pick Hadoop terminology and move on.


-Neeraja

On Fri, Mar 4, 2016 at 7:23 AM, Keys Botzum 
wrote:

> One suggestion. The terms I use when discussing the concepts with
> customers are Drill inbound impersonation and Drill outbound 
impersonation.
> That is absolutely clear. It does imply implementation to a bit but I 
think
> makes intent very clear. You can then derive other terms from that such as
> "can_impersonate_inbound" and
> "principals_authorized_for_inbound_impersonation." Yea, really wording but
> very clear. We can of course shorten to "princ_auth_inbound_imper" in 
code.
>
> Would that make sense?
>
> By the way the use of the termed chained in the context of security
> usually implied something like chained delegation which makes me think of 
a
> security identity that is both end to end secure like kerberos and 
includes
> all identities that were passed through. That's not this so I'd avoid use
> of the term chained.
>
> —
> Reply to this email directly or view it on GitHub
> .
>



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


Re: Heads up on trivial project fix: plan baselines in regression suite need updating

2016-03-04 Thread Aman Sinha
@jacques, thanks for the heads-up, although it comes too close to the
release date :).  I agree that the plan tests should be targeted to a
narrow scope by specifying the sub-pattern it is supposed to test.   That
said, it is a lot easier for the tester to capture the entire plan since
he/she may miss an important detail if a sub-plan is captured, so this
requires close interaction with the developer (which depending on various
factors may take longer while the test needs to be checked-in).
BTW, Calcite unit tests capture entire plan.  I am not sure if similar
issue has been discussed on Calcite dev list in the past.

-Aman

On Fri, Mar 4, 2016 at 4:19 AM, Jacques Nadeau  wrote:

> I just merged a simple fix that Laurent found for DRILL-4467.
>
> This fix ensures consistent column ordering when pushing projection into a
> scan and invalid plans. This is good and was causing excessive operators
> and pushdown failure in some cases.
>
> However, this fix removes a number of trivial projects (that were
> previously not detected as such) in a large set of queries. This means that
> a number of plan baselines will need to be updated in the extended
> regression suite to avoid consideration of the trivial project. This
> underscores an issue I see in these tests. In virtually all cases I've
> seen, the purpose of the test shouldn't care whether the trivial project is
> part of the plan. However, the baseline is over-reaching in its definition,
> including a bunch of nodes irrelevant to the purpose of the test. One
> example might be here:
>
>
> https://github.com/mapr/drill-test-framework/blob/master/framework/resources/Functional/filter/pushdown/plan/q23.res
>
> In this baseline, we're testing that the filter is pushed past the
> aggregation. That means what we really need to be testing is a multiline
> plan pattern of
>
> HashAgg.*Filter.*Scan.*
>
> or better
>
> HashAgg.*Filter\(condition=\[=\(\$0, 10\)\]\).*Scan.*
>
> However, you can see that the actual expected result includes the
> entire structure of the plan (but not the pushed down filter
> condition). This causes the plan to fail now that DRILL-4467 is
> merged. As part of the fixes to these plans, we should really make
> sure that the scope of the baseline is only focused on the relevant
> issue to avoid nominal changes from causing testing false positives.
>
>
>
> --
> Jacques Nadeau
> CTO and Co-Founder, Dremio
>


[GitHub] drill pull request: DRILL-4449: Wrong results when metadata cache ...

2016-03-04 Thread amansinha100
Github user amansinha100 commented on the pull request:

https://github.com/apache/drill/pull/392#issuecomment-192361296
  
+1.   
I understand based on conversation with @adeneche (before he went on 
vacation) that he could not easily repro through a unit test.  However, 
functional QA test coverage will be added. 


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request: DRILL-4446: Support mandatory work assignment ...

2016-03-04 Thread vkorukanti
Github user vkorukanti commented on the pull request:

https://github.com/apache/drill/pull/403#issuecomment-192332765
  
Patch update.
+ Address review comments including unittest for 
HardAffinityFragmentParallelizer
+ Fix Limit0 issue when one of the GroupScans in limit0 query has hard 
assignment requirements (in which case we can't run the entire query in one 
fragment). @sudheeshkatkam Can you review this change for any perf 
implications? I tested limit0 queries on parquet table containing more than 1k 
files. I didn't see any regressions.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] drill pull request: DRILL-4281: Support authorized users to delega...

2016-03-04 Thread kbotzum
Github user kbotzum commented on the pull request:

https://github.com/apache/drill/pull/400#issuecomment-192320077
  
One suggestion. The terms I use when discussing the concepts with customers 
are Drill inbound impersonation and Drill outbound impersonation. That is 
absolutely clear. It does imply implementation to a bit but I think makes 
intent very clear.  You can then derive other terms from that such as 
"can_impersonate_inbound" and 
"principals_authorized_for_inbound_impersonation." Yea, really wording but very 
clear. We can of course shorten to "princ_auth_inbound_imper" in code.

Would that make sense?

By the way the use of the termed chained in the context of security usually 
implied something like chained delegation which makes me think of a security 
identity that is both end to end secure like kerberos and includes all 
identities that were passed through. That's not this so I'd avoid use of the 
term chained.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


Heads up on trivial project fix: plan baselines in regression suite need updating

2016-03-04 Thread Jacques Nadeau
I just merged a simple fix that Laurent found for DRILL-4467.

This fix ensures consistent column ordering when pushing projection into a
scan and invalid plans. This is good and was causing excessive operators
and pushdown failure in some cases.

However, this fix removes a number of trivial projects (that were
previously not detected as such) in a large set of queries. This means that
a number of plan baselines will need to be updated in the extended
regression suite to avoid consideration of the trivial project. This
underscores an issue I see in these tests. In virtually all cases I've
seen, the purpose of the test shouldn't care whether the trivial project is
part of the plan. However, the baseline is over-reaching in its definition,
including a bunch of nodes irrelevant to the purpose of the test. One
example might be here:

https://github.com/mapr/drill-test-framework/blob/master/framework/resources/Functional/filter/pushdown/plan/q23.res

In this baseline, we're testing that the filter is pushed past the
aggregation. That means what we really need to be testing is a multiline
plan pattern of

HashAgg.*Filter.*Scan.*

or better

HashAgg.*Filter\(condition=\[=\(\$0, 10\)\]\).*Scan.*

However, you can see that the actual expected result includes the
entire structure of the plan (but not the pushed down filter
condition). This causes the plan to fail now that DRILL-4467 is
merged. As part of the fixes to these plans, we should really make
sure that the scope of the baseline is only focused on the relevant
issue to avoid nominal changes from causing testing false positives.



--
Jacques Nadeau
CTO and Co-Founder, Dremio


[jira] [Resolved] (DRILL-4467) Invalid projection created using PrelUtil.getColumns

2016-03-04 Thread Jacques Nadeau (JIRA)

 [ 
https://issues.apache.org/jira/browse/DRILL-4467?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Jacques Nadeau resolved DRILL-4467.
---
Resolution: Fixed

Fixed with edea8b1cf4e5476d803e8b87c79e08e8c3263e04

> Invalid projection created using PrelUtil.getColumns
> 
>
> Key: DRILL-4467
> URL: https://issues.apache.org/jira/browse/DRILL-4467
> Project: Apache Drill
>  Issue Type: Bug
>Reporter: Laurent Goujon
>Assignee: Jacques Nadeau
>Priority: Critical
> Fix For: 1.6.0
>
>
> In {{DrillPushProjIntoScan}}, a new scan and a new projection are created 
> using {{PrelUtil#getColumn(RelDataType, List)}}.
> The returned {{ProjectPushInfo}} instance has several fields, one of them is 
> {{desiredFields}} which is the list of projected fields. There's one instance 
> per {{RexNode}} but because instances were initially added to a set, they 
> might not be in the same order as the order they were created.
> The issue happens in the following code:
> {code:java}
>   List newProjects = Lists.newArrayList();
>   for (RexNode n : proj.getChildExps()) {
> newProjects.add(n.accept(columnInfo.getInputRewriter()));
>   }
> {code}
> This code creates a new list of projects out of the initial ones, by mapping 
> the indices from the old projects to the new projects, but the indices of the 
> new RexNode instances might be out of order (because of the ordering of 
> desiredFields). And if indices are out of order, the check 
> {{ProjectRemoveRule.isTrivial(newProj)}} will fail.
> My guess is that desiredFields ordering should be preserved when instances 
> are added, to satisfy the condition above.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Created] (DRILL-4473) Removing trivial projects reveals bugs in handling of nonexistent columns in StreamingAggregate

2016-03-04 Thread Jacques Nadeau (JIRA)
Jacques Nadeau created DRILL-4473:
-

 Summary: Removing trivial projects reveals bugs in handling of 
nonexistent columns in StreamingAggregate
 Key: DRILL-4473
 URL: https://issues.apache.org/jira/browse/DRILL-4473
 Project: Apache Drill
  Issue Type: Bug
Reporter: Jacques Nadeau


We see a couple unit test failures in working with nonexistent columns once 
DRILL-4467 is fixed. This is because trivial projects no longer protect 
StreamingAggregate from non-existent columns. This is likely due to an 
incorrect check before throwing a Unsupported error. An unknown/ANY type should 
probably be allowed in the case of using sum/max/stddev

{code:title=Plan before DRILL-4467}
VOLCANO:Physical Planning (71ms):
ScreenPrel: rowcount = 1.0, cumulative cost = {464.1 rows, 2375.1 cpu, 0.0 io, 
0.0 network, 0.0 memory}, id = 185
  ProjectPrel(col1=[$0], col2=[$1], col3=[$2], col4=[$3], col5=[$4]): rowcount 
= 1.0, cumulative cost = {464.0 rows, 2375.0 cpu, 0.0 io, 0.0 network, 0.0 
memory}, id = 184
StreamAggPrel(group=[{}], col1=[SUM($0)], col2=[SUM($1)], col3=[SUM($2)], 
col4=[SUM($3)], col5=[SUM($4)]): rowcount = 1.0, cumulative cost = {464.0 rows, 
2375.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 183
  LimitPrel(offset=[0], fetch=[0]): rowcount = 1.0, cumulative cost = 
{463.0 rows, 2315.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 182
ProjectPrel(int_col=[$0], bigint_col=[$3], float4_col=[$4], 
float8_col=[$1], interval_year_col=[$2]): rowcount = 463.0, cumulative cost = 
{463.0 rows, 2315.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 181
  ScanPrel(groupscan=[EasyGroupScan 
[selectionRoot=classpath:/employee.json, numFiles=1, columns=[`int_col`, 
`bigint_col`, `float4_col`, `float8_col`, `interval_year_col`], 
files=[classpath:/employee.json]]]): rowcount = 463.0, cumulative cost = {463.0 
rows, 2315.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 160
{code}

{code:title=Plan after DRILL-4467}
VOLCANO:Physical Planning (63ms):
ScreenPrel: rowcount = 1.0, cumulative cost = {464.1 rows, 2375.1 cpu, 0.0 io, 
0.0 network, 0.0 memory}, id = 151
  ProjectPrel(col1=[$0], col2=[$1], col3=[$2], col4=[$3], col5=[$4]): rowcount 
= 1.0, cumulative cost = {464.0 rows, 2375.0 cpu, 0.0 io, 0.0 network, 0.0 
memory}, id = 150
StreamAggPrel(group=[{}], col1=[SUM($0)], col2=[SUM($1)], col3=[SUM($2)], 
col4=[SUM($3)], col5=[SUM($4)]): rowcount = 1.0, cumulative cost = {464.0 rows, 
2375.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 149
  LimitPrel(offset=[0], fetch=[0]): rowcount = 1.0, cumulative cost = 
{463.0 rows, 2315.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 148
ScanPrel(groupscan=[EasyGroupScan 
[selectionRoot=classpath:/employee.json, numFiles=1, columns=[`int_col`, 
`bigint_col`, `float4_col`, `float8_col`, `interval_year_col`], 
files=[classpath:/employee.json]]]): rowcount = 463.0, cumulative cost = {463.0 
rows, 2315.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 141


Tests disabled referring to this bug in TestAggregateFunctions show multiple 
examples of this behavior.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)


[jira] [Created] (DRILL-4472) Pushing Filter past Union All fails: DRILL-3257 regressed DRILL-2746 but unit test update break test goal

2016-03-04 Thread Jacques Nadeau (JIRA)
Jacques Nadeau created DRILL-4472:
-

 Summary: Pushing Filter past Union All fails: DRILL-3257 regressed 
DRILL-2746 but unit test update break test goal
 Key: DRILL-4472
 URL: https://issues.apache.org/jira/browse/DRILL-4472
 Project: Apache Drill
  Issue Type: Bug
  Components: Query Planning & Optimization
Reporter: Jacques Nadeau
Assignee: Aman Sinha


While reviewing DRILL-4467, I discovered this test. 

https://github.com/apache/drill/blame/master/exec/java-exec/src/test/java/org/apache/drill/TestUnionAll.java#L560

As you can see, the test is checking that test name confirms that filter is 
pushed below union all. However, as you can see, the expected result in 
DRILL-3257 was updated to a plan which doesn't push the in clause below the 
filter. I'm disabling the test since 4467 happens to remove what becomes a 
trivial project. However, we really should fix the core problem (a regression 
of DRILL-2746.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)