zabetak commented on a change in pull request #1178:
URL: https://github.com/apache/hive/pull/1178#discussion_r446201195
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/Compiler.java
##########
@@ -72,19 +65,52 @@
private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
private static final LogHelper CONSOLE = new LogHelper(LOG);
- private final Context context;
- private final DriverContext driverContext;
+ protected final Context context;
+ protected final DriverContext driverContext;
private final DriverState driverState;
private final PerfLogger perfLogger = SessionState.getPerfLogger();
private ASTNode tree;
+ /*
+ * Compiler needs to know if given statement is PREPARE/EXECUTE or otherwise
+ * to orchestrate the planning.
+ * e.g. if plan should be stored or if parameter binding should be done.
Review comment:
I was thinking that maybe we could store every plan, and not only those
that are explicitly prepared. The same query may appear multiple times even if
it is not part of a prepared statement.
Parameter binding could be determined by the presence or absence of parameters,
so maybe we don't really need the enum.
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/PreparePlanUtils.java
##########
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql;
+
+import java.io.*;
+import java.util.*;
+
+import org.apache.hadoop.hive.common.type.*;
+import org.apache.hadoop.hive.conf.*;
+import org.apache.hadoop.hive.ql.exec.*;
+import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils;
+import org.apache.hadoop.hive.ql.exec.tez.*;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.hooks.HookUtils;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
+import org.apache.hadoop.hive.ql.lockmgr.LockException;
+import org.apache.hadoop.hive.ql.log.PerfLogger;
+import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.optimizer.physical.*;
+import org.apache.hadoop.hive.ql.parse.*;
+import org.apache.hadoop.hive.ql.parse.type.*;
+import org.apache.hadoop.hive.ql.plan.*;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
+import
org.apache.hadoop.hive.ql.security.authorization.command.CommandAuthorizer;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.ql.udf.generic.*;
+import org.apache.hadoop.hive.serde.*;
+import org.apache.hadoop.hive.serde2.typeinfo.*;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * The compiler compiles the command, by creating a QueryPlan from a String
command.
+ * Also opens a transaction if necessary.
+ */
+public class PreparePlanUtils {
+
+ private static final Logger LOG =
LoggerFactory.getLogger(PreparePlanUtils.class);
+
+ private static Set<Operator<?>> getAllFetchOperators(FetchTask task) {
+ if (task.getWork().getSource() == null) {
+ return Collections.EMPTY_SET;
+ }
+ Set<Operator<?>> operatorList = new HashSet<>();
+ operatorList.add(task.getWork().getSource());
+ return
AnnotateRunTimeStatsOptimizer.getAllOperatorsForSimpleFetch(operatorList);
+ }
+
+ /*
+ * Retrieve name for PREPARE/EXECUTE statement
+ * Make sure the tree is either EXECUTE or PREPARE
+ */
+ protected static String getPrepareStatementName(ASTNode tree) {
+ if (tree.getType() == HiveParser.TOK_EXPLAIN) {
+ tree = (ASTNode)tree.getChildren().get(0);
+ }
+ assert (tree.getType() == HiveParser.TOK_PREPARE
+ || tree.getType() == HiveParser.TOK_EXECUTE);
+ return ((ASTNode)tree.getChildren().get(1)).getText();
+ }
+
+ public static void bindDynamicParams(QueryPlan plan, Map<Integer, String>
parameterMap) {
Review comment:
In order to avoid modifying the plan and adding per-operator-specific
code below, maybe we could use a UDF when generating the plan with dynamic
parameters that performs a lookup in the map to obtain the correct value. Maybe
other parts of the code could also be simplified like this.
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/Compiler.java
##########
@@ -72,19 +65,52 @@
private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
private static final LogHelper CONSOLE = new LogHelper(LOG);
- private final Context context;
- private final DriverContext driverContext;
+ protected final Context context;
+ protected final DriverContext driverContext;
private final DriverState driverState;
private final PerfLogger perfLogger = SessionState.getPerfLogger();
private ASTNode tree;
+ /*
+ * Compiler needs to know if given statement is PREPARE/EXECUTE or otherwise
+ * to orchestrate the planning.
+ * e.g. if plan should be stored or if parameter binding should be done.
Review comment:
I am thinking of something along the lines of Postgres plan_cache_mode,
but maybe less ambitious to begin with.
_By default (that is, when plan_cache_mode is set to auto), the server will
automatically choose whether to use a generic or custom plan for a prepared
statement that has parameters. The current rule for this is that the first five
executions are done with custom plans and the average estimated cost of those
plans is calculated. Then a generic plan is created and its estimated cost is
compared to the average custom-plan cost. Subsequent executions use the generic
plan if its cost is not so much higher than the average custom-plan cost as to
make repeated replanning seem preferable._
In other words use the SQL string or something equivalent as the key for the
cache.
##########
File path: ql/src/test/queries/clientpositive/prepare_plan.q
##########
@@ -0,0 +1,49 @@
+--! qt:dataset:src
+--! qt:dataset:alltypesorc
+
+set hive.explain.user=false;
+--explain select * from src where key > '0' limit 10;
+set hive.vectorized.execution.enabled=false;
+
+
+explain extended prepare pcount as select count(*) from src where key > $1;
+prepare pcount as select count(*) from src where key > $1;
+execute pcount (1,200);
+select count(*) from src where key > '200';
+
+-- single param
+explain extended prepare p1 as select * from src where key > $1 order by key
limit 10;
+explain extended select * from src where key > '200' order by key limit 10;
+prepare p1 as select * from src where key > $1 order by key limit 10;
+
+execute p1 (1,200);
+select * from src where key > '200' order by key limit 10;
Review comment:
I don't think it is necessary to execute the same query twice if it is
just to show that the results are the same. If the results are verified when
the .out file is committed, I think that is sufficient.
##########
File path: parser/src/java/org/apache/hadoop/hive/ql/parse/HiveLexerParent.g
##########
@@ -471,6 +472,11 @@ Number
(Digit)+ ( DOT (Digit)* (Exponent)? | Exponent)?
;
+ParamLiteral
+ :
+ (DOLLAR) (Digit)+ { setText(getText().substring(1, getText().length())); }
Review comment:
DBMSs use various notations for dynamic parameters when it comes to
PREPARE/EXECUTE syntax, but at the JDBC level I think everybody accepts `?`. The SQL
standard and the JDBC specification use `?` for parameters, so to keep things
uniform I think it makes sense to also use this notation.
##########
File path: ql/src/test/queries/clientpositive/prepare_plan.q
##########
@@ -0,0 +1,49 @@
+--! qt:dataset:src
+--! qt:dataset:alltypesorc
+
+set hive.explain.user=false;
+--explain select * from src where key > '0' limit 10;
+set hive.vectorized.execution.enabled=false;
+
+
+explain extended prepare pcount as select count(*) from src where key > $1;
+prepare pcount as select count(*) from src where key > $1;
+execute pcount (1,200);
+select count(*) from src where key > '200';
+
+-- single param
+explain extended prepare p1 as select * from src where key > $1 order by key
limit 10;
+explain extended select * from src where key > '200' order by key limit 10;
+prepare p1 as select * from src where key > $1 order by key limit 10;
+
+execute p1 (1,200);
+select * from src where key > '200' order by key limit 10;
+
+-- same query, different param
+execute p1 (1,0);
+select * from src where key > '0' order by key limit 10;
+
+-- multiple parameters
+explain prepare p2 as select min(ctinyint), max(cbigint) from alltypesorc
where cint > ($1 + $2 + $3) group by ctinyint;
+prepare p2 as select min(ctinyint), max(cbigint) from alltypesorc where cint >
($1 + $2 + $3) group by ctinyint;
+
+execute p2 ( 1,0, 2,1, 3, 2);
+select min(ctinyint), max(cbigint) from alltypesorc where cint > (0 + 1 + 2)
group by ctinyint;
Review comment:
Since we are executing the query and verifying the results, we may want
to add an ORDER BY clause to guarantee that the order is not going to change.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]