illiabarbashov-sketch commented on code in PR #6456:
URL: https://github.com/apache/hive/pull/6456#discussion_r3350244446
##########
ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java:
##########
@@ -222,6 +240,21 @@ private Set<Integer> getFetchInputAtCloseList() {
return retval;
}
+ private void initSkewJoinNames(int maxAlias) {
+ joinSkewKeyColumns = new String[maxAlias];
+ joinSkewTableAliases = new String[maxAlias];
+
+ String[] descKeyNames = conf.getSkewJoinKeyNames();
+ String[] descTableAliases = conf.getSkewJoinTableAliases();
+
+ for (int pos = 0; pos < maxAlias; pos++) {
+ joinSkewKeyColumns[pos] = (descKeyNames != null && pos <
descKeyNames.length
Review Comment:
updated
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java:
##########
@@ -626,6 +628,123 @@ private void convertJoinSMBJoin(JoinOperator joinOp,
OptimizeTezProcContext cont
}
}
mergeJoinOp.cloneOriginalParentsList(mergeJoinOp.getParentOperators());
+
+ // Resolve original table names and key column names from the compile-time
+ // operator tree only when skew monitoring is actually enabled
+ // (hive.merge.join.skew.threshold > 0). Tree traversal is skipped
+ // entirely when the feature is off so there is no overhead for the
+ // common case.
+ if (HiveConf.getLongVar(context.conf,
+ HiveConf.ConfVars.HIVE_MERGE_JOIN_SKEW_THRESHOLD) > 0) {
+ populateSkewJoinNames(joinOp, mergeJoinOp);
+ }
+ }
+
+ /**
+ * Resolves the original table name (or query alias) and join key column names
+ * for each join input position at <em>compile time</em>, while the full operator
+ * tree is still available. The results are stored as non-transient fields in
+ * {@link CommonMergeJoinDesc} so they survive plan serialization to the Tez task
+ * and can be read by the skew-join monitor at runtime.
+ *
+ */
+ private void populateSkewJoinNames(JoinOperator joinOp,
+ CommonMergeJoinOperator mergeJoinOp) {
+ List<Operator<? extends OperatorDesc>> parents =
joinOp.getParentOperators();
+ if (parents == null || parents.isEmpty()) {
+ return;
+ }
+
+ int numPositions = parents.size();
+ String[] keyNames = new String[numPositions];
+ String[] tableAliases = new String[numPositions];
+
+ for (int pos = 0; pos < numPositions; pos++) {
+ Operator<? extends OperatorDesc> parent = parents.get(pos);
+ if (parent == null) {
+ keyNames[pos] = "unknown";
Review Comment:
updated
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]