[ 
https://issues.apache.org/jira/browse/IMPALA-9788?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17118950#comment-17118950
 ] 

Tim Armstrong commented on IMPALA-9788:
---------------------------------------

I think this "couldn't resolve slot descriptor" error is probably also related 
somehow:
{noformat}
I0528 17:53:40.480123   557 coordinator.cc:463] 
8448c72470b5edb8:c13e264300000000] starting execution on 2 backends for 
query_id=8448c72470b5edb8:c13e264300000000
I0528 17:53:40.480386    78 control-service.cc:153] 
8448c72470b5edb8:c13e264300000000] ExecQueryFInstances(): 
query_id=8448c72470b5edb8:c13e264300000000 coord=192.168.16.9:22000 #instances=4
I0528 17:53:40.480388    79 control-service.cc:153] 
8448c72470b5edb8:c13e264300000000] ExecQueryFInstances(): 
query_id=8448c72470b5edb8:c13e264300000000 coord=192.168.16.9:22000 #instances=1
I0528 17:53:40.480620   559 slot-ref.cc:85] 8448c72470b5edb8:c13e264300000000] 
couldn't resolve slot descriptor 0
I0528 17:53:40.480734   557 coordinator.cc:520] 
8448c72470b5edb8:c13e264300000000] started execution on 2 backends for 
query_id=8448c72470b5edb8:c13e264300000000
I0528 17:53:40.480782   561 query-state.cc:747] 
8448c72470b5edb8:c13e264300000000] Executing instance. 
instance_id=8448c72470b5edb8:c13e264300000000 fragment_idx=0 
per_fragment_instance_idx=0 coord_state_idx=1 #in-flight=1
I0528 17:53:40.481376   559 status.cc:129] 8448c72470b5edb8:c13e264300000000] 
couldn't resolve slot descriptor 0
    @           0xc35229
    @          0x1615069
    @          0x1600048
    @          0x160256d
    @          0x1594226
    @          0x15916b4
    @          0x14ba682
    @          0x14ba7c5
    @          0x119d21a
    @          0x119e47b
    @          0x1185f7a
    @          0x11a6c1f
    @          0x1442bba
    @          0x1443a89
    @          0x1c2fbf9
    @     0x7facce8176da
    @     0x7faccb37788e
E0528 17:53:40.481597   559 exec-node.cc:110] 
8448c72470b5edb8:c13e264300000000] Could not construct plan tree:
TPlan {
  01: nodes (list) = list<struct>[2] {
    [0] = TPlanNode {
      01: node_id (i32) = 3,
      02: node_type (i32) = 3,
      03: num_children (i32) = 1,
      04: limit (i64) = -1,
      05: row_tuples (list) = list<i32>[1] {
        [0] = 1,
      },
      06: nullable_tuples (list) = list<bool>[1] {
        [0] = false,
      },
      08: disable_codegen (bool) = true,
      15: agg_node (struct) = TAggregationNode {
        01: aggregators (list) = list<struct>[1] {
          [0] = TAggregator {
            02: aggregate_functions (list) = list<struct>[1] {
              [0] = TExpr {
                01: nodes (list) = list<struct>[2] {
                  [0] = TExprNode {
                    01: node_type (i32) = 15,
                    02: type (struct) = TColumnType {
                      01: types (list) = list<struct>[1] {
                        [0] = TTypeNode {
                          01: type (i32) = 0,
                          02: scalar_type (struct) = TScalarType {
                            01: type (i32) = 6,
                          },
                        },
                      },
                    },
                    03: num_children (i32) = 1,
                    04: is_constant (bool) = false,
                    05: fn (struct) = TFunction {
                      01: name (struct) = TFunctionName {
                        01: db_name (string) = "_impala_builtins",
                        02: function_name (string) = "count",
                      },
                      02: binary_type (i32) = 0,
                      03: arg_types (list) = list<struct>[0] {
                      },
                      04: ret_type (struct) = TColumnType {
                        01: types (list) = list<struct>[1] {
                          [0] = TTypeNode {
                            01: type (i32) = 0,
                            02: scalar_type (struct) = TScalarType {
                              01: type (i32) = 6,
                            },
                          },
                        },
                      },
                      05: has_var_args (bool) = false,
                      07: signature (string) = "count()",
                      10: aggregate_fn (struct) = TAggregateFunction {
                        01: intermediate_type (struct) = TColumnType {
                          01: types (list) = list<struct>[1] {
                            [0] = TTypeNode {
                              01: type (i32) = 0,
                              02: scalar_type (struct) = TScalarType {
                                01: type (i32) = 6,
                              },
                            },
                          },
                        },
                        02: is_analytic_only_fn (bool) = false,
                        03: update_fn_symbol (string) = 
"_ZN6impala18AggregateFunctions15CountStarUpdateEPN10impala_udf15FunctionContextEPNS1_9BigIntValE",
                        04: init_fn_symbol (string) = 
"_ZN6impala18AggregateFunctions8InitZeroIN10impala_udf9BigIntValEEEvPNS2_15FunctionContextEPT_",
                        06: merge_fn_symbol (string) = 
"_ZN6impala18AggregateFunctions10CountMergeEPN10impala_udf15FunctionContextERKNS1_9BigIntValEPS4_",
                        09: remove_fn_symbol (string) = 
"_ZN6impala18AggregateFunctions15CountStarRemoveEPN10impala_udf15FunctionContextEPNS1_9BigIntValE",
                        10: ignores_distinct (bool) = false,
                      },
                      11: is_persistent (bool) = true,
                      12: last_modified_time (i64) = -1,
                    },
                    19: agg_expr (struct) = TAggregateExpr {
                      01: is_merge_agg (bool) = true,
                      02: arg_types (list) = list<struct>[1] {
                        [0] = TColumnType {
                          01: types (list) = list<struct>[1] {
                            [0] = TTypeNode {
                              01: type (i32) = 0,
                              02: scalar_type (struct) = TScalarType {
                                01: type (i32) = 6,
                              },
                            },
                          },
                        },
                      },
                    },
                  },
                  [1] = TExprNode {
                    01: node_type (i32) = 12,
                    02: type (struct) = TColumnType {
                      01: types (list) = list<struct>[1] {
                        [0] = TTypeNode {
                          01: type (i32) = 0,
                          02: scalar_type (struct) = TScalarType {
                            01: type (i32) = 6,
                          },
                        },
                      },
                    },
                    03: num_children (i32) = 0,
                    04: is_constant (bool) = false,
                    15: slot_ref (struct) = TSlotRef {
                      01: slot_id (i32) = 0,
                    },
                  },
                },
              },
            },
            03: intermediate_tuple_id (i32) = 1,
            04: output_tuple_id (i32) = 1,
            05: need_finalize (bool) = true,
            06: use_streaming_preaggregation (bool) = false,
            07: resource_profile (struct) = TBackendResourceProfile {
              01: min_reservation (i64) = 0,
              02: max_reservation (i64) = 9223372036854775807,
              03: spillable_buffer_size (i64) = 2097152,
              04: max_row_buffer_size (i64) = 2097152,
            },
          },
        },
        02: estimated_input_cardinality (i64) = 1,
        03: replicate_input (bool) = false,
      },
      21: label (string) = "03:AGGREGATE",
      22: label_detail (string) = "FINALIZE",
      23: estimated_stats (struct) = TExecStats {
        03: cardinality (i64) = 1,
        04: memory_used (i64) = 10485760,
      },
      25: resource_profile (struct) = TBackendResourceProfile {
        01: min_reservation (i64) = 0,
        02: max_reservation (i64) = 9223372036854775807,
        03: spillable_buffer_size (i64) = 2097152,
        04: max_row_buffer_size (i64) = 2097152,
      },
      27: pipelines (list) = list<struct>[2] {
        [0] = TPipelineMembership {
          01: pipe_id (i32) = 3,
          02: height (i32) = 0,
          03: phase (i32) = 3,
        },
        [1] = TPipelineMembership {
          01: pipe_id (i32) = 1,
          02: height (i32) = 2,
          03: phase (i32) = 2,
        },
      },
    },
    [1] = TPlanNode {
      01: node_id (i32) = 2,
      02: node_type (i32) = 6,
      03: num_children (i32) = 0,
      04: limit (i64) = -1,
      05: row_tuples (list) = list<i32>[1] {
        [0] = 1,
      },
      06: nullable_tuples (list) = list<bool>[1] {
        [0] = false,
      },
      08: disable_codegen (bool) = true,
      18: exchange_node (struct) = TExchangeNode {
        01: input_row_tuples (list) = list<i32>[1] {
          [0] = 1,
        },
      },
      21: label (string) = "02:EXCHANGE",
      22: label_detail (string) = "UNPARTITIONED",
      23: estimated_stats (struct) = TExecStats {
        03: cardinality (i64) = 1,
        04: memory_used (i64) = 16384,
      },
      25: resource_profile (struct) = TBackendResourceProfile {
        01: min_reservation (i64) = 0,
        02: max_reservation (i64) = 0,
      },
      27: pipelines (list) = list<struct>[1] {
        [0] = TPipelineMembership {
          01: pipe_id (i32) = 1,
          02: height (i32) = 1,
          03: phase (i32) = 3,
        },
      },
    },
  },
}

{noformat}

> Weird things happen when impalad restarts with different hostname but same IP
> -----------------------------------------------------------------------------
>
>                 Key: IMPALA-9788
>                 URL: https://issues.apache.org/jira/browse/IMPALA-9788
>             Project: IMPALA
>          Issue Type: Bug
>          Components: Backend
>    Affects Versions: Impala 3.4.0
>            Reporter: Tim Armstrong
>            Assignee: Sahil Takiar
>            Priority: Critical
>         Attachments: Screenshot from 2020-05-28 10-53-16.png, 
> get-root-sink-resolved.txt, statestore.log
>
>
> I was messing around with running impala in a single-node dockerized 
> configuration and ran into a bunch of weirdness stemming from when I restarted the 
> impalad. It got into a state where there was a new and an old statestore 
> registration with the same IP/port and different hostnames (since docker 
> generates new hostnames for each incarnation of the container).
> I saw a crash in Coordinator::GetRootSink(). The cause of that is the 
> coordinator treating the same impalad as two distinct backends, and sending 
> two execute RPCs to the backend (this is a single node cluster).
> {noformat}
> I0528 17:32:41.760128   573 coordinator.cc:143] 
> f84b158b036445ad:3a9defdf00000000] Exec() 
> query_id=f84b158b036445ad:3a9defdf00000000 stmt=SELECT COUNT(*) FROM 
> tpcds_kudu.call_center
> I0528 17:32:41.760670   573 coordinator.cc:463] 
> f84b158b036445ad:3a9defdf00000000] starting execution on 2 backends for 
> query_id=f84b158b036445ad:3a9defdf00000000
> ..
> I0528 17:32:41.762449    78 control-service.cc:153] 
> f84b158b036445ad:3a9defdf00000000] ExecQueryFInstances(): 
> query_id=f84b158b036445ad:3a9defdf00000000 coord=a16ac03fc53b:22000 
> #instances=1
> I0528 17:32:41.761706    79 control-service.cc:153] 
> f84b158b036445ad:3a9defdf00000000] ExecQueryFInstances(): 
> query_id=f84b158b036445ad:3a9defdf00000000 coord=a16ac03fc53b:22000 
> #instances=4
> ..
> Wrote minidump to 
> /opt/impala/logs/minidumps/impalad/15727084-c931-49e1-62d37e86-75cfe0f6.dmp
> #
> # A fatal error has been detected by the Java Runtime Environment:
> #
> #  SIGSEGV (0xb) at pc=0x00000000011a0d50, pid=1, tid=0x00007f92b5e8c700
> #
> # JRE version: OpenJDK Runtime Environment (8.0_242-b08) (build 
> 1.8.0_242-8u242-b08-0ubuntu3~18.04-b08)
> # Java VM: OpenJDK 64-Bit Server VM (25.242-b08 mixed mode linux-amd64 
> compressed oops)
> # Problematic frame:
> Wrote minidump to 
> /opt/impala/logs/minidumps/impalad/15727084-c931-49e1-62d37e86-75cfe0f6.dmp
> # C  [impalad+0xda0d50]  impala::FragmentInstanceState::GetRootSink() 
> const+0x0
> #
> # Core dump written. Default location: /opt/impala/core or core.1
> #
> # An error report file with more information is saved as:
> # /opt/impala/hs_err_pid1.log
> #
> # If you would like to submit a bug report, please visit:
> #   http://bugreport.java.com/bugreport/crash.jsp
> #
> {noformat}
> CC [~twm378]
> At a separate time I saw it trip the "Tried to add existing backend to 
> executor group" case in ExecutorGroup::AddExecutor().
> {noformat}
> >>void ExecutorGroup::AddExecutor(const BackendDescriptorPB& be_desc) {
>     // be_desc.is_executor can be false for the local backend when scheduling 
> queries to run
>     // on the coordinator host.
>     DCHECK(!be_desc.ip_address().empty());
>     Executors& be_descs = executor_map_[be_desc.ip_address()];
>     auto eq = [&be_desc](const BackendDescriptorPB& existing) {
>       // The IP addresses must already match, so it is sufficient to check 
> the port.
>       DCHECK_EQ(existing.ip_address(), be_desc.ip_address());
>       return existing.address().port() == be_desc.address().port();
>     };
>     if (find_if(be_descs.begin(), be_descs.end(), eq) != be_descs.end()) {
>       LOG(DFATAL) << "Tried to add existing backend to executor group: "
>                   << be_desc.krpc_address();
>       return;
>     }
>     if (!CheckConsistencyOrWarn(be_desc)) {
>       LOG(WARNING) << "Ignoring inconsistent backend for executor group: "
>                    << be_desc.krpc_address();
>       return;
>     }
>     if (be_descs.empty()) {
>       executor_ip_hash_ring_.AddNode(be_desc.ip_address());
>     }
>     be_descs.push_back(be_desc);
>     executor_ip_map_[be_desc.address().hostname()] = be_desc.ip_address();
>   }
> {noformat}
> I'm not sure if using the hostname to identify impalads is even useful at 
> this point; we could probably simplify this by using the IP address only.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to