IMPALA-5940: Avoid log spew by using Status::Expected.
In IMPALA-5926, we fixed a case where closing the session triggered a
stack trace in the logs which impacted performance for short-running
queries. Looking at log files from several active clusters, I identified a few
other cases where we could clean up log spew with the same (trivial)
approach.
The following messages will no longer log:
"Failed to codegen MaterializeTuple() due to unsupported type: $0"
"Not implemented for this format."
"ScalarFnCall Codegen not supported for CHAR"
"Could not codegen CodegenMaterializeExprs: "
"Could not codegen TupleRowComparator::Compare(): $0"
These codegen related messages were happening at every execution node, and were
therefore very common.
In addition, the following messages, which were very frequent in the logs, now
will log, but without the back trace:
"Query Id ... not found."
To do this, I changed them from logging implicitly via Status(...)
to logging explicitly.
I have not fixed this message:
"... Client ... timed-out during recv call."
It's in DoRpc(), and not all callers of DoRpc() have independent
logging. For now, we'll trade a little bit of log spam for clearer
debugging.
The snippet I used to identify these was:
find . -type f -name '*IMPALAD*.gz' | xargs gzcat | awk '/^I/ { if(x) {
print x; } x = "" } /status.cc/ { x=" "; } { if(x) { x=x $0 } }' | sed -e
's/0x[0-9a-fx]* //g' | sed -e 's/[0-9a-f]\{16\}:[0-9a-f]*/QUERYID/g' | tr -s
'\t' ' ' | tr '[0-9]' 'N' | sort | uniq -c | sort -n | tee output.txt
I also analyzed some logs using SQL, against a pre-processed logs table:
with v as (
select regexp_replace(
regexp_replace(
translate(substr(message, 42), "\n\t", " "),
"[a-zA-Z0-9.-]*[.][a-zA-Z0-9-]*:[0-9]*",
"<host>"),
"@.*$", "@@@...") as m
from logs_table where `class`="status.cc")
select m, count(*) from v group by 1 order by 2 desc limit 100
Testing:
* Automated tests.
* Manual testing for one of the new back-trace-suppressed paths:
$ impala-python shell/gen-py/ImpalaService/ImpalaService-remote -h
localhost:21000 GetRuntimeProfile 'beeswaxd.ttypes.QueryHandle()'
Traceback (most recent call last):
File "shell/gen-py/ImpalaService/ImpalaService-remote", line 106, in
<module>
pp.pprint(client.GetRuntimeProfile(eval(args[0]),))
File "/home/philip/src/impala/shell/gen-py/ImpalaService/ImpalaService.py",
line 161, in GetRuntimeProfile
return self.recv_GetRuntimeProfile()
File "/home/philip/src/impala/shell/gen-py/ImpalaService/ImpalaService.py",
line 184, in recv_GetRuntimeProfile
raise result.error
beeswaxd.ttypes.BeeswaxException:
BeeswaxException(handle=QueryHandle(log_context='', id=''), log_context='',
SQLState='HY000', _message='GetRuntimeProfile error: Query id 0:0 not
found.\n', errorCode=0)
$ grep 'Query id' logs/cluster/impalad.INFO | tail -n 1
I0926 20:29:09.999944 6787 impala-server.cc:642] Query id 0:0 not found.
Change-Id: I38088482377a1c3e794a9c8178ef83f29957a330
Reviewed-on: http://gerrit.cloudera.org:8080/8100
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/2747d521
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/2747d521
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/2747d521
Branch: refs/heads/master
Commit: 2747d521ad92ea38f3cf25fee6de6777dbf95677
Parents: 192cd96
Author: Philip Zeyliger <[email protected]>
Authored: Thu Sep 14 20:35:30 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Tue Oct 10 02:20:49 2017 +0000
----------------------------------------------------------------------
be/src/exec/hdfs-avro-scanner.cc | 2 +-
be/src/exec/hdfs-scan-node-base.cc | 2 +-
be/src/exprs/scalar-fn-call.cc | 4 ++--
be/src/runtime/tuple.cc | 5 ++---
be/src/service/impala-server.cc | 14 ++++++++------
be/src/util/tuple-row-compare.cc | 4 ++--
6 files changed, 16 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2747d521/be/src/exec/hdfs-avro-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-avro-scanner.cc b/be/src/exec/hdfs-avro-scanner.cc
index 6771e63..b6883ac 100644
--- a/be/src/exec/hdfs-avro-scanner.cc
+++ b/be/src/exec/hdfs-avro-scanner.cc
@@ -969,7 +969,7 @@ Status HdfsAvroScanner::CodegenReadScalar(const
AvroSchemaElement& element,
read_field_fn = codegen->GetFunction(IRFunction::READ_AVRO_DECIMAL,
false);
break;
default:
- return Status(Substitute(
+ return Status::Expected(Substitute(
"Failed to codegen MaterializeTuple() due to unsupported type: $0",
element.schema->type));
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2747d521/be/src/exec/hdfs-scan-node-base.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node-base.cc
b/be/src/exec/hdfs-scan-node-base.cc
index 1cfee5e..1c6a4de 100644
--- a/be/src/exec/hdfs-scan-node-base.cc
+++ b/be/src/exec/hdfs-scan-node-base.cc
@@ -307,7 +307,7 @@ void HdfsScanNodeBase::Codegen(RuntimeState* state) {
default:
// No codegen for this format
fn = NULL;
- status = Status("Not implemented for this format.");
+ status = Status::Expected("Not implemented for this format.");
}
DCHECK(fn != NULL || !status.ok());
const char* format_name =
_THdfsFileFormat_VALUES_TO_NAMES.find(format)->second;
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2747d521/be/src/exprs/scalar-fn-call.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/scalar-fn-call.cc b/be/src/exprs/scalar-fn-call.cc
index 82b886c..20dbe07 100644
--- a/be/src/exprs/scalar-fn-call.cc
+++ b/be/src/exprs/scalar-fn-call.cc
@@ -296,12 +296,12 @@ Status ScalarFnCall::GetCodegendComputeFn(LlvmCodeGen*
codegen, Function** fn) {
return Status::OK();
}
if (type_.type == TYPE_CHAR) {
- return Status("ScalarFnCall Codegen not supported for CHAR");
+ return Status::Expected("ScalarFnCall Codegen not supported for CHAR");
}
for (int i = 0; i < GetNumChildren(); ++i) {
if (children_[i]->type().type == TYPE_CHAR) {
*fn = NULL;
- return Status("ScalarFnCall Codegen not supported for CHAR");
+ return Status::Expected("ScalarFnCall Codegen not supported for CHAR");
}
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2747d521/be/src/runtime/tuple.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc
index 252175f..1dab5a5 100644
--- a/be/src/runtime/tuple.cc
+++ b/be/src/runtime/tuple.cc
@@ -305,9 +305,8 @@ Status Tuple::CodegenMaterializeExprs(LlvmCodeGen* codegen,
bool collect_string_
Status status = slot_materialize_exprs[i]->GetCodegendComputeFn(codegen,
&materialize_expr_fns[i]);
if (!status.ok()) {
- stringstream ss;
- ss << "Could not codegen CodegenMaterializeExprs: " <<
status.GetDetail();
- return Status(ss.str());
+ return Status::Expected(Substitute("Could not codegen
CodegenMaterializeExprs: $0",
+ status.GetDetail()));
}
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2747d521/be/src/service/impala-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index fabc8fa..9574fb8 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -636,9 +636,10 @@ Status ImpalaServer::GetRuntimeProfileStr(const TUniqueId&
query_id,
lock_guard<mutex> l(query_log_lock_);
QueryLogIndex::const_iterator query_record =
query_log_index_.find(query_id);
if (query_record == query_log_index_.end()) {
- stringstream ss;
- ss << "Query id " << PrintId(query_id) << " not found.";
- return Status(ss.str());
+ // Common error, so logging explicitly and eliding Status's stack trace.
+ string err = strings::Substitute("Query id $0 not found.",
PrintId(query_id));
+ VLOG(1) << err;
+ return Status::Expected(err);
}
RETURN_IF_ERROR(CheckProfileAccess(user,
query_record->second->effective_user,
query_record->second->user_has_profile_access));
@@ -691,9 +692,10 @@ Status ImpalaServer::GetExecSummary(const TUniqueId&
query_id, const string& use
}
}
if (is_query_missing) {
- stringstream ss;
- ss << "Query id " << PrintId(query_id) << " not found.";
- return Status(ss.str());
+ // Common error, so logging explicitly and eliding Status's stack trace.
+ string err = strings::Substitute("Query id $0 not found.",
PrintId(query_id));
+ VLOG(1) << err;
+ return Status::Expected(err);
}
RETURN_IF_ERROR(CheckProfileAccess(user, effective_user,
user_has_profile_access));
*result = exec_summary;
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2747d521/be/src/util/tuple-row-compare.cc
----------------------------------------------------------------------
diff --git a/be/src/util/tuple-row-compare.cc b/be/src/util/tuple-row-compare.cc
index 9a012ca..3d43576 100644
--- a/be/src/util/tuple-row-compare.cc
+++ b/be/src/util/tuple-row-compare.cc
@@ -211,8 +211,8 @@ Status TupleRowComparator::CodegenCompare(LlvmCodeGen*
codegen, Function** fn) {
for (int i = 0; i < ordering_exprs.size(); ++i) {
Status status = ordering_exprs[i]->GetCodegendComputeFn(codegen,
&key_fns[i]);
if (!status.ok()) {
- return Status(Substitute("Could not codegen
TupleRowComparator::Compare(): $0",
- status.GetDetail()));
+ return Status::Expected(Substitute(
+ "Could not codegen TupleRowComparator::Compare(): $0",
status.GetDetail()));
}
}