IMPALA-5031: memcpy cannot take null arguments
This patch fixes UBSAN "null pointer passed as argument" errors in
data loading. These are undefined behavior according to "7.1.4 Use of
library functions" in the C99 standard (which is included in C++14 in
section [intro.refs]):
If an argument to a function has an invalid value (such as a value
outside the domain of the function, or a pointer outside the
address space of the program, or a null pointer, or a pointer to
non-modifiable storage when the corresponding parameter is not
const-qualified) or a type (after promotion) not expected by a
function with variable number of arguments, the behavior is
undefined.
The interesting parts of the backtraces for the errors fixed in this
patch are below:
runtime/string-buffer.h:54:12: runtime error: null pointer passed as
argument 1, which is declared to never be null
/usr/include/string.h:43:45: note: nonnull attribute specified here
StringBuffer::Append(char const*, long) runtime/string-buffer.h:54:5
ColumnStatsBase::CopyToBuffer(StringBuffer*, StringValue*)
exec/parquet-column-stats.cc:151:51
ColumnStats<StringValue>::MaterializeStringValuesToInternalBuffers()
exec/parquet-column-stats.inline.h:237:70
HdfsParquetTableWriter::BaseColumnWriter::MaterializeStatsValues()
exec/hdfs-parquet-table-writer.cc:149:63
HdfsParquetTableWriter::AppendRows(RowBatch*, vector<int> const&, bool*)
exec/hdfs-parquet-table-writer.cc:1129:53
HdfsTableSink::WriteRowsToPartition(RuntimeState*, RowBatch*,
pair<unique_ptr<OutputPartition, default_delete<OutputPartition> >, vector<int,
all> > >*) exec/hdfs-table-sink.cc:256:71
HdfsTableSink::Send(RuntimeState*, RowBatch*) exec/hdfs-table-sink.cc:591:45
util/streaming-sampler.h:111:22: runtime error: null pointer passed as
argument 2, which is declared to never be null
/usr/include/string.h:43:45: note: nonnull attribute specified here
StreamingSampler<long, 64>::SetSamples(int, vector<long> const&)
util/streaming-sampler.h:111:5
RuntimeProfile::Update(vector<TRuntimeProfileNode> const&, int*)
util/runtime-profile.cc:313:30
RuntimeProfile::Update(TRuntimeProfileTree const&)
util/runtime-profile.cc:246:3
Coordinator::BackendState::InstanceStats::Update(TFragmentInstanceExecStatus
const&, Coordinator::ExecSummary*, ProgressUpdater*)
runtime/coordinator-backend-state.cc:474:13
Coordinator::BackendState::ApplyExecStatusReport(TReportExecStatusParams
const&, Coordinator::ExecSummary*, ProgressUpdater*)
runtime/coordinator-backend-state.cc:287:21
Coordinator::UpdateBackendExecStatus(TReportExecStatusParams const&)
runtime/coordinator.cc:679:22
ClientRequestState::UpdateBackendExecStatus(TReportExecStatusParams const&)
service/client-request-state.cc:1254:18
ImpalaServer::ReportExecStatus(TReportExecStatusResult&,
TReportExecStatusParams const&) service/impala-server.cc:1343:18
ImpalaInternalService::ReportExecStatus(TReportExecStatusResult&,
TReportExecStatusParams const&) service/impala-internal-service.cc:87:19
Change-Id: Ib9acc8c32409e67253a987eb3d1fd7d921efcb51
Reviewed-on: http://gerrit.cloudera.org:8080/11812
Reviewed-by: Jim Apple <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/f90931e5
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/f90931e5
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/f90931e5
Branch: refs/heads/branch-3.1.0
Commit: f90931e541d45f24cc305a521756fea618b7c4c5
Parents: c8c06dd
Author: Jim Apple <[email protected]>
Authored: Sat Oct 27 16:57:55 2018 -0700
Committer: Zoltan Borok-Nagy <[email protected]>
Committed: Tue Nov 13 12:50:23 2018 +0100
----------------------------------------------------------------------
be/src/runtime/string-buffer.h | 3 ++-
be/src/util/streaming-sampler.h | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/f90931e5/be/src/runtime/string-buffer.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/string-buffer.h b/be/src/runtime/string-buffer.h
index 2188e4d..8416405 100644
--- a/be/src/runtime/string-buffer.h
+++ b/be/src/runtime/string-buffer.h
@@ -23,6 +23,7 @@
#include "runtime/mem-pool.h"
#include "runtime/mem-tracker.h"
#include "runtime/string-value.h"
+#include "util/ubsan.h"
namespace impala {
@@ -51,7 +52,7 @@ class StringBuffer {
Status Append(const char* str, int64_t str_len) WARN_UNUSED_RESULT {
int64_t new_len = len_ + str_len;
if (UNLIKELY(new_len > buffer_size_)) RETURN_IF_ERROR(GrowBuffer(new_len));
- memcpy(buffer_ + len_, str, str_len);
+ Ubsan::MemCpy(buffer_ + len_, str, str_len);
len_ += str_len;
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/impala/blob/f90931e5/be/src/util/streaming-sampler.h
----------------------------------------------------------------------
diff --git a/be/src/util/streaming-sampler.h b/be/src/util/streaming-sampler.h
index dfa3dab..027f330 100644
--- a/be/src/util/streaming-sampler.h
+++ b/be/src/util/streaming-sampler.h
@@ -108,7 +108,7 @@ class StreamingSampler {
boost::lock_guard<SpinLock> l(lock_);
period_ = period;
samples_collected_ = samples.size();
- memcpy(samples_, samples.data(), sizeof(T) * samples_collected_);
+ Ubsan::MemCpy(samples_, samples.data(), sizeof(T) * samples_collected_);
current_sample_sum_ = 0;
current_sample_count_ = 0;
current_sample_total_time_ = 0;