IMPALA-5031: memcpy cannot take null arguments This patch fixes UBSAN "null pointer passed as argument" errors in data loading. These are undefined behavior according to "7.1.4 Use of library functions" in the C99 standard (which is included in C++14 in section [intro.refs]):
If an argument to a function has an invalid value (such as a value outside the domain of the function, or a pointer outside the address space of the program, or a null pointer, or a pointer to non-modifiable storage when the corresponding parameter is not const-qualified) or a type (after promotion) not expected by a function with variable number of arguments, the behavior is undefined. The interesting parts of the backtraces for the errors fixed in this patch are below: runtime/string-buffer.h:54:12: runtime error: null pointer passed as argument 1, which is declared to never be null /usr/include/string.h:43:45: note: nonnull attribute specified here StringBuffer::Append(char const*, long) runtime/string-buffer.h:54:5 ColumnStatsBase::CopyToBuffer(StringBuffer*, StringValue*) exec/parquet-column-stats.cc:151:51 ColumnStats<StringValue>::MaterializeStringValuesToInternalBuffers() exec/parquet-column-stats.inline.h:237:70 HdfsParquetTableWriter::BaseColumnWriter::MaterializeStatsValues() exec/hdfs-parquet-table-writer.cc:149:63 HdfsParquetTableWriter::AppendRows(RowBatch*, vector<int> const&, bool*) exec/hdfs-parquet-table-writer.cc:1129:53 HdfsTableSink::WriteRowsToPartition(RuntimeState*, RowBatch*, pair<unique_ptr<OutputPartition, default_delete<OutputPartition> >, vector<int, all> > >*) exec/hdfs-table-sink.cc:256:71 HdfsTableSink::Send(RuntimeState*, RowBatch*) exec/hdfs-table-sink.cc:591:45 util/streaming-sampler.h:111:22: runtime error: null pointer passed as argument 2, which is declared to never be null /usr/include/string.h:43:45: note: nonnull attribute specified here StreamingSampler<long, 64>::SetSamples(int, vector<long> const&) util/streaming-sampler.h:111:5 RuntimeProfile::Update(vector<TRuntimeProfileNode> const&, int*) util/runtime-profile.cc:313:30 RuntimeProfile::Update(TRuntimeProfileTree const&) util/runtime-profile.cc:246:3 Coordinator::BackendState::InstanceStats::Update(TFragmentInstanceExecStatus const&, Coordinator::ExecSummary*, ProgressUpdater*) runtime/coordinator-backend-state.cc:474:13 Coordinator::BackendState::ApplyExecStatusReport(TReportExecStatusParams const&, Coordinator::ExecSummary*, ProgressUpdater*) runtime/coordinator-backend-state.cc:287:21 Coordinator::UpdateBackendExecStatus(TReportExecStatusParams const&) runtime/coordinator.cc:679:22 ClientRequestState::UpdateBackendExecStatus(TReportExecStatusParams const&) service/client-request-state.cc:1254:18 ImpalaServer::ReportExecStatus(TReportExecStatusResult&, TReportExecStatusParams const&) service/impala-server.cc:1343:18 ImpalaInternalService::ReportExecStatus(TReportExecStatusResult&, TReportExecStatusParams const&) service/impala-internal-service.cc:87:19 Change-Id: Ib9acc8c32409e67253a987eb3d1fd7d921efcb51 Reviewed-on: http://gerrit.cloudera.org:8080/11812 Reviewed-by: Jim Apple <jbapple-imp...@apache.org> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/f90931e5 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/f90931e5 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/f90931e5 Branch: refs/heads/branch-3.1.0 Commit: f90931e541d45f24cc305a521756fea618b7c4c5 Parents: c8c06dd Author: Jim Apple <jbapple-imp...@apache.org> Authored: Sat Oct 27 16:57:55 2018 -0700 Committer: Zoltan Borok-Nagy <borokna...@cloudera.com> Committed: Tue Nov 13 12:50:23 2018 +0100 ---------------------------------------------------------------------- be/src/runtime/string-buffer.h | 3 ++- be/src/util/streaming-sampler.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/f90931e5/be/src/runtime/string-buffer.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/string-buffer.h b/be/src/runtime/string-buffer.h index 2188e4d..8416405 100644 --- a/be/src/runtime/string-buffer.h +++ b/be/src/runtime/string-buffer.h @@ -23,6 +23,7 @@ #include "runtime/mem-pool.h" #include "runtime/mem-tracker.h" #include "runtime/string-value.h" +#include "util/ubsan.h" namespace impala { @@ -51,7 +52,7 @@ class StringBuffer { Status Append(const char* str, int64_t str_len) WARN_UNUSED_RESULT { int64_t new_len = len_ + str_len; if (UNLIKELY(new_len > buffer_size_)) RETURN_IF_ERROR(GrowBuffer(new_len)); - memcpy(buffer_ + len_, str, str_len); + Ubsan::MemCpy(buffer_ + len_, str, str_len); len_ += str_len; return Status::OK(); } http://git-wip-us.apache.org/repos/asf/impala/blob/f90931e5/be/src/util/streaming-sampler.h ---------------------------------------------------------------------- diff --git a/be/src/util/streaming-sampler.h b/be/src/util/streaming-sampler.h index dfa3dab..027f330 100644 --- a/be/src/util/streaming-sampler.h +++ b/be/src/util/streaming-sampler.h @@ -108,7 +108,7 @@ class StreamingSampler { boost::lock_guard<SpinLock> l(lock_); period_ = period; samples_collected_ = samples.size(); - memcpy(samples_, samples.data(), sizeof(T) * samples_collected_); + Ubsan::MemCpy(samples_, samples.data(), sizeof(T) * samples_collected_); current_sample_sum_ = 0; current_sample_count_ = 0; current_sample_total_time_ = 0;