IMPALA-5031: memcpy cannot take null arguments

This patch fixes UBSAN "null pointer passed as argument" errors in
data loading. These are undefined behavior according to "7.1.4 Use of
library functions" in the C99 standard (which is included in C++14 in
section [intro.refs]):

    If an argument to a function has an invalid value (such as a value
    outside the domain of the function, or a pointer outside the
    address space of the program, or a null pointer, or a pointer to
    non-modifiable storage when the corresponding parameter is not
    const-qualified) or a type (after promotion) not expected by a
    function with variable number of arguments, the behavior is
    undefined.

The interesting parts of the backtraces for the errors fixed in this
patch are below:

    runtime/string-buffer.h:54:12: runtime error: null pointer passed as 
argument 1, which is declared to never be null
    /usr/include/string.h:43:45: note: nonnull attribute specified here
    StringBuffer::Append(char const*, long) runtime/string-buffer.h:54:5
    ColumnStatsBase::CopyToBuffer(StringBuffer*, StringValue*) 
exec/parquet-column-stats.cc:151:51
    ColumnStats<StringValue>::MaterializeStringValuesToInternalBuffers() 
exec/parquet-column-stats.inline.h:237:70
    HdfsParquetTableWriter::BaseColumnWriter::MaterializeStatsValues() 
exec/hdfs-parquet-table-writer.cc:149:63
    HdfsParquetTableWriter::AppendRows(RowBatch*, vector<int> const&, bool*) 
exec/hdfs-parquet-table-writer.cc:1129:53
    HdfsTableSink::WriteRowsToPartition(RuntimeState*, RowBatch*, 
pair<unique_ptr<OutputPartition, default_delete<OutputPartition> >, vector<int, 
all> > >*) exec/hdfs-table-sink.cc:256:71
    HdfsTableSink::Send(RuntimeState*, RowBatch*) exec/hdfs-table-sink.cc:591:45

    util/streaming-sampler.h:111:22: runtime error: null pointer passed as 
argument 2, which is declared to never be null
    /usr/include/string.h:43:45: note: nonnull attribute specified here
    StreamingSampler<long, 64>::SetSamples(int, vector<long> const&) 
util/streaming-sampler.h:111:5
    RuntimeProfile::Update(vector<TRuntimeProfileNode> const&, int*) 
util/runtime-profile.cc:313:30
    RuntimeProfile::Update(TRuntimeProfileTree const&) 
util/runtime-profile.cc:246:3
    
Coordinator::BackendState::InstanceStats::Update(TFragmentInstanceExecStatus 
const&, Coordinator::ExecSummary*, ProgressUpdater*) 
runtime/coordinator-backend-state.cc:474:13
    Coordinator::BackendState::ApplyExecStatusReport(TReportExecStatusParams 
const&, Coordinator::ExecSummary*, ProgressUpdater*) 
runtime/coordinator-backend-state.cc:287:21
    Coordinator::UpdateBackendExecStatus(TReportExecStatusParams const&) 
runtime/coordinator.cc:679:22
    ClientRequestState::UpdateBackendExecStatus(TReportExecStatusParams const&) 
service/client-request-state.cc:1254:18
    ImpalaServer::ReportExecStatus(TReportExecStatusResult&, 
TReportExecStatusParams const&) service/impala-server.cc:1343:18
    ImpalaInternalService::ReportExecStatus(TReportExecStatusResult&, 
TReportExecStatusParams const&) service/impala-internal-service.cc:87:19

Change-Id: Ib9acc8c32409e67253a987eb3d1fd7d921efcb51
Reviewed-on: http://gerrit.cloudera.org:8080/11812
Reviewed-by: Jim Apple <jbapple-imp...@apache.org>
Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/f90931e5
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/f90931e5
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/f90931e5

Branch: refs/heads/branch-3.1.0
Commit: f90931e541d45f24cc305a521756fea618b7c4c5
Parents: c8c06dd
Author: Jim Apple <jbapple-imp...@apache.org>
Authored: Sat Oct 27 16:57:55 2018 -0700
Committer: Zoltan Borok-Nagy <borokna...@cloudera.com>
Committed: Tue Nov 13 12:50:23 2018 +0100

----------------------------------------------------------------------
 be/src/runtime/string-buffer.h  | 3 ++-
 be/src/util/streaming-sampler.h | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/f90931e5/be/src/runtime/string-buffer.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/string-buffer.h b/be/src/runtime/string-buffer.h
index 2188e4d..8416405 100644
--- a/be/src/runtime/string-buffer.h
+++ b/be/src/runtime/string-buffer.h
@@ -23,6 +23,7 @@
 #include "runtime/mem-pool.h"
 #include "runtime/mem-tracker.h"
 #include "runtime/string-value.h"
+#include "util/ubsan.h"
 
 namespace impala {
 
@@ -51,7 +52,7 @@ class StringBuffer {
   Status Append(const char* str, int64_t str_len) WARN_UNUSED_RESULT {
     int64_t new_len = len_ + str_len;
     if (UNLIKELY(new_len > buffer_size_)) RETURN_IF_ERROR(GrowBuffer(new_len));
-    memcpy(buffer_ + len_, str, str_len);
+    Ubsan::MemCpy(buffer_ + len_, str, str_len);
     len_ += str_len;
     return Status::OK();
   }

http://git-wip-us.apache.org/repos/asf/impala/blob/f90931e5/be/src/util/streaming-sampler.h
----------------------------------------------------------------------
diff --git a/be/src/util/streaming-sampler.h b/be/src/util/streaming-sampler.h
index dfa3dab..027f330 100644
--- a/be/src/util/streaming-sampler.h
+++ b/be/src/util/streaming-sampler.h
@@ -108,7 +108,7 @@ class StreamingSampler {
     boost::lock_guard<SpinLock> l(lock_);
     period_ = period;
     samples_collected_ = samples.size();
-    memcpy(samples_, samples.data(), sizeof(T) * samples_collected_);
+    Ubsan::MemCpy(samples_, samples.data(), sizeof(T) * samples_collected_);
     current_sample_sum_ = 0;
     current_sample_count_ = 0;
     current_sample_total_time_ = 0;

Reply via email to