eldenmoon opened a new issue, #11093:
URL: https://github.com/apache/doris/issues/11093

   ### Search before asking
   
   - [X] I had searched in the 
[issues](https://github.com/apache/incubator-doris/issues?q=is%3Aissue) and 
found no similar issues.
   
   
   ### Version
   
   master
   
   ### What's Wrong?
   
   ```
   doris_be: 
/mnt/hdd02/lihangyu/eldenmoon/selectdb/be/src/vec/olap/olap_data_convertor.h:
   224: const void* 
doris::vectorized::OlapBlockDataConvertor::OlapColumnDataConvertorSimp
   le<T>::get_data_at(size_t) const [with T = signed char; size_t = long 
unsigned int]: As
   sertion `offset < _num_rows' failed.
   *** Aborted at 1658389160 (unix time) try "date -d @1658389160" if you are 
using GNU da
   te ***
   *** SIGABRT unkown detail explain (@0x2477ef) received by PID 2389999 (TID 
0x7f79aef997
   00) from PID 2389999; stack trace: ***
    0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*)
   at /mnt/hdd02/lihangyu/eldenmoon/selectdb/be/src/common/signal_handler.h:407
    1# 0x00007F7CCFECC400 in /lib64/libc.so.6
    2# raise in /lib64/libc.so.6
    3# abort in /lib64/libc.so.6
    4# _nl_load_domain.cold.0 in /lib64/libc.so.6
    5# 0x00007F7CCFEC4A76 in /lib64/libc.so.6
    6# 
doris::vectorized::OlapBlockDataConvertor::OlapColumnDataConvertorSimple<signed 
char>::get_data_at(unsigned long) const at 
/mnt/hdd02/lihangyu/eldenmoon/selectdb/be/src/vec/olap/olap_data_convertor.h:225
    7# 
doris::vectorized::OlapBlockDataConvertor::OlapColumnDataConvertorArray::convert_to_olap(unsigned
 char const*, doris::vectorized::ColumnArray const*, 
doris::vectorized::DataTypeArray const*) at 
/mnt/hdd02/lihangyu/eldenmoon/selectdb/be/src/vec/olap/olap_data_convertor.cpp:676
    8# 
doris::vectorized::OlapBlockDataConvertor::OlapColumnDataConvertorArray::convert_to_olap()
 at 
/mnt/hdd02/lihangyu/eldenmoon/selectdb/be/src/vec/olap/olap_data_convertor.cpp:612
    9# doris::vectorized::OlapBlockDataConvertor::convert_column_data(unsigned 
long) at 
/mnt/hdd02/lihangyu/eldenmoon/selectdb/be/src/vec/olap/olap_data_convertor.cpp:127
   10# doris::segment_v2::SegmentWriter::append_block(doris::vectorized::Block 
const*, unsigned long, unsigned long) at 
/mnt/hdd02/lihangyu/eldenmoon/selectdb/be/src/olap/rowset/segment_v2/segment_writer.cpp:174
   11# doris::BetaRowsetWriter::_add_block(doris::vectorized::Block const*, 
std::unique_ptr<doris::segment_v2::SegmentWriter, 
std::default_delete<doris::segment_v2::SegmentWriter> >*) at 
/mnt/hdd02/lihangyu/eldenmoon/selectdb/be/src/olap/rowset/beta_rowset_writer.cpp:141
   12# doris::BetaRowsetWriter::flush_single_memtable(doris::vectorized::Block 
const*) at 
/mnt/hdd02/lihangyu/eldenmoon/selectdb/be/src/olap/rowset/beta_rowset_writer.cpp:242
   13# doris::MemTable::_do_flush(long&) at 
/mnt/hdd02/lihangyu/eldenmoon/selectdb/be/src/olap/memtable.cpp:479
   14# doris::MemTable::flush() at 
/mnt/hdd02/lihangyu/eldenmoon/selectdb/be/src/olap/memtable.cpp:421
   ```
   
   ### What You Expected?
   
   No core dump: the BE should flush the memtable without aborting.
   
   ### How to Reproduce?
   
   Write enough data that a block is split across several segments when a
   single memtable is flushed. See the trigger condition `max_row_add` below:
   ```
   // Appends every row of `block` through `*segment_writer`, using one or more
   // append_block() calls. Each call covers at most the number of rows that
   // max_row_to_add() reports for the current writer; when that number is below 1
   // the current writer is flushed and a new one is created before continuing.
   // From the second iteration on, append_block() is called with a non-zero
   // row_offset; this is the situation the report refers to as the trigger.
   //
   // Returns Status::OK() once all rows were appended, or
   // Status::OLAPInternalError(OLAP_ERR_WRITER_DATA_WRITE_ERROR) if an
   // append_block() call fails. RETURN_NOT_OK presumably propagates a failed
   // flush/create status to the caller -- confirm against the macro definition.
   Status BetaRowsetWriter::_add_block(const vectorized::Block* block,
                                       
std::unique_ptr<segment_v2::SegmentWriter>* segment_writer) {
       size_t block_size_in_bytes = block->bytes();
       size_t block_row_num = block->rows();
       // Average row size, clamped to at least 1 byte.
       // NOTE(review): this divides by block_row_num; a block with zero rows would
       // divide by zero here unless callers rule that out -- confirm.
       size_t row_avg_size_in_bytes = std::max((size_t)1, block_size_in_bytes / 
block_row_num);
       // Index of the first row of `block` that has not been appended yet.
       size_t row_offset = 0;
   
       do {
           auto max_row_add = 
(*segment_writer)->max_row_to_add(row_avg_size_in_bytes);
           if (UNLIKELY(max_row_add < 1)) {
               // no space for another single row, need flush now
               RETURN_NOT_OK(_flush_segment_writer(segment_writer));
               RETURN_NOT_OK(_create_segment_writer(segment_writer, block));
               // Ask the newly created writer how many rows it can take.
               max_row_add = 
(*segment_writer)->max_row_to_add(row_avg_size_in_bytes);
               DCHECK(max_row_add > 0);
           }
   
           // Append the smaller of "rows remaining in the block" and "rows the
           // writer can still take".
           size_t input_row_num = std::min(block_row_num - row_offset, 
size_t(max_row_add));
           auto s = (*segment_writer)->append_block(block, row_offset, 
input_row_num);
           if (UNLIKELY(!s.ok())) {
               // The detailed status is only logged; callers receive a generic
               // write-error status.
               LOG(WARNING) << "failed to append block: " << s.to_string();
               return 
Status::OLAPInternalError(OLAP_ERR_WRITER_DATA_WRITE_ERROR);
           }
           row_offset += input_row_num;
       } while (row_offset < block_row_num);
   
       // Counted once, after the whole block has been appended.
       _num_rows_written += block_row_num;
       return Status::OK();
   }
   ```
   
   ### Anything Else?
   
    
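   // Proposed fix: get_data_at should be called with offset - offsets[start_index],
   // because OlapColumnDataConvertorArray::convert_to_olap passes _item_convertor
   // only the item range that begins at offsets[start_index] (via set_source_column).
   // Passing the raw offset may access memory outside _item_convertor's data range
   // and trips the `offset < _num_rows` assertion shown in the stack trace above: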
   
collection_value->set_data(const_cast<void*>(_item_convertor->get_data_at(offset
 - offsets[start_index])));
   
   ```
   // Fills _values with one CollectionValue per row in
   // [_row_pos, _row_pos + _num_rows) of `column_array`.
   //
   // The nested item column (unwrapped from ColumnNullable when it is nullable)
   // is handed to _item_convertor restricted to the item range that starts at
   // offsets[_row_pos - 1] and spans the items of the rows being converted; each
   // row's CollectionValue then points into the converted item data.
   //
   // Always returns Status::OK(); the result of _item_convertor->convert_to_olap()
   // is not inspected here.
   // NOTE(review): the `null_map` parameter is not read in this body; row nulls
   // are taken from the member `_nullmap` instead -- confirm this is intended.
   Status OlapBlockDataConvertor::OlapColumnDataConvertorArray::convert_to_olap(
           const UInt8* null_map, const ColumnArray* column_array,
           const DataTypeArray* data_type_array) {
       // Null map of the nested items; stays nullptr when the item column is not
       // nullable.
       const UInt8* item_null_map = nullptr;
       ColumnPtr item_data = column_array->get_data_ptr();
       if (column_array->get_data().is_nullable()) {
           const auto& data_nullable_column =
                   assert_cast<const ColumnNullable&>(column_array->get_data());
           item_null_map = data_nullable_column.get_null_map_data().data();
           item_data = data_nullable_column.get_nested_column_ptr();
       }
   
       const auto& offsets = column_array->get_offsets();
       // offsets[k] is used below as the end position of row k's items, so the
       // items of the converted rows span offsets[start_index]..offsets[end_index].
       // NOTE(review): start_index is -1 when _row_pos is 0; presumably the offsets
       // container tolerates index -1 and yields 0 -- confirm.
       int64_t start_index = _row_pos - 1;
       int64_t end_index = _row_pos + _num_rows - 1;
       auto start = offsets[start_index];
       auto size = offsets[end_index] - start;
   
       ColumnWithTypeAndName item_typed_column = {
               item_data, remove_nullable(data_type_array->get_nested_type()), 
""};
       // Convert only the items in [start, start + size); positions later passed
       // to _item_convertor->get_data_at() are therefore relative to `start`.
       _item_convertor->set_source_column(item_typed_column, start, size);
       _item_convertor->convert_to_olap();
       // NOTE(review): looks like leftover debug logging for int8 item types.
       if (WhichDataType(item_typed_column.type).is_int8()) {
           LOG(INFO) << "ecounter signed char char";
       }
       CollectionValue* collection_value = _values.data();
       for (size_t i = 0; i < _num_rows; ++i, ++collection_value) {
           int64_t cur_pos = _row_pos + i;
           int64_t prev_pos = cur_pos - 1;
           // Null rows are skipped before the placement-new below, so their
           // _values slot is left as it was.
           if (_nullmap && _nullmap[cur_pos]) {
               continue;
           }
           // Absolute start position of this row's items in the nested column.
           auto offset = offsets[prev_pos];
           // Number of items in this row (shadows the outer `size`).
           auto size = offsets[cur_pos] - offsets[prev_pos];
           new (collection_value) CollectionValue(size);
   
           // Empty arrays keep the freshly constructed value with no data pointer
           // set here.
           if (size == 0) {
               continue;
           }
   
           if (column_array->get_data().is_nullable()) {
               collection_value->set_has_null(true);
               // item_null_map covers the whole nested column, so it is indexed
               // with the absolute offset rather than the start-relative one.
               collection_value->set_null_signs(
                       const_cast<bool*>(reinterpret_cast<const 
bool*>(item_null_map + offset)));
           }
           // get_data_at should use offset - offsets[start_index] since
           // start_index may be changed after 
OlapColumnDataConvertorArray::set_source_column.
           // Using just offset may access the memory out of _item_convertor's 
data range,
           
collection_value->set_data(const_cast<void*>(_item_convertor->get_data_at(offset
 - offsets[start_index])));
       }
       return Status::OK();
   }
   ```
   
   ### Are you willing to submit PR?
   
   - [X] Yes I am willing to submit a PR!
   
   ### Code of Conduct
   
   - [X] I agree to follow this project's [Code of 
Conduct](https://www.apache.org/foundation/policies/conduct)
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to