yordan-pavlov commented on a change in pull request #1849:
URL: https://github.com/apache/arrow-datafusion/pull/1849#discussion_r810676619
##########
File path: datafusion/src/row/writer.rs
##########
@@ -50,6 +59,100 @@ pub fn write_batch_unchecked(
offsets
}
+/// Appends the rows of `batch`, starting at row `row_idx`, to the `output`
+/// buffer, beginning at byte `offset`, using a JIT-compiled row writer.
+///
+/// Returns the start offset within `output` of every row that was written,
+/// in row order.
+///
+/// # Errors
+///
+/// Returns an error if registering the write functions, generating the row
+/// writer function, or compiling it fails.
+///
+/// # Panics
+///
+/// This function will panic if the output buffer doesn't have enough space to
+/// hold all the rows.
+#[cfg(feature = "jit")]
+pub fn write_batch_unchecked_jit(
+    output: &mut [u8],
+    offset: usize,
+    batch: &RecordBatch,
+    row_idx: usize,
+    schema: Arc<Schema>,
+    assembler: &Assembler,
+) -> Result<Vec<usize>> {
+    let mut writer = RowWriter::new(&schema);
+    let mut current_offset = offset;
+    // One offset is recorded per written row; `saturating_sub` keeps the
+    // capacity hint at zero when `row_idx` is past the end of the batch.
+    let mut offsets = Vec::with_capacity(batch.num_rows().saturating_sub(row_idx));
+    register_write_functions(assembler)?;
+    let gen_func = gen_write_row(&schema, assembler)?;
+    let mut jit = assembler.create_jit();
+    let code_ptr = jit.compile(gen_func)?;
+
+    // SAFETY: `code_ptr` is the result of compiling the function produced by
+    // `gen_write_row` for this schema. This assumes the generated function
+    // takes `(&mut RowWriter, usize, &RecordBatch)` and returns nothing.
+    // NOTE(review): confirm against `gen_write_row`.
+    let code_fn = unsafe {
+        std::mem::transmute::<_, fn(&mut RowWriter, usize, &RecordBatch)>(code_ptr)
+    };
+
+    for cur_row in row_idx..batch.num_rows() {
+        offsets.push(current_offset);
+        code_fn(&mut writer, cur_row, batch);
+        writer.end_padding();
+        let row_width = writer.row_width;
+        // Slicing panics here when `output` is too small for this row.
+        output[current_offset..current_offset + row_width]
+            .copy_from_slice(writer.get_row());
+        current_offset += row_width;
+        // Clear the writer so the next row starts from a fresh state.
+        writer.reset();
+    }
+    Ok(offsets)
+}
+
+/// Writes every row of every batch in `batches` through `write_row`, reusing
+/// a single `RowWriter`, and collects the value `write_row` returns for each
+/// row (used here as the row width).
+///
+/// The returned vector holds one entry per row, in iteration order. This
+/// function itself never produces an `Err`.
+#[cfg(feature = "jit")]
+pub fn bench_write_batch(
+    batches: &[Vec<RecordBatch>],
+    schema: Arc<Schema>,
+) -> Result<Vec<usize>> {
+    let mut row_writer = RowWriter::new(&schema);
+    let mut widths = Vec::new();
+
+    for group in batches {
+        for batch in group {
+            for row in 0..batch.num_rows() {
+                widths.push(write_row(&mut row_writer, row, batch));
+                // Clear the writer before the next row is written.
+                row_writer.reset();
+            }
+        }
+    }
+
+    Ok(widths)
+}
+
+/// Writes every row of every batch in `batches` with a JIT-compiled row
+/// writer built from a default `Assembler`, and collects the writer's
+/// `row_width` after each row.
+///
+/// The returned vector holds one entry per row, in iteration order. An error
+/// is returned if registering the write functions, generating the row writer
+/// function, or compiling it fails.
+#[cfg(feature = "jit")]
+pub fn bench_write_batch_jit(
+    batches: &[Vec<RecordBatch>],
+    schema: Arc<Schema>,
+) -> Result<Vec<usize>> {
+    // Shape the compiled code pointer is reinterpreted as.
+    type RowWriteFn = fn(&mut RowWriter, usize, &RecordBatch);
+
+    let assembler = Assembler::default();
+    let mut row_writer = RowWriter::new(&schema);
+    let mut widths = Vec::new();
+    register_write_functions(&assembler)?;
+    let generated = gen_write_row(&schema, &assembler)?;
+    let mut jit = assembler.create_jit();
+    let code_ptr = jit.compile(generated)?;
+    // SAFETY: `code_ptr` is the result of compiling the function produced by
+    // `gen_write_row` for this schema. This assumes the generated function
+    // matches `RowWriteFn`. NOTE(review): confirm against `gen_write_row`.
+    let write_row_jit: RowWriteFn = unsafe { std::mem::transmute(code_ptr) };
+
+    for group in batches {
+        for batch in group {
+            for row in 0..batch.num_rows() {
+                write_row_jit(&mut row_writer, row, batch);
+                row_writer.end_padding();
+                widths.push(row_writer.row_width);
+                // Clear the writer before the next row is written.
+                row_writer.reset();
+            }
+        }
+    }
+    Ok(widths)
+}
+
+#[cfg(feature = "jit")]
+pub fn bench_write_batch_jit_dummy(schema: Arc<Schema>) -> Result<()> {
Review comment:
Where is this `bench_write_batch_jit_dummy` function used? If it isn't
used, should it be removed?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]