ahmet-uyar commented on a change in pull request #11872: URL: https://github.com/apache/arrow/pull/11872#discussion_r763271175
##########
File path: cpp/src/arrow/ipc/read_write_test.cc
##########
@@ -1727,6 +1727,52 @@ TEST(TestIpcFileFormat, FooterMetaData) {
   ASSERT_TRUE(out_metadata->Equals(*metadata));
 }
 
+TEST_F(TestWriteRecordBatch, CompressionRatio) {
+  // ARROW-8823: Calculating the compression ratio
+  FileWriterHelper helper;
+  IpcWriteOptions write_options1 = IpcWriteOptions::Defaults();
+  IpcWriteOptions write_options2 = IpcWriteOptions::Defaults();
+  ASSERT_OK_AND_ASSIGN(write_options2.codec, util::Codec::Create(Compression::LZ4_FRAME));
+
+  // pre-computed compression ratios for record batches with Compression::LZ4_FRAME
+  std::vector<float> comp_ratios{1.0f, 0.64f, 0.79924363f};
+
+  std::vector<std::shared_ptr<RecordBatch>> batches(3);
+  // empty record batch
+  ASSERT_OK(MakeIntBatchSized(0, &batches[0]));
+  // record batch with int values
+  ASSERT_OK(MakeIntBatchSized(2000, &batches[1], 100));
+
+  // record batch with DictionaryArray
+  random::RandomArrayGenerator rg(/*seed=*/0);
+  int64_t length = 500;
+  int dict_size = 50;
+  std::shared_ptr<Array> dict = rg.String(dict_size, /*min_length=*/5, /*max_length=*/5, /*null_probability=*/0);
+  std::shared_ptr<Array> indices = rg.Int32(length, /*min=*/0, /*max=*/dict_size - 1, /*null_probability=*/0.1);
+  auto dict_type = dictionary(int32(), utf8());
+  auto dict_field = field("f1", dict_type);
+  ASSERT_OK_AND_ASSIGN(auto dict_array,
+                       DictionaryArray::FromArrays(dict_type, indices, dict));
+
+  auto schema = ::arrow::schema({field("f0", utf8()), dict_field});
+  batches[2] =
+    RecordBatch::Make(schema, length, {rg.String(500, 0, 10, 0.1), dict_array});
+
+  for(size_t i = 0; i < batches.size(); ++i) {

Review comment:
   Done.

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org