Qifan Chen has posted comments on this change. (
http://gerrit.cloudera.org:8080/17860 )
Change subject: IMPALA-9873: Avoid materialization of columns for filtered out
rows in Parquet table.
......................................................................
Patch Set 12:
Wrote the following code for randomly generating selected rows. I have not had a
chance to test it out yet (some runtime issue with my dev box).
// This test checks the conversion of 'selected_rows', populated with randomly
// generated 'true' values, into 'ScratchMicroBatch' entries via GetMicroBatches().
// For every gap 'n' in 'gaps' it verifies that:
//  - at least one and at most BATCH_SIZE micro batches are produced;
//  - every micro batch has start <= end, a consistent length, and begins and ends
//    on a selected row;
//  - selected rows inside a micro batch are close enough together
//    (j - last_true_idx + 1 <= n);
//  - consecutive micro batches are ordered, at least 'n' apart, and no row
//    between them is selected.
TEST_F(ScratchTupleBatchTest, TestRandomGeneratedMicroBatches) {
  const int BATCH_SIZE = 1024;
  scoped_ptr<ScratchTupleBatch> scratch_batch(
      new ScratchTupleBatch(*desc_, BATCH_SIZE, &tracker_));
  scratch_batch->num_tuples = BATCH_SIZE;
  // Seed once, outside the loop. Re-seeding with time() for every gap would hand
  // each iteration that runs within the same second an identical row pattern.
  srand(time(nullptr));
  // gaps to try
  vector<int> gaps = {5, 16, 29, 37, 1025};
  for (auto n : gaps) {
    // Mark random locations as selected (each row with probability ~1/2).
    for (int batch_idx = 0; batch_idx < BATCH_SIZE; ++batch_idx) {
      scratch_batch->selected_rows[batch_idx] = rand() < (RAND_MAX / 2);
    }
    ScratchMicroBatch micro_batches[BATCH_SIZE];
    int batches = scratch_batch->GetMicroBatches(n, micro_batches);
    // A single micro batch is a legitimate outcome: the checks below require
    // consecutive batches to be at least 'n' rows apart, which cannot happen for
    // n = 1025 with only BATCH_SIZE rows, and is very unlikely for the larger
    // gaps when about half of the rows are selected.
    EXPECT_TRUE(batches >= 1);
    EXPECT_TRUE(batches <= BATCH_SIZE);
    // Verify every batch
    for (int i = 0; i < batches; i++) {
      const ScratchMicroBatch& batch = micro_batches[i];
      EXPECT_TRUE(batch.start <= batch.end);
      EXPECT_TRUE(batch.length == batch.end - batch.start + 1);
      // A micro batch must begin and end on a selected row. Check the rows, not
      // the indices: index 0 is a valid start.
      EXPECT_TRUE(scratch_batch->selected_rows[batch.start]);
      EXPECT_TRUE(scratch_batch->selected_rows[batch.end]);
      int last_true_idx = batch.start;
      // NOTE(review): the loop stops before 'batch.end', so the distance between
      // the last interior selected row and 'batch.end' is not checked. Confirm
      // the exact bound against GetMicroBatches() before extending it.
      for (int j = batch.start + 1; j < batch.end; j++) {
        if (scratch_batch->selected_rows[j]) {
          EXPECT_TRUE(j - last_true_idx + 1 <= n);
          last_true_idx = j;
        }
      }
    }
    // Verify any two consecutive batches i and i+1
    for (int i = 0; i < batches - 1; i++) {
      const ScratchMicroBatch& batch = micro_batches[i];
      const ScratchMicroBatch& nbatch = micro_batches[i + 1];
      EXPECT_TRUE(batch.end < nbatch.start);
      EXPECT_TRUE(nbatch.start - batch.end >= n);
      // Any row in between the two batches should not be selected
      for (int j = batch.end + 1; j < nbatch.start; j++) {
        EXPECT_FALSE(scratch_batch->selected_rows[j]);
      }
    }
  }
}
--
To view, visit http://gerrit.cloudera.org:8080/17860
To unsubscribe, visit http://gerrit.cloudera.org:8080/settings
Gerrit-Project: Impala-ASF
Gerrit-Branch: master
Gerrit-MessageType: comment
Gerrit-Change-Id: I46406c913297d5bbbec3ccae62a83bb214ed2c60
Gerrit-Change-Number: 17860
Gerrit-PatchSet: 12
Gerrit-Owner: Amogh Margoor <[email protected]>
Gerrit-Reviewer: Amogh Margoor <[email protected]>
Gerrit-Reviewer: Impala Public Jenkins <[email protected]>
Gerrit-Reviewer: Kurt Deschler <[email protected]>
Gerrit-Reviewer: Qifan Chen <[email protected]>
Gerrit-Reviewer: Zoltan Borok-Nagy <[email protected]>
Gerrit-Comment-Date: Wed, 27 Oct 2021 14:34:11 +0000
Gerrit-HasComments: No