Qifan Chen has posted comments on this change. ( 
http://gerrit.cloudera.org:8080/17860 )

Change subject: IMPALA-9873: Avoid materilization of columns for filtered out 
rows in Parquet table.
......................................................................


Patch Set 12:

Wrote the following code for randomly generating selected rows. I have not had a 
chance to test it out yet (some runtime issue with my dev box).

// This test checks the conversion of 'selected_rows' with randomly generated
// 'true' values into 'ScratchMicroBatch'es, for several gap values passed to
// GetMicroBatches(). For every returned micro batch it verifies that:
//  - 'start' <= 'end', 'length' == 'end' - 'start' + 1, and both boundary rows
//    are selected;
//  - two consecutive selected rows inside a batch are less than 'n' rows apart.
// For every pair of consecutive micro batches it verifies that they do not
// overlap, are at least 'n' rows apart, and that no row between them is selected.
TEST_F(ScratchTupleBatchTest, TestRandomGeneratedMicroBatches) {
  const int BATCH_SIZE = 1024;
  scoped_ptr<ScratchTupleBatch> scratch_batch(
      new ScratchTupleBatch(*desc_, BATCH_SIZE, &tracker_));
  scratch_batch->num_tuples = BATCH_SIZE;

  // Seed the generator once, outside the loop: re-seeding with time() on every
  // iteration would replay the same pattern for each gap processed within the
  // same second. The seed is attached to every failure message so that a failing
  // run can be reproduced.
  const unsigned int seed = static_cast<unsigned int>(time(nullptr));
  srand(seed);
  SCOPED_TRACE(::testing::Message() << "random seed: " << seed);

  // Gaps to try. The last one is larger than BATCH_SIZE, so given the checks
  // below it can only ever produce a single micro batch.
  const vector<int> gaps = {5, 16, 29, 37, 1025};
  for (const int n : gaps) {
    SCOPED_TRACE(::testing::Message() << "gap: " << n);

    // Mark random locations as selected (roughly half of the rows).
    bool any_selected = false;
    for (int batch_idx = 0; batch_idx < BATCH_SIZE; ++batch_idx) {
      const bool selected = rand() < (RAND_MAX / 2);
      scratch_batch->selected_rows[batch_idx] = selected;
      any_selected = any_selected || selected;
    }
    // The checks below expect every micro batch to start and end on a selected
    // row, so make sure at least one row is selected.
    // NOTE(review): GetMicroBatches() presumably requires this as well - confirm.
    if (!any_selected) scratch_batch->selected_rows[rand() % BATCH_SIZE] = true;

    ScratchMicroBatch micro_batches[BATCH_SIZE];
    const int batches = scratch_batch->GetMicroBatches(n, micro_batches);
    // A single micro batch is a valid outcome (it is the only possible one when
    // the gap exceeds BATCH_SIZE), so only require at least one. ASSERT rather
    // than EXPECT: the loops below index 'micro_batches' with this count.
    ASSERT_GE(batches, 1);
    ASSERT_LE(batches, BATCH_SIZE);

    // Verify every batch.
    for (int i = 0; i < batches; ++i) {
      const ScratchMicroBatch& batch = micro_batches[i];
      // Validate the range before using it to index 'selected_rows'.
      ASSERT_GE(batch.start, 0);
      ASSERT_LT(batch.end, BATCH_SIZE);
      ASSERT_LE(batch.start, batch.end);
      EXPECT_EQ(batch.length, batch.end - batch.start + 1);
      // Both boundary rows must be selected. (Testing 'batch.start' itself for
      // truth would wrongly reject a batch that starts at row 0.)
      EXPECT_TRUE(scratch_batch->selected_rows[batch.start]);
      EXPECT_TRUE(scratch_batch->selected_rows[batch.end]);
      // Consecutive selected rows inside a batch must be less than 'n' apart.
      // The upper bound is inclusive so that the final gap, up to 'batch.end',
      // is verified too.
      int last_true_idx = batch.start;
      for (int j = batch.start + 1; j <= batch.end; ++j) {
        if (scratch_batch->selected_rows[j]) {
          EXPECT_LE(j - last_true_idx + 1, n);
          last_true_idx = j;
        }
      }
    }

    // Verify any two consecutive batches i and i+1.
    for (int i = 0; i < batches - 1; ++i) {
      const ScratchMicroBatch& batch = micro_batches[i];
      const ScratchMicroBatch& nbatch = micro_batches[i + 1];
      EXPECT_LT(batch.end, nbatch.start);
      EXPECT_GE(nbatch.start - batch.end, n);
      // Any row in between the two batches should not be selected.
      for (int j = batch.end + 1; j < nbatch.start; ++j) {
        EXPECT_FALSE(scratch_batch->selected_rows[j]);
      }
    }
  }
}


--
To view, visit http://gerrit.cloudera.org:8080/17860
To unsubscribe, visit http://gerrit.cloudera.org:8080/settings

Gerrit-Project: Impala-ASF
Gerrit-Branch: master
Gerrit-MessageType: comment
Gerrit-Change-Id: I46406c913297d5bbbec3ccae62a83bb214ed2c60
Gerrit-Change-Number: 17860
Gerrit-PatchSet: 12
Gerrit-Owner: Amogh Margoor <[email protected]>
Gerrit-Reviewer: Amogh Margoor <[email protected]>
Gerrit-Reviewer: Impala Public Jenkins <[email protected]>
Gerrit-Reviewer: Kurt Deschler <[email protected]>
Gerrit-Reviewer: Qifan Chen <[email protected]>
Gerrit-Reviewer: Zoltan Borok-Nagy <[email protected]>
Gerrit-Comment-Date: Wed, 27 Oct 2021 14:34:11 +0000
Gerrit-HasComments: No

Reply via email to