alamb commented on code in PR #4132:
URL: https://github.com/apache/arrow-datafusion/pull/4132#discussion_r1015930022


##########
datafusion/core/src/physical_plan/file_format/parquet/page_filter.rs:
##########
@@ -421,7 +423,16 @@ macro_rules! get_min_max_values_for_page_index {
                     vec.iter().map(|x| x.$func().cloned()),
                 )))
             }
-            Index::INT96(_) | Index::BYTE_ARRAY(_) | 
Index::FIXED_LEN_BYTE_ARRAY(_) => {
+            Index::BYTE_ARRAY(index) => {
+                let vec = &index.indexes;
+                let array: StringArray = vec

Review Comment:
   I am not 100% sure if this is ok (for example, what if the parquet data got
mapped to a LargeStringArray?) 🤔



##########
datafusion/core/tests/parquet_filter_pushdown.rs:
##########
@@ -266,20 +266,17 @@ async fn single_file_small_data_pages() {
     // page 3:                                     DLE:RLE RLE:RLE 
VLE:RLE_DICTIONARY ST:[min: djzdyiecnumrsrcbizwlqzdhnpoiqdh, max: 
fktdcgtmzvoedpwhfevcvvrtaurzgex, num_nulls not defined] CRC:[none] SZ:7 VC:9216
     // page 4:                                     DLE:RLE RLE:RLE 
VLE:RLE_DICTIONARY ST:[min: fktdcgtmzvoedpwhfevcvvrtaurzgex, max: 
fwtdpgtxwqkkgtgvthhwycrvjiizdifyp, num_nulls not defined] CRC:[none] SZ:7 
VC:9216
     // page 5:                                     DLE:RLE RLE:RLE 
VLE:RLE_DICTIONARY ST:[min: fwtdpgtxwqkkgtgvthhwycrvjiizdifyp, max: 
iadnalqpdzthpifrvewossmpqibgtsuin, num_nulls not defined] CRC:[none] SZ:7 
VC:7739
-    //
-    // This test currently fails due to 
https://github.com/apache/arrow-datafusion/issues/3833
-    // (page index pruning not implemented for byte array)
-
-    // TestCase::new(&test_parquet_file)
-    //     .with_name("selective")
-    //     // predicagte is chosen carefully to prune pages 0, 1, 2, 3, 4
-    //     // pod = 'iadnalqpdzthpifrvewossmpqibgtsuin'
-    //     
.with_filter(col("pod").eq(lit("iadnalqpdzthpifrvewossmpqibgtsuin")))
-    //     .with_pushdown_expected(PushdownExpected::Some)
-    //     
.with_page_index_filtering_expected(PageIndexFilteringExpected::Some)
-    //     .with_expected_rows(2574)
-    //     .run()
-    //     .await;
+
+    TestCase::new(&test_parquet_file)
+        .with_name("selective")
+        // predicate is chosen carefully to prune pages 0, 1, 2, 3, 4
+        // pod = 'iadnalqpdzthpifrvewossmpqibgtsuin'
+        .with_filter(col("pod").eq(lit("iadnalqpdzthpifrvewossmpqibgtsuin")))
+        .with_pushdown_expected(PushdownExpected::Some)
+        .with_page_index_filtering_expected(PageIndexFilteringExpected::Some)
+        .with_expected_rows(2574)
+        .run()
+        .await;

Review Comment:
   this now passes!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to