wgtmac commented on issue #66:
URL: https://github.com/apache/parquet-testing/issues/66#issuecomment-2570208546

   I think alltypes_tiny_pages.parquet is what you want.
   
   ```
   File path:  alltypes_tiny_pages.parquet
   Created by: parquet-mr version 1.12.0-SNAPSHOT (build 6901a2040848c6b37fa61f4b0a76246445f396db)
   Properties:
     writer.model.name: 2.1.1-cdh6.x-SNAPSHOT
   Schema:
   message hive_schema {
     optional int32 id;
     optional boolean bool_col;
     optional int32 tinyint_col (INTEGER(8,true));
     optional int32 smallint_col (INTEGER(16,true));
     optional int32 int_col;
     optional int64 bigint_col;
     optional float float_col;
     optional double double_col;
     optional binary date_string_col (STRING);
     optional binary string_col (STRING);
     optional int96 timestamp_col;
     optional int32 year;
     optional int32 month;
   }
   
   
   Row group 0:  count: 7300  44.33 B records  start: 4  total(compressed): 315.995 kB total(uncompressed):315.995 kB
   
--------------------------------------------------------------------------------
                    type      encodings count     avg size   nulls   min / max
   id               INT32     _   _     7300      5.11 B     0       "0" / "7299"
   bool_col         BOOLEAN   _   _     7300      0.41 B     0       "false" / "true"
   tinyint_col      INT32     _ _ R     7300      1.70 B     0       "0" / "9"
   smallint_col     INT32     _ _ R     7300      1.70 B     0       "0" / "9"
   int_col          INT32     _ _ R     7300      1.70 B     0       "0" / "9"
   bigint_col       INT64     _ _ R     7300      2.40 B     0       "0" / "90"
   float_col        FLOAT     _ _ R     7300      1.70 B     0       "-0.0" / "9.9"
   double_col       DOUBLE    _ _ R     7300      2.40 B     0       "-0.0" / "90.89999999999999"
   date_string_col  BINARY    _ _ R     7300      5.77 B     0       "01/01/09" / "12/31/10"
   string_col       BINARY    _ _ R     7300      1.79 B     0       "0" / "9"
   timestamp_col    INT96     _ _ R     7300      17.33 B    0
   year             INT32     _ _ R     7300      1.14 B     0       "2009" / "2010"
   month            INT32     _ _ R     7300      1.18 B     0       "1" / "12"
   ```
   
   ```
      {
         "rowGroupOrdinal" : 0,
         "encodingStats" : {
           "dictStats" : {
             "PLAIN_DICTIONARY" : 1
           },
           "dataStats" : {
             "PLAIN_DICTIONARY" : 325
           },
           "usesV2Pages" : false
         },
         "properties" : {
           "codec" : "UNCOMPRESSED",
           "path" : {
             "p" : [ "tinyint_col" ]
           },
           "type" : {
             "name" : "tinyint_col",
             "repetition" : "OPTIONAL",
             "logicalTypeAnnotation" : {
               "bitWidth" : 8,
               "isSigned" : true
             },
             "id" : null,
             "primitive" : "INT32",
             "length" : 0,
             "decimalMeta" : null,
             "columnOrder" : {
               "columnOrderName" : "TYPE_DEFINED_ORDER"
             }
           },
           "encodings" : [ "RLE", "PLAIN_DICTIONARY", "BIT_PACKED" ]
         },
         "columnIndexReference" : {
           "offset" : 328009,
           "length" : 3919
         },
         "offsetIndexReference" : {
           "offset" : 398637,
           "length" : 3251
         },
         "bloomFilterOffset" : -1,
         "bloomFilterLength" : -1,
         "firstDataPage" : -2147443297,
         "dictionaryPageOffset" : -2147483648,
         "valueCount" : -2147476348,
         "totalSize" : -2147471254,
         "totalUncompressedSize" : -2147471254,
         "statistics" : {
           "type" : {
             "name" : "tinyint_col",
             "repetition" : "OPTIONAL",
             "logicalTypeAnnotation" : {
               "bitWidth" : 8,
               "isSigned" : true
             },
             "id" : null,
             "primitive" : "INT32",
             "length" : 0,
             "decimalMeta" : null,
             "columnOrder" : {
               "columnOrderName" : "TYPE_DEFINED_ORDER"
             }
           },
           "comparator" : { },
           "hasNonNullValue" : true,
           "num_nulls" : 0,
           "stringifier" : {
             "name" : "DEFAULT_STRINGIFIER",
             "digits" : "0123456789ABCDEF"
           },
           "max" : 9,
           "min" : 0
         },
         "sizeStatistics" : null
       }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to