wgtmac commented on issue #66:
URL: https://github.com/apache/parquet-testing/issues/66#issuecomment-2570208546
I think alltypes_tiny_pages.parquet is what you want.
```
File path: alltypes_tiny_pages.parquet
Created by: parquet-mr version 1.12.0-SNAPSHOT (build 6901a2040848c6b37fa61f4b0a76246445f396db)
Properties:
writer.model.name: 2.1.1-cdh6.x-SNAPSHOT
Schema:
message hive_schema {
  optional int32 id;
  optional boolean bool_col;
  optional int32 tinyint_col (INTEGER(8,true));
  optional int32 smallint_col (INTEGER(16,true));
  optional int32 int_col;
  optional int64 bigint_col;
  optional float float_col;
  optional double double_col;
  optional binary date_string_col (STRING);
  optional binary string_col (STRING);
  optional int96 timestamp_col;
  optional int32 year;
  optional int32 month;
}
Row group 0: count: 7300 44.33 B records start: 4 total(compressed): 315.995 kB total(uncompressed):315.995 kB
--------------------------------------------------------------------------------
                 type     encodings  count  avg size  nulls  min / max
id               INT32    _ _        7300   5.11 B    0      "0" / "7299"
bool_col         BOOLEAN  _ _        7300   0.41 B    0      "false" / "true"
tinyint_col      INT32    _ _ R      7300   1.70 B    0      "0" / "9"
smallint_col     INT32    _ _ R      7300   1.70 B    0      "0" / "9"
int_col          INT32    _ _ R      7300   1.70 B    0      "0" / "9"
bigint_col       INT64    _ _ R      7300   2.40 B    0      "0" / "90"
float_col        FLOAT    _ _ R      7300   1.70 B    0      "-0.0" / "9.9"
double_col       DOUBLE   _ _ R      7300   2.40 B    0      "-0.0" / "90.89999999999999"
date_string_col  BINARY   _ _ R      7300   5.77 B    0      "01/01/09" / "12/31/10"
string_col       BINARY   _ _ R      7300   1.79 B    0      "0" / "9"
timestamp_col    INT96    _ _ R      7300   17.33 B   0
year             INT32    _ _ R      7300   1.14 B    0      "2009" / "2010"
month            INT32    _ _ R      7300   1.18 B    0      "1" / "12"
```
```
{
  "rowGroupOrdinal" : 0,
  "encodingStats" : {
    "dictStats" : {
      "PLAIN_DICTIONARY" : 1
    },
    "dataStats" : {
      "PLAIN_DICTIONARY" : 325
    },
    "usesV2Pages" : false
  },
  "properties" : {
    "codec" : "UNCOMPRESSED",
    "path" : {
      "p" : [ "tinyint_col" ]
    },
    "type" : {
      "name" : "tinyint_col",
      "repetition" : "OPTIONAL",
      "logicalTypeAnnotation" : {
        "bitWidth" : 8,
        "isSigned" : true
      },
      "id" : null,
      "primitive" : "INT32",
      "length" : 0,
      "decimalMeta" : null,
      "columnOrder" : {
        "columnOrderName" : "TYPE_DEFINED_ORDER"
      }
    },
    "encodings" : [ "RLE", "PLAIN_DICTIONARY", "BIT_PACKED" ]
  },
  "columnIndexReference" : {
    "offset" : 328009,
    "length" : 3919
  },
  "offsetIndexReference" : {
    "offset" : 398637,
    "length" : 3251
  },
  "bloomFilterOffset" : -1,
  "bloomFilterLength" : -1,
  "firstDataPage" : -2147443297,
  "dictionaryPageOffset" : -2147483648,
  "valueCount" : -2147476348,
  "totalSize" : -2147471254,
  "totalUncompressedSize" : -2147471254,
  "statistics" : {
    "type" : {
      "name" : "tinyint_col",
      "repetition" : "OPTIONAL",
      "logicalTypeAnnotation" : {
        "bitWidth" : 8,
        "isSigned" : true
      },
      "id" : null,
      "primitive" : "INT32",
      "length" : 0,
      "decimalMeta" : null,
      "columnOrder" : {
        "columnOrderName" : "TYPE_DEFINED_ORDER"
      }
    },
    "comparator" : { },
    "hasNonNullValue" : true,
    "num_nulls" : 0,
    "stringifier" : {
      "name" : "DEFAULT_STRINGIFIER",
      "digits" : "0123456789ABCDEF"
    },
    "max" : 9,
    "min" : 0
  },
  "sizeStatistics" : null
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]