MachaelLee commented on PR #4567:
URL: https://github.com/apache/arrow-rs/pull/4567#issuecomment-1650906034
>
The metadata is as follows; note the last `ColumnChunkMetaData` in the
`RowGroupMetaData`.
```
object_meta:ObjectMeta { location: Path { raw: "15.sst" }, last_modified:
2023-07-24T09:16:27.232704798Z, size: 4402 },
parquet_meta:ParquetMetaData { file_metadata: FileMetaData { version: 1,
num_rows: 15, created_by: Some("parquet-rs version 43.0.0"),
key_value_metadata: None, schema_descr: SchemaDescriptor { schema: GroupType {
basic_info: BasicTypeInfo { name: "arrow_schema", repetition: None,
converted_type: NONE, logical_type: None, id: None }, fields: [PrimitiveType {
basic_info: BasicTypeInfo { name: "tsid", repetition: Some(REQUIRED),
converted_type: UINT_64, logical_type: Some(Integer { bit_width: 64, is_signed:
false }), id: None }, physical_type: INT64, type_length: -1, scale: -1,
precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo { name: "timestamp",
repetition: Some(REQUIRED), converted_type: TIMESTAMP_MILLIS, logical_type:
Some(Timestamp { is_adjusted_to_u_t_c: false, unit: MILLIS(MilliSeconds) }),
id: None }, physical_type: INT64, type_length: -1, scale: -1, precision: -1 },
PrimitiveType { basic_info: BasicTypeInfo { name: "instance", repetition:
Some(OPTIONAL), converted_type: UTF8, logical_type: Some(String), id: None },
physical_type: BYTE_ARRAY,
type_length: -1, scale: -1, precision: -1 }, PrimitiveType { basic_info:
BasicTypeInfo { name: "job", repetition: Some(OPTIONAL), converted_type: UTF8,
logical_type: Some(String), id: None }, physical_type: BYTE_ARRAY, type_length:
-1, scale: -1, precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo {
name: "quantile", repetition: Some(OPTIONAL), converted_type: UTF8,
logical_type: Some(String), id: None }, physical_type: BYTE_ARRAY, type_length:
-1, scale: -1, precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo {
name: "value", repetition: Some(OPTIONAL), converted_type: NONE, logical_type:
None, id: None }, physical_type: DOUBLE, type_length: -1, scale: -1, precision:
-1 }] } }, column_orders: None },
row_groups: [
RowGroupMetaData {
columns: [
ColumnChunkMetaData { column_type: INT64, column_path:
ColumnPath { parts: ["tsid"] }, column_descr: ColumnDescriptor {
primitive_type: PrimitiveType { basic_info: BasicTypeInfo { name: "tsid",
repetition: Some(REQUIRED), converted_type: UINT_64, logical_type: Some(Integer
{ bit_width: 64, is_signed: false }), id: None }, physical_type: INT64,
type_length: -1, scale: -1, precision: -1 }, max_def_level: 0, max_rep_level:
0, path: ColumnPath { parts: ["tsid"] } }, encodings: [PLAIN, RLE,
RLE_DICTIONARY], file_path: None, file_offset: 105, num_values: 15,
compression: ZSTD(ZstdLevel(1)), total_compressed_size: 101,
total_uncompressed_size: 83, data_page_offset: 51, index_page_offset: None,
dictionary_page_offset: Some(4), statistics: Some(Int64({min:
Some(2396824053319244218), max: Some(-6082033603500125970), distinct_count:
None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible:
false})), encoding_stats: None, bloom_filter_offset: None,
offset_index_offset: Some(1095), offset_index_length: Some(10), column_index_offset:
Some(933), column_index_length: Some(31) },
ColumnChunkMetaData { column_type: INT64, column_path:
ColumnPath { parts: ["timestamp"] }, column_descr: ColumnDescriptor {
primitive_type: PrimitiveType { basic_info: BasicTypeInfo { name: "timestamp",
repetition: Some(REQUIRED), converted_type: TIMESTAMP_MILLIS, logical_type:
Some(Timestamp { is_adjusted_to_u_t_c: false, unit: MILLIS(MilliSeconds) }),
id: None }, physical_type: INT64, type_length: -1, scale: -1, precision: -1 },
max_def_level: 0, max_rep_level: 0, path: ColumnPath { parts: ["timestamp"] }
}, encodings: [PLAIN, RLE, RLE_DICTIONARY], file_path: None, file_offset: 275,
num_values: 15, compression: ZSTD(ZstdLevel(1)), total_compressed_size: 119,
total_uncompressed_size: 101, data_page_offset: 219, index_page_offset: None,
dictionary_page_offset: Some(156), statistics: Some(Int64({min:
Some(1690019857962), max: Some(1690019977962), distinct_count: None,
null_count: 0, min_max_deprecated: false, min_max_backwards_compatible:
false})), encoding_stats: None, bloom_filter_offset: None,
offset_index_offset: Some(1105),
offset_index_length: Some(11), column_index_offset: Some(964),
column_index_length: Some(31) },
ColumnChunkMetaData { column_type: BYTE_ARRAY, column_path:
ColumnPath { parts: ["instance"] }, column_descr: ColumnDescriptor {
primitive_type: PrimitiveType { basic_info: BasicTypeInfo { name: "instance",
repetition: Some(OPTIONAL), converted_type: UTF8, logical_type: Some(String),
id: None }, physical_type: BYTE_ARRAY, type_length: -1, scale: -1, precision:
-1 }, max_def_level: 1, max_rep_level: 0, path: ColumnPath { parts:
["instance"] } }, encodings: [PLAIN, RLE, RLE_DICTIONARY], file_path: None,
file_offset: 460, num_values: 15, compression: ZSTD(ZstdLevel(1)),
total_compressed_size: 109, total_uncompressed_size: 91, data_page_offset: 392,
index_page_offset: None, dictionary_page_offset: Some(351), statistics:
Some(ByteArray({min: Some(ByteArray { data: "localhost:9090" }), max:
Some(ByteArray { data: "localhost:9090" }), distinct_count: None, null_count:
0, min_max_deprecated: false, min_max_backwards_compatible: false})),
encoding_stats: None, bloom_filter_offset: None, offset_index_offset: Some(1116),
offset_index_length: Some(12),
column_index_offset: Some(995), column_index_length: Some(43) },
ColumnChunkMetaData { column_type: BYTE_ARRAY, column_path:
ColumnPath { parts: ["job"] }, column_descr: ColumnDescriptor { primitive_type:
PrimitiveType { basic_info: BasicTypeInfo { name: "job", repetition:
Some(OPTIONAL), converted_type: UTF8, logical_type: Some(String), id: None },
physical_type: BYTE_ARRAY, type_length: -1, scale: -1, precision: -1 },
max_def_level: 1, max_rep_level: 0, path: ColumnPath { parts: ["job"] } },
encodings: [PLAIN, RLE, RLE_DICTIONARY], file_path: None, file_offset: 624,
num_values: 15, compression: ZSTD(ZstdLevel(1)), total_compressed_size: 97,
total_uncompressed_size: 79, data_page_offset: 564, index_page_offset: None,
dictionary_page_offset: Some(527), statistics: Some(ByteArray({min:
Some(ByteArray { data: "prometheus" }), max: Some(ByteArray { data:
"prometheus" }), distinct_count: None, null_count: 0, min_max_deprecated:
false, min_max_backwards_compatible: false})), encoding_stats: None,
bloom_filter_offset: None, offset_index_offset: Some(1128),
offset_index_length: Some(11), column_index_offset:
Some(1038), column_index_length: Some(35) },
ColumnChunkMetaData { column_type: BYTE_ARRAY, column_path:
ColumnPath { parts: ["quantile"] }, column_descr: ColumnDescriptor {
primitive_type: PrimitiveType { basic_info: BasicTypeInfo { name: "quantile",
repetition: Some(OPTIONAL), converted_type: UTF8, logical_type: Some(String),
id: None }, physical_type: BYTE_ARRAY, type_length: -1, scale: -1, precision:
-1 }, max_def_level: 1, max_rep_level: 0, path: ColumnPath { parts:
["quantile"] } }, encodings: [PLAIN, RLE, RLE_DICTIONARY], file_path: None,
file_offset: 774, num_values: 15, compression: ZSTD(ZstdLevel(1)),
total_compressed_size: 96, total_uncompressed_size: 78, data_page_offset: 723,
index_page_offset: None, dictionary_page_offset: Some(678), statistics:
Some(ByteArray({min: Some(ByteArray { data: "0.5" }), max: Some(ByteArray {
data: "0.99" }), distinct_count: None, null_count: 0, min_max_deprecated:
false, min_max_backwards_compatible: false})), encoding_stats: None,
bloom_filter_offset: None, offset_index_offset: Some(1139),
offset_index_length: Some(11), column_index_offset:
Some(1073), column_index_length: Some(22) },
ColumnChunkMetaData { column_type: DOUBLE, column_path:
ColumnPath { parts: ["value"] }, column_descr: ColumnDescriptor {
primitive_type: PrimitiveType { basic_info: BasicTypeInfo { name: "value",
repetition: Some(OPTIONAL), converted_type: NONE, logical_type: None, id: None
}, physical_type: DOUBLE, type_length: -1, scale: -1, precision: -1 },
max_def_level: 1, max_rep_level: 0, path: ColumnPath { parts: ["value"] } },
encodings: [PLAIN, RLE, RLE_DICTIONARY], file_path: None, file_offset: 901,
num_values: 15, compression: ZSTD(ZstdLevel(1)), total_compressed_size: 81,
total_uncompressed_size: 168, data_page_offset: 859, index_page_offset: None,
dictionary_page_offset: Some(820), statistics: Some(Double({min: None, max:
None, distinct_count: None, null_count: 0, min_max_deprecated: true,
min_max_backwards_compatible: true})), encoding_stats: None,
bloom_filter_offset: None, offset_index_offset: None, offset_index_length:
None, column_index_offset: None, column_index_length: None }]
, num_rows: 15, sorting_columns: None, total_byte_size: 600, schema_descr:
SchemaDescriptor { schema: GroupType { basic_info: BasicTypeInfo { name:
"arrow_schema", repetition: None, converted_type: NONE, logical_type: None, id:
None }, fields: [PrimitiveType { basic_info: BasicTypeInfo { name: "tsid",
repetition: Some(REQUIRED), converted_type: UINT_64, logical_type: Some(Integer
{ bit_width: 64, is_signed: false }), id: None }, physical_type: INT64,
type_length: -1, scale: -1, precision: -1 }, PrimitiveType { basic_info:
BasicTypeInfo { name: "timestamp", repetition: Some(REQUIRED), converted_type:
TIMESTAMP_MILLIS, logical_type: Some(Timestamp { is_adjusted_to_u_t_c: false,
unit: MILLIS(MilliSeconds) }), id: None }, physical_type: INT64, type_length:
-1, scale: -1, precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo {
name: "instance", repetition: Some(OPTIONAL), converted_type: UTF8,
logical_type: Some(String), id: None }, physical_type: BYTE_ARRAY, type_length:
-1, scale: -1, precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo { name:
"job", repetition: Some(OPTIONAL), converted_type: UTF8, logical_type:
Some(String), id: None }, physical_type: BYTE_ARRAY, type_length: -1, scale:
-1, precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo { name:
"quantile", repetition: Some(OPTIONAL), converted_type: UTF8, logical_type:
Some(String), id: None }, physical_type: BYTE_ARRAY, type_length: -1, scale:
-1, precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo { name: "value",
repetition: Some(OPTIONAL), converted_type: NONE, logical_type: None, id: None
}, physical_type: DOUBLE, type_length: -1, scale: -1, precision: -1 }] } },
page_offset_index: None }], page_indexes: None, offset_indexes: None },
custom_meta:ParquetMetaData { min_key:
"0421433ac70f3499ba03800001897d08862afffffffffffff3a7fffffffe", max_key:
"04ab9846a95f62d0ee03800001897d0a5aeafffffffffffff3a3fffffffd", time_range:
TimeRange { inclusive_start: Timestamp(1690012800000),
exclusive_end: Timestamp(1690020000000) }, max_sequence: 3164, schema: Schema
{ timestamp_index: 1, tsid_index: Some(0), column_schemas: ColumnSchemas {
columns: [ColumnSchema { id: 1, name: "tsid", data_type: UInt64, is_nullable:
false, is_tag: false, comment: "", escaped_name: "tsid", default_value: None },
ColumnSchema { id: 2, name: "timestamp", data_type: Timestamp, is_nullable:
false, is_tag: false, comment: "", escaped_name: "timestamp", default_value:
None }, ColumnSchema { id: 3, name: "instance", data_type: String, is_nullable:
true, is_tag: true, comment: "", escaped_name: "instance", default_value: None
}, ColumnSchema { id: 4, name: "job", data_type: String, is_nullable: true,
is_tag: true, comment: "", escaped_name: "job", default_value: None },
ColumnSchema { id: 5, name: "quantile", data_type: String, is_nullable: true,
is_tag: true, comment: "", escaped_name: "quantile", default_value: None },
ColumnSchema { id: 6, name: "value", data_type: Double, is_nullable: true,
is_tag: false, comment: "", escaped_name: "value", default_value: None }] },
version: 1, primary_key_indexes: [0, 1] }, filter_size: 0,
collapsible_cols_idx: [] }
```
```
ColumnChunkMetaData { column_type: DOUBLE, column_path:
ColumnPath { parts: ["value"] }, column_descr: ColumnDescriptor {
primitive_type: PrimitiveType { basic_info: BasicTypeInfo { name: "value",
repetition: Some(OPTIONAL), converted_type: NONE, logical_type: None, id: None
}, physical_type: DOUBLE, type_length: -1, scale: -1, precision: -1 },
max_def_level: 1, max_rep_level: 0, path: ColumnPath { parts: ["value"] } },
encodings: [PLAIN, RLE, RLE_DICTIONARY], file_path: None, file_offset: 901,
num_values: 15, compression: ZSTD(ZstdLevel(1)), total_compressed_size: 81,
total_uncompressed_size: 168, data_page_offset: 859, index_page_offset: None,
dictionary_page_offset: Some(820), statistics: Some(Double({min: None, max:
None, distinct_count: None, null_count: 0, min_max_deprecated: true,
min_max_backwards_compatible: true})), encoding_stats: None,
bloom_filter_offset: None, offset_index_offset: None, offset_index_length:
None, column_index_offset: None, column_index_length: None }]
```
In `statistics`, `min` and `max` are both `None`, which leads to
`offset_index_offset: None, offset_index_length: None, column_index_offset:
None, column_index_length: None`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]