MachaelLee commented on PR #4567:
URL: https://github.com/apache/arrow-rs/pull/4567#issuecomment-1650906034

   > 
   The metadata is as follows; note the last `ColumnChunkMetaData` in the 
`RowGroupMetaData`. 
   ```
   object_meta:ObjectMeta { location: Path { raw: "15.sst" }, last_modified: 
2023-07-24T09:16:27.232704798Z, size: 4402 }, 
   parquet_meta:ParquetMetaData { file_metadata: FileMetaData { version: 1, 
num_rows: 15, created_by: Some("parquet-rs version 43.0.0"), 
key_value_metadata: None, schema_descr: SchemaDescriptor { schema: GroupType { 
basic_info: BasicTypeInfo { name: "arrow_schema", repetition: None, 
converted_type: NONE, logical_type: None, id: None }, fields: [PrimitiveType { 
basic_info: BasicTypeInfo { name: "tsid", repetition: Some(REQUIRED), 
converted_type: UINT_64, logical_type: Some(Integer { bit_width: 64, is_signed: 
false }), id: None }, physical_type: INT64, type_length: -1, scale: -1, 
precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo { name: "timestamp", 
repetition: Some(REQUIRED), converted_type: TIMESTAMP_MILLIS, logical_type: 
Some(Timestamp { is_adjusted_to_u_t_c: false, unit: MILLIS(MilliSeconds) }), 
id: None }, physical_type: INT64, type_length: -1, scale: -1, precision: -1 }, 
PrimitiveType { basic_info: BasicTypeInfo { name: "instance", repetition: 
Some(OPTIONAL), converted_type: UTF8, logical_type: Some(String), id: None }, physical_type: BYTE_ARRAY, 
type_length: -1, scale: -1, precision: -1 }, PrimitiveType { basic_info: 
BasicTypeInfo { name: "job", repetition: Some(OPTIONAL), converted_type: UTF8, 
logical_type: Some(String), id: None }, physical_type: BYTE_ARRAY, type_length: 
-1, scale: -1, precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo { 
name: "quantile", repetition: Some(OPTIONAL), converted_type: UTF8, 
logical_type: Some(String), id: None }, physical_type: BYTE_ARRAY, type_length: 
-1, scale: -1, precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo { 
name: "value", repetition: Some(OPTIONAL), converted_type: NONE, logical_type: 
None, id: None }, physical_type: DOUBLE, type_length: -1, scale: -1, precision: 
-1 }] } }, column_orders: None }, 
   row_groups: [
   
       RowGroupMetaData { 
           columns: [
               ColumnChunkMetaData { column_type: INT64, column_path: 
ColumnPath { parts: ["tsid"] }, column_descr: ColumnDescriptor { 
primitive_type: PrimitiveType { basic_info: BasicTypeInfo { name: "tsid", 
repetition: Some(REQUIRED), converted_type: UINT_64, logical_type: Some(Integer 
{ bit_width: 64, is_signed: false }), id: None }, physical_type: INT64, 
type_length: -1, scale: -1, precision: -1 }, max_def_level: 0, max_rep_level: 
0, path: ColumnPath { parts: ["tsid"] } }, encodings: [PLAIN, RLE, 
RLE_DICTIONARY], file_path: None, file_offset: 105, num_values: 15, 
compression: ZSTD(ZstdLevel(1)), total_compressed_size: 101, 
total_uncompressed_size: 83, data_page_offset: 51, index_page_offset: None, 
dictionary_page_offset: Some(4), statistics: Some(Int64({min: 
Some(2396824053319244218), max: Some(-6082033603500125970), distinct_count: 
None, null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: 
false})), encoding_stats: None, bloom_filter_offset: None, 
offset_index_offset: Some(1095), offset_index_length: Some(10), column_index_offset: 
Some(933), column_index_length: Some(31) }, 
               ColumnChunkMetaData { column_type: INT64, column_path: 
ColumnPath { parts: ["timestamp"] }, column_descr: ColumnDescriptor { 
primitive_type: PrimitiveType { basic_info: BasicTypeInfo { name: "timestamp", 
repetition: Some(REQUIRED), converted_type: TIMESTAMP_MILLIS, logical_type: 
Some(Timestamp { is_adjusted_to_u_t_c: false, unit: MILLIS(MilliSeconds) }), 
id: None }, physical_type: INT64, type_length: -1, scale: -1, precision: -1 }, 
max_def_level: 0, max_rep_level: 0, path: ColumnPath { parts: ["timestamp"] } 
}, encodings: [PLAIN, RLE, RLE_DICTIONARY], file_path: None, file_offset: 275, 
num_values: 15, compression: ZSTD(ZstdLevel(1)), total_compressed_size: 119, 
total_uncompressed_size: 101, data_page_offset: 219, index_page_offset: None, 
dictionary_page_offset: Some(156), statistics: Some(Int64({min: 
Some(1690019857962), max: Some(1690019977962), distinct_count: None, 
null_count: 0, min_max_deprecated: false, min_max_backwards_compatible: 
false})), encoding_stats: None
 , bloom_filter_offset: None, offset_index_offset: Some(1105), 
offset_index_length: Some(11), column_index_offset: Some(964), 
column_index_length: Some(31) }, 
               ColumnChunkMetaData { column_type: BYTE_ARRAY, column_path: 
ColumnPath { parts: ["instance"] }, column_descr: ColumnDescriptor { 
primitive_type: PrimitiveType { basic_info: BasicTypeInfo { name: "instance", 
repetition: Some(OPTIONAL), converted_type: UTF8, logical_type: Some(String), 
id: None }, physical_type: BYTE_ARRAY, type_length: -1, scale: -1, precision: 
-1 }, max_def_level: 1, max_rep_level: 0, path: ColumnPath { parts: 
["instance"] } }, encodings: [PLAIN, RLE, RLE_DICTIONARY], file_path: None, 
file_offset: 460, num_values: 15, compression: ZSTD(ZstdLevel(1)), 
total_compressed_size: 109, total_uncompressed_size: 91, data_page_offset: 392, 
index_page_offset: None, dictionary_page_offset: Some(351), statistics: 
Some(ByteArray({min: Some(ByteArray { data: "localhost:9090" }), max: 
Some(ByteArray { data: "localhost:9090" }), distinct_count: None, null_count: 
0, min_max_deprecated: false, min_max_backwards_compatible: false})), 
encoding_stats: None, bloom_filter_offset: None, 
offset_index_offset: Some(1116), offset_index_length: Some(12), 
column_index_offset: Some(995), column_index_length: Some(43) }, 
               ColumnChunkMetaData { column_type: BYTE_ARRAY, column_path: 
ColumnPath { parts: ["job"] }, column_descr: ColumnDescriptor { primitive_type: 
PrimitiveType { basic_info: BasicTypeInfo { name: "job", repetition: 
Some(OPTIONAL), converted_type: UTF8, logical_type: Some(String), id: None }, 
physical_type: BYTE_ARRAY, type_length: -1, scale: -1, precision: -1 }, 
max_def_level: 1, max_rep_level: 0, path: ColumnPath { parts: ["job"] } }, 
encodings: [PLAIN, RLE, RLE_DICTIONARY], file_path: None, file_offset: 624, 
num_values: 15, compression: ZSTD(ZstdLevel(1)), total_compressed_size: 97, 
total_uncompressed_size: 79, data_page_offset: 564, index_page_offset: None, 
dictionary_page_offset: Some(527), statistics: Some(ByteArray({min: 
Some(ByteArray { data: "prometheus" }), max: Some(ByteArray { data: 
"prometheus" }), distinct_count: None, null_count: 0, min_max_deprecated: 
false, min_max_backwards_compatible: false})), encoding_stats: None, 
bloom_filter_offset: None, 
offset_index_offset: Some(1128), offset_index_length: Some(11), column_index_offset: 
Some(1038), column_index_length: Some(35) }, 
               ColumnChunkMetaData { column_type: BYTE_ARRAY, column_path: 
ColumnPath { parts: ["quantile"] }, column_descr: ColumnDescriptor { 
primitive_type: PrimitiveType { basic_info: BasicTypeInfo { name: "quantile", 
repetition: Some(OPTIONAL), converted_type: UTF8, logical_type: Some(String), 
id: None }, physical_type: BYTE_ARRAY, type_length: -1, scale: -1, precision: 
-1 }, max_def_level: 1, max_rep_level: 0, path: ColumnPath { parts: 
["quantile"] } }, encodings: [PLAIN, RLE, RLE_DICTIONARY], file_path: None, 
file_offset: 774, num_values: 15, compression: ZSTD(ZstdLevel(1)), 
total_compressed_size: 96, total_uncompressed_size: 78, data_page_offset: 723, 
index_page_offset: None, dictionary_page_offset: Some(678), statistics: 
Some(ByteArray({min: Some(ByteArray { data: "0.5" }), max: Some(ByteArray { 
data: "0.99" }), distinct_count: None, null_count: 0, min_max_deprecated: 
false, min_max_backwards_compatible: false})), encoding_stats: None, 
bloom_filter_offset: None, 
offset_index_offset: Some(1139), offset_index_length: Some(11), column_index_offset: 
Some(1073), column_index_length: Some(22) }, 
               ColumnChunkMetaData { column_type: DOUBLE, column_path: 
ColumnPath { parts: ["value"] }, column_descr: ColumnDescriptor { 
primitive_type: PrimitiveType { basic_info: BasicTypeInfo { name: "value", 
repetition: Some(OPTIONAL), converted_type: NONE, logical_type: None, id: None 
}, physical_type: DOUBLE, type_length: -1, scale: -1, precision: -1 }, 
max_def_level: 1, max_rep_level: 0, path: ColumnPath { parts: ["value"] } }, 
encodings: [PLAIN, RLE, RLE_DICTIONARY], file_path: None, file_offset: 901, 
num_values: 15, compression: ZSTD(ZstdLevel(1)), total_compressed_size: 81, 
total_uncompressed_size: 168, data_page_offset: 859, index_page_offset: None, 
dictionary_page_offset: Some(820), statistics: Some(Double({min: None, max: 
None, distinct_count: None, null_count: 0, min_max_deprecated: true, 
min_max_backwards_compatible: true})), encoding_stats: None, 
bloom_filter_offset: None, offset_index_offset: None, offset_index_length: 
None, column_index_offset: None, column_index_length: None }]
    , num_rows: 15, sorting_columns: None, total_byte_size: 600, schema_descr: 
SchemaDescriptor { schema: GroupType { basic_info: BasicTypeInfo { name: 
"arrow_schema", repetition: None, converted_type: NONE, logical_type: None, id: 
None }, fields: [PrimitiveType { basic_info: BasicTypeInfo { name: "tsid", 
repetition: Some(REQUIRED), converted_type: UINT_64, logical_type: Some(Integer 
{ bit_width: 64, is_signed: false }), id: None }, physical_type: INT64, 
type_length: -1, scale: -1, precision: -1 }, PrimitiveType { basic_info: 
BasicTypeInfo { name: "timestamp", repetition: Some(REQUIRED), converted_type: 
TIMESTAMP_MILLIS, logical_type: Some(Timestamp { is_adjusted_to_u_t_c: false, 
unit: MILLIS(MilliSeconds) }), id: None }, physical_type: INT64, type_length: 
-1, scale: -1, precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo { 
name: "instance", repetition: Some(OPTIONAL), converted_type: UTF8, 
logical_type: Some(String), id: None }, physical_type: BYTE_ARRAY, type_length: 
-1, scale: -1, precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo { name: 
"job", repetition: Some(OPTIONAL), converted_type: UTF8, logical_type: 
Some(String), id: None }, physical_type: BYTE_ARRAY, type_length: -1, scale: 
-1, precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo { name: 
"quantile", repetition: Some(OPTIONAL), converted_type: UTF8, logical_type: 
Some(String), id: None }, physical_type: BYTE_ARRAY, type_length: -1, scale: 
-1, precision: -1 }, PrimitiveType { basic_info: BasicTypeInfo { name: "value", 
repetition: Some(OPTIONAL), converted_type: NONE, logical_type: None, id: None 
}, physical_type: DOUBLE, type_length: -1, scale: -1, precision: -1 }] } }, 
page_offset_index: None }], page_indexes: None, offset_indexes: None }, 
custom_meta:ParquetMetaData { min_key: 
"0421433ac70f3499ba03800001897d08862afffffffffffff3a7fffffffe", max_key: 
"04ab9846a95f62d0ee03800001897d0a5aeafffffffffffff3a3fffffffd", time_range: 
TimeRange { inclusive_start: Timestamp(1690012800000), 
 exclusive_end: Timestamp(1690020000000) }, max_sequence: 3164, schema: Schema 
{ timestamp_index: 1, tsid_index: Some(0), column_schemas: ColumnSchemas { 
columns: [ColumnSchema { id: 1, name: "tsid", data_type: UInt64, is_nullable: 
false, is_tag: false, comment: "", escaped_name: "tsid", default_value: None }, 
ColumnSchema { id: 2, name: "timestamp", data_type: Timestamp, is_nullable: 
false, is_tag: false, comment: "", escaped_name: "timestamp", default_value: 
None }, ColumnSchema { id: 3, name: "instance", data_type: String, is_nullable: 
true, is_tag: true, comment: "", escaped_name: "instance", default_value: None 
}, ColumnSchema { id: 4, name: "job", data_type: String, is_nullable: true, 
is_tag: true, comment: "", escaped_name: "job", default_value: None }, 
ColumnSchema { id: 5, name: "quantile", data_type: String, is_nullable: true, 
is_tag: true, comment: "", escaped_name: "quantile", default_value: None }, 
ColumnSchema { id: 6, name: "value", data_type: Double, is_nullable: true
 , is_tag: false, comment: "", escaped_name: "value", default_value: None }] }, 
version: 1, primary_key_indexes: [0, 1] }, filter_size: 0, 
collapsible_cols_idx: [] }
   ```
   
   ```
               ColumnChunkMetaData { column_type: DOUBLE, column_path: 
ColumnPath { parts: ["value"] }, column_descr: ColumnDescriptor { 
primitive_type: PrimitiveType { basic_info: BasicTypeInfo { name: "value", 
repetition: Some(OPTIONAL), converted_type: NONE, logical_type: None, id: None 
}, physical_type: DOUBLE, type_length: -1, scale: -1, precision: -1 }, 
max_def_level: 1, max_rep_level: 0, path: ColumnPath { parts: ["value"] } }, 
encodings: [PLAIN, RLE, RLE_DICTIONARY], file_path: None, file_offset: 901, 
num_values: 15, compression: ZSTD(ZstdLevel(1)), total_compressed_size: 81, 
total_uncompressed_size: 168, data_page_offset: 859, index_page_offset: None, 
dictionary_page_offset: Some(820), statistics: Some(Double({min: None, max: 
None, distinct_count: None, null_count: 0, min_max_deprecated: true, 
min_max_backwards_compatible: true})), encoding_stats: None, 
bloom_filter_offset: None, offset_index_offset: None, offset_index_length: 
None, column_index_offset: None, column_index_length: None }]
   ```
   In `statistics`, `min` and `max` are both `None`, which leads to 
`offset_index_offset: None, offset_index_length: None, column_index_offset: 
None, column_index_length: None` for this column chunk.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to