wgtmac commented on code in PR #144: URL: https://github.com/apache/parquet-site/pull/144#discussion_r2633460853
########## data/implementations/engines.yaml: ########## @@ -4,52 +4,195 @@ language: C++ repo_url: https://github.com/apache/arrow/tree/main/cpp/src/parquet docs_url: https://arrow.apache.org/docs/cpp/parquet.html - versions: {} + versions: + default: "15.0.0" + releases: + "15.0.0": + release_date: "2024-01-21" + release_notes_url: "https://arrow.apache.org/release/15.0.0.html" + "16.0.0": + release_date: "2024-04-20" + release_notes_url: "https://arrow.apache.org/release/16.0.0.html" + "17.0.0": + release_date: "2024-07-16" + release_notes_url: "https://arrow.apache.org/release/17.0.0.html" + "18.0.0": + release_date: "2024-10-28" + release_notes_url: "https://arrow.apache.org/release/18.0.0.html" + "19.0.0": + release_date: "2025-01-16" + release_notes_url: "https://arrow.apache.org/release/19.0.0.html" + "20.0.0": + release_date: "2025-04-27" + release_notes_url: "https://arrow.apache.org/release/20.0.0.html" + "21.0.0": + release_date: "2025-07-17" + release_notes_url: "https://arrow.apache.org/release/21.0.0.html" + "22.0.0": + release_date: "2025-10-24" + release_notes_url: "https://arrow.apache.org/release/22.0.0.html" - id: parquet-java name: parquet-java display_name: Parquet Java language: Java repo_url: https://github.com/apache/parquet-java docs_url: null - versions: {} + versions: + default: "1.16.0" + releases: + "1.15.0": + release_date: "2024-12-02" + release_notes_url: "https://github.com/apache/parquet-java/releases/tag/apache-parquet-1.15.0" + "1.16.0": + release_date: "2025-09-03" + release_notes_url: "https://github.com/apache/parquet-java/releases/tag/apache-parquet-1.16.0" - id: arrow-go name: arrow-go display_name: Apache Arrow Go language: Go repo_url: https://github.com/apache/arrow-go/tree/main/parquet docs_url: null - versions: {} + versions: + default: "22.0.0" + releases: + "15.0.0": + release_date: "2024-01-21" + release_notes_url: "https://arrow.apache.org/release/15.0.0.html" + "16.0.0": + release_date: "2024-04-20" + 
release_notes_url: "https://arrow.apache.org/release/16.0.0.html" + "17.0.0": + release_date: "2024-07-16" + release_notes_url: "https://arrow.apache.org/release/17.0.0.html" + "18.0.0": + release_date: "2024-10-28" + release_notes_url: "https://arrow.apache.org/release/18.0.0.html" + "18.3.0": + release_date: "2024-05-09" + release_notes_url: "https://github.com/apache/arrow-go/releases/tag/v18.3.0" + "18.4.0": + release_date: "2024-07-21" + release_notes_url: "https://github.com/apache/arrow-go/releases/tag/v18.4.0" + "19.0.0": + release_date: "2025-01-16" + release_notes_url: "https://github.com/apache/arrow-go/releases/tag/v19.0.0" + "20.0.0": + release_date: "2025-04-27" + release_notes_url: "https://github.com/apache/arrow-go/releases/tag/v20.0.0" + "21.0.0": + release_date: "2025-07-17" + release_notes_url: "https://github.com/apache/arrow-go/releases/tag/v21.0.0" + "22.0.0": + release_date: "2025-10-24" + release_notes_url: "https://github.com/apache/arrow-go/releases/tag/v22.0.0" - id: arrow-rs name: arrow-rs display_name: Apache Arrow Rust language: Rust repo_url: https://github.com/apache/arrow-rs/blob/main/parquet/README.md docs_url: null - versions: {} + versions: + default: "15.0.0" Review Comment: Same question here ########## layouts/shortcodes/implementation-status.html: ########## @@ -125,11 +240,206 @@ <h3 id="{{ $categoryId }}"><a href="#{{ $categoryId }}">{{ $category.name }}</a> </tbody> </table> {{- /* Render footnotes if any */ -}} - {{- if gt (len $noteMap) 0 -}} + {{- if gt (len $noteMap) 0 -}} <b>Notes:</b> {{- range $noteRowIndex, $noteRow := $noteMap }} <div id="note-{{ index $noteRow 2}}-{{index $noteRow 0 }}">({{ add $noteRowIndex 1}}) {{ index $noteRow 1 }}</div> {{- end -}} {{- end -}} {{- end -}} {{- end -}} + +{{- /* Render minimum version table by year */ -}} +<h3 id="read-support-by-year"><a href="#read-support-by-year">Minimum Version for Read Support by Year</a></h3> +<p>This table shows the minimum engine version required to 
read Parquet files using features introduced in each year. Only includes compression, encodings, physical types, and logical types. Features without a specified format version are assumed to have been added prior to 2023.</p> + +<p><b>Note:</b> This data was originally collected in December 2025, and not all data was backfilled. It is likely older releases of each engine support reading all features for 2023 and before. As volunteers have time they are invited to add more granular details on releases. Generally, versions are expected to be accurate for any year 2025 and after.</p> + +{{- /* Collect all features with format_version and group by year */ -}} +{{- $featuresByYear := dict "2023" (slice) "2024" (slice) "2025" (slice) -}} +{{- $excludedFeatures := slice "encoding-bit-packed" "compression-lz4-deprecated" "compression-lzo" "logical-enum" "logical-uuid" "logical-bson" "logical-json" "logical-interval" -}} +{{- $includedCategories := slice "compressions" "encodings" "physical-types" "logical-types" -}} + +{{- range $categories -}} + {{- $categoryId := .id -}} + {{- /* Only include specific categories */ -}} + {{- if in $includedCategories $categoryId -}} + {{- $featuresData := index site.Data.implementations.features $categoryId -}} + {{- if $featuresData -}} + {{- range $featuresData.features -}} + {{- $feature := . 
-}} + {{- /* Skip excluded features */ -}} + {{- if not (in $excludedFeatures .id) -}} + {{- $year := "" -}} + {{- if .format_version -}} + {{- $dateStr := .format_version.date -}} + {{- if hasPrefix $dateStr "2025" -}} + {{- $year = "2025" -}} + {{- else if hasPrefix $dateStr "2024" -}} + {{- $year = "2024" -}} + {{- else -}} + {{- $year = "2023" -}} + {{- end -}} + {{- else -}} + {{- /* Features without format_version are assumed pre-2023 */ -}} + {{- $year = "2023" -}} + {{- end -}} + + {{- $currentList := index $featuresByYear $year -}} + {{- $featuresByYear = merge $featuresByYear (dict $year ($currentList | append $feature)) -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} +{{- end -}} + +{{- /* Build list of excluded feature names for display */ -}} +{{- $excludedFeatureNames := slice -}} +{{- range $categories -}} + {{- $categoryId := .id -}} + {{- if in $includedCategories $categoryId -}} + {{- $featuresData := index site.Data.implementations.features $categoryId -}} + {{- if $featuresData -}} + {{- range $featuresData.features -}} + {{- if in $excludedFeatures .id -}} + {{- $excludedFeatureNames = $excludedFeatureNames | append .display_name -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} +{{- end -}} + +{{- if gt (len $excludedFeatureNames) 0 -}} +<p><strong>Note:</strong> The following features are excluded from this table: {{ delimit $excludedFeatureNames ", " }}.</p> +{{- end -}} + +{{- /* Calculate minimum version for each engine and year */ -}} +<table class="table table-striped"> + <thead> + <tr> + <th>Engine</th> + <th>≤2023 Features</th> + <th>2024 Features</th> + <th>2025 Features</th> + </tr> + </thead> + <tbody> + {{- range $engines -}} + {{- $engine := . -}} Review Comment: I find the rendered result for arrow-go confusing, and I find it hard to interpret this logic.
<img width="914" height="118" alt="image" src="https://github.com/user-attachments/assets/23c9d32d-d5b6-4461-ae5c-58b4ba6a27d2" /> ########## content/en/docs/File Format/implementationstatus.md: ########## @@ -12,7 +12,7 @@ of this page by opening an issue or submitting a pull request. ### Legend The value in each box means: -* ✅: supported. Footnote added when support is partial. +* ✅: supported. Footnote added when support is partial. When data is available links to release notes are provided on the implementing version. Review Comment: ```suggestion * ✅: supported. Footnote added when support is partial. When data is available, links to release notes are provided on the implementing version. ``` ########## data/implementations/engines.yaml: ########## @@ -4,52 +4,195 @@ language: C++ repo_url: https://github.com/apache/arrow/tree/main/cpp/src/parquet docs_url: https://arrow.apache.org/docs/cpp/parquet.html - versions: {} + versions: + default: "15.0.0" Review Comment: Why is the default 15.0.0? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
