martin-g commented on code in PR #19367: URL: https://github.com/apache/datafusion/pull/19367#discussion_r2626538820
########## datafusion/macros/src/metric_doc.rs: ########## @@ -0,0 +1,243 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Metrics documentation macros and helpers. + +use proc_macro::TokenStream; +use quote::{format_ident, quote}; +use syn::{ + Attribute, Data, DeriveInput, Expr, ExprLit, Lit, Path, Token, parse::Parser, + spanned::Spanned, +}; + +pub fn metric_doc(args: TokenStream, input: TokenStream) -> TokenStream { + let args_ts = proc_macro2::TokenStream::from(args.clone()); + let parsed: syn::punctuated::Punctuated<syn::Meta, Token![,]> = + syn::punctuated::Punctuated::parse_terminated + .parse2(args_ts.clone()) + .unwrap_or_default(); + + let is_metrics_struct = parsed.is_empty() || parsed.iter().all(is_position_arg); + + let derive_attr = if is_metrics_struct { + quote!(#[derive(datafusion_macros::DocumentedMetrics)]) + } else { + quote!(#[derive(datafusion_macros::DocumentedExec)]) + }; + + let meta_attr = quote!(#[metric_doc_attr(#args_ts)]); + + let input_ts = proc_macro2::TokenStream::from(input); + quote!(#derive_attr #meta_attr #input_ts).into() +} + +fn is_position_arg(meta: &syn::Meta) -> bool { + matches!(meta, syn::Meta::Path(p) if p.is_ident("common")) +} + +pub fn derive_documented_metrics(input: TokenStream) -> TokenStream { + match documented_metrics_impl(input) { + Ok(tokens) => tokens, + Err(err) => err.to_compile_error().into(), + } +} + +pub fn derive_documented_exec(input: TokenStream) -> TokenStream { + match documented_exec_impl(input) { + Ok(tokens) => tokens, + Err(err) => err.to_compile_error().into(), + } +} + +fn documented_metrics_impl(input: TokenStream) -> syn::Result<TokenStream> { + let input = syn::parse::<DeriveInput>(input)?; + let ident = input.ident; + let doc = doc_expr(&input.attrs); + let position = metrics_position(&input.attrs)?; + + let Data::Struct(data) = input.data else { + return Err(syn::Error::new( + ident.span(), + "DocumentedMetrics can only be derived for structs", + )); + }; + + let mut fields_docs = vec![]; + + for field in data.fields { + let Some(field_ident) = field.ident else { + return Err(syn::Error::new( + field.span(), + "DocumentedMetrics does not support tuple structs", + )); + }; + + let field_type = &field.ty; + let doc = doc_expr(&field.attrs); + fields_docs.push( + quote! { datafusion_doc::metric_doc_sections::MetricFieldDoc { + name: stringify!(#field_ident), + doc: #doc, + type_name: stringify!(#field_type), + } }, + ); + } + + let position = match position { + MetricDocPosition::Common => { + quote!(datafusion_doc::metric_doc_sections::MetricDocPosition::Common) + } + MetricDocPosition::Operator => { + quote!(datafusion_doc::metric_doc_sections::MetricDocPosition::Operator) + } + }; + + let mod_ident = format_ident!("__datafusion_doc_metrics_{}", ident); + + Ok(quote! { + #[allow(non_snake_case)] + mod #mod_ident { + use super::*; + + static DOCUMENTATION: datafusion_doc::metric_doc_sections::MetricDoc = + datafusion_doc::metric_doc_sections::MetricDoc { + name: stringify!(#ident), + doc: #doc, + fields: &[#(#fields_docs),*], + position: #position, + }; + + impl datafusion_doc::metric_doc_sections::DocumentedMetrics for super::#ident { + const DOC: &'static datafusion_doc::metric_doc_sections::MetricDoc = + &DOCUMENTATION; + } + + datafusion_doc::metric_doc_sections::inventory::submit! { + datafusion_doc::metric_doc_sections::MetricDocEntry(&DOCUMENTATION) + } + } + } + .into()) +} + +fn documented_exec_impl(input: TokenStream) -> syn::Result<TokenStream> { + let input = syn::parse::<DeriveInput>(input)?; + let ident = input.ident; + let doc = doc_expr(&input.attrs); + let metrics = metrics_list(&input.attrs)?; + Review Comment: ```suggestion let Data::Struct(data) = input.data else { return Err(syn::Error::new( ident.span(), "DocumentedExec can only be derived for structs", )); }; ``` ########## datafusion/macros/src/metric_doc.rs: ########## @@ -0,0 +1,243 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Metrics documentation macros and helpers. + +use proc_macro::TokenStream; +use quote::{format_ident, quote}; +use syn::{ + Attribute, Data, DeriveInput, Expr, ExprLit, Lit, Path, Token, parse::Parser, + spanned::Spanned, +}; + +pub fn metric_doc(args: TokenStream, input: TokenStream) -> TokenStream { + let args_ts = proc_macro2::TokenStream::from(args.clone()); + let parsed: syn::punctuated::Punctuated<syn::Meta, Token![,]> = + syn::punctuated::Punctuated::parse_terminated + .parse2(args_ts.clone()) + .unwrap_or_default(); + + let is_metrics_struct = parsed.is_empty() || parsed.iter().all(is_position_arg); + + let derive_attr = if is_metrics_struct { + quote!(#[derive(datafusion_macros::DocumentedMetrics)]) + } else { + quote!(#[derive(datafusion_macros::DocumentedExec)]) + }; + + let meta_attr = quote!(#[metric_doc_attr(#args_ts)]); + + let input_ts = proc_macro2::TokenStream::from(input); + quote!(#derive_attr #meta_attr #input_ts).into() +} + +fn is_position_arg(meta: &syn::Meta) -> bool { + matches!(meta, syn::Meta::Path(p) if p.is_ident("common")) +} + +pub fn derive_documented_metrics(input: TokenStream) -> TokenStream { + match documented_metrics_impl(input) { + Ok(tokens) => tokens, + Err(err) => err.to_compile_error().into(), + } +} + +pub fn derive_documented_exec(input: TokenStream) -> TokenStream { + match documented_exec_impl(input) { + Ok(tokens) => tokens, + Err(err) => err.to_compile_error().into(), + } +} + +fn documented_metrics_impl(input: TokenStream) -> syn::Result<TokenStream> { + let input = syn::parse::<DeriveInput>(input)?; + let ident = input.ident; + let doc = doc_expr(&input.attrs); + let position = metrics_position(&input.attrs)?; + + let Data::Struct(data) = input.data else { + return Err(syn::Error::new( + ident.span(), + "DocumentedMetrics can only be derived for structs", + )); + }; + + let mut fields_docs = vec![]; + + for field in data.fields { + let Some(field_ident) = field.ident else { + return Err(syn::Error::new( + field.span(), + "DocumentedMetrics does not support tuple structs", + )); + }; + + let field_type = &field.ty; + let doc = doc_expr(&field.attrs); + fields_docs.push( + quote! { datafusion_doc::metric_doc_sections::MetricFieldDoc { + name: stringify!(#field_ident), + doc: #doc, + type_name: stringify!(#field_type), + } }, + ); + } + + let position = match position { + MetricDocPosition::Common => { + quote!(datafusion_doc::metric_doc_sections::MetricDocPosition::Common) + } + MetricDocPosition::Operator => { + quote!(datafusion_doc::metric_doc_sections::MetricDocPosition::Operator) + } + }; + + let mod_ident = format_ident!("__datafusion_doc_metrics_{}", ident); + + Ok(quote! { + #[allow(non_snake_case)] + mod #mod_ident { + use super::*; + + static DOCUMENTATION: datafusion_doc::metric_doc_sections::MetricDoc = + datafusion_doc::metric_doc_sections::MetricDoc { + name: stringify!(#ident), + doc: #doc, + fields: &[#(#fields_docs),*], + position: #position, + }; + + impl datafusion_doc::metric_doc_sections::DocumentedMetrics for super::#ident { + const DOC: &'static datafusion_doc::metric_doc_sections::MetricDoc = + &DOCUMENTATION; + } + + datafusion_doc::metric_doc_sections::inventory::submit! { + datafusion_doc::metric_doc_sections::MetricDocEntry(&DOCUMENTATION) + } + } + } + .into()) +} + +fn documented_exec_impl(input: TokenStream) -> syn::Result<TokenStream> { + let input = syn::parse::<DeriveInput>(input)?; + let ident = input.ident; + let doc = doc_expr(&input.attrs); + let metrics = metrics_list(&input.attrs)?; + + let metrics_refs = metrics.iter().map(|metric| { + quote! { <#metric as datafusion_doc::metric_doc_sections::DocumentedMetrics>::DOC } + }); + + let mod_ident = format_ident!("__datafusion_doc_exec_{}", ident); + + Ok(quote! { + #[allow(non_snake_case)] + mod #mod_ident { + use super::*; + + static DOCUMENTATION: datafusion_doc::metric_doc_sections::ExecDoc = + datafusion_doc::metric_doc_sections::ExecDoc { + name: stringify!(#ident), + doc: #doc, + metrics: &[#(#metrics_refs),*], + }; + + impl datafusion_doc::metric_doc_sections::DocumentedExec for super::#ident { + fn exec_doc( + ) -> &'static datafusion_doc::metric_doc_sections::ExecDoc { + &DOCUMENTATION + } + } + + datafusion_doc::metric_doc_sections::inventory::submit! { + datafusion_doc::metric_doc_sections::ExecDocEntry(&DOCUMENTATION) + } + } + } + .into()) +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum MetricDocPosition { + Common, + Operator, +} + +fn metrics_position(attrs: &[Attribute]) -> syn::Result<MetricDocPosition> { + let mut position = MetricDocPosition::Operator; + for attr in attrs { + if is_metrics_attr(attr) { + attr.parse_nested_meta(|meta| { + if meta.path.is_ident("common") { + position = MetricDocPosition::Common; + return Ok(()); + } + Err(meta.error("unsupported metric_doc attribute")) + })?; + } + } + Ok(position) +} + +fn metrics_list(attrs: &[Attribute]) -> syn::Result<Vec<Path>> { + let mut metrics = vec![]; + for attr in attrs { + if is_metrics_attr(attr) { + attr.parse_nested_meta(|meta| { + metrics.push(meta.path); + Ok(()) + })?; + } + } + + Ok(metrics) +} + +fn doc_expr(attrs: &[Attribute]) -> proc_macro2::TokenStream { + let mut doc_lines = Vec::new(); + + for attr in attrs { + if !attr.path().is_ident("doc") { + continue; + } + + if let syn::Meta::NameValue(meta) = &attr.meta + && let Expr::Lit(ExprLit { + lit: Lit::Str(lit_str), + .. + }) = &meta.value + { + doc_lines.push(lit_str.value()); + } + } + + if doc_lines.is_empty() { + quote!("") + } else { + let sanitized = doc_lines + .into_iter() + .map(|line| line.trim().to_string()) Review Comment: ```suggestion .map(|line| line.trim_end().to_string()) ``` Preserve any whitespace used intentionally for indentation. ########## datafusion/doc/Cargo.toml: ########## @@ -28,6 +28,9 @@ license = { workspace = true } authors = { workspace = true } rust-version = { workspace = true } +[dependencies] +inventory = "0.3.15" Review Comment: Any reason why ".15" is chosen here ? The latest one is ".21" and it is the chosen one in [Cargo.lock](https://github.com/apache/datafusion/pull/19367/changes#diff-13ee4b2252c9e516a0547f2891aa2105c3ca71c6d7a1e682c69be97998dfc87eR3824) Better use "0.3" ########## datafusion/macros/Cargo.toml: ########## @@ -39,11 +39,10 @@ workspace = true [lib] name = "datafusion_macros" -# lib.rs to be re-added in the future -path = "src/user_doc.rs" proc-macro = true [dependencies] datafusion-doc = { workspace = true } +proc-macro2 = "1.0.89" Review Comment: Why not a more recent version ? The latest available is 1.0.103 ########## datafusion/macros/src/metric_doc.rs: ########## @@ -0,0 +1,243 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Metrics documentation macros and helpers. + +use proc_macro::TokenStream; +use quote::{format_ident, quote}; +use syn::{ + Attribute, Data, DeriveInput, Expr, ExprLit, Lit, Path, Token, parse::Parser, + spanned::Spanned, +}; + +pub fn metric_doc(args: TokenStream, input: TokenStream) -> TokenStream { + let args_ts = proc_macro2::TokenStream::from(args.clone()); + let parsed: syn::punctuated::Punctuated<syn::Meta, Token![,]> = + syn::punctuated::Punctuated::parse_terminated + .parse2(args_ts.clone()) + .unwrap_or_default(); Review Comment: Why defaulting here ? Wouldn't it better to report it as a compilation error ? ```suggestion match syn::punctuated::Punctuated::parse_terminated.parse2(args_ts.clone()) { Ok(p) => p, Err(err) => return err.to_compile_error().into(), }; ``` ########## dev/update_metric_docs.sh: ########## @@ -0,0 +1,68 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +set -euo pipefail + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "${SOURCE_DIR}/../" && pwd + +TARGET_FILE="docs/source/user-guide/metrics.md" +PRINT_METRIC_DOCS_COMMAND="cargo run --manifest-path datafusion/core/Cargo.toml --bin print_metric_docs" + +echo "Inserting header" +cat <<'EOF' > "$TARGET_FILE" +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<!--- +This file was generated by the dev/update_metric_docs.sh script. +Do not edit it manually as changes will be overwritten. +Instead, add #[metric_doc] attributes to the metrics and execs you want to document. +--> + +# Metrics + +DataFusion operators expose runtime metrics so you can understand where time is spent and how much data flows through the pipeline. See more in [EXPLAIN ANALYZE](sql/explain.md#explain-analyze). + +EOF + +echo "Running CLI and inserting metric docs table" +$PRINT_METRIC_DOCS_COMMAND >> "$TARGET_FILE" + +echo "Running prettier" +npx [email protected] --write "$TARGET_FILE" Review Comment: Why 2.3.2 ? The latest one is 3.7.4 I see other shell scripts also use this specific version. IMO those should be updated at some point. And since this is a newly introduced doc generator I think doing it now is a good time. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
