Copilot commented on code in PR #53829:
URL: https://github.com/apache/spark/pull/53829#discussion_r2697272755
##########
sql/gen-sql-api-docs.py:
##########
@@ -281,35 +398,149 @@ def generate_sql_api_markdown(jvm, path):
"""
- with open(path, 'w') as mdfile:
+ def _write_function_entry(mdfile, info):
+ """Write a single function entry to the markdown file."""
+ name = info.name
+ anchor = _make_anchor(name)
+ usage = _make_pretty_usage(info.usage)
+ arguments = _make_pretty_arguments(info.arguments)
+ examples = _make_pretty_examples(info.examples)
+ note = _make_pretty_note(info.note)
+ since = info.since
+ deprecated = _make_pretty_deprecated(info.deprecated)
+
+ # Use explicit anchor for special characters
+ mdfile.write('<a name="%s"></a>\n\n' % anchor)
+ mdfile.write("### %s\n\n" % name)
+ if usage is not None:
+ mdfile.write("%s\n\n" % usage.strip())
+ if arguments is not None:
+ mdfile.write(arguments)
+ if examples is not None:
+ mdfile.write(examples)
+ if note is not None:
+ mdfile.write(note)
+ if since is not None and since != "":
+ mdfile.write("**Since:** %s\n\n" % since.strip())
+ if deprecated is not None:
+ mdfile.write(deprecated)
+ mdfile.write("<br/>\n\n")
+
+ # Group functions by category
+ grouped_infos = _list_grouped_function_infos(jvm)
+
+ # Track categories that have functions for the index
+ categories_with_functions = []
+
+ # Generate a separate markdown file for each category
+ for group_key, infos in grouped_infos:
+ display_name = _get_display_name(group_key)
+ file_name = _get_file_name(group_key)
+ categories_with_functions.append((group_key, display_name, file_name,
len(infos)))
+
+ category_path = os.path.join(docs_dir, "%s.md" % file_name)
+ with open(category_path, 'w') as mdfile:
+ mdfile.write("# %s\n\n" % display_name)
+ mdfile.write("This page lists all %s available in Spark SQL.\n\n"
% display_name.lower())
+ mdfile.write("---\n\n")
+ for info in infos:
+ _write_function_entry(mdfile, info)
+
+ # Generate the index file with links to all categories
+ index_path = os.path.join(docs_dir, "index.md")
+ with open(index_path, 'w') as mdfile:
mdfile.write("# Built-in Functions\n\n")
- for info in _list_function_infos(jvm):
- name = info.name
- usage = _make_pretty_usage(info.usage)
- arguments = _make_pretty_arguments(info.arguments)
- examples = _make_pretty_examples(info.examples)
- note = _make_pretty_note(info.note)
- since = info.since
- deprecated = _make_pretty_deprecated(info.deprecated)
-
- mdfile.write("### %s\n\n" % name)
- if usage is not None:
- mdfile.write("%s\n\n" % usage.strip())
- if arguments is not None:
- mdfile.write(arguments)
- if examples is not None:
- mdfile.write(examples)
- if note is not None:
- mdfile.write(note)
- if since is not None and since != "":
- mdfile.write("**Since:** %s\n\n" % since.strip())
- if deprecated is not None:
- mdfile.write(deprecated)
- mdfile.write("<br/>\n\n")
+ # Inline CSS for responsive grid layout
+ mdfile.write("<style>\n")
+ mdfile.write(".func-grid {\n")
+ mdfile.write(" display: grid;\n")
+ mdfile.write(" grid-template-columns: repeat(4, 1fr);\n")
+ mdfile.write(" gap: 0.5rem;\n")
+ mdfile.write(" margin-bottom: 1.5rem;\n")
+ mdfile.write("}\n")
+ mdfile.write(".func-grid a {\n")
+ mdfile.write(" padding: 0.25rem 0;\n")
+ mdfile.write(" white-space: nowrap;\n")
+ mdfile.write(" overflow: hidden;\n")
+ mdfile.write(" text-overflow: ellipsis;\n")
+ mdfile.write("}\n")
+ mdfile.write("@media (max-width: 1200px) {\n")
+ mdfile.write(" .func-grid { grid-template-columns: repeat(3, 1fr);
}\n")
+ mdfile.write("}\n")
+ mdfile.write("@media (max-width: 768px) {\n")
+ mdfile.write(" .func-grid { grid-template-columns: repeat(2, 1fr);
}\n")
+ mdfile.write("}\n")
+ mdfile.write("@media (max-width: 480px) {\n")
+ mdfile.write(" .func-grid { grid-template-columns: 1fr; }\n")
+ mdfile.write("}\n")
+ mdfile.write("</style>\n\n")
+ mdfile.write("Spark SQL provides a comprehensive set of built-in
functions for data ")
+ mdfile.write("manipulation and analysis. Functions are organized into
the following ")
+ mdfile.write("categories:\n\n")
+
+ # Sort categories by display name for consistent ordering
+ sorted_categories = sorted(categories_with_functions, key=lambda x:
x[1])
+
+ # Generate detailed TOC for each category with all function names
+ for group_key, display_name, file_name, count in sorted_categories:
+ mdfile.write("## %s (%d)\n\n" % (display_name, count))
+ # Get the functions for this category
+ category_infos = next(
+ (infos for gk, infos in grouped_infos if gk == group_key),
+ []
+ )
+ # Write function links in a responsive grid layout
+ mdfile.write('<div class="func-grid">\n')
+ for info in category_infos:
+ anchor = _make_anchor(info.name)
+ mdfile.write('<a href="%s.md#%s">%s</a>\n' % (file_name,
anchor, info.name))
Review Comment:
Function names containing HTML special characters (like '<', '>', '&') are
embedded directly in HTML anchor tags without escaping. For example, the
operators '<>', '||', '<=>' defined in _virtual_operator_infos could render
incorrectly or cause parsing issues. While MkDocs may handle this in pure
Markdown, these are raw HTML tags where escaping is not automatic. Consider
using html.escape(info.name) when embedding function names in HTML tags.
##########
sql/gen-sql-api-docs.py:
##########
@@ -281,35 +398,149 @@ def generate_sql_api_markdown(jvm, path):
"""
- with open(path, 'w') as mdfile:
+ def _write_function_entry(mdfile, info):
+ """Write a single function entry to the markdown file."""
+ name = info.name
+ anchor = _make_anchor(name)
+ usage = _make_pretty_usage(info.usage)
+ arguments = _make_pretty_arguments(info.arguments)
+ examples = _make_pretty_examples(info.examples)
+ note = _make_pretty_note(info.note)
+ since = info.since
+ deprecated = _make_pretty_deprecated(info.deprecated)
+
+ # Use explicit anchor for special characters
+ mdfile.write('<a name="%s"></a>\n\n' % anchor)
+ mdfile.write("### %s\n\n" % name)
Review Comment:
The code uses the deprecated HTML4 'name' attribute for anchors instead of
the HTML5 'id' attribute. While browsers still support this for backward
compatibility, modern HTML should use '<a id="..."></a>' or add the id directly
to the header tag like '### {#anchor-id}'. Additionally, since MkDocs generates
anchors from headers automatically, there may be duplicate anchors created (one
from the header '### name' and one from the explicit '<a name="..."></a>').
```suggestion
# Use explicit anchor for special characters by attaching an id to
the header
mdfile.write("### %s {#%s}\n\n" % (name, anchor))
```
##########
sql/gen-sql-api-docs.py:
##########
@@ -132,10 +231,25 @@ def _list_function_infos(jvm):
examples=jinfo.getExamples().replace("_FUNC_", name),
note=jinfo.getNote().replace("_FUNC_", name),
since=jinfo.getSince(),
- deprecated=jinfo.getDeprecated()))
+ deprecated=jinfo.getDeprecated(),
+ group=group))
return sorted(infos, key=lambda i: i.name)
+def _list_grouped_function_infos(jvm):
+ """
+ Returns a list of function information grouped by category.
+ Each item is a tuple of (group_key, list_of_infos).
+ """
+ infos = _list_function_infos(jvm)
+ # Group by category
+ grouped = itertools.groupby(
+ sorted(infos, key=lambda x: (x.group or "", x.name)),
+ key=lambda x: x.group
+ )
+ return [(k, sorted(list(g), key=lambda x: x.name)) for k, g in grouped]
Review Comment:
The functions are already sorted by (group, name) on line 247, which means
within each group they're already sorted by name. The additional sort by name
on line 250 is redundant and adds unnecessary computation. You can remove the
sort call and just use 'list(g)' instead of 'sorted(list(g), key=lambda x:
x.name)'.
```suggestion
return [(k, list(g)) for k, g in grouped]
```
##########
sql/gen-sql-api-docs.py:
##########
@@ -281,35 +398,149 @@ def generate_sql_api_markdown(jvm, path):
"""
- with open(path, 'w') as mdfile:
+ def _write_function_entry(mdfile, info):
+ """Write a single function entry to the markdown file."""
+ name = info.name
+ anchor = _make_anchor(name)
+ usage = _make_pretty_usage(info.usage)
+ arguments = _make_pretty_arguments(info.arguments)
+ examples = _make_pretty_examples(info.examples)
+ note = _make_pretty_note(info.note)
+ since = info.since
+ deprecated = _make_pretty_deprecated(info.deprecated)
+
+ # Use explicit anchor for special characters
+ mdfile.write('<a name="%s"></a>\n\n' % anchor)
+ mdfile.write("### %s\n\n" % name)
+ if usage is not None:
+ mdfile.write("%s\n\n" % usage.strip())
+ if arguments is not None:
+ mdfile.write(arguments)
+ if examples is not None:
+ mdfile.write(examples)
+ if note is not None:
+ mdfile.write(note)
+ if since is not None and since != "":
+ mdfile.write("**Since:** %s\n\n" % since.strip())
+ if deprecated is not None:
+ mdfile.write(deprecated)
+ mdfile.write("<br/>\n\n")
+
+ # Group functions by category
+ grouped_infos = _list_grouped_function_infos(jvm)
+
+ # Track categories that have functions for the index
+ categories_with_functions = []
+
+ # Generate a separate markdown file for each category
+ for group_key, infos in grouped_infos:
+ display_name = _get_display_name(group_key)
+ file_name = _get_file_name(group_key)
+ categories_with_functions.append((group_key, display_name, file_name,
len(infos)))
+
+ category_path = os.path.join(docs_dir, "%s.md" % file_name)
+ with open(category_path, 'w') as mdfile:
+ mdfile.write("# %s\n\n" % display_name)
+ mdfile.write("This page lists all %s available in Spark SQL.\n\n"
% display_name.lower())
+ mdfile.write("---\n\n")
+ for info in infos:
+ _write_function_entry(mdfile, info)
+
+ # Generate the index file with links to all categories
+ index_path = os.path.join(docs_dir, "index.md")
+ with open(index_path, 'w') as mdfile:
mdfile.write("# Built-in Functions\n\n")
- for info in _list_function_infos(jvm):
- name = info.name
- usage = _make_pretty_usage(info.usage)
- arguments = _make_pretty_arguments(info.arguments)
- examples = _make_pretty_examples(info.examples)
- note = _make_pretty_note(info.note)
- since = info.since
- deprecated = _make_pretty_deprecated(info.deprecated)
-
- mdfile.write("### %s\n\n" % name)
- if usage is not None:
- mdfile.write("%s\n\n" % usage.strip())
- if arguments is not None:
- mdfile.write(arguments)
- if examples is not None:
- mdfile.write(examples)
- if note is not None:
- mdfile.write(note)
- if since is not None and since != "":
- mdfile.write("**Since:** %s\n\n" % since.strip())
- if deprecated is not None:
- mdfile.write(deprecated)
- mdfile.write("<br/>\n\n")
+ # Inline CSS for responsive grid layout
+ mdfile.write("<style>\n")
+ mdfile.write(".func-grid {\n")
+ mdfile.write(" display: grid;\n")
+ mdfile.write(" grid-template-columns: repeat(4, 1fr);\n")
+ mdfile.write(" gap: 0.5rem;\n")
+ mdfile.write(" margin-bottom: 1.5rem;\n")
+ mdfile.write("}\n")
+ mdfile.write(".func-grid a {\n")
+ mdfile.write(" padding: 0.25rem 0;\n")
+ mdfile.write(" white-space: nowrap;\n")
+ mdfile.write(" overflow: hidden;\n")
+ mdfile.write(" text-overflow: ellipsis;\n")
+ mdfile.write("}\n")
+ mdfile.write("@media (max-width: 1200px) {\n")
+ mdfile.write(" .func-grid { grid-template-columns: repeat(3, 1fr);
}\n")
+ mdfile.write("}\n")
+ mdfile.write("@media (max-width: 768px) {\n")
+ mdfile.write(" .func-grid { grid-template-columns: repeat(2, 1fr);
}\n")
+ mdfile.write("}\n")
+ mdfile.write("@media (max-width: 480px) {\n")
+ mdfile.write(" .func-grid { grid-template-columns: 1fr; }\n")
+ mdfile.write("}\n")
+ mdfile.write("</style>\n\n")
Review Comment:
Embedding CSS directly in Markdown files can cause issues with Content
Security Policy (CSP) in some documentation hosting environments. Consider
moving the CSS to a separate stylesheet file or using MkDocs' built-in theming
capabilities to define custom styles, which would be more maintainable and
follow documentation best practices.
##########
sql/gen-sql-api-docs.py:
##########
@@ -132,10 +231,25 @@ def _list_function_infos(jvm):
examples=jinfo.getExamples().replace("_FUNC_", name),
note=jinfo.getNote().replace("_FUNC_", name),
since=jinfo.getSince(),
- deprecated=jinfo.getDeprecated()))
+ deprecated=jinfo.getDeprecated(),
+ group=group))
return sorted(infos, key=lambda i: i.name)
+def _list_grouped_function_infos(jvm):
+ """
+ Returns a list of function information grouped by category.
+ Each item is a tuple of (group_key, list_of_infos).
+ """
+ infos = _list_function_infos(jvm)
+ # Group by category
+ grouped = itertools.groupby(
+ sorted(infos, key=lambda x: (x.group or "", x.name)),
+ key=lambda x: x.group
Review Comment:
The groupby operation uses 'x.group' as the key while the sort uses 'x.group
or ""'. This inconsistency, while not causing a functional bug due to
itertools.groupby's behavior, makes the code less clear. For better readability
and maintainability, consider using the same expression for both: 'key=lambda
x: x.group or ""' in the groupby call.
```suggestion
key=lambda x: x.group or ""
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]