errose28 commented on code in PR #6916: URL: https://github.com/apache/ozone/pull/6916#discussion_r1721956187
########## .github/workflows/doc.yml: ########## Review Comment: Let's use a more descriptive name for the file, like `build-config-doc.yml` or something like that. ########## .github/workflows/doc.yml: ########## @@ -0,0 +1,149 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: xml-to-md Review Comment: Let's make this name closer to the file name we decide to go with. See existing workflows as examples. 
########## dev-support/ci/xml_to_md.py: ########## @@ -0,0 +1,90 @@ +import os +import re +import zipfile +import xml.etree.ElementTree as ET +from collections import namedtuple +from pathlib import Path + +Property = namedtuple('Property', ['name', 'value', 'tag', 'description']) + +def extract_xml_from_jar(jar_path, xml_filename): + xml_files = [] + with zipfile.ZipFile(jar_path, 'r') as jar: + for file_info in jar.infolist(): + if file_info.filename.endswith(xml_filename): + with jar.open(file_info.filename) as xml_file: + xml_files.append(xml_file.read()) + return xml_files + +def parse_xml_file(xml_content): + root = ET.fromstring(xml_content) + properties = {} + for prop in root.findall('property'): + name = prop.find('name').text if prop.find('name') is not None else '' + value = prop.find('value').text if prop.find('value') is not None else '' + tag = prop.find('tag').text if prop.find('tag') is not None else '' + description = prop.find('description').text if prop.find('description') is not None else '' + description = ' '.join(description.split()).strip() if description else '' + properties[name] = Property(name, value, tag, description) + return properties + +def generate_markdown(properties): + markdown = [] + markdown.append("---\n") + markdown.append("title: \"Ozone configurations\"\n") + markdown.append("summary: Ozone configurations\n") + markdown.append("---\n") + markdown.append("<!--\n") + markdown.append("Licensed to the Apache Software Foundation (ASF) under one or more\n") + markdown.append("contributor license agreements. See the NOTICE file distributed with\n") + markdown.append("this work for additional information regarding copyright ownership.\n") + markdown.append("The ASF licenses this file to You under the Apache License, Version 2.0\n") + markdown.append("(the \"License\"); you may not use this file except in compliance with\n") + markdown.append("the License. 
You may obtain a copy of the License at\n\n") + markdown.append(" http://www.apache.org/licenses/LICENSE-2.0\n\n") + markdown.append("Unless required by applicable law or agreed to in writing, software\n") + markdown.append("distributed under the License is distributed on an \"AS IS\" BASIS,\n") + markdown.append("WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n") + markdown.append("See the License for the specific language governing permissions and\n") + markdown.append("limitations under the License.\n") + markdown.append("-->\n\n") + + for prop in sorted(properties.values(), key=lambda p: p.name): + markdown.append(f"| **Name** | `{prop.name}` |\n") + markdown.append("|:----------------|:----------------------------|\n") + markdown.append(f"| **Value** | {prop.value} |\n") + markdown.append(f"| **Tag** | {prop.tag} |\n") + markdown.append(f"| **Description** | {prop.description} |\n") + markdown.append("--------------------------------------------------------------------------------\n") + + return ''.join(markdown) + +def main(): + base_path = 'ozone-bin/extracted' + + # Find ozone SNAPSHOT directory dynamically using regex + snapshot_dir = next( + (os.path.join(base_path, d) for d in os.listdir(base_path) if re.match(r'ozone-.*-SNAPSHOT', d)), Review Comment: Instead of hardcoding `base_path` and trying to find the build within there, lets have the workflow pass the path to the build as a parameter. This is more flexible since the script could then be run manually/locally. ########## .github/workflows/doc.yml: ########## @@ -0,0 +1,149 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: xml-to-md + +on: + push: + branches: + - master + +jobs: + build: + runs-on: ubuntu-20.04 + timeout-minutes: 60 + strategy: + matrix: + java: [ 8 ] + fail-fast: false + steps: + - name: Checkout project + uses: actions/checkout@v4 + + - name: Cache for npm dependencies + uses: actions/cache@v4 + with: + path: | + ~/.pnpm-store + **/node_modules + key: ${{ runner.os }}-pnpm-${{ hashFiles('**/pnpm-lock.yaml') }} + restore-keys: | + ${{ runner.os }}-pnpm- + + - name: Cache for maven dependencies + uses: actions/cache/restore@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/ozone + key: maven-repo-${{ hashFiles('**/pom.xml') }} + restore-keys: | + maven-repo- + - name: Download Ratis repo + if: ${{ inputs.ratis_args != '' }} + uses: actions/download-artifact@v4 + with: + name: ratis-jars + path: | + ~/.m2/repository/org/apache/ratis + - name: Setup java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: ${{ matrix.java }} + - name: Run a full build + run: hadoop-ozone/dev-support/checks/build.sh -Pdist -Psrc -Dmaven.javadoc.skip=true ${{ inputs.ratis_args }} + env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + - name: Store binaries for tests + uses: actions/upload-artifact@v4 + with: + name: ozone-bin + path: | + hadoop-ozone/dist/target/ozone-*.tar.gz + !hadoop-ozone/dist/target/ozone-*-src.tar.gz + retention-days: 1 + - name: Store source tarball for compilation + uses: actions/upload-artifact@v4 + with: + name: ozone-src + path: hadoop-ozone/dist/target/ozone-*-src.tar.gz + 
retention-days: 1 + - name: Store Maven repo for tests + uses: actions/upload-artifact@v4 + with: + name: ozone-repo + path: | + ~/.m2/repository/org/apache/ozone + retention-days: 1 + + xml-to-md: + needs: + - build + runs-on: ubuntu-20.04 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install dependencies + run: sudo apt-get update && sudo apt-get install -y python3-pip + + - name: Download the source artifact + uses: actions/download-artifact@v4 + with: + name: ozone-bin + path: ozone-bin + + - name: Extract the source tarball + run: | + mkdir -p ozone-bin/extracted + tar -xzf ozone-bin/ozone-*-SNAPSHOT.tar.gz -C ozone-bin/extracted + + - name: Run the Python script to convert XML properties into Markdown + run: python3 dev-support/ci/xml_to_md.py + + - name: Commit and push to apache/ozone Review Comment: For the current site this should also be a PR instead of a direct commit so we can check for correctness or breakage before publishing. 
########## dev-support/ci/xml_to_md.py: ########## @@ -0,0 +1,90 @@ +import os +import re +import zipfile +import xml.etree.ElementTree as ET +from collections import namedtuple +from pathlib import Path + +Property = namedtuple('Property', ['name', 'value', 'tag', 'description']) + +def extract_xml_from_jar(jar_path, xml_filename): + xml_files = [] + with zipfile.ZipFile(jar_path, 'r') as jar: + for file_info in jar.infolist(): + if file_info.filename.endswith(xml_filename): + with jar.open(file_info.filename) as xml_file: + xml_files.append(xml_file.read()) + return xml_files + +def parse_xml_file(xml_content): + root = ET.fromstring(xml_content) + properties = {} + for prop in root.findall('property'): + name = prop.find('name').text if prop.find('name') is not None else '' + value = prop.find('value').text if prop.find('value') is not None else '' + tag = prop.find('tag').text if prop.find('tag') is not None else '' + description = prop.find('description').text if prop.find('description') is not None else '' + description = ' '.join(description.split()).strip() if description else '' + properties[name] = Property(name, value, tag, description) + return properties + +def generate_markdown(properties): + markdown = [] + markdown.append("---\n") + markdown.append("title: \"Ozone configurations\"\n") + markdown.append("summary: Ozone configurations\n") + markdown.append("---\n") + markdown.append("<!--\n") + markdown.append("Licensed to the Apache Software Foundation (ASF) under one or more\n") + markdown.append("contributor license agreements. See the NOTICE file distributed with\n") + markdown.append("this work for additional information regarding copyright ownership.\n") + markdown.append("The ASF licenses this file to You under the Apache License, Version 2.0\n") + markdown.append("(the \"License\"); you may not use this file except in compliance with\n") + markdown.append("the License. 
You may obtain a copy of the License at\n\n") + markdown.append(" http://www.apache.org/licenses/LICENSE-2.0\n\n") + markdown.append("Unless required by applicable law or agreed to in writing, software\n") + markdown.append("distributed under the License is distributed on an \"AS IS\" BASIS,\n") + markdown.append("WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n") + markdown.append("See the License for the specific language governing permissions and\n") + markdown.append("limitations under the License.\n") + markdown.append("-->\n\n") + Review Comment: We need some descriptive text to explain what is on this page rather than just having a table. Format could be something like: ```md # <Page Title> <description of this page> ## Configuration Keys (or similar heading) <table of config keys> ``` ########## .github/workflows/doc.yml: ########## @@ -0,0 +1,149 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+name: xml-to-md + +on: + push: + branches: + - master + +jobs: + build: + runs-on: ubuntu-20.04 + timeout-minutes: 60 + strategy: + matrix: + java: [ 8 ] + fail-fast: false + steps: + - name: Checkout project + uses: actions/checkout@v4 + + - name: Cache for npm dependencies + uses: actions/cache@v4 + with: + path: | + ~/.pnpm-store + **/node_modules + key: ${{ runner.os }}-pnpm-${{ hashFiles('**/pnpm-lock.yaml') }} + restore-keys: | + ${{ runner.os }}-pnpm- + + - name: Cache for maven dependencies + uses: actions/cache/restore@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/ozone + key: maven-repo-${{ hashFiles('**/pom.xml') }} + restore-keys: | + maven-repo- + - name: Download Ratis repo + if: ${{ inputs.ratis_args != '' }} + uses: actions/download-artifact@v4 + with: + name: ratis-jars + path: | + ~/.m2/repository/org/apache/ratis + - name: Setup java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: ${{ matrix.java }} + - name: Run a full build + run: hadoop-ozone/dev-support/checks/build.sh -Pdist -Psrc -Dmaven.javadoc.skip=true ${{ inputs.ratis_args }} + env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + - name: Store binaries for tests + uses: actions/upload-artifact@v4 + with: + name: ozone-bin + path: | + hadoop-ozone/dist/target/ozone-*.tar.gz + !hadoop-ozone/dist/target/ozone-*-src.tar.gz + retention-days: 1 + - name: Store source tarball for compilation + uses: actions/upload-artifact@v4 + with: + name: ozone-src + path: hadoop-ozone/dist/target/ozone-*-src.tar.gz + retention-days: 1 + - name: Store Maven repo for tests + uses: actions/upload-artifact@v4 + with: + name: ozone-repo + path: | + ~/.m2/repository/org/apache/ozone + retention-days: 1 + + xml-to-md: + needs: + - build + runs-on: ubuntu-20.04 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install 
dependencies + run: sudo apt-get update && sudo apt-get install -y python3-pip + + - name: Download the source artifact + uses: actions/download-artifact@v4 + with: + name: ozone-bin + path: ozone-bin + + - name: Extract the source tarball + run: | + mkdir -p ozone-bin/extracted + tar -xzf ozone-bin/ozone-*-SNAPSHOT.tar.gz -C ozone-bin/extracted + + - name: Run the Python script to convert XML properties into Markdown + run: python3 dev-support/ci/xml_to_md.py + + - name: Commit and push to apache/ozone + run: | + git config --global user.name 'Github Actions' + git config --global user.email '[email protected]' + git add hadoop-hdds/docs/content/concept/Configurations.md + git commit -m "[Auto] Update Configurations.md" + git push + + - name: Push changes to ozone-site + uses: actions/checkout@v4 + with: + repository: apache/ozone-site + ref: 'HDDS-9225-website-v2' + token: ${{ secrets.GITHUB_TOKEN }} + path: ozone-site + + - name: Copy MD file + run: | + cp hadoop-hdds/docs/content/tools/Configurations.md ozone-site/docs/03-core-concepts/06-configurations.md + + - name: Commit changes Review Comment: We should raise PRs to both the current and new site. This lets us review the changes to potentially catch errors with the generation, and makes it clear when configs have changed. I don't think we make too many config changes such that the extra PRs will be overwhelming. ########## .github/workflows/doc.yml: ########## @@ -0,0 +1,149 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: xml-to-md + +on: + push: + branches: + - master + +jobs: + build: + runs-on: ubuntu-20.04 + timeout-minutes: 60 + strategy: + matrix: + java: [ 8 ] + fail-fast: false + steps: + - name: Checkout project + uses: actions/checkout@v4 + + - name: Cache for npm dependencies + uses: actions/cache@v4 + with: + path: | + ~/.pnpm-store + **/node_modules + key: ${{ runner.os }}-pnpm-${{ hashFiles('**/pnpm-lock.yaml') }} + restore-keys: | + ${{ runner.os }}-pnpm- + + - name: Cache for maven dependencies + uses: actions/cache/restore@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/ozone + key: maven-repo-${{ hashFiles('**/pom.xml') }} + restore-keys: | + maven-repo- + - name: Download Ratis repo + if: ${{ inputs.ratis_args != '' }} + uses: actions/download-artifact@v4 + with: + name: ratis-jars + path: | + ~/.m2/repository/org/apache/ratis + - name: Setup java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: ${{ matrix.java }} + - name: Run a full build Review Comment: We shouldn't build Ozone from scratch in this workflow. We should append this to the existing CI that runs on each commit to use the build artifact it has already generated. 
########## dev-support/ci/xml_to_md.py: ########## @@ -0,0 +1,90 @@ +import os +import re +import zipfile +import xml.etree.ElementTree as ET +from collections import namedtuple +from pathlib import Path + +Property = namedtuple('Property', ['name', 'value', 'tag', 'description']) + +def extract_xml_from_jar(jar_path, xml_filename): + xml_files = [] + with zipfile.ZipFile(jar_path, 'r') as jar: + for file_info in jar.infolist(): + if file_info.filename.endswith(xml_filename): + with jar.open(file_info.filename) as xml_file: + xml_files.append(xml_file.read()) + return xml_files + +def parse_xml_file(xml_content): + root = ET.fromstring(xml_content) + properties = {} + for prop in root.findall('property'): + name = prop.find('name').text if prop.find('name') is not None else '' + value = prop.find('value').text if prop.find('value') is not None else '' + tag = prop.find('tag').text if prop.find('tag') is not None else '' + description = prop.find('description').text if prop.find('description') is not None else '' + description = ' '.join(description.split()).strip() if description else '' + properties[name] = Property(name, value, tag, description) + return properties + +def generate_markdown(properties): + markdown = [] Review Comment: My python is a bit rusty, but can we write this as one string with newlines in it? I think the triple quote `"""` syntax preserves newlines as they literally appear in the string. 
########## dev-support/ci/xml_to_md.py: ########## @@ -0,0 +1,90 @@ +import os +import re +import zipfile +import xml.etree.ElementTree as ET +from collections import namedtuple +from pathlib import Path + +Property = namedtuple('Property', ['name', 'value', 'tag', 'description']) + +def extract_xml_from_jar(jar_path, xml_filename): + xml_files = [] + with zipfile.ZipFile(jar_path, 'r') as jar: + for file_info in jar.infolist(): + if file_info.filename.endswith(xml_filename): + with jar.open(file_info.filename) as xml_file: + xml_files.append(xml_file.read()) + return xml_files + +def parse_xml_file(xml_content): + root = ET.fromstring(xml_content) + properties = {} + for prop in root.findall('property'): + name = prop.find('name').text if prop.find('name') is not None else '' + value = prop.find('value').text if prop.find('value') is not None else '' + tag = prop.find('tag').text if prop.find('tag') is not None else '' + description = prop.find('description').text if prop.find('description') is not None else '' Review Comment: I'm not very familiar with the XML library being used here, but what is the type of the `prop` variable? If it is a python dictionary, `get` supports a default value that can remove the if/else syntax here. If it is a type specific to the XML library, I would imagine there is similar functionality available. ########## .github/workflows/doc.yml: ########## @@ -0,0 +1,149 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: xml-to-md + +on: + push: + branches: + - master Review Comment: IMO we should run this on every commit, even for PRs, because if there's an error generating the doc page it could indicate a problem with the change itself. Only the last step that raises a PR to contribute the changes back needs to be restricted to master branch commits. ########## dev-support/ci/xml_to_md.py: ########## @@ -0,0 +1,90 @@ +import os +import re +import zipfile +import xml.etree.ElementTree as ET +from collections import namedtuple +from pathlib import Path + +Property = namedtuple('Property', ['name', 'value', 'tag', 'description']) + +def extract_xml_from_jar(jar_path, xml_filename): + xml_files = [] + with zipfile.ZipFile(jar_path, 'r') as jar: + for file_info in jar.infolist(): + if file_info.filename.endswith(xml_filename): + with jar.open(file_info.filename) as xml_file: + xml_files.append(xml_file.read()) + return xml_files + +def parse_xml_file(xml_content): + root = ET.fromstring(xml_content) + properties = {} + for prop in root.findall('property'): + name = prop.find('name').text if prop.find('name') is not None else '' + value = prop.find('value').text if prop.find('value') is not None else '' + tag = prop.find('tag').text if prop.find('tag') is not None else '' + description = prop.find('description').text if prop.find('description') is not None else '' + description = ' '.join(description.split()).strip() if description else '' + properties[name] = Property(name, value, tag, description) + return properties + +def generate_markdown(properties): + markdown = [] + 
markdown.append("---\n") + markdown.append("title: \"Ozone configurations\"\n") + markdown.append("summary: Ozone configurations\n") + markdown.append("---\n") + markdown.append("<!--\n") + markdown.append("Licensed to the Apache Software Foundation (ASF) under one or more\n") + markdown.append("contributor license agreements. See the NOTICE file distributed with\n") + markdown.append("this work for additional information regarding copyright ownership.\n") + markdown.append("The ASF licenses this file to You under the Apache License, Version 2.0\n") + markdown.append("(the \"License\"); you may not use this file except in compliance with\n") + markdown.append("the License. You may obtain a copy of the License at\n\n") + markdown.append(" http://www.apache.org/licenses/LICENSE-2.0\n\n") + markdown.append("Unless required by applicable law or agreed to in writing, software\n") + markdown.append("distributed under the License is distributed on an \"AS IS\" BASIS,\n") + markdown.append("WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n") + markdown.append("See the License for the specific language governing permissions and\n") + markdown.append("limitations under the License.\n") + markdown.append("-->\n\n") + + for prop in sorted(properties.values(), key=lambda p: p.name): + markdown.append(f"| **Name** | `{prop.name}` |\n") + markdown.append("|:----------------|:----------------------------|\n") + markdown.append(f"| **Value** | {prop.value} |\n") + markdown.append(f"| **Tag** | {prop.tag} |\n") + markdown.append(f"| **Description** | {prop.description} |\n") + markdown.append("--------------------------------------------------------------------------------\n") + + return ''.join(markdown) + +def main(): + base_path = 'ozone-bin/extracted' + + # Find ozone SNAPSHOT directory dynamically using regex + snapshot_dir = next( + (os.path.join(base_path, d) for d in os.listdir(base_path) if re.match(r'ozone-.*-SNAPSHOT', d)), + None + ) + + 
extract_path = os.path.join(snapshot_dir, 'share', 'ozone', 'lib') + xml_filename = 'ozone-default-generated.xml' + + property_map = {} + for file_name in os.listdir(extract_path): + if file_name.endswith('.jar'): + jar_path = os.path.join(extract_path, file_name) + xml_contents = extract_xml_from_jar(jar_path, xml_filename) + for xml_content in xml_contents: + property_map.update(parse_xml_file(xml_content)) + + markdown_content = generate_markdown(property_map) + output_path = Path("hadoop-hdds/docs/content/tools/Configurations.md") Review Comment: The output path should also be passed in to the script, so that the job can commit the file wherever it is needed. The path will be different for the new website, for example. Might even be better if the script just writes to stdout and the caller can redirect it as needed. ########## .github/workflows/doc.yml: ########## @@ -0,0 +1,149 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+name: xml-to-md + +on: + push: + branches: + - master + +jobs: + build: + runs-on: ubuntu-20.04 + timeout-minutes: 60 + strategy: + matrix: + java: [ 8 ] + fail-fast: false + steps: + - name: Checkout project + uses: actions/checkout@v4 + + - name: Cache for npm dependencies + uses: actions/cache@v4 + with: + path: | + ~/.pnpm-store + **/node_modules + key: ${{ runner.os }}-pnpm-${{ hashFiles('**/pnpm-lock.yaml') }} + restore-keys: | + ${{ runner.os }}-pnpm- + + - name: Cache for maven dependencies + uses: actions/cache/restore@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/ozone + key: maven-repo-${{ hashFiles('**/pom.xml') }} + restore-keys: | + maven-repo- + - name: Download Ratis repo + if: ${{ inputs.ratis_args != '' }} + uses: actions/download-artifact@v4 + with: + name: ratis-jars + path: | + ~/.m2/repository/org/apache/ratis + - name: Setup java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: ${{ matrix.java }} + - name: Run a full build + run: hadoop-ozone/dev-support/checks/build.sh -Pdist -Psrc -Dmaven.javadoc.skip=true ${{ inputs.ratis_args }} + env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + - name: Store binaries for tests + uses: actions/upload-artifact@v4 + with: + name: ozone-bin + path: | + hadoop-ozone/dist/target/ozone-*.tar.gz + !hadoop-ozone/dist/target/ozone-*-src.tar.gz + retention-days: 1 + - name: Store source tarball for compilation + uses: actions/upload-artifact@v4 + with: + name: ozone-src + path: hadoop-ozone/dist/target/ozone-*-src.tar.gz + retention-days: 1 + - name: Store Maven repo for tests + uses: actions/upload-artifact@v4 + with: + name: ozone-repo + path: | + ~/.m2/repository/org/apache/ozone + retention-days: 1 + + xml-to-md: + needs: + - build + runs-on: ubuntu-20.04 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install 
dependencies + run: sudo apt-get update && sudo apt-get install -y python3-pip + + - name: Download the source artifact + uses: actions/download-artifact@v4 + with: + name: ozone-bin + path: ozone-bin + + - name: Extract the source tarball + run: | + mkdir -p ozone-bin/extracted + tar -xzf ozone-bin/ozone-*-SNAPSHOT.tar.gz -C ozone-bin/extracted + + - name: Run the Python script to convert XML properties into Markdown + run: python3 dev-support/ci/xml_to_md.py + + - name: Commit and push to apache/ozone + run: | + git config --global user.name 'Github Actions' + git config --global user.email '[email protected]' + git add hadoop-hdds/docs/content/concept/Configurations.md + git commit -m "[Auto] Update Configurations.md" + git push + + - name: Push changes to ozone-site + uses: actions/checkout@v4 + with: + repository: apache/ozone-site + ref: 'HDDS-9225-website-v2' + token: ${{ secrets.GITHUB_TOKEN }} + path: ozone-site + + - name: Copy MD file + run: | + cp hadoop-hdds/docs/content/tools/Configurations.md ozone-site/docs/03-core-concepts/06-configurations.md + + - name: Commit changes + run: | + cd ozone-site + git config --global user.name 'Github Actions' + git config --global user.email '[email protected]' + git add ozone-site/docs/03-core-concepts/06-configurations.md Review Comment: The actual path in the new website should be `ozone-site/docs/05-administrator-guide/02-configuration/<file-name>.md`. Core concepts is just the basics users and admins both need to understand. A full list of config keys, their meaning, and values is advanced and only admins really need that. I clarified this in [this PR](https://github.com/apache/ozone-site/pull/92) which has unfortunately been open for a while. When raising a PR to the new website, the number prefix cannot be hardcoded in case sections are changed later. 
The job probably needs to search the current directory structure to find the name of the file to write to, assuming it will be like `docs/*-administrator-guide/*-configuration/<file-name>.md`. If the structure changes later, the PR review gives us an opportunity to catch that instead of immediately publishing a broken page. ########## dev-support/ci/xml_to_md.py: ########## @@ -0,0 +1,90 @@ +import os +import re +import zipfile +import xml.etree.ElementTree as ET +from collections import namedtuple +from pathlib import Path + +Property = namedtuple('Property', ['name', 'value', 'tag', 'description']) + +def extract_xml_from_jar(jar_path, xml_filename): + xml_files = [] + with zipfile.ZipFile(jar_path, 'r') as jar: + for file_info in jar.infolist(): + if file_info.filename.endswith(xml_filename): + with jar.open(file_info.filename) as xml_file: + xml_files.append(xml_file.read()) + return xml_files + +def parse_xml_file(xml_content): + root = ET.fromstring(xml_content) + properties = {} + for prop in root.findall('property'): + name = prop.find('name').text if prop.find('name') is not None else '' + value = prop.find('value').text if prop.find('value') is not None else '' + tag = prop.find('tag').text if prop.find('tag') is not None else '' + description = prop.find('description').text if prop.find('description') is not None else '' + description = ' '.join(description.split()).strip() if description else '' + properties[name] = Property(name, value, tag, description) + return properties + +def generate_markdown(properties): + markdown = [] + markdown.append("---\n") + markdown.append("title: \"Ozone configurations\"\n") + markdown.append("summary: Ozone configurations\n") Review Comment: The current website doesn't require front matter. The new website will fail the build if disallowed front matter keys are used like `title` and `summary`. 
I would change this to use `sidebar_label` only if the label in the docs sidebar should be different than the page title given by the first heading. `sidebar_label` is only used by the new website. The current website will ignore this key. ########## dev-support/ci/xml_to_md.py: ########## @@ -0,0 +1,90 @@ +import os +import re +import zipfile +import xml.etree.ElementTree as ET +from collections import namedtuple +from pathlib import Path + +Property = namedtuple('Property', ['name', 'value', 'tag', 'description']) + +def extract_xml_from_jar(jar_path, xml_filename): + xml_files = [] + with zipfile.ZipFile(jar_path, 'r') as jar: + for file_info in jar.infolist(): + if file_info.filename.endswith(xml_filename): + with jar.open(file_info.filename) as xml_file: + xml_files.append(xml_file.read()) + return xml_files + +def parse_xml_file(xml_content): + root = ET.fromstring(xml_content) + properties = {} + for prop in root.findall('property'): + name = prop.find('name').text if prop.find('name') is not None else '' + value = prop.find('value').text if prop.find('value') is not None else '' + tag = prop.find('tag').text if prop.find('tag') is not None else '' + description = prop.find('description').text if prop.find('description') is not None else '' + description = ' '.join(description.split()).strip() if description else '' + properties[name] = Property(name, value, tag, description) Review Comment: Are all of these values really optional? Name is definitely required, and we can use this as a place to fail CI if someone adds a config key without a description. Tag and default value will be optional though. ########## .github/workflows/doc.yml: ########## @@ -0,0 +1,149 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: xml-to-md + +on: + push: + branches: + - master + +jobs: + build: + runs-on: ubuntu-20.04 + timeout-minutes: 60 + strategy: + matrix: + java: [ 8 ] + fail-fast: false + steps: + - name: Checkout project + uses: actions/checkout@v4 + + - name: Cache for npm dependencies + uses: actions/cache@v4 + with: + path: | + ~/.pnpm-store + **/node_modules + key: ${{ runner.os }}-pnpm-${{ hashFiles('**/pnpm-lock.yaml') }} + restore-keys: | + ${{ runner.os }}-pnpm- + + - name: Cache for maven dependencies + uses: actions/cache/restore@v4 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/ozone + key: maven-repo-${{ hashFiles('**/pom.xml') }} + restore-keys: | + maven-repo- + - name: Download Ratis repo + if: ${{ inputs.ratis_args != '' }} + uses: actions/download-artifact@v4 + with: + name: ratis-jars + path: | + ~/.m2/repository/org/apache/ratis + - name: Setup java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: ${{ matrix.java }} + - name: Run a full build + run: hadoop-ozone/dev-support/checks/build.sh -Pdist -Psrc -Dmaven.javadoc.skip=true ${{ inputs.ratis_args }} + env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + - name: Store binaries for tests + uses: actions/upload-artifact@v4 + with: + name: ozone-bin + path: | + hadoop-ozone/dist/target/ozone-*.tar.gz + !hadoop-ozone/dist/target/ozone-*-src.tar.gz + retention-days: 1 + - name: 
Store source tarball for compilation + uses: actions/upload-artifact@v4 + with: + name: ozone-src + path: hadoop-ozone/dist/target/ozone-*-src.tar.gz + retention-days: 1 + - name: Store Maven repo for tests + uses: actions/upload-artifact@v4 + with: + name: ozone-repo + path: | + ~/.m2/repository/org/apache/ozone + retention-days: 1 + + xml-to-md: + needs: + - build + runs-on: ubuntu-20.04 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install dependencies + run: sudo apt-get update && sudo apt-get install -y python3-pip Review Comment: I don't see pip being used to install anything later in the workflow. Can we remove this? ########## dev-support/ci/xml_to_md.py: ########## @@ -0,0 +1,90 @@ +import os +import re +import zipfile +import xml.etree.ElementTree as ET +from collections import namedtuple +from pathlib import Path + +Property = namedtuple('Property', ['name', 'value', 'tag', 'description']) + +def extract_xml_from_jar(jar_path, xml_filename): + xml_files = [] + with zipfile.ZipFile(jar_path, 'r') as jar: + for file_info in jar.infolist(): + if file_info.filename.endswith(xml_filename): + with jar.open(file_info.filename) as xml_file: + xml_files.append(xml_file.read()) + return xml_files + +def parse_xml_file(xml_content): + root = ET.fromstring(xml_content) + properties = {} + for prop in root.findall('property'): + name = prop.find('name').text if prop.find('name') is not None else '' + value = prop.find('value').text if prop.find('value') is not None else '' + tag = prop.find('tag').text if prop.find('tag') is not None else '' + description = prop.find('description').text if prop.find('description') is not None else '' + description = ' '.join(description.split()).strip() if description else '' + properties[name] = Property(name, value, tag, description) + return properties + +def generate_markdown(properties): + markdown = [] 
Review Comment: Also apache license headers are not required on doc pages per ASF guidelines so we can actually remove this. ########## dev-support/ci/xml_to_md.py: ########## @@ -0,0 +1,90 @@ +import os +import re +import zipfile +import xml.etree.ElementTree as ET +from collections import namedtuple +from pathlib import Path + +Property = namedtuple('Property', ['name', 'value', 'tag', 'description']) + +def extract_xml_from_jar(jar_path, xml_filename): + xml_files = [] + with zipfile.ZipFile(jar_path, 'r') as jar: + for file_info in jar.infolist(): + if file_info.filename.endswith(xml_filename): + with jar.open(file_info.filename) as xml_file: + xml_files.append(xml_file.read()) + return xml_files + +def parse_xml_file(xml_content): + root = ET.fromstring(xml_content) + properties = {} + for prop in root.findall('property'): + name = prop.find('name').text if prop.find('name') is not None else '' + value = prop.find('value').text if prop.find('value') is not None else '' + tag = prop.find('tag').text if prop.find('tag') is not None else '' + description = prop.find('description').text if prop.find('description') is not None else '' + description = ' '.join(description.split()).strip() if description else '' + properties[name] = Property(name, value, tag, description) + return properties + +def generate_markdown(properties): + markdown = [] + markdown.append("---\n") + markdown.append("title: \"Ozone configurations\"\n") + markdown.append("summary: Ozone configurations\n") + markdown.append("---\n") + markdown.append("<!--\n") + markdown.append("Licensed to the Apache Software Foundation (ASF) under one or more\n") + markdown.append("contributor license agreements. 
See the NOTICE file distributed with\n") + markdown.append("this work for additional information regarding copyright ownership.\n") + markdown.append("The ASF licenses this file to You under the Apache License, Version 2.0\n") + markdown.append("(the \"License\"); you may not use this file except in compliance with\n") + markdown.append("the License. You may obtain a copy of the License at\n\n") + markdown.append(" http://www.apache.org/licenses/LICENSE-2.0\n\n") + markdown.append("Unless required by applicable law or agreed to in writing, software\n") + markdown.append("distributed under the License is distributed on an \"AS IS\" BASIS,\n") + markdown.append("WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n") + markdown.append("See the License for the specific language governing permissions and\n") + markdown.append("limitations under the License.\n") + markdown.append("-->\n\n") + + for prop in sorted(properties.values(), key=lambda p: p.name): + markdown.append(f"| **Name** | `{prop.name}` |\n") + markdown.append("|:----------------|:----------------------------|\n") + markdown.append(f"| **Value** | {prop.value} |\n") + markdown.append(f"| **Tag** | {prop.tag} |\n") + markdown.append(f"| **Description** | {prop.description} |\n") + markdown.append("--------------------------------------------------------------------------------\n") + + return ''.join(markdown) + +def main(): + base_path = 'ozone-bin/extracted' + + # Find ozone SNAPSHOT directory dynamically using regex + snapshot_dir = next( + (os.path.join(base_path, d) for d in os.listdir(base_path) if re.match(r'ozone-.*-SNAPSHOT', d)), + None + ) + + extract_path = os.path.join(snapshot_dir, 'share', 'ozone', 'lib') + xml_filename = 'ozone-default-generated.xml' + + property_map = {} + for file_name in os.listdir(extract_path): + if file_name.endswith('.jar'): + jar_path = os.path.join(extract_path, file_name) + xml_contents = extract_xml_from_jar(jar_path, xml_filename) + for 
xml_content in xml_contents: + property_map.update(parse_xml_file(xml_content)) + + markdown_content = generate_markdown(property_map) + output_path = Path("hadoop-hdds/docs/content/tools/Configurations.md") Review Comment: Let's use path.join or similar to avoid hardcoding directory slashes. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
