Re: [PR] Move projects metadata for SBOM export to Google Spreadsheet [airflow]

via GitHub Tue, 03 Sep 2024 01:36:44 -0700


potiuk commented on code in PR #41930:
URL: https://github.com/apache/airflow/pull/41930#discussion_r1741661126



##########
dev/breeze/src/airflow_breeze/utils/projects_google_spreadsheet.py:
##########
@@ -0,0 +1,253 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from enum import Enum
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from googleapiclient.discovery import Resource
+
+from airflow_breeze.utils.console import get_console
+
+INTERESTING_OPSF_FIELDS = [
+    "Score",
+    "Code-Review",
+    "Maintained",
+    "Dangerous-Workflow",
+    "Security-Policy",
+    "Packaging",
+    "Vulnerabilities",
+]
+
+INTERESTING_OPSF_SCORES = ["OPSF-" + field for field in 
INTERESTING_OPSF_FIELDS]
+INTERESTING_OPSF_DETAILS = ["OPSF-Details-" + field for field in 
INTERESTING_OPSF_FIELDS]
+
+
+class MetadataFromSpreadsheet(Enum):
+    KNOWN_REPUTABLE_FOUNDATIONS = 0
+    KNOWN_STRONG_COMMUNITIES = 1
+    KNOWN_COMPANIES = 2
+    KNOWN_STABLE_PROJECTS = 3
+    KNOWN_LOW_IMPORTANCE_PROJECTS = 4
+    KNOWN_MEDIUM_IMPORTANCE_PROJECTS = 5
+    KNOWN_HIGH_IMPORTANCE_PROJECTS = 6
+    RELATIONSHIP_PROJECTS = 7
+    CONTACTED_PROJECTS = 8
+
+
+metadata_from_spreadsheet: dict[MetadataFromSpreadsheet, list[str]] = {}
+
+
+def get_project_metadata(metadata_type: MetadataFromSpreadsheet) -> list[str]:
+    return metadata_from_spreadsheet[metadata_type]
+
+
+# This is a spreadsheet where we store metadata about projects that we want to 
use in our analysis
+METADATA_SPREADSHEET_ID = "1Hg6_B_irfnqNltnu1OUmt7Ph-K6x-DTWF7GZ5t-G0iI"
+# This is the named range where we keep metadata
+METADATA_RANGE_NAME = "SpreadsheetMetadata"
+
+
+def read_metadata_from_google_spreadsheet(sheets: Resource):
+    get_console().print(
+        "[info]Reading metadata from Google Spreadsheet: "
+        f"https://docs.google.com/spreadsheets/d/{METADATA_SPREADSHEET_ID}";
+    )
+    range = sheets.values().get(spreadsheetId=METADATA_SPREADSHEET_ID, 
range=METADATA_RANGE_NAME).execute()
+    metadata_types: list[MetadataFromSpreadsheet] = []
+    for metadata_field in range["values"][0]:
+        metadata_types.append(MetadataFromSpreadsheet[metadata_field])
+        metadata_from_spreadsheet[MetadataFromSpreadsheet[metadata_field]] = []
+    for row in range["values"][1:]:
+        for index, value in enumerate(row):
+            value = value.strip()
+            if value:
+                metadata_from_spreadsheet[metadata_types[index]].append(value)
+    get_console().print("[success]Metadata read from Google Spreadsheet.")
+
+
+def authorize_google_spreadsheets(json_credentials_file: Path, token_path: 
Path) -> Resource:
+    from google.auth.transport.requests import Request
+    from google.oauth2.credentials import Credentials
+    from google_auth_oauthlib.flow import InstalledAppFlow
+    from googleapiclient.discovery import build
+
+    SCOPES = ["https://www.googleapis.com/auth/spreadsheets";]
+    creds = None
+    if token_path.exists():
+        creds = Credentials.from_authorized_user_file(token_path.as_posix(), 
SCOPES)
+    if not creds or not creds.valid:
+        if creds and creds.expired and creds.refresh_token:
+            creds.refresh(Request())
+        else:
+            flow = 
InstalledAppFlow.from_client_secrets_file(json_credentials_file.as_posix(), 
SCOPES)
+            creds = flow.run_local_server(port=0)
+            # Save the credentials for the next run
+        token_path.write_text(creds.to_json())
+    service = build("sheets", "v4", credentials=creds)
+    sheets = service.spreadsheets()
+    return sheets
+
+
+def get_sheets(json_credentials_file: Path) -> Resource:
+    token_path = Path.home() / ".config" / "gsheet" / "token.json"
+    sheets = authorize_google_spreadsheets(json_credentials_file, token_path)
+    return sheets
+
+
+def write_sbom_information_to_google_spreadsheet(
+    sheets: Resource,
+    docs: dict[str, str],
+    google_spreadsheet_id: str,
+    all_dependencies: list[dict[str, Any]],
+    fieldnames: list[str],
+    include_opsf_scorecard: bool = False,
+):
+    # Use only interesting values from the scorecard
+    cell_field_names = [
+        fieldname
+        for fieldname in fieldnames
+        if fieldname in INTERESTING_OPSF_SCORES or not 
fieldname.startswith("OPSF-")
+    ]
+
+    num_rows = update_field_values(all_dependencies, cell_field_names, 
google_spreadsheet_id, sheets)
+    if include_opsf_scorecard:
+        get_console().print("[info]Updating OPSF detailed comments.")
+        update_opsf_detailed_comments(
+            all_dependencies, fieldnames, num_rows, google_spreadsheet_id, 
docs, sheets
+        )
+
+
+def update_opsf_detailed_comments(
+    all_dependencies: list[dict[str, Any]],
+    fieldnames: list[str],
+    num_rows: int,
+    google_spreadsheet_id: str,
+    docs: dict[str, str],
+    sheets: Resource,
+):
+    opsf_details_field_names = [
+        fieldname for fieldname in fieldnames if fieldname in 
INTERESTING_OPSF_DETAILS
+    ]
+    start_opsf_column = fieldnames.index(opsf_details_field_names[0]) - 1
+    opsf_details = []
+    opsf_details.append(
+        {
+            "values": [
+                {"note": docs[check]}
+                for check in INTERESTING_OPSF_FIELDS
+                if check != INTERESTING_OPSF_FIELDS[0]
+            ]
+        }
+    )
+    get_console().print("[info]Adding notes to all cells.")
+    for dependency in all_dependencies:
+        note_row = 
convert_sbom_dict_to_spreadsheet_data(opsf_details_field_names, dependency)
+        opsf_details.append({"values": [{"note": note} for note in note_row]})
+    notes = {
+        "updateCells": {
+            "range": {
+                "startRowIndex": 1,
+                "endRowIndex": num_rows + 1,
+                "startColumnIndex": start_opsf_column,
+                "endColumnIndex": start_opsf_column + 
len(opsf_details_field_names) + 1,
+            },
+            "rows": opsf_details,
+            "fields": "note",
+        },
+    }
+    update_note_body = {"requests": [notes]}
+    get_console().print("[info]Updating notes in google spreadsheet.")
+    sheets.batchUpdate(spreadsheetId=google_spreadsheet_id, 
body=update_note_body).execute()
+
+
+def calculate_range(num_columns: int, row: int) -> str:
+    import string

Review Comment:
   Yes. could be regular import..



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Move projects metadata for SBOM export to Google Spreadsheet [airflow]

Reply via email to