sbp commented on code in PR #442:
URL: 
https://github.com/apache/tooling-trusted-releases/pull/442#discussion_r2644097671


##########
atr/get/sbom.py:
##########
@@ -378,45 +388,96 @@ def _missing_tally(items: 
list[sbom.models.conformance.Missing]) -> list[tuple[s
     )
 
 
+# TODO: Update this to return either a block or something we can use later in 
a block for styling reasons
 def _license_tally(
     items: list[sbom.models.licenses.Issue],
-) -> list[tuple[sbom.models.licenses.Category, int, list[str | None]]]:
+    old_issues: list[sbom.models.licenses.Issue],
+) -> list[tuple[sbom.models.licenses.Category, int, int, list[str | None]]]:
     counts: dict[sbom.models.licenses.Category, int] = {}
     components: dict[sbom.models.licenses.Category, list[str | None]] = {}
+    new_count = 0
+    old_map = {lic.component_name: (lic.license_expression, lic.category) for 
lic in old_issues}
     for item in items:
         key = item.category
         counts[key] = counts.get(key, 0) + 1
+        name = str(item).capitalize()
+        if item.component_name not in old_map:
+            new_count = new_count + 1
+            name = f"{name} (new)"
+        elif item.license_expression != old_map[item.component_name][0]:
+            new_count = new_count + 1
+            name = f"{name} (previously {old_map[item.component_name][0]} - 
Category {
+                str(old_map[item.component_name][1]).upper()
+            })"
         if key not in components:
-            components[key] = [str(item)]
+            components[key] = [name]
         else:
-            components[key].append(str(item))
+            components[key].append(name)
     return sorted(
-        [(category, count, components.get(category, [])) for category, count 
in counts.items()],
+        [(category, count, new_count, components.get(category, [])) for 
category, count in counts.items()],

Review Comment:
   The `new_count` is the count of overall new licenses, by the looks, but 
these returned rows are per category. Shouldn't the new count therefore be 
tracked per category rather than overall, and be custom for each category row?



##########
atr/get/sbom.py:
##########
@@ -468,58 +547,86 @@ def _vulnerability_scan_find_in_progress_task(
 
 
 def _vulnerability_scan_results(
-    block: htm.Block, vulns: list[osv.CdxVulnerabilityDetail], scans: 
list[str], task: sql.Task | None
+    block: htm.Block,
+    vulns: list[osv.CdxVulnerabilityDetail],
+    scans: list[str],
+    task: sql.Task | None,
+    prev: list[osv.CdxVulnerabilityDetail] | None,
 ) -> None:
+    previous_vulns = None
+    if prev is not None:
+        previous_osv = [_cdx_to_osv(v) for v in prev]
+        previous_vulns = {v.id: _extract_vulnerability_severity(v) for v in 
previous_osv}
     if task is not None:
-        task_result = task.result
-        if not isinstance(task_result, results.SBOMOSVScan):
-            block.p["Invalid scan result format."]
-            return
+        _vulnerability_results_from_scan(task, block, previous_vulns)
+    else:
+        _vulnerability_results_from_bom(vulns, block, scans, previous_vulns)
 
-        components = task_result.components
-        ignored = task_result.ignored
-        ignored_count = len(ignored)
 
-        if not components:
-            block.p["No vulnerabilities found."]
-            if ignored_count > 0:
-                component_word = "component was" if (ignored_count == 1) else 
"components were"
-                block.p[f"{ignored_count} {component_word} ignored due to 
missing PURL or version information:"]
-                block.p[f"{','.join(ignored)}"]
-            return
+def _vulnerability_results_from_bom(
+    vulns: list[osv.CdxVulnerabilityDetail], block: htm.Block, scans: 
list[str], previous_vulns: dict[str, str] | None
+) -> None:
+    total_new = 0
+    new_block = htm.Block()
+    if len(vulns) == 0:
+        block.p["No vulnerabilities listed in this SBOM."]
+        return
+    components = {a.get("ref", "") for v in vulns if v.affects is not None for 
a in v.affects}
+
+    if len(scans) > 0:
+        block.p["This SBOM was scanned for vulnerabilities at revision ", 
htm.code[scans[-1]], "."]
+
+    for component in components:
+        new = _vulnerability_component_details_osv(
+            new_block,
+            results.OSVComponent(
+                purl=component,
+                vulnerabilities=[
+                    _cdx_to_osv(v)
+                    for v in vulns
+                    if (v.affects is not None) and (component in [a.get("ref") 
for a in v.affects])
+                ],
+            ),
+            previous_vulns,
+        )
+        total_new = total_new + new
+
+    new_str = f" ({total_new!s} new since last release)" if (total_new > 0) 
else ""
+    block.p[f"Vulnerabilities{new_str} found in {len(components)} components:"]
+    block.append(new_block)
 
-        block.p[f"Scan found vulnerabilities in {len(components)} components:"]
 
-        for component in components:
-            _vulnerability_component_details_osv(block, component)
+def _vulnerability_results_from_scan(task: sql.Task, block: htm.Block, 
previous_vulns: dict[str, str] | None) -> None:
+    total_new = 0
+    new_block = htm.Block()
+    task_result = task.result
+    if not isinstance(task_result, results.SBOMOSVScan):
+        block.p["Invalid scan result format."]
+        return
 
+    components = task_result.components
+    ignored = task_result.ignored
+    ignored_count = len(ignored)
+
+    if not components:
+        block.p["No vulnerabilities found."]
         if ignored_count > 0:
             component_word = "component was" if (ignored_count == 1) else 
"components were"
             block.p[f"{ignored_count} {component_word} ignored due to missing 
PURL or version information:"]
             block.p[f"{','.join(ignored)}"]
-    else:
-        if len(vulns) == 0:
-            block.p["No vulnerabilities listed in this SBOM."]
-            return
-        components = {a.get("ref", "") for v in vulns if v.affects is not None 
for a in v.affects}
+        return
 
-        if len(scans) > 0:
-            block.p["This SBOM was scanned for vulnerabilities at revision ", 
htm.code[scans[-1]], "."]
+    for component in components:
+        new = _vulnerability_component_details_osv(new_block, component, 
previous_vulns)
+        total_new = total_new + new
 
-        block.p[f"Vulnerabilities found in {len(components)} components:"]
+    new_str = f" ({total_new!s} new since last release)" if (total_new > 0) 
else ""
+    block.p[f"Scan found vulnerabilities{new_str} in {len(components)} 
components:"]
 
-        for component in components:
-            _vulnerability_component_details_osv(
-                block,
-                results.OSVComponent(
-                    purl=component,
-                    vulnerabilities=[
-                        _cdx_to_osv(v)
-                        for v in vulns
-                        if (v.affects is not None) and (component in 
[a.get("ref") for a in v.affects])
-                    ],
-                ),
-            )
+    if ignored_count > 0:
+        component_word = "component was" if (ignored_count == 1) else 
"components were"
+        block.p[f"{ignored_count} {component_word} ignored due to missing PURL 
or version information:"]
+        block.p[f"{','.join(ignored)}"]

Review Comment:
   I think this is missing a `block.append(new_block)` after it, to make it 
work the same as `_vulnerability_results_from_bom` which does do that.



##########
atr/get/sbom.py:
##########
@@ -56,16 +59,55 @@ async def report(session: web.Committer, project: str, 
version: str, file_path:
         release = await session.release(project, version, 
phase=sql.ReleasePhase.RELEASE_CANDIDATE, with_committee=True)
     is_release_candidate = release.phase == sql.ReleasePhase.RELEASE_CANDIDATE
 
+    task, augment_tasks, osv_tasks = await _fetch_tasks(validated_path_str, 
project, release, version)
+
+    task_status = await _report_task_results(block, task)
+    if task_status:
+        return task_status
+
+    if task is None or (not isinstance(task.result, results.SBOMToolScore)):
+        raise base.ASFQuartException("Invalid SBOM score result", 
errorcode=500)
+
+    task_result = task.result
+    _report_header(block, is_release_candidate, release, task_result)
+
+    if not is_release_candidate:
+        latest_augment = None
+        last_augmented_bom = None
+        if len(augment_tasks) > 0:
+            latest_augment = augment_tasks[0]
+            augment_results: list[Any] = [t.result for t in augment_tasks]
+            augmented_bom_versions = [
+                r.bom_version for r in augment_results if (r is not None) and 
(r.bom_version is not None)
+            ]
+            if len(augmented_bom_versions) > 0:
+                last_augmented_bom = max(augmented_bom_versions)
+        _augment_section(block, release, task_result, latest_augment, 
last_augmented_bom)
+
+    _conformance_section(block, task_result)
+    _license_section(block, task_result)
+
+    _vulnerability_scan_section(block, project, version, file_path, 
task_result, osv_tasks, is_release_candidate)
+
+    _outdated_tool_section(block, task_result)
+
+    _cyclonedx_cli_errors(block, task_result)
+
+    return await template.blank("SBOM report", content=block.collect())
+
+
+async def _fetch_tasks(
+    file_path: str, project: str, release: sql.Release, version: str
+) -> tuple[sql.Task | None, collections.abc.Sequence[sql.Task], 
collections.abc.Sequence[sql.Task]]:
     async with db.session() as data:
         via = sql.validate_instrumented_attribute
-        # TODO: Abstract this code and the sbomtool.MissingAdapter validators

Review Comment:
   Do we not still want to do this?



##########
atr/get/sbom.py:
##########
@@ -378,45 +388,96 @@ def _missing_tally(items: 
list[sbom.models.conformance.Missing]) -> list[tuple[s
     )
 
 
+# TODO: Update this to return either a block or something we can use later in 
a block for styling reasons
 def _license_tally(
     items: list[sbom.models.licenses.Issue],
-) -> list[tuple[sbom.models.licenses.Category, int, list[str | None]]]:
+    old_issues: list[sbom.models.licenses.Issue],
+) -> list[tuple[sbom.models.licenses.Category, int, int, list[str | None]]]:
     counts: dict[sbom.models.licenses.Category, int] = {}
     components: dict[sbom.models.licenses.Category, list[str | None]] = {}
+    new_count = 0
+    old_map = {lic.component_name: (lic.license_expression, lic.category) for 
lic in old_issues}
     for item in items:
         key = item.category
         counts[key] = counts.get(key, 0) + 1
+        name = str(item).capitalize()
+        if item.component_name not in old_map:

Review Comment:
   The vulnerabilities code has a `previous_vulns is not None` guard, which is 
why it's unaffected.



##########
atr/get/sbom.py:
##########
@@ -183,41 +182,48 @@ def _outdated_tool_section(block: htm.Block, task_result: 
results.SBOMToolScore)
 
 
 def _conformance_section(block: htm.Block, task_result: results.SBOMToolScore) 
-> None:
+    block.h2["Conformance report"]
     warnings = 
[sbom.models.conformance.MissingAdapter.validate_python(json.loads(w)) for w in 
task_result.warnings]
     errors = 
[sbom.models.conformance.MissingAdapter.validate_python(json.loads(e)) for e in 
task_result.errors]
     if warnings:
-        block.h2["Warnings"]
+        block.h3[htm.icon("bi-exclamation-triangle-fill", 
".me-2.text-warning"), "Warnings"]

Review Comment:
   The `htm.icon` function adds the `bi-` prefix, so this will have `bi-bi-` as 
a prefix.



##########
bootstrap/source/custom.scss:
##########
@@ -156,3 +156,7 @@ small, .text-muted {
 .nav-link {
   cursor: pointer;
 }
+
+.text-strike {

Review Comment:
   The idea of `custom.scss` is that it customises Bootstrap classes, but 
`text-strike` is not a Bootstrap class. Classes which we add, even if they seem 
to follow the Bootstrap feel, go in `atr.css` and are typically given an `atr-` 
prefix to distinguish them from Bootstrap classes (we're not always consistent 
about this). The `.atr-hide` class, for example, just applies `display: none` 
and has a Bootstrap feel to it but was added to `atr.css`. I've added a comment 
about this to the documentation in 0de12049b76e4eba08ad43b12d74f8cbcf332003.



##########
atr/get/sbom.py:
##########
@@ -378,45 +388,96 @@ def _missing_tally(items: 
list[sbom.models.conformance.Missing]) -> list[tuple[s
     )
 
 
+# TODO: Update this to return either a block or something we can use later in 
a block for styling reasons
 def _license_tally(
     items: list[sbom.models.licenses.Issue],
-) -> list[tuple[sbom.models.licenses.Category, int, list[str | None]]]:
+    old_issues: list[sbom.models.licenses.Issue],
+) -> list[tuple[sbom.models.licenses.Category, int, int, list[str | None]]]:
     counts: dict[sbom.models.licenses.Category, int] = {}
     components: dict[sbom.models.licenses.Category, list[str | None]] = {}
+    new_count = 0
+    old_map = {lic.component_name: (lic.license_expression, lic.category) for 
lic in old_issues}
     for item in items:
         key = item.category
         counts[key] = counts.get(key, 0) + 1
+        name = str(item).capitalize()
+        if item.component_name not in old_map:
+            new_count = new_count + 1
+            name = f"{name} (new)"
+        elif item.license_expression != old_map[item.component_name][0]:
+            new_count = new_count + 1
+            name = f"{name} (previously {old_map[item.component_name][0]} - 
Category {

Review Comment:
   Could make this (and "(new)" above) `<strong>`. Maybe a diff style + OLD - 
NEW change over two lines would be better, or `OLD -> new` or something like 
that. Some colour might help too.



##########
atr/get/sbom.py:
##########
@@ -378,45 +388,96 @@ def _missing_tally(items: 
list[sbom.models.conformance.Missing]) -> list[tuple[s
     )
 
 
+# TODO: Update this to return either a block or something we can use later in 
a block for styling reasons
 def _license_tally(
     items: list[sbom.models.licenses.Issue],
-) -> list[tuple[sbom.models.licenses.Category, int, list[str | None]]]:
+    old_issues: list[sbom.models.licenses.Issue],
+) -> list[tuple[sbom.models.licenses.Category, int, int, list[str | None]]]:
     counts: dict[sbom.models.licenses.Category, int] = {}
     components: dict[sbom.models.licenses.Category, list[str | None]] = {}
+    new_count = 0
+    old_map = {lic.component_name: (lic.license_expression, lic.category) for 
lic in old_issues}
     for item in items:
         key = item.category
         counts[key] = counts.get(key, 0) + 1
+        name = str(item).capitalize()
+        if item.component_name not in old_map:

Review Comment:
   This is always true if `old_issues` is empty, which means that on the very 
first release every license will be shown as new. This is inconsistent with 
vulnerabilities, which don't show as new on a first release.



##########
atr/get/sbom.py:
##########
@@ -378,45 +388,96 @@ def _missing_tally(items: 
list[sbom.models.conformance.Missing]) -> list[tuple[s
     )
 
 
+# TODO: Update this to return either a block or something we can use later in 
a block for styling reasons
 def _license_tally(
     items: list[sbom.models.licenses.Issue],
-) -> list[tuple[sbom.models.licenses.Category, int, list[str | None]]]:
+    old_issues: list[sbom.models.licenses.Issue],
+) -> list[tuple[sbom.models.licenses.Category, int, int, list[str | None]]]:
     counts: dict[sbom.models.licenses.Category, int] = {}
     components: dict[sbom.models.licenses.Category, list[str | None]] = {}
+    new_count = 0
+    old_map = {lic.component_name: (lic.license_expression, lic.category) for 
lic in old_issues}
     for item in items:
         key = item.category
         counts[key] = counts.get(key, 0) + 1
+        name = str(item).capitalize()
+        if item.component_name not in old_map:
+            new_count = new_count + 1
+            name = f"{name} (new)"
+        elif item.license_expression != old_map[item.component_name][0]:
+            new_count = new_count + 1

Review Comment:
   This is not truly new, only updated. We should probably use a 
`changed_count` or `updated_count` instead, and display as (N new, M changed / 
updated).



##########
atr/sbom/models/base.py:
##########
@@ -21,7 +21,7 @@
 
 
 class Lax(pydantic.BaseModel):
-    model_config = pydantic.ConfigDict(extra="allow", strict=False)
+    model_config = pydantic.ConfigDict(extra="allow", strict=False, 
validate_assignment=True, validate_by_name=True)

Review Comment:
   I wonder what happens if we add `validate_assignment` to `schema.Lax`? It's 
probably not a good idea for us to let `schema.py` and `base.py` get out of 
sync. Similarly, if `validate_by_name` is needed for SBOM related parsing, then 
we should probably add it to `schema.Lax` too.



##########
atr/get/sbom.py:
##########
@@ -44,6 +44,9 @@
 async def report(session: web.Committer, project: str, version: str, 
file_path: str) -> str:
     await session.check_access(project)
 
+    block = htm.Block()

Review Comment:
   We should put some navigation à la `atr.shared.distribution.html_nav` at the 
top of this page.



##########
atr/get/sbom.py:
##########
@@ -378,45 +388,96 @@ def _missing_tally(items: 
list[sbom.models.conformance.Missing]) -> list[tuple[s
     )
 
 
+# TODO: Update this to return either a block or something we can use later in 
a block for styling reasons
 def _license_tally(
     items: list[sbom.models.licenses.Issue],
-) -> list[tuple[sbom.models.licenses.Category, int, list[str | None]]]:
+    old_issues: list[sbom.models.licenses.Issue],
+) -> list[tuple[sbom.models.licenses.Category, int, int, list[str | None]]]:
     counts: dict[sbom.models.licenses.Category, int] = {}
     components: dict[sbom.models.licenses.Category, list[str | None]] = {}
+    new_count = 0
+    old_map = {lic.component_name: (lic.license_expression, lic.category) for 
lic in old_issues}
     for item in items:
         key = item.category
         counts[key] = counts.get(key, 0) + 1
+        name = str(item).capitalize()
+        if item.component_name not in old_map:
+            new_count = new_count + 1
+            name = f"{name} (new)"
+        elif item.license_expression != old_map[item.component_name][0]:
+            new_count = new_count + 1
+            name = f"{name} (previously {old_map[item.component_name][0]} - 
Category {
+                str(old_map[item.component_name][1]).upper()
+            })"
         if key not in components:
-            components[key] = [str(item)]
+            components[key] = [name]
         else:
-            components[key].append(str(item))
+            components[key].append(name)
     return sorted(
-        [(category, count, components.get(category, [])) for category, count 
in counts.items()],
+        [(category, count, new_count, components.get(category, [])) for 
category, count in counts.items()],
         key=lambda kv: kv[0].value,
     )
 
 
-def _vulnerability_component_details_osv(block: htm.Block, component: 
results.OSVComponent) -> None:
-    details_content = []
-    summary_element = htm.summary[
-        
htm.span(".badge.bg-danger.me-2.font-monospace")[str(len(component.vulnerabilities))],
-        htm.strong[component.purl],
-    ]
-    details_content.append(summary_element)
-
+def _severity_to_style(severity: str) -> str:
+    match severity.lower():
+        case "critical":
+            return ".bg-danger.text-light"
+        case "high":
+            return ".bg-danger.text-light"
+        case "medium":
+            return ".bg-warning.text-dark"
+        case "moderate":
+            return ".bg-warning.text-dark"
+        case "low":
+            return ".bg-warning.text-dark"
+        case "info":
+            return ".bg-info.text-light"
+    return ".bg-info.text-light"
+
+
+def _vulnerability_component_details_osv(
+    block: htm.Block,
+    component: results.OSVComponent,
+    previous_vulns: dict[str, str] | None,  # id: severity
+) -> int:
+    severities = ["critical", "high", "medium", "moderate", "low", "info", 
"none", "unknown"]
+    new = 0
+    worst = 99
+
+    vuln_details = []
     for vuln in component.vulnerabilities:
+        is_new = False
         vuln_id = vuln.id or "Unknown"
         vuln_summary = vuln.summary
         vuln_refs = []
         if vuln.references is not None:
             vuln_refs = [r for r in vuln.references if r.get("type", "") == 
"WEB"]
         vuln_primary_ref = vuln_refs[0] if (len(vuln_refs) > 0) else {}
         vuln_modified = vuln.modified or "Unknown"
+
         vuln_severity = _extract_vulnerability_severity(vuln)
+        worst = _update_worst_severity(severities, vuln_severity, worst)
+
+        if previous_vulns is not None:
+            if (vuln_id not in previous_vulns) or previous_vulns[vuln_id] != 
vuln_severity:
+                is_new = True
+                new = new + 1
 
         vuln_header = [htm.a(href=vuln_primary_ref.get("url", ""), 
target="_blank")[htm.strong(".me-2")[vuln_id]]]
-        if vuln_severity != "Unknown":
-            
vuln_header.append(htm.span(".badge.bg-warning.text-dark")[vuln_severity])
+        style = f".badge.me-2{_severity_to_style(vuln_severity)}"
+        vuln_header.append(htm.span(style)[vuln_severity])
+
+        if (previous_vulns is not None) and is_new:
+            if vuln_id in previous_vulns:  # If it's there, the sev must have 
changed

Review Comment:
   Line ending comments are discouraged by the style guide. Can we put this one 
underneath this condititional? If it goes on top, it might look like it belongs 
to the conditional on L471.



##########
atr/get/sbom.py:
##########
@@ -378,45 +388,96 @@ def _missing_tally(items: 
list[sbom.models.conformance.Missing]) -> list[tuple[s
     )
 
 
+# TODO: Update this to return either a block or something we can use later in 
a block for styling reasons
 def _license_tally(
     items: list[sbom.models.licenses.Issue],
-) -> list[tuple[sbom.models.licenses.Category, int, list[str | None]]]:
+    old_issues: list[sbom.models.licenses.Issue],
+) -> list[tuple[sbom.models.licenses.Category, int, int, list[str | None]]]:
     counts: dict[sbom.models.licenses.Category, int] = {}
     components: dict[sbom.models.licenses.Category, list[str | None]] = {}
+    new_count = 0
+    old_map = {lic.component_name: (lic.license_expression, lic.category) for 
lic in old_issues}
     for item in items:
         key = item.category
         counts[key] = counts.get(key, 0) + 1
+        name = str(item).capitalize()
+        if item.component_name not in old_map:
+            new_count = new_count + 1
+            name = f"{name} (new)"
+        elif item.license_expression != old_map[item.component_name][0]:
+            new_count = new_count + 1
+            name = f"{name} (previously {old_map[item.component_name][0]} - 
Category {
+                str(old_map[item.component_name][1]).upper()
+            })"
         if key not in components:
-            components[key] = [str(item)]
+            components[key] = [name]
         else:
-            components[key].append(str(item))
+            components[key].append(name)
     return sorted(
-        [(category, count, components.get(category, [])) for category, count 
in counts.items()],
+        [(category, count, new_count, components.get(category, [])) for 
category, count in counts.items()],
         key=lambda kv: kv[0].value,
     )
 
 
-def _vulnerability_component_details_osv(block: htm.Block, component: 
results.OSVComponent) -> None:
-    details_content = []
-    summary_element = htm.summary[
-        
htm.span(".badge.bg-danger.me-2.font-monospace")[str(len(component.vulnerabilities))],
-        htm.strong[component.purl],
-    ]
-    details_content.append(summary_element)
-
+def _severity_to_style(severity: str) -> str:
+    match severity.lower():
+        case "critical":
+            return ".bg-danger.text-light"
+        case "high":
+            return ".bg-danger.text-light"
+        case "medium":
+            return ".bg-warning.text-dark"
+        case "moderate":
+            return ".bg-warning.text-dark"
+        case "low":
+            return ".bg-warning.text-dark"
+        case "info":
+            return ".bg-info.text-light"
+    return ".bg-info.text-light"
+
+
+def _vulnerability_component_details_osv(
+    block: htm.Block,
+    component: results.OSVComponent,
+    previous_vulns: dict[str, str] | None,  # id: severity
+) -> int:
+    severities = ["critical", "high", "medium", "moderate", "low", "info", 
"none", "unknown"]
+    new = 0
+    worst = 99
+
+    vuln_details = []
     for vuln in component.vulnerabilities:
+        is_new = False
         vuln_id = vuln.id or "Unknown"
         vuln_summary = vuln.summary
         vuln_refs = []
         if vuln.references is not None:
             vuln_refs = [r for r in vuln.references if r.get("type", "") == 
"WEB"]
         vuln_primary_ref = vuln_refs[0] if (len(vuln_refs) > 0) else {}
         vuln_modified = vuln.modified or "Unknown"
+
         vuln_severity = _extract_vulnerability_severity(vuln)
+        worst = _update_worst_severity(severities, vuln_severity, worst)
+
+        if previous_vulns is not None:
+            if (vuln_id not in previous_vulns) or previous_vulns[vuln_id] != 
vuln_severity:

Review Comment:
   We should perhaps scope newness to the component rather than the whole 
project. Consider a library called libalpha being added to example-0.1 with 
CVE-123. Then in example-0.2 they remove the dependency on libalpha, and add 
libbeta as a dependency instead, which also has CVE-123. This wouldn't be 
marked as new, but in a sense it is new.



##########
atr/util.py:
##########
@@ -1011,6 +1011,53 @@ def version_name_error(version_name: str) -> str | None:
     return None
 
 
+def version_sort_key(version: str) -> bytes:
+    """
+    Convert a version string into a sortable byte sequence.
+    Prefixes each digit sequence with its length as u16 little-endian.
+    Strips leading zeros and appends a byte for the count of leading zeros.
+    """
+    result = []
+    i = 0
+    length = len(version)
+    while i < length:
+        if version[i].isdigit():
+            # Find the end of this digit sequence
+            j = i
+            while j < length and version[j].isdigit():
+                j += 1
+
+            digit_sequence = version[i:j]
+
+            # Count leading zeros
+            leading_zeros = 0
+            for char in digit_sequence:
+                if char == "0":
+                    leading_zeros += 1
+                else:
+                    break
+
+            # Strip leading zeros (but keep at least one digit if all zeros)
+            stripped = digit_sequence.lstrip("0") or "0"
+
+            # Count the stripped digits and encode as u16 little-endian
+            digit_count = len(stripped)
+            length_bytes = digit_count.to_bytes(2, "little")

Review Comment:
   Whoops, this should be big endian of course. (My fault, I said little.) But 
I think we should encode this as a single byte anyway, the traditional way. We 
don't need to support digit sequences over 256 digits long.



##########
atr/util.py:
##########
@@ -1011,6 +1011,53 @@ def version_name_error(version_name: str) -> str | None:
     return None
 
 
+def version_sort_key(version: str) -> bytes:
+    """
+    Convert a version string into a sortable byte sequence.
+    Prefixes each digit sequence with its length as u16 little-endian.
+    Strips leading zeros and appends a byte for the count of leading zeros.
+    """
+    result = []
+    i = 0
+    length = len(version)
+    while i < length:
+        if version[i].isdigit():
+            # Find the end of this digit sequence
+            j = i
+            while j < length and version[j].isdigit():
+                j += 1
+
+            digit_sequence = version[i:j]
+
+            # Count leading zeros
+            leading_zeros = 0
+            for char in digit_sequence:
+                if char == "0":
+                    leading_zeros += 1
+                else:
+                    break
+
+            # Strip leading zeros (but keep at least one digit if all zeros)

Review Comment:
   Don't have to keep a `0` in this algorithm, I think, because it's keeping 
the leading 0 count.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to