commit:     d19c0cfce1aaee8399e553317e5541809cc5354b
Author:     Matt Jolly <kangie <AT> gentoo <DOT> org>
AuthorDate: Wed Oct 23 03:46:36 2024 +0000
Commit:     Matt Jolly <kangie <AT> gentoo <DOT> org>
CommitDate: Wed Oct 23 03:49:42 2024 +0000
URL:        
https://gitweb.gentoo.org/proj/chromium-tools.git/commit/?id=d19c0cfc

extract test_fonts sha

Signed-off-by: Matt Jolly <kangie <AT> gentoo.org>

 get-chromium-toolchain-strings.py | 154 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 151 insertions(+), 3 deletions(-)

diff --git a/get-chromium-toolchain-strings.py 
b/get-chromium-toolchain-strings.py
index 0582912..d7c124d 100755
--- a/get-chromium-toolchain-strings.py
+++ b/get-chromium-toolchain-strings.py
@@ -3,12 +3,154 @@
 # This script extracts the revision and sub-revision from the update.py and 
update_rust.py files in the Chromium source code.
 # The revision and sub-revision are used to identify the version of Clang and 
Rust used in the Chromium toolchain.
 
-
+import json
 import requests
 import sys
 
+def get_testfonts(url) -> str:
+    """
+    Reads the DEPS file (gclient) and extracts the testfonts SHA which is used
+    as the object name (SHA256 as of 2024)
+    deps['src/third_party/test_fonts']['objects'][0]['object_name']
+
+    Args:
+        url (str): The URL of the DEPS file on GitHub's raw endpoint.
+
+    Returns:
+        str: The SHA256 of the testfonts, or None if not found.
+        """
+
+    # We're not properly parsing the DEPS file, but it's 'close enough' to 
JSON that
+    # we can throw away the preamble and do some remediation to read the 
values in.
+
+    testfonts = None
+    response = requests.get(url)
+    if response.status_code == 200:
+        text = response.content.decode('utf-8')
+        lines = text.splitlines()
+        # throw away everything up to `deps = {`
+        # We'll add our own opening brace to make it valid JSON
+        start = 0
+        for idx, line in enumerate(lines):
+            if line.startswith("deps = {"):
+                start = idx + 1
+                break
+
+        # throw away everything after the variable ends `}`
+        length = 0
+        for idx, line in enumerate(lines):
+            if idx < start:
+                continue
+            if line.startswith("}"):
+                length = idx
+                break
+
+        deps: list[str] = ['{', '}']
+        deps[1:1] = lines[start:length]
+
+        # remove any comments, because JSON doesn't like them
+        deps = [line for line in deps if not line.strip().startswith('#')]
+
+        # I hate to do this, but we need to remediate the JSON - single quotes 
to double quotes ho!
+        deps = [line.replace("'", '"') for line in deps]
+        # the `condition` variable is always a python comparison. Let's not 
even try to parse it.
+        # we don't care so just drop the whole line
+        deps = [line for line in deps if "condition" not in line]
+        # ditto `Var()`
+        deps = [line for line in deps if "Var(" not in line]
+        # if a line ends in ' +' it's a python thing and we probably already 
stripped whatever is being
+        # concatenated, so we can just remove the '+' and append a ','.
+        deps = [line.replace(" +", ",") if line.endswith(" +") else line for 
line in deps]
+        # strip ' "@",' from any lines... No idea what gclient does with this
+        deps = [line.replace(' "@",', "") for line in deps]
+
+
+        # If we encounter '[{' or '}]' we should expand them onto individual 
lines.
+        # for '[{', remove the { and add it on a new line, for '}]' remove the 
] and add it on a new line.
+        # every instance so far has been '}],' so let's assume that holds true?
+        newdeps = []
+        for line in deps:
+            if '[{' in line:
+                # 'blah: [', '{'
+                newdeps.append(line[:-1])
+                newdeps.append('{')
+            elif '}]' in line:
+                # '},', '],'
+                newdeps.append(line[:-2])
+                newdeps.append('],')
+            else:
+                newdeps.append(line)
 
-def get_revision_info(url):
+        deps = newdeps
+
+        # if the last thing in an object has a trailing comma, it's invalid 
JSON so we need to remove it,
+        # probably easiest to do if we check that the next line is '}' when 
stripped and remediate that
+        newdeps = []
+        for idx, line in enumerate(deps):
+            if line.endswith(",") and deps[idx + 1].strip() == "}":
+                newdeps.append(line.replace(",", ""))
+            elif line.endswith(",") and deps[idx + 1].strip() == "},":
+                newdeps.append(line.replace(",", ""))
+            else:
+                newdeps.append(line)
+
+        deps = newdeps
+        newdeps = []
+
+        for idx, line in enumerate(deps):
+            if line.endswith("},") and deps[idx + 1].strip() == "]":
+                newdeps.append(line.replace(",", ""))
+            elif line.endswith("},") and deps[idx + 1].strip() == "],":
+                newdeps.append(line.replace(",", ""))
+            else:
+                newdeps.append(line)
+
+        deps = newdeps
+
+        # If the line does not contain a colon _and_ the previous and next 
lines contain '{' and '}' respectively,
+        # it's very likely a naked sha and json can't parse it. We can just 
strip it.
+        newdeps = []
+        for idx, line in enumerate(deps):
+            if ":" not in line and "{" in deps[idx - 1] and '}' in deps[idx + 
1]:
+                continue
+            else:
+                newdeps.append(line)
+
+        deps = newdeps
+
+        # final blacklist; not worth writing a rule for this
+        bad_lines = [
+            '+ "@" + "42e892d96e47b1f6e29844cc705e148ec4856448", # release 
1.9.4',
+        ]
+        deps = [line for line in deps if line.strip() not in bad_lines]
+
+        # Clean up any keys with no values. Always do this last
+        newdeps = []
+        for idx, line in enumerate(deps):
+            if line.endswith(":") and deps[idx + 1].strip() == "":
+                continue
+            else:
+                newdeps.append(line)
+
+        deps = newdeps
+
+        # debug_lines = range(1460, 1500)
+        # for idx, line in enumerate(deps):
+        #     if idx in debug_lines:
+        #         print(f"{idx}: {line}")
+
+        # Now we have a list of strings that should be valid JSON
+        # We can join them and load them
+        deps = json.loads('\n'.join(deps))
+        # Now we can get the testfonts SHA
+        return 
deps['src/third_party/test_fonts/test_fonts']['objects'][0]['object_name']
+    else:
+        raise ValueError(f"Failed to get revision info. Status code: 
{response.status_code}")
+
+    return testfonts
+
+
+def get_revision_info(url) -> str:
     """
     Extracts revision and sub-revision from a Chromium source file URL.
 
@@ -16,7 +158,7 @@ def get_revision_info(url):
         url (str): The URL of the source file on GitHub's raw endpoint.
 
     Returns:
-        tuple: A tuple containing the revision (str) and sub-revision (int), 
+        tuple: A tuple containing the revision (str) and sub-revision (int)
                or (None, None) if not found.
     """
     response = requests.get(url)
@@ -48,8 +190,10 @@ def main():
     base_url = "https://raw.githubusercontent.com/chromium/chromium/";
     clang_url = f"{base_url}{version}/tools/clang/scripts/update.py"
     rust_url = f"{base_url}{version}/tools/rust/update_rust.py"
+    deps_url = f"{base_url}{version}/DEPS"
     clang_revision, clang_sub_revision = get_revision_info(clang_url)
     rust_revision, rust_sub_revision = get_revision_info(rust_url)
+    testfonts = get_testfonts(deps_url)
     if clang_revision and clang_sub_revision:
         print(f"clang revision: {clang_revision}-{clang_sub_revision}")
     else:
@@ -58,6 +202,10 @@ def main():
         print(f"rust revision: {rust_revision}-{rust_sub_revision}")
     else:
         print("rust revision not found")
+    if testfonts:
+        print(f"test fonts: {testfonts}")
+    else:
+        print("test fonts not found")
 
 if __name__ == "__main__":
     main()

Reply via email to