I pleasantly hacked/drafted a super basic and quick sourcefile enumerator
for the new guix 1.5.0 release in python, attached. Contains a Cuirass
class for programmatic access to the build server, although dependencies
are scraped from html and better collected from the derivation files as
demonstrated a little in the source. Uses joblib to cache http responses if
interrupted, outputs store paths on stdout. Does not yet use other servers
if the .drv is missing, instead providing other avenues of enumeration.
These sourcefiles can be retrieved via the .nar method and their signatures
via the .narinfo method.
I haven't let this run to troubleshoot edge errors which are presently
allowed to propagate if they happen, instead having added resuming via
caching.
It uses a handful of quick scraping tactics and is delicate in spots, but
likely much more robust than before due to derivation walking.
Many people have likely already done this.
import requests
import joblib

@joblib.Memory('cache').cache
def get(url):
    """Fetch *url* with an HTTP GET and return the ``requests.Response``.

    The function is wrapped by ``joblib.Memory('cache').cache``, so results
    are memoized through joblib using the ``cache`` location; only calls that
    actually run the body print the URL.

    Raises whatever ``requests`` raises for a failed request, including
    ``requests.HTTPError`` from ``raise_for_status()`` on a 4xx/5xx reply.
    """
    import sys

    # Progress goes to stderr: the enumerator script prints store paths on
    # stdout, and fetched URLs must not be mixed into that stream.
    print(url, file=sys.stderr)
    # requests never times out by default; without this a stalled connection
    # would block the whole crawl indefinitely.
    result = requests.get(url, timeout=60)
    result.raise_for_status()
    return result
from getcache import get as GET
import lzma

class Cuirass:
    """Small HTTP client for a Cuirass build server.

    Every request goes through the module-level ``GET`` helper. Each public
    method maps onto one URL path below ``self.url``; the JSON/text/bytes
    variants only differ in how the response body is decoded.
    """

    def __init__(self, url="https://ci.guix.gnu.org/"):
        # Base URL; request paths are appended directly, so it should end
        # with a slash.
        self.url = url

    def _url(self, *path, **params):
        """Return the request URL for *path* segments and query *params*.

        Path segments are stringified and joined with ``/``. Parameters whose
        value is ``None`` are dropped; the rest are URL-encoded (commas are
        kept literal so comma-separated lists stay readable). No ``?`` is
        appended when there is nothing to send.
        """
        from urllib.parse import urlencode

        url = self.url + '/'.join(str(item) for item in path)
        query = urlencode(
            {k: v for k, v in params.items() if v is not None}, safe=','
        )
        return f'{url}?{query}' if query else url

    def get(self, *path, **params):
        """Fetch the URL built from *path* and *params* via ``GET``."""
        return GET(self._url(*path, **params))

    def json(self, *path, **params):
        """Fetch and return the response decoded with ``.json()``."""
        return self.get(*path, **params).json()

    def text(self, *path, **params):
        """Fetch and return the response's ``.text``."""
        return self.get(*path, **params).text

    def lzip(self, *path, **params):
        """Fetch and return the response body passed through ``lzma.decompress``.

        NOTE(review): the caller below requests ``nar/lzip/...``; this relies
        on ``lzma.decompress`` auto-detecting that container -- confirm with
        the liblzma your Python is linked against.
        """
        data = self.get(*path, **params).content
        return lzma.decompress(data)

    def specifications(self):
        """Return the JSON served at ``jobsets``."""
        return self.json('jobsets')

    def evaluations(self, nr, spec=None):
        """Return the JSON served at ``api/evaluations`` for *nr* and optional *spec*."""
        return self.json('api', 'evaluations', nr=nr, spec=spec)

    def evaluation(self, id):
        """Return the JSON served at ``api/evaluation`` for evaluation *id*."""
        return self.json('api', 'evaluation', id=id)

    def evaluationlog(self, id):
        """Return the text served at ``eval/<id>/log/raw``."""
        return self.text('eval', id, 'log', 'raw')

    def channels(self, id):
        """Return the JSON served at ``eval/<id>/channels.json``."""
        return self.json('eval', id, 'channels.json')

    def jobs(self, evaluation, *names):
        """Return the JSON served at ``api/jobs`` for *evaluation*.

        When *names* are given they are sent as one comma-separated ``names``
        parameter; otherwise the parameter is omitted.
        """
        return self.json(
            'api', 'jobs',
            evaluation=evaluation,
            names=','.join(names) if names else None,
        )

    def build(self, bid):
        """Return the JSON served at ``build/<bid>``."""
        return self.json('build', bid)

    def buildlog(self, bid):
        """Return the text served at ``build/<bid>/log/raw``."""
        return self.text('build', bid, 'log', 'raw')

    def details(self, bid):
        """Return the HTML text served at ``build/<bid>/details``."""
        return self.text('build', bid, 'details')

    @staticmethod
    def _parse_dependencies(html):
        """Scrape ``{name: build id}`` from a build details page.

        This is plain string splitting against the markup of the
        "Dependencies" table row, so it is sensitive to template changes.
        Raises ``ValueError`` when the expected markers are missing.
        """
        _, dephunk = html.split(
            '<tr><th>Dependencies</th><td class="dependencies">', 1
        )
        # An empty cell (optionally holding just a dash) means no dependencies.
        if dephunk.startswith(('—</td></tr>', '</td></tr>')):
            return {}
        try:
            # Variant of the cell that is followed by a <button> element.
            dephunk, _ = dephunk.split('</a><br /></div><button', 1)
        except ValueError:
            # Otherwise the list runs up to the end of the table cell.
            dephunk, _ = dephunk.split('</a><br /></div></td></tr>', 1)
        depchunks = dephunk.split('</a><br /></div><div')
        return {
            name: int(dep_id)
            for depchunk in depchunks
            for dep_id, name in [
                depchunk.split('<a href="/build/', 1)[1].split('/details">')
            ]
        }

    def dependencies(self, bid):
        """Return ``{name: build id}`` scraped from the details page of *bid*."""
        return self._parse_dependencies(self.details(bid))

    def download(self, id: int):
        """Return the raw bytes served at ``download/<id>``."""
        return self.get('download', id).content

    def output(self, oid):
        """Return the JSON served at ``output/<oid>`` (``/gnu/store/`` prefix stripped)."""
        oid = oid.removeprefix('/gnu/store/')
        return self.json('output', oid)

    def outputlog(self, oid):
        """Return the text served at ``log/<oid>`` (``/gnu/store/`` prefix stripped)."""
        oid = oid.removeprefix('/gnu/store/')
        return self.text('log', oid)

    @staticmethod
    def _parse_narinfo(text):
        """Split narinfo *text* into a list of ``[key, value]`` pairs.

        Each line is split on its first ``': '``. A line without that
        separator (for example ``Key:`` with nothing after it) yields an
        empty value instead of a one-element entry, so the result can always
        be passed to ``dict()``.
        """
        pairs = []
        for line in text.strip('\n').split('\n'):
            key, sep, value = line.partition(': ')
            if not sep:
                key = line.removesuffix(':')
            pairs.append([key, value])
        return pairs

    def narinfo(self, oid, raw=False):
        """Fetch ``<hash>.narinfo`` for store item *oid*.

        *oid* may be a full ``/gnu/store/...`` path or a bare item name; only
        the part before the first ``-`` is used. Returns the raw text when
        *raw* is true, otherwise a list of ``[key, value]`` pairs.
        """
        oid, *_ = oid.removeprefix('/gnu/store/').split('-')
        narinfo = self.text(oid + '.narinfo')
        if raw:
            return narinfo
        return self._parse_narinfo(narinfo)

    def nar(self, url):
        """Fetch ``nar/lzip/<last path segment of url>`` and return it decompressed."""
        fn = url.split('/')[-1]
        return self.lzip('nar', 'lzip', fn)

# Enumerate source files for one evaluation: walk the build dependency graph
# reported by the server, then walk each build's derivation (and narinfo
# references), printing the output path of every derivation whose first
# output carries a hash.
import ast

c = Cuirass()

eid = 2125531 # guix 1.5.0
seen = set()  # build ids (int) and store item names (str) already handled
bids = [job['build'] for job in c.jobs(eid)]
sitms = []  # store items still to visit
while bids or sitms:
    if bids:
        bid = bids.pop()
        if bid in seen:
            continue
        seen.add(bid)
        # Queue the dependencies without rebinding `bid`: the lookup below
        # must fetch the build that was just popped, not its last dependency.
        bids.extend(c.dependencies(bid).values())
        build = c.build(bid)
        sitms.append(build['derivation'])
    while sitms:
        sitm = sitms.pop().split('/')[-1]
        if sitm in seen:
            continue
        seen.add(sitm)
        narinfo = dict(c.narinfo(sitm))
        if sitm.endswith('.drv'):
            drv = c.nar(narinfo['URL'])
            # Cut the `Derive(...)` term out of the nar stream; the closing
            # paren is located by the byte that follows it in the archive.
            drv = drv.split(b'Derive(')[1]
            drv, *_ = drv.split(b')\0')
            drv, *_ = drv.split(b')\x01')
            # The term body is nested lists/tuples of string literals.
            # Parse it as data only: this text comes from the network and
            # must never be handed to eval().
            drv = ast.literal_eval(drv.decode())
            out, inps, *_ = drv
            # Field 3 of the first output is its hash; print the path only
            # when that hash is non-empty.
            if out[0][3]:
                print(out[0][1])
            sitms.extend([inp[0] for inp in inps])
        refs = narinfo.get('References')
        if refs:
            sitms.extend(refs.split(' '))
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks
  • ... Undescribed Horrific Abuse, One Victim & Survivor of Many via cypherpunks

Reply via email to