I pleasantly hacked/drafted a super basic and quick sourcefile enumerator
for the new guix 1.5.0 release in python, attached. Contains a Cuirass
class for programmatic access to the build server, although dependencies
are scraped from html and better collected from the derivation files as
demonstrated a little in the source. Uses joblib to cache http responses if
interrupted, outputs store paths on stdout. Does not yet use other servers
if the .drv is missing, instead providing other avenues of enumeration.
These sourcefiles can be retrieved via the .nar method and their signatures
via the .narinfo method.
I haven't yet let this run to completion to shake out edge-case errors;
these are presently allowed to propagate if they happen, and I added
resuming via caching instead of handling them.
It uses a handful of quick scraping tactics and is delicate in spots, but
likely much more robust than before due to derivation walking.
Many people have likely already done this.
import requests
import joblib
@joblib.Memory('cache').cache
def get(url):
    """Fetch *url* with an HTTP GET and return the response object.

    The function is wrapped by ``joblib.Memory('cache').cache``, so results
    are memoized through joblib under the ``cache`` location; this is what
    lets an interrupted run resume without repeating finished requests.

    Raises whatever ``requests`` raises for connection problems or timeouts,
    and ``requests.HTTPError`` (via ``raise_for_status``) for 4xx/5xx
    responses. Failed calls are therefore never returned to the caller.
    """
    import sys

    # Progress output goes to stderr so that it does not interleave with
    # data the calling script writes to stdout.
    print(url, file=sys.stderr)
    # Without a timeout requests waits forever on a stalled connection.
    # The value bounds connect/read waits, not the total transfer time.
    result = requests.get(url, timeout=60)
    result.raise_for_status()
    return result
from getcache import get as GET
import lzma
class Cuirass:
    """Small client for the HTTP interface of a Cuirass build server.

    Every request is issued through the module-level ``GET`` helper. The
    methods differ only in the path they request and in how the response is
    decoded (JSON, text, raw bytes, or decompressed bytes).
    """

    # Text that introduces the dependency cell on a build "details" page.
    _DEPENDENCIES_MARKER = '<tr><th>Dependencies</th><td class="dependencies">'

    def __init__(self, url = "https://ci.guix.gnu.org/"):
        """Store the base URL, normalised to end with exactly one slash."""
        # Paths are appended directly to the base, so a missing trailing
        # slash would otherwise glue the host and the first path segment.
        self.url = url.rstrip('/') + '/'

    def _url(self, *path, **params):
        """Return the request URL for *path* segments and query *params*.

        Path items are converted with ``str`` and joined with ``/``.
        Parameters whose value is ``None`` are omitted; the rest are
        percent-encoded. No ``?`` is emitted when there is no query.
        """
        from urllib.parse import urlencode

        location = self.url + '/'.join(str(item) for item in path)
        # Commas are kept literal so comma-joined lists (see ``jobs``) look
        # the same on the wire as when they were built by hand.
        query = urlencode(
            {k: v for k, v in params.items() if v is not None}, safe=','
        )
        return f'{location}?{query}' if query else location

    def get(self, *path, **params):
        """Request the URL built from *path* and *params* via ``GET``."""
        return GET(self._url(*path, **params))

    def json(self, *path, **params):
        """Return the response body decoded with its ``.json()`` method."""
        return self.get(*path, **params).json()

    def text(self, *path, **params):
        """Return the response body as text (``.text``)."""
        return self.get(*path, **params).text

    def lzip(self, *path, **params):
        """Return the response body after ``lzma.decompress``.

        NOTE(review): the body is handed to ``lzma.decompress`` unchanged;
        whether the lzip container is accepted presumably depends on the
        liblzma version Python is linked against -- confirm on the target
        system.
        """
        data = self.get(*path, **params).content
        return lzma.decompress(data)

    def specifications(self):
        """Return the JSON served at ``jobsets``."""
        return self.json('jobsets')

    def evaluations(self, nr, spec = None):
        """Return the JSON of ``api/evaluations`` for *nr* and optional *spec*."""
        return self.json('api', 'evaluations', nr=nr, spec=spec)

    def evaluation(self, id):
        """Return the JSON of ``api/evaluation`` for evaluation *id*."""
        return self.json('api', 'evaluation', id=id)

    def evaluationlog(self, id):
        """Return the text served at ``eval/<id>/log/raw``."""
        return self.text('eval', id, 'log', 'raw')

    def channels(self, id):
        """Return the JSON served at ``eval/<id>/channels.json``."""
        return self.json('eval', id, 'channels.json')

    def jobs(self, evaluation, *names):
        """Return the JSON of ``api/jobs`` for *evaluation*.

        When *names* are given they are sent comma-joined as the ``names``
        parameter; otherwise that parameter is left out.
        """
        joined = ','.join(names) if names else None
        return self.json('api', 'jobs', evaluation=evaluation, names=joined)

    def build(self, bid):
        """Return the JSON served at ``build/<bid>``."""
        return self.json('build', bid)

    def buildlog(self, bid):
        """Return the text served at ``build/<bid>/log/raw``."""
        return self.text('build', bid, 'log', 'raw')

    def details(self, bid):
        """Return the HTML text served at ``build/<bid>/details``."""
        return self.text('build', bid, 'details')

    @classmethod
    def _parse_dependencies(cls, html):
        """Extract ``{name: build id}`` from a build details page.

        This is plain string scraping of the dependency table cell and is
        tied to the exact markup of the page. Raises ``ValueError`` when the
        expected markup is not found.
        """
        _, found, dephunk = html.partition(cls._DEPENDENCIES_MARKER)
        if not found:
            raise ValueError('no dependencies cell in build details page')
        # An empty cell (optionally holding a dash) means no dependencies.
        if dephunk.startswith(('—</td></tr>', '</td></tr>')):
            return {}
        # The list is terminated either by a button or by the end of the
        # cell; try the button form first.
        try:
            dephunk, _ = dephunk.split('</a><br /></div><button', 1)
        except ValueError:
            dephunk, _ = dephunk.split('</a><br /></div></td></tr>', 1)
        found_deps = {}
        for depchunk in dephunk.split('</a><br /></div><div'):
            link = depchunk.split('<a href="/build/', 1)[1]
            dep_id, name = link.split('/details">', 1)
            found_deps[name] = int(dep_id)
        return found_deps

    def dependencies(self, bid):
        """Return ``{name: build id}`` scraped from the details page of *bid*."""
        return self._parse_dependencies(self.details(bid))

    def download(self, id:int):
        """Return the raw bytes served at ``download/<id>``."""
        return self.get('download', id).content

    def output(self, oid):
        """Return the JSON of ``output/<oid>``; a ``/gnu/store/`` prefix is dropped."""
        oid = oid.removeprefix('/gnu/store/')
        return self.json('output', oid)

    def outputlog(self, oid):
        """Return the text of ``log/<oid>``; a ``/gnu/store/`` prefix is dropped."""
        oid = oid.removeprefix('/gnu/store/')
        return self.text('log', oid)

    @staticmethod
    def _parse_narinfo(text):
        """Split narinfo *text* into a list of ``[key, value]`` pairs.

        Each non-empty line is cut at its first ``:`` and one separating
        space is removed, so a line such as ``References:`` with nothing
        after the colon still yields a pair with an empty value.
        """
        pairs = []
        for line in text.strip('\n').split('\n'):
            if not line:
                continue
            key, _, value = line.partition(':')
            pairs.append([key, value.removeprefix(' ')])
        return pairs

    def narinfo(self, oid, raw=False):
        """Fetch ``<hash>.narinfo`` for store item *oid*.

        The hash is everything before the first ``-`` of the item name
        (after dropping a ``/gnu/store/`` prefix). Returns the raw text when
        *raw* is true, otherwise a list of ``[key, value]`` pairs.
        """
        store_hash, *_ = oid.removeprefix('/gnu/store/').split('-')
        narinfo = self.text(store_hash + '.narinfo')
        if raw:
            return narinfo
        return self._parse_narinfo(narinfo)

    def nar(self, url):
        """Return the decompressed bytes of ``nar/lzip/<last segment of url>``."""
        fn = url.split('/')[-1]
        return self.lzip('nar', 'lzip', fn)
# Walk the builds of one evaluation, then the derivations behind them, and
# print the output path of every derivation whose first output carries a
# hash (presumably the fixed-output, i.e. source, derivations -- confirm).
import ast  # used below to parse derivation text without executing it

c = Cuirass()
eid = 2125531 # guix 1.5.0
# `seen` holds both build ids (ints) and store item names (strings).
seen = set()
bids = [job['build'] for job in c.jobs(eid)]
sitms = []
while bids or sitms:
    if bids:
        bid = bids.pop()
        if bid in seen:
            continue
        seen.add(bid)
        # Queue the dependencies without rebinding `bid`: the lookup below
        # must fetch the build that was just popped, not its last dependency.
        bids.extend(c.dependencies(bid).values())
        build = c.build(bid)
        sitms.append(build['derivation'])
    while sitms:
        # Store items are tracked by file name, without the directory part.
        sitm = sitms.pop().split('/')[-1]
        if sitm in seen:
            continue
        seen.add(sitm)
        narinfo = dict(c.narinfo(sitm))
        if sitm.endswith('.drv'):
            drv = c.nar(narinfo['URL'])
            # Cut the argument list of `Derive(...)` out of the archive: it
            # starts after the opening marker and ends at the first `)`
            # followed by a \0 or \x01 byte.
            drv = drv.split(b'Derive(')[1]
            drv, *_ = drv.split(b')\0')
            drv, *_ = drv.split(b')\x01')
            # SECURITY: this text was downloaded, so it must not be passed to
            # eval(). The argument list consists only of tuples, lists and
            # strings, which literal_eval parses without executing anything.
            drv = ast.literal_eval(drv.decode())
            out, inps, *_ = drv
            # Fourth field of the first output tuple; non-empty selects the
            # derivation for printing.
            if out[0][3]:
                print(out[0][1])
            sitms.extend(inp[0] for inp in inps)
        refs = narinfo['References']
        if refs:
            sitms.extend(refs.split(' '))