Split the repository code into two parts:

- fetch of the trust anchors (the certs referenced by TAL files)
- fetch of the MFT files of a repository

While the two look similar, there are some differences:

- TA files are loaded via rsync or https URI (only one file needs to be
  loaded)
- MFT files need everything inside the repository to be loaded since they
  refer to other files (.roa, .cer, .crl). These repositories are
  synced once with rsync and many MFTs may be part of a repo. Also these
  repositories can be synced via rsync or RRDP.

To simplify these diverse options it is time to split the code up.
Introduce a ta_lookup() along with repo_lookup(). Refactor the repo_lookup
code into subfunctions repo_alloc() and repo_fetch() (both are also used
by ta_lookup()). Use the caRepository URI to figure out the base URI.
Simplify rsync_uri_parse() into rsync_base_uri() which clips off excess
directories from the URI (else thousands of individual rsync calls would
be made against the RIR's CA repos).

The big change is that the layout of the cache directory is changed.
The cache will now have two base directories:
- ta/ (for all trust anchors)
- rsync/ (for all other repositories)

-- 
:wq Claudio

Index: extern.h
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.42
diff -u -p -r1.42 extern.h
--- extern.h    8 Feb 2021 09:22:53 -0000       1.42
+++ extern.h    8 Feb 2021 13:44:22 -0000
@@ -392,9 +392,7 @@ void                 proc_parser(int) __attribute__((n
 
 /* Rsync-specific. */
 
-int             rsync_uri_parse(const char **, size_t *,
-                       const char **, size_t *, const char **, size_t *,
-                       enum rtype *, const char *);
+char           *rsync_base_uri(const char *);
 void            proc_rsync(char *, char *, int) __attribute__((noreturn));
 
 /* Logging (though really used for OpenSSL errors). */
Index: main.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.98
diff -u -p -r1.98 main.c
--- main.c      5 Feb 2021 12:26:52 -0000       1.98
+++ main.c      8 Feb 2021 13:50:20 -0000
@@ -78,11 +78,12 @@
  * An rsync repository.
  */
 struct repo {
-       char            *repo;  /* repository rsync URI */
-       char            *local; /* local path name */
-       char            *notify; /* RRDB notify URI if available */
-       size_t           id; /* identifier (array index) */
-       int              loaded; /* whether loaded or not */
+       char            *repouri;       /* CA repository base URI */
+       char            *local;         /* local path name */
+       char            *uris[2];       /* URIs to fetch from */
+       size_t           id;            /* identifier (array index) */
+       int              uriidx;        /* which URI is fetched */
+       int              loaded;        /* whether loaded or not */
 };
 
 size_t entity_queue;
@@ -284,33 +285,12 @@ entityq_add(struct entityq *q, char *fil
 }
 
 /*
- * Look up a repository, queueing it for discovery if not found.
+ * Allocate a new repository by extending the repotable.
  */
-static const struct repo *
-repo_lookup(const char *uri)
+static struct repo *
+repo_alloc(void)
 {
-       const char      *host, *mod;
-       size_t           hostsz, modsz, i;
-       char            *local;
-       struct repo     *rp;
-       struct ibuf     *b;
-
-       if (!rsync_uri_parse(&host, &hostsz,
-           &mod, &modsz, NULL, NULL, NULL, uri))
-               errx(1, "%s: malformed", uri);
-
-       if (asprintf(&local, "%.*s/%.*s", (int)hostsz, host,
-           (int)modsz, mod) == -1)
-               err(1, "asprintf");
-
-       /* Look up in repository table. */
-
-       for (i = 0; i < rt.reposz; i++) {
-               if (strcmp(rt.repos[i].local, local))
-                       continue;
-               free(local);
-               return &rt.repos[i];
-       }
+       struct repo *rp;
 
        rt.repos = reallocarray(rt.repos,
                rt.reposz + 1, sizeof(struct repo));
@@ -320,28 +300,99 @@ repo_lookup(const char *uri)
        rp = &rt.repos[rt.reposz++];
        memset(rp, 0, sizeof(struct repo));
        rp->id = rt.reposz - 1;
-       rp->local = local;
 
-       if ((rp->repo = strndup(uri, mod + modsz - uri)) == NULL)
-               err(1, "strdup");
+       return rp;
+}
 
-       if (!noop) {
-               if (asprintf(&local, "%s", rp->local) == -1)
-                       err(1, "asprintf");
-               logx("%s: pulling from network", local);
-               if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL)
-                       err(1, NULL);
-               io_simple_buffer(b, &rp->id, sizeof(rp->id));
-               io_str_buffer(b, local);
-               io_str_buffer(b, rp->repo);
-               ibuf_close(&rsyncq, b);
-               free(local);
-       } else {
+static void
+repo_fetch(struct repo *rp)
+{
+       struct ibuf     *b;
+
+       if (noop) {
                rp->loaded = 1;
                logx("%s: using cache", rp->local);
                stats.repos++;
                /* there is nothing in the queue so no need to flush */
+               return;
+       }
+
+       logx("%s: pulling from network", rp->local);
+       if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL)
+               err(1, NULL);
+       io_simple_buffer(b, &rp->id, sizeof(rp->id));
+       io_str_buffer(b, rp->local);
+       io_str_buffer(b, rp->uris[0]);
+       ibuf_close(&rsyncq, b);
+}
+
+/*
+ * Look up a trust anchor, queueing it for download if not found.
+ */
+static const struct repo *
+ta_lookup(const struct tal *tal)
+{
+       struct repo     *rp;
+       char            *local;
+       size_t          i, j;
+
+       if (asprintf(&local, "ta/%s", tal->descr) == -1)
+               err(1, "asprinf");
+
+       /* Look up in repository table. (Lookup should actually fail here) */
+       for (i = 0; i < rt.reposz; i++) {
+               if (rt.repos[i].repouri != NULL ||
+                   strcmp(rt.repos[i].local, local))
+                       continue;
+               free(local);
+               return &rt.repos[i];
        }
+
+       rp = repo_alloc();
+       rp->local = local;
+       for (i = 0, j = 0; i < tal->urisz && j < 2; i++) {
+               if (strncasecmp(tal->uri[i], "rsync://", 8) != 0)
+                       continue;       /* ignore non rsync URI for now */
+               rp->uris[j++] = tal->uri[i];
+       }
+       if (j == 0)
+               errx(1, "TAL file has no rsync:// URI");
+
+       repo_fetch(rp);
+       return rp;
+}
+
+/*
+ * Look up a repository, queueing it for discovery if not found.
+ */
+static const struct repo *
+repo_lookup(const char *uri)
+{
+       char            *local, *repo;
+       struct repo     *rp;
+       size_t           i;
+
+       if ((repo = rsync_base_uri(uri)) == NULL)
+               return NULL;
+
+       /* Look up in repository table. */
+       for (i = 0; i < rt.reposz; i++) {
+               if (rt.repos[i].repouri == NULL ||
+                   strcmp(rt.repos[i].repouri, repo))
+                       continue;
+               free(repo);
+               return &rt.repos[i];
+       }
+
+       rp = repo_alloc();
+       rp->repouri = repo;
+       local = strchr(repo, ':') + strlen("://");
+       if (asprintf(&rp->local, "rsync/%s", local) == -1)
+               err(1, "asprintf");
+       if ((rp->uris[0] = strdup(repo)) == NULL)
+               err(1, "strdup");
+
+       repo_fetch(rp);
        return rp;
 }
 
@@ -353,7 +404,10 @@ repo_filename(const struct repo *repo, c
 {
        char *nfile;
 
-       uri += strlen(repo->repo) + 1;
+       if (strstr(uri, repo->repouri) != uri)
+               errx(1, "%s: URI outside of repository", uri);
+       uri += strlen(repo->repouri) + 1;       /* skip base and '/' */
+
        if (asprintf(&nfile, "%s/%s", repo->local, uri) == -1)
                err(1, "asprintf");
        return nfile;
@@ -484,22 +538,17 @@ queue_add_from_tal(struct entityq *q, co
 {
        char                    *nfile;
        const struct repo       *repo;
-       const char              *uri = NULL;
-       size_t                   i;
+       const char              *uri;
 
        assert(tal->urisz);
 
-       for (i = 0; i < tal->urisz; i++) {
-               uri = tal->uri[i];
-               if (strncasecmp(uri, "rsync://", 8) == 0)
-                       break;
-       }
-       if (uri == NULL)
-               errx(1, "TAL file has no rsync:// URI");
-
        /* Look up the repository. */
-       repo = repo_lookup(uri);
-       nfile = repo_filename(repo, uri);
+       repo = ta_lookup(tal);
+
+       uri = strrchr(repo->uris[0], '/');
+       assert(uri);
+       if (asprintf(&nfile, "%s/%s", repo->local, uri + 1) == -1)
+               err(1, "asprintf");
 
        entityq_add(q, nfile, RTYPE_CER, repo, tal->pkey,
            tal->pkeysz, tal->descr);
@@ -515,6 +564,9 @@ queue_add_from_cert(struct entityq *q, c
        char                    *nfile;
 
        repo = repo_lookup(cert->mft);
+       if (repo == NULL) /* bad repository URI */
+               return;
+
        nfile = repo_filename(repo, cert->mft);
 
        entityq_add(q, nfile, RTYPE_MFT, repo, NULL, 0, NULL);
@@ -1081,8 +1133,10 @@ main(int argc, char *argv[])
 
        /* Memory cleanup. */
        for (i = 0; i < rt.reposz; i++) {
+               free(rt.repos[i].repouri);
                free(rt.repos[i].local);
-               free(rt.repos[i].repo);
+               free(rt.repos[i].uris[0]);
+               free(rt.repos[i].uris[1]);
        }
        free(rt.repos);
 
Index: rsync.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/rsync.c,v
retrieving revision 1.16
diff -u -p -r1.16 rsync.c
--- rsync.c     3 Feb 2021 09:29:22 -0000       1.16
+++ rsync.c     8 Feb 2021 13:43:44 -0000
@@ -45,110 +45,50 @@ struct     rsyncproc {
 };
 
 /*
- * Conforms to RFC 5781.
- * Note that "Source" is broken down into the module, path, and also
- * file type relevant to RPKI.
- * Any of the pointers (except "uri") may be NULL.
- * Returns zero on failure, non-zero on success.
+ * Return the base of a rsync URI (rsync://hostname/module). The
+ * caRepository provided by the RIR CAs point deeper than they should
+ * which would result in many rsync calls for almost every subdirectory.
+ * This is inefficient so instead crop the URI to a common base.
+ * The returned string needs to be freed by the caller.
  */
-int
-rsync_uri_parse(const char **hostp, size_t *hostsz,
-    const char **modulep, size_t *modulesz,
-    const char **pathp, size_t *pathsz,
-    enum rtype *rtypep, const char *uri)
+char *
+rsync_base_uri(const char *uri)
 {
-       const char      *host, *module, *path;
-       size_t           sz;
-
-       /* Initialise all output values to NULL or 0. */
-
-       if (hostsz != NULL)
-               *hostsz = 0;
-       if (modulesz != NULL)
-               *modulesz = 0;
-       if (pathsz != NULL)
-               *pathsz = 0;
-       if (hostp != NULL)
-               *hostp = 0;
-       if (modulep != NULL)
-               *modulep = 0;
-       if (pathp != NULL)
-               *pathp = 0;
-       if (rtypep != NULL)
-               *rtypep = RTYPE_EOF;
+       const char *host, *module, *rest;
 
        /* Case-insensitive rsync URI. */
-
        if (strncasecmp(uri, "rsync://", 8)) {
                warnx("%s: not using rsync schema", uri);
-               return 0;
+               return NULL;
        }
 
        /* Parse the non-zero-length hostname. */
-
        host = uri + 8;
 
        if ((module = strchr(host, '/')) == NULL) {
                warnx("%s: missing rsync module", uri);
-               return 0;
+               return NULL;
        } else if (module == host) {
                warnx("%s: zero-length rsync host", uri);
-               return 0;
+               return NULL;
        }
 
-       if (hostp != NULL)
-               *hostp = host;
-       if (hostsz != NULL)
-               *hostsz = module - host;
-
        /* The non-zero-length module follows the hostname. */
-
-       if (module[1] == '\0') {
+       module++;
+       if (*module == '\0') {
                warnx("%s: zero-length rsync module", uri);
-               return 0;
+               return NULL;
        }
 
-       module++;
-
        /* The path component is optional. */
-
-       if ((path = strchr(module, '/')) == NULL) {
-               assert(*module != '\0');
-               if (modulep != NULL)
-                       *modulep = module;
-               if (modulesz != NULL)
-                       *modulesz = strlen(module);
-               return 1;
-       } else if (path == module) {
+       if ((rest = strchr(module, '/')) == NULL) {
+               return strdup(uri);
+       } else if (rest == module) {
                warnx("%s: zero-length module", uri);
-               return 0;
-       }
-
-       if (modulep != NULL)
-               *modulep = module;
-       if (modulesz != NULL)
-               *modulesz = path - module;
-
-       path++;
-       sz = strlen(path);
-
-       if (pathp != NULL)
-               *pathp = path;
-       if (pathsz != NULL)
-               *pathsz = sz;
-
-       if (rtypep != NULL && sz > 4) {
-               if (strcasecmp(path + sz - 4, ".roa") == 0)
-                       *rtypep = RTYPE_ROA;
-               else if (strcasecmp(path + sz - 4, ".mft") == 0)
-                       *rtypep = RTYPE_MFT;
-               else if (strcasecmp(path + sz - 4, ".cer") == 0)
-                       *rtypep = RTYPE_CER;
-               else if (strcasecmp(path + sz - 4, ".crl") == 0)
-                       *rtypep = RTYPE_CRL;
+               return NULL;
        }
 
-       return 1;
+       return strndup(uri, rest - uri);
 }
 
 static void

Reply via email to