Hi Chengyu,

On 2025/8/21 15:32, ChengyuZhu6 wrote:

Thanks for the patch!

The author doesn't seem to be updated, you could

`git commit --amend --author="Chengyu Zhu <[email protected]>"`

or just use `git config` for global update.

This patch adds support for building EROFS filesystems from OCI-compliant

72-char per line.

container registries, enabling users to create EROFS images directly from
container images stored in registries like Docker Hub, Quay.io, etc.

The implementation includes:
- OCI remote backend with registry authentication support
- Manifest parsing for Docker v2 and OCI v1 formats
- Layer extraction and tar processing integration
- Multi-platform image selection capability
- Both anonymous and authenticated registry access
- Comprehensive build system integration

New mkfs.erofs option: --oci=registry/repo:tag[,options]

Supported options:
- platform=os/arch (default: linux/amd64)
- layer=N (extract specific layer, default: all layers)
- anonymous (use anonymous access)

Is it possible to enable anonymous access if username/password
are not specified?

- username/password (basic authentication)

Signed-off-by: Chengyu Zhu <[email protected]>
Signed-off-by: Changzhi Xie <[email protected]>

Your `Signed-off-by` needs to the last line of the commit message
since you sent out the email:

Signed-off-by: Changzhi Xie <[email protected]>
Signed-off-by: Chengyu Zhu <[email protected]>

---

...


diff --git a/lib/liberofs_oci.h b/lib/liberofs_oci.h
new file mode 100644
index 0000000..8ed98d7
--- /dev/null
+++ b/lib/liberofs_oci.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C) 2025 Tencent, Inc.
+ *             http://www.tencent.com/
+ */
+#ifndef __EROFS_OCI_H
+#define __EROFS_OCI_H
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct erofs_inode;
+
+/* OCI authentication modes */
+enum oci_auth_mode {
+       OCI_AUTH_ANONYMOUS,     /* No authentication */
+       OCI_AUTH_TOKEN,         /* Bearer token authentication */
+       OCI_AUTH_BASIC,         /* Basic authentication */
+};
+
+/* Maximum lengths for OCI parameters */
+#define OCI_REGISTRY_LEN               256
+#define OCI_REPOSITORY_LEN             256
+#define OCI_TAG_LEN                    64
+#define OCI_PLATFORM_LEN               32
+#define OCI_USERNAME_LEN               64
+#define OCI_PASSWORD_LEN               256

Can we make those string as dynamic allocated?
Hard-coded strings look dangerous.

+
+/**
+ * struct erofs_oci - OCI configuration
+ * @registry:         registry hostname (e.g., "registry-1.docker.io")
+ * @repository:       image repository (e.g., "library/ubuntu")
+ * @tag:              image tag or digest (e.g., "latest" or sha256:...)
+ * @platform:         target platform in "os/arch" format (e.g., "linux/amd64")
+ * @username:         username for basic authentication
+ * @password:         password for basic authentication
+ * @auth_mode:        authentication mode to use
+ * @layer_index:      specific layer to extract (-1 for all layers)
+ * @anonymous_access: whether to use anonymous access
+ */
+struct erofs_oci {

is it possible to rename it as `struct erofs_oci_params`?

+       char registry[OCI_REGISTRY_LEN + 1];
+       char repository[OCI_REPOSITORY_LEN + 1];
+       char tag[OCI_TAG_LEN + 1];
+       char platform[OCI_PLATFORM_LEN + 1];
+       char username[OCI_USERNAME_LEN + 1];
+       char password[OCI_PASSWORD_LEN + 1];
+       
+       enum oci_auth_mode auth_mode;

What's the difference between
 OCI_AUTH_ANONYMOUS
and
 anonymous_access?

+       int layer_index;
+       bool anonymous_access;
+};
+
+/**
+ * ocierofs_build_trees - Build file trees from OCI container image layers
+ * @root:     root inode to build the file tree under
+ * @oci:      OCI configuration
+ * @ref:      reference string (unused, kept for API compatibility)
+ * @fillzero: if true, only create inodes without downloading actual data
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+int ocierofs_build_trees(struct erofs_inode *root, struct erofs_oci *oci,
+                        const char *ref, bool fillzero);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __EROFS_OCI_H */
diff --git a/lib/remotes/oci.c b/lib/remotes/oci.c
new file mode 100644
index 0000000..e86af50
--- /dev/null
+++ b/lib/remotes/oci.c
@@ -0,0 +1,665 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C) 2025 Tencent, Inc.
+ *             http://www.tencent.com/
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <curl/curl.h>
+#include <json-c/json.h>
+#include "erofs/internal.h"
+#include "erofs/print.h"
+#include "erofs/inode.h"
+#include "erofs/blobchunk.h"
+#include "erofs/diskbuf.h"
+#include "erofs/rebuild.h"
+#include "erofs/tar.h"
+#include "liberofs_oci.h"
+
+/* Constants */
+#define OCI_URL_MAX_LEN                        8192
+#define OCI_AUTH_HEADER_MAX_LEN                1024
+#define OCI_TEMP_FILENAME_MAX_LEN      256
+
+/* Media types */
+#define DOCKER_MEDIATYPE_MANIFEST_V2   
"application/vnd.docker.distribution.manifest.v2+json"
+#define DOCKER_MEDIATYPE_MANIFEST_V1   
"application/vnd.docker.distribution.manifest.v1+json"
+#define DOCKER_MEDIATYPE_MANIFEST_LIST 
"application/vnd.docker.distribution.manifest.list.v2+json"
+#define OCI_MEDIATYPE_MANIFEST         
"application/vnd.oci.image.manifest.v1+json"
+#define OCI_MEDIATYPE_INDEX            
"application/vnd.oci.image.index.v1+json"
+
+/* Registry constants */
+#define DOCKER_REGISTRY                        "docker.io"
+#define DOCKER_API_REGISTRY            "registry-1.docker.io"
+#define QUAY_REGISTRY                  "quay.io"
+
+/* Global CURL handle */
+static CURL *g_curl;

and then make

struct erofs_oci {
        CURL *curl;
        struct erofs_oci_params *params;
};

As a OCI context (it can be used for multiple instance usage
such as runtime lazy loading.)

+> +/* HTTP request/response structures */
+struct oci_request {
+       char url[OCI_URL_MAX_LEN];
+       struct curl_slist *headers;
+};

struct erofs_oci_request {
};

+
+struct oci_response {

erofs_oci_response...

+       char *data;
+       size_t size;
+       long http_code;
+};
+
+/* Layer information */

The comment can be droped since it's obvious.

+struct oci_layer {

erofs_oci_layer {

+       char *digest;
+       u64 size;
+       const char *media_type;
+};
+
+/* Layer streaming context */
+struct oci_stream {

Same here.

+       struct erofs_tarfile tarfile;

        what's the purpose of `struct erofs_tarfile`?

+       FILE *temp_file;
+       char temp_filename[OCI_TEMP_FILENAME_MAX_LEN];
+       int layer_index;
+};
+
+/* Callback for writing response data to memory */
+static size_t oci_write_callback(void *contents, size_t size, size_t nmemb, 
void *userp)
+{
+       size_t realsize = size * nmemb;
+       struct oci_response *resp = userp;
+       char *ptr;
+
+       ptr = realloc(resp->data, resp->size + realsize + 1);
+       if (!ptr)
+               return 0;
+
+       resp->data = ptr;
+       memcpy(&resp->data[resp->size], contents, realsize);
+       resp->size += realsize;
+       resp->data[resp->size] = '\0';
+       return realsize;
+}
+
+/* Callback for writing layer data to file */
+static size_t oci_layer_write_callback(void *contents, size_t size, size_t 
nmemb, void *userp)
+{
+       struct oci_stream *stream = userp;
+       size_t realsize = size * nmemb;
+       
+       if (!stream->temp_file)
+               return 0;
+               
+       if (fwrite(contents, 1, realsize, stream->temp_file) != realsize) {
+               erofs_err("failed to write layer data for layer %d", 
stream->layer_index);
+               return 0;
+       }
+       
+       return realsize;
+}
+
+/* Perform HTTP request */
+static int oci_request_perform(struct oci_request *req, struct oci_response 
*resp)
+{
+       CURLcode res;
+       
+       erofs_dbg("requesting URL: %s", req->url);
+       
+       curl_easy_setopt(g_curl, CURLOPT_URL, req->url);
+       curl_easy_setopt(g_curl, CURLOPT_WRITEDATA, resp);
+       curl_easy_setopt(g_curl, CURLOPT_WRITEFUNCTION, oci_write_callback);
+       
+       if (req->headers)
+               curl_easy_setopt(g_curl, CURLOPT_HTTPHEADER, req->headers);
+
+       res = curl_easy_perform(g_curl);
+       if (res != CURLE_OK) {
+               erofs_err("curl request failed: %s", curl_easy_strerror(res));
+               return -EIO;
+       }
+
+       res = curl_easy_getinfo(g_curl, CURLINFO_RESPONSE_CODE, 
&resp->http_code);
+       if (res != CURLE_OK) {
+               erofs_err("failed to get HTTP response code: %s", 
curl_easy_strerror(res));
+               return -EIO;
+       }
+
+       if (resp->http_code < 200 || resp->http_code >= 300) {
+               erofs_err("HTTP request failed with code %ld", resp->http_code);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+/* Get authentication token */
+static char *oci_get_auth_token(const char *registry, const char *repository,
+                               const char *username, const char *password)
+{
+       struct oci_request req = {};
+       struct oci_response resp = {};
+       json_object *root, *token_obj;
+       const char *token;
+       char *auth_header = NULL;
+       int ret;
+
+       if (!registry || !repository)
+               return ERR_PTR(-EINVAL);
+
+       if (!strcmp(registry, DOCKER_API_REGISTRY) || !strcmp(registry, 
DOCKER_REGISTRY)) {
+               snprintf(req.url, sizeof(req.url),
+                        
"https://auth.docker.io/token?service=registry.docker.io&scope=repository:%s:pull";,
+                        repository);
+       } else if (!strcmp(registry, QUAY_REGISTRY)) {
+               snprintf(req.url, sizeof(req.url),
+                        
"https://%s/v2/auth?service=%s&scope=repository:%s:pull";,
+                        QUAY_REGISTRY, QUAY_REGISTRY, repository);
+       } else {
+               snprintf(req.url, sizeof(req.url),
+                        "https://%s/token?service=%s&scope=repository:%s:pull";,
+                        registry, registry, repository);
+       }

You could use asprintf to allocate dynamic strings...


...

                case 'V':
                        version();
@@ -1638,7 +1775,8 @@ int main(int argc, char **argv)
                erofs_uuid_generate(g_sbi.uuid);
if ((source_mode == EROFS_MKFS_SOURCE_TAR && !erofstar.index_mode) ||
-           (source_mode == EROFS_MKFS_SOURCE_S3)) {
+           (source_mode == EROFS_MKFS_SOURCE_S3) ||
+           (source_mode == EROFS_MKFS_SOURCE_OCI)) {
                err = erofs_diskbuf_init(1);
                if (err) {
                        erofs_err("failed to initialize diskbuf: %s",
@@ -1756,12 +1894,27 @@ int main(int argc, char **argv)
                                        dataimport_mode == 
EROFS_MKFS_DATA_IMPORT_ZEROFILL);
                        if (err)
                                goto exit;
+#endif
+#ifdef OCIEROFS_ENABLED
+               } else if (source_mode == EROFS_MKFS_SOURCE_OCI) {
+                       if (incremental_mode ||
+                           dataimport_mode == EROFS_MKFS_DATA_IMPORT_RVSP)
+                               err = -EOPNOTSUPP;
+                       else
+                               err = ocierofs_build_trees(root, &ocicfg,
+                                                          cfg.c_src_path,
+                                       dataimport_mode == 
EROFS_MKFS_DATA_IMPORT_ZEROFILL);
+                       if (err)
+                               goto exit;
+                       erofs_info("OCI build_trees completed, starting filesystem 
construction");
  #endif
                }
+ erofs_info("Starting erofs_rebuild_dump_tree...");

The debugging info can be removed..

                err = erofs_rebuild_dump_tree(root, incremental_mode);
                if (err)
                        goto exit;
+               erofs_info("erofs_rebuild_dump_tree completed");

Same here.

Thanks,
Gao Xiang

Reply via email to