Hi,

This patch makes it possible to mount QEMU disk images on the host.

It is based on the Network Block Device protocol and allows qemu-img to
become an NBD server (Yes, Anthony, userspace block device is the right
way to do that... :-P ).

Once you've applied the attached patch to QEMU and built the binaries,
you can use it like this:

# ./qemu-img server -d 1234 etch.qcow2

This starts an NBD server on port 1234 that exposes the disk image
etch.qcow2. "-d" means the server daemonizes itself and runs in the
background.

Then you need to connect the block device to the server:

# nbd-client localhost 1234 /dev/nbd0
Negotiation: ..size = 4194304KB
bs=1024, sz=4194304

This will link etch.qcow2 to /dev/nbd0.

Then, to see the partitions, you can use kpartx, as Daniel explained, or my
patched loop module (I can send an updated, bug-free version).
...
# kpartx -a /dev/nbd0
...
or
...
# rmmod loop
# insmod drivers/block/loop.ko max_part=64
# losetup -f /dev/nbd0
...
# mount /dev/loop0p1 /mnt
# ls /mnt
bench  cdrom    etc     initrd.img  media  proc  selinux  tmp  vmlinuz
bin    clients  home    lib         mnt    root  srv      usr
boot   dev      initrd  lost+found  opt    sbin  sys      var
# cd
# umount /mnt
# losetup -d  /dev/loop0
# nbd-client -d /dev/nbd0

TODO: security/host client checking, device lock...

As usual all comments are welcome,
have fun,
Laurent
-- 
----------------- [EMAIL PROTECTED]  ------------------
  "La perfection est atteinte non quand il ne reste rien à
ajouter mais quand il ne reste rien à enlever." Saint Exupéry
---
 qemu-img.c |  348 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 348 insertions(+)

Index: qemu/qemu-img.c
===================================================================
--- qemu.orig/qemu-img.c	2008-01-25 13:09:10.000000000 +0100
+++ qemu/qemu-img.c	2008-01-25 13:16:49.000000000 +0100
@@ -25,6 +25,12 @@
 #include "block_int.h"
 #include <assert.h>
 
+#ifdef __linux__
+#include <arpa/inet.h>
+#include <netinet/tcp.h>
+#include <sys/wait.h>
+#endif /* __linux__ */
+
 #ifdef _WIN32
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
@@ -92,6 +98,9 @@ static void help(void)
            "  commit [-f fmt] filename\n"
            "  convert [-c] [-e] [-6] [-f fmt] filename [filename2 [...]] [-O output_fmt] output_filename\n"
            "  info [-f fmt] filename\n"
+#ifdef __linux__
+           "  server [-d] [-f fmt] port filename\n"
+#endif
            "\n"
            "Command parameters:\n"
            "  'filename' is a disk image filename\n"
@@ -105,6 +114,9 @@ static void help(void)
            "  '-c' indicates that target image must be compressed (qcow format only)\n"
            "  '-e' indicates that the target image must be encrypted (qcow format only)\n"
            "  '-6' indicates that the target image must use compatibility level 6 (vmdk format only)\n"
+#ifdef __linux__
+           "  '-d' daemonize (server only)\n"
+#endif
            );
     printf("\nSupported format:");
     bdrv_iterate_format(format_print, NULL);
@@ -602,6 +614,338 @@ static int img_convert(int argc, char **
     return 0;
 }
 
+#ifdef __linux__
+
+//#define DEBUG_SERVER
+
+#ifdef DEBUG_SERVER /* debug build: DPRINTF prints to stdout with an "img-server: " prefix */
+#define DPRINTF(fmt, args...) \
+do { printf("img-server: " fmt , ##args); } while (0)
+#else /* normal build: DPRINTF expands to an empty statement, its arguments are dropped */
+#define DPRINTF(fmt, args...) do {} while(0)
+#endif /* DEBUG_SERVER */
+
+# if __BYTE_ORDER == __BIG_ENDIAN /* 64-bit host<->network conversion; big-endian host: identity */
+# define htonll(x) (x)
+# define ntohll(x) (x)
+#else /* any other byte order: swap the eight bytes */
+# define htonll(x) __bswap_64(x)
+# define ntohll(x) __bswap_64(x)
+#endif /* NOTE(review): assumes __BYTE_ORDER and __bswap_64 come from already-included system headers -- confirm */
+
+#define BUFSIZE (1024*1024) /* size in bytes of the per-client buffer in client_loop() */
+
+#define INIT_PASSWD "NBDMAGIC" /* first 8 bytes sent to a client by negotiate() */
+
+#define NBD_REQUEST_MAGIC 0x25609513 /* value expected in nbd_request.magic */
+#define NBD_REPLY_MAGIC 0x67446698 /* value stored in nbd_reply.magic */
+
+enum { /* request types, matched against nbd_request.type in client_loop() */
+    NBD_CMD_READ = 0, /* read from the image and send the data back */
+    NBD_CMD_WRITE = 1, /* receive data and write it to the image */
+    NBD_CMD_DISC = 2 /* client disconnects; ends the request loop */
+};
+
+struct nbd_request { /* request header, read verbatim from the client socket */
+    uint32_t magic; /* NBD_REQUEST_MAGIC, network byte order */
+    uint32_t type; /* one of NBD_CMD_*, network byte order */
+    char handle[8]; /* copied unchanged into the matching reply */
+    uint64_t from; /* byte offset into the image, network byte order */
+    uint32_t len; /* byte count, network byte order */
+} __attribute__ ((packed)); /* packed: no padding between the fields */
+
+struct nbd_reply { /* reply header sent back to the client */
+    uint32_t magic; /* NBD_REPLY_MAGIC, network byte order */
+    uint32_t error; /* 0 on success, non-zero when the block operation failed */
+    char handle[8]; /* handle of the request being answered */
+} __attribute__ ((packed)); /* packed: no padding between the fields */
+
+static void sigchld_handler(int s) /* SIGCHLD: reap exited client children */
+{
+    int status, saved_errno = errno; /* waitpid() may overwrite errno */
+    pid_t pid;
+    /* Signals coalesce: reap every exited child; read status only on success. */
+    while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
+        if (WIFEXITED(status))
+            DPRINTF("child %d exited\n", pid);
+    errno = saved_errno;
+}
+
+/* Read exactly len bytes from fd into buf.
+ * Returns 0 on success, -errno on read error, -ECONNRESET on premature EOF. */
+static int nbd_receive(int fd, char *buf, size_t len)
+{
+    while (len > 0) {
+        ssize_t rd = read(fd, buf, len);
+        if (rd <= 0) /* read() returns 0 at EOF: fail, or the loop never ends */
+            return rd < 0 ? -errno : -ECONNRESET;
+        len -= rd;
+        buf += rd;
+    }
+    return 0;
+}
+
+/* Write all len bytes of buf to fd; returns 0, or -errno if write() fails. */
+static int nbd_send(int fd, char *buf, size_t len)
+{
+    size_t done = 0;
+
+    while (done < len) {
+        ssize_t n = write(fd, buf + done, len - done);
+        if (n < 0)
+            return -errno;
+        done += n;
+    }
+    return 0;
+}
+
+/*
+ * Send the server greeting on sock: the 8-byte INIT_PASSWD string, a 64-bit
+ * magic, the 64-bit size and 128 zero bytes; both integers go out in
+ * network byte order.  Returns 0 on success, -1 if any send fails.
+ */
+static int negotiate(int sock, uint64_t size)
+{
+    char zeros[128];
+    uint64_t magic = htonll(0x00420281861253ULL);
+
+    /* uint64_t is not 'long' on 32-bit hosts: cast to match the format */
+    DPRINTF("negotiate(%d,%llu)\n", sock, (unsigned long long)size);
+    memset(zeros, 0, sizeof(zeros));
+    size = htonll(size);
+
+    /* nbd_send() loops over short writes, which a bare write() does not */
+    if (nbd_send(sock, INIT_PASSWD, 8) < 0)
+        return -1;
+    if (nbd_send(sock, (char*)&magic, sizeof(magic)) < 0)
+        return -1;
+    if (nbd_send(sock, (char*)&size, sizeof(size)) < 0)
+        return -1;
+    if (nbd_send(sock, zeros, sizeof(zeros)) < 0)
+        return -1;
+    return 0;
+}
+
+/* Serve one client on socket 'net' from image 'drv': send the greeting, then
+ * answer requests until the client disconnects or an error occurs. */
+static void client_loop(BlockDriverState *drv, int net)
+{
+    struct nbd_request request;
+    struct nbd_reply reply;
+    uint64_t total_sectors;
+    char buf[BUFSIZE]; /* reply header followed by the data payload */
+    int nodelay = 1, ret;
+
+    DPRINTF("client_loop(%p,%d)\n", drv, net);
+    ret = setsockopt(net, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay));
+    if (ret < 0) {
+        DPRINTF("setsockopt failed %d\n", errno);
+        return;
+    }
+
+    bdrv_get_geometry(drv, &total_sectors);
+    if (negotiate(net, total_sectors * 512) < 0) {
+        DPRINTF("negotiate failed %d\n", errno);
+        return;
+    }
+
+    while(1) {
+        uint32_t len;
+        uint64_t from;
+        size_t out_len = sizeof(struct nbd_reply); /* bytes to send back */
+
+        ret = nbd_receive(net, (char*)&request, sizeof(request));
+        if (ret < 0) {
+            DPRINTF("read failed %d (%d)\n", ret, errno);
+            break;
+        }
+        DPRINTF("request magic %x type %d from %llx len %x\n",
+                ntohl(request.magic), ntohl(request.type),
+                (unsigned long long)ntohll(request.from), ntohl(request.len));
+        if (request.magic != htonl(NBD_REQUEST_MAGIC)) {
+            DPRINTF("Bad Magic\n");
+            return;
+        }
+        if (request.type == htonl(NBD_CMD_DISC)) {
+            DPRINTF("Command Disconnect\n");
+            break;
+        }
+        len = ntohl(request.len);
+        if (len > BUFSIZE - sizeof(struct nbd_reply)) { /* must fit in buf */
+            DPRINTF("len too big %d\n", len);
+            break;
+        }
+
+        reply.magic = htonl(NBD_REPLY_MAGIC);
+        reply.error = 0;
+        memcpy(reply.handle, request.handle, sizeof(reply.handle));
+        from = ntohll(request.from);
+
+        switch(ntohl(request.type)) {
+        case NBD_CMD_READ:
+            ret = bdrv_read(drv, from >> 9, buf + out_len, len >> 9);
+            if (ret != 0) {
+                DPRINTF("bdrv_read error %d\n", ret);
+                reply.error = htonl(EIO); /* errno value, network order */
+            } else {
+                out_len += len; /* data follows a successful reply only */
+            }
+            memcpy(buf, &reply, sizeof(struct nbd_reply));
+            ret = nbd_send(net, buf, out_len);
+            if (ret < 0) {
+                DPRINTF("NBD_CMD_READ: cannot sent result\n");
+                return;
+            }
+            break;
+        case NBD_CMD_WRITE:
+            ret = nbd_receive(net, buf, len);
+            if (ret < 0) {
+                DPRINTF("NBD_CMD_WRITE: cannot receive block %d != %d\n", ret, len);
+                return;
+            }
+            ret = bdrv_write(drv, from >> 9, buf, len >> 9);
+            if (ret != 0) {
+                DPRINTF("bdrv_write error %d\n", ret);
+                reply.error = htonl(EIO); /* errno value, network order */
+            }
+            ret = nbd_send(net, (char*)&reply, sizeof(reply));
+            if (ret < 0) {
+                DPRINTF("NBD_CMD_WRITE: cannot sent result %d != %d\n", ret, len);
+                return;
+            }
+            break;
+        default: /* unknown command: report it rather than stay silent */
+            reply.error = htonl(EINVAL);
+            if (nbd_send(net, (char*)&reply, sizeof(reply)) < 0)
+                return;
+            break;
+        }
+    }
+}
+
+/* Listen on TCP 'port' and fork one child per accepted connection.  A child
+ * serves 'drv' via client_loop() and then returns to the caller; the parent
+ * loops forever. */
+void server_loop(BlockDriverState *drv, int port)
+{
+    struct sockaddr_in addrin;
+    socklen_t addrinlen; /* accept() takes a socklen_t *, not an int * */
+    int sock;
+    int ret;
+    int yes = 1;
+    fd_set read_fds;
+    struct sigaction sa;
+
+    sa.sa_handler = sigchld_handler;
+    sigemptyset(&sa.sa_mask);
+    sa.sa_flags = SA_RESTART;
+    if(sigaction(SIGCHLD, &sa, NULL) == -1)
+        error("Failed to catch SIGCHLD\n");
+
+    sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+    if (sock < 0)
+        error("error while creating socket");
+
+    ret = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(int));
+    if (ret == -1)
+        error("setsockopt SO_REUSEADDR");
+
+    ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &yes, sizeof(int));
+    if (ret == -1)
+        error("setsockopt SO_KEEPALIVE");
+
+    memset(&addrin, 0, sizeof(addrin)); /* also clears the sin_zero padding */
+    addrin.sin_family = AF_INET;
+    addrin.sin_port = htons(port);
+    addrin.sin_addr.s_addr = htonl(INADDR_ANY);
+
+    ret = bind(sock, (struct sockaddr *) &addrin, sizeof(addrin));
+    if (ret < 0)
+            error("error bind");
+
+    ret = listen(sock, 1);
+    if (ret < 0)
+            error("error listen");
+
+    /* server main loop */
+    while(1) {
+        int net;
+        pid_t pid;
+
+        /* select() overwrites the set, so rebuild it on every iteration */
+        FD_ZERO(&read_fds);
+        FD_SET(sock, &read_fds);
+        DPRINTF("server select\n");
+        ret = select(sock + 1, &read_fds, NULL, NULL, NULL);
+        if (ret <= 0 || !FD_ISSET(sock, &read_fds))
+            continue;
+
+        addrinlen = sizeof(addrin); /* value-result argument: reset each time */
+        net = accept(sock, (struct sockaddr *) &addrin, &addrinlen);
+        if (net < 0)
+            continue;
+
+        pid = fork();
+        if (pid == 0) {
+            close(sock); /* the listening socket belongs to the parent */
+            client_loop(drv, net);
+            close(net);
+            return;
+        }
+        close(net); /* parent: the child holds its own copy */
+    }
+}
+
+/* "server" sub-command: parse [-d] [-f fmt] port filename, open the image and
+ * serve it with server_loop().  Returns 0. */
+static int img_server(int argc, char **argv)
+{
+    int c, daemonize = 0;
+    const char *fmt = NULL;
+    BlockDriverState *drv;
+    long port;
+    char *filename, *end;
+
+    for(;;) {
+        c = getopt(argc, argv, "f:hd");
+        if (c == -1)
+            break;
+        switch(c) {
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'd':
+            daemonize = 1;
+            break;
+        }
+    }
+
+    if (argc - optind < 2)
+        help();
+
+    /* reject non-numeric or out-of-range ports instead of truncating them */
+    port = strtol(argv[optind], &end, 0);
+    if (end == argv[optind] || *end != '\0' || port < 1 || port > 65535)
+        error("Invalid port '%s'", argv[optind]);
+    filename = argv[optind + 1];
+
+    drv = bdrv_new_open(filename, fmt);
+    if (!drv)
+        error("Could not open '%s'", filename);
+
+#ifndef DEBUG_SERVER
+    if (daemonize && daemon(0, 0) < 0)
+        error("Could not daemonize");
+#endif
+
+    server_loop(drv, port);
+    bdrv_delete(drv);
+    return 0;
+}
+#endif /* __linux__ */
+
 #ifdef _WIN32
 static int64_t get_allocated_file_size(const char *filename)
 {
@@ -746,6 +1090,10 @@ int main(int argc, char **argv)
         img_convert(argc, argv);
     } else if (!strcmp(cmd, "info")) {
         img_info(argc, argv);
+#ifdef __linux__
+    } else if (!strcmp(cmd, "server")) {
+        img_server(argc, argv);
+#endif /* __linux__ */
     } else {
         help();
     }

Reply via email to