Hi,

just to keep this thread alive, here are a few comments on the progress.
(Mostly for those not following QEMU mailing lists.)


On Fri, 9 Sep 2016 15:18:02 +0300
Roman Kagan <[email protected]> wrote:

> On Fri, Sep 09, 2016 at 03:08:31PM +0300, Roman Kagan wrote:
> > On Fri, Sep 09, 2016 at 01:03:49PM +0200, Tomáš Golembiovský wrote:
> > I think something like this can be done for your usecase, e.g. you can
> > create qcow2 images with images from the archive as backing files.  One
> > possibility to do so is to have a loopback block device on top of the
> > archive with appropriate offset and length

Using the loopback device would be the easiest solution in the sense
that it wouldn't require code changes outside virt-v2v. But, there's a
big issue: it requires root privileges. That's why I set this idea aside
from the beginning and decided to use it only as a last resort.


Another suggestion was to use qemu-nbd because it already has an --offset
argument to set where the disk starts. It turned out that:

a)  The code for --offset is buggy and clients may attempt to read data
    outside the backing file, because qemu-nbd reports the wrong size of
    the disk. Such requests will inevitably fail.

b)  We also need to be able to specify the size of the disk. It cannot
    be left undefined and limited only by the length of the underlying
    file. Security reasons aside, there are also some practical reasons.
    E.g. it turned out that the VMDK format stores a footer at the end of
    the disk. If the size of the disk is not set properly, the VMDK driver
    fails to find the footer and regards the disk as invalid.


I fixed the issues and sent patches to QEMU [1][2].

Somebody quickly commented [3] that doing this in qemu-nbd is not the
right idea and that it should be done in the raw driver...

> Alternatively, you can make one layer less if you teach QEMU raw block
> driver to recongnize offset and length options.
> 
> Roman.

... which is what Roman suggested from the start.

It'll be more complicated than fixing qemu-nbd, but I gave it a shot. I
have sent the first version [4] of that to QEMU. I wouldn't expect it to
go in right away; there's still some work to be done.

That being said, I also have a POC ready (attached) for virt-v2v. It's
rather dirty as it contains some 'tar' invocations to get the necessary
information from the archive. There are some OCaml modules for working
with tar archives, but I haven't tried any yet.

The good thing is that QEMU people are not against putting the size &
offset parameters into the raw driver. Still, it may turn out to be too
hard a task. In that case, as a fallback, we can still use the NBD
approach. We don't have to use qemu-nbd. We could also try to submit code
for size & offset into the nbd-server in NBD tools (the de facto reference
implementation). But that's something I haven't pursued yet.


    Tomas

[1] https://lists.nongnu.org/archive/html/qemu-devel/2016-09/msg04554.html
[2] https://lists.nongnu.org/archive/html/qemu-devel/2016-09/msg04556.html
[3] https://lists.nongnu.org/archive/html/qemu-devel/2016-09/msg04578.html
[4] https://lists.nongnu.org/archive/html/qemu-block/2016-10/msg00008.html

-- 
Tomáš Golembiovský <[email protected]>
diff --git a/v2v/input_ova.ml b/v2v/input_ova.ml
index 4f848e2..492b934 100644
--- a/v2v/input_ova.ml
+++ b/v2v/input_ova.ml
@@ -39,11 +39,11 @@ object
 
   method source () =
     (* Extract ova file. *)
-    let exploded =
+    let exploded, partial =
       (* The spec allows a directory to be specified as an ova.  This
        * is also pretty convenient.
        *)
-      if is_directory ova then ova
+      if is_directory ova then ova, false
       else (
         let uncompress_head zcat file =
           let cmd = sprintf "%s %s" zcat (quote file) in
@@ -66,11 +66,18 @@ object
           if run_command cmd <> 0 then
             error (f_"error unpacking %s, see earlier error messages") ova in
 
+        (* Unpack only the .ovf/.mf members of [file]; xargs -r skips tar if none match. *)
+        let untar_partial file outdir =
+          let cmds = [ [ "tar"; "-tf"; file ]; [ "grep"; "\\.\\(ovf\\|mf\\)$" ];
+                       [ "xargs"; "-r"; "tar"; "-xf"; file; "-C"; outdir ] ] in
+          if shell_command (String.concat " | " (List.map stringify_args cmds)) <> 0 then
+            error (f_"error unpacking %s, see earlier error messages") ova in
+
         match detect_file_type ova with
         | `Tar ->
           (* Normal ovas are tar file (not compressed). *)
-          untar ova tmpdir;
-          tmpdir
+          untar_partial ova tmpdir;
+          tmpdir, true
         | `Zip ->
           (* However, although not permitted by the spec, people ship
            * zip files as ova too.
@@ -80,7 +87,7 @@ object
             [ "-j"; "-d"; tmpdir; ova ] in
           if run_command cmd <> 0 then
             error (f_"error unpacking %s, see earlier error messages") ova;
-          tmpdir
+          tmpdir, false
         | (`GZip|`XZ) as format ->
           let zcat, tar_fmt =
             match format with
@@ -93,7 +100,7 @@ object
           (match tmpfiletype with
           | `Tar ->
             untar ~format:tar_fmt ova tmpdir;
-            tmpdir
+            tmpdir, false
           | `Zip | `GZip | `XZ | `Unknown ->
             error (f_"%s: unsupported file format\n\nFormats which we currently understand for '-i ova' are: tar (uncompressed, compress with gzip or xz), zip") ova
           )
@@ -121,6 +128,40 @@ object
       loop [dir]
     in
 
+    (* [find_file_in_tar tar filename] looks up member [filename] in the
+     * [tar] archive and returns (byte offset of its data, size in bytes).
+     * Raises [Not_found] if the listing has no such member; calls [error]
+     * if the offset or size printed by tar is not an integer.
+     *)
+    let find_file_in_tar tar filename =
+      (* awk picks the listing line whose 8th field is [filename] and prints
+       * field 2 up to its ':' (the block number) and field 5 (the size).
+       *)
+      let cmd1 = [ "tar"; "tRvf"; tar ] in
+      let cmd2 = [ "awk"; sprintf
+        "$8 == \"%s\" {print substr($2, 1, index($2, \":\")-1), $5}"
+        filename ]
+      in
+      let cmd = (stringify_args cmd1) ^ " | " ^ (stringify_args cmd2) in
+      let parse fmt s =
+        (* Catch any Failure: matching its message text is fragile. *)
+        try int_of_string s
+        with Failure _ -> error fmt s
+      in
+      match external_command cmd with
+      | [] -> raise Not_found
+      | line :: _ ->
+        let soffset, ssize = String.split " " line in
+        let block = parse (f_"Invalid offset returned by `tar`: %s") soffset in
+        let size = parse (f_"Invalid size returned by `tar`: %s") ssize in
+        (* The value reported by tar is a 512-byte block number, and the
+         * member is preceded by one block of tar header, so skip that
+         * block and convert to bytes before returning.
+         *)
+        (block+1)*512, size
+    in
+
+
     (* Search for the ovf file. *)
     let ovf = find_files exploded ".ovf" in
     let ovf =
@@ -132,6 +173,8 @@ object
         error (f_"more than one .ovf file was found in %s") ova in
 
     (* Read any .mf (manifest) files and verify sha1. *)
+    (* TODO: this won't work once the code is fixed and actually starts
+     * verifying hashes. *)
     let mf = find_files exploded ".mf" in
     let rex = Str.regexp "SHA1(\\(.*\\))=\\([0-9a-fA-F]+\\)\r?" in
     List.iter (
@@ -264,14 +307,30 @@ object
             | Some s -> s in
 
           (* Does the file exist and is it readable? *)
-          let filename = ovf_folder // filename in
-          Unix.access filename [Unix.R_OK];
+          let filename =
+            if not partial then (
+              let filename = ovf_folder // filename in
+              Unix.access filename [Unix.R_OK];
+              filename
+            )
+            else (
+              let start, offset =
+                try find_file_in_tar ova filename
+                with Not_found ->
+                  error "File '%s' not found in the tar archive" filename
+              in
+              sprintf
+                "json:{ \"file.filename\":\"%s\", \"file.offset\":\"%d\", \"file.size\":\"%d\"}"
+                ova start offset
+            )
+          in
 
           (* The spec allows the file to be gzip-compressed, in which case
            * we must uncompress it into the tmpdir.
            *)
           let filename =
-            if detect_file_type filename = `GZip then (
+            (* TODO: partial + compressed? *)
+            if not partial && (detect_file_type filename = `GZip) then (
               let new_filename = tmpdir // String.random8 () ^ ".vmdk" in
               let cmd =
                 sprintf "zcat %s > %s" (quote filename) (quote new_filename) in
_______________________________________________
Libguestfs mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/libguestfs

Reply via email to