_CreateDisk used to just throw an exception if _CreateBlockDev failed
leaving the caller in the state that some disks were created, without
precise knowledge which. Usually, the clean up then overapproximated
by removing all disks of the instance. This can cause data loss in case
the disk creation failed because of a file being already in the place
where the disk is to be created.

With this patch, upon failure, _CreateDisk removes the disks that were
successfully created, leaving a well-defined state.

Signed-off-by: Klaus Aehlig <[email protected]>
---
 lib/cmdlib.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index bd0919c..00640e2 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -9699,6 +9699,7 @@ def _CreateDisks(lu, instance, to_skip=None, 
target_node=None):
     result.Raise("Failed to create directory '%s' on"
                  " node %s" % (file_storage_dir, pnode))
 
+  disks_created = []
   # Note: this needs to be kept in sync with adding of disks in
   # LUInstanceSetParams
   for idx, device in enumerate(instance.disks):
@@ -9708,7 +9709,19 @@ def _CreateDisks(lu, instance, to_skip=None, 
target_node=None):
     #HARDCODE
     for node in all_nodes:
       f_create = node == pnode
-      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
+      try:
+        _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
+        disks_created.append((node, device))
+      except errors.OpExecError:
+        logging.warning("Creating disk %s for instance '%s' failed",
+                        idx, instance.name)
+        for (node, disk) in disks_created:
+          lu.cfg.SetDiskID(disk, node)
+          result = lu.rpc.call_blockdev_remove(node, disk)
+          if result.fail_msg:
+            logging.warning("Failed to remove newly-created disk %s on node 
%s:"
+                            " %s", device, node, result.fail_msg)
+        raise
 
 
 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
@@ -9716,8 +9729,7 @@ def _RemoveDisks(lu, instance, target_node=None, 
ignore_failures=False):
 
   This abstracts away some work from `AddInstance()` and
   `RemoveInstance()`. Note that in case some of the devices couldn't
-  be removed, the removal will continue with the other ones (compare
-  with `_CreateDisks()`).
+  be removed, the removal will continue with the other ones.
 
   @type lu: L{LogicalUnit}
   @param lu: the logical unit on whose behalf we execute
-- 
1.8.1.3

Reply via email to