Patch subject is complete summary.

 ocaml/xapi/static_vdis.ml      |  104 +++++++++++++++++-------------
 ocaml/xapi/static_vdis_list.ml |    2 +-
 ocaml/xapi/xapi_ha.ml          |  113 ++++++++++++++++++---------------
 ocaml/xapi/xha_metadata_vdi.ml |    6 +
 ocaml/xapi/xha_statefile.ml    |  136 +++++++++++++++++++++-------------------
 5 files changed, 197 insertions(+), 164 deletions(-)


# HG changeset patch
# User Mike McClurg <[email protected]>
# Date 1292518952 0
# Node ID 6d0cdb76d845fc9d0f2d3a30521a7afbde2936bc
# Parent  0e8d1cf6b4048cd0fc4c0154bfc27d318dfc7b98
imported patch CA-48539-XAPI_call_vdi_deactivate_during_pool-ha-disable

diff --git a/ocaml/xapi/static_vdis.ml b/ocaml/xapi/static_vdis.ml
--- a/ocaml/xapi/static_vdis.ml
+++ b/ocaml/xapi/static_vdis.ml
@@ -25,56 +25,68 @@
 let static_vdis = "/opt/xensource/bin/static-vdis"
 
 (** Generate the static configuration and attach the VDI now *)
-let permanent_vdi_attach ~__context ~vdi ~reason = 
-  info "permanent_vdi_attach: vdi = %s; sr = %s" 
-    (Ref.string_of vdi) (Ref.string_of (Db.VDI.get_SR ~__context ~self:vdi));
-  Helpers.call_script static_vdis
-    [ "add"; Db.VDI.get_uuid ~__context ~self:vdi; reason ];
-  (* VDI will be attached on next boot; attach it now too *)
-  String.rtrim (Helpers.call_script static_vdis
-		  [ "attach"; Db.VDI.get_uuid ~__context ~self:vdi ])
+let permanent_vdi_attach ~__context ~vdi ~reason =
+	info "permanent_vdi_attach: vdi = %s; sr = %s"
+		(Ref.string_of vdi) (Ref.string_of (Db.VDI.get_SR ~__context ~self:vdi));
+	ignore (Helpers.call_script static_vdis [ "add"; Db.VDI.get_uuid ~__context ~self:vdi; reason ]);
+	(* VDI will be attached on next boot; attach it now too *)
+	String.rtrim (Helpers.call_script static_vdis
+		[ "attach"; Db.VDI.get_uuid ~__context ~self:vdi ])
 
- (** Detach the VDI (by reference) now and destroy the static configuration *)
-let permanent_vdi_detach ~__context ~vdi = 
-  info "permanent_vdi_detach: vdi = %s; sr = %s" 
-    (Ref.string_of vdi) (Ref.string_of (Db.VDI.get_SR ~__context ~self:vdi));  
-  Sm.call_sm_vdi_functions ~__context ~vdi 
-    (fun srconf srtype sr -> Sm.vdi_detach srconf srtype sr vdi);
-  ignore(Helpers.call_script static_vdis
-           [ "del"; Db.VDI.get_uuid ~__context ~self:vdi ])
+(** Detach the VDI (by reference) now and destroy the static configuration *)
+let permanent_vdi_detach ~__context ~vdi =
+	info "permanent_vdi_detach: vdi = %s; sr = %s"
+		(Ref.string_of vdi) (Ref.string_of (Db.VDI.get_SR ~__context ~self:vdi));
+	Sm.call_sm_vdi_functions ~__context ~vdi
+		(fun srconf srtype sr -> Sm.vdi_detach srconf srtype sr vdi);
+	ignore(Helpers.call_script static_vdis
+		[ "del"; Db.VDI.get_uuid ~__context ~self:vdi ])
 
 (** Detach the VDI (by uuid) now and destroy the static configuration *)
-let permanent_vdi_detach_by_uuid ~__context ~uuid = 
-  info "permanent_vdi_detach: vdi-uuid = %s" uuid;
-  begin
-    try
-      (* This might fail because the VDI has been destroyed *)
-      let vdi = Db.VDI.get_by_uuid ~__context ~uuid in
-      Sm.call_sm_vdi_functions ~__context ~vdi 
-	(fun srconf srtype sr -> Sm.vdi_detach srconf srtype sr vdi)
-    with e ->
-      warn "Ignoring exception calling SM vdi_detach for VDI uuid %s: %s (possibly VDI has been deleted while we were offline" uuid (ExnHelper.string_of_exn e)
-  end;
-  ignore(Helpers.call_script static_vdis [ "del"; uuid ])
+let permanent_vdi_detach_by_uuid ~__context ~uuid =
+	info "permanent_vdi_detach: vdi-uuid = %s" uuid;
+	begin
+		try
+			(* This might fail because the VDI has been destroyed *)
+			let vdi = Db.VDI.get_by_uuid ~__context ~uuid in
+			Sm.call_sm_vdi_functions ~__context ~vdi
+				(fun srconf srtype sr -> Sm.vdi_detach srconf srtype sr vdi)
+		with e ->
+			warn "Ignoring exception calling SM vdi_detach for VDI uuid %s: %s (possibly VDI has been deleted while we were offline" uuid (ExnHelper.string_of_exn e)
+	end;
+	ignore(Helpers.call_script static_vdis [ "del"; uuid ])
+
+(** Added for CA-48539. Deactivates the VDI with the given uuid through the SM layer; any exception (from the database lookup or the SM call) is logged with [warn] and swallowed.
+	It does not touch the static-vdis configuration: you should probably follow this call with one of the previous vdi_detach functions. *)
+let permanent_vdi_deactivate_by_uuid ~__context ~uuid =
+	info "permanent_vdi_deactivate: vdi-uuid = %s" uuid ;
+	try
+		let vdi = Db.VDI.get_by_uuid ~__context ~uuid in
+		Sm.call_sm_vdi_functions ~__context ~vdi
+			(fun srconf srtype sr -> Sm.vdi_deactivate srconf srtype sr vdi)
+	with e ->
+		warn "Ignoring exception calling SM vdi_deactivate for VDI uuid %s: %s (possibly VDI has been deleted while we were offline"
+			uuid
+			(ExnHelper.string_of_exn e)
 
 (** Detaches and removes records for VDIs which have been deleted *)
-let gc () = 
-  Server_helpers.exec_with_new_task "GCing on-boot VDIs" (fun __context ->
-  List.iter
-    (fun vdi ->
-       let exists = try ignore(Db.VDI.get_by_uuid ~__context ~uuid:vdi.uuid); true with _ -> false in
-       if not(exists) then begin
-	 warn "static-vdi %s cannot be found in database; removing on-boot configuration" vdi.uuid;
-	 (* NB we can't call the SM functions since the record has gone *)
-	 ignore(Helpers.call_script static_vdis [ "del"; vdi.uuid ])
-       end
-    ) (list ()))
+let gc () =
+	Server_helpers.exec_with_new_task "GCing on-boot VDIs" (fun __context ->
+		List.iter
+			(fun vdi ->
+				let exists = try ignore(Db.VDI.get_by_uuid ~__context ~uuid:vdi.uuid); true with _ -> false in
+				if not(exists) then begin
+					warn "static-vdi %s cannot be found in database; removing on-boot configuration" vdi.uuid;
+					(* NB we can't call the SM functions since the record has gone *)
+					ignore(Helpers.call_script static_vdis [ "del"; vdi.uuid ])
+				end
+			) (list ()))
 
 (** If we just rebooted and failed to attach our static VDIs then this can be called to reattempt the attach:
-    this is necessary for HA to start. *)
-let reattempt_on_boot_attach () = 
-  let script = "/etc/init.d/attach-static-vdis" in
-  try
-    ignore(Helpers.call_script script [ "start" ])
-  with e ->
-    warn "Attempt to reattach static VDIs via '%s start' failed: %s" script (ExnHelper.string_of_exn e)
+	this is necessary for HA to start. *)
+let reattempt_on_boot_attach () =
+	let script = "/etc/init.d/attach-static-vdis" in
+	try
+		ignore(Helpers.call_script script [ "start" ])
+	with e ->
+		warn "Attempt to reattach static VDIs via '%s start' failed: %s" script (ExnHelper.string_of_exn e)
diff --git a/ocaml/xapi/static_vdis_list.ml b/ocaml/xapi/static_vdis_list.ml
--- a/ocaml/xapi/static_vdis_list.ml
+++ b/ocaml/xapi/static_vdis_list.ml
@@ -33,7 +33,7 @@
 	      let path = Filename.concat main_dir x in
 	      let uuid = Unixext.string_of_file (Filename.concat path "vdi-uuid") in
 	      let reason = Unixext.string_of_file (Filename.concat path "reason") in
-	      let bool_of_string x = String.lowercase x = "true" in
+	      (* let bool_of_string x = String.lowercase x = "true" in *)
 	      let delete_next_boot = 
 		try ignore(Unix.stat (Filename.concat path "delete-next-boot")); true 
 		with _ -> false in
diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml
--- a/ocaml/xapi/xapi_ha.ml
+++ b/ocaml/xapi/xapi_ha.ml
@@ -50,7 +50,7 @@
 let call_script ?log_successful_output script args =
 	try
 		Mutex.execute ha_script_m (fun () -> Helpers.call_script ?log_successful_output script args)
-	with Forkhelpers.Spawn_internal_error(stderr, stdout, Unix.WEXITED n) as e ->
+	with Forkhelpers.Spawn_internal_error(stderr, stdout, Unix.WEXITED n) ->
 		let code = Xha_errno.of_int n in
 		warn "%s %s returned %s (%s)" script (String.concat " " args)
 			(Xha_errno.to_string code) (Xha_errno.to_description_string code);
@@ -97,7 +97,7 @@
 	String_unmarshall_helper.map (fun x -> x) (fun x -> x) v
 
 (** Without using the Pool's database, returns the IP address of a particular host
-    named by UUID. *)
+	named by UUID. *)
 let address_of_host_uuid uuid =
 	let table = get_uuid_to_ip_mapping () in
 	if not(List.mem_assoc uuid table) then begin
@@ -106,8 +106,8 @@
 	end else List.assoc uuid table
 
 (** Without using the Pool's database, returns the UUID of a particular host named by
-    heartbeat IP address. This is only necesary because the liveset info doesn't include
-    the host IP address *)
+	heartbeat IP address. This is only necessary because the liveset info doesn't include
+	the host IP address *)
 let uuid_of_host_address address =
 	let table = List.map (fun (k, v) -> v, k) (get_uuid_to_ip_mapping ()) in
 	if not(List.mem_assoc address table) then begin
@@ -116,13 +116,13 @@
 	end else List.assoc address table
 
 (** Called in two circumstances:
-    1. When I started up I thought I was the master but my proposal was rejected by the
-    heartbeat component.
-    2. I was happily running as someone's slave but they left the liveset.
+	1. When I started up I thought I was the master but my proposal was rejected by the
+	heartbeat component.
+	2. I was happily running as someone's slave but they left the liveset.
  *)
 let on_master_failure () =
 	(* The plan is: keep asking if I should be the master. If I'm rejected then query the
-       live set and see if someone else has been marked as master, if so become a slave of them. *)
+	   live set and see if someone else has been marked as master, if so become a slave of them. *)
 
 	let become_master () =
 		info "This node will become the master";
@@ -391,7 +391,7 @@
 			let process_liveset_on_master liveset =
 				let pool = Helpers.get_pool ~__context in
 				let to_tolerate = Int64.to_int (Db.Pool.get_ha_host_failures_to_tolerate ~__context ~self:pool) in
-				let planned_for = Int64.to_int (Db.Pool.get_ha_plan_exists_for ~__context ~self:pool) in
+				(* let planned_for = Int64.to_int (Db.Pool.get_ha_plan_exists_for ~__context ~self:pool) in *)
 
 				(* First consider whether VM failover actions need to happen.
 				   Convert the liveset into a list of Host references used by the VM failover code *)
@@ -725,13 +725,13 @@
 
 
 (** Called when xapi restarts: server may be in emergency mode at this point. We need
-    to inspect the local configuration and if HA is supposed to be armed we need to
-    set everything up.
-    Note that
-    the master shouldn't be able to activate HA while we are offline since that would cause
-    us to come up with a broken configuration (the enable-HA stage has the critical task of
-    synchronising the HA configuration on all the hosts). So really we only want to notice
-    if the Pool has had HA disabled while we were offline. *)
+	to inspect the local configuration and if HA is supposed to be armed we need to
+	set everything up.
+	Note that
+	the master shouldn't be able to activate HA while we are offline since that would cause
+	us to come up with a broken configuration (the enable-HA stage has the critical task of
+	synchronising the HA configuration on all the hosts). So really we only want to notice
+	if the Pool has had HA disabled while we were offline. *)
 let on_server_restart () =
 	let armed = bool_of_string (Localdb.get Constants.ha_armed) in
 
@@ -760,7 +760,7 @@
 				let (_ : string) = call_script ha_start_daemon [] in
 				finished := true;
 			with
-				| Xha_error Xha_errno.Mtc_exit_daemon_is_present as e ->
+				| Xha_error Xha_errno.Mtc_exit_daemon_is_present ->
 					warn "ha_start_daemon failed with MTC_EXIT_DAEMON_IS_PRESENT: continuing with startup";
 					finished := true;
 				| Xha_error Xha_errno.Mtc_exit_invalid_pool_state as e ->
@@ -826,8 +826,8 @@
 	end
 
 (** Called in the master xapi startup when the database is ready. We set all hosts (including this one) to
-    disabled then signal the monitor thread to look. It can then wait for slaves to turn up
-    before trying to restart VMs. *)
+	disabled then signal the monitor thread to look. It can then wait for slaves to turn up
+	before trying to restart VMs. *)
 let on_database_engine_ready () =
 	info "Setting all hosts to dead and disabled. Hosts must re-enable themselves explicitly";
 	Server_helpers.exec_with_new_task "Setting all hosts to dead and disabled"
@@ -846,7 +846,7 @@
 (* Internal API calls to configure individual hosts                                          *)
 
 (** Internal API call to prevent this node making an unsafe failover decision.
-    This call is idempotent. *)
+	This call is idempotent. *)
 let ha_disable_failover_decisions __context localhost =
 	debug "Disabling failover decisions";
 	(* FIST *)
@@ -857,8 +857,8 @@
 	Localdb.put Constants.ha_disable_failover_decisions "true"
 
 (** Internal API call to disarm localhost.
-    If the daemon is missing then we return success. Either fencing was previously disabled and the
-    daemon has shutdown OR the daemon has died and this node will fence shortly...
+	If the daemon is missing then we return success. Either fencing was previously disabled and the
+	daemon has shutdown OR the daemon has died and this node will fence shortly...
  *)
 let ha_disarm_fencing __context localhost =
 	try
@@ -870,7 +870,7 @@
 	let (_ : string) = call_script ha_set_excluded [] in ()
 
 (** Internal API call to stop the HA daemon.
-    This call is idempotent. *)
+	This call is idempotent. *)
 let ha_stop_daemon __context localhost =
 	Monitor.stop ();
 	let (_ : string) = call_script ha_stop_daemon [] in ()
@@ -898,29 +898,38 @@
 	(* Might not be able to access the database to detach statefiles; however this isn't critical *)
 	()
 
-(** Internal API call to release any HA resources after the system has been shutdown.
-    This call is idempotent. *)
+(** Internal API call to release any HA resources after the system has
+	been shut down.  This call is idempotent. Modified for CA-48539 to
+	call vdi.deactivate before vdi.detach. *)
 let ha_release_resources __context localhost =
 	Monitor.stop ();
-	(* Detach any statefile VDIs *)
-	let pool = Helpers.get_pool ~__context in
-	List.iter
-		(fun vdi ->
-			let uuid = Db.VDI.get_uuid ~__context ~self:vdi in
-			Helpers.log_exn_continue
-				(Printf.sprintf "detaching statefile VDI uuid: %s" uuid)
-				(fun () -> Static_vdis.permanent_vdi_detach ~__context ~vdi) ()
-		) (List.map Ref.of_string (Db.Pool.get_ha_statefiles ~__context ~self:pool));

-	(* Detach any metadata VDIs *)
-	Xha_metadata_vdi.detach_existing ~__context;
+	(* Why aren't we calling Xha_statefile.detach_existing_statefiles?
+	   Does Db.Pool.get_ha_statefiles return a different set of
+	   statefiles than Xha_statefile.list_existing_statefiles? *)
+
+	(* Deactivate and detach all statefile VDIs in the entire pool *)
+	let statefile_vdis = Db.Pool.get_ha_statefiles ~__context ~self:(Helpers.get_pool ~__context)
+	and deactivate_and_detach_vdi vdi_str =
+		let uuid = Db.VDI.get_uuid ~__context ~self:(Ref.of_string vdi_str) in
+		Helpers.log_exn_continue
+			(Printf.sprintf "detaching statefile VDI uuid: %s" uuid)
+			(fun () ->
+				Static_vdis.permanent_vdi_deactivate_by_uuid ~__context ~uuid ;
+				Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid) ()
+	in List.iter deactivate_and_detach_vdi statefile_vdis ;
+
+	(* Deactivate and detach any metadata VDIs *)
+	Helpers.log_exn_continue
+		"deactivating and detaching metadata VDIs"
+		(fun () -> Xha_metadata_vdi.deactivate_and_detach_existing ~__context) ();

 	(* At this point a restart won't enable the HA subsystem *)
 	Localdb.put Constants.ha_armed "false"
 
 (** Internal API call which blocks until this node's xHA daemon spots the invalid statefile
-    and exits cleanly. If the daemon survives but the statefile access is lost then this function
-    will return an exception and the no-statefile shutdown can be attempted.
+	and exits cleanly. If the daemon survives but the statefile access is lost then this function
+	will return an exception and the no-statefile shutdown can be attempted.
  *)
 let ha_wait_for_shutdown_via_statefile __context localhost =
 	try
@@ -978,7 +987,7 @@
 let write_config_file ~__context statevdi_paths generation =
 	let local_heart_beat_interface = Xapi_inventory.lookup Xapi_inventory._management_interface in
 	(* Need to find the name of the physical interface, so xHA can monitor the bonding status (if appropriate).
-       Note that this interface isn't used for sending packets so VLANs don't matter: the physical NIC or bond device is all we need. *)
+	   Note that this interface isn't used for sending packets so VLANs don't matter: the physical NIC or bond device is all we need. *)
 	let localhost = Helpers.get_localhost ~__context in
 	let mgmt_pifs = List.filter (fun self -> Db.PIF.get_management ~__context ~self) (Db.Host.get_PIFs ~__context ~self:localhost) in
 	if mgmt_pifs = [] then failwith (Printf.sprintf "Cannot enable HA on host %s: there is no management interface for heartbeating" (Db.Host.get_hostname ~__context ~self:localhost));
@@ -1031,7 +1040,7 @@
 	Db.Host.set_ha_statefiles ~__context ~self:localhost ~value:(List.map Ref.string_of statevdis);
 
 	(* The master has already attached the statefile VDIs and written the
-       configuration file. *)
+	   configuration file. *)
 	if not(Pool_role.is_master ()) then begin
 		let statefiles = attach_statefiles ~__context statevdis in
 		write_config_file ~__context statefiles generation;
@@ -1053,9 +1062,9 @@
 	info "Local flag ha_armed <- true";
 
 	(* If this host is the current master then it must assert its authority as master;
-       otherwise another host's heartbeat thread might conclude that the master has gone
-       and propose itself. This would lead the xHA notion of master to immediately diverge
-       from the XenAPI notion. *)
+	   otherwise another host's heartbeat thread might conclude that the master has gone
+	   and propose itself. This would lead the xHA notion of master to immediately diverge
+	   from the XenAPI notion. *)
 	if Pool_role.is_master () then begin
 		if not (propose_master ())
 		then failwith "failed to propose the current master as master";
@@ -1189,13 +1198,13 @@
 	redo_log_ha_disabled_during_runtime __context;
 
 	(* Steps from 8.6 Disabling HA
-       If the master has access to the state file (how do we determine this)?
-       * ha_set_pool_state(invalid)
-       If the master hasn't access to the state file but all hosts are available via heartbeat
-       * set the flag "can not be master and no VM failover decision on next boot"
-       * ha_disarm_fencing()
-       * ha_stop_daemon()
-       Otherwise we'll be fenced *)
+	   If the master has access to the state file (how do we determine this)?
+	   * ha_set_pool_state(invalid)
+	   If the master hasn't access to the state file but all hosts are available via heartbeat
+	   * set the flag "can not be master and no VM failover decision on next boot"
+	   * ha_disarm_fencing()
+	   * ha_stop_daemon()
+	   Otherwise we'll be fenced *)
 
 	let hosts = Db.Host.get_all ~__context in
 
@@ -1380,7 +1389,7 @@
 		(List.map (fun (pif,pifr) -> Ref.string_of pif) unplugged_ununpluggable_pifs)));
 
 	(* Check also that any PIFs with IP information set are currently attached - it's a non-fatal
-       error if they are, but we'll warn with a message *)
+	   error if they are, but we'll warn with a message *)
 	let pifs_with_ip_config = List.filter (fun (_,pifr) -> pifr.API.pIF_ip_configuration_mode <> `None) pifs in
 	let not_bond_slaves = List.filter (fun (_,pifr) -> not (Db.is_valid_ref pifr.API.pIF_bond_slave_of)) pifs_with_ip_config in
 	let without_disallow_unplug = List.filter (fun (_,pifr) -> not (pifr.API.pIF_disallow_unplug || pifr.API.pIF_management)) not_bond_slaves in
@@ -1406,7 +1415,7 @@
 		if not alive then raise (Api_errors.Server_error(Api_errors.host_offline, [ Ref.string_of host ]))
 	) (Db.Host.get_all ~__context);
 
-	let set_difference a b = List.filter (fun x -> not(List.mem x b)) a in
+	(* let set_difference a b = List.filter (fun x -> not(List.mem x b)) a in *)
 
 	(* Steps from 8.7 Enabling HA in Marathon spec:
 	 * 1. Bring up state file VDI(s)
diff --git a/ocaml/xapi/xha_metadata_vdi.ml b/ocaml/xapi/xha_metadata_vdi.ml
--- a/ocaml/xapi/xha_metadata_vdi.ml
+++ b/ocaml/xapi/xha_metadata_vdi.ml
@@ -57,6 +57,12 @@
   let vdis = list_existing() in
   List.iter (fun x -> Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid:x.Static_vdis.uuid) vdis
 
+(** Added for CA-48539. Deactivates every VDI returned by [list_existing ()] and then detaches each of them; all deactivations are attempted before the first detach. *)
+let deactivate_and_detach_existing ~__context =
+	let vdi_uuids = List.map (fun vdi -> vdi.Static_vdis.uuid) (list_existing ()) in
+	List.iter (fun vdi_uuid -> Static_vdis.permanent_vdi_deactivate_by_uuid ~__context ~uuid:vdi_uuid) vdi_uuids ;
+	List.iter (fun vdi_uuid -> Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid:vdi_uuid) vdi_uuids
+
 open Pervasiveext
 
 (** Attempt to flush the database to the metadata VDI *)
diff --git a/ocaml/xapi/xha_statefile.ml b/ocaml/xapi/xha_statefile.ml
--- a/ocaml/xapi/xha_statefile.ml
+++ b/ocaml/xapi/xha_statefile.ml
@@ -28,84 +28,90 @@
 open Listext
 open Stringext
 
-(** Return the minimum size of an HA statefile, as of 
+(** Return the minimum size of an HA statefile, as of
     XenServer HA state-file description vsn 1.3 *)
-let minimum_size number_of_hosts = 
-  let ( ** ) = Int64.mul
-  and ( ++ ) = Int64.add in
+let minimum_size number_of_hosts =
+	let ( ** ) = Int64.mul
+	and ( ++ ) = Int64.add in
 
-  let global_section_size = 4096L 
-  and host_section_size = 4096L in
-  global_section_size ++ (Int64.of_int number_of_hosts) ** host_section_size
+	let global_section_size = 4096L
+	and host_section_size = 4096L in
+	global_section_size ++ (Int64.of_int number_of_hosts) ** host_section_size
 
 let set_difference a b = List.filter (fun x -> not(List.mem x b)) a
 
-let assert_sr_can_host_statefile ~__context ~sr = 
-  (* Check that each host has a PBD to this SR *)
-  let pbds = Db.SR.get_PBDs ~__context ~self:sr in
-  let connected_hosts = List.setify (List.map (fun self -> Db.PBD.get_host ~__context ~self) pbds) in
-  let all_hosts = Db.Host.get_all ~__context in
-  if List.length connected_hosts < (List.length all_hosts) then begin
-    error "Cannot place statefile in SR %s: some hosts lack a PBD: [ %s ]"
-      (Ref.string_of sr) 
-      (String.concat "; " (List.map Ref.string_of (set_difference all_hosts connected_hosts)));
-    raise (Api_errors.Server_error(Api_errors.sr_no_pbds, [ Ref.string_of sr ]))
-  end;
-  (* Check that each PBD is plugged in *)
-  List.iter (fun self -> 
-	       if not(Db.PBD.get_currently_attached ~__context ~self) then begin
-		 error "Cannot place statefile in SR %s: PBD %s is not plugged"
-		   (Ref.string_of sr) (Ref.string_of self);
-		 (* Same exception is used in this case (see Helpers.assert_pbd_is_plugged) *)
-		 raise (Api_errors.Server_error(Api_errors.sr_no_pbds, [ Ref.string_of sr ]))
-	       end) pbds;
-  (* Check the exported capabilities of the SR's SM plugin *)
-  let srtype = Db.SR.get_type ~__context ~self:sr in
-  if not (List.mem Smint.Vdi_generate_config (Sm.capabilities_of_driver srtype)) 
-  then raise (Api_errors.Server_error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]))
+let assert_sr_can_host_statefile ~__context ~sr =
+	(* Check that each host has a PBD to this SR *)
+	let pbds = Db.SR.get_PBDs ~__context ~self:sr in
+	let connected_hosts = List.setify (List.map (fun self -> Db.PBD.get_host ~__context ~self) pbds) in
+	let all_hosts = Db.Host.get_all ~__context in
+	if List.length connected_hosts < (List.length all_hosts) then begin
+		error "Cannot place statefile in SR %s: some hosts lack a PBD: [ %s ]"
+			(Ref.string_of sr)
+			(String.concat "; " (List.map Ref.string_of (set_difference all_hosts connected_hosts)));
+		raise (Api_errors.Server_error(Api_errors.sr_no_pbds, [ Ref.string_of sr ]))
+	end;
+	(* Check that each PBD is plugged in *)
+	List.iter (fun self ->
+		if not(Db.PBD.get_currently_attached ~__context ~self) then begin
+			error "Cannot place statefile in SR %s: PBD %s is not plugged"
+				(Ref.string_of sr) (Ref.string_of self);
+			(* Same exception is used in this case (see Helpers.assert_pbd_is_plugged) *)
+			raise (Api_errors.Server_error(Api_errors.sr_no_pbds, [ Ref.string_of sr ]))
+		end) pbds;
+	(* Check the exported capabilities of the SR's SM plugin *)
+	let srtype = Db.SR.get_type ~__context ~self:sr in
+	if not (List.mem Smint.Vdi_generate_config (Sm.capabilities_of_driver srtype))
+	then raise (Api_errors.Server_error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]))
 
-let list_srs_which_can_host_statefile ~__context = 
-  List.filter (fun sr -> try assert_sr_can_host_statefile ~__context ~sr; true
-	       with _ -> false) (Db.SR.get_all ~__context)
+let list_srs_which_can_host_statefile ~__context =
+	List.filter (fun sr -> try assert_sr_can_host_statefile ~__context ~sr; true
+	with _ -> false) (Db.SR.get_all ~__context)
 
-let create ~__context ~sr = 
-  assert_sr_can_host_statefile ~__context ~sr;
-  let size = minimum_size (List.length (Db.Host.get_all ~__context)) in
-  Helpers.call_api_functions ~__context
-    (fun rpc session_id ->
-       Client.VDI.create ~rpc ~session_id
-	 ~name_label:"Statefile for HA" 
-	 ~name_description:"Used for storage heartbeating"
-         ~sR:sr ~virtual_size:size ~_type:`ha_statefile
-         ~sharable:true ~read_only:false ~other_config:[] ~xenstore_data:[] ~sm_config:statefile_sm_config ~tags:[]
-    )
+let create ~__context ~sr =
+	assert_sr_can_host_statefile ~__context ~sr;
+	let size = minimum_size (List.length (Db.Host.get_all ~__context)) in
+	Helpers.call_api_functions ~__context
+		(fun rpc session_id ->
+			Client.VDI.create ~rpc ~session_id
+				~name_label:"Statefile for HA"
+				~name_description:"Used for storage heartbeating"
+				~sR:sr ~virtual_size:size ~_type:`ha_statefile
+				~sharable:true ~read_only:false ~other_config:[] ~xenstore_data:[] ~sm_config:statefile_sm_config ~tags:[]
+		)
 
 (** Return a reference to a valid statefile VDI in the given SR.
     This function prefers to reuse existing VDIs to avoid confusing the heartbeat component:
     it expects to see a poisoned VDI but not necessarily a stale or corrupted one. Consider that
     when using LVM-based SRs the VDI could be deleted on the master but the slaves would still
     have access to stale data. *)
-let find_or_create ~__context ~sr = 
-  assert_sr_can_host_statefile ~__context ~sr;
-  let size = minimum_size (List.length (Db.Host.get_all ~__context)) in
-  match
-  List.filter 
-    (fun self -> true
-       && (Db.VDI.get_type ~__context ~self = `ha_statefile)
-       && (Db.VDI.get_virtual_size ~__context ~self >= size))
-    (Db.SR.get_VDIs ~__context ~self:sr) with
-    | x :: _ ->
-	info "re-using existing statefile: %s" (Db.VDI.get_uuid ~__context ~self:x);
-	x
-    | [] ->
-	info "no suitable existing statefile found; creating a fresh one";
-	create ~__context ~sr
+let find_or_create ~__context ~sr =
+	assert_sr_can_host_statefile ~__context ~sr;
+	let size = minimum_size (List.length (Db.Host.get_all ~__context)) in
+	match
+		List.filter
+			(fun self -> true
+				&& (Db.VDI.get_type ~__context ~self = `ha_statefile)
+				&& (Db.VDI.get_virtual_size ~__context ~self >= size))
+			(Db.SR.get_VDIs ~__context ~self:sr) with
+				| x :: _ ->
+					info "re-using existing statefile: %s" (Db.VDI.get_uuid ~__context ~self:x);
+					x
+				| [] ->
+					info "no suitable existing statefile found; creating a fresh one";
+					create ~__context ~sr
 
-let list_existing_statefiles () = 
-  List.filter (fun x -> x.Static_vdis.reason = reason) (Static_vdis.list ()) 
+let list_existing_statefiles () =
+	List.filter (fun x -> x.Static_vdis.reason = reason) (Static_vdis.list ())
 
 (** Detach all statefiles attached with reason 'HA statefile', to clear stale state *)
-let detach_existing_statefiles ~__context = 
-  let statefiles = List.filter (fun x -> x.Static_vdis.reason = reason) (Static_vdis.list ()) in
-  List.iter (fun x -> Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid:x.Static_vdis.uuid) statefiles
-    
+let detach_existing_statefiles ~__context =
+	let statefile_uuids = List.map (fun vdi -> vdi.Static_vdis.uuid) (list_existing_statefiles ()) in
+	List.iter (fun uuid -> Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid) statefile_uuids
+
+(** Added for CA-48539. Deactivate and then detach all statefiles attached
+	with reason 'HA statefile', to clear stale state. Every statefile is deactivated before the first one is detached. *)
+let deactivate_and_detach_existing_statefiles ~__context =
+	let statefile_uuids = List.map (fun vdi -> vdi.Static_vdis.uuid) (list_existing_statefiles ()) in
+	List.iter (fun uuid -> Static_vdis.permanent_vdi_deactivate_by_uuid ~__context ~uuid) statefile_uuids ;
+	List.iter (fun uuid -> Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid) statefile_uuids
_______________________________________________
xen-api mailing list
[email protected]
http://lists.xensource.com/mailman/listinfo/xen-api

Reply via email to