diff -r a1409baa84e3 ocaml/xapi/dbsync_slave.ml
--- a/ocaml/xapi/dbsync_slave.ml	Mon Dec 21 17:53:29 2009 -0500
+++ b/ocaml/xapi/dbsync_slave.ml	Mon Jan 18 14:21:51 2010 -0500
@@ -621,7 +621,8 @@
        let is_management_pif = Xapi_pif.is_my_management_pif ~__context ~self in
        let was_pif_brought_up_at_start_of_day = List.mem self (List.map fst pifs_brought_up) in
        (* Mark important interfaces as attached *)
-       let mark_as_attached = is_management_pif || was_pif_brought_up_at_start_of_day in
+       let mark_as_attached = is_management_pif || was_pif_brought_up_at_start_of_day || 
+                              (Mtc.is_pif_attached_to_mtc_vms_and_should_not_be_offline ~__context ~self) in
        Db.PIF.set_currently_attached ~__context ~self ~value:mark_as_attached;
        Db.PIF.set_management ~__context ~self ~value:is_management_pif;
        debug "Marking PIF device %s as %s" (Db.PIF.get_device ~__context ~self) (if mark_as_attached then "attached" else "offline")
diff -r a1409baa84e3 ocaml/xapi/message_forwarding.ml
--- a/ocaml/xapi/message_forwarding.ml	Mon Dec 21 17:53:29 2009 -0500
+++ b/ocaml/xapi/message_forwarding.ml	Mon Jan 18 14:21:51 2010 -0500
@@ -2697,8 +2697,10 @@
     (* -------------------------------------------------------------------------- *)
 
 	let set_sharable ~__context ~self ~value =
+              if not (Mtc.is_vdi_accessed_by_protected_VM ~__context ~vdi:self) then begin (* MTC: VDIs flagged as shared with a protected VM skip the internal-SR-access assertion *)
 		let sr = Db.VDI.get_SR ~__context ~self in
 		Sm.assert_session_has_internal_sr_access ~__context ~sr;
+              end;
 		Local.VDI.set_sharable ~__context ~self ~value
 
     let set_managed ~__context ~self ~value =
diff -r a1409baa84e3 ocaml/xapi/mtc.ml
--- a/ocaml/xapi/mtc.ml	Mon Dec 21 17:53:29 2009 -0500
+++ b/ocaml/xapi/mtc.ml	Mon Jan 18 14:21:51 2010 -0500
@@ -31,6 +31,16 @@
 module DD=Debug.Debugger(struct let name="MTC:" end)
 open DD
 
+module Internal = struct
+(* [read_one_line file] returns the first line of [file]; the channel is closed on every path and the original exception is re-raised. *)
+let read_one_line file =
+	let inchan = open_in file in
+	try
+		let result = input_line inchan in
+		close_in inchan;
+		result
+	with exn -> close_in_noerr inchan; raise exn (* _noerr: a failing close must not mask [exn] *)
+end
 
 
 (* 
@@ -52,6 +62,8 @@
  *)
 let vm_protected_key = "vm_protected"
 let vm_peer_uuid_key = "vm_peer_uuid"
+let mtc_pvm_key = "mtc_pvm" (* VM other-config key; its presence is what the PIF check below tests on resident VMs *)
+let mtc_vdi_share_key = "mtc_vdi_shareable" (* VDI other-config key; its presence is what is_vdi_accessed_by_protected_VM tests *)
 
 (*
 * This function looks at the 'other-config' field in the VM's configuration
@@ -240,7 +252,6 @@
    the foreground phase (ie, domain has been suspended).
    Called only by the source of a migration. *)
 let event_notify_entering_suspend ~__context ~self =
-  if (is_this_vm_protected ~__context ~self) then (
     with_xc_and_xs
       (fun xc xs ->
         let key = (migration_base_path ~xs ~__context ~vm:self) ^ 
@@ -248,13 +259,11 @@
         debug "Entering suspend. Key: %s" key;
         xs.Xs.write key "1";
       )
-  )
 
 (* A blocking wait.  Wait for external party to acknowlege the suspend
    stage has been entered. If no response is received within timeout,
    routine simply returns `ACKED to simulate a response. *)
 let event_wait_entering_suspend_acked ?timeout ~__context ~self =
-  if (is_this_vm_protected ~__context ~self) then (
     with_xc_and_xs
       (fun xc xs ->
         let ack_key = (migration_base_path ~xs ~__context ~vm:self) ^ 
@@ -287,7 +296,6 @@
               debug "Timed-out waiting for suspend ack on key: %s" ack_key;
               `TIMED_OUT            
       )
-  ) else `ACKED 
 
 (* Check to see if an abort request has been made through XenStore *)
 let event_check_for_abort_req ~__context ~self =
@@ -304,6 +312,77 @@
         value = "1"
       )
   ) else false 
+
+
+
+(* 
+ * -----------------------------------------------------------------------------
+ *  Network Functions
+ * -----------------------------------------------------------------------------
+ *)
+(* Determine whether the specified PIF must stay marked as attached when XAPI is
+ * restarted.  Returns true only if the PIF's network carries a VIF of a VM that
+ * is resident on this host and has the mtc_pvm other-config key, and we have
+ * checked here that the NIC and its bridge are up.  Any failure is logged and yields false.
+ *)
+let is_pif_attached_to_mtc_vms_and_should_not_be_offline ~__context ~self =
+  try
+
+    (* Get the VMs that are hooked up to this PIF *)
+    let network = Db.PIF.get_network ~__context ~self in
+    let vifs = Db.Network.get_VIFs ~__context ~self:network in
+
+    (* Narrow the VIFs' VMs down to those resident on this host that carry the mtc_pvm key *)
+    let vms = List.map (fun vif -> 
+                        Db.VIF.get_VM ~__context ~self:vif)
+                        vifs in
+    let localhost = Helpers.get_localhost ~__context in
+    let resident_vms = List.filter (fun vm  -> 
+                                    localhost = (Db.VM.get_resident_on ~__context ~self:vm)) 
+                                    vms in
+    let protected_vms = List.filter (fun vm  -> 
+                                     List.mem_assoc mtc_pvm_key (Db.VM.get_other_config ~__context ~self:vm)) 
+                                     resident_vms in
+
+    let protected_vms_uuid = List.map (fun vm  -> 
+                                       Db.VM.get_uuid ~__context ~self:vm) 
+                                       protected_vms in
+
+
+    (* If we have protected VMs using this PIF, then decide whether it should be marked offline *)
+    if protected_vms <> [] then begin
+      let current = Netdev.network.Netdev.list () in
+      let bridge = Db.Network.get_bridge ~__context ~self:network in
+      let nic = Db.PIF.get_device ~__context ~self in
+      debug "The following MTC VMs are using %s for PIF %s: [%s]" 
+             nic
+             (Db.PIF.get_uuid ~__context ~self)
+             (String.concat "; " protected_vms_uuid);
+
+      let nic_device_path = Printf.sprintf "/sys/class/net/%s/operstate" nic in
+      let nic_device_state = Internal.read_one_line nic_device_path in
+
+      let bridge_device_path = Printf.sprintf "/sys/class/net/%s/operstate" bridge in
+      let bridge_device_state = Internal.read_one_line bridge_device_path in
+
+      (* The PIF should be marked online if:
+         1) its network has a bridge created in dom0 and
+         2) the bridge link is up and
+         3) the physical NIC is up and
+         4) the bridge operational state is up (unknown is also up).
+       *)
+       let mark_online = (List.mem bridge current) && 
+                         (Netdev.Link.is_up bridge) && 
+                          nic_device_state = "up" &&
+                          (bridge_device_state = "up" ||
+                          bridge_device_state = "unknown") in
+
+       debug "Its current operational state is %s.  Therefore we'll be marking it as %s" 
+              nic_device_state (if mark_online then "online" else "offline");
+       mark_online
+    end else false
+  with e -> (* best effort: never fail the caller, but leave a trace of why the check gave up *)
+    debug "Could not check whether PIF is in use by MTC VMs: %s" (Printexc.to_string e); false
 
 (* 
  * -----------------------------------------------------------------------------
@@ -328,3 +407,22 @@
       raise e
   end
 
+(* Raises Api_errors.Server_error(vm_bad_power_state, ...) unless the destination VM is halted; compares structurally (<>) *)
+let verify_dest_vm_power_state ~__context ~vm =
+  let actual = Db.VM.get_power_state ~__context ~self:vm in
+  if actual <> `Halted then
+    raise(Api_errors.Server_error(Api_errors.vm_bad_power_state, [Ref.string_of vm; "halted"; (Record_util.power_to_string actual)]))
+
+(* Returns true if [vdi] is flagged for MTC sharing, i.e. its other-config contains
+ * mtc_vdi_share_key (the value is ignored).  No VBD or VM lookup is performed. *)
+let is_vdi_accessed_by_protected_VM ~__context ~vdi =
+
+  let other_config = Db.VDI.get_other_config ~__context ~self:vdi in
+  let protected_vdi = List.mem_assoc mtc_vdi_share_key other_config in
+
+  (* Only look the UUID up when there is something to log, so the common
+   * (unflagged) case does not pay for a second database read. *)
+  if protected_vdi then
+    debug "VDI %s is attached to a Marathon-protected VM"
+      (Db.VDI.get_uuid ~__context ~self:vdi);
+  protected_vdi
diff -r a1409baa84e3 ocaml/xapi/storage_access.ml
--- a/ocaml/xapi/storage_access.ml	Mon Dec 21 17:53:29 2009 -0500
+++ b/ocaml/xapi/storage_access.ml	Mon Jan 18 14:21:51 2010 -0500
@@ -183,7 +183,9 @@
       let uuid = Uuid.of_string (Db.VDI.get_uuid ~__context ~self) in
       with_vdi_lock
 	(fun () ->
-	   if is_already_attached uuid && (mode <> get_mode uuid) then
+           (* MTC: A protected VM needs to have its disks mounted into two VMs: one as R+W and another as RO *)
+	   if is_already_attached uuid && (mode <> get_mode uuid) && 
+             not (Mtc.is_vdi_accessed_by_protected_VM ~__context ~vdi:self) then
 	     failwith (Printf.sprintf "The VDI %s is already attached in %s mode; it can't be attached in %s mode!" (Uuid.to_string uuid) (string_of_mode (get_mode uuid)) (string_of_mode mode));
 	   let attach_path = 
 	     Sm.call_sm_vdi_functions ~__context ~vdi:self
diff -r a1409baa84e3 ocaml/xapi/vmops.ml
--- a/ocaml/xapi/vmops.ml	Mon Dec 21 17:53:29 2009 -0500
+++ b/ocaml/xapi/vmops.ml	Mon Jan 18 14:21:51 2010 -0500
@@ -785,7 +785,7 @@
 (** Tells a VM to shutdown with a specific reason (reboot/halt/poweroff), waits for
     it to shutdown (or vanish) and then return the reason.
 	Note this is not always called with the per-VM mutex. *)
-let clean_shutdown_with_reason ?(at = fun _ -> ()) ~xal ~__context ~self domid reason =
+let clean_shutdown_with_reason ?(at = fun _ -> ()) ~xal ~__context ~self ?(rel_timeout = 5.) domid reason =
   (* Set the task allowed_operations to include cancel *)
   if reason <> Domain.Suspend then TaskHelper.set_cancellable ~__context;
 
@@ -823,7 +823,8 @@
   let result = ref None in
   while (Unix.gettimeofday () -. start < total_timeout) && (!result = None) do
     try
-      result := Some (Xal.wait_release xal ~timeout:5. domid);
+      debug "MTC: calling xal.wait_release timeout=%f" rel_timeout;
+      result := Some (Xal.wait_release xal ~timeout:rel_timeout domid);
     with Xal.Timeout -> 
       if reason <> Domain.Suspend && TaskHelper.is_cancelling ~__context
       then raise (Api_errors.Server_error(Api_errors.task_cancelled, [ Ref.string_of (Context.get_task_id __context) ]));
diff -r a1409baa84e3 ocaml/xapi/xapi_vm_migrate.ml
--- a/ocaml/xapi/xapi_vm_migrate.ml	Mon Dec 21 17:53:29 2009 -0500
+++ b/ocaml/xapi/xapi_vm_migrate.ml	Mon Jan 18 14:21:51 2010 -0500
@@ -109,6 +109,10 @@
 
 (* MTC: Routine to report migration progress via task and events *)
 let migration_progress_cb ~__context vm_migrate_failed ~vm progress =
+
+  if TaskHelper.is_cancelling ~__context
+    then raise (Api_errors.Server_error(Api_errors.task_cancelled, [ Ref.string_of (Context.get_task_id __context) ]));
+
   TaskHelper.set_progress ~__context progress;
   Mtc.event_notify_task_status ~__context ~vm ~status:`pending progress;
   if Mtc.event_check_for_abort_req ~__context ~self:vm then
@@ -119,26 +123,34 @@
    requires that an external agent acknowledge the transition prior to 
    continuing. *)
 let migration_suspend_cb ~xal ~xc ~xs ~__context vm_migrate_failed ~self domid reason =
-  Mtc.event_notify_entering_suspend ~__context ~self;
 
-  let ack = Mtc.event_wait_entering_suspend_acked ~timeout:60. ~__context ~self in
+  if TaskHelper.is_cancelling ~__context
+    then raise (Api_errors.Server_error(Api_errors.task_cancelled, [ Ref.string_of (Context.get_task_id __context) ]));
 
-  (* If we got the ack, then proceed to shutdown the domain with the suspend
+  (* MTC: only protected VMs notify the external agent and wait for its ack;
+     every other VM counts as acknowledged and keeps the default release timeout. *)
+  let is_protected = Mtc.is_this_vm_protected ~__context ~self in
+  let acked = (not is_protected) || begin
+    Mtc.event_notify_entering_suspend ~__context ~self;
+    Mtc.event_wait_entering_suspend_acked ~timeout:60. ~__context ~self = `ACKED
+  end in
+  (* If we got the ack, then proceed to shutdown the domain with the suspend
      reason.  If we failed to get the ack, then raise an exception to abort
      the migration *)
-  if (ack = `ACKED) then begin
-    match Vmops.clean_shutdown_with_reason ~xal ~__context ~self domid Domain.Suspend with
-	| Xal.Suspended -> () (* good *)
-	| Xal.Crashed ->
+  if acked then begin
+    let rel_timeout = if is_protected then Some 0.25 else None in
+    match Vmops.clean_shutdown_with_reason ~xal ~__context ~self ?rel_timeout domid Domain.Suspend with
+	  | Xal.Suspended -> () (* good *)
+	  | Xal.Crashed ->
 		  raise (Api_errors.Server_error(Api_errors.vm_crashed, [ Ref.string_of self ]))
-	| Xal.Rebooted ->
-		  raise (Api_errors.Server_error(Api_errors.vm_rebooted, [ Ref.string_of self ]))	
-	| Xal.Vanished
-	| Xal.Halted ->
+	  | Xal.Rebooted ->
+		  raise (Api_errors.Server_error(Api_errors.vm_rebooted, [ Ref.string_of self ]))
+	  | Xal.Vanished
+	  | Xal.Halted ->
 		  raise (Api_errors.Server_error(Api_errors.vm_halted, [ Ref.string_of self ]))
-	| Xal.Shutdown x -> vm_migrate_failed (Printf.sprintf "Domain shutdown for unexpected reason: %d" x)
-  end else 
-    vm_migrate_failed "Failed to receive suspend acknowledgement within timeout period or an abort was requested."
+	  | Xal.Shutdown x -> vm_migrate_failed (Printf.sprintf "Domain shutdown for unexpected reason: %d" x)
+  end else
+    vm_migrate_failed "Failed to receive suspend acknowledgement within timeout period or an abort was requested."
 
 (* ------------------------------------------------------------------- *)
 (* Part 2: transmitter and receiver functions                          *)
@@ -252,6 +264,12 @@
        memory image has been transmitted. We assume that we cannot recover this domain
        and that it must be destroyed. We must make sure we detect failure in the 
        remote to complete the admin and set the VM to halted if this happens. *)
+
+    (* Recover an MTC VM if abort was requested during the suspended phase *)
+    if Mtc.event_check_for_abort_req ~__context ~self:vm then ( 
+       vm_migrate_failed  "An external abort event was detected during the VM suspend phase.";
+    );
+
     Stats.time_this "VM migration downtime" (fun () ->
     (* Depending on where the exn in the try block happens, we may or may not want to
        deactivate VDIs in the finally clause. In the case of a non-localhost migration
@@ -301,26 +319,38 @@
 			detach_in_finally_clause := false;
 		end;
 
-
-	   (* Now send across the RRD *)
-	   (try Monitor_rrds.migrate_push ~__context (Db.VM.get_uuid ~__context ~self:vm) host with e ->
-	     debug "Caught exception while trying to push rrds: %s" (ExnHelper.string_of_exn e);
-	     log_backtrace ());
+           (* MTC: don't send RRDs since MTC VMs are not really migrated. *)
+ 	   if not (Mtc.is_this_vm_protected ~__context ~self:vm) then (
+	     (* Now send across the RRD *)
+	     (try Monitor_rrds.migrate_push ~__context (Db.VM.get_uuid ~__context ~self:vm) host with e ->
+	       debug "Caught exception while trying to push rrds: %s" (ExnHelper.string_of_exn e);
+	       log_backtrace ());
+           );
 
 	   (* We mustn't return to our caller (and release locks) until the remote confirms
 	      that it has reparented the VM by setting resident-on, domid *)
 	   debug "Sender 7. waiting for all-clear from remote";
 	   (* <-- [4] Synchronisation point *)
-	   Handshake.recv_success fd
+	   Handshake.recv_success fd;
+ 	   if Mtc.is_this_vm_protected ~__context ~self:vm then (
+	     let hvm = Helpers.has_booted_hvm ~__context ~self:vm in
+ 	     debug "Sender 7a. resuming source domain";
+	     Domain.resume ~xc ~xs ~hvm ~cooperative:true domid 
+	   );
 	 with e ->
 	   (* This should only happen if the receiver has died *)
 	   let msg = Printf.sprintf "Caught exception %s at last minute during migration"
 	     (ExnHelper.string_of_exn e) in
 	   debug "%s" msg; error "%s" msg;
-	   Xapi_vm_lifecycle.force_state_reset ~__context ~self:vm ~value:`Halted;
+           (* MTC: don't reset state upon failure.  MTC VMs will simply resume *)
+ 	   if not (Mtc.is_this_vm_protected ~__context ~self:vm) then 
+ 	     Xapi_vm_lifecycle.force_state_reset ~__context ~self:vm ~value:`Halted;
 	   vm_migrate_failed msg
       )
       (fun () ->
+ 	 if Mtc.is_this_vm_protected ~__context ~self:vm then (
+	    debug "MTC: Sender won't clean up by destroying remains of local domain";
+         ) else (
 	 debug "Sender cleaning up by destroying remains of local domain";
 	 if !deactivate_in_finally_clause then
 		List.iter (fun vdi -> Storage_access.VDI.deactivate ~__context ~self:vdi) vdis;
@@ -329,6 +359,7 @@
 	 let preserve_xs_vm = (Helpers.get_localhost ~__context = host) in
 	 Vmops.destroy_domain ~preserve_xs_vm ~clear_currently_attached:false ~detach_devices:(not is_localhost_migration)
 	   ~deactivate_devices:(!deactivate_in_finally_clause) ~__context ~xc ~xs ~self:vm domid)
+	)
 ) (* Stats.timethis *)
   with 
     (* If the domain shuts down incorrectly, rely on the event thread for tidying up *)
@@ -567,6 +598,9 @@
 	(fun () ->      
 	   Unixext.set_tcp_nodelay insecure_fd true;
 
+           (* Set the task allowed_operations to include cancel *)
+           TaskHelper.set_cancellable ~__context;
+
 	   let secure_rpc = Helpers.make_rpc ~__context in
 	   debug "Sender 1. Logging into remote server";
 	   let session_id = Client.Session.slave_login ~rpc:secure_rpc ~host
@@ -634,6 +668,14 @@
 			      host session_id vm xc xs live);
 		with e ->
 		  debug "Sender Caught exception: %s" (ExnHelper.string_of_exn e);
+ 	          with_xc_and_xs (fun xc xs -> 
+                      if Mtc.is_this_vm_protected ~__context ~self:vm then (
+ 	                debug "MTC: exception encountered.  Resuming source domain";
+                        let domid = Int64.to_int (Db.VM.get_domid ~__context ~self:vm) in
+			let hvm = Helpers.has_booted_hvm ~__context ~self:vm in
+			Domain.resume ~xc ~xs ~hvm ~cooperative:true domid 
+	              ));
+
 		  (* NB the domain might now be in a crashed state: rely on the event thread
 		     to do the cleanup asynchronously. *)
 		  raise_api_error e
@@ -802,6 +844,10 @@
 			Vmops.with_enough_memory ~__context ~xc ~xs ~memory_required_kib
 			(fun () ->
 *)
+                        (* MTC-3009: The dest VM of a Marathon protected VM MUST be in halted state. *)
+                        if Mtc.is_this_vm_protected ~__context ~self:dest_vm then (
+		           Mtc.verify_dest_vm_power_state ~__context ~vm:dest_vm
+                        );
 				debug "Receiver 3. sending back HTTP 200 OK";
 				Http_svr.headers fd (Http.http_200_ok ());
 				receiver ~__context ~localhost localhost_migration fd vm xc xs memory_required_kib
diff -r a1409baa84e3 ocaml/xenguest/xenguest_main.ml
--- a/ocaml/xenguest/xenguest_main.ml	Mon Dec 21 17:53:29 2009 -0500
+++ b/ocaml/xenguest/xenguest_main.ml	Mon Jan 18 14:21:51 2010 -0500
@@ -154,6 +154,8 @@
 			exit 0
 		with _ -> exit 0
 	end;
+
+	Sys.set_signal Sys.sigterm (Sys.Signal_handle (fun i -> debug "Signal handler killing PID=%d" pid; Unix.kill pid Sys.sigterm));
 	List.iter Unix.close [ stdout_w; stderr_w; output_w ];
 
 	let finished = ref false in
diff -r a1409baa84e3 ocaml/xenops/device.ml
--- a/ocaml/xenops/device.ml	Mon Dec 21 17:53:29 2009 -0500
+++ b/ocaml/xenops/device.ml	Mon Jan 18 14:21:51 2010 -0500
@@ -1398,7 +1398,11 @@
 	match wait_for with
 	| Some state ->
 		let pw = cmdpath ^ "/state" in
-		Watch.wait_for ~xs (Watch.value_to_become pw state)
+               (* MTC: The default timeout for this operation was 20mins, which is
+                * way too long for our software to recover successfully.
+                * Talk to Citrix about this
+                *) 
+		Watch.wait_for ~xs ~timeout:30. (Watch.value_to_become pw state)
 	| None -> ()
 
 let get_state ~xs domid =
diff -r a1409baa84e3 ocaml/xenops/domain.ml
--- a/ocaml/xenops/domain.ml	Mon Dec 21 17:53:29 2009 -0500
+++ b/ocaml/xenops/domain.ml	Mon Jan 18 14:21:51 2010 -0500
@@ -97,9 +97,24 @@
 	| _     -> Arch_native
 
 let make ~xc ~xs info uuid =
-	let flags =
+	let flags = if info.hvm then (
+	  let default_flags =
 		(if info.hvm then [ Xc.CDF_HVM ] else []) @
 		(if (info.hvm && info.hap) then [ Xc.CDF_HAP ] else []) in
+	   if (List.mem_assoc "hap" info.platformdata) then (
+              if (List.assoc "hap" info.platformdata) = "false" then (
+                 debug "HAP will be disabled for VM %s." (Uuid.to_string uuid);
+                 [ Xc.CDF_HVM ]
+              ) else if (List.assoc "hap" info.platformdata) = "true" then (
+                 debug "HAP will be enabled for VM %s." (Uuid.to_string uuid);
+                 [ Xc.CDF_HVM; Xc.CDF_HAP ] 
+              ) else (
+                 debug "Unrecognized HAP platform value.  Assuming default settings for VM %s." (Uuid.to_string uuid);
+                 default_flags
+              )
+           ) else
+              default_flags
+        ) else [] in
 	let domid = Xc.domain_create xc info.ssidref flags uuid in
 	let name = if info.name <> "" then info.name else sprintf "Domain-%d" domid in
 	try
