Author: arkurth Date: Thu Jul 30 14:39:28 2009 New Revision: 799292 URL: http://svn.apache.org/viewvc?rev=799292&view=rev Log: VCL-81 Fixed a bug in image retrieval for xCAT.pm and vmware.pm. The code wasn't accurately detecting whether another image retrieval SCP process was running. It was getting hung up on SCP processes started by vmware.pm, which send images to VMware hosts. The pgrep regex was fixed so that it only matches SCP processes copying an image to this management node's image repository.
Added short delay before retrieving image for the number of seconds equal to the last digit of the reservation ID. This prevents multiple reservations for the same image from starting SCP processes at the same time. Removed unused $imagemeta_sysprep variable from vmware.pm::capture(). Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/vmware.pm incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/xCAT.pm Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/vmware.pm URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/vmware.pm?rev=799292&r1=799291&r2=799292&view=diff ============================================================================== --- incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/vmware.pm (original) +++ incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/vmware.pm Thu Jul 30 14:39:28 2009 @@ -1122,10 +1122,7 @@ my $image_identity = $self->data->get_image_identity; my $image_os_type = $self->data->get_image_os_type; my $image_name = $self->data->get_image_name(); - - - my $imagemeta_sysprep = $self->data->get_imagemeta_sysprep; - + my $computer_id = $self->data->get_computer_id; my $computer_shortname = $self->data->get_computer_short_name; my $computer_nodename = $computer_shortname; @@ -1999,10 +1996,17 @@ # Make sure an scp process isn't currently running to retrieve the image # This can happen if another reservation is running for the same image and the management node didn't have a copy + # Be careful with the pattern of the SCP process to check for + # The VMware module may be copying an image via SCP to a VM host, don't want to detect this + # Only want to detect an image being copied to this management node's image repository + # pgrep -fl output for image retrieval processes look like this: + # [r...@mn]# pgrep -fl "scp.*winxp-base1-v27\* /install/image/x86" + # 32578 sh -c /usr/bin/scp -B -i /etc/vcl/vcl.key -P 22 -p 
-r v...@10.1.1.1:/install/image/x86/winxp-base1-v27* /install/image/x86 2>&1 + # 32579 /usr/bin/scp -B -i /etc/vcl/vcl.key -P 22 -p -r vcl 10.1.1.1 /install/image/x86/winxp-base1-v27* /install/image/x86 my $scp_wait_attempt = 0; my $scp_wait_max_attempts = 40; my $scp_wait_delay = 15; - while (is_management_node_process_running("scp.*$image_name")) { + while (is_management_node_process_running('scp.*$image_name\* $image_repository_path')) { $scp_wait_attempt++; notify($ERRORS{'OK'}, 0, "attempt $scp_wait_attempt/$scp_wait_max_attempts: scp process is running to retrieve $image_name, waiting for $scp_wait_delay seconds"); @@ -2081,6 +2085,15 @@ return; } + # Get the last digit of the reservation ID and sleep that number of seconds + # This is done in case 2 reservations for the same image were started at the same time + # Both may attempt to retrieve an image and execute the SCP command at nearly the same time + # does_image_exist() may not catch this and allow 2 SCP retrieval processes to start + # It's likely that the reservation IDs are consecutive and the the last digits will be different + my ($pre_retrieval_sleep) = $self->data->get_reservation_id() =~ /(\d)$/; + notify($ERRORS{'DEBUG'}, 0, "sleeping for $pre_retrieval_sleep seconds to prevent multiple SCP image retrieval processes"); + sleep $pre_retrieval_sleep; + # Make sure image does not already exist on this management node if ($self->does_image_exist($image_name)) { notify($ERRORS{'OK'}, 0, "$image_name already exists on this management node"); Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/xCAT.pm URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/xCAT.pm?rev=799292&r1=799291&r2=799292&view=diff ============================================================================== --- incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/xCAT.pm (original) +++ incubator/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/xCAT.pm 
Thu Jul 30 14:39:28 2009 @@ -2711,10 +2711,17 @@ # Make sure an scp process isn't currently running to retrieve the image # This can happen if another reservation is running for the same image and the management node didn't have a copy + # Be careful with the pattern of the SCP process to check for + # The VMware module may be copying an image via SCP to a VM host, don't want to detect this + # Only want to detect an image being copied to this management node's image repository + # pgrep -fl output for image retrieval processes look like this: + # [r...@mn]# pgrep -fl "scp.*winxp-base1-v27\* /install/image/x86" + # 32578 sh -c /usr/bin/scp -B -i /etc/vcl/vcl.key -P 22 -p -r v...@10.1.1.1:/install/image/x86/winxp-base1-v27* /install/image/x86 2>&1 + # 32579 /usr/bin/scp -B -i /etc/vcl/vcl.key -P 22 -p -r vcl 10.1.1.1 /install/image/x86/winxp-base1-v27* /install/image/x86 my $scp_wait_attempt = 0; my $scp_wait_max_attempts = 40; my $scp_wait_delay = 15; - while (is_management_node_process_running("scp.*$image_name")) { + while (is_management_node_process_running('scp.*$image_name\* $image_repository_path')) { $scp_wait_attempt++; notify($ERRORS{'OK'}, 0, "attempt $scp_wait_attempt/$scp_wait_max_attempts: scp process is running to retrieve $image_name, waiting for $scp_wait_delay seconds"); @@ -2865,6 +2872,15 @@ return; } + # Get the last digit of the reservation ID and sleep that number of seconds + # This is done in case 2 reservations for the same image were started at the same time + # Both may attempt to retrieve an image and execute the SCP command at nearly the same time + # does_image_exist() may not catch this and allow 2 SCP retrieval processes to start + # It's likely that the reservation IDs are consecutive and the the last digits will be different + my ($pre_retrieval_sleep) = $self->data->get_reservation_id() =~ /(\d)$/; + notify($ERRORS{'DEBUG'}, 0, "sleeping for $pre_retrieval_sleep seconds to prevent multiple SCP image retrieval processes"); + sleep 
$pre_retrieval_sleep; + # Make sure image does not already exist on this management node if ($self->does_image_exist($image_name)) { notify($ERRORS{'OK'}, 0, "$image_name already exists on this management node");