Martin Peřina has uploaded a new change for review.

Change subject: core: Adds SSH soft fencing capability
......................................................................

core: Adds SSH soft fencing capability

Adds a step to the standard host-not-responding treatment process that
tries to restart VDSM on the host over an SSH connection prior to the
standard fencing process. If the VDSM restart doesn't help, the standard
fencing process will be executed.

Change-Id: I8002b6ac00a1e2e543b5cc8d1affdd42b994d5f7
Bug-Url: https://bugzilla.redhat.com/967328
Signed-off-by: Martin Perina <[email protected]>
---
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
M 
backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/action/FenceVdsActionParameters.java
M 
backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
5 files changed, 171 insertions(+), 12 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/98/15798/1

diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java
index c8c0ee3..18422a7 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java
@@ -115,7 +115,7 @@
     }
 
     @Override
-    public void vdsNotResponding(final VDS vds) {
+    public void vdsNotResponding(final VDS vds, final boolean 
executeSshSoftFencing) {
         ExecutionHandler.updateSpecificActionJobCompleted(vds.getId(), 
VdcActionType.MaintenanceVds, false);
         ThreadPoolUtil.execute(new Runnable() {
             @Override
@@ -124,7 +124,7 @@
                         vds.getId(),
                         vds.getHostName());
                 
Backend.getInstance().runInternalAction(VdcActionType.VdsNotRespondingTreatment,
-                        new FenceVdsActionParameters(vds.getId(), 
FenceActionType.Restart),
+                        new FenceVdsActionParameters(vds.getId(), 
FenceActionType.Restart, executeSshSoftFencing),
                         ExecutionHandler.createInternalJobContext());
             }
         });
diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
index 2d070fb..77ce9f3 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
@@ -1,7 +1,17 @@
 package org.ovirt.engine.core.bll;
 
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.KeyPair;
+import java.security.KeyStore;
+import java.security.KeyStoreException;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.concurrent.TimeUnit;
 
 import org.ovirt.engine.core.common.AuditLogType;
 import org.ovirt.engine.core.common.VdcObjectType;
@@ -12,10 +22,14 @@
 import org.ovirt.engine.core.common.businessentities.VM;
 import org.ovirt.engine.core.common.businessentities.VMStatus;
 import org.ovirt.engine.core.common.businessentities.VdsSpmStatus;
+import org.ovirt.engine.core.common.config.Config;
+import org.ovirt.engine.core.common.config.ConfigValues;
 import 
org.ovirt.engine.core.common.vdscommands.SetVmStatusVDSCommandParameters;
 import org.ovirt.engine.core.common.vdscommands.VDSCommandType;
 import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogDirector;
 import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogableBase;
+import org.ovirt.engine.core.utils.EngineLocalConfig;
+import org.ovirt.engine.core.utils.ssh.SSHClient;
 
 @NonTransactiveCommandAttribute
 public class VdsNotRespondingTreatmentCommand<T extends 
FenceVdsActionParameters> extends RestartVdsCommand<T> {
@@ -48,7 +62,17 @@
     protected void executeCommand() {
         setVds(null);
         if (getVds() != null && shouldVdsBeFenced()) {
-            super.executeCommand();
+            boolean sshVdsmRestartSuccess = false;
+            if (getParameters().isExecuteSshVdsmRestart()) {
+                sshVdsmRestartSuccess =
+                        executeRestartVDSMUsingSsh(getVds().getHostName(), 
getVds().getVdsGroupCompatibilityVersion()
+                                .toString());
+            }
+
+            // VDSM restart using SSH was not successful, execute standard 
fencing
+            if (!sshVdsmRestartSuccess) {
+                super.executeCommand();
+            }
         } else {
             setCommandShouldBeLogged(false);
             log.infoFormat("Host {0}({1}) not fenced since it's status is ok, 
or it doesn't exist anymore.",
@@ -129,4 +153,110 @@
         }
         return jobProperties;
     }
+
+    /**
+     * Executes VDSM restart command using SSH connection
+     * @param host host to restart VDSM on
+     * @return {@code true} if restart command has been executed 
successfully, {@code false} otherwise
+     */
+    private boolean executeRestartVDSMUsingSsh(String host, String version) {
+        boolean result = true;
+        SSHClient sshClient = null;
+
+        try {
+            sshClient = getSshClient(host);
+            sshClient.connect();
+            sshClient.authenticate();
+            ByteArrayOutputStream bos = new ByteArrayOutputStream();
+            sshClient.executeCommand(Config.<String> 
GetValue(ConfigValues.RestartVdsmBySshCommand, version),
+                    null,
+                    bos,
+                    null);
+            log.info("VDSM restart executed on host " + host);
+            log.debug("VDSM restart command output " + bos.toString());
+        } catch (Exception ex) {
+            log.error("VDSM restart failed on host " + host, ex);
+            result = false;
+        } finally {
+            closeSshConnection(sshClient);
+        }
+        return result;
+    }
+
+    /**
+     * Tries to close SSH client connection
+     * @param sshClient SSH client
+     */
+    private void closeSshConnection(SSHClient sshClient) {
+        if (sshClient != null) {
+            try {
+                sshClient.disconnect();
+            } catch (Exception ex) {
+                log.error("Error disconnecting SSH connection", ex);
+            }
+        }
+    }
+
+    /**
+     * Initializes SSH client instance
+     *
+     * @param host
+     *            host to connect to
+     * @return initialized SSH client instance
+     * @throws KeyStoreException
+     *             if engine SSH key pair cannot be loaded
+     */
+    private SSHClient getSshClient(String host) throws KeyStoreException {
+        SSHClient sshClient = new SSHClient();
+        sshClient.setHardTimeout(TimeUnit.SECONDS.toMillis(
+                Config.<Integer> 
GetValue(ConfigValues.SSHInactivityHardTimoutSeconds)));
+        sshClient.setSoftTimeout(TimeUnit.SECONDS.toMillis(
+                Config.<Integer> 
GetValue(ConfigValues.SSHInactivityTimoutSeconds)));
+        sshClient.setHost(host);
+        sshClient.setUser("root");
+        loadEngineSshKeyPair(sshClient);
+        return sshClient;
+    }
+
+    /**
+     * Loads engine SSH key pair into specified SSH client instance.
+     *
+     * @param sshClient
+     *            SSH client instance
+     */
+    private void loadEngineSshKeyPair(SSHClient sshClient) throws 
KeyStoreException {
+        EngineLocalConfig config = EngineLocalConfig.getInstance();
+        final File p12 = config.getPKIEngineStore();
+        final char[] password = 
config.getPKIEngineStorePassword().toCharArray();
+        final String alias = config.getPKIEngineStoreAlias();
+
+        KeyStore.PrivateKeyEntry entry;
+        InputStream in = null;
+        try {
+            in = new FileInputStream(p12);
+            KeyStore ks = KeyStore.getInstance("PKCS12");
+            ks.load(in, password);
+
+            entry = (KeyStore.PrivateKeyEntry) ks.getEntry(alias, new 
KeyStore.PasswordProtection(password));
+        } catch (Exception e) {
+            throw new KeyStoreException(String.format("Failed to get 
certificate entry from key store: %1$s/%2$s",
+                    p12,
+                    alias), e);
+        } finally {
+            Arrays.fill(password, '*');
+            if (in != null) {
+                try {
+                    in.close();
+                } catch (IOException e) {
+                    log.error("Cannot close key store", e);
+                }
+            }
+        }
+
+        if (entry == null) {
+            throw new KeyStoreException(
+                    String.format("Bad key store: %1$s/%2$s", p12, alias));
+        }
+        sshClient.setKeyPair(new 
KeyPair(entry.getCertificate().getPublicKey(), entry.getPrivateKey()));
+    }
 }
diff --git 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/action/FenceVdsActionParameters.java
 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/action/FenceVdsActionParameters.java
index 532d4d0..db9127c5 100644
--- 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/action/FenceVdsActionParameters.java
+++ 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/action/FenceVdsActionParameters.java
@@ -6,17 +6,32 @@
 public class FenceVdsActionParameters extends VdsActionParameters {
     private static final long serialVersionUID = 6174371941176548263L;
 
-    public FenceVdsActionParameters(Guid vdsId, FenceActionType action) {
-        super(vdsId);
-        _action = action;
+    private FenceActionType _action = FenceActionType.forValue(0);
+
+    /**
+     * Indicator to execute VDSM restart using SSH
+     */
+    private final boolean executeSshVdsmRestart;
+
+    public FenceVdsActionParameters() {
+        this.executeSshVdsmRestart = false;
     }
 
-    private FenceActionType _action = FenceActionType.forValue(0);
+    public FenceVdsActionParameters(Guid vdsId, FenceActionType action) {
+        this(vdsId, action, false);
+    }
+
+    public FenceVdsActionParameters(Guid vdsId, FenceActionType action, 
boolean executeSshVdsmRestart) {
+        super(vdsId);
+        _action = action;
+        this.executeSshVdsmRestart = executeSshVdsmRestart;
+    }
 
     public FenceActionType getAction() {
         return _action;
     }
 
-    public FenceVdsActionParameters() {
+    public boolean isExecuteSshVdsmRestart() {
+        return executeSshVdsmRestart;
     }
 }
diff --git 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java
 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java
index 22318ae..f0b97b6 100644
--- 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java
+++ 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java
@@ -8,7 +8,7 @@
 import org.ovirt.engine.core.compat.TransactionScopeOption;
 
 public interface IVdsEventListener {
-    void vdsNotResponding(VDS vds); // BLL
+    void vdsNotResponding(VDS vds, boolean executeSshSoftFencing); // BLL
 
     void vdsNonOperational(Guid vdsId, NonOperationalReason type, boolean 
logCommand, boolean saveToDb,
             Guid domainId); // BLL
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
index e2db5f5..343a520 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
@@ -94,6 +94,7 @@
 
     private final AtomicInteger mFailedToRunVmAttempts;
     private final AtomicInteger mUnrespondedAttempts;
+    private final AtomicBoolean sshSoftFencingExecuted;
 
     private static final int VDS_DURING_FAILURE_TIMEOUT_IN_MINUTES = Config
             .<Integer> 
GetValue(ConfigValues.TimeToReduceFailedRunOnVdsInMinutes);
@@ -137,6 +138,7 @@
         monitoringStrategy = 
MonitoringStrategyFactory.getMonitoringStrategyForVds(vds);
         mUnrespondedAttempts = new AtomicInteger();
         mFailedToRunVmAttempts = new AtomicInteger();
+        sshSoftFencingExecuted = new AtomicBoolean(false);
         monitoringLock = new 
EngineLock(Collections.singletonMap(_vdsId.toString(),
                 new Pair<String, String>(LockingGroup.VDS_INIT.name(), "")), 
null);
 
@@ -521,6 +523,7 @@
      */
     public void SuccededToRunVm(Guid vmId) {
         mUnrespondedAttempts.set(0);
+        sshSoftFencingExecuted.set(false);
         ResourceManager.getInstance().SuccededToRunVm(vmId, _vds.getId());
     }
 
@@ -600,11 +603,18 @@
         if (spmStatus != VdsSpmStatus.None) {
             spmIndicator = 1;
         }
-        return TimeUnit.SECONDS.toMillis((int)(
+        int secToFence = (int)(
                 // delay time can be fracture number, casting it to int should 
be enough
                 Config.<Integer> 
GetValue(ConfigValues.TimeoutToResetVdsInSeconds) +
                 (Config.<Double> 
GetValue(ConfigValues.DelayResetForSpmInSeconds) * spmIndicator) +
-                (Config.<Double> 
GetValue(ConfigValues.DelayResetPerVmInSeconds) * vmCount)));
+                (Config.<Double> 
GetValue(ConfigValues.DelayResetPerVmInSeconds) * vmCount));
+
+        if (sshSoftFencingExecuted.get()) {
+            // VDSM restart by SSH has been executed, wait more to see if host 
is OK
+            secToFence = 2 * secToFence;
+        }
+
+        return TimeUnit.SECONDS.toMillis(secToFence);
     }
     /**
      * Handle network exception, return true if save vdsDynamic to DB is 
needed.
@@ -640,7 +650,11 @@
 
             AuditLogableBase logable = new AuditLogableBase(vds.getId());
             AuditLogDirector.log(logable, AuditLogType.VDS_FAILURE);
-            
ResourceManager.getInstance().getEventListener().vdsNotResponding(vds);
+            boolean executeSshSoftFencing = false;
+            if (!sshSoftFencingExecuted.getAndSet(true)) {
+                executeSshSoftFencing = true;
+            }
+            
ResourceManager.getInstance().getEventListener().vdsNotResponding(vds, 
executeSshSoftFencing);
         }
         return true;
     }


-- 
To view, visit http://gerrit.ovirt.org/15798
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I8002b6ac00a1e2e543b5cc8d1affdd42b994d5f7
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: master
Gerrit-Owner: Martin Peřina <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to