On Thursday, December 26, 2013 11:37:47 am John Baldwin wrote:
> On Wednesday, December 25, 2013 10:43:27 am Michael Dexter wrote:
> > 
> > John,
> > 
> > This is awesome.
> > 
> > Can this be triggered from the host to shut down an unresponsive VM?
> > 
> > If so, syntax?
> 
> It cannot.  However, we could add a virtual power button that acts like an
> external ACPI power button.  We could then add a flag to bhyvectl to
> trigger it.  I will look into that.

I've implemented this.  Rather than doing it via bhyvectl, I turned SIGTERM
sent to bhyve itself into the virtual power button.  Thus, if you
'killall bhyve' or 'pkill bhyve' from the host, the guest will gracefully
shutdown using S5.  You can still use kill -9 to kill a hung guest (similar
to the 4 second power button override on real metal).

To do this I fleshed out the ACPI power management support a bit more adding
more of the implementation of PM1_EVT and PM1_CNT.  I also added an SMI_CMD
register with commands to enable and disable ACPI (for now all that does is
enable/disable the virtual power button as well as toggle SCI_EN in PM1_CNT).
I also added support for raising ACPI's SCI based on settings in PM1_EVT.

I extended mevent to handle EVFILT_SIGNAL events and use a mevent handler for
SIGTERM to simulate pressing the power button.  This just implements the
simple fixed-feature power button support via PM1_EVT rather than having a
control method button (which would require implementing GPE support).

While here, I made sure the SCI interrupt was active-lo / level-triggered,
and I changed PCI interrupts in the MP Table to explicitly use active-lo
since they were already explicitly using level-trigger.

I also fixed the pmtmr code to use pthread_once instead of a racy static
int.  That should probably be a separate commit though.

--- //depot/vendor/freebsd/src/usr.sbin/bhyve/acpi.c
+++ //depot/user/jhb/bhyve/usr.sbin/bhyve/acpi.c
@@ -85,10 +87,6 @@
 #define BHYVE_ASL_SUFFIX       ".aml"
 #define BHYVE_ASL_COMPILER     "/usr/sbin/iasl"
 
-#define BHYVE_PM1A_EVT_ADDR    0x400
-#define BHYVE_PM1A_CNT_ADDR    0x404
-#define BHYVE_PM_TIMER_ADDR    0x408
-
 static int basl_keep_temps;
 static int basl_verbose_iasl;
 static int basl_ncpu;
@@ -285,11 +290,11 @@
        EFPRINTF(fp, "[0001]\t\tSubtable Type : 02\n");
        EFPRINTF(fp, "[0001]\t\tLength : 0A\n");
        EFPRINTF(fp, "[0001]\t\tBus : 00\n");
-       EFPRINTF(fp, "[0001]\t\tSource : 09\n");
-       EFPRINTF(fp, "[0004]\t\tInterrupt : 00000009\n");
+       EFPRINTF(fp, "[0001]\t\tSource : %02X\n", SCI_INT);
+       EFPRINTF(fp, "[0004]\t\tInterrupt : %08X\n", SCI_INT);
        EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0000\n");
-       EFPRINTF(fp, "\t\t\tPolarity : 0\n");
-       EFPRINTF(fp, "\t\t\tTrigger Mode : 0\n");
+       EFPRINTF(fp, "\t\t\tPolarity : 3\n");
+       EFPRINTF(fp, "\t\t\tTrigger Mode : 3\n");
        EFPRINTF(fp, "\n");
 
        /* Local APIC NMI is connected to LINT 1 on all CPUs */
@@ -336,23 +341,27 @@
            basl_acpi_base + FACS_OFFSET);
        EFPRINTF(fp, "[0004]\t\tDSDT Address : %08X\n",
            basl_acpi_base + DSDT_OFFSET);
-       EFPRINTF(fp, "[0001]\t\tModel : 00\n");
+       EFPRINTF(fp, "[0001]\t\tModel : 01\n");
        EFPRINTF(fp, "[0001]\t\tPM Profile : 00 [Unspecified]\n");
-       EFPRINTF(fp, "[0002]\t\tSCI Interrupt : 0009\n");
-       EFPRINTF(fp, "[0004]\t\tSMI Command Port : 00000000\n");
-       EFPRINTF(fp, "[0001]\t\tACPI Enable Value : 00\n");
-       EFPRINTF(fp, "[0001]\t\tACPI Disable Value : 00\n");
+       EFPRINTF(fp, "[0002]\t\tSCI Interrupt : %04X\n",
+           SCI_INT);
+       EFPRINTF(fp, "[0004]\t\tSMI Command Port : %08X\n",
+           SMI_CMD);
+       EFPRINTF(fp, "[0001]\t\tACPI Enable Value : %02X\n",
+           BHYVE_ACPI_ENABLE);
+       EFPRINTF(fp, "[0001]\t\tACPI Disable Value : %02X\n",
+           BHYVE_ACPI_DISABLE);
        EFPRINTF(fp, "[0001]\t\tS4BIOS Command : 00\n");
        EFPRINTF(fp, "[0001]\t\tP-State Control : 00\n");
        EFPRINTF(fp, "[0004]\t\tPM1A Event Block Address : %08X\n",
-                BHYVE_PM1A_EVT_ADDR);
+           PM1A_EVT_ADDR);
        EFPRINTF(fp, "[0004]\t\tPM1B Event Block Address : 00000000\n");
        EFPRINTF(fp, "[0004]\t\tPM1A Control Block Address : %08X\n",
-                BHYVE_PM1A_CNT_ADDR);
+           PM1A_CNT_ADDR);
        EFPRINTF(fp, "[0004]\t\tPM1B Control Block Address : 00000000\n");
        EFPRINTF(fp, "[0004]\t\tPM2 Control Block Address : 00000000\n");
        EFPRINTF(fp, "[0004]\t\tPM Timer Block Address : %08X\n",
-                BHYVE_PM_TIMER_ADDR);
+           IO_PMTMR);
        EFPRINTF(fp, "[0004]\t\tGPE0 Block Address : 00000000\n");
        EFPRINTF(fp, "[0004]\t\tGPE1 Block Address : 00000000\n");
        EFPRINTF(fp, "[0001]\t\tPM1 Event Block Length : 04\n");
@@ -385,7 +394,7 @@
        EFPRINTF(fp, "\t\t\tWBINVD flushes all caches (V1) : 0\n");
        EFPRINTF(fp, "\t\t\tAll CPUs support C1 (V1) : 1\n");
        EFPRINTF(fp, "\t\t\tC2 works on MP system (V1) : 0\n");
-       EFPRINTF(fp, "\t\t\tControl Method Power Button (V1) : 1\n");
+       EFPRINTF(fp, "\t\t\tControl Method Power Button (V1) : 0\n");
        EFPRINTF(fp, "\t\t\tControl Method Sleep Button (V1) : 1\n");
        EFPRINTF(fp, "\t\t\tRTC wake not in fixed reg space (V1) : 0\n");
        EFPRINTF(fp, "\t\t\tRTC can wake system from S4 (V1) : 0\n");
@@ -427,7 +436,7 @@
        EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
        EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n");
        EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n",
-           BHYVE_PM1A_EVT_ADDR);
+           PM1A_EVT_ADDR);
        EFPRINTF(fp, "\n");
        
        EFPRINTF(fp,
@@ -447,7 +456,7 @@
        EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
        EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n");
        EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n",
-           BHYVE_PM1A_CNT_ADDR);
+           PM1A_CNT_ADDR);
        EFPRINTF(fp, "\n");
 
        EFPRINTF(fp,
@@ -479,7 +488,7 @@
        EFPRINTF(fp,
            "[0001]\t\tEncoded Access Width : 03 [DWord Access:32]\n");
        EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n",
-           BHYVE_PM_TIMER_ADDR);
+           IO_PMTMR);
        EFPRINTF(fp, "\n");
 
        EFPRINTF(fp, "[0012]\t\tGPE0 Block : [Generic Address Structure]\n");
--- //depot/vendor/freebsd/src/usr.sbin/bhyve/acpi.h
+++ //depot/user/jhb/bhyve/usr.sbin/bhyve/acpi.h
@@ -29,6 +29,19 @@
 #ifndef _ACPI_H_
 #define _ACPI_H_
 
+#define        SCI_INT                 9
+
+#define        SMI_CMD                 0xb2
+#define        BHYVE_ACPI_ENABLE       0xa0
+#define        BHYVE_ACPI_DISABLE      0xa1
+
+#define        PM1A_EVT_ADDR           0x400
+#define        PM1A_CNT_ADDR           0x404
+
+#define        IO_PMTMR                0x408   /* 4-byte i/o port for the 
timer */
+
+struct vmctx;
+
 int    acpi_build(struct vmctx *ctx, int ncpu);
 
 #endif /* _ACPI_H_ */
--- //depot/vendor/freebsd/src/usr.sbin/bhyve/mevent.c
+++ //depot/user/jhb/bhyve/usr.sbin/bhyve/mevent.c
@@ -135,6 +135,9 @@
        if (mevp->me_type == EVF_TIMER)
                retval = EVFILT_TIMER;
 
+       if (mevp->me_type == EVF_SIGNAL)
+               retval = EVFILT_SIGNAL;
+
        return (retval);
 }
 
@@ -437,7 +440,7 @@
                 * Block awaiting events
                 */
                ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
-               if (ret == -1) {
+               if (ret == -1 && errno != EINTR) {
                        perror("Error return from kevent monitor");
                }
                
--- //depot/vendor/freebsd/src/usr.sbin/bhyve/mevent.h
+++ //depot/user/jhb/bhyve/usr.sbin/bhyve/mevent.h
@@ -32,7 +32,8 @@
 enum ev_type {
        EVF_READ,
        EVF_WRITE,
-       EVF_TIMER
+       EVF_TIMER,
+       EVF_SIGNAL
 };
 
 struct mevent;
--- //depot/vendor/freebsd/src/usr.sbin/bhyve/mptbl.c
+++ //depot/user/jhb/bhyve/usr.sbin/bhyve/mptbl.c
@@ -36,6 +36,7 @@
 #include <stdio.h>
 #include <string.h>
 
+#include "acpi.h"
 #include "bhyverun.h"
 #include "mptbl.h"
 
@@ -227,13 +228,21 @@
                        mpie->int_type = INTENTRY_TYPE_INT;
                        mpie->src_bus_irq = 0;
                        break;
+               case SCI_INT:
+                       /* ACPI SCI is level triggered and active-lo. */
+                       mpie->int_flags = INTENTRY_FLAGS_POLARITY_ACTIVELO |
+                           INTENTRY_FLAGS_TRIGGER_LEVEL;
+                       mpie->int_type = INTENTRY_TYPE_INT;
+                       mpie->src_bus_irq = SCI_INT;
+                       break;
                case 5:
                case 10:
                case 11:
                        /*
-                        * PCI Irqs set to level triggered.
+                        * PCI Irqs set to level triggered and active-lo.
                         */
-                       mpie->int_flags = INTENTRY_FLAGS_TRIGGER_LEVEL;
+                       mpie->int_flags = INTENTRY_FLAGS_POLARITY_ACTIVELO |
+                           INTENTRY_FLAGS_TRIGGER_LEVEL;
                        mpie->src_bus_id = 0;
                        /* fall through.. */
                default:
--- //depot/vendor/freebsd/src/usr.sbin/bhyve/pm.c
+++ //depot/user/jhb/bhyve/usr.sbin/bhyve/pm.c
@@ -29,11 +29,20 @@
 __FBSDID("$FreeBSD: head/usr.sbin/bhyve/pm.c 259826 2013-12-24 16:14:19Z jhb 
$");
 
 #include <sys/types.h>
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <pthread.h>
+#include <signal.h>
+#include <vmmapi.h>
 
+#include "acpi.h"
 #include "inout.h"
+#include "mevent.h"
 
-#define        PM1A_EVT_ADDR   0x400
-#define        PM1A_CNT_ADDR   0x404
+static pthread_mutex_t pm_lock = PTHREAD_MUTEX_INITIALIZER;
+static struct mevent *power_button;
+static sig_t old_power_handler;
 
 /*
  * Reset Control register at I/O port 0xcf9.  Bit 2 forces a system
@@ -63,12 +73,75 @@
 INOUT_PORT(reset_reg, 0xCF9, IOPORT_F_INOUT, reset_handler);
 
 /*
+ * ACPI's SCI is a level-triggered interrupt.
+ */
+static int sci_active;
+
+static void
+sci_assert(struct vmctx *ctx)
+{
+
+       if (sci_active)
+               return;
+       vm_ioapic_assert_irq(ctx, SCI_INT);
+       sci_active = 1;
+}
+
+static void
+sci_deassert(struct vmctx *ctx)
+{
+
+       if (!sci_active)
+               return;
+       vm_ioapic_deassert_irq(ctx, SCI_INT);
+       sci_active = 0;
+}
+
+/*
  * Power Management 1 Event Registers
  *
- * bhyve doesn't support any power management events currently, so the
- * status register always returns zero.  The enable register preserves
- * its value but has no effect.
+ * The only power management event supported is a power button upon
+ * receiving SIGTERM.
  */
+static uint16_t pm1_enable, pm1_status;
+
+#define        PM1_TMR_STS             0x0001
+#define        PM1_BM_STS              0x0010
+#define        PM1_GBL_STS             0x0020
+#define        PM1_PWRBTN_STS          0x0100
+#define        PM1_SLPBTN_STS          0x0200
+#define        PM1_RTC_STS             0x0400
+#define        PM1_WAK_STS             0x8000
+
+#define        PM1_TMR_EN              0x0001
+#define        PM1_GBL_EN              0x0020
+#define        PM1_PWRBTN_EN           0x0100
+#define        PM1_SLPBTN_EN           0x0200
+#define        PM1_RTC_EN              0x0400
+
+static void
+sci_update(struct vmctx *ctx)
+{
+       int need_sci;
+
+       /* See if the SCI should be active or not. */
+       need_sci = 0;
+       if ((pm1_enable & PM1_TMR_EN) && (pm1_status & PM1_TMR_STS))
+               need_sci = 1;
+       if ((pm1_enable & PM1_GBL_EN) && (pm1_status & PM1_GBL_STS))
+               need_sci = 1;
+       if ((pm1_enable & PM1_PWRBTN_EN) && (pm1_status & PM1_PWRBTN_STS))
+               need_sci = 1;
+       if ((pm1_enable & PM1_SLPBTN_EN) && (pm1_status & PM1_SLPBTN_STS))
+               need_sci = 1;
+       if ((pm1_enable & PM1_RTC_EN) && (pm1_status & PM1_RTC_STS))
+               need_sci = 1;
+       if (need_sci)
+               sci_assert(ctx);
+       else
+               sci_deassert(ctx);
+}
+
 static int
 pm1_status_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
     uint32_t *eax, void *arg)
@@ -76,8 +150,20 @@
 
        if (bytes != 2)
                return (-1);
+
+       pthread_mutex_lock(&pm_lock);
        if (in)
-               *eax = 0;
+               *eax = pm1_status;
+       else {
+               /*
+                * Writes are only permitted to clear certain bits by
+                * writing 1 to those flags.
+                */
+               pm1_status &= ~(*eax & (PM1_WAK_STS | PM1_RTC_STS |
+                   PM1_SLPBTN_STS | PM1_PWRBTN_STS | PM1_BM_STS));
+               sci_update(ctx);
+       }
+       pthread_mutex_unlock(&pm_lock);
        return (0);
 }
 
@@ -85,25 +171,51 @@
 pm1_enable_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
     uint32_t *eax, void *arg)
 {
-       static uint16_t pm1_enable;
 
        if (bytes != 2)
                return (-1);
+
+       pthread_mutex_lock(&pm_lock);
        if (in)
                *eax = pm1_enable;
-       else
-               pm1_enable = *eax;
+       else {
+               /*
+                * Only permit certain bits to be set.  We never use
+                * the global lock, but ACPI-CA whines profusely if it
+                * can't set GBL_EN.
+                */
+               pm1_enable = *eax & (PM1_PWRBTN_EN | PM1_GBL_EN);
+               sci_update(ctx);
+       }
+       pthread_mutex_unlock(&pm_lock);
        return (0);
 }
 INOUT_PORT(pm1_status, PM1A_EVT_ADDR, IOPORT_F_INOUT, pm1_status_handler);
 INOUT_PORT(pm1_enable, PM1A_EVT_ADDR + 2, IOPORT_F_INOUT, pm1_enable_handler);
 
+static void
+power_button_handler(int signal, enum ev_type type, void *arg)
+{
+       struct vmctx *ctx;
+
+       ctx = arg;
+       pthread_mutex_lock(&pm_lock);
+       if (!(pm1_status & PM1_PWRBTN_STS)) {
+               pm1_status |= PM1_PWRBTN_STS;
+               sci_update(ctx);
+       }
+       pthread_mutex_unlock(&pm_lock);
+}
+
 /*
  * Power Management 1 Control Register
  *
  * This is mostly unimplemented except that we wish to handle writes that
  * set SPL_EN to handle S5 (soft power off).
  */
+static uint16_t pm1_control;
+
+#define        PM1_SCI_EN      0x0001
 #define        PM1_SLP_TYP     0x1c00
 #define        PM1_SLP_EN      0x2000
 #define        PM1_ALWAYS_ZERO 0xc003
@@ -112,7 +224,6 @@
 pm1_control_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
     uint32_t *eax, void *arg)
 {
-       static uint16_t pm1_control;
 
        if (bytes != 2)
                return (-1);
@@ -121,9 +232,11 @@
        else {
                /*
                 * Various bits are write-only or reserved, so force them
-                * to zero in pm1_control.
+                * to zero in pm1_control.  Always preserve SCI_EN as OSPM
+                * can never change it.
                 */
-               pm1_control = *eax & ~(PM1_SLP_EN | PM1_ALWAYS_ZERO);
+               pm1_control = (pm1_control & PM1_SCI_EN) |
+                   (*eax & ~(PM1_SLP_EN | PM1_ALWAYS_ZERO));
 
                /*
                 * If SLP_EN is set, check for S5.  Bhyve's _S5_ method
@@ -137,3 +250,41 @@
        return (0);
 }
 INOUT_PORT(pm1_control, PM1A_CNT_ADDR, IOPORT_F_INOUT, pm1_control_handler);
+
+/*
+ * ACPI SMI Command Register
+ *
+ * This write-only register is used to enable and disable ACPI.
+ */
+static int
+smi_cmd_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+
+       assert(!in);
+       if (bytes != 1)
+               return (-1);
+
+       pthread_mutex_lock(&pm_lock);
+       switch (*eax) {
+       case BHYVE_ACPI_ENABLE:
+               pm1_control |= PM1_SCI_EN;
+               if (power_button == NULL) {
+                       power_button = mevent_add(SIGTERM, EVF_SIGNAL,
+                           power_button_handler, ctx);
+                       old_power_handler = signal(SIGTERM, SIG_IGN);
+               }
+               break;
+       case BHYVE_ACPI_DISABLE:
+               pm1_control &= ~PM1_SCI_EN;
+               if (power_button != NULL) {
+                       mevent_delete(power_button);
+                       power_button = NULL;
+                       signal(SIGTERM, old_power_handler);
+               }
+               break;
+       }
+       pthread_mutex_unlock(&pm_lock);
+       return (0);
+}
+INOUT_PORT(smi_cmd, SMI_CMD, IOPORT_F_OUT, smi_cmd_handler);
--- //depot/vendor/freebsd/src/usr.sbin/bhyve/pmtmr.c
+++ //depot/user/jhb/bhyve/usr.sbin/bhyve/pmtmr.c
@@ -40,6 +40,7 @@
 #include <assert.h>
 #include <pthread.h>
 
+#include "acpi.h"
 #include "inout.h"
 
 /*
@@ -49,11 +50,10 @@
  * This implementation will be 32-bits
  */
 
-#define        IO_PMTMR        0x408   /* 4-byte i/o port for the timer */
-
 #define PMTMR_FREQ     3579545  /* 3.579545MHz */
 
 static pthread_mutex_t pmtmr_mtx;
+static pthread_once_t pmtmr_once = PTHREAD_ONCE_INIT;
 
 static uint64_t        pmtmr_old;
 
@@ -123,6 +123,7 @@
                pmtmr_uptime_old = tsnew;
                pmtmr_old = timespec_to_pmtmr(&tsnew, &tsold);
        }
+       pthread_mutex_init(&pmtmr_mtx, NULL);
 }
 
 static uint32_t
@@ -133,13 +134,7 @@
        uint64_t        pmtmr_new;
        int             error;
 
-       static int      inited = 0;
-
-       if (!inited) {
-               pthread_mutex_init(&pmtmr_mtx, NULL);
-               pmtmr_init();
-               inited = 1;
-       }
+       pthread_once(&pmtmr_once, pmtmr_init);
 
        pthread_mutex_lock(&pmtmr_mtx);
 

> 
> -- 
> John Baldwin
> _______________________________________________
> freebsd-virtualization@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
> To unsubscribe, send any mail to 
> "freebsd-virtualization-unsubscr...@freebsd.org"
> 

-- 
John Baldwin
_______________________________________________
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
"freebsd-virtualization-unsubscr...@freebsd.org"

Reply via email to