Hi John,

On Fri, Dec 27, 2013 at 8:46 AM, John Baldwin <j...@freebsd.org> wrote:
> On Thursday, December 26, 2013 11:37:47 am John Baldwin wrote:
>> On Wednesday, December 25, 2013 10:43:27 am Michael Dexter wrote:
>> >
>> > John,
>> >
>> > This is awesome.
>> >
>> > Can this be triggered from the host to shut down an unresponsive VM?
>> >
>> > If so, syntax?
>>
>> It cannot.  However, we could add a virtual power button that acts like an
>> external ACPI power button.  We could then add a flag to bhyvectl to
>> trigger it.  I will look into that.
>
> I've implemented this.  Rather than doing it via bhyvectl, I turned SIGTERM
> sent to bhyve itself into the virtual power button.  Thus, if you
> 'killall bhyve' or 'pkill bhyve' from the host, the guest will gracefully
> shutdown using S5.  You can still use kill -9 to kill a hung guest (similar
> to the 4 second power button override on real metal).
>
> To do this I fleshed out the ACPI power management support a bit more adding
> more of the implementation of PM1_EVT and PM1_CNT.  I also added an SMI_CMD
> register with commands to enable and disable ACPI (for now all that does is
> enable/disable the virtual power button as well as toggle SCI_EN in PM1_CNT).
> I also added support for raising ACPI's SCI based on settings in PM1_EVT.
>
> I extended mevent to handle EVFILT_SIGNAL events and use a mevent handler for
> SIGTERM to simulate pressing the power button.  This just implements the
> simple fixed-feature power button support via PM1_EVT rather than having a
> control method button (which would require implementing GPE support).
>
> While here, I made sure the SCI interrupt was active-lo / level-triggered,
> and I changed PCI interrupts in the MP Table to explicitly use active-lo
> since they were already explicitly using level-trigger.
>
> I also fixed the pmtmr code to use pthread_once instead of a racy static
> int.  That should probably be a separate commit though.
>

This looks great!

Thanks for adding this functionality.

best
Neel

> --- //depot/vendor/freebsd/src/usr.sbin/bhyve/acpi.c
> +++ //depot/user/jhb/bhyve/usr.sbin/bhyve/acpi.c
> @@ -85,10 +87,6 @@
>  #define BHYVE_ASL_SUFFIX       ".aml"
>  #define BHYVE_ASL_COMPILER     "/usr/sbin/iasl"
>
> -#define BHYVE_PM1A_EVT_ADDR    0x400
> -#define BHYVE_PM1A_CNT_ADDR    0x404
> -#define BHYVE_PM_TIMER_ADDR    0x408
> -
>  static int basl_keep_temps;
>  static int basl_verbose_iasl;
>  static int basl_ncpu;
> @@ -285,11 +290,11 @@
>         EFPRINTF(fp, "[0001]\t\tSubtable Type : 02\n");
>         EFPRINTF(fp, "[0001]\t\tLength : 0A\n");
>         EFPRINTF(fp, "[0001]\t\tBus : 00\n");
> -       EFPRINTF(fp, "[0001]\t\tSource : 09\n");
> -       EFPRINTF(fp, "[0004]\t\tInterrupt : 00000009\n");
> +       EFPRINTF(fp, "[0001]\t\tSource : %02X\n", SCI_INT);
> +       EFPRINTF(fp, "[0004]\t\tInterrupt : %08X\n", SCI_INT);
>         EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0000\n");
> -       EFPRINTF(fp, "\t\t\tPolarity : 0\n");
> -       EFPRINTF(fp, "\t\t\tTrigger Mode : 0\n");
> +       EFPRINTF(fp, "\t\t\tPolarity : 3\n");
> +       EFPRINTF(fp, "\t\t\tTrigger Mode : 3\n");
>         EFPRINTF(fp, "\n");
>
>         /* Local APIC NMI is connected to LINT 1 on all CPUs */
> @@ -336,23 +341,27 @@
>             basl_acpi_base + FACS_OFFSET);
>         EFPRINTF(fp, "[0004]\t\tDSDT Address : %08X\n",
>             basl_acpi_base + DSDT_OFFSET);
> -       EFPRINTF(fp, "[0001]\t\tModel : 00\n");
> +       EFPRINTF(fp, "[0001]\t\tModel : 01\n");
>         EFPRINTF(fp, "[0001]\t\tPM Profile : 00 [Unspecified]\n");
> -       EFPRINTF(fp, "[0002]\t\tSCI Interrupt : 0009\n");
> -       EFPRINTF(fp, "[0004]\t\tSMI Command Port : 00000000\n");
> -       EFPRINTF(fp, "[0001]\t\tACPI Enable Value : 00\n");
> -       EFPRINTF(fp, "[0001]\t\tACPI Disable Value : 00\n");
> +       EFPRINTF(fp, "[0002]\t\tSCI Interrupt : %04X\n",
> +           SCI_INT);
> +       EFPRINTF(fp, "[0004]\t\tSMI Command Port : %08X\n",
> +           SMI_CMD);
> +       EFPRINTF(fp, "[0001]\t\tACPI Enable Value : %02X\n",
> +           BHYVE_ACPI_ENABLE);
> +       EFPRINTF(fp, "[0001]\t\tACPI Disable Value : %02X\n",
> +           BHYVE_ACPI_DISABLE);
>         EFPRINTF(fp, "[0001]\t\tS4BIOS Command : 00\n");
>         EFPRINTF(fp, "[0001]\t\tP-State Control : 00\n");
>         EFPRINTF(fp, "[0004]\t\tPM1A Event Block Address : %08X\n",
> -                BHYVE_PM1A_EVT_ADDR);
> +           PM1A_EVT_ADDR);
>         EFPRINTF(fp, "[0004]\t\tPM1B Event Block Address : 00000000\n");
>         EFPRINTF(fp, "[0004]\t\tPM1A Control Block Address : %08X\n",
> -                BHYVE_PM1A_CNT_ADDR);
> +           PM1A_CNT_ADDR);
>         EFPRINTF(fp, "[0004]\t\tPM1B Control Block Address : 00000000\n");
>         EFPRINTF(fp, "[0004]\t\tPM2 Control Block Address : 00000000\n");
>         EFPRINTF(fp, "[0004]\t\tPM Timer Block Address : %08X\n",
> -                BHYVE_PM_TIMER_ADDR);
> +           IO_PMTMR);
>         EFPRINTF(fp, "[0004]\t\tGPE0 Block Address : 00000000\n");
>         EFPRINTF(fp, "[0004]\t\tGPE1 Block Address : 00000000\n");
>         EFPRINTF(fp, "[0001]\t\tPM1 Event Block Length : 04\n");
> @@ -385,7 +394,7 @@
>         EFPRINTF(fp, "\t\t\tWBINVD flushes all caches (V1) : 0\n");
>         EFPRINTF(fp, "\t\t\tAll CPUs support C1 (V1) : 1\n");
>         EFPRINTF(fp, "\t\t\tC2 works on MP system (V1) : 0\n");
> -       EFPRINTF(fp, "\t\t\tControl Method Power Button (V1) : 1\n");
> +       EFPRINTF(fp, "\t\t\tControl Method Power Button (V1) : 0\n");
>         EFPRINTF(fp, "\t\t\tControl Method Sleep Button (V1) : 1\n");
>         EFPRINTF(fp, "\t\t\tRTC wake not in fixed reg space (V1) : 0\n");
>         EFPRINTF(fp, "\t\t\tRTC can wake system from S4 (V1) : 0\n");
> @@ -427,7 +436,7 @@
>         EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
>         EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word 
> Access:16]\n");
>         EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n",
> -           BHYVE_PM1A_EVT_ADDR);
> +           PM1A_EVT_ADDR);
>         EFPRINTF(fp, "\n");
>
>         EFPRINTF(fp,
> @@ -447,7 +456,7 @@
>         EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
>         EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word 
> Access:16]\n");
>         EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n",
> -           BHYVE_PM1A_CNT_ADDR);
> +           PM1A_CNT_ADDR);
>         EFPRINTF(fp, "\n");
>
>         EFPRINTF(fp,
> @@ -479,7 +488,7 @@
>         EFPRINTF(fp,
>             "[0001]\t\tEncoded Access Width : 03 [DWord Access:32]\n");
>         EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n",
> -           BHYVE_PM_TIMER_ADDR);
> +           IO_PMTMR);
>         EFPRINTF(fp, "\n");
>
>         EFPRINTF(fp, "[0012]\t\tGPE0 Block : [Generic Address Structure]\n");
> --- //depot/vendor/freebsd/src/usr.sbin/bhyve/acpi.h
> +++ //depot/user/jhb/bhyve/usr.sbin/bhyve/acpi.h
> @@ -29,6 +29,19 @@
>  #ifndef _ACPI_H_
>  #define _ACPI_H_
>
> +#define        SCI_INT                 9
> +
> +#define        SMI_CMD                 0xb2
> +#define        BHYVE_ACPI_ENABLE       0xa0
> +#define        BHYVE_ACPI_DISABLE      0xa1
> +
> +#define        PM1A_EVT_ADDR           0x400
> +#define        PM1A_CNT_ADDR           0x404
> +
> +#define        IO_PMTMR                0x408   /* 4-byte i/o port for the 
> timer */
> +
> +struct vmctx;
> +
>  int    acpi_build(struct vmctx *ctx, int ncpu);
>
>  #endif /* _ACPI_H_ */
> --- //depot/vendor/freebsd/src/usr.sbin/bhyve/mevent.c
> +++ //depot/user/jhb/bhyve/usr.sbin/bhyve/mevent.c
> @@ -135,6 +135,9 @@
>         if (mevp->me_type == EVF_TIMER)
>                 retval = EVFILT_TIMER;
>
> +       if (mevp->me_type == EVF_SIGNAL)
> +               retval = EVFILT_SIGNAL;
> +
>         return (retval);
>  }
>
> @@ -437,7 +440,7 @@
>                  * Block awaiting events
>                  */
>                 ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
> -               if (ret == -1) {
> +               if (ret == -1 && errno != EINTR) {
>                         perror("Error return from kevent monitor");
>                 }
>
> --- //depot/vendor/freebsd/src/usr.sbin/bhyve/mevent.h
> +++ //depot/user/jhb/bhyve/usr.sbin/bhyve/mevent.h
> @@ -32,7 +32,8 @@
>  enum ev_type {
>         EVF_READ,
>         EVF_WRITE,
> -       EVF_TIMER
> +       EVF_TIMER,
> +       EVF_SIGNAL
>  };
>
>  struct mevent;
> --- //depot/vendor/freebsd/src/usr.sbin/bhyve/mptbl.c
> +++ //depot/user/jhb/bhyve/usr.sbin/bhyve/mptbl.c
> @@ -36,6 +36,7 @@
>  #include <stdio.h>
>  #include <string.h>
>
> +#include "acpi.h"
>  #include "bhyverun.h"
>  #include "mptbl.h"
>
> @@ -227,13 +228,21 @@
>                         mpie->int_type = INTENTRY_TYPE_INT;
>                         mpie->src_bus_irq = 0;
>                         break;
> +               case SCI_INT:
> +                       /* ACPI SCI is level triggered and active-lo. */
> +                       mpie->int_flags = INTENTRY_FLAGS_POLARITY_ACTIVELO |
> +                           INTENTRY_FLAGS_TRIGGER_LEVEL;
> +                       mpie->int_type = INTENTRY_TYPE_INT;
> +                       mpie->src_bus_irq = SCI_INT;
> +                       break;
>                 case 5:
>                 case 10:
>                 case 11:
>                         /*
> -                        * PCI Irqs set to level triggered.
> +                        * PCI Irqs set to level triggered and active-lo.
>                          */
> -                       mpie->int_flags = INTENTRY_FLAGS_TRIGGER_LEVEL;
> +                       mpie->int_flags = INTENTRY_FLAGS_POLARITY_ACTIVELO |
> +                           INTENTRY_FLAGS_TRIGGER_LEVEL;
>                         mpie->src_bus_id = 0;
>                         /* fall through.. */
>                 default:
> --- //depot/vendor/freebsd/src/usr.sbin/bhyve/pm.c
> +++ //depot/user/jhb/bhyve/usr.sbin/bhyve/pm.c
> @@ -29,11 +29,20 @@
>  __FBSDID("$FreeBSD: head/usr.sbin/bhyve/pm.c 259826 2013-12-24 16:14:19Z jhb 
> $");
>
>  #include <sys/types.h>
> +#include <machine/vmm.h>
> +
> +#include <assert.h>
> +#include <pthread.h>
> +#include <signal.h>
> +#include <vmmapi.h>
>
> +#include "acpi.h"
>  #include "inout.h"
> +#include "mevent.h"
>
> -#define        PM1A_EVT_ADDR   0x400
> -#define        PM1A_CNT_ADDR   0x404
> +static pthread_mutex_t pm_lock = PTHREAD_MUTEX_INITIALIZER;
> +static struct mevent *power_button;
> +static sig_t old_power_handler;
>
>  /*
>   * Reset Control register at I/O port 0xcf9.  Bit 2 forces a system
> @@ -63,12 +73,75 @@
>  INOUT_PORT(reset_reg, 0xCF9, IOPORT_F_INOUT, reset_handler);
>
>  /*
> + * ACPI's SCI is a level-triggered interrupt.
> + */
> +static int sci_active;
> +
> +static void
> +sci_assert(struct vmctx *ctx)
> +{
> +
> +       if (sci_active)
> +               return;
> +       vm_ioapic_assert_irq(ctx, SCI_INT);
> +       sci_active = 1;
> +}
> +
> +static void
> +sci_deassert(struct vmctx *ctx)
> +{
> +
> +       if (!sci_active)
> +               return;
> +       vm_ioapic_deassert_irq(ctx, SCI_INT);
> +       sci_active = 0;
> +}
> +
> +/*
>   * Power Management 1 Event Registers
>   *
> - * bhyve doesn't support any power management events currently, so the
> - * status register always returns zero.  The enable register preserves
> - * its value but has no effect.
> + * The only power management event supported is a power button upon
> + * receiving SIGTERM.
>   */
> +static uint16_t pm1_enable, pm1_status;
> +
> +#define        PM1_TMR_STS             0x0001
> +#define        PM1_BM_STS              0x0010
> +#define        PM1_GBL_STS             0x0020
> +#define        PM1_PWRBTN_STS          0x0100
> +#define        PM1_SLPBTN_STS          0x0200
> +#define        PM1_RTC_STS             0x0400
> +#define        PM1_WAK_STS             0x8000
> +
> +#define        PM1_TMR_EN              0x0001
> +#define        PM1_GBL_EN              0x0020
> +#define        PM1_PWRBTN_EN           0x0100
> +#define        PM1_SLPBTN_EN           0x0200
> +#define        PM1_RTC_EN              0x0400
> +
> +static void
> +sci_update(struct vmctx *ctx)
> +{
> +       int need_sci;
> +
> +       /* See if the SCI should be active or not. */
> +       need_sci = 0;
> +       if ((pm1_enable & PM1_TMR_EN) && (pm1_status & PM1_TMR_STS))
> +               need_sci = 1;
> +       if ((pm1_enable & PM1_GBL_EN) && (pm1_status & PM1_GBL_STS))
> +               need_sci = 1;
> +       if ((pm1_enable & PM1_PWRBTN_EN) && (pm1_status & PM1_PWRBTN_STS))
> +               need_sci = 1;
> +       if ((pm1_enable & PM1_SLPBTN_EN) && (pm1_status & PM1_SLPBTN_STS))
> +               need_sci = 1;
> +       if ((pm1_enable & PM1_RTC_EN) && (pm1_status & PM1_RTC_STS))
> +               need_sci = 1;
> +       if (need_sci)
> +               sci_assert(ctx);
> +       else
> +               sci_deassert(ctx);
> +}
> +
>  static int
>  pm1_status_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
>      uint32_t *eax, void *arg)
> @@ -76,8 +150,20 @@
>
>         if (bytes != 2)
>                 return (-1);
> +
> +       pthread_mutex_lock(&pm_lock);
>         if (in)
> -               *eax = 0;
> +               *eax = pm1_status;
> +       else {
> +               /*
> +                * Writes are only permitted to clear certain bits by
> +                * writing 1 to those flags.
> +                */
> +               pm1_status &= ~(*eax & (PM1_WAK_STS | PM1_RTC_STS |
> +                   PM1_SLPBTN_STS | PM1_PWRBTN_STS | PM1_BM_STS));
> +               sci_update(ctx);
> +       }
> +       pthread_mutex_unlock(&pm_lock);
>         return (0);
>  }
>
> @@ -85,25 +171,51 @@
>  pm1_enable_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
>      uint32_t *eax, void *arg)
>  {
> -       static uint16_t pm1_enable;
>
>         if (bytes != 2)
>                 return (-1);
> +
> +       pthread_mutex_lock(&pm_lock);
>         if (in)
>                 *eax = pm1_enable;
> -       else
> -               pm1_enable = *eax;
> +       else {
> +               /*
> +                * Only permit certain bits to be set.  We never use
> +                * the global lock, but ACPI-CA whines profusely if it
> +                * can't set GBL_EN.
> +                */
> +               pm1_enable = *eax & (PM1_PWRBTN_EN | PM1_GBL_EN);
> +               sci_update(ctx);
> +       }
> +       pthread_mutex_unlock(&pm_lock);
>         return (0);
>  }
>  INOUT_PORT(pm1_status, PM1A_EVT_ADDR, IOPORT_F_INOUT, pm1_status_handler);
>  INOUT_PORT(pm1_enable, PM1A_EVT_ADDR + 2, IOPORT_F_INOUT, 
> pm1_enable_handler);
>
> +static void
> +power_button_handler(int signal, enum ev_type type, void *arg)
> +{
> +       struct vmctx *ctx;
> +
> +       ctx = arg;
> +       pthread_mutex_lock(&pm_lock);
> +       if (!(pm1_status & PM1_PWRBTN_STS)) {
> +               pm1_status |= PM1_PWRBTN_STS;
> +               sci_update(ctx);
> +       }
> +       pthread_mutex_unlock(&pm_lock);
> +}
> +
>  /*
>   * Power Management 1 Control Register
>   *
>   * This is mostly unimplemented except that we wish to handle writes that
>   * set SPL_EN to handle S5 (soft power off).
>   */
> +static uint16_t pm1_control;
> +
> +#define        PM1_SCI_EN      0x0001
>  #define        PM1_SLP_TYP     0x1c00
>  #define        PM1_SLP_EN      0x2000
>  #define        PM1_ALWAYS_ZERO 0xc003
> @@ -112,7 +224,6 @@
>  pm1_control_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
>      uint32_t *eax, void *arg)
>  {
> -       static uint16_t pm1_control;
>
>         if (bytes != 2)
>                 return (-1);
> @@ -121,9 +232,11 @@
>         else {
>                 /*
>                  * Various bits are write-only or reserved, so force them
> -                * to zero in pm1_control.
> +                * to zero in pm1_control.  Always preserve SCI_EN as OSPM
> +                * can never change it.
>                  */
> -               pm1_control = *eax & ~(PM1_SLP_EN | PM1_ALWAYS_ZERO);
> +               pm1_control = (pm1_control & PM1_SCI_EN) |
> +                   (*eax & ~(PM1_SLP_EN | PM1_ALWAYS_ZERO));
>
>                 /*
>                  * If SLP_EN is set, check for S5.  Bhyve's _S5_ method
> @@ -137,3 +250,41 @@
>         return (0);
>  }
>  INOUT_PORT(pm1_control, PM1A_CNT_ADDR, IOPORT_F_INOUT, pm1_control_handler);
> +
> +/*
> + * ACPI SMI Command Register
> + *
> + * This write-only register is used to enable and disable ACPI.
> + */
> +static int
> +smi_cmd_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
> +    uint32_t *eax, void *arg)
> +{
> +
> +       assert(!in);
> +       if (bytes != 1)
> +               return (-1);
> +
> +       pthread_mutex_lock(&pm_lock);
> +       switch (*eax) {
> +       case BHYVE_ACPI_ENABLE:
> +               pm1_control |= PM1_SCI_EN;
> +               if (power_button == NULL) {
> +                       power_button = mevent_add(SIGTERM, EVF_SIGNAL,
> +                           power_button_handler, ctx);
> +                       old_power_handler = signal(SIGTERM, SIG_IGN);
> +               }
> +               break;
> +       case BHYVE_ACPI_DISABLE:
> +               pm1_control &= ~PM1_SCI_EN;
> +               if (power_button != NULL) {
> +                       mevent_delete(power_button);
> +                       power_button = NULL;
> +                       signal(SIGTERM, old_power_handler);
> +               }
> +               break;
> +       }
> +       pthread_mutex_unlock(&pm_lock);
> +       return (0);
> +}
> +INOUT_PORT(smi_cmd, SMI_CMD, IOPORT_F_OUT, smi_cmd_handler);
> --- //depot/vendor/freebsd/src/usr.sbin/bhyve/pmtmr.c
> +++ //depot/user/jhb/bhyve/usr.sbin/bhyve/pmtmr.c
> @@ -40,6 +40,7 @@
>  #include <assert.h>
>  #include <pthread.h>
>
> +#include "acpi.h"
>  #include "inout.h"
>
>  /*
> @@ -49,11 +50,10 @@
>   * This implementation will be 32-bits
>   */
>
> -#define        IO_PMTMR        0x408   /* 4-byte i/o port for the timer */
> -
>  #define PMTMR_FREQ     3579545  /* 3.579545MHz */
>
>  static pthread_mutex_t pmtmr_mtx;
> +static pthread_once_t pmtmr_once = PTHREAD_ONCE_INIT;
>
>  static uint64_t        pmtmr_old;
>
> @@ -123,6 +123,7 @@
>                 pmtmr_uptime_old = tsnew;
>                 pmtmr_old = timespec_to_pmtmr(&tsnew, &tsold);
>         }
> +       pthread_mutex_init(&pmtmr_mtx, NULL);
>  }
>
>  static uint32_t
> @@ -133,13 +134,7 @@
>         uint64_t        pmtmr_new;
>         int             error;
>
> -       static int      inited = 0;
> -
> -       if (!inited) {
> -               pthread_mutex_init(&pmtmr_mtx, NULL);
> -               pmtmr_init();
> -               inited = 1;
> -       }
> +       pthread_once(&pmtmr_once, pmtmr_init);
>
>         pthread_mutex_lock(&pmtmr_mtx);
>
>
>>
>> --
>> John Baldwin
>> _______________________________________________
>> freebsd-virtualization@freebsd.org mailing list
>> http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
>> To unsubscribe, send any mail to 
>> "freebsd-virtualization-unsubscr...@freebsd.org"
>>
>
> --
> John Baldwin
> _______________________________________________
> freebsd-virtualization@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
> To unsubscribe, send any mail to 
> "freebsd-virtualization-unsubscr...@freebsd.org"
_______________________________________________
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
"freebsd-virtualization-unsubscr...@freebsd.org"

Reply via email to