Author: dfr
Date: Wed Mar 11 15:30:12 2009
New Revision: 189699
URL: http://svn.freebsd.org/changeset/base/189699

Log:
  Merge in support for Xen HVM on amd64 architecture.

Added:
  head/sys/amd64/conf/XENHVM   (contents, props changed)
     - copied, changed from r189614, user/dfr/xenhvm/7/sys/amd64/conf/XENHVM
  head/sys/amd64/include/xen/   (props changed)
     - copied from r189614, user/dfr/xenhvm/7/sys/amd64/include/xen/
  head/sys/dev/xen/xenpci/   (props changed)
     - copied from r189614, user/dfr/xenhvm/7/sys/dev/xen/xenpci/
  head/sys/xen/features.h
     - copied unchanged from r189614, user/dfr/xenhvm/7/sys/xen/features.h
  head/sys/xen/reboot.c
     - copied unchanged from r189614, user/dfr/xenhvm/7/sys/xen/reboot.c
Modified:
  head/sys/   (props changed)
  head/sys/amd64/amd64/machdep.c
  head/sys/amd64/include/pcpu.h
  head/sys/amd64/include/xen/xenvar.h
  head/sys/conf/files
  head/sys/conf/options.amd64
  head/sys/contrib/pf/   (props changed)
  head/sys/dev/ata/ata-usb.c   (props changed)
  head/sys/dev/cxgb/   (props changed)
  head/sys/dev/sound/usb/uaudio.c   (props changed)
  head/sys/dev/sound/usb/uaudio.h   (props changed)
  head/sys/dev/sound/usb/uaudio_pcm.c   (props changed)
  head/sys/dev/sound/usb/uaudioreg.h   (props changed)
  head/sys/dev/usb/usb.h   (props changed)
  head/sys/dev/usb/usb_if.m   (props changed)
  head/sys/dev/usb/usbdevs   (props changed)
  head/sys/dev/usb/usbhid.h   (props changed)
  head/sys/dev/xen/balloon/balloon.c
  head/sys/dev/xen/blkfront/blkfront.c
  head/sys/dev/xen/console/console.c
  head/sys/dev/xen/console/xencons_ring.c
  head/sys/dev/xen/netfront/   (props changed)
  head/sys/dev/xen/netfront/netfront.c
  head/sys/dev/xen/xenpci/machine_reboot.c
  head/sys/i386/include/xen/xenpmap.h
  head/sys/xen/evtchn.h   (props changed)
  head/sys/xen/evtchn/evtchn.c
  head/sys/xen/evtchn/evtchn_dev.c
  head/sys/xen/features.c
  head/sys/xen/gnttab.c
  head/sys/xen/gnttab.h
  head/sys/xen/hypervisor.h   (contents, props changed)
  head/sys/xen/interface/arch-x86/xen.h
  head/sys/xen/interface/hvm/params.h
  head/sys/xen/xen_intr.h   (contents, props changed)
  head/sys/xen/xenbus/xenbus_probe.c
  head/sys/xen/xenbus/xenbus_xs.c

Modified: head/sys/amd64/amd64/machdep.c
==============================================================================
--- head/sys/amd64/amd64/machdep.c      Wed Mar 11 14:55:04 2009        
(r189698)
+++ head/sys/amd64/amd64/machdep.c      Wed Mar 11 15:30:12 2009        
(r189699)
@@ -1494,6 +1494,14 @@ hammer_time(u_int64_t modulep, u_int64_t
        if (env != NULL)
                strlcpy(kernelname, env, sizeof(kernelname));
 
+#ifdef XENHVM
+       if (inw(0x10) == 0x49d2) {
+               if (bootverbose)
+                       printf("Xen detected: disabling emulated block and network devices\n");
+               outw(0x10, 3);
+       }
+#endif
+
        /* Location of kernel stack for locore */
        return ((u_int64_t)thread0.td_pcb);
 }

Copied and modified: head/sys/amd64/conf/XENHVM (from r189614, 
user/dfr/xenhvm/7/sys/amd64/conf/XENHVM)
==============================================================================
--- user/dfr/xenhvm/7/sys/amd64/conf/XENHVM     Tue Mar 10 10:59:30 2009        
(r189614, copy source)
+++ head/sys/amd64/conf/XENHVM  Wed Mar 11 15:30:12 2009        (r189699)
@@ -1,8 +1,8 @@
 #
 # XENHVM -- Xen HVM kernel configuration file for FreeBSD/amd64
 #
-# For more information on this file, please read the handbook section on
-# Kernel Configuration Files:
+# For more information on this file, please read the config(5) manual page,
+# and/or the handbook section on Kernel Configuration Files:
 #
 #    http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
 #
@@ -19,11 +19,17 @@
 # $FreeBSD$
 
 cpu            HAMMER
-ident          XENHVM
+ident          GENERIC
 
 # To statically compile in device wiring instead of /boot/device.hints
 #hints         "GENERIC.hints"         # Default places to look for devices.
 
+# Use the following to compile in values accessible to the kernel
+# through getenv() (or kenv(1) in userland). The format of the file
+# is 'variable=value', see kenv(1)
+#
+# env          "GENERIC.env"
+
 makeoptions    DEBUG=-g                # Build kernel with gdb(1) debug symbols
 makeoptions    MODULES_OVERRIDE=""
 
@@ -31,7 +37,7 @@ options       SCHED_ULE               # ULE scheduler
 options        PREEMPTION              # Enable kernel thread preemption
 options        INET                    # InterNETworking
 options        INET6                   # IPv6 communications protocols
-options        SCTP                    # Stream Control Transmission Protocol 
+options        SCTP                    # Stream Control Transmission Protocol
 options        FFS                     # Berkeley Fast Filesystem
 options        SOFTUPDATES             # Enable FFS soft updates support
 options        UFS_ACL                 # Support for access control lists
@@ -42,18 +48,18 @@ options     NFSCLIENT               # Network Filesystem
 options        NFSSERVER               # Network Filesystem Server
 options        NFSLOCKD                # Network Lock Manager
 options        NFS_ROOT                # NFS usable as /, requires NFSCLIENT
-options        NTFS                    # NT File System
 options        MSDOSFS                 # MSDOS Filesystem
 options        CD9660                  # ISO 9660 Filesystem
 options        PROCFS                  # Process filesystem (requires PSEUDOFS)
 options        PSEUDOFS                # Pseudo-filesystem framework
 options        GEOM_PART_GPT           # GUID Partition Tables.
 options        GEOM_LABEL              # Provides labelization
-options        COMPAT_43TTY            # BSD 4.3 TTY compat [KEEP THIS!]
+options        COMPAT_43TTY            # BSD 4.3 TTY compat (sgtty)
 options        COMPAT_IA32             # Compatible with i386 binaries
 options        COMPAT_FREEBSD4         # Compatible with FreeBSD4
 options        COMPAT_FREEBSD5         # Compatible with FreeBSD5
 options        COMPAT_FREEBSD6         # Compatible with FreeBSD6
+options        COMPAT_FREEBSD7         # Compatible with FreeBSD7
 options        SCSI_DELAY=5000         # Delay (in ms) before probing SCSI
 options        KTRACE                  # ktrace(1) support
 options        STACK                   # stack(9) support
@@ -62,15 +68,20 @@ options     SYSVMSG                 # SYSV-style message 
 options        SYSVSEM                 # SYSV-style semaphores
 options        _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
 options        KBD_INSTALL_CDEV        # install a CDEV entry in /dev
-options        ADAPTIVE_GIANT          # Giant mutex is adaptive.
 options        STOP_NMI                # Stop CPUS using NMI instead of IPI
+options        HWPMC_HOOKS             # Necessary kernel hooks for hwpmc(4)
 options        AUDIT                   # Security event auditing
 #options       KDTRACE_FRAME           # Ensure frames are compiled in
 #options       KDTRACE_HOOKS           # Kernel DTrace hooks
 
-options                KDB
-options                DDB
-options                GDB
+# Debugging for use in -current
+options        KDB                     # Enable kernel debugger support.
+options        DDB                     # Support DDB.
+options        GDB                     # Support remote GDB.
+options        INVARIANTS              # Enable calls of extra sanity checking
+options        INVARIANT_SUPPORT       # Extra sanity checks of internal structures, required by INVARIANTS
+options        WITNESS                 # Enable checks to detect deadlocks and cycles
+options        WITNESS_SKIPSPIN        # Don't run witness on spinlocks for speed
 
 # Make an SMP-capable kernel by default
 options        SMP                     # Symmetric MultiProcessor Kernel
@@ -107,6 +118,7 @@ device              cd              # CD
 device         pass            # Passthrough device (direct SCSI access)
 device         ses             # SCSI Environmental Services (and SAF-TE)
 
+
 # atkbdc0 controls both the keyboard and the PS/2 mouse
 device         atkbdc          # AT keyboard controller
 device         atkbd           # AT keyboard
@@ -124,7 +136,6 @@ device              sc
 device         agp             # support several AGP chipsets
 
 # Serial (COM) ports
-device         sio             # 8250, 16[45]50 based serial ports
 device         uart            # Generic UART driver
 
 # PCI Ethernet NICs that use the common MII bus controller code.
@@ -136,10 +147,8 @@ device             re              # RealTek 
8139C+/8169/8169S/
 device         loop            # Network loopback
 device         random          # Entropy device
 device         ether           # Ethernet support
-device         sl              # Kernel SLIP
-device         ppp             # Kernel PPP
 device         tun             # Packet tunnel.
-device         pty             # Pseudo-ttys (telnet etc)
+device         pty             # BSD-style compatibility pseudo ttys
 device         md              # Memory "disks"
 device         gif             # IPv6 and IPv4 tunneling
 device         faith           # IPv6-to-IPv4 relaying (translation)
@@ -149,4 +158,3 @@ device              firmware        # firmware assist modul
 # Be aware of the administrative consequences of enabling this!
 # Note that 'bpf' is required for DHCP.
 device         bpf             # Berkeley packet filter
-

Modified: head/sys/amd64/include/pcpu.h
==============================================================================
--- head/sys/amd64/include/pcpu.h       Wed Mar 11 14:55:04 2009        
(r189698)
+++ head/sys/amd64/include/pcpu.h       Wed Mar 11 15:30:12 2009        
(r189699)
@@ -33,6 +33,24 @@
 #error "sys/cdefs.h is a prerequisite for this file"
 #endif
 
+#if defined(XEN) || defined(XENHVM)
+#ifndef NR_VIRQS
+#define        NR_VIRQS        24
+#endif
+#ifndef NR_IPIS
+#define        NR_IPIS         2
+#endif
+#endif
+
+#ifdef XENHVM
+#define PCPU_XEN_FIELDS                                                        
\
+       ;                                                               \
+       unsigned int pc_last_processed_l1i;                             \
+       unsigned int pc_last_processed_l2i
+#else
+#define PCPU_XEN_FIELDS
+#endif
+
 /*
  * The SMP parts are setup in pmap.c and locore.s for the BSP, and
  * mp_machdep.c sets up the data for the AP's to "see" when they awake.
@@ -49,7 +67,8 @@
        register_t pc_scratch_rsp;      /* User %rsp in syscall */      \
        u_int   pc_apic_id;                                             \
        u_int   pc_acpi_id;             /* ACPI CPU id */               \
-       struct user_segment_descriptor  *pc_gs32p
+       struct user_segment_descriptor  *pc_gs32p                       \
+       PCPU_XEN_FIELDS
 
 #ifdef _KERNEL
 

Modified: head/sys/amd64/include/xen/xenvar.h
==============================================================================
--- user/dfr/xenhvm/7/sys/amd64/include/xen/xenvar.h    Tue Mar 10 10:59:30 
2009        (r189614)
+++ head/sys/amd64/include/xen/xenvar.h Wed Mar 11 15:30:12 2009        
(r189699)
@@ -71,6 +71,7 @@ machtophys(vm_paddr_t ma)
 #define MFNTOPFN(ma)   (ma)
 
 #define set_phys_to_machine(pfn, mfn)  ((void)0)
+#define phys_to_machine_mapping_valid(pfn)     (TRUE)
 #define PT_UPDATES_FLUSH()             ((void)0)
 
 #else

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files Wed Mar 11 14:55:04 2009        (r189698)
+++ head/sys/conf/files Wed Mar 11 15:30:12 2009        (r189699)
@@ -2758,21 +2758,24 @@ gnu/fs/xfs/xfs_iomap.c          optional xfs \
 gnu/fs/xfs/xfs_behavior.c      optional xfs \
        compile-with "${NORMAL_C} -I$S/gnu/fs/xfs/FreeBSD -I$S/gnu/fs/xfs/FreeBSD/support -I$S/gnu/fs/xfs"
 
-xen/gnttab.c                   optional xen
-xen/features.c                 optional xen
-xen/evtchn/evtchn.c            optional xen
-xen/evtchn/evtchn_dev.c                optional xen
-xen/xenbus/xenbus_client.c     optional xen
-xen/xenbus/xenbus_comms.c      optional xen
-xen/xenbus/xenbus_dev.c                optional xen
-xen/xenbus/xenbus_if.m         optional xen
-xen/xenbus/xenbus_probe.c      optional xen
-#xen/xenbus/xenbus_probe_backend.c      optional xen
-xen/xenbus/xenbus_xs.c         optional xen
-dev/xen/console/console.c      optional xen
-dev/xen/console/xencons_ring.c optional xen
-dev/xen/blkfront/blkfront.c    optional xen
-dev/xen/netfront/netfront.c    optional xen
-#dev/xen/xenpci/xenpci.c        optional xen
-#xen/xenbus/xenbus_newbus.c    optional xenhvm
+xen/gnttab.c                   optional xen | xenhvm
+xen/features.c                 optional xen | xenhvm
+xen/evtchn/evtchn.c            optional xen
+xen/evtchn/evtchn_dev.c                optional xen | xenhvm
+xen/reboot.c                   optional xen
+xen/xenbus/xenbus_client.c     optional xen | xenhvm
+xen/xenbus/xenbus_comms.c      optional xen | xenhvm
+xen/xenbus/xenbus_dev.c                optional xen | xenhvm
+xen/xenbus/xenbus_if.m         optional xen | xenhvm
+xen/xenbus/xenbus_probe.c      optional xen | xenhvm
+#xen/xenbus/xenbus_probe_backend.c     optional xen
+xen/xenbus/xenbus_xs.c         optional xen | xenhvm
+dev/xen/balloon/balloon.c      optional xen | xenhvm
+dev/xen/console/console.c      optional xen
+dev/xen/console/xencons_ring.c optional xen
+dev/xen/blkfront/blkfront.c    optional xen | xenhvm
+dev/xen/netfront/netfront.c    optional xen | xenhvm
+dev/xen/xenpci/xenpci.c                optional xenpci
+dev/xen/xenpci/evtchn.c         optional xenpci
+dev/xen/xenpci/machine_reboot.c optional xenpci
 

Modified: head/sys/conf/options.amd64
==============================================================================
--- head/sys/conf/options.amd64 Wed Mar 11 14:55:04 2009        (r189698)
+++ head/sys/conf/options.amd64 Wed Mar 11 15:30:12 2009        (r189699)
@@ -57,3 +57,5 @@ KDTRACE_FRAME         opt_kdtrace.h
 
 # BPF just-in-time compiler
 BPF_JITTER             opt_bpf.h
+
+XENHVM                 opt_global.h

Modified: head/sys/dev/xen/balloon/balloon.c
==============================================================================
--- head/sys/dev/xen/balloon/balloon.c  Wed Mar 11 14:55:04 2009        
(r189698)
+++ head/sys/dev/xen/balloon/balloon.c  Wed Mar 11 15:30:12 2009        
(r189699)
@@ -34,11 +34,24 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/lock.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
 #include <sys/mutex.h>
+#include <sys/sysctl.h>
 
-#include <machine/hypervisor-ifs.h>
-#include <machine/xen-os.h>
-#include <machine/xenbus.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenfunc.h>
+#include <machine/xen/xenvar.h>
+#include <xen/hypervisor.h>
+#include <xen/xenbus/xenbusvar.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver");
+
+struct mtx balloon_mutex;
 
 /*
  * Protects atomic reservation decrease/increase against concurrent increases.
@@ -46,23 +59,44 @@ __FBSDID("$FreeBSD$");
  * balloon lists.
  */
 struct mtx balloon_lock;
-#ifdef notyet
-
-/* We aim for 'current allocation' == 'target allocation'. */
-static unsigned long current_pages;
-static unsigned long target_pages;
 
-/* VM /proc information for memory */
-extern unsigned long totalram_pages;
+/* We increase/decrease in batches which fit in a page */
+static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
+#define ARRAY_SIZE(A)  (sizeof(A) / sizeof(A[0]))
+
+struct balloon_stats {
+       /* We aim for 'current allocation' == 'target allocation'. */
+       unsigned long current_pages;
+       unsigned long target_pages;
+       /* We may hit the hard limit in Xen. If we do then we remember it. */
+       unsigned long hard_limit;
+       /*
+        * Drivers may alter the memory reservation independently, but they
+        * must inform the balloon driver so we avoid hitting the hard limit.
+        */
+       unsigned long driver_pages;
+       /* Number of pages in high- and low-memory balloons. */
+       unsigned long balloon_low;
+       unsigned long balloon_high;
+};
 
-/* We may hit the hard limit in Xen. If we do then we remember it. */
-static unsigned long hard_limit;
+static struct balloon_stats balloon_stats;
+#define bs balloon_stats
 
-/*
- * Drivers may alter the memory reservation independently, but they must
- * inform the balloon driver so that we can avoid hitting the hard limit.
- */
-static unsigned long driver_pages;
+SYSCTL_DECL(_dev_xen);
+SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD,
+    &bs.current_pages, 0, "Current allocation");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD,
+    &bs.target_pages, 0, "Target allocation");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD,
+    &bs.driver_pages, 0, "Driver pages");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD,
+    &bs.hard_limit, 0, "Xen hard limit");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD,
+    &bs.balloon_low, 0, "Low-mem balloon");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD,
+    &bs.balloon_high, 0, "High-mem balloon");
 
 struct balloon_entry {
        vm_page_t page;
@@ -72,9 +106,6 @@ struct balloon_entry {
 /* List of ballooned pages, threaded through the mem_map array. */
 static STAILQ_HEAD(,balloon_entry) ballooned_pages;
 
-static unsigned long balloon_low, balloon_high;
-
-
 /* Main work function, always executed in process context. */
 static void balloon_process(void *unused);
 
@@ -89,10 +120,10 @@ balloon_append(vm_page_t page)
 {
        struct balloon_entry *entry;
 
-       entry = malloc(sizeof(struct balloon_entry), M_WAITOK);
-
+       entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK);
+       entry->page = page;
        STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
-       balloon_low++;
+       bs.balloon_low++;
 }
 
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
@@ -111,13 +142,13 @@ balloon_retrieve(void)
        page = entry->page;
        free(entry, M_DEVBUF);
        
-       balloon_low--;
+       bs.balloon_low--;
 
        return page;
 }
 
 static void 
-balloon_alarm(unsigned long unused)
+balloon_alarm(void *unused)
 {
        wakeup(balloon_process);
 }
@@ -125,17 +156,56 @@ balloon_alarm(unsigned long unused)
 static unsigned long 
 current_target(void)
 {
-       unsigned long target = min(target_pages, hard_limit);
-       if (target > (current_pages + balloon_low + balloon_high))
-               target = current_pages + balloon_low + balloon_high;
+       unsigned long target = min(bs.target_pages, bs.hard_limit);
+       if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
+               target = bs.current_pages + bs.balloon_low + bs.balloon_high;
        return target;
 }
 
+static unsigned long
+minimum_target(void)
+{
+#ifdef XENHVM
+#define max_pfn physmem
+#endif
+       unsigned long min_pages, curr_pages = current_target();
+
+#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
+       /* Simple continuous piecewiese linear function:
+        *  max MiB -> min MiB  gradient
+        *       0         0
+        *      16        16
+        *      32        24
+        *     128        72    (1/2)
+        *     512       168    (1/4)
+        *    2048       360    (1/8)
+        *    8192       552    (1/32)
+        *   32768      1320
+        *  131072      4392
+        */
+       if (max_pfn < MB2PAGES(128))
+               min_pages = MB2PAGES(8) + (max_pfn >> 1);
+       else if (max_pfn < MB2PAGES(512))
+               min_pages = MB2PAGES(40) + (max_pfn >> 2);
+       else if (max_pfn < MB2PAGES(2048))
+               min_pages = MB2PAGES(104) + (max_pfn >> 3);
+       else
+               min_pages = MB2PAGES(296) + (max_pfn >> 5);
+#undef MB2PAGES
+
+       /* Don't enforce growth */
+       return min(min_pages, curr_pages);
+#ifndef CONFIG_XEN
+#undef max_pfn
+#endif
+}
+
 static int 
 increase_reservation(unsigned long nr_pages)
 {
-       unsigned long *mfn_list, pfn, i, flags;
-       struct page   *page;
+       unsigned long  pfn, i;
+       struct balloon_entry *entry;
+       vm_page_t      page;
        long           rc;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
@@ -143,64 +213,81 @@ increase_reservation(unsigned long nr_pa
                .domid        = DOMID_SELF
        };
 
-       if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
-               nr_pages = PAGE_SIZE / sizeof(unsigned long);
+       if (nr_pages > ARRAY_SIZE(frame_list))
+               nr_pages = ARRAY_SIZE(frame_list);
 
-       mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
-       if (mfn_list == NULL)
-               return ENOMEM;
+       mtx_lock(&balloon_lock);
 
+       for (entry = STAILQ_FIRST(&ballooned_pages), i = 0;
+            i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) {
+               KASSERT(entry, ("ballooned_pages list corrupt"));
+               page = entry->page;
+               frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
+       }
 
-       reservation.extent_start = mfn_list;
+       set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents   = nr_pages;
        rc = HYPERVISOR_memory_op(
-               XENMEM_increase_reservation, &reservation);
+               XENMEM_populate_physmap, &reservation);
        if (rc < nr_pages) {
-               int ret;
-               /* We hit the Xen hard limit: reprobe. */
-               reservation.extent_start = mfn_list;
-               reservation.nr_extents   = rc;
-               ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-                               &reservation);
-               PANIC_IF(ret != rc);
-               hard_limit = current_pages + rc - driver_pages;
+               if (rc > 0) {
+                       int ret;
+
+                       /* We hit the Xen hard limit: reprobe. */
+                       reservation.nr_extents = rc;
+                       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+                                       &reservation);
+                       KASSERT(ret == rc, ("HYPERVISOR_memory_op failed"));
+               }
+               if (rc >= 0)
+                       bs.hard_limit = (bs.current_pages + rc -
+                                        bs.driver_pages);
                goto out;
        }
 
        for (i = 0; i < nr_pages; i++) {
                page = balloon_retrieve();
-               PANIC_IF(page == NULL);
+               KASSERT(page, ("balloon_retrieve failed"));
 
                pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
-               PANIC_IF(phys_to_machine_mapping_valid(pfn));
+               KASSERT((xen_feature(XENFEAT_auto_translated_physmap) ||
+                       !phys_to_machine_mapping_valid(pfn)),
+                   ("auto translated physmap but mapping is valid"));
+
+               set_phys_to_machine(pfn, frame_list[i]);
+
+#ifndef XENHVM
+               /* Link back into the page tables if not highmem. */
+               if (pfn < max_low_pfn) {
+                       int ret;
+                       ret = HYPERVISOR_update_va_mapping(
+                               (unsigned long)__va(pfn << PAGE_SHIFT),
+                               pfn_pte_ma(frame_list[i], PAGE_KERNEL),
+                               0);
+                       PASSING(ret == 0,
+                           ("HYPERVISOR_update_va_mapping failed"));
+               }
+#endif
 
-               /* Update P->M and M->P tables. */
-               PFNTOMFN(pfn) = mfn_list[i];
-               xen_machphys_update(mfn_list[i], pfn);
-            
                /* Relinquish the page back to the allocator. */
-               ClearPageReserved(page);
-               set_page_count(page, 1);
+               vm_page_unwire(page, 0);
                vm_page_free(page);
        }
 
-       current_pages += nr_pages;
-       totalram_pages = current_pages;
+       bs.current_pages += nr_pages;
+       //totalram_pages = bs.current_pages;
 
  out:
-       balloon_unlock(flags);
-
-       free((mfn_list);
+       mtx_unlock(&balloon_lock);
 
        return 0;
 }
 
-static int 
+static int
 decrease_reservation(unsigned long nr_pages)
 {
-       unsigned long *mfn_list, pfn, i, flags;
-       struct page   *page;
-       void          *v;
+       unsigned long  pfn, i;
+       vm_page_t      page;
        int            need_sleep = 0;
        int ret;
        struct xen_memory_reservation reservation = {
@@ -209,48 +296,68 @@ decrease_reservation(unsigned long nr_pa
                .domid        = DOMID_SELF
        };
 
-       if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
-               nr_pages = PAGE_SIZE / sizeof(unsigned long);
-
-       mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
-       if (mfn_list == NULL)
-               return ENOMEM;
+       if (nr_pages > ARRAY_SIZE(frame_list))
+               nr_pages = ARRAY_SIZE(frame_list);
 
        for (i = 0; i < nr_pages; i++) {
                int color = 0;
                if ((page = vm_page_alloc(NULL, color++, 
-                                         VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 
-                                         VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
+                           VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 
+                           VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
                        nr_pages = i;
                        need_sleep = 1;
                        break;
                }
+
                pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
-               mfn_list[i] = PFNTOMFN(pfn);
+               frame_list[i] = PFNTOMFN(pfn);
+
+#if 0
+               if (!PageHighMem(page)) {
+                       v = phys_to_virt(pfn << PAGE_SHIFT);
+                       scrub_pages(v, 1);
+#ifdef CONFIG_XEN
+                       ret = HYPERVISOR_update_va_mapping(
+                               (unsigned long)v, __pte_ma(0), 0);
+                       BUG_ON(ret);
+#endif
+               }
+#endif
+#ifdef CONFIG_XEN_SCRUB_PAGES
+               else {
+                       v = kmap(page);
+                       scrub_pages(v, 1);
+                       kunmap(page);
+               }
+#endif
        }
 
-       balloon_lock(flags);
+#ifdef CONFIG_XEN
+       /* Ensure that ballooned highmem pages don't have kmaps. */
+       kmap_flush_unused();
+       flush_tlb_all();
+#endif
+
+       mtx_lock(&balloon_lock);
 
        /* No more mappings: invalidate P2M and add to balloon. */
        for (i = 0; i < nr_pages; i++) {
-               pfn = MFNTOPFN(mfn_list[i]);
-               PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
+               pfn = MFNTOPFN(frame_list[i]);
+               set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
                balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
        }
 
-       reservation.extent_start = mfn_list;
+       set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents   = nr_pages;
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
-       PANIC_IF(ret != nr_pages);
+       KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed"));
 
-       current_pages -= nr_pages;
-       totalram_pages = current_pages;
-
-       balloon_unlock(flags);
+       bs.current_pages -= nr_pages;
+       //totalram_pages = bs.current_pages;
 
-       free(mfn_list, M_DEVBUF);
+       mtx_unlock(&balloon_lock);
 
-       return need_sleep;
+       return (need_sleep);
 }
 
 /*
@@ -265,27 +372,24 @@ balloon_process(void *unused)
        int need_sleep = 0;
        long credit;
        
+       mtx_lock(&balloon_mutex);
        for (;;) {
                do {
-                       credit = current_target() - current_pages;
+                       credit = current_target() - bs.current_pages;
                        if (credit > 0)
                                need_sleep = (increase_reservation(credit) != 0);
                        if (credit < 0)
                                need_sleep = (decrease_reservation(-credit) != 0);
                        
-#ifndef CONFIG_PREEMPT
-                       if (need_resched())
-                               schedule();
-#endif
                } while ((credit != 0) && !need_sleep);
                
                /* Schedule more work if there is some still to be done. */
-               if (current_target() != current_pages)
-                       timeout(balloon_alarm, NULL, ticks + HZ);
+               if (current_target() != bs.current_pages)
+                       timeout(balloon_alarm, NULL, ticks + hz);
 
-                       msleep(balloon_process, balloon_lock, 0, "balloon", -1);
+               msleep(balloon_process, &balloon_mutex, 0, "balloon", -1);
        }
-
+       mtx_unlock(&balloon_mutex);
 }
 
 /* Resets the Xen limit, sets new target, and kicks off processing. */
@@ -293,8 +397,8 @@ static void 
 set_new_target(unsigned long target)
 {
        /* No need for lock. Not read-modify-write updates. */
-       hard_limit   = ~0UL;
-       target_pages = target;
+       bs.hard_limit   = ~0UL;
+       bs.target_pages = max(target, minimum_target());
        wakeup(balloon_process);
 }
 
@@ -311,8 +415,9 @@ watch_target(struct xenbus_watch *watch,
        unsigned long long new_target;
        int err;
 
-       err = xenbus_scanf(NULL, "memory", "target", "%llu", &new_target);
-       if (err != 1) {
+       err = xenbus_scanf(XBT_NIL, "memory", "target", NULL,
+           "%llu", &new_target);
+       if (err) {
                /* This is ok (for domain0 at least) - so just return */
                return;
        } 
@@ -325,7 +430,7 @@ watch_target(struct xenbus_watch *watch,
 }
 
 static void 
-balloon_init_watcher(void *)
+balloon_init_watcher(void *arg)
 {
        int err;
 
@@ -334,48 +439,60 @@ balloon_init_watcher(void *)
                printf("Failed to set balloon watcher\n");
 
 }
+SYSINIT(balloon_init_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY,
+    balloon_init_watcher, NULL);
 
 static void 
-balloon_init(void *)
+balloon_init(void *arg)
 {
-       unsigned long pfn;
-       struct page *page;
+#ifndef XENHVM
+       vm_page_t page;
+#endif
 
-       IPRINTK("Initialising balloon driver.\n");
+       if (!is_running_on_xen())
+               return;
 
-       if (xen_init() < 0)
-               return -1;
-
-       current_pages = min(xen_start_info->nr_pages, max_pfn);
-       target_pages  = current_pages;
-       balloon_low   = 0;
-       balloon_high  = 0;
-       driver_pages  = 0UL;
-       hard_limit    = ~0UL;
-
-       init_timer(&balloon_timer);
-       balloon_timer.data = 0;
-       balloon_timer.function = balloon_alarm;
+       mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF);
+       mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);
+
+#ifndef XENHVM
+       bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
+#else
+       bs.current_pages = physmem;
+#endif
+       bs.target_pages  = bs.current_pages;
+       bs.balloon_low   = 0;
+       bs.balloon_high  = 0;
+       bs.driver_pages  = 0UL;
+       bs.hard_limit    = ~0UL;
+
+       kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon");
+//     init_timer(&balloon_timer);
+//     balloon_timer.data = 0;
+//     balloon_timer.function = balloon_alarm;
     
+#ifndef XENHVM
        /* Initialise the balloon with excess memory space. */
        for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
                page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
                balloon_append(page);
        }
+#endif
 
        target_watch.callback = watch_target;
     
-       return 0;
+       return;
 }
+SYSINIT(balloon_init, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init, NULL);
+
+void balloon_update_driver_allowance(long delta);
 
 void 
 balloon_update_driver_allowance(long delta)
 {
-       unsigned long flags;
-
-       balloon_lock(flags);
-       driver_pages += delta;
-       balloon_unlock(flags);
+       mtx_lock(&balloon_lock);
+       bs.driver_pages += delta;
+       mtx_unlock(&balloon_lock);
 }
 
 #if 0
@@ -393,17 +510,18 @@ static int dealloc_pte_fn(
        set_pte_at(&init_mm, addr, pte, __pte_ma(0));
        set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
-       PANIC_IF(ret != 1);
+       KASSERT(ret == 1, ("HYPERVISOR_memory_op failed"));
        return 0;
 }
 
 #endif
+
+#if 0
 vm_page_t
 balloon_alloc_empty_page_range(unsigned long nr_pages)
 {
-       unsigned long flags;
        vm_page_t pages;
-       int i;
+       int i, rc;
        unsigned long *mfn_list;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
@@ -422,7 +540,9 @@ balloon_alloc_empty_page_range(unsigned 
                PFNTOMFN(i) = INVALID_P2M_ENTRY;
                reservation.extent_start = mfn_list;
                reservation.nr_extents = nr_pages;
-               PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != nr_pages);
+               rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+                   &reservation);
+               KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed"));
        }
 
        current_pages -= nr_pages;
@@ -435,12 +555,11 @@ balloon_alloc_empty_page_range(unsigned 
 void 
 balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
 {
-       unsigned long i, flags;
+       unsigned long i;
 
        for (i = 0; i < nr_pages; i++)
                balloon_append(page + i);
 
        wakeup(balloon_process);
 }
-
 #endif

Modified: head/sys/dev/xen/blkfront/blkfront.c
==============================================================================
--- head/sys/dev/xen/blkfront/blkfront.c        Wed Mar 11 14:55:04 2009        (r189698)
+++ head/sys/dev/xen/blkfront/blkfront.c        Wed Mar 11 15:30:12 2009        (r189699)
@@ -40,17 +40,17 @@ __FBSDID("$FreeBSD$");
 #include <machine/intr_machdep.h>
 #include <machine/vmparam.h>
 
-#include <xen/hypervisor.h>
 #include <machine/xen/xen-os.h>
+#include <machine/xen/xenfunc.h>
+#include <xen/hypervisor.h>
 #include <xen/xen_intr.h>
 #include <xen/evtchn.h>
+#include <xen/gnttab.h>
 #include <xen/interface/grant_table.h>
 #include <xen/interface/io/protocols.h>
 #include <xen/xenbus/xenbusvar.h>
 
 #include <geom/geom_disk.h>
-#include <machine/xen/xenfunc.h>
-#include <xen/gnttab.h>
 
 #include <dev/xen/blkfront/block.h>
 
@@ -106,7 +106,7 @@ static char * blkif_status_name[] = {
 #endif
 #define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
 #if 0
-#define DPRINTK(fmt, args...) printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__,##args)
+#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
 #else
 #define DPRINTK(fmt, args...) 
 #endif
@@ -138,7 +138,6 @@ pfn_to_mfn(vm_paddr_t pfn)
        return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
 }
 
-
 /*
  * Translate Linux major/minor to an appropriate name and unit
  * number. For HVM guests, this allows us to use the same drive names
@@ -323,17 +322,17 @@ blkfront_probe(device_t dev)
 static int
 blkfront_attach(device_t dev)
 {
-       int err, vdevice, i, unit;
+       int error, vdevice, i, unit;
        struct blkfront_info *info;
        const char *name;
 
        /* FIXME: Use dynamic device id if this is not set. */
-       err = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
+       error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
            "virtual-device", NULL, "%i", &vdevice);
-       if (err) {
-               xenbus_dev_fatal(dev, err, "reading virtual-device");
+       if (error) {
+               xenbus_dev_fatal(dev, error, "reading virtual-device");
                printf("couldn't find virtual device");
-               return (err);
+               return (error);
        }
 
        blkfront_vdevice_to_unit(vdevice, &unit, &name);
@@ -362,9 +361,22 @@ blkfront_attach(device_t dev)
        /* Front end dir is a number, which is used as the id. */
        info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);
 
-       err = talk_to_backend(dev, info);
-       if (err)
-               return (err);
+       error = talk_to_backend(dev, info);
+       if (error)
+               return (error);
+
+       return (0);
+}
+
+static int
+blkfront_suspend(device_t dev)
+{
+       struct blkfront_info *info = device_get_softc(dev);
+
+       /* Prevent new requests being issued until we fix things up. */
+       mtx_lock(&blkif_io_lock);
+       info->connected = BLKIF_STATE_SUSPENDED;
+       mtx_unlock(&blkif_io_lock);
 
        return (0);
 }
@@ -375,16 +387,14 @@ blkfront_resume(device_t dev)
        struct blkfront_info *info = device_get_softc(dev);
        int err;
 
-       DPRINTK("blkfront_resume: %s\n", dev->nodename);
+       DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));
 
        blkif_free(info, 1);
-
        err = talk_to_backend(dev, info);
-
        if (info->connected == BLKIF_STATE_SUSPENDED && !err)
                blkif_recover(info);
 
-       return err;
+       return (err);
 }
 
 /* Common code used when first setting up, and when resuming. */
@@ -425,6 +435,7 @@ talk_to_backend(device_t dev, struct blk
                message = "writing protocol";
                goto abort_transaction;
        }
+
        err = xenbus_transaction_end(xbt, 0);
        if (err) {
                if (err == EAGAIN)
@@ -462,8 +473,8 @@ setup_blkring(device_t dev, struct blkfr
        SHARED_RING_INIT(sring);
        FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
 
-       error = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT),
-               &info->ring_ref);
+       error = xenbus_grant_ring(dev,
+           (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref);
        if (error) {
                free(sring, M_DEVBUF);
                info->ring.sring = NULL;
@@ -471,11 +482,11 @@ setup_blkring(device_t dev, struct blkfr
        }
        
        error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
-               "xbd", (driver_intr_t *)blkif_int, info,
-                                       INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
+           "xbd", (driver_intr_t *)blkif_int, info,
+           INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
        if (error) {
                xenbus_dev_fatal(dev, error,
-                                "bind_evtchn_to_irqhandler failed");
+                   "bind_evtchn_to_irqhandler failed");
                goto fail;
        }
 
@@ -494,7 +505,7 @@ blkfront_backend_changed(device_t dev, X
 {
        struct blkfront_info *info = device_get_softc(dev);
 
-       DPRINTK("blkfront:backend_changed.\n");
+       DPRINTK("backend_state=%d\n", backend_state);
 
        switch (backend_state) {
        case XenbusStateUnknown:
@@ -707,7 +718,7 @@ blkif_open(struct disk *dp)
        struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
 
        if (sc == NULL) {
-               printk("xb%d: not found", sc->xb_unit);
+               printf("xb%d: not found", sc->xb_unit);
                return (ENXIO);
        }
 
@@ -1019,9 +1030,11 @@ blkif_recover(struct blkfront_info *info
        blkif_request_t *req;
        struct blk_shadow *copy;
 
+       if (!info->sc)
+               return;
+
        /* Stage 1: Make a safe copy of the shadow state. */
        copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
-       PANIC_IF(copy == NULL);
        memcpy(copy, info->shadow, sizeof(info->shadow));
 
        /* Stage 2: Set up free list. */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to