On Wed, May 10, 2017 at 10:09:59PM +1000, David Gwynne wrote:
> On Mon, May 08, 2017 at 11:03:58AM +1000, David Gwynne wrote:
> > on modern sparc64s (think fire or sparc enterprise Mx000 boxes),
> > setting up and tearing down the translation table entries (TTEs)
> > is very expensive. so expensive that the cost of doing it for disk
> > io has a noticeable impact on compile times.
> >
> > now that there's a BUS_DMA_64BIT flag, we can use that to decide
> > to bypass the iommu for devices that set that flag, therefore
> > avoiding the cost of handling the TTEs.
> >
> > the following diff adds support for bypass mappings to the iommu
> > code on sparc64. it's based on a diff from kettenis@ back in 2009.
> > the main changes are around coping with the differences between
> > schizo/psycho and fire/oberon.
> >
> > the differences between the chips are now represented by an iommu_hw
> > struct. these differences include how to enable the iommu (now via
> > a function pointer), and masks for bypass addresses.
> >
> > i've tested this on oberon (on an m4000) and schizo (on a v880).
> > however, the bypass code isn't working on fire (v245s). to cope with
> > that for now, the iommu_hw struct lets drivers mask flag bits that
> > are handled when creating a dmamap. this means fire boards will
> > ignore BUS_DMA_64BIT until i can figure out what's wrong with them.
>
> i figured it out. it turns out Fire was working fine. however,
> enabling 64bit dva on the onboard devices didn't work because the
> serverworks/broadcom pcie to pcix bridge can only handle dma addresses
> in the low 40 bits. because the fire bypass window is higher than
> this, the bridge would choke and things stopped working.
>
> the updated diff attempts to handle this. basically when probing
> the bridge, the platform creates a custom dma tag for it. this tag
> intercepts bus_dmamap_create and clears the BUS_DMA_64BIT flag before
> handing it up to the parent bridge, which is pyro in my situation.
> it looks like early sun4v boxes could make use of this too.
>
> > i have not tested this on psycho yet. if anyone has such a machine
> > and is willing to work with me to figure it out, please talk to me.
>
> i still don't have psycho reports.
Would anyone object if I committed this? I've been running it for the
last release or two without issues, but with significant improvements in
performance on the machines involved.
> Index: dev/iommu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/iommu.c,v
> retrieving revision 1.74
> diff -u -p -r1.74 iommu.c
> --- dev/iommu.c 30 Apr 2017 16:45:45 -0000 1.74
> +++ dev/iommu.c 10 May 2017 12:00:09 -0000
> @@ -100,6 +100,25 @@ void iommu_iomap_clear_pages(struct iomm
> void _iommu_dvmamap_sync(bus_dma_tag_t, bus_dma_tag_t, bus_dmamap_t,
> bus_addr_t, bus_size_t, int);
>
> +void iommu_hw_enable(struct iommu_state *);
> +
> +const struct iommu_hw iommu_hw_default = {
> + .ihw_enable = iommu_hw_enable,
> +
> + .ihw_dvma_pa = IOTTE_PAMASK,
> +
> + .ihw_bypass = 0x3fffUL << 50,
> + .ihw_bypass_nc = 0,
> + .ihw_bypass_ro = 0,
> +};
> +
> +void
> +iommu_hw_enable(struct iommu_state *is)
> +{
> + IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb);
> + IOMMUREG_WRITE(is, iommu_cr, IOMMUCR_EN | (is->is_tsbsize << 16));
> +}
> +
> /*
> * Initiate an STC entry flush.
> */
> @@ -125,7 +144,8 @@ iommu_strbuf_flush(struct strbuf_ctl *sb
> * - create a private DVMA map.
> */
> void
> -iommu_init(char *name, struct iommu_state *is, int tsbsize, u_int32_t
> iovabase)
> +iommu_init(char *name, const struct iommu_hw *ihw, struct iommu_state *is,
> + int tsbsize, u_int32_t iovabase)
> {
> psize_t size;
> vaddr_t va;
> @@ -149,13 +169,9 @@ iommu_init(char *name, struct iommu_stat
> * be hard-wired, so we read the start and size from the PROM and
> * just use those values.
> */
> - if (strncmp(name, "pyro", 4) == 0) {
> - is->is_cr = IOMMUREG_READ(is, iommu_cr);
> - is->is_cr &= ~IOMMUCR_FIRE_BE;
> - is->is_cr |= (IOMMUCR_FIRE_SE | IOMMUCR_FIRE_CM_EN |
> - IOMMUCR_FIRE_TE);
> - } else
> - is->is_cr = IOMMUCR_EN;
> +
> + is->is_hw = ihw;
> +
> is->is_tsbsize = tsbsize;
> if (iovabase == (u_int32_t)-1) {
> is->is_dvmabase = IOTSB_VSTART(is->is_tsbsize);
> @@ -237,15 +253,6 @@ iommu_init(char *name, struct iommu_stat
> mtx_init(&is->is_mtx, IPL_HIGH);
>
> /*
> - * Set the TSB size. The relevant bits were moved to the TSB
> - * base register in the PCIe host bridges.
> - */
> - if (strncmp(name, "pyro", 4) == 0)
> - is->is_ptsb |= is->is_tsbsize;
> - else
> - is->is_cr |= (is->is_tsbsize << 16);
> -
> - /*
> * Now actually start up the IOMMU.
> */
> iommu_reset(is);
> @@ -262,10 +269,7 @@ iommu_reset(struct iommu_state *is)
> {
> int i;
>
> - IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb);
> -
> - /* Enable IOMMU */
> - IOMMUREG_WRITE(is, iommu_cr, is->is_cr);
> + (*is->is_hw->ihw_enable)(is);
>
> for (i = 0; i < 2; ++i) {
> struct strbuf_ctl *sb = is->is_sb[i];
> @@ -280,7 +284,7 @@ iommu_reset(struct iommu_state *is)
> printf(", STC%d enabled", i);
> }
>
> - if (is->is_flags & IOMMU_FLUSH_CACHE)
> + if (ISSET(is->is_hw->ihw_flags, IOMMU_HW_FLUSH_CACHE))
> IOMMUREG_WRITE(is, iommu_cache_invalidate, -1ULL);
> }
>
> @@ -433,7 +437,7 @@ iommu_extract(struct iommu_state *is, bu
> if (dva >= is->is_dvmabase && dva <= is->is_dvmaend)
> tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
>
> - return (tte & IOTTE_PAMASK);
> + return (tte & is->is_hw->ihw_dvma_pa);
> }
>
> /*
> @@ -601,6 +605,7 @@ iommu_dvmamap_create(bus_dma_tag_t t, bu
> {
> int ret;
> bus_dmamap_t map;
> + struct iommu_state *is = sb->sb_iommu;
> struct iommu_map_state *ims;
>
> BUS_DMA_FIND_PARENT(t, _dmamap_create);
> @@ -610,6 +615,12 @@ iommu_dvmamap_create(bus_dma_tag_t t, bu
> if (ret)
> return (ret);
>
> + if (flags & BUS_DMA_64BIT) {
> + map->_dm_cookie = is;
> + *dmamap = map;
> + return (0);
> + }
> +
> ims = iommu_iomap_create(atop(round_page(size)));
>
> if (ims == NULL) {
> @@ -641,8 +652,10 @@ iommu_dvmamap_destroy(bus_dma_tag_t t, b
> if (map->dm_nsegs)
> bus_dmamap_unload(t0, map);
>
> - if (map->_dm_cookie)
> - iommu_iomap_destroy(map->_dm_cookie);
> + if (!ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
> + if (map->_dm_cookie)
> + iommu_iomap_destroy(map->_dm_cookie);
> + }
> map->_dm_cookie = NULL;
>
> BUS_DMA_FIND_PARENT(t, _dmamap_destroy);
> @@ -667,36 +680,36 @@ iommu_dvmamap_load(bus_dma_tag_t t, bus_
> u_long dvmaddr, sgstart, sgend;
> bus_size_t align, boundary;
> struct iommu_state *is;
> - struct iommu_map_state *ims = map->_dm_cookie;
> + struct iommu_map_state *ims;
> pmap_t pmap;
>
> -#ifdef DIAGNOSTIC
> - if (ims == NULL)
> - panic("iommu_dvmamap_load: null map state");
> -#endif
> -#ifdef DEBUG
> - if (ims->ims_sb == NULL)
> - panic("iommu_dvmamap_load: null sb");
> - if (ims->ims_sb->sb_iommu == NULL)
> - panic("iommu_dvmamap_load: null iommu");
> -#endif /* DEBUG */
> - is = ims->ims_sb->sb_iommu;
> -
> - if (map->dm_nsegs) {
> - /*
> - * Is it still in use? _bus_dmamap_load should have taken care
> - * of this.
> - */
> -#ifdef DIAGNOSTIC
> - panic("iommu_dvmamap_load: map still in use");
> -#endif
> - bus_dmamap_unload(t0, map);
> - }
> -
> /*
> * Make sure that on error condition we return "no valid mappings".
> */
> - map->dm_nsegs = 0;
> + KASSERTMSG(map->dm_nsegs == 0, "map still in use");
> +
> + if (ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
> + unsigned long bypass;
> + int i;
> +
> + is = map->_dm_cookie;
> + bypass = is->is_hw->ihw_bypass;
> +
> + /* Bypass translation by the IOMMU. */
> +
> + BUS_DMA_FIND_PARENT(t, _dmamap_load);
> + err = (*t->_dmamap_load)(t, t0, map, buf, buflen, p, flags);
> + if (err != 0)
> + return (err);
> +
> + for (i = 0; i < map->dm_nsegs; i++)
> + map->dm_segs[i].ds_addr |= bypass;
> +
> + return (0);
> + }
> +
> + ims = map->_dm_cookie;
> + is = ims->ims_sb->sb_iommu;
>
> if (buflen < 1 || buflen > map->_dm_size) {
> DPRINTF(IDB_BUSDMA,
> @@ -876,28 +889,31 @@ iommu_dvmamap_load_raw(bus_dma_tag_t t,
> bus_size_t boundary, align;
> u_long dvmaddr, sgstart, sgend;
> struct iommu_state *is;
> - struct iommu_map_state *ims = map->_dm_cookie;
> + struct iommu_map_state *ims;
>
> -#ifdef DIAGNOSTIC
> - if (ims == NULL)
> - panic("iommu_dvmamap_load_raw: null map state");
> -#endif
> -#ifdef DEBUG
> - if (ims->ims_sb == NULL)
> - panic("iommu_dvmamap_load_raw: null sb");
> - if (ims->ims_sb->sb_iommu == NULL)
> - panic("iommu_dvmamap_load_raw: null iommu");
> -#endif /* DEBUG */
> - is = ims->ims_sb->sb_iommu;
> + KASSERTMSG(map->dm_nsegs == 0, "map stil in use");
>
> - if (map->dm_nsegs) {
> - /* Already in use?? */
> -#ifdef DIAGNOSTIC
> - panic("iommu_dvmamap_load_raw: map still in use");
> -#endif
> - bus_dmamap_unload(t0, map);
> + if (ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
> + unsigned long bypass;
> +
> + is = map->_dm_cookie;
> + bypass = is->is_hw->ihw_bypass;
> +
> + /* Bypass translation by the IOMMU. */
> + for (i = 0; i < nsegs; i++) {
> + map->dm_segs[i].ds_addr = bypass | segs[i].ds_addr;
> + map->dm_segs[i].ds_len = segs[i].ds_len;
> + }
> +
> + map->dm_nsegs = nsegs;
> + map->dm_mapsize = size;
> +
> + return (0);
> }
>
> + ims = map->_dm_cookie;
> + is = ims->ims_sb->sb_iommu;
> +
> /*
> * A boundary presented to bus_dmamem_alloc() takes precedence
> * over boundary in the map.
> @@ -1088,11 +1104,6 @@ iommu_dvmamap_append_range(bus_dma_tag_t
> bus_dma_segment_t *seg = NULL;
> int i = map->dm_nsegs;
>
> -#ifdef DEBUG
> - if (ims == NULL)
> - panic("iommu_dvmamap_append_range: null map state");
> -#endif
> -
> sgstart = iommu_iomap_translate(ims, pa);
> sgend = sgstart + length - 1;
>
> @@ -1298,20 +1309,17 @@ void
> iommu_dvmamap_unload(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map)
> {
> struct iommu_state *is;
> - struct iommu_map_state *ims = map->_dm_cookie;
> + struct iommu_map_state *ims;
> bus_addr_t dvmaddr = map->_dm_dvmastart;
> bus_size_t sgsize = map->_dm_dvmasize;
> int error;
>
> -#ifdef DEBUG
> - if (ims == NULL)
> - panic("iommu_dvmamap_unload: null map state");
> - if (ims->ims_sb == NULL)
> - panic("iommu_dvmamap_unload: null sb");
> - if (ims->ims_sb->sb_iommu == NULL)
> - panic("iommu_dvmamap_unload: null iommu");
> -#endif /* DEBUG */
> + if (ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
> + bus_dmamap_unload(t->_parent, map);
> + return;
> + }
>
> + ims = map->_dm_cookie;
> is = ims->ims_sb->sb_iommu;
>
> /* Flush the iommu */
> @@ -1488,7 +1496,7 @@ iommu_dvmamap_print_map(bus_dma_tag_t t,
> break;
> }
>
> - if (map->_dm_cookie) {
> + if (!ISSET(map->_dm_flags, BUS_DMA_64BIT) && map->_dm_cookie != NULL) {
> struct iommu_map_state *ims = map->_dm_cookie;
> struct iommu_page_map *ipm = &ims->ims_map;
>
> @@ -1546,19 +1554,19 @@ void
> iommu_dvmamap_sync(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
> bus_addr_t offset, bus_size_t len, int ops)
> {
> - struct iommu_map_state *ims = map->_dm_cookie;
> + struct iommu_map_state *ims;
>
> -#ifdef DIAGNOSTIC
> - if (ims == NULL)
> - panic("iommu_dvmamap_sync: null map state");
> - if (ims->ims_sb == NULL)
> - panic("iommu_dvmamap_sync: null sb");
> - if (ims->ims_sb->sb_iommu == NULL)
> - panic("iommu_dvmamap_sync: null iommu");
> -#endif
> if (len == 0)
> return;
>
> + if (map->_dm_flags & BUS_DMA_64BIT) {
> + if (ops & (BUS_DMASYNC_PREWRITE | BUS_DMASYNC_POSTREAD))
> + membar(MemIssue);
> + return;
> + }
> +
> + ims = map->_dm_cookie;
> +
> if (ops & BUS_DMASYNC_PREWRITE)
> membar(MemIssue);
>
> @@ -1622,9 +1630,13 @@ iommu_dvmamem_alloc(bus_dma_tag_t t, bus
> "bound %llx segp %p flags %d\n", (unsigned long long)size,
> (unsigned long long)alignment, (unsigned long long)boundary,
> segs, flags));
> +
> + if ((flags & BUS_DMA_64BIT) == 0)
> + flags |= BUS_DMA_DVMA;
> +
> BUS_DMA_FIND_PARENT(t, _dmamem_alloc);
> return ((*t->_dmamem_alloc)(t, t0, size, alignment, boundary,
> - segs, nsegs, rsegs, flags | BUS_DMA_DVMA));
> + segs, nsegs, rsegs, flags));
> }
>
> void
> @@ -1763,7 +1775,7 @@ iommu_iomap_load_map(struct iommu_state
>
> /* Flush cache if necessary. */
> slot = IOTSBSLOT(e->ipe_va, is->is_tsbsize);
> - if (is->is_flags & IOMMU_FLUSH_CACHE &&
> + if (ISSET(is->is_hw->ihw_flags, IOMMU_HW_FLUSH_CACHE) &&
> (i == (ipm->ipm_pagecnt - 1) || (slot % 8) == 7))
> IOMMUREG_WRITE(is, iommu_cache_flush,
> is->is_ptsb + slot * 8);
> @@ -1788,7 +1800,7 @@ iommu_iomap_unload_map(struct iommu_stat
>
> /* Flush cache if necessary. */
> slot = IOTSBSLOT(e->ipe_va, is->is_tsbsize);
> - if (is->is_flags & IOMMU_FLUSH_CACHE &&
> + if (ISSET(is->is_hw->ihw_flags, IOMMU_HW_FLUSH_CACHE) &&
> (i == (ipm->ipm_pagecnt - 1) || (slot % 8) == 7))
> IOMMUREG_WRITE(is, iommu_cache_flush,
> is->is_ptsb + slot * 8);
> Index: dev/iommureg.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/iommureg.h,v
> retrieving revision 1.17
> diff -u -p -r1.17 iommureg.h
> --- dev/iommureg.h 17 Aug 2012 20:46:50 -0000 1.17
> +++ dev/iommureg.h 10 May 2017 12:00:09 -0000
> @@ -90,10 +90,11 @@ struct iommu_strbuf {
> #define IOMMUCR_DE 0x000000000000000002LL /* Diag enable */
> #define IOMMUCR_EN 0x000000000000000001LL /* Enable IOMMU */
>
> -#define IOMMUCR_FIRE_SE 0x000000000000000400LL /* Snoop enable
> */
> -#define IOMMUCR_FIRE_CM_EN 0x000000000000000300LL /* Cache mode enable */
> -#define IOMMUCR_FIRE_BE 0x000000000000000002LL /* Bypass
> enable */
> -#define IOMMUCR_FIRE_TE 0x000000000000000001LL /* Translation
> enabled */
> +#define IOMMUCR_FIRE_PD 0x000000000000001000UL /* Process
> disable */
> +#define IOMMUCR_FIRE_SE 0x000000000000000400UL /* Snoop enable
> */
> +#define IOMMUCR_FIRE_CM_EN 0x000000000000000300UL /* Cache mode enable */
> +#define IOMMUCR_FIRE_BE 0x000000000000000002UL /* Bypass
> enable */
> +#define IOMMUCR_FIRE_TE 0x000000000000000001UL /* Translation
> enabled */
>
> /*
> * IOMMU stuff
> Index: dev/iommuvar.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/iommuvar.h,v
> retrieving revision 1.17
> diff -u -p -r1.17 iommuvar.h
> --- dev/iommuvar.h 4 May 2016 18:26:12 -0000 1.17
> +++ dev/iommuvar.h 10 May 2017 12:00:09 -0000
> @@ -100,6 +100,21 @@ struct iommu_map_state {
> };
> #define IOMMU_MAP_STREAM 1
>
> +struct iommu_hw {
> + void (*ihw_enable)(struct iommu_state *);
> +
> + unsigned long ihw_dvma_pa;
> +
> + unsigned long ihw_bypass;
> + unsigned long ihw_bypass_nc; /* non-cached */
> + unsigned long ihw_bypass_ro; /* relaxed ordering */
> +
> + unsigned int ihw_flags;
> +#define IOMMU_HW_FLUSH_CACHE (1 << 0)
> +};
> +
> +extern const struct iommu_hw iommu_hw_default;
> +
> /*
> * per-IOMMU state
> */
> @@ -112,8 +127,7 @@ struct iommu_state {
> int64_t is_cr; /* Control register value */
> struct mutex is_mtx;
> struct extent *is_dvmamap; /* DVMA map for this instance */
> - int is_flags;
> -#define IOMMU_FLUSH_CACHE 0x00000001
> + const struct iommu_hw *is_hw;
>
> struct strbuf_ctl *is_sb[2]; /* Streaming buffers if any */
>
> @@ -126,7 +140,8 @@ struct iommu_state {
> };
>
> /* interfaces for PCI/SBus code */
> -void iommu_init(char *, struct iommu_state *, int, u_int32_t);
> +void iommu_init(char *, const struct iommu_hw *, struct iommu_state *,
> + int, u_int32_t);
> void iommu_reset(struct iommu_state *);
> paddr_t iommu_extract(struct iommu_state *, bus_addr_t);
> int64_t iommu_lookup_tte(struct iommu_state *, bus_addr_t);
> @@ -146,6 +161,7 @@ int iommu_dvmamem_alloc(bus_dma_tag_t, b
> bus_size_t, bus_size_t, bus_dma_segment_t *, int, int *, int);
> void iommu_dvmamem_free(bus_dma_tag_t, bus_dma_tag_t, bus_dma_segment_t *,
> int);
> +
>
> #define IOMMUREG_READ(is, reg) \
> bus_space_read_8((is)->is_bustag, \
> Index: dev/pci_machdep.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/pci_machdep.c,v
> retrieving revision 1.44
> diff -u -p -r1.44 pci_machdep.c
> --- dev/pci_machdep.c 10 May 2014 12:15:19 -0000 1.44
> +++ dev/pci_machdep.c 10 May 2017 12:00:09 -0000
> @@ -58,6 +58,7 @@ int sparc_pci_debug = 0x0;
> #include <machine/openfirm.h>
> #include <dev/pci/pcivar.h>
> #include <dev/pci/pcireg.h>
> +#include <dev/pci/pcidevs.h>
>
> #include <dev/ofw/ofw_pci.h>
>
> @@ -85,6 +86,46 @@ pci_attach_hook(parent, self, pba)
> struct pcibus_attach_args *pba;
> {
> /* Don't do anything */
> +}
> +
> +int
> +pci_bcm_dmamap_create(bus_dma_tag_t dt, bus_dma_tag_t t0, bus_size_t size,
> + int nsegments, bus_size_t maxsegsz, bus_size_t boundary, int flags,
> + bus_dmamap_t *dmamp)
> +{
> + bus_dma_tag_t pdt = dt->_parent;
> +
> + CLR(flags, BUS_DMA_64BIT);
> +
> + return ((*pdt->_dmamap_create)(pdt, t0, size, nsegments, maxsegsz,
> + boundary, flags, dmamp));
> +}
> +
> +int
> +pci_probe_device_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
> +{
> + bus_dma_tag_t dt, pdt;
> +
> + if (pa->pa_id ==
> + PCI_ID_CODE(PCI_VENDOR_RCC, PCI_PRODUCT_RCC_PCIE_PCIX)) {
> + /*
> + * These PCI bridges only support 40bit DVA, so intercept
> + * bus_dmamap_create so we can clear BUS_DMA_64BIT.
> + */
> +
> + dt = malloc(sizeof(*dt), M_DEVBUF, M_NOWAIT | M_ZERO);
> + if (dt == NULL)
> + panic("%s: could not alloc dma tag", __func__);
> +
> + pdt = pa->pa_dmat;
> +
> + dt->_parent = pdt;
> + dt->_dmamap_create = pci_bcm_dmamap_create;
> +
> + pa->pa_dmat = dt;
> + }
> +
> + return (0);
> }
>
> int
> Index: dev/psycho.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/psycho.c,v
> retrieving revision 1.74
> diff -u -p -r1.74 psycho.c
> --- dev/psycho.c 23 Aug 2016 03:28:01 -0000 1.74
> +++ dev/psycho.c 10 May 2017 12:00:09 -0000
> @@ -902,7 +902,7 @@ psycho_iommu_init(struct psycho_softc *s
> panic("couldn't malloc iommu name");
> snprintf(name, 32, "%s dvma", sc->sc_dev.dv_xname);
>
> - iommu_init(name, is, tsbsize, iobase);
> + iommu_init(name, &iommu_hw_default, is, tsbsize, iobase);
> }
>
> /*
> Index: dev/pyro.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/pyro.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 pyro.c
> --- dev/pyro.c 20 Dec 2016 13:40:50 -0000 1.30
> +++ dev/pyro.c 10 May 2017 12:00:09 -0000
> @@ -131,6 +131,30 @@ int pyro_msi_eq_intr(void *);
> int pyro_dmamap_create(bus_dma_tag_t, bus_dma_tag_t, bus_size_t, int,
> bus_size_t, bus_size_t, int, bus_dmamap_t *);
>
> +void pyro_iommu_enable(struct iommu_state *);
> +
> +const struct iommu_hw iommu_hw_fire = {
> + .ihw_enable = pyro_iommu_enable,
> +
> + .ihw_dvma_pa = 0x000007ffffffffffUL,
> +
> + .ihw_bypass = 0xfffc000000000000UL,
> + .ihw_bypass_nc = 0x0000080000000000UL,
> + .ihw_bypass_ro = 0,
> +};
> +
> +const struct iommu_hw iommu_hw_oberon = {
> + .ihw_enable = pyro_iommu_enable,
> +
> + .ihw_dvma_pa = 0x00007fffffffffffUL,
> +
> + .ihw_bypass = 0x7ffc000000000000UL,
> + .ihw_bypass_nc = 0x0000800000000000UL,
> + .ihw_bypass_ro = 0x8000000000000000UL,
> +
> + .ihw_flags = IOMMU_HW_FLUSH_CACHE,
> +};
> +
> #ifdef DDB
> void pyro_xir(void *, int);
> #endif
> @@ -266,6 +290,7 @@ pyro_init_iommu(struct pyro_softc *sc, s
> int tsbsize = 7;
> u_int32_t iobase = -1;
> char *name;
> + const struct iommu_hw *ihw = &iommu_hw_fire;
>
> is->is_bustag = sc->sc_bust;
>
> @@ -282,11 +307,23 @@ pyro_init_iommu(struct pyro_softc *sc, s
> panic("couldn't malloc iommu name");
> snprintf(name, 32, "%s dvma", sc->sc_dv.dv_xname);
>
> - /* On Oberon, we need to flush the cache. */
> if (sc->sc_oberon)
> - is->is_flags |= IOMMU_FLUSH_CACHE;
> + ihw = &iommu_hw_oberon;
> +
> + iommu_init(name, ihw, is, tsbsize, iobase);
> +}
> +
> +void
> +pyro_iommu_enable(struct iommu_state *is)
> +{
> + unsigned long cr;
> +
> + cr = IOMMUREG_READ(is, iommu_cr);
> + cr |= IOMMUCR_FIRE_BE | IOMMUCR_FIRE_SE | IOMMUCR_FIRE_CM_EN |
> + IOMMUCR_FIRE_TE;
>
> - iommu_init(name, is, tsbsize, iobase);
> + IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb | is->is_tsbsize);
> + IOMMUREG_WRITE(is, iommu_cr, cr);
> }
>
> void
> Index: dev/sbus.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/sbus.c,v
> retrieving revision 1.44
> diff -u -p -r1.44 sbus.c
> --- dev/sbus.c 19 Sep 2015 21:07:04 -0000 1.44
> +++ dev/sbus.c 10 May 2017 12:00:09 -0000
> @@ -349,7 +349,7 @@ sbus_mb_attach(struct device *parent, st
> snprintf(name, 32, "%s dvma", sc->sc_dev.dv_xname);
>
> printf("%s: ", sc->sc_dev.dv_xname);
> - iommu_init(name, &sc->sc_is, 0, -1);
> + iommu_init(name, &iommu_hw_default, &sc->sc_is, 0, -1);
>
> /* Initialize Starfire PC interrupt translation. */
> if (OF_getprop(findroot(), "name", buf, sizeof(buf)) > 0 &&
> Index: dev/schizo.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/dev/schizo.c,v
> retrieving revision 1.67
> diff -u -p -r1.67 schizo.c
> --- dev/schizo.c 23 Aug 2016 03:28:01 -0000 1.67
> +++ dev/schizo.c 10 May 2017 12:00:09 -0000
> @@ -451,7 +451,7 @@ schizo_init_iommu(struct schizo_softc *s
> "using iobase=0x%x, tsbsize=%d\n", iobase, tsbsize));
> }
>
> - iommu_init(name, is, tsbsize, iobase);
> + iommu_init(name, &iommu_hw_default, is, tsbsize, iobase);
> }
>
> int
> Index: include/pci_machdep.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/sparc64/include/pci_machdep.h,v
> retrieving revision 1.33
> diff -u -p -r1.33 pci_machdep.h
> --- include/pci_machdep.h 4 May 2016 14:30:01 -0000 1.33
> +++ include/pci_machdep.h 10 May 2017 12:00:09 -0000
> @@ -74,10 +74,13 @@ struct sparc_pci_chipset {
> pcireg_t (*conf_read)(pci_chipset_tag_t, pcitag_t, int);
> void (*conf_write)(pci_chipset_tag_t, pcitag_t, int, pcireg_t);
> int (*intr_map)(struct pci_attach_args *, pci_intr_handle_t *);
> + int (*probe_device_hook)(void *, struct pci_attach_args *);
> };
>
> void pci_attach_hook(struct device *, struct device *,
> struct pcibus_attach_args *);
> +int pci_probe_device_hook(pci_chipset_tag_t,
> + struct pci_attach_args *);
> int pci_bus_maxdevs(pci_chipset_tag_t, int);
> pcitag_t pci_make_tag(pci_chipset_tag_t, int, int, int);
> void pci_decompose_tag(pci_chipset_tag_t, pcitag_t, int *, int *,
> @@ -102,8 +105,6 @@ int sparc64_pci_enumerate_bus(struct pc
> struct pci_attach_args *);
>
> #define PCI_MACHDEP_ENUMERATE_BUS sparc64_pci_enumerate_bus
> -
> -#define pci_probe_device_hook(c, a) (0)
>
> #define pci_min_powerstate(c, t) (PCI_PMCSR_STATE_D3)
> #define pci_set_powerstate_md(c, t, s, p)