the following diff makes mfi(4) use 64-bit frames, and support 64-bit
dma addresses.  these changes are based on freebsd's mfi(4).  however,
freebsd only uses 64-bit frames 'if (sizeof(bus_addr_t) == 8)',
whereas this patch uses 64-bit frames unconditionally, for both 32-bit
and 64-bit platforms.  I did it unconditionally, because it makes the
code a bit simpler.

according to my tests, this does have a slight negative impact on
speed on i386.

I've tested it with the following script.

#!/bin/sh

for i in 1 2 3
do
        cd /usr/obj
        sudo rm -rf *

        cd /usr/src
        time (make -j4 obj > /dev/null 2>&1 && \
                make -j4 includes > /dev/null 2>&1 && \
                make -j4 build > /dev/null 2>&1)
done


I ran this twice on each combination of i386/amd64, current code/with
patch.  these are the results (the unlabelled line at the end of each
"run" is the mean of the three timings in that run):

i386

  32-bit mfi frames (current code)

  run 1
   32m11.25s real    33m48.65s user    10m42.95s system
   32m25.02s real    33m51.56s user    10m50.86s system
   32m32.85s real    33m51.49s user    10m54.01s system

   32m23.04s         33m50.57s         10m49.27s

  run 2
   32m32.08s real    33m51.22s user    10m49.67s system
   32m26.76s real    33m47.27s user    10m52.08s system
   32m34.42s real    33m46.22s user    10m51.88s system

   32m31.09s         33m48.24s         10m51.21s

  64-bit mfi frames (with patch)

  run 1
   32m39.15s real    33m38.95s user    10m59.97s system
   32m24.67s real    33m36.85s user    11m6.44s system
   32m25.48s real    33m35.04s user    11m3.67s system

   32m29.76s         33m36.95s         11m3.20s

  run 2
   32m25.55s real    33m39.58s user    11m6.11s system
   33m04.37s real    33m34.70s user    11m9.84s system
   32m33.54s real    33m41.38s user    11m7.65s system

   32m41.15s         33m38.55s         11m7.87s


amd64

  32-bit mfi frames (current code)

  run 1
   17m43.44s real    17m49.14s user     8m19.46s system
   17m51.65s real    17m49.07s user     8m29.63s system
   17m50.89s real    17m49.28s user     8m28.45s system

   17m48.66s         17m49.16s          8m25.85s

  run 2
   17m49.82s real    17m47.05s user     8m32.91s system
   17m49.68s real    17m51.12s user     8m27.38s system
   17m51.37s real    17m47.93s user     8m33.09s system

   17m50.29s         17m48.70s          8m31.13s

  64-bit mfi frames (with patch)

  run 1
   17m44.04s real    17m51.22s user     8m21.02s system
   17m51.86s real    17m51.00s user     8m28.67s system
   17m51.87s real    17m50.50s user     8m30.44s system

   17m49.26s         17m51.04s          8m26.71s

  run 2
   17m54.74s real    17m54.06s user     8m29.78s system
   17m53.51s real    17m49.95s user     8m33.98s system
   17m50.85s real    17m49.76s user     8m32.85s system

   17m53.03s         17m51.26s          8m32.20s


so, the question is, is the impact on i386 enough to warrant using
32-bit frames on 32-bit platforms?  if so, should this be decided
at runtime or compile time?

any other thoughts?

-- 
jake...@sdf.lonestar.org
SDF Public Access UNIX System - http://sdf.lonestar.org

Index: mfi.c
===================================================================
RCS file: /cvs/src/sys/dev/ic/mfi.c,v
retrieving revision 1.114
diff -u -p mfi.c
--- mfi.c       30 Dec 2010 08:53:50 -0000      1.114
+++ mfi.c       14 Mar 2011 20:05:59 -0000
@@ -618,7 +618,7 @@ int
 mfi_attach(struct mfi_softc *sc, enum mfi_iop iop)
 {
        struct scsibus_attach_args saa;
-       uint32_t                status, frames;
+       uint32_t                status, frames, max_sgl;
        int                     i;
 
        switch (iop) {
@@ -648,7 +648,8 @@ mfi_attach(struct mfi_softc *sc, enum mfi_iop iop)
 
        status = mfi_fw_state(sc);
        sc->sc_max_cmds = status & MFI_STATE_MAXCMD_MASK;
-       sc->sc_max_sgl = (status & MFI_STATE_MAXSGL_MASK) >> 16;
+       max_sgl = (status & MFI_STATE_MAXSGL_MASK) >> 16;
+       sc->sc_max_sgl = min(max_sgl, (128 * 1024) / PAGE_SIZE + 1);
        DNPRINTF(MFI_D_MISC, "%s: max commands: %u, max sgl: %u\n",
            DEVNAME(sc), sc->sc_max_cmds, sc->sc_max_sgl);
 
@@ -662,8 +663,7 @@ mfi_attach(struct mfi_softc *sc, enum mfi_iop iop)
        }
 
        /* frame memory */
-       /* we are not doing 64 bit IO so only calculate # of 32 bit frames */
-       frames = (sizeof(struct mfi_sg32) * sc->sc_max_sgl +
+       frames = (sizeof(struct mfi_sgl) * sc->sc_max_sgl +
            MFI_FRAME_SIZE - 1) / MFI_FRAME_SIZE + 1;
        sc->sc_frames_size = frames * MFI_FRAME_SIZE;
        sc->sc_frames = mfi_allocmem(sc, sc->sc_frames_size * sc->sc_max_cmds);
@@ -1105,7 +1105,7 @@ mfi_create_sgl(struct mfi_ccb *ccb, int flags)
        struct mfi_softc        *sc = ccb->ccb_sc;
        struct mfi_frame_header *hdr;
        bus_dma_segment_t       *sgd;
-       union mfi_sgl           *sgl;
+       struct mfi_sgl          *sgl;
        int                     error, i;
 
        DNPRINTF(MFI_D_DMA, "%s: mfi_create_sgl %#x\n", DEVNAME(sc),
@@ -1128,11 +1128,12 @@ mfi_create_sgl(struct mfi_ccb *ccb, int flags)
        hdr = &ccb->ccb_frame->mfr_header;
        sgl = ccb->ccb_sgl;
        sgd = ccb->ccb_dmamap->dm_segs;
+       hdr->mfh_flags |= MFI_FRAME_SGL64;
        for (i = 0; i < ccb->ccb_dmamap->dm_nsegs; i++) {
-               sgl->sg32[i].addr = htole32(sgd[i].ds_addr);
-               sgl->sg32[i].len = htole32(sgd[i].ds_len);
+               sgl[i].addr = htole64((u_int64_t)sgd[i].ds_addr);
+               sgl[i].len = htole64((u_int64_t)sgd[i].ds_len);
                DNPRINTF(MFI_D_DMA, "%s: addr: %#x  len: %#x\n",
-                   DEVNAME(sc), sgl->sg32[i].addr, sgl->sg32[i].len);
+                   DEVNAME(sc), sgl[i].addr, sgl[i].len);
        }
 
        if (ccb->ccb_direction == MFI_DATA_IN) {
@@ -1146,8 +1147,7 @@ mfi_create_sgl(struct mfi_ccb *ccb, int flags)
        }
 
        hdr->mfh_sg_count = ccb->ccb_dmamap->dm_nsegs;
-       /* for 64 bit io make the sizeof a variable to hold whatever sg size */
-       ccb->ccb_frame_size += sizeof(struct mfi_sg32) *
+       ccb->ccb_frame_size += sizeof(struct mfi_sgl) *
            ccb->ccb_dmamap->dm_nsegs;
        ccb->ccb_extra_frames = (ccb->ccb_frame_size - 1) / MFI_FRAME_SIZE;
 
Index: mfireg.h
===================================================================
RCS file: /cvs/src/sys/dev/ic/mfireg.h,v
retrieving revision 1.28
diff -u -p mfireg.h
--- mfireg.h    28 Jan 2009 23:45:12 -0000      1.28
+++ mfireg.h    14 Mar 2011 20:05:59 -0000
@@ -251,21 +251,11 @@ struct mfi_sense {
 } __packed;
 
 /* scatter gather elements */
-struct mfi_sg32 {
-       uint32_t                addr;
-       uint32_t                len;
-} __packed;
-
-struct mfi_sg64 {
+struct mfi_sgl {
        uint64_t                addr;
        uint32_t                len;
 } __packed;
 
-union mfi_sgl {
-       struct mfi_sg32         sg32[1];
-       struct mfi_sg64         sg64[1];
-} __packed;
-
 /* message frame */
 struct mfi_frame_header {
        uint8_t                 mfh_cmd;
@@ -283,12 +273,6 @@ struct mfi_frame_header {
        uint32_t                mfh_data_len;
 } __packed;
 
-union mfi_sgl_frame {
-       struct mfi_sg32         sge32[8];
-       struct mfi_sg64         sge64[5];
-
-} __packed;
-
 struct mfi_init_frame {
        struct mfi_frame_header mif_header;
        uint32_t                mif_qinfo_new_addr_lo;
@@ -317,7 +301,7 @@ struct mfi_io_frame {
        uint32_t                mif_sense_addr_hi;
        uint32_t                mif_lba_lo;
        uint32_t                mif_lba_hi;
-       union mfi_sgl           mif_sgl;
+       struct mfi_sgl          mif_sgl;
 } __packed;
 
 #define MFI_PASS_FRAME_SIZE    48
@@ -326,7 +310,7 @@ struct mfi_pass_frame {
        uint32_t                mpf_sense_addr_lo;
        uint32_t                mpf_sense_addr_hi;
        uint8_t                 mpf_cdb[16];
-       union mfi_sgl           mpf_sgl;
+       struct mfi_sgl          mpf_sgl;
 } __packed;
 
 #define MFI_DCMD_FRAME_SIZE    40
@@ -334,7 +318,7 @@ struct mfi_dcmd_frame {
        struct mfi_frame_header mdf_header;
        uint32_t                mdf_opcode;
        uint8_t                 mdf_mbox[MFI_MBOX_SIZE];
-       union mfi_sgl           mdf_sgl;
+       struct mfi_sgl          mdf_sgl;
 } __packed;
 
 struct mfi_abort_frame {
@@ -349,20 +333,14 @@ struct mfi_abort_frame {
 struct mfi_smp_frame {
        struct mfi_frame_header msf_header;
        uint64_t                msf_sas_addr;
-       union {
-               struct mfi_sg32 sg32[2];
-               struct mfi_sg64 sg64[2];
-       }                       msf_sgl;
+       struct mfi_sgl          sgl[2];
 } __packed;
 
 struct mfi_stp_frame {
        struct mfi_frame_header msf_header;
        uint16_t                msf_fis[10];
        uint32_t                msf_stp_flags;
-       union {
-               struct mfi_sg32 sg32[2];
-               struct mfi_sg64 sg64[2];
-       }                       msf_sgl;
+       struct mfi_sgl          sgl[2];
 } __packed;
 
 union mfi_frame {
Index: mfivar.h
===================================================================
RCS file: /cvs/src/sys/dev/ic/mfivar.h,v
retrieving revision 1.40
diff -u -p mfivar.h
--- mfivar.h    30 Dec 2010 08:53:50 -0000      1.40
+++ mfivar.h    14 Mar 2011 20:05:59 -0000
@@ -66,7 +66,7 @@ struct mfi_ccb {
 
        bus_dmamap_t            ccb_dmamap;
 
-       union mfi_sgl           *ccb_sgl;
+       struct mfi_sgl          *ccb_sgl;
 
        /* data for sgl */
        void                    *ccb_data;

Reply via email to