On 26/04/18(Thu) 23:06, Landry Breuil wrote:
> On Thu, Apr 26, 2018 at 10:01:25PM +0200, Martin Pieuchot wrote:
> > This is the 4th attempt to implement clustering for msdosfs.  See [0]
> > for the previous story.
> > 
> > This version implements the strict necessary to read a maximum of 64k
> > per read.  Unlike the previous version we no longer mix cluster and
> > block numbers.
> > 
> > This speeds up msdosfs reads significantly.  Regress tests are passing.
> > 
> > I'd appreciate more tests and reviews.
> 
> Well, i see a 2.5x dropout in perfs.. rsyncing a 100mb dir from the same
> usb key:
> 
> sd1 at scsibus4 targ 1 lun 0: <Kingston, DataTraveler G2, 1.00> SCSI2 
> 0/direct removable serial.09511624F0417921125B
> sd1: 3689MB, 512 bytes/sector, 7555528 sectors
> 
> without the diff:
> sent 101,634,811 bytes  received 153 bytes  2,540,870.35 bytes/sec
> 
> with the diff:
> sent 101,634,811 bytes  received 153 bytes  936,727.78 bytes/sec
> sent 101,634,811 bytes  received 153 bytes  928,173.19 bytes/sec
> 
> To be analysed ...

Thanks to landry I now understand why we cannot keep using "block numbers"
as logical value to index buffers.

The VFS clustering code assume that the next logical block number correspond
to the next block on disk.  In the case of FAT it is true for cluster.

So I believe we should commit the diff below.

Index: msdosfs/denode.h
===================================================================
RCS file: /cvs/src/sys/msdosfs/denode.h,v
retrieving revision 1.32
diff -u -p -r1.32 denode.h
--- msdosfs/denode.h    13 Jun 2017 18:13:18 -0000      1.32
+++ msdosfs/denode.h    28 Apr 2018 14:47:10 -0000
@@ -142,7 +142,6 @@ struct denode {
        struct vnode *de_devvp; /* vnode of blk dev we live on */
        uint32_t de_flag;               /* flag bits */
        dev_t de_dev;           /* device where direntry lives */
-       daddr_t de_lastr;
        uint32_t de_dirclust;   /* cluster of the directory file containing 
this entry */
        uint32_t de_diroffset;  /* offset of this entry in the directory 
cluster */
        uint32_t de_fndoffset;  /* offset of found dir entry */
Index: msdosfs/msdosfs_fat.c
===================================================================
RCS file: /cvs/src/sys/msdosfs/msdosfs_fat.c,v
retrieving revision 1.31
diff -u -p -r1.31 msdosfs_fat.c
--- msdosfs/msdosfs_fat.c       30 Dec 2017 20:47:00 -0000      1.31
+++ msdosfs/msdosfs_fat.c       28 Apr 2018 14:47:10 -0000
@@ -1020,14 +1020,12 @@ extendfile(struct denode *dep, uint32_t 
                                        bp = getblk(pmp->pm_devvp, cntobn(pmp, 
cn++),
                                                    pmp->pm_bpcluster, 0, 0);
                                else {
-                                       bp = getblk(DETOV(dep), de_cn2bn(pmp, 
frcn++),
+                                       bp = getblk(DETOV(dep), frcn++,
                                            pmp->pm_bpcluster, 0, 0);
                                        /*
                                         * Do the bmap now, as in msdosfs_write
                                         */
-                                       if (pcbmap(dep,
-                                           de_bn2cn(pmp, bp->b_lblkno),
-                                           &bp->b_blkno, 0, 0))
+                                       if (pcbmap(dep, bp->b_lblkno, 
&bp->b_blkno, 0, 0))
                                                bp->b_blkno = -1;
                                        if (bp->b_blkno == -1)
                                                panic("extendfile: pcbmap");
Index: msdosfs/msdosfs_vnops.c
===================================================================
RCS file: /cvs/src/sys/msdosfs/msdosfs_vnops.c,v
retrieving revision 1.118
diff -u -p -r1.118 msdosfs_vnops.c
--- msdosfs/msdosfs_vnops.c     28 Apr 2018 03:13:05 -0000      1.118
+++ msdosfs/msdosfs_vnops.c     28 Apr 2018 14:47:10 -0000
@@ -78,13 +78,13 @@
 
 static uint32_t fileidhash(uint64_t);
 
+int msdosfs_bmaparray(struct vnode *, daddr_t, daddr_t *, int *);
 int msdosfs_kqfilter(void *);
 int filt_msdosfsread(struct knote *, long);
 int filt_msdosfswrite(struct knote *, long);
 int filt_msdosfsvnode(struct knote *, long);
 void filt_msdosfsdetach(struct knote *);
 
-
 /*
  * Some general notes:
  *
@@ -510,18 +510,14 @@ int
 msdosfs_read(void *v)
 {
        struct vop_read_args *ap = v;
-       int error = 0;
-       uint32_t diff;
-       int blsize;
-       int isadir;
-       uint32_t n;
-       long on;
-       daddr_t lbn, rablock, rablkno;
-       struct buf *bp;
        struct vnode *vp = ap->a_vp;
        struct denode *dep = VTODE(vp);
        struct msdosfsmount *pmp = dep->de_pmp;
        struct uio *uio = ap->a_uio;
+       int isadir, error = 0;
+       uint32_t n, diff, size, on;
+       struct buf *bp;
+       daddr_t cn, bn;
 
        /*
         * If they didn't ask for any data, then we are done.
@@ -536,7 +532,8 @@ msdosfs_read(void *v)
                if (uio->uio_offset >= dep->de_FileSize)
                        return (0);
 
-               lbn = de_cluster(pmp, uio->uio_offset);
+               cn = de_cluster(pmp, uio->uio_offset);
+               size = pmp->pm_bpcluster;
                on = uio->uio_offset & pmp->pm_crbomask;
                n = ulmin(pmp->pm_bpcluster - on, uio->uio_resid);
 
@@ -549,31 +546,23 @@ msdosfs_read(void *v)
                if (diff < n)
                        n = diff;
 
-               /* convert cluster # to block # if a directory */
-               if (isadir) {
-                       error = pcbmap(dep, lbn, &lbn, 0, &blsize);
-                       if (error)
-                               return (error);
-               }
                /*
                 * If we are operating on a directory file then be sure to
                 * do i/o with the vnode for the filesystem instead of the
-                * vnode for the directory.
+                * vnode for the directory. In that case, the block number
+                * is physical blocks and not clusters.
                 */
                if (isadir) {
-                       error = bread(pmp->pm_devvp, lbn, blsize, &bp);
+                       /* dirfile-relative cn -> fs-relative bn */
+                       error = pcbmap(dep, cn, &bn, 0, &size);
+                       if (error)
+                               return (error);
+                       error = bread(pmp->pm_devvp, bn, size, &bp);
                } else {
-                       rablock = lbn + 1;
-                       rablkno = de_cn2bn(pmp, rablock);
-                       if (dep->de_lastr + 1 == lbn &&
-                           de_cn2off(pmp, rablock) < dep->de_FileSize)
-                               error = breadn(vp, de_cn2bn(pmp, lbn),
-                                   pmp->pm_bpcluster, &rablkno,
-                                   &pmp->pm_bpcluster, 1, &bp);
+                       if (de_cn2off(pmp, cn + 1) >= dep->de_FileSize)
+                               error = bread(vp, cn, size, &bp);
                        else
-                               error = bread(vp, de_cn2bn(pmp, lbn),
-                                   pmp->pm_bpcluster, &bp);
-                       dep->de_lastr = lbn;
+                               error = bread_cluster(vp, cn, size, &bp);
                }
                n = min(n, pmp->pm_bpcluster - bp->b_resid);
                if (error) {
@@ -602,7 +591,7 @@ msdosfs_write(void *v)
        uint32_t osize;
        int error = 0;
        uint32_t count, lastcn;
-       daddr_t bn;
+       daddr_t cn;
        struct buf *bp;
        int ioflag = ap->a_ioflag;
        struct uio *uio = ap->a_uio;
@@ -679,30 +668,30 @@ msdosfs_write(void *v)
                lastcn = de_clcount(pmp, osize) - 1;
 
        do {
-               if (de_cluster(pmp, uio->uio_offset) > lastcn) {
+               croffset = uio->uio_offset & pmp->pm_crbomask;
+               cn = de_cluster(pmp, uio->uio_offset);
+
+               if (cn > lastcn) {
                        error = ENOSPC;
                        break;
                }
 
-               bn = de_blk(pmp, uio->uio_offset);
-               if ((uio->uio_offset & pmp->pm_crbomask) == 0
-                   && (de_blk(pmp, uio->uio_offset + uio->uio_resid) > 
de_blk(pmp, uio->uio_offset)
-                       || uio->uio_offset + uio->uio_resid >= 
dep->de_FileSize)) {
+               if (croffset == 0 &&
+                   (de_cluster(pmp, uio->uio_offset + uio->uio_resid) > cn ||
+                    (uio->uio_offset + uio->uio_resid) >= dep->de_FileSize)) {
                        /*
                         * If either the whole cluster gets written,
                         * or we write the cluster from its start beyond EOF,
                         * then no need to read data from disk.
                         */
-                       bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0);
+                       bp = getblk(thisvp, cn, pmp->pm_bpcluster, 0, 0);
                        clrbuf(bp);
                        /*
                         * Do the bmap now, since pcbmap needs buffers
                         * for the fat table. (see msdosfs_strategy)
                         */
                        if (bp->b_blkno == bp->b_lblkno) {
-                               error = pcbmap(dep,
-                                              de_bn2cn(pmp, bp->b_lblkno),
-                                              &bp->b_blkno, 0, 0);
+                               error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 
0, 0);
                                if (error)
                                        bp->b_blkno = -1;
                        }
@@ -714,16 +703,16 @@ msdosfs_write(void *v)
                        }
                } else {
                        /*
-                        * The block we need to write into exists, so read it 
in.
+                        * The block we need to write into exists, so
+                        * read it in.
                         */
-                       error = bread(thisvp, bn, pmp->pm_bpcluster, &bp);
+                       error = bread(thisvp, cn, pmp->pm_bpcluster, &bp);
                        if (error) {
                                brelse(bp);
                                break;
                        }
                }
 
-               croffset = uio->uio_offset & pmp->pm_crbomask;
                n = ulmin(uio->uio_resid, pmp->pm_bpcluster - croffset);
                if (uio->uio_offset + n > dep->de_FileSize) {
                        dep->de_FileSize = uio->uio_offset + n;
@@ -1756,7 +1745,7 @@ msdosfs_islocked(void *v)
 
 /*
  * vp  - address of vnode file the file
- * bn  - which cluster we are interested in mapping to a filesystem block 
number.
+ * bn  - which cluster we are interested in mapping to a filesystem block 
number
  * vpp - returns the vnode for the block special file holding the filesystem
  *      containing the file of interest
  * bnp - address of where to return the filesystem relative block number
@@ -1766,19 +1755,51 @@ msdosfs_bmap(void *v)
 {
        struct vop_bmap_args *ap = v;
        struct denode *dep = VTODE(ap->a_vp);
-       struct msdosfsmount *pmp = dep->de_pmp;
 
        if (ap->a_vpp != NULL)
                *ap->a_vpp = dep->de_devvp;
        if (ap->a_bnp == NULL)
                return (0);
-       if (ap->a_runp) {
+
+       return (msdosfs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, ap->a_runp));
+}
+
+int
+msdosfs_bmaparray(struct vnode *vp, daddr_t cn, daddr_t *bnp, int *runp)
+{
+       struct denode *dep = VTODE(vp);
+       struct msdosfsmount *pmp = dep->de_pmp;
+       struct mount *mp;
+       int error, maxrun = 0, run;
+       daddr_t daddr;
+
+       mp = vp->v_mount;
+
+       if (runp) {
                /*
-                * Sequential clusters should be counted here.
+                * XXX
+                * If MAXBSIZE is the largest transfer the disks can handle,
+                * we probably want maxrun to be 1 block less so that we
+                * don't create a block larger than the device can handle.
                 */
-               *ap->a_runp = 0;
+               *runp = 0;
+               maxrun = min(MAXBSIZE / mp->mnt_stat.f_iosize - 1,
+                   pmp->pm_maxcluster - cn);
        }
-       return (pcbmap(dep, de_bn2cn(pmp, ap->a_bn), ap->a_bnp, 0, 0));
+
+       if ((error = pcbmap(dep, cn, bnp, 0, 0)) != 0)
+               return (error);
+
+       for (run = 1; run <= maxrun; run++) {
+               error = pcbmap(dep, cn + run, &daddr, 0, 0);
+               if (error != 0 || (daddr != *bnp + de_cn2bn(pmp, run)))
+                       break;
+       }
+
+       if (runp)
+               *runp = run - 1;
+
+       return (0);
 }
 
 int
@@ -1800,8 +1821,7 @@ msdosfs_strategy(void *v)
         * don't allow files with holes, so we shouldn't ever see this.
         */
        if (bp->b_blkno == bp->b_lblkno) {
-               error = pcbmap(dep, de_bn2cn(dep->de_pmp, bp->b_lblkno),
-                              &bp->b_blkno, 0, 0);
+               error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0);
                if (error)
                        bp->b_blkno = -1;
                if (bp->b_blkno == -1)

Reply via email to