Hello JFS List,

I have created a first patch with TRIM support for the jfs filesystem.
Last week I bought a Crucial M4 512GB and tested the speed of various
filesystems on it. My conclusion was that I should use JFS, as I
always have before ;)

I can post some fs_mark benchmarks later... but first have a look at this
initial discard code... the IWRITE_LOCK / IWRITE_UNLOCK usage and so on is
probably not correct yet - I think :(

Could some of the real kernel hackers on this list help with this?


TRIM is supported through both interfaces:
1) mount -o discard -> should work, but I am not really sure
2) fstrim /mntpoint -> this was a bit harder, but it may also do what
   it should...


PS: The current patch does not really issue the TRIM, but the kernel
log shows nicely what it would do... so anyone with an SSD could
help here ;)


Have a nice weekend.

-- 
regards, TR
diff -urN jfs-3.4.4-original/ioctl.c jfs-trim/ioctl.c
--- jfs-3.4.4-original/ioctl.c  2012-05-21 00:29:13.000000000 +0200
+++ jfs-trim/ioctl.c    2012-07-14 01:14:37.000000000 +0200
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2006 Herbert Poetzl
  * adapted from Remy Card's ext2/ioctl.c
+ * Portions Copyright (C) Tino Reichardt, 2012
  */
 
 #include <linux/fs.h>
@@ -11,13 +12,16 @@
 #include <linux/mount.h>
 #include <linux/time.h>
 #include <linux/sched.h>
+#include <linux/blkdev.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
 
+#include "jfs_filsys.h"
+#include "jfs_debug.h"
 #include "jfs_incore.h"
 #include "jfs_dinode.h"
 #include "jfs_inode.h"
-
+#include "jfs_dmap.h"
 
 static struct {
        long jfs_flag;
@@ -123,6 +127,40 @@
                mnt_drop_write_file(filp);
                return err;
        }
+
+       case FITRIM:
+       {
+               struct super_block *sb = inode->i_sb;
+               struct request_queue *q = bdev_get_queue(sb->s_bdev);
+               struct fstrim_range range;
+               s64 ret = 0;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (!blk_queue_discard(q)) {
+                       jfs_warn("FITRIM not supported on device");
+                       return -EOPNOTSUPP;
+               }
+
+               if (copy_from_user(&range, (struct fstrim_range __user *)arg,
+                   sizeof(range)))
+                       return -EFAULT;
+
+               range.minlen = max((unsigned int)range.minlen,
+                                  q->limits.discard_granularity);
+
+               ret = dbDiscard_ioctl(inode, &range);
+               if (ret < 0)
+                       return ret;
+
+               if (copy_to_user((struct fstrim_range __user *)arg, &range,
+                   sizeof(range)))
+                       return -EFAULT;
+
+               return 0;
+       }
+
        default:
                return -ENOTTY;
        }
@@ -142,6 +180,9 @@
        case JFS_IOC_SETFLAGS32:
                cmd = JFS_IOC_SETFLAGS;
                break;
+       case FITRIM:
+               cmd = FITRIM;
+               break;
        }
        return jfs_ioctl(filp, cmd, arg);
 }
diff -urN jfs-3.4.4-original/jfs_dmap.c jfs-trim/jfs_dmap.c
--- jfs-3.4.4-original/jfs_dmap.c       2012-07-12 15:50:04.737752121 +0200
+++ jfs-trim/jfs_dmap.c 2012-07-14 11:17:46.000000000 +0200
@@ -1,5 +1,6 @@
 /*
  *   Copyright (C) International Business Machines Corp., 2000-2004
+ *   Portions Copyright (C) Tino Reichardt, 2012
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -18,6 +19,8 @@
 
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
+
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
 #include "jfs_dmap.h"
@@ -65,6 +68,15 @@
  *     to the persistent bitmaps in dmaps) is guarded by (busy) buffers.
  */
 
+/**
+ * warning: disabling this will issue real discard requests to the device!
+ * pre-pre-alpha here!
+ *
+ * -> nearly all #ifdefs will go away once approved by some jfs gurus
+ * /TR 2012-07-14
+ */
+#define DEBUG_DISCARD
+
 #define BMAP_LOCK_INIT(bmp)    mutex_init(&bmp->db_bmaplock)
 #define BMAP_LOCK(bmp)         mutex_lock(&bmp->db_bmaplock)
 #define BMAP_UNLOCK(bmp)       mutex_unlock(&bmp->db_bmaplock)
@@ -104,7 +116,6 @@
 static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
                      int nblocks);
 static int dbMaxBud(u8 * cp);
-s64 dbMapFileSizeToMapSize(struct inode *ipbmap);
 static int blkstol2(s64 nb);
 
 static int cntlz(u32 value);
@@ -118,6 +129,8 @@
 static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i);
 static int dbGetL2AGSize(s64 nblocks);
 
+static void dbIssueDiscard(struct inode *ip, u64 blkno, u64 nblocks);
+static s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen);
 
 /*
  *     buddy table
@@ -311,6 +324,42 @@
        return (0);
 }
 
+/*
+ * NAME:       dbIssueDiscard()
+ *
+ * FUNCTION:   TRIM the specified block range on device; as long as
+ *             DEBUG_DISCARD is defined the request is only logged
+ *
+ * PARAMETERS:
+ *     ip      - pointer to in-core inode
+ *     blkno   - starting block number to be trimmed
+ *     nblocks - number of blocks to be trimmed
+ *
+ * RETURN VALUES:
+ *     none (a failed discard is logged and otherwise ignored)
+ *
+ * serialization: dbFree() calls this with IREAD_LOCK(ipbmap) held,
+ *     dbDiscardAG() calls it without holding the bmap lock
+ */
+static void dbIssueDiscard(struct inode *ip, u64 blkno, u64 nblocks)
+{
+       struct super_block *sb = ip->i_sb;
+#ifndef DEBUG_DISCARD
+       int r = sb_issue_discard(sb, blkno, nblocks, GFP_NOFS, 0);
+
+       if (unlikely(r != 0)) {
+               printk(KERN_INFO "JFS: dbIssueDiscard"
+                       "(%p, %llu, %llu, GFP_NOFS, 0) = %d (ERR)\n", sb,
+                       (unsigned long long)blkno,
+                       (unsigned long long)nblocks, r);
+       }
+#else
+       /* dry run: nothing is issued, so there is no result to report */
+       printk(KERN_INFO "JFS: dbIssueDiscard: would call sb_issue_discard"
+               "(%p, %llu, %llu, GFP_NOFS, 0)\n", sb,
+               (unsigned long long)blkno, (unsigned long long)nblocks);
+#endif
+}
 
 /*
  * NAME:       dbFree()
@@ -338,6 +387,7 @@
        s64 lblkno, rem;
        struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
        struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
+       struct super_block *sb = ip->i_sb;
 
        IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
 
@@ -388,6 +438,13 @@
        /* write the last buffer. */
        write_metapage(mp);
 
+       /**
+        * TRIM the blocks, when mounted with discard option
+        */
+       if (JFS_SBI(sb)->flag & JFS_DISCARD) {
+               dbIssueDiscard(ip, blkno, nblocks);
+       }
+
        IREAD_UNLOCK(ipbmap);
 
        return (0);
@@ -1096,7 +1153,6 @@
                /* we were not successful */
                release_metapage(mp);
 
-
        return (rc);
 }
 
@@ -1590,6 +1646,192 @@
 }
 
 
+/*
+ * NAME:       dbDiscardAG()
+ *
+ * FUNCTION:   attempt to discard (TRIM) all free blocks of specific AG
+ *
+ *             algorithm:
+ *             1) allocate blocks, as large as possible and save them
+ *             2) trim all these saved block/length values
+ *             3) mark the blocks free again
+ *
+ *             benefit:
+ *             - we work only on one ag at a time, which is fully blocked
+ *             - reading / writing the fs stays possible while trimming
+ *
+ *             downside:
+ *             - we write two times to the dmapctl and dmap pages
+ *             - but for me, this seems the best way, better ideas?
+ *             /TR 2012
+ *
+ * PARAMETERS:
+ *     ip      - pointer to in-core inode
+ *     agno    - ag to trim
+ *     minlen  - minimum number of contiguous blocks (below 1 means 1)
+ *
+ * RETURN VALUES:
+ *     s64     - actual number of blocks trimmed
+ */
+static s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
+{
+       enum { MAX_TRIM_RANGES = 32 * 1024 };   /* bounds the array size */
+       struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
+       struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
+       struct super_block *sb = ipbmap->i_sb;
+       s64 nblocks, blkno;
+       u64 trimmed = 0;
+       int rc, l2nb, count = 0, range_cnt;
+
+       struct range2trim {
+               u64 blkno;
+               u64 nblocks;
+       } *totrim, *tt;
+
+#ifdef DEBUG_DISCARD
+       printk(KERN_INFO "JFS: dbDiscardAG, agno=%d/%d, db_agfree=%lld"
+               ", dn_agsize=%lld, db_used=%lld dn_nfree=%lld\n",
+               agno+1, bmp->db_numag,
+               (signed long long)bmp->db_agfree[agno],
+               (signed long long)bmp->db_agsize,
+               (signed long long)bmp->db_agsize - bmp->db_agfree[agno],
+               (signed long long)bmp->db_nfree);
+#endif
+
+       if (minlen < 1)
+               minlen = 1;
+
+       /* block writers; db_agfree must be sampled under this lock */
+       IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
+
+       /* worst value: each free block gets an entry (up to the cap) */
+       nblocks = bmp->db_agfree[agno];
+       if (nblocks < minlen) {
+               IWRITE_UNLOCK(ipbmap);
+               return 0;
+       }
+       range_cnt = nblocks < MAX_TRIM_RANGES ? nblocks : MAX_TRIM_RANGES;
+       totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS);
+       if (totrim == NULL) {
+               IWRITE_UNLOCK(ipbmap);
+               jfs_error(bmp->db_ipbmap->i_sb,
+                         "dbDiscardAG: no space for trim array");
+               return 0;
+       }
+
+       while (nblocks >= minlen && count < range_cnt) {
+               l2nb = BLKSTOL2(nblocks);
+               /* 0 = okay, -EIO = fatal, -ENOSPC -> try a smaller block */
+               rc = dbAllocAG(bmp, agno, nblocks, l2nb, &blkno);
+               if (rc == 0) {
+                       totrim[count].blkno = blkno;
+                       totrim[count].nblocks = nblocks;
+                       count++;
+#ifdef DEBUG_DISCARD
+       printk(KERN_INFO "JFS: agno=%d/%d, blkno:%ld, nblocks=%ld\n",
+               agno+1, bmp->db_numag, (long int)blkno, (long int)nblocks);
+#endif
+                       /* no free block left in this ag, trim now */
+                       if (bmp->db_agfree[agno] == 0)
+                               break;
+
+                       /* give a hint for the next while */
+                       nblocks = bmp->db_agfree[agno];
+                       continue;
+               }
+               if (rc != -ENOSPC) {
+                       /* fatal: stop, saved extents are released below */
+                       printk(KERN_ERR "JFS: dbDiscardAG: -EIO\n");
+                       break;
+               }
+
+               /* search for next smaller log2 block */
+               l2nb = BLKSTOL2(nblocks) - 1;
+               if (l2nb < 0)
+                       break;
+               nblocks = (s64)1 << l2nb;
+#ifdef DEBUG_DISCARD
+               printk(KERN_INFO "JFS: agno=%d/%d, while(nblocks(%ld) >= "
+                       "minlen(%ld))\n", agno+1, bmp->db_numag,
+                       (long int)nblocks, (long int)minlen);
+#endif
+       }
+       IWRITE_UNLOCK(ipbmap);
+
+       for (tt = totrim; tt < totrim + count; tt++) {
+#ifdef DEBUG_DISCARD
+               printk(KERN_INFO "JFS: to-trim(%p) blkno=%d, nblocks=%d\n",
+                       tt, (int)tt->blkno, (int)tt->nblocks);
+#endif
+               /* not mounted with discard: dbFree won't trim, do it here */
+               if (!(JFS_SBI(sb)->flag & JFS_DISCARD))
+                       dbIssueDiscard(ip, tt->blkno, tt->nblocks);
+
+               dbFree(ip, tt->blkno, tt->nblocks);
+               trimmed += tt->nblocks;
+       }
+
+       kfree(totrim);
+       return trimmed;
+}
+
+
+/*
+ * NAME:       dbDiscard_ioctl()
+ *
+ * FUNCTION:   attempt to discard (TRIM) all free blocks from the
+ *              filesystem.
+ *
+ * PARAMETERS:
+ *     ip      - pointer to in-core inode;
+ *     range   - the range, given by user space; on success range->len
+ *               is replaced by the number of bytes trimmed
+ *
+ * RETURN VALUES:
+ *     0       - success
+ *     -EINVAL - range starts past the block map or covers no full block
+ */
+int dbDiscard_ioctl(struct inode *ip, struct fstrim_range *range)
+{
+       struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
+       struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
+       struct super_block *sb = ipbmap->i_sb;
+       int agno, agno_end;
+       s64 start, end, minlen;
+       u64 trimmed = 0;
+
+       /**
+        * convert byte values to block size of filesystem:
+        * start:       First Byte to trim
+        * len:         number of Bytes to trim from start
+        * minlen:      minimum extent length in Bytes
+        */
+       start = range->start >> sb->s_blocksize_bits;
+       end = start + (range->len >> sb->s_blocksize_bits) - 1;
+       minlen = range->minlen >> sb->s_blocksize_bits;
+       if (minlen < 1)
+               minlen = 1;
+
+       /* reject ranges that select nothing instead of guessing one */
+       if (start >= bmp->db_mapsize || end < start)
+               return -EINVAL;
+       if (end >= bmp->db_mapsize)
+               end = bmp->db_mapsize - 1;
+
+       /**
+        * we trim every ag touched by the range, each one as a whole
+        * -> of course, mostly we will have start=0 and agno_end=max
+        */
+       agno = BLKTOAG(start, JFS_SBI(ip->i_sb));
+       agno_end = BLKTOAG(end, JFS_SBI(ip->i_sb));
+       for (; agno <= agno_end; agno++)
+               trimmed += dbDiscardAG(ip, agno, minlen);
+
+       range->len = trimmed << sb->s_blocksize_bits;
+       return 0;
+}
+
+
 /*
  * NAME:       dbFindCtl()
  *
diff -urN jfs-3.4.4-original/jfs_dmap.h jfs-trim/jfs_dmap.h
--- jfs-3.4.4-original/jfs_dmap.h       2012-05-21 00:29:13.000000000 +0200
+++ jfs-trim/jfs_dmap.h 2012-07-13 20:35:01.000000000 +0200
@@ -1,5 +1,6 @@
 /*
  *   Copyright (C) International Business Machines Corp., 2000-2002
+ *   Portions Copyright (C) Tino Reichardt, 2012
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -311,4 +312,7 @@
 extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks);
 extern void dbFinalizeBmap(struct inode *ipbmap);
 extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap);
+
+extern int dbDiscard_ioctl(struct inode *ip, struct fstrim_range *range);
+
 #endif                         /* _H_JFS_DMAP */
diff -urN jfs-3.4.4-original/jfs_filsys.h jfs-trim/jfs_filsys.h
--- jfs-3.4.4-original/jfs_filsys.h     2012-05-21 00:29:13.000000000 +0200
+++ jfs-trim/jfs_filsys.h       2012-07-13 20:30:00.000000000 +0200
@@ -1,5 +1,6 @@
 /*
  *   Copyright (C) International Business Machines Corp., 2000-2003
+ *   Portions Copyright (C) Tino Reichardt, 2012
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -45,6 +46,9 @@
 /* mount time flag to disable journaling to disk */
 #define JFS_NOINTEGRITY 0x00000040
 
+/* mount time flag to enable TRIM to ssd disks */
+#define JFS_DISCARD     0x00000080
+
 /* commit option */
 #define        JFS_COMMIT      0x00000f00      /* commit option mask */
 #define        JFS_GROUPCOMMIT 0x00000100      /* group (of 1) commit */
diff -urN jfs-3.4.4-original/super.c jfs-trim/super.c
--- jfs-3.4.4-original/super.c  2012-05-21 00:29:13.000000000 +0200
+++ jfs-trim/super.c    2012-07-11 23:34:00.000000000 +0200
@@ -1,6 +1,7 @@
 /*
  *   Copyright (C) International Business Machines Corp., 2000-2004
  *   Portions Copyright (C) Christoph Hellwig, 2001-2002
+ *   Portions Copyright (C) Tino Reichardt, 2012
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -33,6 +34,7 @@
 #include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <linux/seq_file.h>
+#include <linux/blkdev.h>
 
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
@@ -46,6 +48,7 @@
 
 MODULE_DESCRIPTION("The Journaled Filesystem (JFS)");
 MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM");
+MODULE_AUTHOR("Tino Reichardt");
 MODULE_LICENSE("GPL");
 
 static struct kmem_cache * jfs_inode_cachep;
@@ -197,7 +200,8 @@
 enum {
        Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
        Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
-       Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask
+       Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask,
+       Opt_discard, Opt_nodiscard
 };
 
 static const match_table_t tokens = {
@@ -214,6 +218,8 @@
        {Opt_uid, "uid=%u"},
        {Opt_gid, "gid=%u"},
        {Opt_umask, "umask=%u"},
+       {Opt_discard, "discard"},
+       {Opt_nodiscard, "nodiscard"},
        {Opt_err, NULL}
 };
 
@@ -324,12 +330,14 @@
                        sbi->uid = simple_strtoul(uid, &uid, 0);
                        break;
                }
+
                case Opt_gid:
                {
                        char *gid = args[0].from;
                        sbi->gid = simple_strtoul(gid, &gid, 0);
                        break;
                }
+
                case Opt_umask:
                {
                        char *umask = args[0].from;
@@ -341,6 +349,23 @@
                        }
                        break;
                }
+
+               case Opt_discard:
+               {
+                       struct request_queue *q = bdev_get_queue(sb->s_bdev);
+                       if (blk_queue_discard(q)) {
+                               *flag |= JFS_DISCARD;
+                       } else {
+                               printk(KERN_ERR "JFS: discard option "
+                                       "not supported on device\n");
+                       }
+                       break;
+               }
+
+               case Opt_nodiscard:
+                       *flag &= ~JFS_DISCARD;
+                       break;
+
                default:
                        printk("jfs: Unrecognized mount option \"%s\" "
                                        " or missing value\n", p);
@@ -620,6 +645,8 @@
                seq_printf(seq, ",umask=%03o", sbi->umask);
        if (sbi->flag & JFS_NOINTEGRITY)
                seq_puts(seq, ",nointegrity");
+       if (sbi->flag & JFS_DISCARD)
+               seq_puts(seq, ",discard");
        if (sbi->nls_tab)
                seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset);
        if (sbi->flag & JFS_ERR_CONTINUE)
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Jfs-discussion mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/jfs-discussion

Reply via email to