Author: glen                         Date: Wed Nov 14 00:14:15 2007 GMT
Module: SOURCES                       Tag: LINUX_2_6_16
---- Log message:
- undos!

---- Files affected:
SOURCES:
   linux-dmcache.patch (1.1.2.1 -> 1.1.2.2) 

---- Diffs:

================================================================
Index: SOURCES/linux-dmcache.patch
diff -u SOURCES/linux-dmcache.patch:1.1.2.1 SOURCES/linux-dmcache.patch:1.1.2.2
--- SOURCES/linux-dmcache.patch:1.1.2.1 Wed Nov 14 01:08:39 2007
+++ SOURCES/linux-dmcache.patch Wed Nov 14 01:14:10 2007
@@ -1,1786 +1,1786 @@
-diff -Naur linux-2.6.19.1-orig/drivers/md/dm-cache.c linux-2.6.19.1-dmcache/drivers/md/dm-cache.c
---- linux-2.6.19.1-orig/drivers/md/dm-cache.c  1969-12-31 19:00:00.000000000 -0500
-+++ linux-2.6.19.1-dmcache/drivers/md/dm-cache.c       2007-01-01 18:26:06.000000000 -0500
-@@ -0,0 +1,1755 @@
-+/****************************************************************************
-+ *  dm-cache.c
-+ *  Device mapper target for block-level disk caching
-+ *
-+ *  Copyright (C) International Business Machines Corp., 2006
-+ *  Author: Ming Zhao ([EMAIL PROTECTED])
-+ *
-+ *  This program is free software; you can redistribute it and/or modify
-+ *  it under the terms of the GNU General Public License as published by
-+ *  the Free Software Foundation; under version 2 of the License.
-+ *
-+ *  This program is distributed in the hope that it will be useful,
-+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ *  GNU General Public License for more details.
-+ *
-+ *  You should have received a copy of the GNU General Public License
-+ *  along with this program; if not, write to the Free Software
-+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-+ *
-+ ****************************************************************************/
-+
-+#include <asm/atomic.h>
-+#include <asm/checksum.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/list.h>
-+#include <linux/blkdev.h>
-+#include <linux/bio.h>
-+#include <linux/slab.h>
-+#include <linux/hash.h>
-+#include <linux/spinlock.h>
-+#include <linux/workqueue.h>
-+#include <linux/pagemap.h>
-+
-+#include "dm.h"
-+#include "dm-io.h"
-+#include "dm-bio-list.h"
-+#include "kcopyd.h"
-+
-+#define DMC_DEBUG 0
-+
-+#define DM_MSG_PREFIX "cache"
-+#define DMC_PREFIX "dm-cache: "
-+
-+#if DMC_DEBUG
-+#define DPRINTK( s, arg... ) printk(DMC_PREFIX s "\n", ##arg)
-+#else
-+#define DPRINTK( s, arg... )
-+#endif
-+
-+/* Default cache parameters */
-+#define DEFAULT_CACHE_SIZE    65536
-+#define DEFAULT_CACHE_ASSOC   1024
-+#define DEFAULT_BLOCK_SIZE    8
-+#define CONSECUTIVE_BLOCKS    512
-+
-+/* Write policy */
-+#define WRITE_THROUGH 0
-+#define WRITE_BACK 1
-+#define DEFAULT_WRITE_POLICY WRITE_THROUGH
-+
-+/* Number of pages for I/O */
-+#define DMCACHE_COPY_PAGES 1024
-+
-+/* Hash function */
-+#define HASH 0                /* Use hash_long */
-+#define UNIFORM 1     /* Evenly distributed */
-+#define DEFAULT_HASHFUNC UNIFORM
-+
-+/* States of a cache block */
-+#define INVALID               0
-+#define VALID         1       /* Valid */
-+#define RESERVED      2       /* Allocated but data not in place yet */
-+#define DIRTY         4       /* Locally modified */
-+#define WRITEBACK     8       /* In the process of write back */
-+
-+#define is_state(x, y)                (x & y)
-+#define set_state(x, y)               (x |= y)
-+#define clear_state(x, y)     (x &= ~y)
-+
-+/*
-+ * Cache context
-+ */
-+struct cache_c {
-+      struct dm_dev *src_dev;         /* Source device */
-+      struct dm_dev *cache_dev;       /* Cache device */
-+      struct kcopyd_client *kcp_client; /* Kcopyd client for writing back data */
-+
-+      struct cacheblock *cache;       /* Hash table for cache blocks */
-+      sector_t size;                  /* Cache size */
-+      unsigned int bits;              /* Cache size in bits */
-+      unsigned int assoc;             /* Cache associativity */
-+      unsigned int block_size;        /* Cache block size */
-+      unsigned int block_shift;       /* Cache block size in bits */
-+      unsigned int block_mask;        /* Cache block mask */
-+      unsigned int consecutive_shift; /* Consecutive blocks size in bits */
-+      unsigned long counter;          /* Logical timestamp of last access */
-+      unsigned int write_policy;      /* Cache write policy */
-+      sector_t dirty_blocks;          /* Number of dirty blocks */
-+
-+      spinlock_t lock;                /* Lock to protect page allocation/deallocation */
-+      struct page_list *pages;        /* Pages for I/O */
-+      unsigned int nr_pages;          /* Number of pages */
-+      unsigned int nr_free_pages;     /* Number of free pages */
-+      wait_queue_head_t destroyq;     /* Wait queue for I/O completion */
-+      atomic_t nr_jobs;               /* Number of I/O jobs */
-+      /* Stats */
-+      unsigned long reads;            /* Number of reads */
-+      unsigned long writes;           /* Number of writes */
-+      unsigned long cache_hits;       /* Number of cache hits */
-+      unsigned long replace;          /* Number of cache replacements */
-+      unsigned long writeback;        /* Number of replaced dirty blocks */
-+      unsigned long dirty;            /* Number of submitted dirty blocks */
-+};
-+
-+/* Cache block metadata structure */
-+struct cacheblock {
-+      spinlock_t lock;        /* Lock to protect operations on the bio list */
-+      sector_t block;         /* Sector number of the cached block */
-+      unsigned short state;   /* State of a block */
-+      unsigned long counter;  /* Logical timestamp of the block's last access */
-+      struct bio_list bios;   /* List of pending bios */
-+};
-+
-+
-+/****************************************************************************
-+ *  Functions and data structures for implementing a kcached to handle async
-+ *  I/O. Code for page and queue handling is borrowed from kcopyd.c.
-+ ****************************************************************************/
-+
-+/*
-+ * Functions for handling pages used by async I/O.
-+ * The data asked by a bio request may not be aligned with cache blocks, in
-+ * which case additional pages are required for the request that is forwarded
-+ * to the server. A pool of pages are reserved for this purpose.
-+ */
-+
-+static struct page_list *alloc_pl(void)
-+{
-+      struct page_list *pl;
-+
-+      pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-+      if (!pl)
-+              return NULL;
-+
-+      pl->page = alloc_page(GFP_KERNEL);
-+      if (!pl->page) {
-+              kfree(pl);
-+              return NULL;
-+      }
-+
-+      return pl;
-+}
-+
-+static void free_pl(struct page_list *pl)
-+{
-+      __free_page(pl->page);
-+      kfree(pl);
-+}
-+
-+static void drop_pages(struct page_list *pl)
-+{
-+      struct page_list *next;
-+
-+      while (pl) {
-+              next = pl->next;
-+              free_pl(pl);
-+              pl = next;
-+      }
-+}
-+
-+static int kcached_get_pages(struct cache_c *dmc, unsigned int nr,
-+                               struct page_list **pages)
-+{
-+      struct page_list *pl;
-+
-+      spin_lock(&dmc->lock);
-+      if (dmc->nr_free_pages < nr) {
-+              DPRINTK("kcached_get_pages: No free pages: %u<%u",
-+                      dmc->nr_free_pages, nr);
-+              spin_unlock(&dmc->lock);
-+              return -ENOMEM;
-+      }
-+
-+      dmc->nr_free_pages -= nr;
-+      for (*pages = pl = dmc->pages; --nr; pl = pl->next)
-+              ;
-+
-+      dmc->pages = pl->next;
-+      pl->next = NULL;
-+
-+      spin_unlock(&dmc->lock);
-+
-+      return 0;
-+}
-+
-+static void kcached_put_pages(struct cache_c *dmc, struct page_list *pl)
-+{
-+      struct page_list *cursor;
-+
-+      spin_lock(&dmc->lock);
-+      for (cursor = pl; cursor->next; cursor = cursor->next)
-+              dmc->nr_free_pages++;
-+
-+      dmc->nr_free_pages++;
-+      cursor->next = dmc->pages;
-+      dmc->pages = pl;
-+
-+      spin_unlock(&dmc->lock);
-+}
-+
-+static int alloc_bio_pages(struct cache_c *dmc, unsigned int nr)
-+{
-+      unsigned int i;
-+      struct page_list *pl = NULL, *next;
-+
-+      for (i = 0; i < nr; i++) {
-+              next = alloc_pl();
-+              if (!next) {
-+                      if (pl)
-+                              drop_pages(pl);
-+                      return -ENOMEM;
-+              }
-+              next->next = pl;
-+              pl = next;
-+      }
-+
-+      kcached_put_pages(dmc, pl);
-+      dmc->nr_pages += nr;
-+
-+      return 0;
-+}
-+
-+static void free_bio_pages(struct cache_c *dmc)
-+{
-+      BUG_ON(dmc->nr_free_pages != dmc->nr_pages);
-+      drop_pages(dmc->pages);
-+      dmc->pages = NULL;
-+      dmc->nr_free_pages = dmc->nr_pages = 0;
-+}
-+
-+/* Structure for a kcached job */
-+struct kcached_job {
-+      struct list_head list;
-+      struct cache_c *dmc;
-+      struct bio *bio;        /* Original bio */
-+      struct io_region src;
-+      struct io_region dest;
-+      struct cacheblock *cacheblock;
-+      int rw;
-+      /*
-+       * When the original bio is not aligned with cache blocks,
-+       * we need extra bvecs and pages for padding.
-+       */
-+      struct bio_vec *bvec;
-+      unsigned int nr_pages;
-+      struct page_list *pages;
-+};
-+
-+static struct workqueue_struct *_kcached_wq;
-+static struct work_struct _kcached_work;
-+
-+static inline void wake(void)
-+{
-+      queue_work(_kcached_wq, &_kcached_work);
-+}
-+
-+#define MIN_JOBS 1024
-+
-+static kmem_cache_t *_job_cache;
-+static mempool_t *_job_pool;
-+
-+static DEFINE_SPINLOCK(_job_lock);
-+
-+static LIST_HEAD(_complete_jobs);
-+static LIST_HEAD(_io_jobs);
-+static LIST_HEAD(_pages_jobs);
-+
-+static int jobs_init(void)
-+{
-+      _job_cache = kmem_cache_create("kcached-jobs",
-+                                     sizeof(struct kcached_job),
-+                                     __alignof__(struct kcached_job),
-+                                     0, NULL, NULL);
-+      if (!_job_cache)
-+              return -ENOMEM;
-+
-+      _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab,
-+                                 mempool_free_slab, _job_cache);
-+      if (!_job_pool) {
-+              kmem_cache_destroy(_job_cache);
-+              return -ENOMEM;
-+      }
-+
-+      return 0;
-+}
-+
-+static void jobs_exit(void)
-+{
-+      BUG_ON(!list_empty(&_complete_jobs));
-+      BUG_ON(!list_empty(&_io_jobs));
-+      BUG_ON(!list_empty(&_pages_jobs));
-+
-+      mempool_destroy(_job_pool);
-+      kmem_cache_destroy(_job_cache);
-+      _job_pool = NULL;
-+      _job_cache = NULL;
-+}
-+
-+/*
-+ * Functions to push and pop a job onto the head of a given job list.
-+ */
-+static inline struct kcached_job *pop(struct list_head *jobs)
-+{
-+      struct kcached_job *job = NULL;
-+      unsigned long flags;
-+
-+      spin_lock_irqsave(&_job_lock, flags);
-+
-+      if (!list_empty(jobs)) {
-+              job = list_entry(jobs->next, struct kcached_job, list);
-+              list_del(&job->list);
-+      }
-+      spin_unlock_irqrestore(&_job_lock, flags);
-+
-+      return job;
-+}
-+
-+static inline void push(struct list_head *jobs, struct kcached_job *job)
-+{
-+      unsigned long flags;
-+
-+      spin_lock_irqsave(&_job_lock, flags);
-+      list_add_tail(&job->list, jobs);
-+      spin_unlock_irqrestore(&_job_lock, flags);
-+}
-+
-+
-+/****************************************************************************
-+ * Functions for asynchronously fetching data from source device and storing
-+ * data in cache device. Because the requested data may not align with the
-+ * cache blocks, extra handling is required to pad a block request and extract
-+ * the requested data from the results.
-+ ****************************************************************************/
-+
-+static void io_callback(unsigned long error, void *context)
-+{
-+      struct kcached_job *job = (struct kcached_job *) context;
-+      struct bio *bio = job->bio, *clone;
-+      int i;
-+
-+      if (error) {
-+              /* TODO */
-+              DMERR("io_callback: io error");
-+              return;
-+      }
-+
-+      if (job->rw == READ) {
-+              /* A READ is acknowledged as soon as the requested data is fetched, and
-+                 does not have to wait for it being stored in cache. The bio is cloned
-+                 so that the original one can be ended here. But to avoid copying
-+                 pages, we reuse the pages allocated for the original bio, and mark
-+                 each of them to prevent the pages being freed before the cache
-+                 insertion is completed.
-+               */
-+              if (bio_data_dir(bio) == READ) {
-+                      clone = bio_clone(bio, GFP_NOIO);
-+                      for (i=bio->bi_idx; i<bio->bi_vcnt; i++) {
-+                              get_page(bio->bi_io_vec[i].bv_page);
-+                      }
-+                      DPRINTK("bio ended for %llu:%u", bio->bi_sector, bio->bi_size);
-+                              bio_endio(bio, bio->bi_size, 0);
-+                      job->bio = clone;
-+              }
-+              job->rw = WRITE;
-+              push(&_io_jobs, job);
-+      } else
-+              push(&_complete_jobs, job);
-+      wake();
-+}
-+
-+/*
-+ * Fetch data from the source device asynchronously.
-+ * For a READ bio, if a cache block is larger than the requested data, then
-+ * additional data are prefetched. Larger cache block size enables more
-+ * aggressive read prefetching, which is useful for read-mostly usage.
-+ * For a WRITE bio, if a cache block is larger than the requested data, the
-+ * entire block needs to be fetched, and larger block size incurs more overhead.
-+ * In scenarios where writes are frequent, 4KB is a good cache block size.
-+ */
-+static int do_fetch(struct kcached_job *job)
-+{
-+      int r = 0, i, j;
-+      struct bio *bio = job->bio;
-+      struct cache_c *dmc = job->dmc;
-+      unsigned int offset, head, tail, remaining, nr_vecs, idx = 0;
-+      struct bio_vec *bvec;
-+      struct page_list *pl;
-+
-+      offset = (unsigned int) (bio->bi_sector & dmc->block_mask);
-+      head = to_bytes(offset);
-+      tail = to_bytes(dmc->block_size) - bio->bi_size - head;
-+
-+      DPRINTK("do_fetch: %llu(%llu->%llu,%llu), head:%u,tail:%u",
-+                  bio->bi_sector, job->src.sector, job->dest.sector,
-+              job->src.count, head, tail);
-+
-+      if (bio_data_dir(bio) == READ) { /* The original request is a READ */
-+              if (0 == job->nr_pages) { /* The request is aligned to cache block */
-+                      r = dm_io_async_bvec(1, &job->src, READ,
-+                                           bio->bi_io_vec + bio->bi_idx,
-+                                           io_callback, job);
-+                      return r;
-+              }
-+
-+              nr_vecs = bio->bi_vcnt - bio->bi_idx + job->nr_pages;
-+              bvec = kmalloc(nr_vecs * sizeof(*bvec), GFP_NOIO);
-+              if (!bvec) {
-+                      DMERR("do_fetch: No memory");
-+                      return 1;
-+              }
-+
-+              pl = job->pages;
-+              i = 0;
-+              while (head) {
-+                      bvec[i].bv_len = min(head, (unsigned int)PAGE_SIZE);
-+                      bvec[i].bv_offset = 0;
-+                      bvec[i].bv_page = pl->page;
-+                      head -= bvec[i].bv_len;
-+                      pl = pl->next;
-+                      i++;
-+              }
-+
-+              remaining = bio->bi_size;
-+              j = bio->bi_idx;
-+              while (remaining) {
-+                      bvec[i] = bio->bi_io_vec[j];
-+                      remaining -= bvec[i].bv_len;
-+                      i++; j++;
-+              }
-+
-+              while (tail) {
-+                      bvec[i].bv_len = min(tail, (unsigned int)PAGE_SIZE);
-+                      bvec[i].bv_offset = 0;
-+                      bvec[i].bv_page = pl->page;
-+                      tail -= bvec[i].bv_len;
-+                      pl = pl->next;
-+                      i++;
-+              }
-+
-+              job->bvec = bvec;
-+              r = dm_io_async_bvec(1, &job->src, READ, job->bvec, io_callback, job);
-+              return r;
-+      } else { /* The original request is a WRITE */
-+              pl = job->pages;
-+
-+              if (head && tail) { /* Special case */
-+                      bvec = kmalloc(job->nr_pages * sizeof(*bvec), GFP_KERNEL);
-+                      if (!bvec) {
-+                              DMERR("do_fetch: No memory");
-+                              return 1;
-+                      }
-+                      for (i=0; i<job->nr_pages; i++) {
-+                              bvec[i].bv_len = PAGE_SIZE;
-+                              bvec[i].bv_offset = 0;
-+                              bvec[i].bv_page = pl->page;
-+                              pl = pl->next;
-+                      }
-+                      job->bvec = bvec;
-+                      r = dm_io_async_bvec(1, &job->src, READ, job->bvec,
-+                                           io_callback, job);
-+                      return r;
-+              }
-+
-+              bvec = kmalloc((job->nr_pages + bio->bi_vcnt - bio->bi_idx)
-+                              * sizeof(*bvec), GFP_KERNEL);
-+              if (!bvec) {
-+                      DMERR("do_fetch: No memory");
-+                      return 1;
-+              }
-+
-+              i = 0;
-+              while (head) {
-+                      bvec[i].bv_len = min(head, (unsigned int)PAGE_SIZE);
-+                      bvec[i].bv_offset = 0;
-+                      bvec[i].bv_page = pl->page;
-+                      head -= bvec[i].bv_len;
-+                      pl = pl->next;
-+                      i++;
-+              }
-+
-+              remaining = bio->bi_size;
-+              j = bio->bi_idx;
-+              while (remaining) {
-+                      bvec[i] = bio->bi_io_vec[j];
-+                      remaining -= bvec[i].bv_len;
-+                      i++; j++;
-+              }
-+
-+              if (tail) {
-+                      idx = i;
-+                      bvec[i].bv_offset = (to_bytes(offset) + bio->bi_size) &
-+                                          (PAGE_SIZE - 1);
-+                      bvec[i].bv_len = PAGE_SIZE - bvec[i].bv_offset;
-+                      bvec[i].bv_page = pl->page;
-+                      tail -= bvec[i].bv_len;
-+                      pl = pl->next; i++;
-+                      while (tail) {
-+                              bvec[i].bv_len = PAGE_SIZE;
-+                              bvec[i].bv_offset = 0;
-+                              bvec[i].bv_page = pl->page;
-+                              tail -= bvec[i].bv_len;
-+                              pl = pl->next; i++;
-+                      }
-+              }
-+
-+              job->bvec = bvec;
-+              r = dm_io_async_bvec(1, &job->src, READ, job->bvec + idx,
-+                                   io_callback, job);
-+
-+              return r;
-+      }
-+}
-+
-+/*
-+ * Store data to the cache source device asynchronously.
-+ * For a READ bio request, the data fetched from the source device are returned
-+ * to kernel and stored in cache at the same time.
-+ * For a WRITE bio request, the data are written to the cache and source device
-+ * at the same time.
-+ */
-+static int do_store(struct kcached_job *job)
-+{
-+      int i, j, r = 0;
-+      struct bio *bio = job->bio;
-+      struct cache_c *dmc = job->dmc;
-+      unsigned int offset, head, tail, remaining, nr_vecs;
-+      struct bio_vec *bvec;
-+
-+      offset = (unsigned int) (bio->bi_sector & dmc->block_mask);
-+      head = to_bytes(offset);
-+      tail = to_bytes(dmc->block_size) - bio->bi_size - head;
-+
-+      DPRINTK("do_store: %llu(%llu->%llu,%llu), head:%u,tail:%u",
-+              bio->bi_sector, job->src.sector, job->dest.sector,
-+              job->src.count, head, tail);
-+
-+      if (0 == job->nr_pages) /* Original request is aligned with cache blocks */
-+              r = dm_io_async_bvec(1, &job->dest, WRITE, bio->bi_io_vec + bio->bi_idx,
-+                                   io_callback, job);
-+      else {
-+              if (bio_data_dir(bio) == WRITE && head > 0 && tail > 0) {
-+                      DPRINTK("Special case: %lu %u %u", bio_data_dir(bio), head, tail);
-+                      nr_vecs = job->nr_pages + bio->bi_vcnt - bio->bi_idx;
-+                      if (offset && (offset + bio->bi_size < PAGE_SIZE)) nr_vecs++;
-+                      DPRINTK("Create %u new vecs", nr_vecs);
-+                      bvec = kmalloc(nr_vecs * sizeof(*bvec), GFP_KERNEL);
-+                      if (!bvec) {
-+                              DMERR("do_store: No memory");
-+                              return 1;
-+                      }
-+
-+                      i = 0;
-+                      while (head) {
-+                              bvec[i].bv_len = min(head, job->bvec[i].bv_len);
-+                              bvec[i].bv_offset = 0;
-+                              bvec[i].bv_page = job->bvec[i].bv_page;
-+                              head -= bvec[i].bv_len;
-+                              i++;
-+                      }
-+                      remaining = bio->bi_size;
-+                      j = bio->bi_idx;
-+                      while (remaining) {
-+                              bvec[i] = bio->bi_io_vec[j];
-+                              remaining -= bvec[i].bv_len;
-+                              i++; j++;
-+                      }
-+                      j = (to_bytes(offset) + bio->bi_size) / PAGE_SIZE;
-+                      bvec[i].bv_offset = (to_bytes(offset) + bio->bi_size) -
-+                                          j * PAGE_SIZE;
-+                      bvec[i].bv_len = PAGE_SIZE - bvec[i].bv_offset;
-+                      bvec[i].bv_page = job->bvec[j].bv_page;
-+                      tail -= bvec[i].bv_len;
-+                      i++; j++;
-+                      while (tail) {
-+                              bvec[i] = job->bvec[j];
<<Diff was trimmed, longer than 597 lines>>

---- CVS-web:
    http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/SOURCES/linux-dmcache.patch?r1=1.1.2.1&r2=1.1.2.2&f=u

_______________________________________________
pld-cvs-commit mailing list
[email protected]
http://lists.pld-linux.org/mailman/listinfo/pld-cvs-commit
