Author: trociny
Date: Thu Sep 19 20:19:08 2013
New Revision: 255716
URL: http://svnweb.freebsd.org/changeset/base/255716

Log:
  When updating the map of dirty extents, the most recently used extents
  are kept dirty to reduce the number of on-disk metadata updates. The
  sequence of operations is:
  
  1) acquire the activemap lock;
  2) update in-memory map;
  3) if the list of keepdirty extents is changed, update on-disk metadata;
  4) release the lock.
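  
  Before this change, the whole sequence, including the slow on-disk
  flush, ran while holding the single activemap lock. A simplified
  sketch of the pre-change BIO_WRITE handling in primary.c:
  
      mtx_lock(&res->hr_amp_lock);
      if (activemap_write_start(res->hr_amp,
          ggio->gctl_offset, ggio->gctl_length)) {
              res->hr_stat_activemap_update++;
              /* Slow: flushes the on-disk map while hr_amp_lock is held. */
              (void)hast_activemap_flush(res);
      }
      mtx_unlock(&res->hr_amp_lock);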
  
  On-disk updates are much less frequent than in-memory updates, but they
  take much more time. As a result, one thread may be updating the
  on-disk metadata while another thread waits for the activemap lock just
  to update the in-memory map.
  
  Improve this by introducing an additional on-disk map lock: when the
  in-memory map is updated and it is detected that the on-disk map needs
  updating too, the on-disk map lock is acquired and the in-memory lock
  is released before flushing the map.
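  
  With this change, hast_activemap_flush() is entered with hr_amp_lock
  held and returns with it released; condensed from the new code below:
  
      /* Expects res->hr_amp locked, returns unlocked. */
      mtx_lock(&res->hr_amp_diskmap_lock);
      buf = activemap_bitmap(res->hr_amp, &size);
      mtx_unlock(&res->hr_amp_lock);
      /* Other threads may now update the in-memory map. */
      if (pwrite(res->hr_localfd, buf, size, METADATA_SIZE) !=
          (ssize_t)size)
              ret = -1;
      mtx_unlock(&res->hr_amp_diskmap_lock);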
  
  Reported by:  Yamagi Burmeister yamagi.org
  Tested by:    Yamagi Burmeister yamagi.org
  Reviewed by:  pjd
  Approved by:  re (marius)
  MFC after:    2 weeks

Modified:
  head/sbin/hastd/hast.h
  head/sbin/hastd/primary.c

Modified: head/sbin/hastd/hast.h
==============================================================================
--- head/sbin/hastd/hast.h      Thu Sep 19 20:17:50 2013        (r255715)
+++ head/sbin/hastd/hast.h      Thu Sep 19 20:19:08 2013        (r255716)
@@ -226,8 +226,10 @@ struct hast_resource {
 
        /* Activemap structure. */
        struct activemap *hr_amp;
-       /* Locked used to synchronize access to hr_amp. */
+       /* Lock used to synchronize access to hr_amp. */
        pthread_mutex_t hr_amp_lock;
+       /* Lock used to synchronize access to hr_amp diskmap. */
+       pthread_mutex_t hr_amp_diskmap_lock;
 
        /* Number of BIO_READ requests. */
        uint64_t        hr_stat_read;

Modified: head/sbin/hastd/primary.c
==============================================================================
--- head/sbin/hastd/primary.c   Thu Sep 19 20:17:50 2013        (r255715)
+++ head/sbin/hastd/primary.c   Thu Sep 19 20:19:08 2013        (r255716)
@@ -291,22 +291,28 @@ primary_exitx(int exitcode, const char *
        exit(exitcode);
 }
 
+/* Expects res->hr_amp locked, returns unlocked. */
 static int
 hast_activemap_flush(struct hast_resource *res)
 {
        const unsigned char *buf;
        size_t size;
+       int ret;
 
+       mtx_lock(&res->hr_amp_diskmap_lock);
        buf = activemap_bitmap(res->hr_amp, &size);
+       mtx_unlock(&res->hr_amp_lock);
        PJDLOG_ASSERT(buf != NULL);
        PJDLOG_ASSERT((size % res->hr_local_sectorsize) == 0);
+       ret = 0;
        if (pwrite(res->hr_localfd, buf, size, METADATA_SIZE) !=
            (ssize_t)size) {
                pjdlog_errno(LOG_ERR, "Unable to flush activemap to disk");
                res->hr_stat_activemap_write_error++;
-               return (-1);
+               ret = -1;
        }
-       if (res->hr_metaflush == 1 && g_flush(res->hr_localfd) == -1) {
+       if (ret == 0 && res->hr_metaflush == 1 &&
+           g_flush(res->hr_localfd) == -1) {
                if (errno == EOPNOTSUPP) {
                        pjdlog_warning("The %s provider doesn't support 
flushing write cache. Disabling it.",
                            res->hr_localpath);
@@ -315,10 +321,11 @@ hast_activemap_flush(struct hast_resourc
                        pjdlog_errno(LOG_ERR,
                            "Unable to flush disk cache on activemap update");
                        res->hr_stat_activemap_flush_error++;
-                       return (-1);
+                       ret = -1;
                }
        }
-       return (0);
+       mtx_unlock(&res->hr_amp_diskmap_lock);
+       return (ret);
 }
 
 static bool
@@ -783,6 +790,7 @@ init_remote(struct hast_resource *res, s
                 * Now that we merged bitmaps from both nodes, flush it to the
                 * disk before we start to synchronize.
                 */
+               mtx_lock(&res->hr_amp_lock);
                (void)hast_activemap_flush(res);
        }
        nv_free(nvin);
@@ -1288,8 +1296,9 @@ ggate_recv_thread(void *arg)
                            ggio->gctl_offset, ggio->gctl_length)) {
                                res->hr_stat_activemap_update++;
                                (void)hast_activemap_flush(res);
+                       } else {
+                               mtx_unlock(&res->hr_amp_lock);
                        }
-                       mtx_unlock(&res->hr_amp_lock);
                        break;
                case BIO_DELETE:
                        res->hr_stat_delete++;
@@ -1650,8 +1659,9 @@ done_queue:
                        if (activemap_need_sync(res->hr_amp, ggio->gctl_offset,
                            ggio->gctl_length)) {
                                (void)hast_activemap_flush(res);
+                       } else {
+                               mtx_unlock(&res->hr_amp_lock);
                        }
-                       mtx_unlock(&res->hr_amp_lock);
                        if (hio->hio_replication == HAST_REPLICATION_MEMSYNC)
                                (void)refcnt_release(&hio->hio_countdown);
                }
@@ -1918,8 +1928,9 @@ ggate_send_thread(void *arg)
                            ggio->gctl_offset, ggio->gctl_length)) {
                                res->hr_stat_activemap_update++;
                                (void)hast_activemap_flush(res);
+                       } else {
+                               mtx_unlock(&res->hr_amp_lock);
                        }
-                       mtx_unlock(&res->hr_amp_lock);
                }
                if (ggio->gctl_cmd == BIO_WRITE) {
                        /*
@@ -2015,8 +2026,11 @@ sync_thread(void *arg __unused)
                         */
                        if (activemap_extent_complete(res->hr_amp, syncext))
                                (void)hast_activemap_flush(res);
+                       else
+                               mtx_unlock(&res->hr_amp_lock);
+               } else {
+                       mtx_unlock(&res->hr_amp_lock);
                }
-               mtx_unlock(&res->hr_amp_lock);
                if (dorewind) {
                        dorewind = false;
                        if (offset == -1)