Author: trociny
Date: Wed Mar  6 06:57:18 2013
New Revision: 247866
URL: http://svnweb.freebsd.org/changeset/base/247866

Log:
  MFC r247281:
  
  Add i/o error counters to hastd(8) and make hastctl(8) display
  them.  This may be useful for detecting problems with HAST disks.
  
  Discussed with and reviewed by: pjd

Modified:
  stable/9/sbin/hastd/control.c
  stable/9/sbin/hastd/hast.h
  stable/9/sbin/hastd/primary.c
  stable/9/sbin/hastd/secondary.c
Directory Properties:
  stable/9/sbin/hastd/   (props changed)

Modified: stable/9/sbin/hastd/control.c
==============================================================================
--- stable/9/sbin/hastd/control.c       Wed Mar  6 06:24:09 2013        (r247865)
+++ stable/9/sbin/hastd/control.c       Wed Mar  6 06:57:18 2013        (r247866)
@@ -207,6 +207,14 @@ control_status_worker(struct hast_resour
            "stat_flush%u", no);
        nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_activemap_update"),
            "stat_activemap_update%u", no);
+       nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_read_error"),
+           "stat_read_error%u", no);
+       nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_write_error"),
+           "stat_write_error%u", no);
+       nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_delete_error"),
+           "stat_delete_error%u", no);
+       nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_flush_error"),
+           "stat_flush_error%u", no);
 end:
        if (cnvin != NULL)
                nv_free(cnvin);
@@ -459,6 +467,16 @@ ctrl_thread(void *arg)
                        nv_add_uint64(nvout, res->hr_stat_flush, "stat_flush");
                        nv_add_uint64(nvout, res->hr_stat_activemap_update,
                            "stat_activemap_update");
+                       nv_add_uint64(nvout, res->hr_stat_read_error,
+                           "stat_read_error");
+                       nv_add_uint64(nvout, res->hr_stat_write_error +
+                           res->hr_stat_activemap_write_error,
+                           "stat_write_error");
+                       nv_add_uint64(nvout, res->hr_stat_delete_error,
+                           "stat_delete_error");
+                       nv_add_uint64(nvout, res->hr_stat_flush_error +
+                           res->hr_stat_activemap_flush_error,
+                           "stat_flush_error");
                        nv_add_int16(nvout, 0, "error");
                        break;
                case CONTROL_RELOAD:

Modified: stable/9/sbin/hastd/hast.h
==============================================================================
--- stable/9/sbin/hastd/hast.h  Wed Mar  6 06:24:09 2013        (r247865)
+++ stable/9/sbin/hastd/hast.h  Wed Mar  6 06:57:18 2013        (r247866)
@@ -234,6 +234,18 @@ struct hast_resource {
        uint64_t        hr_stat_flush;
        /* Number of activemap updates. */
        uint64_t        hr_stat_activemap_update;
+       /* Number of local read errors. */
+       uint64_t        hr_stat_read_error;
+       /* Number of local write errors. */
+       uint64_t        hr_stat_write_error;
+       /* Number of local delete errors. */
+       uint64_t        hr_stat_delete_error;
+       /* Number of flush errors. */
+       uint64_t        hr_stat_flush_error;
+       /* Number of activemap write errors. */
+       uint64_t        hr_stat_activemap_write_error;
+       /* Number of activemap flush errors. */
+       uint64_t        hr_stat_activemap_flush_error;
 
        /* Next resource. */
        TAILQ_ENTRY(hast_resource) hr_next;

Modified: stable/9/sbin/hastd/primary.c
==============================================================================
--- stable/9/sbin/hastd/primary.c       Wed Mar  6 06:24:09 2013        (r247865)
+++ stable/9/sbin/hastd/primary.c       Wed Mar  6 06:57:18 2013        (r247866)
@@ -303,6 +303,7 @@ hast_activemap_flush(struct hast_resourc
        if (pwrite(res->hr_localfd, buf, size, METADATA_SIZE) !=
            (ssize_t)size) {
                pjdlog_errno(LOG_ERR, "Unable to flush activemap to disk");
+               res->hr_stat_activemap_write_error++;
                return (-1);
        }
        if (res->hr_metaflush == 1 && g_flush(res->hr_localfd) == -1) {
@@ -313,6 +314,7 @@ hast_activemap_flush(struct hast_resourc
                } else {
                        pjdlog_errno(LOG_ERR,
                            "Unable to flush disk cache on activemap update");
+                       res->hr_stat_activemap_flush_error++;
                        return (-1);
                }
        }
@@ -1792,6 +1794,22 @@ ggate_send_thread(void *arg)
                                    "G_GATE_CMD_DONE failed");
                        }
                }
+               if (hio->hio_errors[0]) {
+                       switch (ggio->gctl_cmd) {
+                       case BIO_READ:
+                               res->hr_stat_read_error++;
+                               break;
+                       case BIO_WRITE:
+                               res->hr_stat_write_error++;
+                               break;
+                       case BIO_DELETE:
+                               res->hr_stat_delete_error++;
+                               break;
+                       case BIO_FLUSH:
+                               res->hr_stat_flush_error++;
+                               break;
+                       }
+               }
                pjdlog_debug(2,
                    "ggate_send: (%p) Moving request to the free queue.", hio);
                QUEUE_INSERT2(hio, free);

Modified: stable/9/sbin/hastd/secondary.c
==============================================================================
--- stable/9/sbin/hastd/secondary.c     Wed Mar  6 06:24:09 2013        (r247865)
+++ stable/9/sbin/hastd/secondary.c     Wed Mar  6 06:57:18 2013        (r247866)
@@ -725,6 +725,7 @@ disk_thread(void *arg)
                                pjdlog_errno(LOG_WARNING,
                                    "Unable to store cleared activemap");
                                free(map);
+                               res->hr_stat_activemap_write_error++;
                                break;
                        }
                        free(map);
@@ -839,8 +840,23 @@ send_thread(void *arg)
                        PJDLOG_ABORT("Unexpected command (cmd=%hhu).",
                            hio->hio_cmd);
                }
-               if (hio->hio_error != 0)
+               if (hio->hio_error != 0) {
+                       switch (hio->hio_cmd) {
+                       case HIO_READ:
+                               res->hr_stat_read_error++;
+                               break;
+                       case HIO_WRITE:
+                               res->hr_stat_write_error++;
+                               break;
+                       case HIO_DELETE:
+                               res->hr_stat_delete_error++;
+                               break;
+                       case HIO_FLUSH:
+                               res->hr_stat_flush_error++;
+                               break;
+                       }
                        nv_add_int16(nvout, hio->hio_error, "error");
+               }
                if (hast_proto_send(res, res->hr_remoteout, nvout, data,
                    length) == -1) {
                        secondary_exit(EX_TEMPFAIL, "Unable to send reply");
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-stable-9
To unsubscribe, send any mail to "[email protected]"

Reply via email to