Author: trociny
Date: Mon Feb 25 20:09:07 2013
New Revision: 247281
URL: http://svnweb.freebsd.org/changeset/base/247281

Log:
  Add i/o error counters to hastd(8) and make hastctl(8) display
  them.  This may be useful for detecting problems with HAST disks.
  
  Discussed with and reviewed by:       pjd
  MFC after:    1 week

Modified:
  head/sbin/hastctl/hastctl.c
  head/sbin/hastd/control.c
  head/sbin/hastd/hast.h
  head/sbin/hastd/primary.c
  head/sbin/hastd/secondary.c

Modified: head/sbin/hastctl/hastctl.c
==============================================================================
--- head/sbin/hastctl/hastctl.c Mon Feb 25 19:55:32 2013        (r247280)
+++ head/sbin/hastctl/hastctl.c Mon Feb 25 20:09:07 2013        (r247281)
@@ -351,6 +351,12 @@ control_status(struct nv *nv)
                    (uint64_t)nv_get_uint64(nv, "stat_flush%u", ii));
                printf("    activemap updates: %ju\n",
                    (uint64_t)nv_get_uint64(nv, "stat_activemap_update%u", ii));
+               printf("    local errors: "
+                   "read: %ju, write: %ju, delete: %ju, flush: %ju\n",
+                   (uintmax_t)nv_get_uint64(nv, "stat_read_error%u", ii),
+                   (uintmax_t)nv_get_uint64(nv, "stat_write_error%u", ii),
+                   (uintmax_t)nv_get_uint64(nv, "stat_delete_error%u", ii),
+                   (uintmax_t)nv_get_uint64(nv, "stat_flush_error%u", ii));
        }
        return (ret);
 }

Modified: head/sbin/hastd/control.c
==============================================================================
--- head/sbin/hastd/control.c   Mon Feb 25 19:55:32 2013        (r247280)
+++ head/sbin/hastd/control.c   Mon Feb 25 20:09:07 2013        (r247281)
@@ -207,6 +207,14 @@ control_status_worker(struct hast_resour
            "stat_flush%u", no);
        nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_activemap_update"),
            "stat_activemap_update%u", no);
+       nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_read_error"),
+           "stat_read_error%u", no);
+       nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_write_error"),
+           "stat_write_error%u", no);
+       nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_delete_error"),
+           "stat_delete_error%u", no);
+       nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_flush_error"),
+           "stat_flush_error%u", no);
 end:
        if (cnvin != NULL)
                nv_free(cnvin);
@@ -459,6 +467,16 @@ ctrl_thread(void *arg)
                        nv_add_uint64(nvout, res->hr_stat_flush, "stat_flush");
                        nv_add_uint64(nvout, res->hr_stat_activemap_update,
                            "stat_activemap_update");
+                       nv_add_uint64(nvout, res->hr_stat_read_error,
+                           "stat_read_error");
+                       nv_add_uint64(nvout, res->hr_stat_write_error +
+                           res->hr_stat_activemap_write_error,
+                           "stat_write_error");
+                       nv_add_uint64(nvout, res->hr_stat_delete_error,
+                           "stat_delete_error");
+                       nv_add_uint64(nvout, res->hr_stat_flush_error +
+                           res->hr_stat_activemap_flush_error,
+                           "stat_flush_error");
                        nv_add_int16(nvout, 0, "error");
                        break;
                case CONTROL_RELOAD:

Modified: head/sbin/hastd/hast.h
==============================================================================
--- head/sbin/hastd/hast.h      Mon Feb 25 19:55:32 2013        (r247280)
+++ head/sbin/hastd/hast.h      Mon Feb 25 20:09:07 2013        (r247281)
@@ -239,6 +239,18 @@ struct hast_resource {
        uint64_t        hr_stat_flush;
        /* Number of activemap updates. */
        uint64_t        hr_stat_activemap_update;
+       /* Number of local read errors. */
+       uint64_t        hr_stat_read_error;
+       /* Number of local write errors. */
+       uint64_t        hr_stat_write_error;
+       /* Number of local delete errors. */
+       uint64_t        hr_stat_delete_error;
+       /* Number of flush errors. */
+       uint64_t        hr_stat_flush_error;
+       /* Number of activemap write errors. */
+       uint64_t        hr_stat_activemap_write_error;
+       /* Number of activemap flush errors. */
+       uint64_t        hr_stat_activemap_flush_error;
 
        /* Next resource. */
        TAILQ_ENTRY(hast_resource) hr_next;

Modified: head/sbin/hastd/primary.c
==============================================================================
--- head/sbin/hastd/primary.c   Mon Feb 25 19:55:32 2013        (r247280)
+++ head/sbin/hastd/primary.c   Mon Feb 25 20:09:07 2013        (r247281)
@@ -303,6 +303,7 @@ hast_activemap_flush(struct hast_resourc
        if (pwrite(res->hr_localfd, buf, size, METADATA_SIZE) !=
            (ssize_t)size) {
                pjdlog_errno(LOG_ERR, "Unable to flush activemap to disk");
+               res->hr_stat_activemap_write_error++;
                return (-1);
        }
        if (res->hr_metaflush == 1 && g_flush(res->hr_localfd) == -1) {
@@ -313,6 +314,7 @@ hast_activemap_flush(struct hast_resourc
                } else {
                        pjdlog_errno(LOG_ERR,
                            "Unable to flush disk cache on activemap update");
+                       res->hr_stat_activemap_flush_error++;
                        return (-1);
                }
        }
@@ -1936,6 +1938,22 @@ ggate_send_thread(void *arg)
                                    "G_GATE_CMD_DONE failed");
                        }
                }
+               if (hio->hio_errors[0]) {
+                       switch (ggio->gctl_cmd) {
+                       case BIO_READ:
+                               res->hr_stat_read_error++;
+                               break;
+                       case BIO_WRITE:
+                               res->hr_stat_write_error++;
+                               break;
+                       case BIO_DELETE:
+                               res->hr_stat_delete_error++;
+                               break;
+                       case BIO_FLUSH:
+                               res->hr_stat_flush_error++;
+                               break;
+                       }
+               }
                pjdlog_debug(2,
                    "ggate_send: (%p) Moving request to the free queue.", hio);
                QUEUE_INSERT2(hio, free);

Modified: head/sbin/hastd/secondary.c
==============================================================================
--- head/sbin/hastd/secondary.c Mon Feb 25 19:55:32 2013        (r247280)
+++ head/sbin/hastd/secondary.c Mon Feb 25 20:09:07 2013        (r247281)
@@ -765,6 +765,7 @@ disk_thread(void *arg)
                                pjdlog_errno(LOG_WARNING,
                                    "Unable to store cleared activemap");
                                free(map);
+                               res->hr_stat_activemap_write_error++;
                                break;
                        }
                        free(map);
@@ -883,8 +884,23 @@ send_thread(void *arg)
                        PJDLOG_ABORT("Unexpected command (cmd=%hhu).",
                            hio->hio_cmd);
                }
-               if (hio->hio_error != 0)
+               if (hio->hio_error != 0) {
+                       switch (hio->hio_cmd) {
+                       case HIO_READ:
+                               res->hr_stat_read_error++;
+                               break;
+                       case HIO_WRITE:
+                               res->hr_stat_write_error++;
+                               break;
+                       case HIO_DELETE:
+                               res->hr_stat_delete_error++;
+                               break;
+                       case HIO_FLUSH:
+                               res->hr_stat_flush_error++;
+                               break;
+                       }
                        nv_add_int16(nvout, hio->hio_error, "error");
+               }
                if (hast_proto_send(res, res->hr_remoteout, nvout, data,
                    length) == -1) {
                        secondary_exit(EX_TEMPFAIL, "Unable to send reply");
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to