CVSROOT:        /cvs/cluster
Module name:    cluster
Branch:         RHEL5
Changes by:     [EMAIL PROTECTED]       2008-02-04 18:27:20

Modified files:
        cmirror/src    : cluster.c functions.c local.c logging.h 

Log message:
        - change priority of some log statements
        - fix potential OOB memory op by macro
        - add reference counting to log... fixes some issues with mirror conversion
        - plug a memory leak... likely related to bug 383291

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/cluster.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.11&r2=1.1.2.12
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/functions.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.10&r2=1.1.2.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/local.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.10&r2=1.1.2.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/logging.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.4&r2=1.1.2.5

--- cluster/cmirror/src/Attic/cluster.c 2008/01/23 21:21:06     1.1.2.11
+++ cluster/cmirror/src/Attic/cluster.c 2008/02/04 18:27:20     1.1.2.12
@@ -335,7 +335,7 @@
        }
 
        if (rv == SA_AIS_ERR_EXIST) {
-               LOG_ERROR("export_checkpoint: checkpoint already exists");
+               LOG_DBG("export_checkpoint: checkpoint already exists");
                EXIT();
                return -EEXIST;
        }
@@ -361,7 +361,7 @@
        }
 
        if (rv == SA_AIS_ERR_EXIST) {
-               LOG_ERROR("export_checkpoint: sync checkpoint section already exists");
+               LOG_DBG("export_checkpoint: sync checkpoint section already exists");
                EXIT();
                return -EEXIST;
        }
@@ -588,8 +588,8 @@
                }
 
                for (cp = entry->checkpoint_list; cp;) {
-                       LOG_ERROR("Checkpoint data available for node %u",
-                                 cp->requester);
+                       LOG_DBG("[%s] Checkpoint data available for node %u",
+                               SHORT_UUID(entry->name.value), cp->requester);
 
                        /*
                         * FIXME: Check return code.  Could send failure
@@ -747,24 +747,25 @@
 
        ENTER();
 
-       LOG_PRINT("****** CPG config callback ****************");
+       LOG_DBG("****** CPG config callback **[%s]**",
+               SHORT_UUID(gname->value));
 
-       LOG_PRINT("* JOINING (%d):", joined_list_entries);
+       LOG_DBG("* JOINING (%d):", joined_list_entries);
        for (i = 0; i < joined_list_entries; i++)
-               LOG_PRINT("*   nodeid: %d, pid: %d",
-                         joined_list[i].nodeid, joined_list[i].pid);
+               LOG_DBG("*   nodeid: %d, pid: %d",
+                       joined_list[i].nodeid, joined_list[i].pid);
 
-       LOG_PRINT("* MEMBERS (%d):", member_list_entries);
+       LOG_DBG("* MEMBERS (%d):", member_list_entries);
        for (i = 0; i < member_list_entries; i++)
-               LOG_PRINT("*   nodeid: %d, pid: %d",
-                         member_list[i].nodeid, member_list[i].pid);
+               LOG_DBG("*   nodeid: %d, pid: %d",
+                       member_list[i].nodeid, member_list[i].pid);
 
-       LOG_PRINT("* LEAVING (%d):", left_list_entries);
+       LOG_DBG("* LEAVING (%d):", left_list_entries);
        for (i = 0; i < left_list_entries; i++)
-               LOG_PRINT("*   nodeid: %d, pid: %d",
-                         left_list[i].nodeid, left_list[i].pid);       
+               LOG_DBG("*   nodeid: %d, pid: %d",
+                       left_list[i].nodeid, left_list[i].pid); 
 
-       LOG_PRINT("*****************************************");
+       LOG_DBG("*****************************************");
 
        list_for_each_entry_safe(match, tmp, &clog_cpg_list, list) {
                LOG_DBG("Given handle: %llu", (unsigned long long)handle);
--- cluster/cmirror/src/Attic/functions.c       2008/01/23 21:21:06     1.1.2.10
+++ cluster/cmirror/src/Attic/functions.c       2008/02/04 18:27:20     1.1.2.11
@@ -43,7 +43,9 @@
 
 struct log_c {
        struct list_head list;
+
        char uuid[DM_UUID_LEN];
+       uint32_t ref_count;
 
        int touched;
        uint32_t region_size;
@@ -350,6 +352,7 @@
        uint64_t region_count;
        uint32_t bitset_size;
        struct log_c *lc = NULL;
+       struct log_c *dup;
        enum sync sync = DEFAULTSYNC;
 
        int disk_log = 0;
@@ -422,9 +425,19 @@
        lc->sync = sync;
        lc->sync_search = 0;
        lc->recovering_region = (uint64_t)-1;
-       strncpy(lc->uuid, argv[1 + disk_log], DM_UUID_LEN);
        lc->disk_fd = -1;
        lc->log_dev_failed = 0;
+       lc->ref_count = 1;
+       strncpy(lc->uuid, argv[1 + disk_log], DM_UUID_LEN);
+
+       if ((dup = get_log(lc->uuid)) ||
+           (dup = get_pending_log(lc->uuid))) {
+               LOG_PRINT("[%s] Inc reference count on cluster log",
+                         SHORT_UUID(lc->uuid));
+               free(lc);
+               dup->ref_count++;
+               return 0;
+       }
 
        INIT_LIST_HEAD(&lc->mark_list);
 
@@ -561,7 +574,8 @@
        if (r)
                LOG_ERROR("Failed to create cluster log (%s)", tfr->uuid);
        else
-               LOG_PRINT("Cluster log created (%s)", tfr->uuid);
+               LOG_PRINT("[%s] Cluster log created",
+                         SHORT_UUID(tfr->uuid));
 
        return r;
 }
@@ -575,23 +589,31 @@
 {
        struct log_c *lc = get_log(tfr->uuid);
 
-       if (!lc) {
-               /* Is the log in the pending list? */
-               lc = get_pending_log(tfr->uuid);
-               if (!lc) {
-                       LOG_ERROR("clog_dtr called on log that is not official or pending");
-                       return -EINVAL;
-               }
-       } else {
-               LOG_DBG("[%s] clog_dtr: leaving CPG", SHORT_UUID(lc->uuid));
+       if (lc) {
                /*
-                * If postsuspend had done the destroy_cluster_cpg,
-                * the log context would be in the pending list
+                * The log should not be on the official list.  There
+                * should have been a suspend first.
                 */
-               destroy_cluster_cpg(tfr->uuid);
+               lc->ref_count--;
+               if (!lc->ref_count) {
+                       LOG_ERROR("[%s] DTR before SUS: leaving CPG",
+                                 SHORT_UUID(tfr->uuid));
+                       destroy_cluster_cpg(tfr->uuid);
+               }
+       } else if ((lc = get_pending_log(tfr->uuid))) {
+               lc->ref_count--;
+       } else {
+               LOG_ERROR("clog_dtr called on log that is not official or pending");
+               return -EINVAL;
+       }
+
+       if (lc->ref_count) {
+               LOG_PRINT("[%s] Dec reference count on cluster log",
+                         SHORT_UUID(lc->uuid));
+               return 0;
        }
 
-       LOG_PRINT("Cluster log removed (%s)", lc->uuid);
+       LOG_PRINT("[%s] Cluster log removed", SHORT_UUID(lc->uuid));
 
        list_del_init(&lc->list);
        if (lc->disk_fd != -1)
@@ -638,7 +660,7 @@
        if (!lc)
                return -EINVAL;
 
-       LOG_DBG("[%s] clog_postsuspend: leaving CPG", SHORT_UUID(lc->uuid));
+       LOG_PRINT("[%s] clog_postsuspend: leaving CPG", SHORT_UUID(lc->uuid));
        destroy_cluster_cpg(tfr->uuid);
 
        return 0;
@@ -656,7 +678,7 @@
        if (!lc)
                return -EINVAL;
 
-       LOG_DBG("[%s] clog_postsuspend: finalizing", SHORT_UUID(lc->uuid));
+       LOG_PRINT("[%s] clog_postsuspend: finalizing", SHORT_UUID(lc->uuid));
        lc->resume_override = 0;
 
        /* move log to pending list */
@@ -686,10 +708,12 @@
 
        switch (lc->resume_override) {
        case 1000:
-               LOG_ERROR("ERROR:: Additional resume issued before suspend");
+               LOG_ERROR("[%s] ERROR:: Additional resume issued before suspend",
+                         SHORT_UUID(tfr->uuid));
                return 0;
        case 0:
-               LOG_PRINT("Master resume: reading disk log");
+               LOG_PRINT("[%s] Master resume: reading disk log",
+                         SHORT_UUID(lc->uuid));
                lc->resume_override = 1000;
                break;
        case 1:
@@ -699,8 +723,8 @@
                LOG_ERROR("Error:: partial bit loading (just clean_bits)");
                return -EINVAL;
        case 3:
-               LOG_DBG("[%s] Non-master resume: bits pre-loaded",
-                       SHORT_UUID(lc->uuid));
+               LOG_PRINT("[%s] Non-master resume: bits pre-loaded",
+                         SHORT_UUID(lc->uuid));
                lc->resume_override = 1000;
                lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count);
                LOG_DBG("[%s] Initial sync_count = %llu",
@@ -1232,6 +1256,9 @@
        struct log_c *lc = get_log(tfr->uuid);
 
        if (!lc)
+               lc = get_pending_log(tfr->uuid);
+
+       if (!lc)
                return -EINVAL;
 
        if (lc->disk_fd == -1)
@@ -1287,6 +1314,9 @@
        struct log_c *lc = get_log(tfr->uuid);
 
        if (!lc)
+               lc = get_pending_log(tfr->uuid);
+
+       if (!lc)
                return -EINVAL;
 
        if (lc->disk_fd == -1)
--- cluster/cmirror/src/Attic/local.c   2008/01/25 16:24:47     1.1.2.10
+++ cluster/cmirror/src/Attic/local.c   2008/02/04 18:27:20     1.1.2.11
@@ -16,8 +16,6 @@
 #include "local.h"
 
 static int cn_fd;  /* Connector (netlink) socket fd */
-static int request_array[20];  /* for request counting (debugging) */
-static int cluster_array[20];  /* for request counting (debugging) */
 
 static int kernel_recv_helper(void *data, int in_size)
 {
@@ -145,7 +143,6 @@
  */
 static int do_local_work(void *data)
 {
-       static int request_count = 0;
        int r, i;
        struct clog_tfr *tfr = NULL;
 
@@ -154,9 +151,6 @@
        if (r)
                return r;
 
-       request_array[tfr->request_type]++;
-       request_count++;
-
        LOG_DBG("Request from kernel recieved [%s/%s/%llu]",
                RQ_TYPE(tfr->request_type), SHORT_UUID(tfr->uuid),
                (unsigned long long)tfr->seq);
@@ -208,13 +202,22 @@
                }
                /* ELSE, fall through to default */
        default:
-               cluster_array[tfr->request_type]++;
-               /* Add before send_to_cluster, so cluster code can find it */
-               queue_add_tail(tfr, cluster_queue);
                r = cluster_send(tfr);
-               if (r)
-                       LOG_ERROR("Unable to send request to cluster: %s",
-                                 strerror(-r));
+               if (r) {
+                       LOG_ERROR("[%s] Unable to send %s to cluster: %s",
+                                 SHORT_UUID(tfr->uuid),
+                                 RQ_TYPE(tfr->request_type), strerror(-r));
+                       tfr->error = r;
+                       kernel_send(tfr);
+               } else {
+                       /*
+                        * If this was multi-threaded, we would have to
+                        * add the 'tfr' to the queue before doing
+                        * the cluster_send
+                        */
+                       queue_add_tail(tfr, cluster_queue);
+               }
+
                break;
        }
 
@@ -223,14 +226,6 @@
                tfr->error = r;
        }
 
-       if (!(request_count % 10000)) {
-               LOG_PRINT("Total requests (%d):", request_count);
-               for (i = 0; i < 20; i++)
-                       LOG_PRINT("  %s: %d", RQ_TYPE(i), request_array[i]);
-               LOG_PRINT("Cluster-bound requests:");
-               for (i = 0; i < 20; i++)
-                       LOG_PRINT("  %s: %d", RQ_TYPE(i), cluster_array[i]);
-       }
        EXIT();
        return r;
 }
@@ -303,9 +298,6 @@
 
        ENTER();
 
-       memset(request_array, 0, sizeof(int)*20);
-       memset(cluster_array, 0, sizeof(int)*20);
-
        cn_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
        if (cn_fd < 0) {
                EXIT();
--- cluster/cmirror/src/Attic/logging.h 2008/01/18 17:11:07     1.1.2.4
+++ cluster/cmirror/src/Attic/logging.h 2008/02/04 18:27:20     1.1.2.5
@@ -29,7 +29,7 @@
 #endif
 
 /* SHORT_UUID - print last 8 chars of a string */
-#define SHORT_UUID(x) ((x) + (strlen(x) - 8))
+#define SHORT_UUID(x) (strlen(x) > 8) ? ((x) + (strlen(x) - 8)) : (x)
 
 extern int log_tabbing;
 extern int log_is_open;

Reply via email to