CVSROOT:        /cvs/cluster
Module name:    cluster
Branch:         RHEL5
Changes by:     [EMAIL PROTECTED]       2007-12-04 20:24:43

Modified files:
        cman/qdisk     : disk.c disk.h disk_util.c main.c mkqdisk.c 
                         proc.c 

Log message:
        Make qdiskd work with sector sizes other than 512 bytes.  Import a patch
        from Fabio M. Di Nitto to make qdiskd use (node_count - 1) for votes if
        none are specified in cluster.conf.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.4.1&r2=1.4.4.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.2.3&r2=1.4.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk_util.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2.4.2&r2=1.2.4.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.2.6&r2=1.4.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/mkqdisk.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.3.4.1&r2=1.3.4.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/proc.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2&r2=1.2.4.1

--- cluster/cman/qdisk/disk.c   2007/10/29 17:54:25     1.4.4.1
+++ cluster/cman/qdisk/disk.c   2007/12/04 20:24:43     1.4.4.2
@@ -43,8 +43,9 @@
 #include <platform.h>
 #include <unistd.h>
 #include <time.h>
+#include <linux/fs.h>
 
-static int diskRawRead(int fd, char *buf, int len);
+static int diskRawRead(target_info_t *disk, char *buf, int len);
 uint32_t clu_crc32(const char *data, size_t count);
 
 
@@ -211,49 +212,58 @@
  * Returns - (the file descriptor), a value >= 0 on success.
  */
 int
-qdisk_open(char *name)
+qdisk_open(char *name, target_info_t *disk)
 {
-       int fd;
-       int retval;
+       int ret;
+       unsigned long ssz;
 
        /*
         * Open for synchronous writes to insure all writes go directly
         * to disk.
         */
-       fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
-       if (fd < 0) {
-               return fd;
-       }
+       disk->d_fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
+       if (disk->d_fd < 0)
+               return disk->d_fd;
+
+       disk->d_blksz = 512;
+       ret = ioctl(disk->d_fd, BLKSSZGET, &ssz);
+       if (ret < 0)
+               perror("qdisk_open: ioctl(BLKSSZGET)");
+       else
+               /* Sorry, no sector sizes >4GB please */
+               disk->d_blksz = (uint32_t)ssz;
 
-       /* Check to verify that the partition is large enough.*/
-       retval = lseek(fd, END_OF_DISK, SEEK_SET);
+       disk->d_pagesz = sysconf(_SC_PAGESIZE);
 
-       if (retval < 0) {
+       /* Check to verify that the partition is large enough.*/
+       ret = lseek(disk->d_fd, END_OF_DISK(disk->d_blksz), SEEK_SET);
+       if (ret < 0) {
                perror("open_partition: seek");
                return -1;
        }
 
-       if (retval < END_OF_DISK) {
+       if (ret < END_OF_DISK(disk->d_blksz)) {
                fprintf(stderr, "Partition %s too small\n", name);
                errno = EINVAL;
                return -1;
        }
 
        /* Set close-on-exec bit */
-        retval = fcntl(fd, F_GETFD, 0);
-        if (retval < 0) {
-                close(fd);
+        ret = fcntl(disk->d_fd, F_GETFD, 0);
+        if (ret < 0) {
+               perror("open_partition: fcntl(F_GETFD)");
+                close(disk->d_fd);
                 return -1;
         }
 
-        retval |= FD_CLOEXEC;
-        if (fcntl(fd, F_SETFD, retval) < 0) {
-               perror("open_partition: fcntl");
-                close(fd);
+        ret |= FD_CLOEXEC;
+        if (fcntl(disk->d_fd, F_SETFD, ret) < 0) {
+               perror("open_partition: fcntl(F_SETFD)");
+                close(disk->d_fd);
                 return -1;
         }
 
-       return fd;
+       return 0;
 }
 
 
@@ -263,17 +273,17 @@
  * Returns - value from close syscall.
  */
 int
-qdisk_close(int *fd)
+qdisk_close(target_info_t *disk)
 {
        int retval;
 
-       if (!fd || *fd < 0) {
+       if (!disk || disk->d_fd < 0) {
                errno = EINVAL;
                return -1;
        }
 
-       retval = close(*fd);
-       *fd = -1;
+       retval = close(disk->d_fd);
+       disk->d_fd = -1;
 
        return retval;
 }
@@ -288,7 +298,7 @@
 qdisk_validate(char *name)
 {
        struct stat stat_st, *stat_ptr;
-       int fd;
+       target_info_t disk;
        stat_ptr = &stat_st;
 
        if (stat(name, stat_ptr) < 0) {
@@ -310,26 +320,25 @@
        /*
         * Verify read/write permission.
         */
-       fd = qdisk_open(name);
-       if (fd < 0) {
+       if (qdisk_open(name, &disk) < 0) {
                fprintf(stderr, "%s: open of %s for RDWR failed: %s\n",
                        __FUNCTION__, name, strerror(errno));
                return -1;
        }
-       qdisk_close(&fd);
+       qdisk_close(&disk);
        return 0;
 }
 
 
 static int
-diskRawReadShadow(int fd, off_t readOffset, char *buf, int len)
+diskRawReadShadow(target_info_t *disk, off_t readOffset, char *buf, int len)
 {
        int ret;
        shared_header_t *hdrp;
        char *data;
        int datalen;
 
-       ret = lseek(fd, readOffset, SEEK_SET);
+       ret = lseek(disk->d_fd, readOffset, SEEK_SET);
        if (ret != readOffset) {
 #if 0
                fprintf(stderr,
@@ -340,7 +349,7 @@
                return -1;
        }
 
-       ret = diskRawRead(fd, buf, len);
+       ret = diskRawRead(disk, buf, len);
        if (ret != len) {
 #if 0
                fprintf(stderr, "diskRawReadShadow: aligned read "
@@ -375,7 +384,7 @@
  * Here we check for alignment and do a bounceio if necessary.
  */
 static int
-diskRawRead(int fd, char *buf, int len)
+diskRawRead(target_info_t *disk, char *buf, int len)
 {
        char *alignedBuf;
        int readret;
@@ -383,21 +392,24 @@
        int readlen;
        int bounceNeeded = 1;
 
-       if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
-           ((len % 512) == 0)) {
+       
+       /* was 3ff, which is (512<<1-1) */
+       if ((((unsigned long) buf &
+             (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+           ((len % (disk->d_blksz)) == 0)) {
                bounceNeeded = 0;
        }
 
        if (bounceNeeded == 0) {
                /* Already aligned and even multiple of 512, no bounceio
                 * required. */
-               return (read(fd, buf, len));
+               return (read(disk->d_fd, buf, len));
        }
 
-       if (len > 512) {
+       if (len > disk->d_blksz) {
                fprintf(stderr,
                        "diskRawRead: not setup for reads larger than %d.\n",
-                      512);
+                      (int)disk->d_blksz);
                return (-1);
        }
        /*
@@ -406,8 +418,8 @@
         * XXX - if the on-disk offsets don't provide enough room we're cooked!
         */
        extraLength = 0;
-       if (len % 512) {
-               extraLength = 512 - (len % 512);
+       if (len % disk->d_blksz) {
+               extraLength = disk->d_blksz - (len % disk->d_blksz);
        }
 
        readlen = len;
@@ -415,18 +427,18 @@
                readlen += extraLength;
        }
 
-       readret = posix_memalign((void **)&alignedBuf, 512, 512);
+       readret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
        if (readret < 0) {
                return -1;
        }
 
-       readret = read(fd, alignedBuf, readlen);
+       readret = read(disk->d_fd, alignedBuf, readlen);
        if (readret > 0) {
                if (readret > len) {
-                       bcopy(alignedBuf, buf, len);
+                       memcpy(alignedBuf, buf, len);
                        readret = len;
                } else {
-                       bcopy(alignedBuf, buf, readret);
+                       memcpy(alignedBuf, buf, readret);
                }
        }
 
@@ -445,7 +457,7 @@
  * Here we check for alignment and do a bounceio if necessary.
  */
 static int
-diskRawWrite(int fd, char *buf, int len)
+diskRawWrite(target_info_t *disk, char *buf, int len)
 {
        char *alignedBuf;
        int ret;
@@ -453,31 +465,33 @@
        int writelen;
        int bounceNeeded = 1;
 
-       if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
-           ((len % 512) == 0)) {
+       /* was 3ff, which is (512<<1-1) */
+       if ((((unsigned long) buf &
+             (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+           ((len % (disk->d_blksz)) == 0)) {
                bounceNeeded = 0;
        }
+
        if (bounceNeeded == 0) {
                /* Already aligned and even multiple of 512, no bounceio
                 * required. */
-               return (write(fd, buf, len));
+               return (write(disk->d_fd, buf, len));
        }
 
-       if (len > 512) {
+       if (len > disk->d_blksz) {
                fprintf(stderr,
-                      "diskRawWrite: not setup for larger than %d.\n",
-                      512);
+                       "diskRawRead: not setup for reads larger than %d.\n",
+                      (int)disk->d_blksz);
                return (-1);
        }
-
        /*
         * All IOs must be of size which is a multiple of 512.  Here we
         * just add in enough extra to accommodate.
         * XXX - if the on-disk offsets don't provide enough room we're cooked!
         */
        extraLength = 0;
-       if (len % 512) {
-               extraLength = 512 - (len % 512);
+       if (len % disk->d_blksz) {
+               extraLength = disk->d_blksz - (len % disk->d_blksz);
        }
 
        writelen = len;
@@ -485,13 +499,20 @@
                writelen += extraLength;
        }
 
-       ret = posix_memalign((void **)&alignedBuf, 512,512);
+       ret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
        if (ret < 0) {
+               return -1;
+       }
+
+       if (len > disk->d_blksz) {
+               fprintf(stderr,
+                      "diskRawWrite: not setup for larger than %d.\n",
+                      (int)disk->d_blksz);
                return (-1);
        }
 
-       bcopy(buf, alignedBuf, len);
-       ret = write(fd, alignedBuf, writelen);
+       memcpy(buf, alignedBuf, len);
+       ret = write(disk->d_fd, alignedBuf, writelen);
        if (ret > len) {
                ret = len;
        }
@@ -507,7 +528,7 @@
 
 
 static int
-diskRawWriteShadow(int fd, __off64_t writeOffset, char *buf, int len)
+diskRawWriteShadow(target_info_t *disk, __off64_t writeOffset, char *buf, int len)
 {
        off_t retval_seek;
        ssize_t retval_write;
@@ -519,7 +540,7 @@
                return (-1);
        }
 
-       retval_seek = lseek(fd, writeOffset, SEEK_SET);
+       retval_seek = lseek(disk->d_fd, writeOffset, SEEK_SET);
        if (retval_seek != writeOffset) {
                fprintf(stderr,
                       "diskRawWriteShadow: can't seek to offset %d\n",
@@ -527,7 +548,7 @@
                return (-1);
        }
 
-       retval_write = diskRawWrite(fd, buf, len);
+       retval_write = diskRawWrite(disk, buf, len);
        if (retval_write != len) {
                if (retval_write == -1) {
                        fprintf(stderr, "%s: %s\n", __FUNCTION__,
@@ -544,7 +565,7 @@
 
 
 int
-qdisk_read(int fd, __off64_t offset, void *buf, int count)
+qdisk_read(target_info_t *disk, __off64_t offset, void *buf, int count)
 {
        shared_header_t *hdrp;
        char *data;
@@ -556,15 +577,15 @@
         * Raw blocks are 512 byte aligned.
         */
        total = count + sizeof(shared_header_t);
-       if (total < 512)
-               total = 512;
+       if (total < disk->d_blksz)
+               total = disk->d_blksz;
 
        /* Round it up */
-       if (total % 512) 
-               total = total + (512 * !!(total % 512)) - (total % 512);
+       if (total % disk->d_blksz) 
+               total = total + (disk->d_blksz * !!(total % disk->d_blksz)) - (total % disk->d_blksz);
 
        hdrp = NULL;
-       rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+       rv = posix_memalign((void **)&hdrp, disk->d_pagesz, disk->d_blksz);
        if (rv < 0)
                return -1;
 
@@ -573,7 +594,7 @@
 
        data = (char *)hdrp + sizeof(shared_header_t);
 
-       rv = diskRawReadShadow(fd, offset, (char *)hdrp, total);
+       rv = diskRawReadShadow(disk, offset, (char *)hdrp, disk->d_blksz);
        
        if (rv == -1) {
                return -1;
@@ -594,12 +615,12 @@
 
 
 int
-qdisk_write(int fd, __off64_t offset, const void *buf, int count)
+qdisk_write(target_info_t *disk, __off64_t offset, const void *buf, int count)
 {
        size_t maxsize;
        shared_header_t *hdrp;
        char *data;
-       size_t total = 0, rv = -1, psz = 512; //sysconf(_SC_PAGESIZE);
+       size_t total = 0, rv = -1, psz = disk->d_blksz; //sysconf(_SC_PAGESIZE);
 
        maxsize = psz - (sizeof(shared_header_t));
        if (count >= (maxsize + sizeof(shared_header_t))) {
@@ -611,7 +632,6 @@
 
        /*
         * Calculate the total length of the buffer, including the header.
-        * Raw blocks are 512 byte aligned.
         */
        total = count + sizeof(shared_header_t);
        if (total < psz)
@@ -622,7 +642,7 @@
                total = total + (psz * !!(total % psz)) - (total % psz);
 
        hdrp = NULL;
-       rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+       rv = posix_memalign((void **)&hdrp, disk->d_pagesz, total);
        if (rv < 0) {
                perror("posix_memalign");
                return -1;
@@ -645,7 +665,7 @@
         * about locking here.
         */
        if (total == psz)
-               rv = diskRawWriteShadow(fd, offset, (char *)hdrp, psz);
+               rv = diskRawWriteShadow(disk, offset, (char *)hdrp, psz);
 
        if (rv == -1)
                perror("diskRawWriteShadow");
@@ -658,11 +678,11 @@
 
 
 static int
-header_init(int fd, char *label)
+header_init(target_info_t *disk, char *label)
 {
        quorum_header_t qh;
 
-       if (qdisk_read(fd, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
+       if (qdisk_read(disk, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
                swab_quorum_header_t(&qh);
                if (qh.qh_magic == HEADER_MAGIC_OLD) {
                        printf("Warning: Red Hat Cluster Manager 1.2.x "
@@ -681,14 +701,18 @@
        /* Copy in the cluster/label name */
        snprintf(qh.qh_cluster, sizeof(qh.qh_cluster)-1, "%s", label);
 
+       qh.qh_version = VERSION_MAGIC_V2;
        if ((qh.qh_timestamp = (uint64_t)time(NULL)) <= 0) {
                perror("time");
                return -1;
        }
 
        qh.qh_magic = HEADER_MAGIC_NUMBER;
+       qh.qh_blksz = disk->d_blksz;
+       qh.qh_pad = 0;
+
        swab_quorum_header_t(&qh);
-       if (qdisk_write(fd, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
+       if (qdisk_write(disk, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
                return -1;
        }
 
@@ -699,24 +723,24 @@
 int
 qdisk_init(char *partname, char *label)
 {
-       int fd;
+       target_info_t disk;
        status_block_t ps, wps;
-       int nid;
+       int nid, ret;
        time_t t;
 
-       fd = qdisk_validate(partname);
-       if (fd < 0) {
+       ret = qdisk_validate(partname);
+       if (ret < 0) {
                perror("qdisk_verify");
                return -1;
        }
 
-       fd = qdisk_open(partname);
-       if (fd < 0) {
+       ret = qdisk_open(partname, &disk);
+       if (ret < 0) {
                perror("qdisk_open");
                return -1;
        }
 
-       if (header_init(fd, label) < 0) {
+       if (header_init(&disk, label) < 0) {
                return -1;
        }
 
@@ -744,14 +768,14 @@
                wps = ps;
                swab_status_block_t(&wps);
 
-               if (qdisk_write(fd, qdisk_nodeid_offset(nid), &wps, sizeof(wps)) < 0) {
+               if (qdisk_write(&disk, qdisk_nodeid_offset(nid, disk.d_blksz), &wps, sizeof(wps)) < 0) {
                        printf("Error writing node ID block %d\n", nid);
-                       qdisk_close(&fd);
+                       qdisk_close(&disk);
                        return -1;
                }
        }
 
-       qdisk_close(&fd);
+       qdisk_close(&disk);
 
        return 0;
 }
--- cluster/cman/qdisk/disk.h   2007/02/21 20:22:53     1.4.2.3
+++ cluster/cman/qdisk/disk.h   2007/12/04 20:24:43     1.4.2.4
@@ -72,7 +72,8 @@
        RF_DEBUG = 0x4,
        RF_PARANOID = 0x8,
        RF_ALLOW_KILL = 0x10,
-       RF_UPTIME = 0x20
+       RF_UPTIME = 0x20,
+       RF_CMAN_LABEL = 0x40
 } run_flag_t;
 
 
@@ -86,6 +87,9 @@
 #define STATE_MAGIC_NUMBER     0x47bacef8      /* Status block */
 #define SHARED_HEADER_MAGIC    0x00DEBB1E      /* Per-block headeer */
 
+/* Version magic. */
+#define VERSION_MAGIC_V2       0x389fabc4
+
 
 typedef struct __attribute__ ((packed)) {
        uint32_t        ps_magic;
@@ -152,16 +156,21 @@
  */
 typedef struct __attribute__ ((packed)) {
        uint32_t        qh_magic;
-       uint32_t        qh_align;          // 64-bit-ism: alignment fixer.
+       uint32_t        qh_version;        // 
        uint64_t        qh_timestamp;      // time of last update
        char            qh_updatehost[128];// Hostname who put this here...
-       char            qh_cluster[128];   // Cluster name
+       char            qh_cluster[120];   // Cluster name; CMAN only 
+                                          // supports 16 chars.
+       uint32_t        qh_blksz;          // Known block size @ creation
+       uint32_t        qh_pad;
 } quorum_header_t;
 
 #define swab_quorum_header_t(ptr) \
 {\
        swab32((ptr)->qh_magic); \
-       swab32((ptr)->qh_align); \
+       swab32((ptr)->qh_version); \
+       swab32((ptr)->qh_blksz); \
+       swab32((ptr)->qh_pad); \
        swab64((ptr)->qh_timestamp); \
 }
 
@@ -196,31 +205,35 @@
 
 /* Offsets from RHCM 1.2.x */
 #define OFFSET_HEADER  0
-#define HEADER_SIZE    4096            /* Page size for now */
+#define HEADER_SIZE(ssz)               (ssz<4096?4096:ssz)
 
-#define OFFSET_FIRST_STATUS_BLOCK      (OFFSET_HEADER + HEADER_SIZE)
-#define SPACE_PER_STATUS_BLOCK         4096 /* Page size for now */
+#define OFFSET_FIRST_STATUS_BLOCK(ssz) (OFFSET_HEADER + HEADER_SIZE(ssz))
+#define SPACE_PER_STATUS_BLOCK(ssz)    (ssz<4096?4096:ssz)
 #define STATUS_BLOCK_COUNT             MAX_NODES_DISK
 
-#define SPACE_PER_MESSAGE_BLOCK                (4096)
-#define        MESSAGE_BLOCK_COUNT             MAX_NODES_DISK
-
-#define END_OF_DISK                    (OFFSET_FIRST_STATUS_BLOCK + \
+#define END_OF_DISK(ssz)               (OFFSET_FIRST_STATUS_BLOCK(ssz) + \
                                         (MAX_NODES_DISK + 1) * \
-                                        SPACE_PER_STATUS_BLOCK) \
+                                        SPACE_PER_STATUS_BLOCK(ssz)) \
 
 
+typedef struct {
+       int d_fd;
+       int _pad_;
+       size_t d_blksz;
+       size_t d_pagesz;
+} target_info_t;
+
 
 /* From disk.c */
-int qdisk_open(char *name);
-int qdisk_close(int *fd);
+int qdisk_open(char *name, target_info_t *disk);
+int qdisk_close(target_info_t *disk);
 int qdisk_init(char *name, char *clustername);
 int qdisk_validate(char *name);
-int qdisk_read(int fd, __off64_t ofs, void *buf, int len);
-int qdisk_write(int fd, __off64_t ofs, const void *buf, int len);
+int qdisk_read(target_info_t *disk, __off64_t ofs, void *buf, int len);
+int qdisk_write(target_info_t *disk, __off64_t ofs, const void *buf, int len);
 
-#define qdisk_nodeid_offset(nodeid) \
-       (OFFSET_FIRST_STATUS_BLOCK + (SPACE_PER_STATUS_BLOCK * (nodeid - 1)))
+#define qdisk_nodeid_offset(nodeid, ssz) \
+       (OFFSET_FIRST_STATUS_BLOCK(ssz) + (SPACE_PER_STATUS_BLOCK(ssz) * (nodeid - 1)))
 
 /* From disk_utils.c */
 #define HISTORY_LENGTH 60
@@ -231,11 +244,12 @@
        uint16_t pad0;
 } disk_msg_t;
 
+
 typedef struct {
        uint64_t qc_incarnation;
        struct timeval qc_average;
        struct timeval qc_last[HISTORY_LENGTH];
-       int qc_fd;
+       target_info_t qc_disk;
        int qc_my_id;
        int qc_writes;
        int qc_interval;
@@ -250,12 +264,14 @@
        disk_node_state_t qc_disk_status;
        disk_node_state_t qc_status;
        int qc_master;          /* Master?! */
-       int _pad_;
+       int qc_status_sock;
        run_flag_t qc_flags;
        cman_handle_t qc_ch;
        char *qc_device;
        char *qc_label;
        char *qc_status_file;
+       char *qc_cman_label;
+       char *qc_status_sockname;
 } qd_ctx;
 
 typedef struct {
@@ -272,14 +288,15 @@
 
 int qd_write_status(qd_ctx *ctx, int nid, disk_node_state_t state,
                    disk_msg_t *msg, memb_mask_t mask, memb_mask_t master);
-int qd_read_print_status(int fd, int nid);
+int qd_read_print_status(target_info_t *disk, int nid);
 int qd_init(qd_ctx *ctx, cman_handle_t ch, int me);
 void qd_destroy(qd_ctx *ctx);
 
 /* proc.c */
 int find_partitions(const char *partfile, const char *label,
                    char *devname, size_t devlen, int print);
-int check_device(char *device, char *label, quorum_header_t *qh);
+int check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+                int flags);
 
 
 #endif
--- cluster/cman/qdisk/disk_util.c      2007/01/26 14:34:55     1.2.4.2
+++ cluster/cman/qdisk/disk_util.c      2007/12/04 20:24:43     1.2.4.3
@@ -201,8 +201,9 @@
        if (get_time(&start, ctx->qc_flags&RF_UPTIME) < 0)
                utime_ok = 0;
        swab_status_block_t(&ps);
-       if (qdisk_write(ctx->qc_fd, qdisk_nodeid_offset(nid), &ps,
-                       sizeof(ps)) < 0) {
+       if (qdisk_write(&ctx->qc_disk,
+                       qdisk_nodeid_offset(nid, ctx->qc_disk.d_blksz),
+                       &ps, sizeof(ps)) < 0) {
                printf("Error writing node ID block %d\n", nid);
                return -1;
        }
@@ -223,12 +224,12 @@
 
 
 int
-qd_print_status(status_block_t *ps)
+qd_print_status(target_info_t *disk, status_block_t *ps)
 {
        int x;
 
        printf("Data @ offset %d:\n",
-              (int)qdisk_nodeid_offset(ps->ps_nodeid));
+              (int)qdisk_nodeid_offset(ps->ps_nodeid, disk->d_blksz));
        printf("status_block_t {\n");
        printf("\t.ps_magic = %08x;\n", (int)ps->ps_magic);
        printf("\t.ps_nodeid = %d;\n", (int)ps->ps_nodeid);
@@ -261,11 +262,11 @@
 
 
 int
-qd_read_print_status(int fd, int nid)
+qd_read_print_status(target_info_t *disk, int nid)
 {
        status_block_t ps;
 
-       if (fd < 0) {
+       if (!disk || disk->d_fd < 0) {
                errno = EINVAL;
                return -1;
        }
@@ -275,13 +276,13 @@
                return -1;
        }
 
-       if (qdisk_read(fd, qdisk_nodeid_offset(nid), &ps,
+       if (qdisk_read(disk, qdisk_nodeid_offset(nid, disk->d_blksz), &ps,
                        sizeof(ps)) < 0) {
                printf("Error reading node ID block %d\n", nid);
                return -1;
        }
        swab_status_block_t(&ps);
-       qd_print_status(&ps);
+       qd_print_status(disk, &ps);
 
        return 0;
 }
@@ -322,6 +323,7 @@
        ctx->qc_incarnation = generate_token();
        ctx->qc_ch = ch;
        ctx->qc_my_id = me;
+       ctx->qc_status_sock = -1;
 
        return 0;
 }
@@ -339,6 +341,5 @@
                free(ctx->qc_device);
                ctx->qc_device = NULL;
        }
-       close(ctx->qc_fd);
-       ctx->qc_fd = -1;
+       qdisk_close(&ctx->qc_disk);
 }
--- cluster/cman/qdisk/main.c   2007/03/20 19:37:04     1.4.2.6
+++ cluster/cman/qdisk/main.c   2007/12/04 20:24:43     1.4.2.7
@@ -36,6 +36,7 @@
 #include <time.h>
 #include <sys/reboot.h>
 #include <sys/time.h>
+#include <sys/un.h>
 #include <linux/reboot.h>
 #include <sched.h>
 #include <signal.h>
@@ -147,7 +148,8 @@
 
                sb = &ni[x].ni_status;
 
-               if (qdisk_read(ctx->qc_fd, qdisk_nodeid_offset(x+1),
+               if (qdisk_read(&ctx->qc_disk,
+                              qdisk_nodeid_offset(x+1, ctx->qc_disk.d_blksz),
                               sb, sizeof(*sb)) < 0) {
                        clulog(LOG_WARNING,"Error reading node ID block %d\n",
                               x+1);
@@ -452,6 +454,10 @@
 quorum_init(qd_ctx *ctx, node_info_t *ni, int max, struct h_data *h, int maxh)
 {
        int x = 0, score, maxscore, score_req;
+       char buf[64];
+#if 0
+       struct sockaddr_un sun;
+#endif
 
        clulog(LOG_INFO, "Quorum Daemon Initializing\n");
        
@@ -462,12 +468,28 @@
        if (qdisk_validate(ctx->qc_device) < 0)
                return -1;
 
-       ctx->qc_fd = qdisk_open(ctx->qc_device);
-       if (ctx->qc_fd < 0) {
+       if (qdisk_open(ctx->qc_device, &ctx->qc_disk) < 0) {
                clulog(LOG_CRIT, "Failed to open %s: %s\n", ctx->qc_device,
                       strerror(errno));
                return -1;
        }
+
+       if (strlen(ctx->qc_device) > 15 && !(ctx->qc_flags & RF_CMAN_LABEL)) {
+               if (ctx->qc_label && strlen(ctx->qc_label) <= 15) {
+                       ctx->qc_cman_label = strdup(ctx->qc_label);
+               } else {
+                       snprintf(buf, sizeof(buf), "QDisk[%d]",
+                               (int)strlen(ctx->qc_device));
+                       ctx->qc_cman_label = strdup(buf);
+               }
+
+               ctx->qc_flags |= RF_CMAN_LABEL;
+               clulog(LOG_DEBUG, "Device too long! Setting CMAN label to: %s\n",
+                       ctx->qc_cman_label);
+       }
+
+       clulog(LOG_DEBUG, "I/O Size: %d  Page Size: %d\n",
+              ctx->qc_disk.d_blksz, ctx->qc_disk.d_pagesz);
        
        if (h && maxh) {
                start_score_thread(ctx, h, maxh);
@@ -484,6 +506,42 @@
                return -1;
        }
 
+#if 0
+       if (ctx->qc_status_sockname) {
+               ctx->qc_status_sock = socket(PF_LOCAL, SOCK_STREAM, 0);
+
+               if (ctx->qc_status_sockname < 0) {
+                       clulog(LOG_ERR,
+                              "Could not create local socket %s: %s\n",
+                              qc->qc_status_sockname, strerror(errno));
+                       free(qc->qc_status_sockname);
+                       qc->qc_status_sockname = NULL;
+               } else {
+                       sun.sun_family = PF_LOCAL;
+                       snprintf(sun.sun_path, sizeof(sun.sun_path),
+                                qc->qc_status_sockname);
+                       unlink(qc->qc_status_sockname);
+                       if (bind(ctx->qc_status_sock,
+                                (struct sockaddr *)&sun, sizeof(sun)) < 0) {
+                               clulog(LOG_ERR, "Could not bind to local "
+                                      "socket %s: %s\n",
+                                      qc->qc_status_sockname,
+                                      strerror(errno));
+                               free(qc->qc_status_sockname);
+                               qc->qc_status_sockname = NULL;
+                               close(qc->qc_status_sock);
+                               qc->qc_status_sock = -1;
+                       }
+               }
+       } else {
+               qc->qc_status_sock = -1;
+       }
+
+       if (qc->qc_status_sock >= 0) {
+               listen(qc->qc_status_sock, 5);
+       }
+#endif
+
        while (++x <= ctx->qc_tko && _running) {
                read_node_blocks(ctx, ni, max);
                check_transitions(ctx, ni, max, NULL);
@@ -622,23 +680,7 @@
 
 
 char *
-state_str(disk_node_state_t s)
-{
-       switch (s) {
-       case S_NONE:
-               return "None";
-       case S_EVICT:
-               return "Evicted";
-       case S_INIT:
-               return "Initializing";
-       case S_RUN:
-               return "Running";
-       case S_MASTER:
-               return "Master";
-       default:
-               return "ILLEGAL";
-       }
-}
+state_str(disk_node_state_t s);
 
 
 void
@@ -1237,6 +1279,12 @@
                ctx->qc_status_file = val;
        }
 
+       /* Get status socket */
+       snprintf(query, sizeof(query), "/cluster/quorumd/@status_sock");
+       if (ccs_get(ccsfd, query, &val) == 0) {
+               ctx->qc_status_sockname = val;
+       }
+
        /* Get min score */
        snprintf(query, sizeof(query), "/cluster/quorumd/@min_score");
        if (ccs_get(ccsfd, query, &val) == 0) {
@@ -1285,6 +1333,15 @@
                        ctx->qc_flags &= ~RF_REBOOT;
                free(val);
        }
+
+       /* Get cman_label */
+       snprintf(query, sizeof(query), "/cluster/quorumd/@cman_label");
+       if (ccs_get(ccsfd, query, &val) == 0) {
+               if (strlen(val) > 0 && strlen(val) <= 15) {
+                       ctx->qc_flags |= RF_CMAN_LABEL;
+                       ctx->qc_cman_label = val;
+               }
+       }
        
        /*
         * Get flag to see if we're supposed to kill cman if qdisk is not 
@@ -1384,21 +1441,25 @@
 main(int argc, char **argv)
 {
        cman_node_t me;
-       int cfh, rv, forked = 0, nfd = -1;
+       int cfh, rv, forked = 0, nfd = -1, ret = -1;
+#if 0
+       int status_run = 0;
+#endif
        qd_ctx ctx;
-       cman_handle_t ch;
+       cman_handle_t ch = NULL;
        node_info_t ni[MAX_NODES_DISK];
        struct h_data h[10];
        char debug = 0, foreground = 0;
        char device[128];
        pid_t pid;
+       quorum_header_t qh;
 
        if (check_process_running(argv[0], &pid) && pid !=getpid()) {
                printf("QDisk services already running\n");
                return 0;
        }
        
-       while ((rv = getopt(argc, argv, "fdQ")) != EOF) {
+       while ((rv = getopt(argc, argv, "fdQs")) != EOF) {
                switch (rv) {
                case 'd':
                        debug = 1;
@@ -1418,11 +1479,15 @@
                        dup2(nfd, 2);
                        close(nfd);
                        break;
+#if 0
+               case 's':
+                       status_run = 1;
+#endif
                default:
                        break;
                }
        }
-       
+
 #if (defined(LIBCMAN_VERSION) && LIBCMAN_VERSION >= 2)
        ch = cman_admin_init(NULL);
 #else
@@ -1431,7 +1496,7 @@
        if (!ch) {
                if (!foreground && !forked) {
                        if (daemon_init(argv[0]) < 0)
-                               return -1;
+                               goto out;
                        else
                                forked = 1;
                }
@@ -1452,7 +1517,7 @@
        while (cman_get_node(ch, CMAN_NODEID_US, &me) < 0) {
                if (!foreground && !forked) {
                        if (daemon_init(argv[0]) < 0)
-                               return -1;
+                               goto out;
                        else
                                forked = 1;
                }
@@ -1472,7 +1537,7 @@
        if (get_config_data(NULL, &ctx, h, 10, &cfh, debug) < 0) {
                clulog_and_print(LOG_CRIT, "Configuration failed\n");
                check_stop_cman(&ctx);
-               return -1;
+               goto out;
        }
        
        if (ctx.qc_label) {
@@ -1483,7 +1548,7 @@
                                         " '%s' to any device\n",
                                         ctx.qc_label);
                        check_stop_cman(&ctx);
-                       return -1;
+                       goto out;
                }
 
                if (ctx.qc_device)
@@ -1494,18 +1559,29 @@
                clulog(LOG_INFO, "Quorum Partition: %s Label: %s\n",
                       ctx.qc_device, ctx.qc_label);
        } else if (ctx.qc_device) {
-               if (check_device(ctx.qc_device, NULL, NULL) != 0) {
+               if (check_device(ctx.qc_device, NULL, &rv, &qh, 0) != 0) {
                        clulog(LOG_CRIT,
                               "Specified partition %s does not have a "
                               "qdisk label\n", ctx.qc_device);
                        check_stop_cman(&ctx);
-                       return -1;
+                       goto out;
+               }
+
+               if (qh.qh_version == VERSION_MAGIC_V2 &&
+                    qh.qh_blksz != rv) {
+                       clulog(LOG_CRIT,
+                              "Specified device %s does match kernel's "
+                              "reported sector size (%d != %d)\n",
+                              ctx.qc_device,
+                              ctx.qc_disk.d_blksz, rv);
+                       check_stop_cman(&ctx);
+                       goto out;
                }
        }
 
        if (!foreground && !forked) {
                 if (daemon_init(argv[0]) < 0)
-                       return -1;
+                       goto out;
        }
        
        set_priority(ctx.qc_sched, ctx.qc_sched_prio);
@@ -1513,13 +1589,19 @@
        if (quorum_init(&ctx, ni, MAX_NODES_DISK, h, cfh) < 0) {
                clulog_and_print(LOG_CRIT, "Initialization failed\n");
                check_stop_cman(&ctx);
-               return -1;
+               goto out;
        }
 
+       ret = 0;
+
        if (!_running)
-               return 0;
+               goto out;
        
-       cman_register_quorum_device(ctx.qc_ch, ctx.qc_device, ctx.qc_votes);
+       cman_register_quorum_device(ctx.qc_ch,
+                                   (ctx.qc_flags&RF_CMAN_LABEL)? 
+                                       ctx.qc_cman_label:
+                                        ctx.qc_device,
+                                   ctx.qc_votes);
        /*
                XXX this always returns -1 / EBUSY even when it works?!!!
                
@@ -1529,16 +1611,18 @@
                                 "Could not register %s with CMAN; "
                                 "return = %d; error = %s\n",
                                 ctx.qc_device, rv, strerror(errno));
-               return -1;
+               goto out;
        }
        */
-
        if (quorum_loop(&ctx, ni, MAX_NODES_DISK) == 0)
                cman_unregister_quorum_device(ctx.qc_ch);
 
        quorum_logout(&ctx);
+       /* free cman handle to avoid leak in cman */
+out:
+       cman_finish(ctx.qc_ch);
        qd_destroy(&ctx);
 
-       return 0;
+       return ret;
 }
 
--- cluster/cman/qdisk/mkqdisk.c        2006/11/21 14:50:30     1.3.4.1
+++ cluster/cman/qdisk/mkqdisk.c        2007/12/04 20:24:43     1.3.4.2
@@ -37,23 +37,26 @@
 {
        char device[128];
        char *newdev = NULL, *newlabel = NULL;
-       int rv;
+       int rv, debug_level = 1;
 
-       printf("mkqdisk v0.5.1\n");
+       printf("mkqdisk v0.5.2\n");
 
-       while ((rv = getopt(argc, argv, "Lf:c:l:h")) != EOF) {
+       while ((rv = getopt(argc, argv, "Ldf:c:l:h")) != EOF) {
                switch (rv) {
+               case 'd':
+                       ++debug_level;
+                       break;
                case 'L':
                        /* List */
                        close(2);
                        return find_partitions("/proc/partitions",
-                                              NULL, NULL, 0, 1);
+                                              NULL, NULL, 0, debug_level);
                        break;
                case 'f':
                        close(2);
                        return find_partitions("/proc/partitions",
                                               optarg, device,
-                                              sizeof(device), 1);
+                                              sizeof(device), debug_level);
                case 'c':
                        newdev = optarg;
                        break;
--- cluster/cman/qdisk/proc.c   2006/06/23 16:05:33     1.2
+++ cluster/cman/qdisk/proc.c   2007/12/04 20:24:43     1.2.4.1
@@ -32,27 +32,33 @@
 
 
 int
-check_device(char *device, char *label, quorum_header_t *qh)
+check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+            int flags)
 {
-       int fd = -1, ret = -1;
+       int ret = -1;
        quorum_header_t qh_local;
+       target_info_t disk;
 
        if (!qh)
                qh = &qh_local;
 
-       fd = qdisk_validate(device);
-       if (fd < 0) {
+       ret = qdisk_validate(device);
+       if (ret < 0) {
                perror("qdisk_verify");
                return -1;
        }
 
-       fd = qdisk_open(device);
-       if (fd < 0) {
+       ret = qdisk_open(device, &disk);
+       if (ret < 0) {
                perror("qdisk_open");
                return -1;
        }
 
-       if (qdisk_read(fd, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
+       if (ssz) 
+               *ssz = disk.d_blksz;
+
+       ret = -1;
+       if (qdisk_read(&disk, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
                swab_quorum_header_t(qh);
                 if (qh->qh_magic == HEADER_MAGIC_NUMBER) {
                        if (!label || !strcmp(qh->qh_cluster, label)) {
@@ -61,12 +67,91 @@
                 }
         }
 
-       qdisk_close(&fd);
+       /* only flag now is 'strict device check'; i.e.,
+         "block size recorded must match kernel's reported size" */
+       if (flags && qh->qh_version == VERSION_MAGIC_V2 &&
+            disk.d_blksz != qh->qh_blksz) {
+               ret = -1;
+       }
+
+       qdisk_close(&disk);
 
        return ret;
 }
 
 
+char *
+state_str(disk_node_state_t s)
+{
+       switch (s) {
+       case S_NONE:
+               return "None";
+       case S_EVICT:
+               return "Evicted";
+       case S_INIT:
+               return "Initializing";
+       case S_RUN:
+               return "Running";
+       case S_MASTER:
+               return "Master";
+       default:
+               return "ILLEGAL";
+       }
+}
+
+
+void
+print_status_block(status_block_t *sb)
+{
+       if (sb->ps_state == S_NONE)
+               return;
+       printf("Status block for node %d\n", sb->ps_nodeid);
+       printf("\tLast updated by node %d\n", sb->ps_updatenode);
+       printf("\tLast updated on %s", ctime((time_t *)&sb->ps_timestamp));
+       printf("\tState: %s\n", state_str(sb->ps_state));
+       printf("\tFlags: %04x\n", sb->ps_flags);
+       printf("\tScore: %d/%d\n", sb->ps_score, sb->ps_scoremax);
+       printf("\tAverage Cycle speed: %d.%06d seconds\n", 
+               sb->ps_ca_sec, sb->ps_ca_usec);
+       printf("\tLast Cycle speed: %d.%06d seconds\n", 
+               sb->ps_lc_sec, sb->ps_lc_usec);
+       printf("\tIncarnation: %08x%08x\n",
+               (int)(sb->ps_incarnation>>32&0xffffffff),
+               (int)(sb->ps_incarnation&0xffffffff));
+
+}
+
+
+void
+read_info(char *dev)
+{
+       target_info_t ti;
+       int x;
+       status_block_t sb;
+
+       if (qdisk_open(dev, &ti) < 0) {
+               printf("Could not read from %s: %s\n",
+                      dev, strerror(errno));
+               return;
+       }
+
+       for (x = 0; x < MAX_NODES_DISK; x++) {
+
+               if (qdisk_read(&ti,
+                              qdisk_nodeid_offset(x+1, ti.d_blksz),
+                              &sb, sizeof(sb)) < 0) {
+                       printf("Error reading node ID block %d\n",
+                              x+1);
+                       continue;
+               }
+               swab_status_block_t(&sb);
+               print_status_block(&sb);
+       }
+
+       qdisk_close(&ti);
+}
+
+
 int
 find_partitions(const char *partfile, const char *label,
                char *devname, size_t devlen, int print)
@@ -78,6 +163,7 @@
        char device[128];
        char realdev[256];
        quorum_header_t qh;
+       int ssz;
 
        fp = fopen(partfile, "r");
        if (!fp)
@@ -96,16 +182,35 @@
                if (strlen(device)) {
                        snprintf(realdev, sizeof(realdev),
                                 "/dev/%s", device);
-                       if (check_device(realdev, (char *)label, &qh) != 0)
+
+                       /* If we're not "just printing", then 
+                          then reject devices which don't match
+                          the recorded sector size */
+                       if (check_device(realdev, (char *)label, &ssz,
+                                        &qh, !print) != 0)
                                continue;
 
                        if (print) {
                                printf("%s:\n", realdev);
-                               printf("\tMagic:   %08x\n", qh.qh_magic);
-                               printf("\tLabel:   %s\n", qh.qh_cluster);
-                               printf("\tCreated: %s",
+                               printf("\tMagic:                %08x\n", qh.qh_magic);
+                               printf("\tLabel:                %s\n", qh.qh_cluster);
+                               printf("\tCreated:              %s",
                                       ctime((time_t *)&qh.qh_timestamp));
-                               printf("\tHost:    %s\n\n", qh.qh_updatehost);
+                               printf("\tHost:                 %s\n", qh.qh_updatehost);
+                               printf("\tKernel Sector Size:   %d\n", ssz);
+                               if (qh.qh_version == VERSION_MAGIC_V2) {
+                                       printf("\tRecorded Sector Size: %d\n\n", (int)qh.qh_blksz);
+                                       if (qh.qh_blksz != ssz) {
+                                               printf("WARNING: Sector size mismatch: Header: %d  Kernel: %d\n",
+                                                       (int)qh.qh_blksz, ssz);
+                                       }
+                               } else
+                                       printf("\n");
+                       }
+
+                       if (print >= 2) {
+                               /* Print node stuff */
+                               read_info(realdev);
                        }
 
                        if (devname && devlen) {

Reply via email to