[Cluster-devel] [PATCH v2 1/3] dlm: check if workqueues are NULL before flushing/destroying

2019-04-02 Thread David Windsor
If the DLM lowcomms stack is shut down before any DLM
traffic can be generated, flush_workqueue() and
destroy_workqueue() can be called on NULL send and/or recv
workqueues.

Insert guard conditionals to only call flush_workqueue()
and destroy_workqueue() on workqueues that are not NULL.

Signed-off-by: David Windsor 
---
 fs/dlm/lowcomms.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index a5e4a221435c..a93ebffe84b3 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1630,8 +1630,10 @@ static void clean_writequeues(void)
 
 static void work_stop(void)
 {
-   destroy_workqueue(recv_workqueue);
-   destroy_workqueue(send_workqueue);
+   if (recv_workqueue)
+   destroy_workqueue(recv_workqueue);
+   if (send_workqueue)
+   destroy_workqueue(send_workqueue);
 }
 
 static int work_start(void)
@@ -1691,13 +1693,17 @@ static void work_flush(void)
struct hlist_node *n;
struct connection *con;
 
-   flush_workqueue(recv_workqueue);
-   flush_workqueue(send_workqueue);
+   if (recv_workqueue)
+   flush_workqueue(recv_workqueue);
+   if (send_workqueue)
+   flush_workqueue(send_workqueue);
do {
ok = 1;
foreach_conn(stop_conn);
-   flush_workqueue(recv_workqueue);
-   flush_workqueue(send_workqueue);
+   if (recv_workqueue)
+   flush_workqueue(recv_workqueue);
+   if (send_workqueue)
+   flush_workqueue(send_workqueue);
for (i = 0; i < CONN_HASH_SIZE && ok; i++) {
hlist_for_each_entry_safe(con, n,
  &connection_hash[i], list) {
-- 
2.20.1



[Cluster-devel] [PATCH v2 2/3] dlm: add TCP multihoming/failover support

2019-04-02 Thread David Windsor
Add the ability to specify multiple source addresses
for DLM nodes so that multihomed configurations can
use multiple addresses and still be recognized by the
receiving node.

While each node is capable of being configured for multiple
IPs, DLM requires each node have only one active address
at a time.

This patch introduces a round-robin heuristic for selecting
the next active interface, but other heuristics could
easily be added later.

To support failover, a new configfs node is added by this patch:
/sys/kernel/config/dlm/cluster/comms/<comm>/error
This node is write-only, and is provided so that userspace
may signal the kernel when it detects a communications error.
The kernel will switch to the next local network interface
after 1 is written to the new configfs node.

Signed-off-by: David Windsor 
---
 fs/dlm/config.c   | 21 +
 fs/dlm/lowcomms.c | 60 +--
 fs/dlm/lowcomms.h |  1 +
 3 files changed, 69 insertions(+), 13 deletions(-)

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 1270551d24e3..96db7b1346f9 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -31,6 +31,7 @@
  * /config/dlm/<cluster>/comms/<comm>/local
  * /config/dlm/<cluster>/comms/<comm>/addr  (write only)
  * /config/dlm/<cluster>/comms/<comm>/addr_list (read only)
+ * /config/dlm/<cluster>/comms/<comm>/error(write only)
  * The <cluster> level is useless, but I haven't figured out how to avoid it.
  */
 
@@ -198,6 +199,7 @@ enum {
COMM_ATTR_LOCAL,
COMM_ATTR_ADDR,
COMM_ATTR_ADDR_LIST,
+   COMM_ATTR_ERROR,
 };
 
 enum {
@@ -662,16 +664,35 @@ static ssize_t comm_addr_list_show(struct config_item 
*item, char *buf)
return 4096 - allowance;
 }
 
+static ssize_t comm_error_store(struct config_item *item, const char *buf,
+   size_t len)
+{
+   int ret, i;
+
+   ret = kstrtoint(buf, 0, &i);
+   if (ret < 0)
+   return ret;
+
+   if (i == 0)
+   return 0;
+
+   dlm_lowcomms_next_addr();
+
+   return len;
+}
+
 CONFIGFS_ATTR(comm_, nodeid);
 CONFIGFS_ATTR(comm_, local);
 CONFIGFS_ATTR_WO(comm_, addr);
 CONFIGFS_ATTR_RO(comm_, addr_list);
+CONFIGFS_ATTR_WO(comm_, error);
 
 static struct configfs_attribute *comm_attrs[] = {
[COMM_ATTR_NODEID] = &comm_attr_nodeid,
[COMM_ATTR_LOCAL] = &comm_attr_local,
[COMM_ATTR_ADDR] = &comm_attr_addr,
[COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list,
+   [COMM_ATTR_ERROR] = &comm_attr_error,
NULL,
 };
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index a93ebffe84b3..be0e134d4fc4 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -159,6 +159,8 @@ static DEFINE_SPINLOCK(dlm_node_addrs_spin);
 static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
 static int dlm_local_count;
 static int dlm_allow_conn;
+static int dlm_local_idx;
+static DEFINE_SPINLOCK(dlm_local_idx_spin);
 
 /* Work queues */
 static struct workqueue_struct *recv_workqueue;
@@ -330,7 +332,8 @@ static int nodeid_to_addr(int nodeid, struct 
sockaddr_storage *sas_out,
if (!sa_out)
return 0;
 
-   if (dlm_local_addr[0]->ss_family == AF_INET) {
+   spin_lock(&dlm_local_idx_spin);
+   if (dlm_local_addr[dlm_local_idx]->ss_family == AF_INET) {
struct sockaddr_in *in4  = (struct sockaddr_in *) &sas;
struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out;
ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
@@ -339,6 +342,7 @@ static int nodeid_to_addr(int nodeid, struct 
sockaddr_storage *sas_out,
struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out;
ret6->sin6_addr = in6->sin6_addr;
}
+   spin_unlock(&dlm_local_idx_spin);
 
return 0;
 }
@@ -519,6 +523,8 @@ static void lowcomms_error_report(struct sock *sk)
   dlm_config.ci_tcp_port, sk->sk_err,
   sk->sk_err_soft);
}
+
+   dlm_lowcomms_next_addr();
 out:
read_unlock_bh(&sk->sk_callback_lock);
if (orig_report)
@@ -572,7 +578,9 @@ static void add_sock(struct socket *sock, struct connection 
*con)
 static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
  int *addr_len)
 {
-   saddr->ss_family =  dlm_local_addr[0]->ss_family;
+   spin_lock(&dlm_local_idx_spin);
+   saddr->ss_family =  dlm_local_addr[dlm_local_idx]->ss_family;
+   spin_unlock(&dlm_local_idx_spin);
if (saddr->ss_family == AF_INET) {
struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
in4_addr->sin_port = cpu_to_be16(port);
@@ -590,8 +598,14 @@ static void make_sockaddr(struct sockaddr_storage *saddr, 
uint16_t port,
 static void close_connection(struct connection *con, bool and_other,
 bool tx, bool rx)
 {
-   bool closing = test_and_set_bit(CF_CLOSING, &con->flags);
+   bool closing;
+
+   if (!con) {
+   printk(KERN_INFO "dlm: 

[Cluster-devel] [PATCH v2 3/3] dlm: allow binding to all network interfaces

2019-04-02 Thread David Windsor
Currently, in the kernel, DLM is only able to bind its
listen socket to a single network interface.  To support
more robust network configurations, DLM should be able
to bind to all network interfaces.

This patch adds a configfs node to enable/disable binding
to all network interfaces.  When 1 is written to this
configfs node, the DLM listen socket will bind to all network
interfaces.  When 0 is written to the node, DLM will bind
only to its current local network interface.

Signed-off-by: David Windsor 
---
 fs/dlm/config.c   | 21 +
 fs/dlm/config.h   |  3 ++-
 fs/dlm/lowcomms.c | 19 ++-
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 96db7b1346f9..16b83d61b060 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -29,6 +29,7 @@
  * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight
  * /config/dlm/<cluster>/comms/<comm>/nodeid
  * /config/dlm/<cluster>/comms/<comm>/local
+ * /config/dlm/<cluster>/comms/<comm>/bind_all
  * /config/dlm/<cluster>/comms/<comm>/addr  (write only)
  * /config/dlm/<cluster>/comms/<comm>/addr_list (read only)
  * /config/dlm/<cluster>/comms/<comm>/error(write only)
@@ -39,6 +40,7 @@ static struct config_group *space_list;
 static struct config_group *comm_list;
 static struct dlm_comm *local_comm;
 static uint32_t dlm_comm_count;
+static int bind_all;
 
 struct dlm_clusters;
 struct dlm_cluster;
@@ -200,6 +202,7 @@ enum {
COMM_ATTR_ADDR,
COMM_ATTR_ADDR_LIST,
COMM_ATTR_ERROR,
+   COMM_ATTR_BIND_ALL,
 };
 
 enum {
@@ -681,11 +684,23 @@ static ssize_t comm_error_store(struct config_item *item, 
const char *buf,
return len;
 }
 
+static ssize_t comm_bind_all_show(struct config_item *item, char *buf)
+{
+   return sprintf(buf, "%d\n", bind_all);
+}
+
+static ssize_t comm_bind_all_store(struct config_item *item, const char *buf,
+  size_t len)
+{
+   return kstrtoint(buf, 0, &bind_all);
+}
+
 CONFIGFS_ATTR(comm_, nodeid);
 CONFIGFS_ATTR(comm_, local);
 CONFIGFS_ATTR_WO(comm_, addr);
 CONFIGFS_ATTR_RO(comm_, addr_list);
 CONFIGFS_ATTR_WO(comm_, error);
+CONFIGFS_ATTR(comm_, bind_all);
 
 static struct configfs_attribute *comm_attrs[] = {
[COMM_ATTR_NODEID] = &comm_attr_nodeid,
@@ -693,6 +708,7 @@ static struct configfs_attribute *comm_attrs[] = {
[COMM_ATTR_ADDR] = &comm_attr_addr,
[COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list,
[COMM_ATTR_ERROR] = &comm_attr_error,
+   [COMM_ATTR_BIND_ALL] = &comm_attr_bind_all,
NULL,
 };
 
@@ -868,6 +884,11 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
return 0;
 }
 
+int dlm_bind_all(void)
+{
+   return bind_all;
+}
+
 /* Config file defaults */
 #define DEFAULT_TCP_PORT   21064
 #define DEFAULT_BUFFER_SIZE 4096
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 6041eec886ab..e3fd8ce45874 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -21,7 +21,7 @@ struct dlm_config_node {
uint32_t comm_seq;
 };
 
-#define DLM_MAX_ADDR_COUNT 3
+#define DLM_MAX_ADDR_COUNT 9
 
 struct dlm_config_info {
int ci_tcp_port;
@@ -49,6 +49,7 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node 
**nodes_out,
 int dlm_comm_seq(int nodeid, uint32_t *seq);
 int dlm_our_nodeid(void);
 int dlm_our_addr(struct sockaddr_storage *addr, int num);
+int dlm_bind_all(void);
 
 #endif /* __CONFIG_DOT_H__ */
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index be0e134d4fc4..60ae7c53a8a1 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1394,6 +1394,9 @@ static int sctp_listen_for_all(void)
 static int tcp_listen_for_all(void)
 {
struct socket *sock = NULL;
+   struct sockaddr_in *sin4;
+   struct sockaddr_in6 *sin6;
+   struct sockaddr_storage sas, laddr;
struct connection *con = nodeid2con(0, GFP_NOFS);
int result = -EINVAL;
 
@@ -1402,7 +1405,21 @@ static int tcp_listen_for_all(void)
 
log_print("Using TCP for communications");
 
-   sock = tcp_create_listen_sock(con, dlm_local_addr[dlm_local_idx]);
+   memcpy(&sas, dlm_local_addr[dlm_local_idx], sizeof(sas));
+   memcpy(&laddr, dlm_local_addr[dlm_local_idx], sizeof(laddr));
+   if (dlm_bind_all()) {
+   if (sas.ss_family == AF_INET) {
+   sin4 = (struct sockaddr_in *) &sas;
+   sin4->sin_addr.s_addr = htonl(INADDR_ANY);
+   memcpy(&laddr, sin4, sizeof(laddr));
+   } else {
+   sin6 = (struct sockaddr_in6 *) &sas;
+   sin6->sin6_addr = in6addr_any;
+   memcpy(&laddr, sin6, sizeof(laddr));
+   }
+   }
+
+   sock = tcp_create_listen_sock(con, &laddr);
if (sock) {
add_sock(sock, con);
result = 0;
-- 
2.20.1



Re: [Cluster-devel] [GFS2 PATCH v3] gfs2: clean_journal improperly set sd_log_flush_head

2019-04-02 Thread Steven Whitehouse

Hi,

On 28/03/2019 17:10, Bob Peterson wrote:

Hi,

Andreas found some problems with the previous version. Here is version 3.

Ross: Can you please test this one with your scenario? Thanks.

Bob Peterson
---

This patch fixes regressions in 588bff95c94efc05f9e1a0b19015c9408ed7c0ef.
Due to that patch, function clean_journal was setting the value of
sd_log_flush_head, but that's only valid if it is replaying the node's
own journal. If it's replaying another node's journal, that's completely
wrong and will lead to multiple problems. This patch tries to clean up
the mess by passing the value of the logical journal block number into
gfs2_write_log_header so the function can treat non-owned journals
generically. For the local journal, the journal extent map is used for
best performance. For journals belonging to other nodes, gfs2_extent_map
is called to figure it out.

This patch also tries to establish more consistency when passing journal
block parameters by changing several unsigned int types to a consistent
u32.

Fixes: 588bff95c94e ("GFS2: Reduce code redundancy writing log headers")

Signed-off-by: Bob Peterson 
---
  fs/gfs2/incore.h   |  2 +-
  fs/gfs2/log.c  | 26 +++---
  fs/gfs2/log.h  |  3 ++-
  fs/gfs2/lops.c |  6 +++---
  fs/gfs2/lops.h |  2 +-
  fs/gfs2/recovery.c |  8 
  fs/gfs2/recovery.h |  2 +-
  7 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index cdf07b408f54..86840a70ee1a 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -535,7 +535,7 @@ struct gfs2_jdesc {
unsigned long jd_flags;
  #define JDF_RECOVERY 1
unsigned int jd_jid;
-   unsigned int jd_blocks;
+   u32 jd_blocks;
int jd_recover_error;
/* Replay stuff */
  
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c

index b8830fda51e8..8a5a19a26582 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -672,13 +672,15 @@ void gfs2_write_revokes(struct gfs2_sbd *sdp)
   * @seq: sequence number
   * @tail: tail of the log
   * @flags: log header flags GFS2_LOG_HEAD_*
+ * @lblock: value for lh_blkno (block number relative to start of journal)
   * @op_flags: flags to pass to the bio
   *
   * Returns: the initialized log buffer descriptor
   */
  
  void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,

-  u64 seq, u32 tail, u32 flags, int op_flags)
+  u64 seq, u32 tail, u32 flags, u32 lblock,
+  int op_flags)
  {
struct gfs2_log_header *lh;
u32 hash, crc;
@@ -686,7 +688,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct 
gfs2_jdesc *jd,
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
struct timespec64 tv;
struct super_block *sb = sdp->sd_vfs;
-   u64 addr;
+   u64 dblock;
  
  	lh = page_address(page);

clear_page(lh);
@@ -699,15 +701,25 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct 
gfs2_jdesc *jd,
lh->lh_sequence = cpu_to_be64(seq);
lh->lh_flags = cpu_to_be32(flags);
lh->lh_tail = cpu_to_be32(tail);
-   lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
+   lh->lh_blkno = cpu_to_be32(lblock);
hash = ~crc32(~0, lh, LH_V1_SIZE);
lh->lh_hash = cpu_to_be32(hash);
  
  	ktime_get_coarse_real_ts64(&tv);

lh->lh_nsec = cpu_to_be32(tv.tv_nsec);
lh->lh_sec = cpu_to_be64(tv.tv_sec);
-   addr = gfs2_log_bmap(sdp);
-   lh->lh_addr = cpu_to_be64(addr);
+   if (jd->jd_jid == sdp->sd_lockstruct.ls_jid)
+   dblock = gfs2_log_bmap(sdp);
+   else {
+   u32 extlen;
+   int new = 0, error;
+
+   error = gfs2_extent_map(jd->jd_inode, lblock, &new, &dblock,
+   &extlen);


We should not be adding new calls to gfs2_extent_map() here since that 
function is obsolete and deprecated. It looks like perhaps we should 
have a parameter to gfs2_log_bmap() to indicate which journal we need to 
map?


Steve.


+   if (gfs2_assert_withdraw(sdp, error == 0))
+   return;
+   }
+   lh->lh_addr = cpu_to_be64(dblock);
lh->lh_jinode = cpu_to_be64(GFS2_I(jd->jd_inode)->i_no_addr);
  
  	/* We may only write local statfs, quota, etc., when writing to our

@@ -732,7 +744,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct 
gfs2_jdesc *jd,
 sb->s_blocksize - LH_V1_SIZE - 4);
lh->lh_crc = cpu_to_be32(crc);
  
-	gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr);

+   gfs2_log_write(sdp, page, sb->s_blocksize, 0, dblock);
gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, op_flags);
log_flush_wait(sdp);
  }
@@ -761,7 +773,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 
flags)
}
sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, tail,
-