Re: [Cluster-devel] [GFS2 Patch] [Try 3] GFS2: Reduce file fragmentation

2012-07-12 Thread Steven Whitehouse
Hi,

On Wed, 2012-07-11 at 14:10 -0400, Bob Peterson wrote:
 Hi,
 
 Here is my third version of the GFS2 file defragmentation patch for
 upstream:
 
 Description:
 
 This patch reduces GFS2 file fragmentation by pre-reserving blocks.
 In this patch, I've implemented almost all of Steve's suggestions.
 
 Regards,
 
 Bob Peterson
 Red Hat File Systems
 
 Signed-off-by: Bob Peterson rpete...@redhat.com
 ---

I've found a workload that seems to do something a bit odd...


[root@chywoon /]# for i in `seq 0 9`; do dd if=/dev/zero of=file.${i} bs=4096 
count=1; done

(i.e. create 10 single block files, sequentially)

Then I looked at the allocation pattern using stat  filefrag. We get 10
inodes, at positions: 33150, 33151, 33152, 33153, 33154, 33155, 33156,
33157, 33158, 33159 (so sequential with each other)

The lets look at where the data lands up: 33302, 33462, 33622, 33782,
33942, 34102, 34262, 34422, 34582, 34742 (respectively for each inode)

What I'd hope to see is that the data blocks are being allocated
contiguously with the inodes, at least in most cases. The data blocks
appear to be spaced out with gaps of 160 blocks between them. This was
on newly created filesystem, btw, so there should be nothing to get in
the way of the allocations.

Given that directories don't have reservations, and the reservations
relating to the individual inodes that are being created should be
cleared on close (i.e. before the subsequent creation) this should not
have changed in behaviour from the current kernels,

Steve.


 diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
 index 6d957a8..49cd7dd 100644
 --- a/fs/gfs2/bmap.c
 +++ b/fs/gfs2/bmap.c
 @@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct 
 buffer_head *dibh,
   if (error)
   goto out_rlist;
  
 + if (gfs2_rs_active(ip-i_res)) /* needs to be done with the rgrp glock 
 held */
 + gfs2_rs_deltree(ip-i_res);
 +
   error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
RES_INDIRECT + RES_STATFS + RES_QUOTA,
revokes);
 diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
 index 6fbf3cb..b93275f 100644
 --- a/fs/gfs2/file.c
 +++ b/fs/gfs2/file.c
 @@ -383,6 +383,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, 
 struct vm_fault *vmf)
   if (ret)
   return ret;
  
 + atomic_set(ip-i_res-rs_sizehint,
 +PAGE_CACHE_SIZE / sdp-sd_sb.sb_bsize);
 +
   gfs2_holder_init(ip-i_gl, LM_ST_EXCLUSIVE, 0, gh);
   ret = gfs2_glock_nq(gh);
   if (ret)
 @@ -571,22 +574,15 @@ fail:
  
  static int gfs2_release(struct inode *inode, struct file *file)
  {
 - struct gfs2_sbd *sdp = inode-i_sb-s_fs_info;
 - struct gfs2_file *fp;
   struct gfs2_inode *ip = GFS2_I(inode);
  
 - fp = file-private_data;
 + kfree(file-private_data);
   file-private_data = NULL;
  
 - if ((file-f_mode  FMODE_WRITE)  ip-i_res 
 - (atomic_read(inode-i_writecount) == 1))
 + if ((file-f_mode  FMODE_WRITE) 
 + (atomic_read(inode-i_writecount) == 1)) {
   gfs2_rs_delete(ip);
 -
 - if (gfs2_assert_warn(sdp, fp))
 - return -EIO;
 -
 - kfree(fp);
 -
 + }
   return 0;
  }
  
 @@ -662,14 +658,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, 
 const struct iovec *iov,
  unsigned long nr_segs, loff_t pos)
  {
   struct file *file = iocb-ki_filp;
 + size_t writesize = iov_length(iov, nr_segs);
   struct dentry *dentry = file-f_dentry;
   struct gfs2_inode *ip = GFS2_I(dentry-d_inode);
 + struct gfs2_sbd *sdp;
   int ret;
  
 + sdp = GFS2_SB(file-f_mapping-host);
   ret = gfs2_rs_alloc(ip);
   if (ret)
   return ret;
  
 + atomic_set(ip-i_res-rs_sizehint, writesize / sdp-sd_sb.sb_bsize);
   if (file-f_flags  O_APPEND) {
   struct gfs2_holder gh;
  
 @@ -795,6 +795,8 @@ static long gfs2_fallocate(struct file *file, int mode, 
 loff_t offset,
   if (unlikely(error))
   goto out_uninit;
  
 + atomic_set(ip-i_res-rs_sizehint, len / sdp-sd_sb.sb_bsize);
 +
   while (len  0) {
   if (len  bytes)
   bytes = len;
 @@ -803,10 +805,6 @@ static long gfs2_fallocate(struct file *file, int mode, 
 loff_t offset,
   offset += bytes;
   continue;
   }
 - error = gfs2_rindex_update(sdp);
 - if (error)
 - goto out_unlock;
 -
   error = gfs2_quota_lock_check(ip);
   if (error)
   goto out_unlock;
 diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
 index dc73070..4384d77 100644
 --- a/fs/gfs2/incore.h
 +++ b/fs/gfs2/incore.h
 @@ -84,6 +84,7 @@ struct gfs2_rgrpd {
   u32 rd_data;/* num of data blocks in rgrp */
   u32 rd_bitbytes;/* number of bytes in data 

Re: [Cluster-devel] [PATCH] cman: fix data copy and memory leak when reloading config

2012-07-12 Thread Christine Caulfield

ACK apart from the comments!

see below

On 11/07/12 10:46, Fabio M. Di Nitto wrote:

From: Fabio M. Di Nitto fdini...@redhat.com

cman.cluster_id,nodename,two_node information were not copied
from the old to the new config at reload time. This triggers
a problem when cman is set in cluster.conf and we effectively
drop information from objdb (suboptimal).

Also fix a possible memory leak when we have reload issues.

Signed-off-by: Fabio M. Di Nitto fdini...@redhat.com
---
  cman/daemon/cman-preconfig.c |   31 +--
  1 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/cman/daemon/cman-preconfig.c b/cman/daemon/cman-preconfig.c
index 68fec22..22583fe 100644
--- a/cman/daemon/cman-preconfig.c
+++ b/cman/daemon/cman-preconfig.c
@@ -1478,6 +1478,7 @@ static int cmanpre_reloadconfig(struct objdb_iface_ver0 
*objdb, int flush, const
hdb_handle_t cluster_parent_handle_new;
unsigned int config_version = 0, config_version_new = 0;
char *config_value = NULL;
+   char str[255];

/* don't reload if we've been told to run configless */
if (getenv(CMAN_NOCONFIG)) {
@@ -1489,16 +1490,16 @@ static int cmanpre_reloadconfig(struct objdb_iface_ver0 
*objdb, int flush, const
/* find both /cluster entries */
objdb-object_find_create(OBJECT_PARENT_HANDLE, cluster, 
strlen(cluster), find_handle);
objdb-object_find_next(find_handle, cluster_parent_handle);
+   objdb-object_find_next(find_handle, cluster_parent_handle_new);
+   objdb-object_find_destroy(find_handle);
if (!cluster_parent_handle) {
snprintf (error_reason, sizeof(error_reason) - 1, Cannot find old 
/cluster/ key in configuration\n);
goto err;
}
-   objdb-object_find_next(find_handle, cluster_parent_handle_new);
if (!cluster_parent_handle_new) {
snprintf (error_reason, sizeof(error_reason) - 1, Cannot find new 
/cluster/ key in configuration\n);
goto err;
}
-   objdb-object_find_destroy(find_handle);

if (!objdb-object_key_get(cluster_parent_handle, config_version, 
strlen(config_version), (void *)config_value, NULL)) {
if (config_value) {
@@ -1531,6 +1532,32 @@ static int cmanpre_reloadconfig(struct objdb_iface_ver0 
*objdb, int flush, const
/* destroy the old one */
objdb-object_destroy(cluster_parent_handle);

+   /*
+* create cluster.cman in the new config if it doesn't exists




 * create cluster.cman in the new config if it doesn't exist




+*/
+   objdb-object_find_create(cluster_parent_handle_new, cman, 
strlen(cman), find_handle);
+   if (objdb-object_find_next(find_handle, object_handle)) {
+   objdb-object_create(cluster_parent_handle_new, object_handle,
+cman, strlen(cman));
+   }
+   objdb-object_find_destroy(find_handle);
+
+   /*
+* readd cluster_id/two_node/nodename
+*/



 * write cluster_id/two_node/nodename




+   snprintf(str, sizeof(str) - 1, %d, cluster_id);
+   objdb-object_key_create_typed(object_handle, cluster_id,
+  str, strlen(str) + 1, 
OBJDB_VALUETYPE_STRING);
+
+   if (two_node) {
+   snprintf(str, sizeof(str) - 1, %d, 1);
+   objdb-object_key_create_typed(object_handle, two_node,
+  str, strlen(str) + 1, 
OBJDB_VALUETYPE_STRING);
+   }
+
+   objdb-object_key_create_typed(object_handle, nodename,
+  nodename, strlen(nodename)+1, 
OBJDB_VALUETYPE_STRING);
+
/* update the reference to the new config */
cluster_parent_handle = cluster_parent_handle_new;







Re: [Cluster-devel] [PATCH 2/2] fsck.gfs2: Fix buffer overflow in get_lockproto_table

2012-07-12 Thread Bob Peterson
- Original Message -
| Coverity discovered a buffer overflow in this function where an
| overly
| long cluster name in cluster.conf could cause a crash while repairing
| the superblock. This patch fixes the bug by making sure the lock
| table
| is composed sensibly, limiting the fsname to 16 chars as documented,
| and
| only allowing the cluster name (which doesn't seem to have a
| documented
| max size) to use the remaining space in the locktable name string.
| 
| Signed-off-by: Andrew Price anpr...@redhat.com
| ---

Hi,

ACK to both patches.

Regards,

Bob Peterson
Red Hat File Systems



Re: [Cluster-devel] [PATCH] rgmanager: Add IP interface parameter

2012-07-12 Thread Fabio M. Di Nitto
Hi Ryan,

only one comment here.. many times we have been asked to implement
interface parameter to allow any random IP on any specific interface
(beside the pre configured ip on that interface).

Can we change the patch to simply fix both problems at once?
Effectively, the fact that 2 interfaces have 2 ip on the same subnet is
simply a corner case.

Maybe later on we can add something like: ifconfig iface up / down.

when doing ifconfig up we need to store the output of ip addresses
automatically assigned to that interface.

on shutdown, we need to check if the ip we are removing is the last one
on that interface _before_ issuing an ifconfig down in case there are
more ip resources associated to it.

The patch looks ok, but I would probably use a different term than
interface as it sounds very similar to the expected feature above.

Fabio

On 07/12/2012 07:23 PM, Ryan McCabe wrote:
 This patch adds an interface parameter for IP resources. The
 interface must already be configured and active. This parameter
 should be used only when at least two active interfaces have IP
 addresses on the same subnet and it's necessary to specify which
 particular interface should be used.
 
 Signed-off-by: Ryan McCabe rmcc...@redhat.com
 ---
  rgmanager/src/resources/ip.sh |   17 +
  1 file changed, 17 insertions(+)
 
 diff --git a/rgmanager/src/resources/ip.sh b/rgmanager/src/resources/ip.sh
 index 38d1ab9..3adbb12 100755
 --- a/rgmanager/src/resources/ip.sh
 +++ b/rgmanager/src/resources/ip.sh
 @@ -132,6 +132,15 @@ meta_data()
   content type=boolean/
   /parameter
  
 + parameter name=interface
 + longdesc lang=en
 + The network interface to which the IP address should be added. 
 The interface must already be configured and active. This parameter should be 
 used only when at least two active interfaces have IP addresses on the same 
 subnet and it is desired to have the IP address added to a particular 
 interface.
 + /longdesc
 + shortdesc lang=en
 + Network interface
 + /shortdesc
 + content type=string/
 + /parameter
  /parameters
  
  actions
 @@ -587,6 +596,10 @@ ipv6()
   fi
   
   if [ $1 = add ]; then
 + if [ -n $OCF_RESKEY_interface ]  \
 +[ $OCF_RESKEY_interface != $dev ]; then
 + continue
 + fi
   ipv6_same_subnet $ifaddr_exp/$maskbits $addr_exp
   if [ $? -ne 0 ]; then
  continue
 @@ -670,6 +683,10 @@ ipv4()
   fi
  
   if [ $1 = add ]; then
 + if [ -n $OCF_RESKEY_interface ]  \
 +[ $OCF_RESKEY_interface != $dev ]; then
 + continue
 + fi
   ipv4_same_subnet $ifaddr/$maskbits $addr
   if [ $? -ne 0 ]; then
   continue