[Ocfs2-devel] Shared-du: show the shared extents per file and the footprint v4

2010-09-20 Thread jeff.liu
Hello,

The following patches introduce fiemap support to du(1). The goal is to teach
du(1) to work out the shared extents of each file it walks, as well as the
overall storage footprint at the end.

Changes to v3:
. fix the issues according to Tao's comments.
. Try to merge to the left or right node if possible when inserting a new 
extent_info to rbtree.

I have run some tests over the past few days and it works fine. Thanks to Tao
for helping to create the test environment!


Also, I wrote a tiny script, shown below, to verify the shared-du results. It
shows the total size of the shared extents on the target storage, in bytes.

usage:
./show_shared_extents.sh [storage_mount_path] [storage_device]
like: ./show_shared_extents.sh /storage /dev/sda8

#!/bin/bash

# debugfs.ocfs2 command line shared by every query in this script.  It is
# expanded unquoted at the call sites on purpose, so that "-n" is split off as
# a separate word.
DEBUGGER="/sbin/debugfs.ocfs2 -n"

#
# Get the block size and cluster size; the cluster size is used to convert the
# shared extent physical offset and length into bytes.
#
# The device is taken from the second script argument; /dev/sda8 is kept as
# the fallback because that is the device this query used to hard-code.
# Fields 4 and 8 of the "Block Size Bits" line are taken as the block and
# cluster size bits (assumes the usual `stats` layout -- TODO confirm against
# the installed ocfs2-tools version).
#
ocfs2_block_cluster_size=($(echo "stats" | $DEBUGGER "${2:-/dev/sda8}" |
			    grep "Block Size Bits" |
			    awk '{ print $4 " " $8 }'))
block_size=$(( 2 ** ${ocfs2_block_cluster_size[0]} ))
cluster_size=$(( 2 ** ${ocfs2_block_cluster_size[1]} ))

#
# process_file FILE DEVICE
#
# Look up FILE's inode on DEVICE with debugfs.ocfs2 and, if the inode is
# reported as refcounted, record every refcount extent in the global
# associative-by-offset array `extent_array` as
#     extent_array[physical_offset]=physical_offset:length:count
# where physical_offset and length are in bytes (cluster units multiplied by
# the global $cluster_size) and count is the reference count minus one.
#
# Reads the globals $DEBUGGER and $cluster_size.  Files that are not
# refcounted leave `extent_array` untouched.
#
function process_file() {
	local __f=$1
	local device=$2
	local __start=0
	local __lines=0
	local start_line=0
	local end_line=0

	inode=$(stat --format=%i "${__f}")

	#
	# Check if we meet a refcount file.  debugfs.ocfs2 addresses an inode
	# by number as <inode#>; only line 5 of the `stat` output is inspected.
	#
	refcount_file=$(echo "stat <$inode>" | $DEBUGGER $device | sed '5!d' |
			grep "Refcounted")

	if [ -n "$refcount_file" ]; then
		# Pairs of: line number of each "Refcount records" header, and
		# the 4th ':'-separated field of that header line (used below
		# as the number of record lines that follow -- presumably the
		# record count; verify against the `refcount` output format).
		refcount_records=($(echo "refcount <$inode>" |
				    $DEBUGGER $device |
				    grep -n "Refcount records" |
				    awk -F':' '{ print $1 " " $4 }'))

		refcount_records_num=${#refcount_records[@]}

		i=0
		while [[ $i -lt $refcount_records_num ]]
		do
			__start=${refcount_records[$i]}
			(( i++ ))
			__lines=$(( ${refcount_records[$i]} + 1 ))
			(( i++ ))
			let "start_line = __start + 1"
			let "end_line = start_line + __lines"

			# Keep only the lines strictly between start_line and
			# end_line, and take columns 2-4 of each as
			# (physical cluster, length in clusters, refcount).
			extents=($(echo "refcount <$inode>" | $DEBUGGER $device |
				   awk "FNR > $start_line && FNR < $end_line" |
				   awk '{ print $2 " " $3 " " $4 }'))

			extents_num=${#extents[@]}

			# j is advanced three times per iteration inside the
			# body, once for each field of a record.
			for (( j = 0; j < $extents_num; ))
			do
				physical_offset=$(( ${extents[$j]} * $cluster_size ))
				(( j++ ))

				length=$(( ${extents[$j]} * $cluster_size ))
				(( j++ ))

				#
				# Decrease the reference count to meet the du
				# semantics
				#
				count=$(( ${extents[$j]} - 1 ))
				(( j++ ))

				extent_array[$physical_offset]="$physical_offset:$length:$count"
			done
		done
	fi
}


STORAGE_MOUNT_PATH=$1
STORAGE_DEVICE=$2

# Feed every regular file under the mount point to process_file.  The list is
# read NUL-delimited from a process substitution (not a pipe) so that paths
# containing whitespace survive intact and extent_array is still filled in
# this shell rather than in a subshell.
while IFS= read -r -d '' f
do
	process_file "${f}" "$STORAGE_DEVICE"
done < <(find "$STORAGE_MOUNT_PATH" -type f -print0)

items=${#extent_array[*]}

# Each entry is "physical_offset:length:count"; an extent contributes
# length * count bytes to the total.
total_shared_length=0
for item in "${extent_array[@]}"
do
	shared_length=$(echo "${item}" | awk -F: '{ print $2 * $3 }')
	let "total_shared_length += shared_length"
done

echo "TOTAL_SHARED_LENGTH: $total_shared_length"



Regards,
-Jeff

___
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
http://oss.oracle.com/mailman/listinfo/ocfs2-devel


[Ocfs2-devel] [PATCH 1/3] shared-du: add the rbtree source and header file to lib v4

2010-09-20 Thread Jie Liu
* lib/rbtree.c: Source file of rbtree.
* lib/rbtree.h: Header file of rbtree.
* lib/Makefile.am (libcoreutils_a_SOURCES): Add both of them.

Signed-off-by: Jie Liu jeff@oracle.com
---
 coreutils-6.9/lib/Makefile.am |3 +-
 coreutils-6.9/lib/Makefile.in |4 +-
 coreutils-6.9/lib/rbtree.c|  403 +
 coreutils-6.9/lib/rbtree.h|  143 +++
 4 files changed, 550 insertions(+), 3 deletions(-)
 create mode 100644 coreutils-6.9/lib/rbtree.c
 create mode 100644 coreutils-6.9/lib/rbtree.h

diff --git a/coreutils-6.9/lib/Makefile.am b/coreutils-6.9/lib/Makefile.am
index cfa22be..6e22f65 100644
--- a/coreutils-6.9/lib/Makefile.am
+++ b/coreutils-6.9/lib/Makefile.am
@@ -26,7 +26,8 @@ LDADD = $(noinst_LIBRARIES)
 
 libcoreutils_a_SOURCES += \
   buffer-lcm.c buffer-lcm.h \
-  xmemxfrm.c xmemxfrm.h
+  xmemxfrm.c xmemxfrm.h \
+  rbtree.c rbtree.h
 
 libcoreutils_a_LIBADD += $(LIBOBJS)
 libcoreutils_a_DEPENDENCIES += $(LIBOBJS)
diff --git a/coreutils-6.9/lib/Makefile.in b/coreutils-6.9/lib/Makefile.in
index 772d8f3..29a81c0 100644
--- a/coreutils-6.9/lib/Makefile.in
+++ b/coreutils-6.9/lib/Makefile.in
@@ -213,7 +213,7 @@ am_libcoreutils_a_OBJECTS = allocsa.$(OBJEXT) 
base64.$(OBJEXT) \
xalloc-die.$(OBJEXT) xgethostname.$(OBJEXT) xmemcoll.$(OBJEXT) \
xreadlink.$(OBJEXT) xreadlink-with-size.$(OBJEXT) \
xstrndup.$(OBJEXT) xstrtoimax.$(OBJEXT) xstrtoumax.$(OBJEXT) \
-   buffer-lcm.$(OBJEXT) xmemxfrm.$(OBJEXT)
+   buffer-lcm.$(OBJEXT) xmemxfrm.$(OBJEXT) rbtree.$(OBJEXT)
 libcoreutils_a_OBJECTS = $(am_libcoreutils_a_OBJECTS)
 LTLIBRARIES = $(noinst_LTLIBRARIES)
 PROGRAMS = $(noinst_PROGRAMS)
@@ -643,7 +643,7 @@ libcoreutils_a_SOURCES = allocsa.h allocsa.c base64.h 
base64.c \
xgethostname.h xgethostname.c xmemcoll.h xmemcoll.c \
xreadlink.c xreadlink-with-size.c xstrndup.h xstrndup.c \
xstrtoimax.c xstrtoumax.c buffer-lcm.c buffer-lcm.h xmemxfrm.c \
-   xmemxfrm.h
+   xmemxfrm.h rbtree.c rbtree.h
 libcoreutils_a_LIBADD = $(gl_LIBOBJS) @ALLOCA@ $(LIBOBJS)
 libcoreutils_a_DEPENDENCIES = $(gl_LIBOBJS) @ALLOCA@ $(LIBOBJS)
 EXTRA_libcoreutils_a_SOURCES = acl.c acl_entries.c file-has-acl.c \
diff --git a/coreutils-6.9/lib/rbtree.c b/coreutils-6.9/lib/rbtree.c
new file mode 100644
index 000..e6d1b7f
--- /dev/null
+++ b/coreutils-6.9/lib/rbtree.c
@@ -0,0 +1,403 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * kernel-rbtree.c
+ *
+ * This is imported from the Linux kernel to give us a tested and
+ * portable tree library.
+ */
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli and...@suse.de
+  (C) 2002  David Woodhouse dw...@infradead.org
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/lib/rbtree.c
+*/
+
+#include rbtree.h
+
+static void
+__rb_rotate_left (struct rb_node *node, struct rb_root *root)
+{
+  struct rb_node *right = node-rb_right;
+
+  if ((node-rb_right = right-rb_left))
+right-rb_left-rb_parent = node;
+  right-rb_left = node;
+
+  if ((right-rb_parent = node-rb_parent))
+{
+  if (node == node-rb_parent-rb_left)
+node-rb_parent-rb_left = right;
+  else
+node-rb_parent-rb_right = right;
+}
+  else
+root-rb_node = right;
+node-rb_parent = right;
+}
+
+static void
+__rb_rotate_right (struct rb_node *node, struct rb_root *root)
+{
+  struct rb_node *left = node-rb_left;
+
+  if ((node-rb_left = left-rb_right))
+left-rb_right-rb_parent = node;
+  left-rb_right = node;
+
+  if ((left-rb_parent = node-rb_parent))
+{
+  if (node == node-rb_parent-rb_right)
+node-rb_parent-rb_right = left;
+  else
+node-rb_parent-rb_left = left;
+   }
+  else
+root-rb_node = left;
+
+  node-rb_parent = left;
+}
+
+void
+rb_insert_color (struct rb_node *node, struct rb_root *root)
+{
+  struct rb_node *parent, *gparent;
+
+  while ((parent = node-rb_parent)  parent-rb_color == RB_RED)
+{
+ gparent = parent-rb_parent;
+
+  if (parent == gparent-rb_left)
+{
+  {
+register struct rb_node *uncle = gparent-rb_right;
+if (uncle  uncle-rb_color == RB_RED)
+ {
+uncle-rb_color = RB_BLACK;
+parent-rb_color = RB_BLACK;
+

[Ocfs2-devel] [PATCH 2/3] build: distribute new file, fiemap.h v4

2010-09-20 Thread Jie Liu
* src/Makefile.am (noinst_HEADERS): Add fiemap.h.

Signed-off-by: Jie Liu jeff@oracle.com
---
 coreutils-6.9/src/Makefile.am |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/coreutils-6.9/src/Makefile.am b/coreutils-6.9/src/Makefile.am
index 863a32b..af132e1 100644
--- a/coreutils-6.9/src/Makefile.am
+++ b/coreutils-6.9/src/Makefile.am
@@ -43,7 +43,8 @@ noinst_HEADERS = \
   remove.h \
   system.h \
   wheel-size.h \
-  wheel.h
+  wheel.h \
+  fiemap.h
 
 EXTRA_DIST = dcgen dircolors.hin tac-pipe.c \
   groups.sh wheel-gen.pl extract-magic c99-to-c89.diff
-- 
1.5.4.3


___
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
http://oss.oracle.com/mailman/listinfo/ocfs2-devel


Re: [Ocfs2-devel] [PATCH 2/3] build: distribute new file, fiemap.h v4

2010-09-20 Thread jeff.liu
Sorry, please ignore these patches, I will send out another one.

-Jeff

Jie Liu wrote:
 * src/Makefile.am (noinst_HEADERS): Add fiemap.h.
 
 Signed-off-by: Jie Liu jeff@oracle.com
 ---
  coreutils-6.9/src/Makefile.am |3 ++-
  1 files changed, 2 insertions(+), 1 deletions(-)
 
 diff --git a/coreutils-6.9/src/Makefile.am b/coreutils-6.9/src/Makefile.am
 index 863a32b..af132e1 100644
 --- a/coreutils-6.9/src/Makefile.am
 +++ b/coreutils-6.9/src/Makefile.am
 @@ -43,7 +43,8 @@ noinst_HEADERS = \
remove.h \
system.h \
wheel-size.h \
 -  wheel.h
 +  wheel.h \
 +  fiemap.h
  
  EXTRA_DIST = dcgen dircolors.hin tac-pipe.c \
groups.sh wheel-gen.pl extract-magic c99-to-c89.diff


___
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
http://oss.oracle.com/mailman/listinfo/ocfs2-devel


[Ocfs2-devel] [PATCH 2/3] build: distribute new file, fiemap.h v4

2010-09-20 Thread Jie Liu
Add fiemap header to src.

* src/Makefile.am (noinst_HEADERS): Add fiemap.h.

Signed-off-by: Jie Liu jeff@oracle.com
---
 coreutils-6.9/src/Makefile.am |3 +-
 coreutils-6.9/src/fiemap.h|  102 +
 2 files changed, 104 insertions(+), 1 deletions(-)
 create mode 100644 coreutils-6.9/src/fiemap.h

diff --git a/coreutils-6.9/src/Makefile.am b/coreutils-6.9/src/Makefile.am
index 863a32b..af132e1 100644
--- a/coreutils-6.9/src/Makefile.am
+++ b/coreutils-6.9/src/Makefile.am
@@ -43,7 +43,8 @@ noinst_HEADERS = \
   remove.h \
   system.h \
   wheel-size.h \
-  wheel.h
+  wheel.h \
+  fiemap.h
 
 EXTRA_DIST = dcgen dircolors.hin tac-pipe.c \
   groups.sh wheel-gen.pl extract-magic c99-to-c89.diff
diff --git a/coreutils-6.9/src/fiemap.h b/coreutils-6.9/src/fiemap.h
new file mode 100644
index 000..c5d8424
--- /dev/null
+++ b/coreutils-6.9/src/fiemap.h
@@ -0,0 +1,102 @@
+/* FS_IOC_FIEMAP ioctl infrastructure.
+   Some portions copyright (C) 2007 Cluster File Systems, Inc
+   Authors: Mark Fasheh mfas...@suse.com
+Kalpak Shah kalpak.s...@sun.com
+Andreas Dilger adil...@sun.com.  */
+
+/* Copy from kernel, modified to respect GNU code style by Jie Liu.  */
+
+#ifndef _LINUX_FIEMAP_H
+# define _LINUX_FIEMAP_H
+
+# include stdint.h
+
+struct fiemap_extent
+{
+  /* Logical offset in bytes for the start of the extent
+ from the beginning of the file.  */
+  uint64_t fe_logical;
+
+  /* Physical offset in bytes for the start of the extent
+ from the beginning of the disk.  */
+  uint64_t fe_physical;
+
+  /* Length in bytes for this extent.  */
+  uint64_t fe_length;
+
+  uint64_t fe_reserved64[2];
+
+  /* FIEMAP_EXTENT_* flags for this extent.  */
+  uint32_t fe_flags;
+
+  uint32_t fe_reserved[3];
+};
+
+struct fiemap
+{
+  /* Logical offset(inclusive) at which to start mapping(in).  */
+  uint64_t fm_start;
+
+  /* Logical length of mapping which userspace wants(in).  */
+  uint64_t fm_length;
+
+  /* FIEMAP_FLAG_* flags for request(in/out).  */
+  uint32_t fm_flags;
+
+  /* Number of extents that were mapped(out).  */
+  uint32_t fm_mapped_extents;
+
+  /* Size of fm_extents array(in).  */
+  uint32_t fm_extent_count;
+
+  uint32_t fm_reserved;
+
+  /* Array of mapped extents(out).  */
+  struct fiemap_extent fm_extents[0];
+};
+
+/* The maximum offset can be mapped for a file.  */
+# define FIEMAP_MAX_OFFSET   (~0ULL)
+
+/* Sync file data before map.  */
+# define FIEMAP_FLAG_SYNC0x0001
+
+/* Map extended attribute tree.  */
+# define FIEMAP_FLAG_XATTR   0x0002
+
+# define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
+
+/* Last extent in file.  */
+# define FIEMAP_EXTENT_LAST  0x0001
+
+/* Data location unknown.  */
+# define FIEMAP_EXTENT_UNKNOWN   0x0002
+
+/* Location still pending, Sets EXTENT_UNKNOWN.  */
+# define FIEMAP_EXTENT_DELALLOC  0x0004
+
+/* Data can not be read while fs is unmounted.  */
+# define FIEMAP_EXTENT_ENCODED   0x0008
+
+/* Data is encrypted by fs.  Sets EXTENT_NO_BYPASS.  */
+# define FIEMAP_EXTENT_DATA_ENCRYPTED0x0080
+
+/* Extent offsets may not be block aligned.  */
+# define FIEMAP_EXTENT_NOT_ALIGNED   0x0100
+
+/* Data mixed with metadata.  Sets EXTENT_NOT_ALIGNED.  */
+# define FIEMAP_EXTENT_DATA_INLINE   0x0200
+
+/* Multiple files in block.  Set EXTENT_NOT_ALIGNED.  */
+# define FIEMAP_EXTENT_DATA_TAIL 0x0400
+
+/* Space allocated, but not data (i.e. zero).  */
+# define FIEMAP_EXTENT_UNWRITTEN 0x0800
+
+/* File does not natively support extents.  Result merged for efficiency.  */
+# define FIEMAP_EXTENT_MERGED  0x1000
+
+/* Space shared with other files.  */
+# define FIEMAP_EXTENT_SHARED0x2000
+
+#endif
-- 
1.5.4.3


___
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
http://oss.oracle.com/mailman/listinfo/ocfs2-devel


[Ocfs2-devel] [PATCH 3/3] shared-du: using fiemap to figure up the shared extents per file and the footprint in all v4

2010-09-20 Thread Jie Liu
If du(1) is invoked with either the '--shared-size' or the '-E' option, show
the size of the shared extents in parentheses for each file, as well as the
overall footprint at the end.

* src/du.c: Add this feature.

Signed-off-by: Jie Liu jeff@oracle.com
---
 coreutils-6.9/src/du.c |  479 +++-
 1 files changed, 473 insertions(+), 6 deletions(-)

diff --git a/coreutils-6.9/src/du.c b/coreutils-6.9/src/du.c
index 206d318..41fda57 100644
--- a/coreutils-6.9/src/du.c
+++ b/coreutils-6.9/src/du.c
@@ -45,6 +45,13 @@
 #include xfts.h
 #include xstrtol.h
 
+#include fiemap.h
+#include rbtree.h
+
+#if HAVE_SYS_IOCTL_H
+# include sys/ioctl.h
+#endif
+
 extern bool fts_debug;
 
 /* The official name of this program (e.g., no `g' prefix).  */
@@ -64,6 +71,10 @@ extern bool fts_debug;
 /* Initial size of the hash table.  */
 #define INITIAL_TABLE_SIZE 103
 
+#if defined(_IOWR)  !defined(FS_IOC_FIEMAP)
+# define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap)
+#endif
+
 /* Hash structure for inode and device numbers.  The separate entry
structure makes it easier to rehash in place.  */
 
@@ -171,6 +182,31 @@ static enum time_type time_type = time_mtime;
 /* User specified date / time style */
 static char const *time_style = NULL;
 
+/* If true, display the size of the shared extents per file and end
+   up with the overall footprint.  */
+static bool print_shared_size = false;
+
+/* The size of the shared extents per file.  */
+static uint64_t file_shared_extents = 0;
+
+/* The root of our rbtree for tracking extent_info as below.  */
+static struct rb_root fe_root;
+
+/* A structure for the extents information.  */
+struct extent_info {
+  /* rbtree node */
+  struct rb_node ei_node;
+
+  /* physical offset in bytes */
+  uint64_t ei_physical;
+
+  /* length in bytes for this extent */
+  uint64_t ei_length;
+
+  /* extent shared count */
+  size_t ei_shared_count;
+};
+
 /* Format used to display date / time. Controlled by --time-style */
 static char const *time_format = NULL;
 
@@ -215,6 +251,7 @@ static struct option const long_options[] =
   {block-size, required_argument, NULL, 'B'},
   {bytes, no_argument, NULL, 'b'},
   {count-links, no_argument, NULL, 'l'},
+  {shared-size, no_argument, NULL, 'E'},
   {dereference, no_argument, NULL, 'L'},
   {dereference-args, no_argument, NULL, 'D'},
   {exclude, required_argument, NULL, EXCLUDE_OPTION},
@@ -299,6 +336,7 @@ Mandatory arguments to long options are mandatory for short 
options too.\n\
   -b, --bytes   equivalent to `--apparent-size --block-size=1'\n\
   -c, --total   produce a grand total\n\
   -D, --dereference-args  dereference FILEs that are symbolic links\n\
+  -E, --shared-size show the size of the shared extents per file\n\
 ), stdout);
   fputs (_(\
   --files0-from=F   summarize disk usage of the NUL-terminated file\n\
@@ -443,6 +481,22 @@ print_only_size (uintmax_t n_bytes)
 1, output_block_size), stdout);
 }
 
+/* Print FOOTPRINT followed by STRING.  */
+
+static void
+print_footprint (const struct duinfo *pdui, uintmax_t footprint, const char 
*string)
+{
+  print_only_size (footprint);
+  if (opt_time)
+{
+  putchar ('\t');
+  show_date (time_format, pdui-tmax);
+}
+
+  printf (\t%s%c, string, opt_nul_terminate_output ? '\0' : '\n');
+  fflush (stdout);
+}
+
 /* Print size (and optionally time) indicated by *PDUI, followed by STRING.  */
 
 static void
@@ -454,10 +508,400 @@ print_size (const struct duinfo *pdui, const char 
*string)
   putchar ('\t');
   show_date (time_format, pdui-tmax);
 }
+
+  /* FIXME: make better formatting output?  */
+  if ((print_shared_size)  (file_shared_extents  0))
+{
+  putchar ('\t');
+  putchar ('(');
+
+  /* If display file size in bytes (i.e, output_block_size == 1), we
+ should honor pdui-size if the file shared extent size is larger
+ than it.  */
+  if ((output_block_size == 1)  (file_shared_extents  pdui-size))
+file_shared_extents = pdui-size;
+
+  print_only_size (file_shared_extents);
+  putchar (')');
+
+  file_shared_extents = 0;
+}
   printf (\t%s%c, string, opt_nul_terminate_output ? '\0' : '\n');
   fflush (stdout);
 }
 
+/* Free all allocated extent_info node from the rbtree.  */
+
+static void
+free_extent_info (void)
+{
+  struct rb_node *node;
+  struct extent_info *ei;
+
+  while ((node = rb_first (fe_root)))
+{
+  ei = rb_entry (node, struct extent_info, ei_node);
+  rb_erase (ei-ei_node, fe_root);
+  free (ei);
+}
+}
+
+/* Go through the entire tree to sum up the shared extents
+   whose ei_shared_count > 0.  */
+
+static uintmax_t
+figure_up_shared_extent (void)
+{
+  struct rb_node *node;
+  struct extent_info *ei;
+  static uintmax_t total_shared_extent = 0;
+
+  for (node = rb_first (fe_root); node; node = rb_next (node))
+  {
+ei = rb_entry (node, struct extent_info, ei_node);
+if (ei-ei_shared_count  0)
+  

[Ocfs2-devel] [PATCH 1/1] o2dlm: free block mles during shutdown

2010-09-20 Thread Srinivas Eeda
If a node initiates shutdown after another node initiated the lock mastery
process, this node might have created block mle but will not release it if it
doesn't get the assert master from the other node. This causes block mle's to
lie around unfreed.

This patch frees any block mles that still exist on the master list after the
node has sent DLM_EXIT_DOMAIN_MSG to the other nodes.

Signed-off-by: Srinivas Eeda srinivas.e...@oracle.com
---
 fs/ocfs2/dlm/dlmcommon.h |1 +
 fs/ocfs2/dlm/dlmdomain.c |1 +
 fs/ocfs2/dlm/dlmmaster.c |   33 +
 3 files changed, 35 insertions(+), 0 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 4b6ae2c..48282dd 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -1030,6 +1030,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm,
 struct dlm_lock_resource *res);
 void dlm_clean_master_list(struct dlm_ctxt *dlm,
   u8 dead_node);
+void dlm_free_block_mles(struct dlm_ctxt *dlm);
 int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
 int __dlm_lockres_has_locks(struct dlm_lock_resource *res);
 int __dlm_lockres_unused(struct dlm_lock_resource *res);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 153abb5..8744fff 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -693,6 +693,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
 
dlm_mark_domain_leaving(dlm);
dlm_leave_domain(dlm);
+   dlm_free_block_mles(dlm);
dlm_complete_dlm_shutdown(dlm);
}
dlm_put(dlm);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index ffb4c68..5f4d6fd 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -3433,3 +3433,36 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
wake_up(res-wq);
wake_up(dlm-migration_wq);
 }
+
+void dlm_free_block_mles(struct dlm_ctxt *dlm)
+{
+   int i;
+   struct hlist_head *bucket;
+   struct dlm_master_list_entry *mle;
+   struct hlist_node *list;
+
+   spin_lock(dlm-spinlock);
+   spin_lock(dlm-master_lock);
+   for (i = 0; i  DLM_HASH_BUCKETS; i++) {
+   bucket = dlm_master_hash(dlm, i);
+   hlist_for_each(list, bucket) {
+   mle = hlist_entry(list, struct dlm_master_list_entry,
+ master_hash_node);
+   if (mle-type != DLM_MLE_BLOCK) {
+   mlog(ML_ERROR, mle for %.*s not destroyed, 
+type %d\n,
+mle-mnamelen, mle-mname, mle-type);
+   continue;
+   }
+   spin_lock(mle-spinlock);
+   atomic_set(mle-woken, 1);
+   spin_unlock(mle-spinlock);
+   wake_up(mle-wq);
+
+   __dlm_mle_detach_hb_events(dlm, mle);
+   __dlm_put_mle(mle);
+   }
+   }
+   spin_unlock(dlm-master_lock);
+   spin_unlock(dlm-spinlock);
+}
-- 
1.5.6.5


___
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
http://oss.oracle.com/mailman/listinfo/ocfs2-devel


Re: [Ocfs2-devel] [PATCH 1/1] o2dlm: free block mles during shutdown

2010-09-20 Thread Wengang Wang
Hi Srini,

On 10-09-20 14:59, Srinivas Eeda wrote:
 If a node initiates shutdown after another node initiated the lock mastery
 process, this node might have created block mle but will not release it if it
 doesn't get the assert master from the other node. This causes block mle's to
 lie around unfreed.
 
 This patch frees any block mles that exists on master list after the node sent
 DLM_EXIT_DOMAIN_MSG to other nodes.
 
 Signed-off-by: Srinivas Eeda srinivas.e...@oracle.com
 ---
  fs/ocfs2/dlm/dlmcommon.h |1 +
  fs/ocfs2/dlm/dlmdomain.c |1 +
  fs/ocfs2/dlm/dlmmaster.c |   33 +
  3 files changed, 35 insertions(+), 0 deletions(-)
 
 diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
 index 4b6ae2c..48282dd 100644
 --- a/fs/ocfs2/dlm/dlmcommon.h
 +++ b/fs/ocfs2/dlm/dlmcommon.h
 @@ -1030,6 +1030,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res);
  void dlm_clean_master_list(struct dlm_ctxt *dlm,
  u8 dead_node);
 +void dlm_free_block_mles(struct dlm_ctxt *dlm);
  int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
  int __dlm_lockres_has_locks(struct dlm_lock_resource *res);
  int __dlm_lockres_unused(struct dlm_lock_resource *res);
 diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
 index 153abb5..8744fff 100644
 --- a/fs/ocfs2/dlm/dlmdomain.c
 +++ b/fs/ocfs2/dlm/dlmdomain.c
 @@ -693,6 +693,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
  
   dlm_mark_domain_leaving(dlm);
   dlm_leave_domain(dlm);
 + dlm_free_block_mles(dlm);
   dlm_complete_dlm_shutdown(dlm);
   }
   dlm_put(dlm);
 diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
 index ffb4c68..5f4d6fd 100644
 --- a/fs/ocfs2/dlm/dlmmaster.c
 +++ b/fs/ocfs2/dlm/dlmmaster.c
 @@ -3433,3 +3433,36 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
   wake_up(res-wq);
   wake_up(dlm-migration_wq);
  }
 +
 +void dlm_free_block_mles(struct dlm_ctxt *dlm)
 +{
 + int i;
 + struct hlist_head *bucket;
 + struct dlm_master_list_entry *mle;
 + struct hlist_node *list;
 +
 + spin_lock(dlm-spinlock);
 + spin_lock(dlm-master_lock);
 + for (i = 0; i  DLM_HASH_BUCKETS; i++) {
 + bucket = dlm_master_hash(dlm, i);
 + hlist_for_each(list, bucket) {

I guess it can be the last reference on mles?
If so, don't we need hlist_for_each_safe here since we are removing entries?

regards,
wengang.

 + mle = hlist_entry(list, struct dlm_master_list_entry,
 +   master_hash_node);
 + if (mle-type != DLM_MLE_BLOCK) {
 + mlog(ML_ERROR, mle for %.*s not destroyed, 
 +  type %d\n,
 +  mle-mnamelen, mle-mname, mle-type);
 + continue;
 + }
 + spin_lock(mle-spinlock);
 + atomic_set(mle-woken, 1);
 + spin_unlock(mle-spinlock);
 + wake_up(mle-wq);
 +
 + __dlm_mle_detach_hb_events(dlm, mle);
 + __dlm_put_mle(mle);
 + }
 + }
 + spin_unlock(dlm-master_lock);
 + spin_unlock(dlm-spinlock);
 +}
 -- 
 1.5.6.5

___
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
http://oss.oracle.com/mailman/listinfo/ocfs2-devel