I wrote a user-space tool to defragment the global bitmap; I hope this
tool is helpful.
When non-DB data is stored on ocfs2, there are typically not a few
very large files but lots of relatively small files.
After a long time of use -- especially creating and deleting files -- the
global bitmap becomes fragmented, so that even when there is enough free
space (but not contiguous), creating a file may fail.
There is a related bug, 6730723 (on bugdb), though it was closed as "not
supported". I have also reproduced a scenario in which "df" shows the partition
usage at 51%, but creating a file on other nodes fails with a "no space" error.
This offline tool, o2defrag, creates larger runs of contiguous free bits in the
global bitmap by moving the data clusters of regular files and directories.
It does the following:
1) For each group, move the data clusters in the group to the front of the
same group to make a bigger free space at the end.
2) For groups that have more free space, move data clusters to other
group(s) to make much more free space.
a) First, it tries to move data clusters to a group in which
there are already data clusters of the same file. If there is no such group,
or no space in these groups, go to b).
b) Move data clusters to a group in which there are no data
clusters of the same file.
3) Do step 1) again.
The things changed by this tool are:
I) the location of data clusters;
II) the corresponding bits in the global bitmap;
III) the extent records in the ocfs2_dinode block or extent block.
This tool doesn't merge or split extent records for now, and it does almost
nothing for fs performance.
A feature to be added is to try to move all data clusters of a file
as close together as possible, so that accessing files on ocfs2 gets better
performance.
The patch is based on ocfs2-tools 1.2.6. To compile it, add a
symbolic link named "include" in the o2defrag directory that points to
../debugfs.ocfs2/include.
Usage: "o2defrag <ocfs2 partition>"
thanks,
wengang.
diff -N -u -p -r ocfs2-tools-1.2.6.orig/libocfs2/chain.c ocfs2-tools-1.2.6/libocfs2/chain.c
--- ocfs2-tools-1.2.6.orig/libocfs2/chain.c 2008-02-05 01:30:21.000000000 -0500
+++ ocfs2-tools-1.2.6/libocfs2/chain.c 2008-02-05 02:00:53.000000000 -0500
@@ -43,7 +43,33 @@ void ocfs2_swap_group_desc(struct ocfs2_
gd->bg_parent_dinode = bswap_64(gd->bg_parent_dinode);
gd->bg_blkno = bswap_64(gd->bg_blkno);
}
+/*
+ * Read the group descriptor at blkno straight into gd_buf (no temporary
+ * block is allocated) and byte-swap it in place with
+ * ocfs2_swap_group_desc().
+ *
+ * Returns 0 on success, OCFS2_ET_BAD_BLKNO for an out-of-range blkno, the
+ * io_read_block() error, or OCFS2_ET_BAD_GROUP_DESC_MAGIC when the
+ * signature does not match (gd_buf is then left exactly as read).
+ */
+errcode_t ocfs2_read_group_desc2(ocfs2_filesys *fs, uint64_t blkno,
+				 char *gd_buf)
+{
+	struct ocfs2_group_desc *desc = (struct ocfs2_group_desc *)gd_buf;
+	errcode_t err;
+
+	if (blkno < OCFS2_SUPER_BLOCK_BLKNO || blkno > fs->fs_blocks)
+		return OCFS2_ET_BAD_BLKNO;
+
+	err = io_read_block(fs->fs_io, blkno, 1, gd_buf);
+	if (err)
+		return err;
+
+	if (memcmp(desc->bg_signature, OCFS2_GROUP_DESC_SIGNATURE,
+		   strlen(OCFS2_GROUP_DESC_SIGNATURE)) != 0)
+		return OCFS2_ET_BAD_GROUP_DESC_MAGIC;
+
+	ocfs2_swap_group_desc(desc);
+	return 0;
+}
errcode_t ocfs2_read_group_desc(ocfs2_filesys *fs, uint64_t blkno,
char *gd_buf)
{
@@ -81,7 +107,31 @@ out:
return ret;
}
+/*
+ * Write the group descriptor held in gd_buf to block blkno without
+ * allocating a temporary block.
+ *
+ * The descriptor is byte-swapped in place for the write and swapped back
+ * afterwards, so the caller's buffer is in the same byte order on return
+ * as it was on entry, whether or not the write succeeded.  Callers that
+ * keep the descriptor cached can therefore go on using it.
+ *
+ * Returns 0 on success, OCFS2_ET_RO_FILESYS when the filesystem was not
+ * opened read-write, OCFS2_ET_BAD_BLKNO for an out-of-range blkno, or the
+ * io_write_block() error.
+ */
+errcode_t ocfs2_write_group_desc2(ocfs2_filesys *fs, uint64_t blkno,
+				  char *gd_buf)
+{
+	errcode_t ret;
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)gd_buf;
+
+	if (!(fs->fs_flags & OCFS2_FLAG_RW))
+		return OCFS2_ET_RO_FILESYS;
+
+	if ((blkno < OCFS2_SUPER_BLOCK_BLKNO) ||
+	    (blkno > fs->fs_blocks))
+		return OCFS2_ET_BAD_BLKNO;
+
+	ocfs2_swap_group_desc(gd);
+	ret = io_write_block(fs->fs_io, blkno, 1, gd_buf);
+	/* undo the in-place swap before looking at the result */
+	ocfs2_swap_group_desc(gd);
+	if (ret)
+		return ret;
+
+	fs->fs_flags |= OCFS2_FLAG_CHANGED;
+	return 0;
+}
errcode_t ocfs2_write_group_desc(ocfs2_filesys *fs, uint64_t blkno,
char *gd_buf)
{
diff -N -u -p -r ocfs2-tools-1.2.6.orig/libocfs2/closefs.c ocfs2-tools-1.2.6/libocfs2/closefs.c
--- ocfs2-tools-1.2.6.orig/libocfs2/closefs.c 2008-02-05 01:30:21.000000000 -0500
+++ ocfs2-tools-1.2.6/libocfs2/closefs.c 2008-02-05 02:01:00.000000000 -0500
@@ -33,6 +33,7 @@
errcode_t ocfs2_flush(ocfs2_filesys *fs)
{
+ io_sync(fs->fs_io);
return 0;
}
diff -N -u -p -r ocfs2-tools-1.2.6.orig/libocfs2/extents.c ocfs2-tools-1.2.6/libocfs2/extents.c
--- ocfs2-tools-1.2.6.orig/libocfs2/extents.c 2008-02-05 01:30:21.000000000 -0500
+++ ocfs2-tools-1.2.6/libocfs2/extents.c 2008-02-05 02:01:11.000000000 -0500
@@ -98,6 +98,34 @@ static void ocfs2_swap_extent_block_to_c
ocfs2_swap_extent_list_to_cpu(&eb->h_list);
}
+/*
+ * Read the extent block at blkno straight into eb_buf (no temporary block
+ * is allocated) and swap it with ocfs2_swap_extent_block_to_cpu().
+ * The record counts of the extent list are not validated here.
+ *
+ * Returns 0 on success, OCFS2_ET_BAD_BLKNO for an out-of-range blkno, the
+ * io_read_block() error, or OCFS2_ET_BAD_EXTENT_BLOCK_MAGIC when the
+ * signature does not match.
+ */
+errcode_t ocfs2_read_extent_block_nocheck2(ocfs2_filesys *fs,
+					   uint64_t blkno,
+					   char *eb_buf)
+{
+	struct ocfs2_extent_block *block = (struct ocfs2_extent_block *)eb_buf;
+	errcode_t err;
+
+	if (blkno < OCFS2_SUPER_BLOCK_BLKNO || blkno > fs->fs_blocks)
+		return OCFS2_ET_BAD_BLKNO;
+
+	err = io_read_block(fs->fs_io, blkno, 1, eb_buf);
+	if (err)
+		return err;
+
+	if (memcmp(block->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE,
+		   strlen(OCFS2_EXTENT_BLOCK_SIGNATURE)) != 0)
+		return OCFS2_ET_BAD_EXTENT_BLOCK_MAGIC;
+
+	ocfs2_swap_extent_block_to_cpu(block);
+	return err;
+}
errcode_t ocfs2_read_extent_block_nocheck(ocfs2_filesys *fs,
uint64_t blkno,
char *eb_buf)
@@ -137,6 +165,20 @@ out:
return ret;
}
+/*
+ * Read the extent block at blkno into eb_buf via
+ * ocfs2_read_extent_block_nocheck2() and then reject a list whose
+ * l_next_free_rec exceeds l_count.
+ *
+ * Returns 0, the error from the nocheck reader, or
+ * OCFS2_ET_CORRUPT_EXTENT_BLOCK.
+ */
+errcode_t ocfs2_read_extent_block2(ocfs2_filesys *fs, uint64_t blkno,
+				   char *eb_buf)
+{
+	struct ocfs2_extent_list *list;
+	errcode_t err;
+
+	err = ocfs2_read_extent_block_nocheck2(fs, blkno, eb_buf);
+	if (err)
+		return err;
+
+	list = &((struct ocfs2_extent_block *)eb_buf)->h_list;
+	if (list->l_next_free_rec > list->l_count)
+		return OCFS2_ET_CORRUPT_EXTENT_BLOCK;
+
+	return 0;
+}
errcode_t ocfs2_read_extent_block(ocfs2_filesys *fs, uint64_t blkno,
char *eb_buf)
{
@@ -151,6 +193,32 @@ errcode_t ocfs2_read_extent_block(ocfs2_
return ret;
}
+/*
+ * Write the extent block held in eb_buf to block blkno without allocating
+ * a temporary block.
+ *
+ * The block is swapped in place with ocfs2_swap_extent_block_from_cpu()
+ * for the write and swapped back with ocfs2_swap_extent_block_to_cpu()
+ * afterwards, so the caller's buffer is usable again on return, whether
+ * or not the write succeeded.
+ *
+ * Returns 0 on success, OCFS2_ET_RO_FILESYS when the filesystem was not
+ * opened read-write, OCFS2_ET_BAD_BLKNO for an out-of-range blkno, or the
+ * io_write_block() error.
+ */
+errcode_t ocfs2_write_extent_block2(ocfs2_filesys *fs, uint64_t blkno,
+				    char *eb_buf)
+{
+	errcode_t ret;
+	struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *)eb_buf;
+
+	if (!(fs->fs_flags & OCFS2_FLAG_RW))
+		return OCFS2_ET_RO_FILESYS;
+
+	if ((blkno < OCFS2_SUPER_BLOCK_BLKNO) ||
+	    (blkno > fs->fs_blocks))
+		return OCFS2_ET_BAD_BLKNO;
+
+	ocfs2_swap_extent_block_from_cpu(eb);
+	ret = io_write_block(fs->fs_io, blkno, 1, eb_buf);
+	/* undo the in-place swap before looking at the result */
+	ocfs2_swap_extent_block_to_cpu(eb);
+	if (ret)
+		return ret;
+
+	fs->fs_flags |= OCFS2_FLAG_CHANGED;
+	return 0;
+}
errcode_t ocfs2_write_extent_block(ocfs2_filesys *fs, uint64_t blkno,
char *eb_buf)
diff -N -u -p -r ocfs2-tools-1.2.6.orig/libocfs2/include/ocfs2.h ocfs2-tools-1.2.6/libocfs2/include/ocfs2.h
--- ocfs2-tools-1.2.6.orig/libocfs2/include/ocfs2.h 2008-02-05 01:30:21.000000000 -0500
+++ ocfs2-tools-1.2.6/libocfs2/include/ocfs2.h 2008-02-05 02:10:19.000000000 -0500
@@ -279,6 +279,10 @@ void ocfs2_swap_inode_from_cpu(struct oc
void ocfs2_swap_inode_to_cpu(struct ocfs2_dinode *di);
errcode_t ocfs2_read_inode(ocfs2_filesys *fs, uint64_t blkno,
char *inode_buf);
+errcode_t ocfs2_read_inode2(ocfs2_filesys *fs, uint64_t blkno,
+ char *inode_buf);
+errcode_t ocfs2_write_inode2(ocfs2_filesys *fs, uint64_t blkno,
+ char *inode_buf);
errcode_t ocfs2_write_inode(ocfs2_filesys *fs, uint64_t blkno,
char *inode_buf);
errcode_t ocfs2_check_directory(ocfs2_filesys *fs, uint64_t dir);
@@ -326,10 +330,16 @@ errcode_t ocfs2_write_journal_superblock
errcode_t ocfs2_make_journal(ocfs2_filesys *fs, uint64_t blkno,
uint32_t clusters);
+errcode_t ocfs2_read_extent_block2(ocfs2_filesys *fs, uint64_t blkno,
+ char *eb_buf);
errcode_t ocfs2_read_extent_block(ocfs2_filesys *fs, uint64_t blkno,
char *eb_buf);
+errcode_t ocfs2_read_extent_block_nocheck2(ocfs2_filesys *fs, uint64_t blkno,
+ char *eb_buf);
errcode_t ocfs2_read_extent_block_nocheck(ocfs2_filesys *fs, uint64_t blkno,
char *eb_buf);
+errcode_t ocfs2_write_extent_block2(ocfs2_filesys *fs, uint64_t blkno,
+ char *eb_buf);
errcode_t ocfs2_write_extent_block(ocfs2_filesys *fs, uint64_t blkno,
char *eb_buf);
errcode_t ocfs2_extent_iterate(ocfs2_filesys *fs,
@@ -478,9 +488,13 @@ errcode_t ocfs2_get_ocfs1_label(char *de
void ocfs2_swap_group_desc(struct ocfs2_group_desc *gd);
errcode_t ocfs2_read_group_desc(ocfs2_filesys *fs, uint64_t blkno,
char *gd_buf);
+errcode_t ocfs2_read_group_desc2(ocfs2_filesys *fs, uint64_t blkno,
+ char *gd_buf);
errcode_t ocfs2_write_group_desc(ocfs2_filesys *fs, uint64_t blkno,
char *gd_buf);
+errcode_t ocfs2_write_group_desc2(ocfs2_filesys *fs, uint64_t blkno,
+ char *gd_buf);
errcode_t ocfs2_chain_iterate(ocfs2_filesys *fs,
uint64_t blkno,
diff -N -u -p -r ocfs2-tools-1.2.6.orig/libocfs2/inode.c ocfs2-tools-1.2.6/libocfs2/inode.c
--- ocfs2-tools-1.2.6.orig/libocfs2/inode.c 2008-02-05 01:30:21.000000000 -0500
+++ ocfs2-tools-1.2.6/libocfs2/inode.c 2008-02-05 02:01:17.000000000 -0500
@@ -208,7 +208,33 @@ void ocfs2_swap_inode_to_cpu(struct ocfs
if (has_extents(di))
ocfs2_swap_extent_list_to_cpu(&di->id2.i_list);
}
+/*
+ * Read the inode at blkno directly into inode_buf -- no second block is
+ * allocated -- and swap it in place with ocfs2_swap_inode_to_cpu().
+ *
+ * Returns 0 on success, OCFS2_ET_BAD_BLKNO for an out-of-range blkno, the
+ * io_read_block() error, or OCFS2_ET_BAD_INODE_MAGIC when the signature
+ * does not match (inode_buf is then left exactly as read).
+ */
+errcode_t ocfs2_read_inode2(ocfs2_filesys *fs, uint64_t blkno,
+			    char *inode_buf)
+{
+	struct ocfs2_dinode *dinode = (struct ocfs2_dinode *)inode_buf;
+	errcode_t err;
+
+	if (blkno < OCFS2_SUPER_BLOCK_BLKNO || blkno > fs->fs_blocks)
+		return OCFS2_ET_BAD_BLKNO;
+
+	err = io_read_block(fs->fs_io, blkno, 1, inode_buf);
+	if (err)
+		return err;
+
+	if (memcmp(dinode->i_signature, OCFS2_INODE_SIGNATURE,
+		   strlen(OCFS2_INODE_SIGNATURE)) != 0)
+		return OCFS2_ET_BAD_INODE_MAGIC;
+
+	ocfs2_swap_inode_to_cpu(dinode);
+	return 0;
+}
errcode_t ocfs2_read_inode(ocfs2_filesys *fs, uint64_t blkno,
char *inode_buf)
{
@@ -247,6 +273,33 @@ out:
return ret;
}
+/*
+ * Write the inode held in inode_buf to block blkno without allocating a
+ * temporary block.
+ *
+ * The inode is swapped in place with ocfs2_swap_inode_from_cpu() for the
+ * write and swapped back with ocfs2_swap_inode_to_cpu() afterwards, so the
+ * caller's buffer is usable again on return, whether or not the write
+ * succeeded.
+ *
+ * Returns 0 on success, OCFS2_ET_RO_FILESYS when the filesystem was not
+ * opened read-write, OCFS2_ET_BAD_BLKNO for an out-of-range blkno, or the
+ * io_write_block() error.
+ */
+errcode_t ocfs2_write_inode2(ocfs2_filesys *fs, uint64_t blkno,
+			     char *inode_buf)
+{
+	errcode_t ret;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_buf;
+
+	if (!(fs->fs_flags & OCFS2_FLAG_RW))
+		return OCFS2_ET_RO_FILESYS;
+
+	if ((blkno < OCFS2_SUPER_BLOCK_BLKNO) ||
+	    (blkno > fs->fs_blocks))
+		return OCFS2_ET_BAD_BLKNO;
+
+	ocfs2_swap_inode_from_cpu(di);
+	ret = io_write_block(fs->fs_io, blkno, 1, inode_buf);
+	/* undo the in-place swap before looking at the result */
+	ocfs2_swap_inode_to_cpu(di);
+	if (ret)
+		return ret;
+
+	fs->fs_flags |= OCFS2_FLAG_CHANGED;
+	return 0;
+}
errcode_t ocfs2_write_inode(ocfs2_filesys *fs, uint64_t blkno,
char *inode_buf)
{
diff -N -u -p -r ocfs2-tools-1.2.6.orig/libocfs2/unix_io.c ocfs2-tools-1.2.6/libocfs2/unix_io.c
--- ocfs2-tools-1.2.6.orig/libocfs2/unix_io.c 2008-02-05 01:30:21.000000000 -0500
+++ ocfs2-tools-1.2.6/libocfs2/unix_io.c 2008-02-05 02:01:29.000000000 -0500
@@ -172,6 +172,14 @@ out_chan:
return ret;
}
+/*
+ * Flush the channel's file descriptor with fsync().
+ * Returns 0 on success, or the errno value left by fsync() on failure.
+ * NOTE(review): the header hunks visible in this patch add no prototype
+ * for io_sync() -- confirm one is declared for its callers.
+ */
+errcode_t io_sync(io_channel *channel)
+{
+	if (fsync(channel->io_fd) < 0)
+		return errno;
+
+	return 0;
+}
errcode_t io_close(io_channel *channel)
{
errcode_t ret = 0;
diff -N -u -p -r ocfs2-tools-1.2.6.orig/o2defrag/Makefile ocfs2-tools-1.2.6/o2defrag/Makefile
--- ocfs2-tools-1.2.6.orig/o2defrag/Makefile 1969-12-31 19:00:00.000000000 -0500
+++ ocfs2-tools-1.2.6/o2defrag/Makefile 2008-02-05 01:56:37.000000000 -0500
@@ -0,0 +1,45 @@
+TOPDIR = ..
+
+include $(TOPDIR)/Preamble.make
+
+sbindir = $(root_sbindir)
+SBIN_PROGRAMS = o2defrag
+
+DEFINES = -DG_DISABLE_DEPRECATED -DLINUX -DDEBUGOCFS -DDEBUG
+DEFINES += -DOCFS2_FLAT_INCLUDES -DVERSION=\"$(VERSION)\" -DO2DLM_FLAT_INCLUDES -DO2CB_FLAT_INCLUDES
+
+# -Iinclude: per the cover note, o2defrag/include must be a symbolic link
+# to ../debugfs.ocfs2/include before this directory will build.
+INCLUDES = -Iinclude -I$(TOPDIR)/libocfs2/include -I$(TOPDIR)/libo2dlm/include -I$(TOPDIR)/libo2cb/include
+INCLUDES += $(GLIB_CFLAGS)
+
+ifdef OCFS2_DEBUG
+CFLAGS = -Wall -ggdb
+else
+CFLAGS = -Wall -O2
+endif
+
+CFILES = o2defrag.c
+
+HFILES = \
+ include/main.h \
+ include/commands.h \
+ include/dump.h \
+ include/utils.h \
+ include/journal.h \
+ include/find_block_inode.h \
+ include/find_inode_paths.h \
+ include/ocfs2_internals.h \
+ include/dump_fs_locks.h
+
+OBJS = $(subst .c,.o,$(CFILES))
+
+LIBOCFS2_LIBS = -L$(TOPDIR)/libocfs2 -locfs2
+LIBO2CB_LIBS = -L$(TOPDIR)/libo2cb -lo2cb
+
+DIST_RULES = dist-subdircreate
+
+dist-subdircreate:
+ $(TOPDIR)/mkinstalldirs $(DIST_DIR)/include
+
+o2defrag: $(OBJS)
+ $(LINK) $(GLIB_LIBS) $(LIBOCFS2_LIBS) $(LIBO2CB_LIBS) $(COM_ERR_LIBS) $(READLINE_LIBS) $(NCURSES_LIBS)
+include $(TOPDIR)/Postamble.make
diff -N -u -p -r ocfs2-tools-1.2.6.orig/o2defrag/o2defrag.c ocfs2-tools-1.2.6/o2defrag/o2defrag.c
--- ocfs2-tools-1.2.6.orig/o2defrag/o2defrag.c 1969-12-31 19:00:00.000000000 -0500
+++ ocfs2-tools-1.2.6/o2defrag/o2defrag.c 2008-02-05 01:56:37.000000000 -0500
@@ -0,0 +1,1675 @@
+#include <main.h>
+#include <bitops.h>
+
+#define SYSTEM_FILE_NAME_MAX 40
+#define MAX_FILE_DEP 20
+#define MAX_FILE_NAME_LEN 256
+
+/*
+ * One run of a file's data clusters inside one cluster group.
+ * Each object is linked on two lists at once: its file's and its group's.
+ */
+struct o2_file_group
+{
+ __le32 off; /* offset, in bitmap bits, of the run within its group */
+ __le32 count; /* number of bits in the run */
+ uint64_t blkno; /* block number at which the run starts */
+ struct o2_file *file; /* the related file (only one) */
+ struct o2_group *group; /* the related group (only one) */
+ struct o2_file_group *next_in_file; /* next entry in the file's list */
+ struct o2_file_group *next_in_group; /* next entry in the group's list */
+};
+
+/* Singly linked list of o2_file_group objects, owned by a file or a group. */
+struct o2_file_group_list
+{
+ struct o2_file_group *head; /* the first o2_file_group object in this list */
+ int count; /* object count in this list. */
+};
+
+/* One regular file or directory found while walking the directory tree. */
+struct o2_file
+{
+ char type; /* type of this file. 'f' for file, 'd' for directory */
+ uint64_t blkno; /* block number of the file's inode (metadata) block */
+ char *filename; /* file name; may be NULL */
+ struct o2_file_group_list file_group_list; /* file_groups in which this file has data clusters */
+ struct o2_file *next; /* next file in the list */
+};
+
+/* List of o2_file objects; new files are appended at the tail. */
+struct o2_file_list
+{
+ struct o2_file *head; /* the first file object in this list */
+ struct o2_file *last; /* the last file object. used for adding a new file to this list */
+ int count; /* file count in this list */
+};
+
+
+
+/* In-memory summary of one cluster group of the global bitmap. */
+struct o2_group
+{
+ int group_num; /* number given to this group when it was created */
+ uint64_t blkno; /* the block number on disk */
+ __le16 total; /* total bits in this group */
+ __le16 free; /* free bits in this group */
+ __le16 cong; /* max contiguous free bits */
+ __le16 meta_used; /* meta used bits; initialised to all used bits (total - free) */
+ struct o2_file_group_list file_group_list; /* file data runs located in this group */
+ struct o2_group *next; /* next group */
+ struct ocfs2_group_desc* gd; /* cache for the on disk group object */
+};
+
+/* List of o2_group objects; new groups are appended at the tail. */
+struct o2_group_list{
+ int group_size; /* count of total bits of a group */
+ int count; /* count of groups */
+ struct o2_group *head; /* the first group in list */
+ struct o2_group *last; /* the last group in list */
+};
+
+struct o2_file_list all_file_list;
+struct o2_group_list all_group_list;
+
+/*
+struct current_name{
+ int current_layer;
+ char *name[MAX_FILE_DEP+1];
+};
+
+struct current_name current_name;
+*/
+
+/* One pending extent-record update: data moved from old to new block. */
+struct o2_modify_meta{
+ uint64_t meta_blkno; /* inode block number of the file */
+ uint64_t old_data_blkno; /* block number the data was moved from */
+ uint64_t new_data_blkno; /* block number the data was moved to */
+ int needs_free; /* internal: 1 when this object is the start of a malloc()ed chunk */
+ struct o2_modify_meta *next; /* next pending update */
+};
+
+/* Queue of pending o2_modify_meta updates, appended at the tail. */
+struct o2_modify_meta_list
+{
+ struct o2_modify_meta *head; /* first pending update */
+ struct o2_modify_meta *last; /* last pending update; new ones are linked after it */
+ int count; /* number of pending updates */
+};
+
+struct o2_modify_meta_list meta_list;
+
+#define COUNT_OF_MODIFY_META 1024
+/*
+ * Internal chunk allocator state used by local_malloc(): o2_modify_meta
+ * objects are carved out of chunks of COUNT_OF_MODIFY_META slots so that
+ * malloc() is called once per chunk rather than once per object.
+ */
+struct memory_object
+{
+ int current_meta; /* index of the next unused slot in buf */
+ char *buf; /* the chunk currently being handed out */
+};
+
+struct memory_object memory_object;
+
+
+
+dbgfs_gbls gbls;
+__le16 bits_per_group = 0;
+char *buf_modify_file_meta = NULL;
+char *buf_gd1 = NULL;
+char *buf_gd2 = NULL;
+int update_disk = 1;
+
+
+
+static void * local_malloc(size_t size, int *needs_free);
+static struct o2_modify_meta * new_and_insert_meta(uint64_t meta_blkno,
+ uint64_t old_data_blkno,
+ uint64_t new_data_blkno);
+static int update_all_meta();
+static struct o2_group* new_group(int group_num, uint64_t blkno,
+ __le16 total, __le16 free,
+ int max_contiguous);
+static void insert_file_group_to_group(struct o2_group *group,
+ struct o2_file_group * file_group);
+static void remove_file_group_from_group(struct o2_group *group,
+ struct o2_file_group *file_group);
+static void insert_file_group_to_file(struct o2_file *file,
+ struct o2_file_group *file_group);
+static void insert_file_to_list(struct o2_file *file);
+static void insert_group_to_list(struct o2_group *group, struct o2_group_list *list);
+static inline int get_shift_bits();
+static inline int is_first_group_gd(const struct o2_group *group);
+static inline int get_group_num_offset(uint64_t blkno,
+ uint64_t *group_blkno, __le32 *off);
+static struct o2_file_group *new_file_group(struct o2_file *file,
+ __le32 group_blkno,
+ __le32 off,
+ __le32 count);
+static int record_file_and_group(struct o2_file *file, __le32 group_blkno,
+ __le32 off, __le32 count);
+static int access_els(struct ocfs2_extent_list *el, struct o2_file *file);
+static int access_one_node(struct ocfs2_dinode *inode, struct o2_file *file);
+static struct o2_file *new_file(struct ocfs2_dinode *inode, char type);
+static int load_all_files(char *basename, uint64_t blkno);
+static int worktree(char *basename, uint64_t blkno);
+static int do_with_childrens(struct ocfs2_dir_entry *rec, int offset,
+ int blocksize, char *buf, void *priv_data);
+inline void find_max_contig_free_bits(struct ocfs2_group_desc *gd,
+ int *max_contig_free_bits);
+static inline int get_N_contig_free_bits(struct ocfs2_group_desc *gd, int num,
+ int *start_off);
+static int commit_all_groups();
+static int load_all_groups();
+int print_all_files();
+static int print_file_group_on_group(struct o2_file_group *file_group,
+ struct o2_group* group);
+static inline int get_first_free_bit(struct ocfs2_group_desc *gd);
+static inline void clear_bits_on_group(int off, int count,
+ struct ocfs2_group_desc *gd);
+static inline void set_bits_on_group(int off, int count,
+ struct ocfs2_group_desc *gd);
+static inline int copy_1_cluster(uint64_t from, uint64_t to);
+static inline int copy_N_clusters(uint64_t fromblk, uint64_t toblk,
+ int cluster_count);
+static inline int modify_file_meta(uint64_t file_blkno, uint64_t from_blkno,
+ uint64_t to_blkno);
+static inline int find_and_change_extent_rec(struct ocfs2_extent_list *el,
+ void *updatebuf,
+ uint64_t updateblk,
+ int this_is_inode,
+ uint64_t from_blkno,
+ uint64_t to_blkno);
+static int move_file_group_on_group(struct o2_group *group,
+ struct o2_file_group *file_group);
+static void move_file_group_object(struct o2_file_group *file_group,
+ struct o2_group *group_from,
+ struct o2_group *group_to);
+static inline int commit_one_group(struct ocfs2_group_desc* gd);
+static inline struct o2_group * get_group(uint64_t group_blkno);
+static int defrag1_move_data_to_front_on_all_group();
+static struct o2_group *find_o2_group_for_defrag2();
+static int move_file_group_between_groups(struct o2_file_group *file_group,
+ struct o2_group *group_from,
+ struct o2_group *group_to);
+static int defrag2_one_file_group_on_group(struct o2_file_group *file_group,
+ struct o2_group *group);
+static int defrag2_on_group(struct o2_group *group);
+static int defrag2();
+static void print_all_groups();
+/*
+ * Hand out one slot for a struct o2_modify_meta from a chunk that holds
+ * COUNT_OF_MODIFY_META of them.  A new chunk is malloc()ed when the
+ * current one is used up or when no chunk has been allocated yet.
+ *
+ * size:       requested size; must not exceed
+ *             sizeof(struct o2_modify_meta), the fixed slot size.
+ * needs_free: set to 1 when the returned slot is the first one of its
+ *             chunk (the address that must later be passed to free()),
+ *             otherwise to 0.
+ *
+ * Returns the slot, or NULL when size is too large or malloc() fails.
+ */
+static void * local_malloc(size_t size, int *needs_free)
+{
+	const size_t slot_size = sizeof(struct o2_modify_meta);
+	char *slot;
+
+	/* slots have a fixed size; a larger request would overrun one */
+	if (size > slot_size)
+		return NULL;
+
+	/*
+	 * Also allocate when there is no chunk at all: with the global
+	 * still zero-initialised the index check alone would not trigger
+	 * and the first call would return a pointer based on NULL.
+	 */
+	if (!memory_object.buf ||
+	    memory_object.current_meta == COUNT_OF_MODIFY_META) {
+		memory_object.buf = malloc(slot_size * COUNT_OF_MODIFY_META);
+		if (!memory_object.buf) {
+			printf("no mem\n");
+			return NULL;
+		}
+		memory_object.current_meta = 0;
+	}
+
+	*needs_free = (memory_object.current_meta == 0);
+	slot = memory_object.buf + slot_size * memory_object.current_meta;
+	memory_object.current_meta++;
+
+	return slot;
+}
+
+/*
+ * Queue one pending metadata update at the tail of meta_list: the extent
+ * of inode meta_blkno that pointed at old_data_blkno must later be made
+ * to point at new_data_blkno.
+ * Returns the new list entry, or NULL when no memory is available.
+ */
+static struct o2_modify_meta * new_and_insert_meta(uint64_t meta_blkno,
+						   uint64_t old_data_blkno,
+						   uint64_t new_data_blkno)
+{
+	int first_in_chunk;
+	struct o2_modify_meta *meta;
+
+	meta = local_malloc(sizeof (struct o2_modify_meta), &first_in_chunk);
+	if (meta == NULL) {
+		printf("no mem\n");
+		return NULL;
+	}
+
+	meta->meta_blkno = meta_blkno;
+	meta->old_data_blkno = old_data_blkno;
+	meta->new_data_blkno = new_data_blkno;
+	meta->needs_free = first_in_chunk;
+	meta->next = NULL;
+
+	/* append at the tail; an empty list also gets its head set */
+	if (meta_list.last == NULL)
+		meta_list.head = meta;
+	else
+		meta_list.last->next = meta;
+	meta_list.last = meta;
+	meta_list.count++;
+
+	return meta;
+}
+
+/*
+ * Apply every queued o2_modify_meta entry with modify_file_meta(), then
+ * release the queue's memory and reset meta_list and memory_object.
+ * Returns 0 on success or the first non-zero modify_file_meta() result.
+ * NOTE(review): on that early return the queue is neither freed nor reset.
+ */
+static int update_all_meta()
+{
+ struct o2_modify_meta *tmp, *free_tmp=NULL;
+ int res = 0;
+
+ tmp = meta_list.head;
+ printf("updating file meta...");
+ while (tmp) {
+ res = modify_file_meta(tmp->meta_blkno, tmp->old_data_blkno,
+ tmp->new_data_blkno);
+ if (res) {
+ return res;
+ }
+ /* an entry with needs_free set is the start of a malloc()ed chunk;
+    reaching it means the previous chunk is fully processed, so that
+    one can be freed now and this one remembered for later */
+ if (tmp->needs_free) {
+ if (free_tmp)
+ free(free_tmp);
+ free_tmp = tmp;
+ }
+ tmp = tmp->next;
+ }
+ /* release the last chunk (free(NULL) is harmless for an empty queue) */
+ free(free_tmp);
+
+ meta_list.head = meta_list.last = NULL;
+ meta_list.count = 0;
+
+ /* mark the allocator as exhausted so the next request gets a new chunk */
+ memory_object.current_meta = COUNT_OF_MODIFY_META;
+ memory_object.buf = NULL;
+ printf("done.\n");
+
+ return res;
+}
+
+/*
+ * Allocate and initialise an o2_group object with an empty file-group
+ * list and no cached descriptor.
+ * Returns the object, or NULL when malloc() fails.
+ */
+static struct o2_group* new_group(int group_num, uint64_t blkno,
+				  __le16 total, __le16 free,
+				  int max_contiguous)
+{
+	struct o2_group *grp = malloc(sizeof(struct o2_group));
+
+	if (grp == NULL) {
+		printf("no mem\n");
+		return NULL;
+	}
+
+	grp->group_num = group_num;
+	grp->blkno = blkno;
+	grp->total = total;
+	grp->free = free;
+	grp->cong = max_contiguous;
+	/*
+	 * Not really the metadata usage yet but all used bits; it is meant
+	 * to be corrected as o2_file_group objects are added to the group.
+	 */
+	grp->meta_used = total - free;
+	grp->gd = NULL;
+	grp->next = NULL;
+	grp->file_group_list.head = NULL;
+	grp->file_group_list.count = 0;
+
+	return grp;
+}
+
+
+/*
+ * Link file_group into group's list, keeping the list ordered by ascending
+ * blkno (the new entry goes in front of the first entry whose blkno is not
+ * smaller), and record group as the entry's owner.
+ * The ordering lets later moves walk a group from low to high blocks.
+ */
+static void insert_file_group_to_group(struct o2_group *group,
+				       struct o2_file_group * file_group)
+{
+	struct o2_file_group **link = &group->file_group_list.head;
+
+	if (*link != NULL) {
+		/* skip every entry that starts below the new one */
+		while (*link != NULL && file_group->blkno > (*link)->blkno)
+			link = &(*link)->next_in_group;
+		file_group->next_in_group = *link;
+	}
+	/* for an empty list next_in_group stays as the caller set it */
+	*link = file_group;
+
+	group->file_group_list.count++;
+	file_group->group = group;
+}
+
+/*
+ * Unlink file_group from group's list and clear its next_in_group link.
+ * Nothing happens when file_group is not on the list.
+ */
+static void remove_file_group_from_group(struct o2_group *group,
+					 struct o2_file_group *file_group)
+{
+	struct o2_file_group **link = &group->file_group_list.head;
+
+	/* find the link that points at file_group */
+	while (*link != NULL && *link != file_group)
+		link = &(*link)->next_in_group;
+
+	if (*link == NULL)
+		return;
+
+	*link = file_group->next_in_group;
+	group->file_group_list.count--;
+	file_group->next_in_group = NULL;
+}
+
+/*
+ * Link file_group into file's list, keeping the list ordered by ascending
+ * blkno.  A file can own several file-group objects.
+ */
+static void insert_file_group_to_file(struct o2_file *file,
+				      struct o2_file_group *file_group)
+{
+	struct o2_file_group **link = &file->file_group_list.head;
+
+	if (*link != NULL) {
+		/* skip every entry that starts below the new one */
+		while (*link != NULL && file_group->blkno > (*link)->blkno)
+			link = &(*link)->next_in_file;
+		file_group->next_in_file = *link;
+	}
+	/* for an empty list next_in_file stays as the caller set it */
+	*link = file_group;
+
+	file->file_group_list.count++;
+}
+
+/* Append file to the tail of the global all_file_list. */
+static void insert_file_to_list(struct o2_file *file)
+{
+	if (all_file_list.last == NULL)
+		all_file_list.head = file;
+	else
+		all_file_list.last->next = file;
+	all_file_list.last = file;
+
+	all_file_list.count++;
+}
+
+/* Append group to the tail of list. */
+static void insert_group_to_list(struct o2_group *group, struct o2_group_list *list)
+{
+	if (list->last == NULL)
+		list->head = group;
+	else
+		list->last->next = group;
+	list->last = group;
+
+	list->count++;
+}
+
+/*
+ * Get the full path name of the current file.
+ * Currently a stub that always returns NULL: the implementation below is
+ * disabled together with the current_name bookkeeping it relies on.
+ */
+static char *get_current_name(){
+/*
+ char *buf = malloc(MAX_FILE_NAME_LEN*MAX_FILE_DEP); // the first is "/", so +1 is not needed
+ int i,len;
+
+ if (!buf)
+ return NULL;
+ if (!current_name.current_layer)
+ return NULL;
+
+ len = sprintf(buf,"/"); // for root
+ for(i=2; i<=current_name.current_layer; i++) {
+ len += sprintf(buf+len, "%s/",current_name.name[i]);
+ }
+ buf[len] = 0;
+
+ return buf;
+*/
+ return NULL;
+}
+
+/*
+ * Record name as one more path component of the file being visited.
+ * Currently a stub that ignores name and always returns 0; the disabled
+ * implementation is kept below.
+ */
+static inline int push_name(const char *name)
+{
+/*
+ if (name) {
+ if (++current_name.current_layer > MAX_FILE_DEP) {
+ printf("file layer > %d\n", MAX_FILE_DEP);
+ return -1;
+ }
+ if (current_name.name[current_name.current_layer])
+ free(current_name.name[current_name.current_layer]);
+ current_name.name[current_name.current_layer] = strdup(name);
+ return 0;
+ }
+ return -1;
+*/
+ return 0;
+}
+
+/*
+ * Drop the path component added by the matching push_name().
+ * Currently a stub that always returns 0; the disabled implementation is
+ * kept below.
+ */
+static inline int pop_name()
+{
+/*
+ if (--current_name.current_layer < 0) {
+ printf("error when pop name -- curren_layer <0 \n");
+ return -1;
+ }
+*/
+ return 0;
+}
+
+/*
+ * Return the shift that converts between cluster and block numbers:
+ * s_clustersize_bits minus s_blocksize_bits of the open filesystem.
+ */
+static inline int get_shift_bits()
+{
+	struct ocfs2_super_block *super = OCFS2_RAW_SB(gbls.fs->fs_super);
+
+	return super->s_clustersize_bits - super->s_blocksize_bits;
+}
+
+/*
+ * Return 1 when group is the first cluster group, i.e. its block number
+ * equals the superblock's s_first_cluster_group, and 0 otherwise.
+ */
+static inline int is_first_group_gd(const struct o2_group *group)
+{
+	struct ocfs2_super_block *super = OCFS2_RAW_SB(gbls.fs->fs_super);
+
+	return super->s_first_cluster_group == group->blkno;
+}
+/*
+ * Work out which cluster group the data block blkno belongs to.
+ *
+ * blkno:       block number of a data cluster
+ * group_blkno: receives the block number of the owning group; for the
+ *              first group this is the superblock's s_first_cluster_group,
+ *              for the others the first block of the group's cluster range
+ * off:         receives the cluster (bit) offset of blkno within the group
+ *
+ * Returns 0 on success, -1 when bits_per_group has not been set yet.
+ */
+static inline int get_group_num_offset(uint64_t blkno,
+				       uint64_t *group_blkno, __le32 *off)
+{
+	struct ocfs2_super_block *sb = OCFS2_RAW_SB(gbls.fs->fs_super);
+	int shift = get_shift_bits();
+	uint64_t cluster_num = blkno >> shift;
+	uint64_t group_nr;
+
+	/* avoid dividing by zero before the groups have been loaded */
+	if (bits_per_group == 0)
+		return -1;
+
+	group_nr = cluster_num / bits_per_group;
+
+	if (group_nr == 0) {
+		*group_blkno = sb->s_first_cluster_group;
+		*off = cluster_num;
+	} else {
+		/*
+		 * Done in 64 bits: the product is a cluster number, but
+		 * shifting it up to a block number can exceed 32 bits.
+		 */
+		*group_blkno = (group_nr * bits_per_group) << shift;
+		*off = (blkno - (*group_blkno)) >> shift;
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the group in all_group_list whose block number is group_blkno.
+ * Returns the group, or NULL when there is none.
+ */
+static inline struct o2_group * get_group(uint64_t group_blkno)
+{
+	struct o2_group *cur;
+
+	for (cur = all_group_list.head; cur != NULL; cur = cur->next) {
+		if (cur->blkno == group_blkno)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Allocate an o2_file_group for count bits of file starting at bit off
+ * of the group whose block number is group_blkno, and compute the block
+ * number at which that data starts.  The object is not linked anywhere.
+ * Returns the object, or NULL when malloc() fails or no such group exists.
+ * NOTE(review): group_blkno is only 32 bits wide here while group block
+ * numbers are kept as uint64_t elsewhere -- confirm this cannot truncate.
+ */
+static struct o2_file_group *new_file_group(struct o2_file *file,
+					    __le32 group_blkno,
+					    __le32 off,
+					    __le32 count)
+{
+	struct o2_file_group *fg = malloc(sizeof(struct o2_file_group));
+
+	if (fg == NULL) {
+		printf("no mem\n");
+		return NULL;
+	}
+
+	fg->group = get_group(group_blkno);
+	if (fg->group == NULL) {
+		printf("group %u not exist\n",group_blkno);
+		free(fg);
+		return NULL;
+	}
+
+	fg->file = file;
+	fg->off = off;
+	fg->count = count;
+	fg->next_in_file = NULL;
+	fg->next_in_group = NULL;
+
+	/* offsets in the first group count from block 0, not from the
+	   group descriptor's own block */
+	fg->blkno = off<<get_shift_bits();
+	if (!is_first_group_gd(fg->group))
+		fg->blkno += fg->group->blkno;
+
+	return fg;
+}
+
+/*
+ * Create an o2_file_group for count bits of file at bit off of the group
+ * at group_blkno and link it into both the group's and the file's list.
+ * Returns 0 on success, -1 when the object could not be created.
+ */
+static int record_file_and_group(struct o2_file *file, __le32 group_blkno,
+				 __le32 off, __le32 count)
+{
+	struct o2_file_group *file_group;
+
+	file_group = new_file_group(file, group_blkno, off, count);
+	if (!file_group)
+		return -1;
+
+	insert_file_group_to_group(file_group->group, file_group);
+	insert_file_group_to_file(file, file_group);
+
+	return 0;
+}
+
+/*
+ * Walk the extent list el of file.  For a leaf list (l_tree_depth == 0)
+ * every record is registered with record_file_and_group(); otherwise each
+ * record's extent block is read and its list walked recursively.
+ * Returns 0 on success, -1 when a leaf record cannot be registered, or the
+ * error of the failing allocation, read or recursive call.
+ */
+static int access_els(struct ocfs2_extent_list *el, struct o2_file *file)
+{
+	char *blk = NULL;
+	int err = 0;
+	int idx;
+
+	for (idx = 0; idx < el->l_next_free_rec; ++idx) {
+		struct ocfs2_extent_rec *rec = &(el->l_recs[idx]);
+
+		if (!el->l_tree_depth) {
+			uint64_t group_blkno;
+			__le32 off;
+
+			if (get_group_num_offset(rec->e_blkno, &group_blkno, &off))
+				return -1;
+			if (record_file_and_group(file, group_blkno, off,
+						  rec->e_clusters))
+				return -1;
+			continue;
+		}
+
+		/* interior record: descend into the extent block it names */
+		err = ocfs2_malloc_block(gbls.fs->fs_io, &blk);
+		if (err) {
+			printf("no mem\n");
+			break;
+		}
+		err = ocfs2_read_extent_block2(gbls.fs, rec->e_blkno, blk);
+		if (err) {
+			printf("ocfs2_read_extent_blcok failed %d\n", err);
+			break;
+		}
+		err = access_els(&(((struct ocfs2_extent_block *)blk)->h_list),
+				 file);
+		if (err)
+			break;
+		ocfs2_free(&blk);
+	}
+
+	if (blk)
+		ocfs2_free(&blk);
+	return err;
+}
+
+/*
+ * Register all data extents of inode for file by walking the inode's
+ * extent list.  Returns 0 on success or the access_els() error.
+ */
+static int access_one_node(struct ocfs2_dinode *inode, struct o2_file *file)
+{
+	return access_els(&(inode->id2.i_list), file);
+}
+
+/*
+ * Allocate an o2_file for inode with the given type ('f' or 'd') and an
+ * empty file-group list.  The name comes from get_current_name() and a
+ * NULL name is accepted.
+ * Returns the object, or NULL when malloc() fails.
+ */
+static struct o2_file *new_file(struct ocfs2_dinode *inode, char type)
+{
+	struct o2_file *entry = malloc(sizeof(struct o2_file));
+
+	if (entry == NULL) {
+		printf("no mem\n");
+		return NULL;
+	}
+
+	entry->type = type;
+	entry->blkno = inode->i_blkno;
+	entry->next = NULL;
+	entry->file_group_list.head = NULL;
+	entry->file_group_list.count = 0;
+	entry->filename = get_current_name();
+
+	return entry;
+}
+
+/*
+ * Directory iteration callback: descend into every entry except "." and
+ * "..".
+ *
+ * The entry name is copied before it is terminated; writing the NUL into
+ * rec->name itself could overwrite the first byte of whatever follows the
+ * name in the directory block.
+ *
+ * priv_data: when not NULL, points to an int that receives the worktree()
+ *            error of a failing child.
+ *
+ * Returns 0 to continue, OCFS2_DIRENT_ABORT to stop after a failure.
+ * NOTE(review): the return value is presumably read as OCFS2_DIRENT_*
+ * flags by the iterator, so the old -1 would also have set the "changed"
+ * flag -- confirm against libocfs2.
+ */
+static int do_with_childrens(struct ocfs2_dir_entry *rec, int offset,
+			     int blocksize, char *buf, void *priv_data)
+{
+	char name[MAX_FILE_NAME_LEN];
+	size_t len = rec->name_len;
+	int res;
+
+	if (len >= sizeof(name))
+		len = sizeof(name) - 1;
+	memcpy(name, rec->name, len);
+	name[len] = '\0';
+
+	if (!strcmp(name, ".") || !strcmp(name, "..")) {
+		return 0;
+	}
+
+	res = worktree(name, rec->inode);
+	if (res) {
+		if (priv_data)
+			*(int *)priv_data = res;
+		return OCFS2_DIRENT_ABORT;
+	}
+	return 0;
+}
+
+/* Load the whole tree rooted at basename (inode block blkno). */
+static int load_all_files(char *basename, uint64_t blkno)
+{
+	return worktree(basename, blkno);
+}
+
+/*
+ * Work on the subtree rooted at basename, whose inode block is blkno.
+ *
+ * Regular files and directories get an o2_file that is appended to
+ * all_file_list and has its data extents registered; directories are then
+ * descended into.  Other inode types are ignored.
+ *
+ * Once an o2_file is on all_file_list it is owned by that list (and
+ * referenced by its file groups), so it is not freed here even when a
+ * later step fails.
+ *
+ * Returns 0 on success, non-zero on the first failure, including one
+ * reported by a child through the iteration callback.
+ */
+static int worktree(char *basename, uint64_t blkno)
+{
+	int res;
+	int child_err = 0;
+	char *buf = NULL;
+	struct ocfs2_dinode *inode = NULL;
+	struct o2_file *file = NULL;
+	char type;
+
+	res = push_name(basename);
+	if (res)
+		return -1;
+	res = ocfs2_malloc_block(gbls.fs->fs_io, &buf);
+	if (res) {
+		printf("ocfs2_malloc_block failed. %d\n", res);
+		pop_name();
+		return -1;
+	}
+
+	res = ocfs2_read_inode2(gbls.fs, blkno, buf);
+	if (res) {
+		printf("ocfs2_read_inode error %d\n",res);
+		goto out;
+	}
+	inode = (struct ocfs2_dinode *)buf;
+	if (S_ISREG(inode->i_mode))
+		type='f';
+	else if (S_ISDIR(inode->i_mode))
+		type='d';
+	else {
+		/* ignore other types; res is 0 here */
+		goto out;
+	}
+
+	file = new_file(inode, type);
+	if (!file) {
+		res = -1;
+		goto out;
+	}
+	insert_file_to_list(file);
+	res = access_one_node(inode, file);
+	if (res)
+		goto out;
+
+	if (file->type == 'd') {
+		res = ocfs2_dir_iterate(gbls.fs, blkno, 0, NULL,
+					do_with_childrens, &child_err);
+		if (res)
+			printf("ocfs2_dir_iterate failed. %d\n", res);
+		else
+			res = child_err;
+	}
+
+out:
+	if (buf)
+		ocfs2_free(&buf);
+	if (res) {
+		pop_name();
+	} else {
+		res = pop_name();
+	}
+
+	return res;
+}
+
+/*
+ * Find the longest run of clear (free) bits in gd's bitmap.
+ *
+ * gd:                   group descriptor whose bg_bitmap (bg_bits bits)
+ *                       is scanned
+ * max_contig_free_bits: receives the length of the longest free run,
+ *                       0 when no bit is free
+ *
+ * Defined without "inline" on purpose: the function has external linkage,
+ * and an inline-only definition provides no external definition under
+ * C99 semantics, so a call that is not inlined would fail to link.
+ */
+void find_max_contig_free_bits(struct ocfs2_group_desc *gd,
+			       int *max_contig_free_bits)
+{
+	int end = 0;
+	int start;
+	int free_bits;
+
+	*max_contig_free_bits = 0;
+
+	while (end < gd->bg_bits) {
+		/* next free run starts at the first clear bit at or after end */
+		start = ocfs2_find_next_bit_clear(gd->bg_bitmap, gd->bg_bits, end);
+		if (start >= gd->bg_bits)
+			break;
+
+		/* ... and stops at the next set bit (or at bg_bits) */
+		end = ocfs2_find_next_bit_set(gd->bg_bitmap, gd->bg_bits, start);
+		free_bits = end - start;
+		if (*max_contig_free_bits < free_bits)
+			*max_contig_free_bits = free_bits;
+	}
+}
+/*
+ * Claim num contiguous free bits in gd.
+ * The first free run of at least num bits is used: its first num bits are
+ * set in the bitmap, bg_free_bits_count is reduced by num and the run's
+ * first bit is stored in *start_off.
+ * Returns 0 on success, -1 when no free run is long enough.
+ */
+static inline int get_N_contig_free_bits(struct ocfs2_group_desc *gd, int num,
+					 int *start_off)
+{
+	int pos = 0;
+
+	while (pos < gd->bg_bits) {
+		int run_start;
+		int bit;
+
+		run_start = ocfs2_find_next_bit_clear(gd->bg_bitmap,
+						      gd->bg_bits, pos);
+		if (run_start >= gd->bg_bits)
+			return -1;
+
+		pos = ocfs2_find_next_bit_set(gd->bg_bitmap, gd->bg_bits,
+					      run_start);
+		if (pos - run_start < num)
+			continue;
+
+		*start_off = run_start;
+		for (bit = run_start; bit < run_start + num; bit++)
+			ocfs2_set_bit(bit, gd->bg_bitmap);
+		gd->bg_free_bits_count -= num;
+		return 0;
+	}
+
+	/* reached when the last free run ends at bg_bits and is too short */
+	return -1;
+}
+
+/*
+ * Write the group descriptor of every group in all_group_list back with
+ * commit_one_group().  Stops at the first failure and returns its result;
+ * returns 0 when every group was written.
+ */
+static int commit_all_groups()
+{
+	struct o2_group *cur;
+	int rc = 0;
+
+	printf("commiting all groups...");
+	for (cur = all_group_list.head; cur; cur = cur->next) {
+		rc = commit_one_group(cur->gd);
+		if (!rc)
+			continue;
+		printf("error while commit group.\n");
+		return rc;
+	}
+	printf("done.\n");
+	return rc;
+}
+
+/*
+ * Print a summary line for all_group_list followed by one line per group.
+ * As a side effect the cached group->cong of every group is refreshed from
+ * its group descriptor bitmap.
+ */
+static void print_all_groups()
+{
+	struct o2_group *cur;
+
+	printf("Total groups: %d clusters per group: %u, blocks per group: %lu\n",
+	       all_group_list.count, bits_per_group,
+	       ((uint64_t)bits_per_group)<<get_shift_bits());
+	printf("No blkno total-bits free-bits max-cong-bits\n");
+
+	for (cur = all_group_list.head; cur; cur = cur->next) {
+		int longest_free_run;
+
+		find_max_contig_free_bits(cur->gd, &longest_free_run);
+		cur->cong = longest_free_run;
+		printf("%-3d %-10lu %-9u %-8u %-9u\n",
+		       cur->group_num, cur->blkno,
+		       cur->total, cur->free, cur->cong);
+	}
+	printf("\n");
+}
+/*
+ * Build all_group_list from the global bitmap.
+ *
+ * Looks up "global_bitmap" in the system directory, reads its inode and
+ * walks every chain record of its chain list, following bg_next_group.  For
+ * each group descriptor an o2_group is created that keeps the descriptor
+ * buffer in group->gd.  bits_per_group is set from the chain list's cl_cpg.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int load_all_groups()
+{
+	char *buf = NULL, *buf2 = NULL;
+	int res;
+	uint64_t glbitmap_blkno;
+	struct ocfs2_dinode *inode;
+	struct ocfs2_chain_list *cl;
+	int i,index;
+	uint64_t blkno;
+	struct ocfs2_chain_rec *rec;
+	struct ocfs2_group_desc *grp;
+	struct o2_group *group = NULL;
+	int max_free_bits = 0;
+
+	res = ocfs2_malloc_block(gbls.fs->fs_io, &buf);
+	if (res)
+		goto RET;
+
+	res = ocfs2_lookup(gbls.fs, gbls.sysdir_blkno, "global_bitmap",
+			   strlen("global_bitmap"), NULL, &glbitmap_blkno);
+	if (res) {
+		printf("ocfs2_lookup failed. %d\n",res);
+		goto RET;
+	}
+	printf("got bitmap blkno: %lu\n",glbitmap_blkno);
+	res = ocfs2_read_inode2(gbls.fs, glbitmap_blkno, buf);
+	if (res) {
+		printf("ocfs2_read_inode error %d\n",res);
+		goto RET;
+	}
+	inode = (struct ocfs2_dinode*)buf;
+	cl = &(inode->id2.i_chain);
+	bits_per_group = cl->cl_cpg;
+
+	res = ocfs2_malloc_block(gbls.fs->fs_io, &buf2);
+	if (res)
+		goto RET;
+
+	index = 0;
+	for (i = 0; i < cl->cl_next_free_rec; ++i) {
+		rec = &(cl->cl_recs[i]);
+		blkno = rec->c_blkno;
+		while (blkno) {
+			res = ocfs2_read_group_desc2(gbls.fs, blkno, buf2);
+			if (res) {
+				printf("ocfs2_read_group_desc2 failed. %d\n",res);
+				goto RET;
+			}
+			grp = (struct ocfs2_group_desc *)buf2;
+			find_max_contig_free_bits(grp, &max_free_bits);
+			group = new_group(index, blkno, grp->bg_bits,
+					  grp->bg_free_bits_count, max_free_bits);
+			if (!group) {
+				res = -1;
+				goto RET;
+			}
+			/* the group now owns the descriptor buffer */
+			group->gd = grp;
+			buf2 = NULL;
+			blkno = grp->bg_next_group;
+			/*
+			 * Link the group into the list before allocating the
+			 * next buffer, so that an allocation failure does not
+			 * leave the group and its descriptor unreferenced.
+			 */
+			insert_group_to_list(group, &all_group_list);
+			index++;
+
+			/* fresh buffer for the next descriptor (freed at RET if unused) */
+			res = ocfs2_malloc_block(gbls.fs->fs_io, &buf2);
+			if (res)
+				goto RET;
+		}
+	}
+	print_all_groups();
+	res = 0;
+
+RET:
+	if (buf)
+		ocfs2_free(&buf);
+	if (buf2)
+		ocfs2_free(&buf2);
+	return res;
+}
+
+/*
+ * Dump every file in all_file_list: one line with type, inode block and
+ * name, then one line per o2_file_group of the file.  Always returns 0.
+ */
+int print_all_files()
+{
+	struct o2_file *file;
+	struct o2_file_group *fg;
+
+	printf("total files: %d\n", all_file_list.count);
+	printf("filetype blkno filename data-groups off \n");
+
+	for (file = all_file_list.head; file; file = file->next) {
+		printf("%c %lu %-20s\n", file->type, file->blkno,
+		       file->filename?file->filename:"NULL");
+
+		for (fg = file->file_group_list.head; fg;
+		     fg = fg->next_in_file) {
+			/* bit offset in the group, converted to blocks */
+			uint64_t blkno = fg->off<<get_shift_bits();
+
+			/* every group but the first is offset by its own blkno */
+			if (!is_first_group_gd(fg->group))
+				blkno += fg->group->blkno;
+
+			printf("group=%d, count=%3d,off=%3d,blkno=%3lu\n",
+			       fg->group->group_num,
+			       fg->count,
+			       fg->off,
+			       blkno);
+		}
+		printf("\n");
+	}
+
+	return 0;
+}
+
+/*
+ * Print one line for file_group: owning file name ("NULL" if it has none),
+ * cluster count and block number.  group is not used.  Always returns 0.
+ */
+static int print_file_group_on_group(struct o2_file_group *file_group,
+				     struct o2_group* group)
+{
+	const char *name = file_group->file->filename;
+
+	if (!name)
+		name = "NULL";
+	printf("%-20s count=%d blkno=%lu\n",
+	       name, file_group->count, file_group->blkno);
+	return 0;
+}
+
+/*
+ * Print every o2_file_group that lives on the index-th group (0-based
+ * position in all_group_list).
+ *
+ * Returns 0 on success, -1 when the list has no such position, or the first
+ * non-zero result of print_file_group_on_group().
+ */
+int print_all_files_on_group(int index)
+{
+	struct o2_group *group = all_group_list.head;
+	struct o2_file_group *fg;
+	int steps;
+
+	/* walk index steps down the list, stopping early if it ends */
+	for (steps = index; steps && group; steps--)
+		group = group->next;
+
+	if (!group) {
+		printf("no such group. %d\n", index);
+		return -1;
+	}
+
+	printf("files on group %u blkno=%lu\n",group->group_num, group->blkno);
+	printf("filename bit_offset_in_group count(clusters)\n");
+
+	for (fg = group->file_group_list.head; fg; fg = fg->next_in_group) {
+		int rc = print_file_group_on_group(fg, group);
+
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+/*
+ * Return the offset of the first clear bit in the bitmap of gd,
+ * or -1 when all bg_bits bits are set.
+ */
+static inline int get_first_free_bit(struct ocfs2_group_desc *gd)
+{
+	int bit = 0;
+
+	while (bit < gd->bg_bits) {
+		if (!ocfs2_test_bit(bit, gd->bg_bitmap))
+			return bit;
+		bit++;
+	}
+	return -1;
+}
+
+/*
+ * Mark count bits starting at off as free in the in-memory bitmap of gd
+ * and add count to bg_free_bits_count.
+ */
+static inline void clear_bits_on_group(int off, int count,
+				       struct ocfs2_group_desc *gd)
+{
+	int bit;
+
+	for (bit = off; bit < off + count; bit++)
+		ocfs2_clear_bit(bit, gd->bg_bitmap);
+
+	gd->bg_free_bits_count += count;
+}
+
+/*
+ * Mark count bits starting at off as used in the in-memory bitmap of gd
+ * and subtract count from bg_free_bits_count.
+ */
+static inline void set_bits_on_group(int off, int count,
+				     struct ocfs2_group_desc *gd)
+{
+	int bit;
+
+	for (bit = off; bit < off + count; bit++)
+		ocfs2_set_bit(bit, gd->bg_bitmap);
+
+	gd->bg_free_bits_count -= count;
+}
+
+/* bounce buffer for copy_1_cluster(); main() allocates one cluster worth of blocks */
+char *buf_copy_1_cluster = NULL;
+/*
+ * Copy one cluster, i.e. 1<<get_shift_bits() blocks, from block 'from' to
+ * block 'to' through buf_copy_1_cluster.  The source is left untouched.
+ *
+ * The whole cluster is read before anything is written.  A warning is
+ * printed when the two block numbers are less than one cluster apart, since
+ * source and destination then overlap inside a cluster.
+ *
+ * Returns 0 on success (or when from == to), -1 on an I/O error.
+ */
+static inline int copy_1_cluster(uint64_t from, uint64_t to)
+{
+	int res = 0;
+	int blkspercluster = 1<<get_shift_bits();
+	uint64_t distance;
+
+	if (to == from)
+		return 0;
+
+	/*
+	 * Both values are unsigned, so take the absolute difference explicitly,
+	 * and compare against the real cluster size rather than a fixed 32.
+	 */
+	distance = (from > to) ? from - to : to - from;
+	if (distance < (uint64_t)blkspercluster)
+		printf("warning: copying block in same cluster? from=%lu, to=%lu\n",
+		       from, to);
+
+	res = io_read_block(gbls.fs->fs_io, from, blkspercluster, buf_copy_1_cluster);
+	if (res) {
+		printf("io_read_block (%lu) failed. %d, %d\n",
+		       from, res, io_get_error(gbls.fs->fs_io));
+		return -1;
+	}
+	res = io_write_block(gbls.fs->fs_io, to, blkspercluster, buf_copy_1_cluster);
+	if (res) {
+		printf("%d io_write_block failed. to(%lu) %d, err: %d\n",
+		       __LINE__, to, res, io_get_error(gbls.fs->fs_io));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy cluster_count consecutive clusters from block fromblk to block toblk,
+ * one cluster at a time with copy_1_cluster(), lowest cluster first.
+ *
+ * Both block numbers advance by one cluster (1<<get_shift_bits() blocks)
+ * per step.  Returns 0 on success or when fromblk == toblk, otherwise the
+ * first non-zero result of copy_1_cluster().
+ */
+static inline int copy_N_clusters(uint64_t fromblk, uint64_t toblk,
+				  int cluster_count)
+{
+	int step = 1<<get_shift_bits();
+	int left;
+
+	if (fromblk == toblk)
+		return 0;
+
+	for (left = cluster_count; left > 0; left--) {
+		int rc = copy_1_cluster(fromblk, toblk);
+
+		if (rc)
+			return rc;
+		fromblk += step;
+		toblk += step;
+	}
+	return 0;
+}
+
+/*
+ * In the file whose inode is at file_blkno, find the ocfs2_extent_rec whose
+ * e_blkno equals from_blkno and change it to to_blkno.
+ *
+ * Does nothing and returns 0 when update_disk is not set.  Otherwise the
+ * inode is read into buf_modify_file_meta and searched with
+ * find_and_change_extent_rec().
+ *
+ * Returns 0 when the record was changed, 1 when no such record exists and
+ * -1 on error.
+ */
+static inline int modify_file_meta(uint64_t file_blkno, uint64_t from_blkno,
+				   uint64_t to_blkno)
+{
+	struct ocfs2_dinode *di;
+	int rc;
+
+	if (!update_disk)
+		return 0;
+
+	rc = ocfs2_read_inode2(gbls.fs, file_blkno, buf_modify_file_meta);
+	if (rc) {
+		printf("ocfs2_read_inode2 %lu, failed. %d\n",file_blkno, rc);
+		return -1;
+	}
+
+	di = (struct ocfs2_dinode *)buf_modify_file_meta;
+	rc = find_and_change_extent_rec(&(di->id2.i_list),
+					buf_modify_file_meta, file_blkno,
+					1, from_blkno, to_blkno);
+	if (rc == 1)
+		printf("such meta block %lu not found\n", from_blkno);
+
+	return rc;
+}
+
+/*
+ * Search the extent list el for the leaf record whose e_blkno equals
+ * from_blkno, set it to to_blkno and write the containing block back.
+ *
+ * el lives inside updatebuf, the in-memory copy of block updateblk; that
+ * block is an inode when this_is_inode is non-zero and an extent block
+ * otherwise.  When l_tree_depth is non-zero every record names a child
+ * extent block, which is read and searched recursively.
+ *
+ * Returns 0 -- found and changed
+ *        -1 -- error occurred
+ *         1 -- not found in this extent list
+ */
+
+static inline int find_and_change_extent_rec(struct ocfs2_extent_list *el,
+					     void *updatebuf,
+					     uint64_t updateblk,
+					     int this_is_inode,
+					     uint64_t from_blkno,
+					     uint64_t to_blkno)
+{
+	char *child_buf = NULL;
+	int idx;
+	int rc;
+
+	if (!el->l_tree_depth) {
+		/* leaf list: the records point at data clusters */
+		for (idx = 0; idx < el->l_next_free_rec; ++idx) {
+			if (el->l_recs[idx].e_blkno != from_blkno)
+				continue;
+
+			el->l_recs[idx].e_blkno = to_blkno;
+			if (this_is_inode)
+				rc = ocfs2_write_inode2(gbls.fs, updateblk,
+							updatebuf);
+			else
+				rc = ocfs2_write_extent_block2(gbls.fs,
+							       updateblk,
+							       updatebuf);
+			if (rc) {
+				printf("%d: io_write_block error. %d, %d\n", __LINE__, rc,
+				       io_get_error(gbls.fs->fs_io));
+				rc = -1;
+			}
+			return rc;
+		}
+		return 1;
+	}
+
+	/* interior list: descend into each child extent block in turn */
+	for (idx = 0; idx < el->l_next_free_rec; ++idx) {
+		struct ocfs2_extent_rec *rec = &(el->l_recs[idx]);
+		struct ocfs2_extent_block *eb;
+
+		rc = ocfs2_malloc_block(gbls.fs->fs_io, &child_buf);
+		if (rc) {
+			printf("no mem\n");
+			return -1;
+		}
+		rc = ocfs2_read_extent_block2(gbls.fs, rec->e_blkno, child_buf);
+		if (rc) {
+			printf("ocfs2_read_extent_blcok failed %d\n", rc);
+			ocfs2_free(&child_buf);
+			return -1;
+		}
+		eb = (struct ocfs2_extent_block *)child_buf;
+		rc = find_and_change_extent_rec(&(eb->h_list), child_buf,
+						rec->e_blkno,
+						0,
+						from_blkno,
+						to_blkno);
+		ocfs2_free(&child_buf);
+		child_buf = NULL;
+
+		/* stop on success or error; keep looking on "not found" */
+		if (rc == 0 || rc == -1)
+			return rc;
+	}
+
+	return 1;
+}
+
+/*
+ * Write the group descriptor gd to its own block (gd->bg_blkno).
+ * Returns 0 on success, -1 when the write fails.
+ */
+static inline int commit_one_group(struct ocfs2_group_desc* gd)
+{
+	int rc = ocfs2_write_group_desc2(gbls.fs, gd->bg_blkno, (char *)gd);
+
+	if (!rc)
+		return 0;
+
+	printf("ocfs2_write_group_desc2 fialed. %d\n",rc);
+	return -1;
+}
+
+/*
+ * Move the clusters described by file_group towards the front of the group
+ * they already live on.
+ *
+ * The bits of the file group are released in the in-memory bitmap, then the
+ * first sufficiently long free run is allocated.  If that run starts after
+ * the current position the bitmap is restored and nothing is moved.
+ * Otherwise the data is copied, the extent change is recorded with
+ * new_and_insert_meta() and file_group is updated to its new position.
+ *
+ * Returns -1 on error, 0 when moved, 1 when not moved (no space, or the
+ * file group already sits before the first free bit).
+ */
+static int move_file_group_on_group(struct o2_group *group,
+				    struct o2_file_group *file_group)
+{
+	struct ocfs2_group_desc *gd = group->gd;
+	int new_off;
+	int first_free;
+	int rc;
+	uint64_t fromblk, toblk;
+
+	first_free = get_first_free_bit(gd);
+	/* group full, or nothing free in front of this file group */
+	if (first_free == -1 || file_group->off < first_free)
+		return 1;
+
+	/* release our own bits so they can be part of the new allocation */
+	clear_bits_on_group(file_group->off, file_group->count, gd);
+
+	if (get_N_contig_free_bits(gd, file_group->count, &new_off)) {
+		/* no space: take the old bits back */
+		set_bits_on_group(file_group->off, file_group->count, gd);
+		return 1;
+	}
+
+	if (new_off > file_group->off) {
+		/* the run found lies behind us: undo and stay where we are */
+		clear_bits_on_group(new_off, file_group->count, gd);
+		set_bits_on_group(file_group->off, file_group->count, gd);
+		return 1;
+	}
+
+	/* translate bit offsets into block numbers */
+	fromblk = file_group->off<<get_shift_bits();
+	toblk = new_off<<get_shift_bits();
+	if (!is_first_group_gd(group)) {
+		fromblk += group->blkno;
+		toblk += group->blkno;
+	}
+
+	rc = copy_N_clusters(fromblk, toblk, file_group->count);
+	if (rc)
+		return rc;	/* error, caller will not continue */
+
+	/* record the extent change for the owning file */
+	if (!new_and_insert_meta(file_group->file->blkno, fromblk, toblk))
+		return -1;	/* error, caller will not continue */
+
+	/* update the file group object */
+	file_group->off = new_off;
+	file_group->blkno = file_group->off<<get_shift_bits();
+	if (!is_first_group_gd(file_group->group))
+		file_group->blkno += file_group->group->blkno;
+
+	return rc;
+}
+
+
+/*
+ * First defragmentation pass: within every group, move the data clusters of
+ * each file group towards the front of that group so that the free space
+ * collects at its end.
+ *
+ * Each move works on a whole file group; no ocfs2_extent_rec is merged or
+ * split.  A move involves:
+ *   1) copying the data clusters,
+ *   2) recording the new ocfs2_extent_rec.e_blkno for the file,
+ *   3) clearing and setting bits in the in-memory bitmap of the group.
+ *
+ * Groups without any free bit are skipped.  Returns -1 as soon as one move
+ * fails, otherwise the result of update_all_meta().
+ */
+static int defrag1_move_data_to_front_on_all_group()
+{
+	int res;
+	struct o2_group *group;
+	struct o2_file_group *fg;
+
+	printf("process in defrag1 ...\n");
+
+	for (group = all_group_list.head; group; group = group->next) {
+		printf("defrag1 working on group %d/%d...", group->group_num, all_group_list.count);
+
+		/* a completely full group has nowhere to move data to */
+		if (get_first_free_bit(group->gd) != -1) {
+			for (fg = group->file_group_list.head; fg;
+			     fg = fg->next_in_group) {
+				res = move_file_group_on_group(group, fg);
+				if (res == -1) {
+					printf("move_file_group_on_group failed. %d\n", res);
+					return res;
+				}
+			}
+		}
+		printf("done.\n");
+	}
+
+	res = update_all_meta();
+	printf("defrag1 finished.\n\n");
+	return res;
+}
+
+
+/*
+ * Pick the group in all_group_list with the largest free count, unlink it
+ * from the list and return it.
+ *
+ * Only groups with free > 0 are candidates; on a tie the one nearest the
+ * head wins.  Returns NULL when the list is empty or no group has a free
+ * bit.  No check is made that the remaining groups can hold the data of the
+ * chosen group.
+ */
+static struct o2_group *find_o2_group_for_defrag2()
+{
+	int max_free = 0;
+	struct o2_group *group, *prev = NULL;
+	struct o2_group *found = NULL, *prefound = NULL;
+
+	for (group = all_group_list.head; group;
+	     prev = group, group = group->next) {
+		if (group->free > max_free) {
+			max_free = group->free;
+			prefound = prev;	/* predecessor, NULL for the head */
+			found = group;
+		}
+	}
+
+	if (!found)
+		return NULL;
+
+	/* unlink found from the singly linked list */
+	if (found == all_group_list.head)
+		all_group_list.head = found->next;
+	else
+		prefound->next = found->next;
+	if (all_group_list.last == found)
+		all_group_list.last = prefound;
+	found->next = NULL;
+	all_group_list.count --;
+
+	return found;
+}
+
+/*
+ * Re-home the file_group object from group_from to group_to and adjust the
+ * cached free counts of both groups by file_group->count.
+ * Only a warning is printed when file_group does not belong to group_from.
+ */
+static void move_file_group_object(struct o2_file_group *file_group,
+				   struct o2_group *group_from,
+				   struct o2_group *group_to)
+{
+	if (file_group->group == group_from) {
+		/* leaving the source group gives its clusters back ... */
+		remove_file_group_from_group(group_from, file_group);
+		group_from->free += file_group->count;
+		/* ... and the target group loses as many */
+		insert_file_group_to_group(group_to, file_group);
+		group_to->free -= file_group->count;
+	} else {
+		printf("warning: file_group not in the orginal group.\n");
+	}
+}
+
+/*
+ * Move the clusters described by file_group from group_from to group_to.
+ *
+ * A run of file_group->count free bits is allocated in the in-memory bitmap
+ * of group_to, the data is copied there and the extent change is recorded
+ * with new_and_insert_meta().  Only after that are the source bits released
+ * and the file_group object moved to group_to.  On failure the bits
+ * allocated in group_to are released again, so the in-memory bitmaps keep
+ * describing the old location.
+ *
+ * return value:  0 --> moved successfully
+ *                1 --> not moved, no space (including a full target group)
+ *               -1 --> error
+ */
+static int move_file_group_between_groups(struct o2_file_group *file_group,
+					   struct o2_group *group_from,
+					   struct o2_group *group_to)
+{
+	struct ocfs2_group_desc *gd_from, *gd_to;
+	int alloc_start_bits_off;
+	int res = 0;
+	uint64_t fromblk, toblk;
+
+	gd_to = group_to->gd;
+	if (-1 == get_first_free_bit(gd_to)) {
+		/* target group full: nothing was moved, so report "no space" */
+		return 1;
+	}
+
+	res = get_N_contig_free_bits(gd_to, file_group->count,
+				     &alloc_start_bits_off);
+	if (res) {
+		/* no run long enough */
+		return 1;
+	}
+
+	/* translate bit offsets into block numbers */
+	fromblk = file_group->off<<get_shift_bits();
+	if (!is_first_group_gd(group_from)) {
+		fromblk += group_from->blkno;
+	}
+	toblk = alloc_start_bits_off<<get_shift_bits();
+	if (!is_first_group_gd(group_to)) {
+		toblk += group_to->blkno;
+	}
+
+	res = copy_N_clusters(fromblk, toblk, file_group->count);
+	if (res) {
+		/* copy failed: give the target bits back */
+		clear_bits_on_group(alloc_start_bits_off,
+				    file_group->count, gd_to);
+		return res;
+	}
+
+	if (!new_and_insert_meta(file_group->file->blkno, fromblk, toblk)) {
+		/* change not recorded: the file still uses the old clusters */
+		clear_bits_on_group(alloc_start_bits_off,
+				    file_group->count, gd_to);
+		return -1;
+	}
+
+	/* the move is recorded, the old clusters can be released now */
+	gd_from = group_from->gd;
+	clear_bits_on_group(file_group->off,
+			    file_group->count, gd_from);
+
+	move_file_group_object(file_group, group_from, group_to);
+	file_group->off = alloc_start_bits_off;
+	file_group->blkno = file_group->off<<get_shift_bits();
+	if (!is_first_group_gd(file_group->group)) {
+		file_group->blkno += file_group->group->blkno;
+	}
+
+	return 0;
+}
+
+/*
+ * Move file_group off 'group' onto some group in all_group_list.
+ *
+ * Targets are tried in two rounds:
+ *   1) groups that already hold other clusters of the same file (and for
+ *      which get_group() finds an entry),
+ *   2) every group in all_group_list, in list order.
+ * The first target with enough contiguous free bits is used.
+ *
+ * return value:  0 --> file_group moved
+ *                1 --> file_group not moved, without error
+ *               -1 --> error occurred
+ */
+static int defrag2_one_file_group_on_group(struct o2_file_group *file_group,
+					    struct o2_group *group)
+{
+	struct o2_file *file;
+	struct o2_file_group *file_group_tmp, *next=NULL;
+	int res;
+	struct o2_group *group_tmp;
+
+	file = file_group->file;
+	file_group_tmp = file->file_group_list.head;
+
+	/*
+	 * Round 1.  The same target may be tried more than once when several
+	 * file groups of this file share a group.
+	 */
+	while (file_group_tmp) {
+		/* remember the successor before a possible move */
+		next = file_group_tmp->next_in_file;
+		if ((file_group_tmp->group != group)
+		    &&get_group(file_group_tmp->group->blkno)) {
+			res = move_file_group_between_groups(file_group, group,
+							     file_group_tmp->group);
+			if (res == 0 || res == -1) {
+				/* moved successfully, or error: stop either way */
+				return res;
+			}
+			/* res == 1: no space on that group, try the next one */
+		}
+		file_group_tmp = next;
+	}
+
+	/* Round 2: any group still in all_group_list. */
+	group_tmp = all_group_list.head;
+	while (group_tmp) {
+		res = move_file_group_between_groups(file_group, group, group_tmp);
+		if (res == 0 || res == -1) {
+			/* moved successfully, or error */
+			return res;
+		}
+		/* res == 1: no space */
+		group_tmp = group_tmp->next;
+	}
+	return 1;
+
+}
+
+/*
+ * Try to move every file group found on 'group' to another group, using
+ * defrag2_one_file_group_on_group().
+ *
+ * A file group that cannot be moved for lack of space is simply left in
+ * place; that is not an error.  A successful move takes the o2_file_group
+ * object off this group's list.
+ *
+ * Returns 0, or -1 as soon as one move reports an error.
+ */
+static int defrag2_on_group(struct o2_group *group)
+{
+	struct o2_file_group *cur, *following;
+
+	for (cur = group->file_group_list.head; cur; cur = following) {
+		/*
+		 * cur may be relinked into another group's list by the call
+		 * below, so its successor must be fetched beforehand.
+		 */
+		following = cur->next_in_group;
+		if (defrag2_one_file_group_on_group(cur, group) == -1)
+			return -1;
+	}
+	return 0;
+}
+/*
+ * Second defragmentation pass: repeatedly take the group chosen by
+ * find_o2_group_for_defrag2() out of all_group_list and try to move its file
+ * groups onto the groups still in the list.
+ *
+ * Processed groups are parked on a local list and appended to
+ * all_group_list again at the end, so the order of all_group_list may
+ * differ afterwards.
+ *
+ * Returns 0 on success.  If moving fails, the loop stops and that error is
+ * returned; update_all_meta() is still called, but its result is only
+ * reported when no earlier error occurred.
+ */
+static int defrag2()
+{
+	struct o2_group_list gldone;
+	struct o2_group *group;
+	int res = 0;
+	int meta_res;
+
+	printf("process in defrag2 ...\n");
+	gldone.count = 0;
+	gldone.head = gldone.last = NULL;
+	gldone.group_size = all_group_list.group_size;
+
+	group = find_o2_group_for_defrag2();
+	while (group) {
+		printf("defrag2 working on group %d...", group->group_num);
+		res = defrag2_on_group(group);
+		printf("done.\n");
+		/* park the group on the done list, even after a failure */
+		insert_group_to_list(group, &gldone);
+		if (res)
+			break;
+		group = find_o2_group_for_defrag2();
+	}
+
+	/* do not let this result hide an error from the loop above */
+	meta_res = update_all_meta();
+	if (!res)
+		res = meta_res;
+
+	/* append the processed groups to all_group_list again */
+	if (gldone.count) {
+		all_group_list.count += gldone.count;
+		if (all_group_list.last) {
+			all_group_list.last->next = gldone.head;
+		} else {
+			all_group_list.head = gldone.head;
+		}
+		all_group_list.last = gldone.last;
+	}
+	printf("defrag2 finished\n\n");
+	return res;
+}
+/*
+ * o2defrag <ocfs2 partition>
+ *
+ * Opens the device read-write, loads all groups of the global bitmap and
+ * all files below the root directory, then runs defrag1, defrag2 and
+ * defrag1 again.  The group descriptors are committed only when all three
+ * passes succeeded.
+ *
+ * Returns 0 on success, -1 when a step fails; exits with status 1 on a
+ * usage, open or allocation error.
+ */
+int main(int argc, char **argv){
+	char *dev;
+	struct ocfs2_super_block *sb;
+	int flags;
+	errcode_t ret = 0;
+	uint64_t superblock = 0, block_size = 0;
+
+	/* argv[1] is NULL when no device was given */
+	if (argc < 2) {
+		printf("usage: %s <ocfs2 partition>\n",
+		       argc > 0 ? argv[0] : "o2defrag");
+		exit(1);
+	}
+	dev = argv[1];
+
+	memset(&gbls,0, sizeof(gbls));
+	memset(&all_file_list, 0,sizeof(all_file_list));
+	memset(&all_group_list, 0,sizeof(all_group_list));
+
+	gbls.progname = basename(argv[0]);
+	meta_list.head = meta_list.last = NULL;
+	meta_list.count = 0;
+
+	memory_object.current_meta = COUNT_OF_MODIFY_META;
+	memory_object.buf = NULL;
+
+	gbls.allow_write = OCFS2_FLAG_RW;
+	flags = gbls.allow_write;
+	flags |= OCFS2_FLAG_HEARTBEAT_DEV_OK;
+
+	ret = ocfs2_open(dev, flags, superblock, block_size, &gbls.fs);
+	if (ret) {
+		printf("error while opening device\n");
+		exit(1);
+	}
+	ret = ocfs2_malloc_block(gbls.fs->fs_io, &gbls.blockbuf);
+	if (ret) {
+		printf("error while allocating a block.\n");
+		exit(1);
+	}
+	sb = OCFS2_RAW_SB(gbls.fs->fs_super);
+
+	/* set globals */
+	gbls.device = g_strdup (dev);
+	gbls.max_clusters = gbls.fs->fs_super->i_clusters;
+	gbls.max_blocks = ocfs2_clusters_to_blocks(gbls.fs, gbls.max_clusters);
+	gbls.root_blkno = sb->s_root_blkno;
+	gbls.sysdir_blkno = sb->s_system_dir_blkno;
+	gbls.cwd_blkno = sb->s_root_blkno;
+	gbls.cwd = strdup("/");
+
+	/* one block for inode updates, one whole cluster for data copies */
+	ret = ocfs2_malloc_block(gbls.fs->fs_io, &buf_modify_file_meta);
+	if (!ret)
+		ret = ocfs2_malloc_blocks(gbls.fs->fs_io,
+					  1<<get_shift_bits(),
+					  &buf_copy_1_cluster);
+	if (ret) {
+		printf("error while allocating blocks \n");
+		exit(1);
+	}
+
+	ret = load_all_groups();
+	if (ret)
+		return -1;
+	ret = load_all_files("/", gbls.root_blkno);
+	if (ret) {
+		return -1;
+	}
+
+	ret = defrag1_move_data_to_front_on_all_group();
+	if (!ret) {
+		ret = defrag2();
+	}
+	if (!ret) {
+		/* second defrag1 pass; its result is checked like the others */
+		ret = defrag1_move_data_to_front_on_all_group();
+	}
+	if (ret) {
+		printf("defrag failed, group bitmaps were not committed.\n");
+		return -1;
+	}
+	print_all_groups();
+
+	if (commit_all_groups())
+		return -1;
+	printf("syncing to disk...");
+	ret = ocfs2_close(gbls.fs);
+	if (ret) {
+		printf("ocfs2_close failed.\n");
+		return -1;
+	}
+	printf("done.\n");
+	return 0;
+}
_______________________________________________
Ocfs2-devel mailing list
[email protected]
http://oss.oracle.com/mailman/listinfo/ocfs2-devel