Hi,
when copying a whole directory tree with standard tools, e.g.
tar cf - . | ( cd $DEST && tar xf - )
or cpio -p ...
the source disk is busy seeking. That's noisy and particularly slow.
I've written a small Python program which outputs the file names in
i-node order. If this list is fed into tar or cpio, almost no seeks are
required during copying.
I've tested it by comparing the resulting copied tree to one created by
tar | tar.
But its correctness is critical when backing up data.
Therefore I'd like to ask for comments.
Thanks for any comments,
Helmut.
#!/usr/bin/python3
import os, sys, stat
def walktree(top, root_dev=None, out=None):
    """Record (inode, pathname) for every entry below *top* on one device.

    Every directory entry found under *top* (regular files, directories,
    symlinks, special files) is examined with ``os.lstat`` -- symlinks are
    therefore recorded but never followed.  Entries whose ``st_dev`` differs
    from *root_dev* (e.g. mount points) are neither recorded nor descended
    into.  *top* itself is NOT recorded; the caller does that.

    Entries are appended in depth-first pre-order: a directory is recorded
    first, then its whole subtree, then its remaining siblings.

    Args:
        top: directory to scan.
        root_dev: device number entries must live on.  Defaults to the
            module-level ``Root_Dev``.
        out: list receiving the ``(st_ino, pathname)`` tuples.  Defaults to
            the module-level ``FN_List``.

    Returns:
        None; results are appended to *out* in place.

    Entries that cannot be stat'ed (for instance because they vanished
    during the scan) and directories that cannot be listed are reported on
    stderr and skipped instead of aborting the whole scan.
    """
    if root_dev is None:
        root_dev = Root_Dev
    if out is None:
        out = FN_List

    def list_names(directory):
        """Return an iterator over the names in *directory* (empty on error)."""
        try:
            return iter(os.listdir(directory))
        except OSError as err:
            print("walktree: cannot list", repr(directory), "-", err,
                  file=sys.stderr)
            return iter(())

    # Explicit stack of (directory, iterator over its remaining names) instead
    # of recursion, so very deep trees cannot exceed the recursion limit.
    stack = [(top, list_names(top))]
    while stack:
        directory, names = stack[-1]
        for name in names:
            pathname = os.path.join(directory, name)
            try:
                info = os.lstat(pathname)
            except OSError as err:
                print("walktree: cannot stat", repr(pathname), "-", err,
                      file=sys.stderr)
                continue
            if info.st_dev != root_dev:
                # Different filesystem: skip the entry and everything below.
                continue
            out.append((info.st_ino, pathname))
            if stat.S_ISDIR(info.st_mode):
                # Descend now; the partially consumed iterator of the parent
                # stays on the stack and is resumed once the subtree is done.
                stack.append((pathname, list_names(pathname)))
                break
        else:
            # Iterator exhausted without descending: directory finished.
            stack.pop()
USAGE = '''usage:
TreeWalk_I_Sorted <TOPDIR> # generates a list of files in inode order
TreeWalk_I_Sorted -0 <TOPDIR> # same list, NUL-terminated (safe for names containing newlines)
# example with tar :
TreeWalk_I_Sorted <TOPDIR> | tar --no-recursion -c -j -T- -f XXX.tar.bz2
# example with cpio
TreeWalk_I_Sorted <TOPDIR> | cpio -padmu <DESTDIR>
# NUL-terminated variants
TreeWalk_I_Sorted -0 <TOPDIR> | tar --null --no-recursion -c -j -T- -f XXX.tar.bz2
TreeWalk_I_Sorted -0 <TOPDIR> | cpio -0 -padmu <DESTDIR>
'''


def _write_names(entries, out, terminator=b"\n"):
    """Write the pathname of each (inode, pathname) entry to binary stream *out*.

    Names are written as raw file-system bytes (``os.fsencode``) so that
    names which are not valid in the locale encoding are passed through
    unchanged instead of raising ``UnicodeEncodeError``.  Each name is
    followed by *terminator*.
    """
    for _inode, pathname in entries:
        out.write(os.fsencode(pathname) + terminator)


def main(argv):
    """Print all paths below TOPDIR (TOPDIR included) sorted by inode number.

    Args:
        argv: command line, ``[prog, TOPDIR]`` or ``[prog, "-0", TOPDIR]``.

    Returns:
        Process exit status: 0 on success, 1 on a usage error or when
        TOPDIR cannot be stat'ed.
    """
    # walktree() reads these module-level names, so they must stay global.
    global Root_Dev, FN_List

    if len(argv) == 2:
        top, terminator = argv[1], b"\n"
    elif len(argv) == 3 and argv[1] == "-0":
        top, terminator = argv[2], b"\0"
    else:
        # Usage goes to stderr: stdout is normally piped into tar/cpio,
        # which would treat the help text as file names.
        print(USAGE, file=sys.stderr)
        return 1

    try:
        top_stat = os.lstat(top)
    except OSError as err:
        print("cannot stat", repr(top), "-", err, file=sys.stderr)
        return 1

    Root_Dev = top_stat.st_dev
    FN_List = [(top_stat.st_ino, top)]
    # NOTE: the original author measured about 500 bytes of memory per file.
    walktree(top)
    # Tuples sort by inode first, then by pathname (relevant for hard links).
    FN_List.sort()

    out = sys.stdout.buffer
    _write_names(FN_List, out, terminator)
    out.flush()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))