Hi again,

here's the patch.

I came up with with two options after all:
1) Keep only hardlinked files in hashtable, then lookup file index in the 
database
2) Keep both hardlinked files and hardlinks in hashtable

IMO both are fine.
Keeping only first occurence in htable consumes some 300K of memory, while mark 
time improved about 20 times (80 to 4 mins).
Then again, in my tests with 1M hardlinks it took 8 minutes.

Keeping it all in memory uses 48MB for 1M hardlinks, but mark works instantly.

So both options are here, just define/undef HARDLINKS_INMEM in ua_tree.c.
Usually, when I don't know which option to use, I introduce new config 
parameter:)

Tree node structure remains the same.

Regards...

>From 1e735cb83f74cc47f8bf2e73cd9b265b7713c74a Mon Sep 17 00:00:00 2001
From: Josip Almasi <j...@vrspace.org>
Date: Mon, 27 Aug 2012 11:31:45 +0200
Subject: [PATCH] hardlink performance improvement

---
 bacula/src/dird/ua_tree.c |   80 ++++++++++++++++++++++++++++++--------------
 bacula/src/lib/tree.c     |    7 +++-
 bacula/src/lib/tree.h     |   12 ++++++-
 3 files changed, 70 insertions(+), 29 deletions(-)

diff --git a/bacula/src/dird/ua_tree.c b/bacula/src/dird/ua_tree.c
index 0f97b0a..dd91b01 100644
--- a/bacula/src/dird/ua_tree.c
+++ b/bacula/src/dird/ua_tree.c
@@ -98,6 +98,7 @@ static struct cmdstruct commands[] = {
  { NT_("?"),          helpcmd,      _("print help")},
              };
 #define comsize ((int)(sizeof(commands)/sizeof(struct cmdstruct)))
+#define HARDLINKS_INMEM
 
 /*
  * Enter a prompt mode where the user can select/deselect
@@ -171,7 +172,6 @@ bool user_select_files_from_tree(TREE_CTX *tree)
    return stat;
 }
 
-
 /*
  * This callback routine is responsible for inserting the
  *  items it gets into the directory tree. For each JobId selected
@@ -193,6 +193,8 @@ int insert_tree_handler(void *ctx, int num_fields, char 
**row)
    int FileIndex;
    int32_t delta_seq;
    JobId_t JobId;
+   HL_ENTRY *entry = NULL;
+   int32_t LinkFI;
 
    Dmsg4(150, "Path=%s%s FI=%s JobId=%s\n", row[0], row[1],
          row[2], row[3]);
@@ -205,13 +207,14 @@ int insert_tree_handler(void *ctx, int num_fields, char 
**row)
    } else {
       type = TN_FILE;
    }
-   hard_link = (decode_LinkFI(row[4], &statp, sizeof(statp)) != 0);
+   decode_stat(row[4], &statp, sizeof(statp), &LinkFI );
+   hard_link = ( LinkFI != 0 );
    node = insert_tree_node(row[0], row[1], type, tree->root, NULL);
    JobId = str_to_int64(row[3]);
    FileIndex = str_to_int64(row[2]);
    delta_seq = str_to_int64(row[5]);
-   Dmsg5(150, "node=0x%p JobId=%s FileIndex=%s Delta=%s node.delta=%d\n",
-         node, row[3], row[2], row[5], node->delta_seq);
+   Dmsg6(150, "node=0x%p JobId=%s FileIndex=%s Delta=%s node.delta=%d 
LinkFI=%d\n",
+         node, row[3], row[2], row[5], node->delta_seq, LinkFI);
 
    /* TODO: check with hardlinks */
    if (delta_seq > 0) {
@@ -227,7 +230,7 @@ int insert_tree_handler(void *ctx, int num_fields, char 
**row)
             tree->ua->warning_msg(_("Something is wrong with the Delta 
sequence of %s, "
                                     "skiping new parts. Current sequence is 
%d\n"),
                                   row[1], node->delta_seq);
-            
+
             Dmsg3(0, "Something is wrong with Delta, skip it "
                   "fname=%s d1=%d d2=%d\n", row[1], node->delta_seq, 
delta_seq);
          }
@@ -265,6 +268,29 @@ int insert_tree_handler(void *ctx, int num_fields, char 
**row)
             node->extract_dir = true;   /* if dir, extract it */
          }
       }
+      /* insert file having hardlinks into hardlink hashtable  */
+      if ( statp.st_nlink > 1 && type != TN_DIR && type != TN_DIR_NLS ) {
+         if ( ! LinkFI ) {
+            /* first occurence - file hardlinked to */
+            entry = (HL_ENTRY 
*)tree->root->hardlinks.hash_malloc(sizeof(HL_ENTRY));
+            entry->key = (((uint64_t) JobId) << 32) + FileIndex;
+            entry->node = node;
+            tree->root->hardlinks.insert(entry->key, entry);
+#ifdef HARDLINKS_INMEM
+         } else {
+            /* hardlink to known file index: lookup original file */
+            uint64_t file_key = (((uint64_t) JobId) << 32) + LinkFI;
+            HL_ENTRY *first_hl = (HL_ENTRY *) 
tree->root->hardlinks.lookup(file_key);
+            if ( first_hl && first_hl->node ) {
+               /* then add hardlink entry to linked node*/
+               entry = (HL_ENTRY 
*)tree->root->hardlinks.hash_malloc(sizeof(HL_ENTRY));
+               entry->key = (((uint64_t) JobId) << 32) + FileIndex;
+               entry->node = first_hl->node;
+               tree->root->hardlinks.insert(entry->key, entry);
+            }
+#endif
+         }
+      }
    }
    if (node->inserted) {
       tree->FileCount++;
@@ -277,7 +303,6 @@ int insert_tree_handler(void *ctx, int num_fields, char 
**row)
    return 0;
 }
 
-
 /*
  * Set extract to value passed. We recursively walk
  *  down the tree setting all children if the
@@ -286,9 +311,11 @@ int insert_tree_handler(void *ctx, int num_fields, char 
**row)
 static int set_extract(UAContext *ua, TREE_NODE *node, TREE_CTX *tree, bool 
extract)
 {
    TREE_NODE *n;
+   int count = 0;
+#ifndef HARDLINKS_INMEM
    FILE_DBR fdbr;
    struct stat statp;
-   int count = 0;
+#endif
 
    node->extract = extract;
    if (node->type == TN_DIR || node->type == TN_DIR_NLS) {
@@ -314,6 +341,8 @@ static int set_extract(UAContext *ua, TREE_NODE *node, 
TREE_CTX *tree, bool extr
          }
       }
    } else if (extract) {
+      uint64_t key = 0;
+#ifndef HARDLINKS_INMEM
       char cwd[2000];
       /*
        * Ordinary file, we get the full path, look up the
@@ -326,21 +355,20 @@ static int set_extract(UAContext *ua, TREE_NODE *node, 
TREE_CTX *tree, bool extr
       if (node->hard_link && db_get_file_attributes_record(ua->jcr, ua->db, 
cwd, NULL, &fdbr)) {
          int32_t LinkFI;
          decode_stat(fdbr.LStat, &statp, sizeof(statp), &LinkFI); /* decode 
stat pkt */
+         key = (((uint64_t) node->JobId) << 32) + LinkFI;  /* lookup by linked 
file's fileindex */
+#else
+      if ( node->hard_link ) {
+         key = (((uint64_t) node->JobId) << 32) + node->FileIndex;  /* every 
hardlink is in hashtable, and it points to linked file */
+#endif
          /*
-          * If we point to a hard linked file, traverse the tree to
-          * find that file, and mark it to be restored as well. It
-          * must have the Link we just obtained and the same JobId.
+          If we point to a hard linked file, find that file in hardlinks 
hashmap,
+          and mark it to be restored as well.
           */
-         if (LinkFI) {
-            for (n=first_tree_node(tree->root); n; n=next_tree_node(n)) {
-               if (n->FileIndex == LinkFI && n->JobId == node->JobId) {
-                  n->extract = true;
-                  if (n->type == TN_DIR || n->type == TN_DIR_NLS) {
-                     n->extract_dir = true;
-                  }
-                  break;
-               }
-            }
+         HL_ENTRY *entry = (HL_ENTRY *) tree->root->hardlinks.lookup(key);
+         if ( entry && entry->node ) {
+            n = entry->node;
+            n->extract = true;
+            n->extract_dir = (n->type == TN_DIR || n->type == TN_DIR_NLS);
          }
       }
    }
@@ -488,7 +516,7 @@ static int dot_lsdircmd(UAContext *ua, TREE_CTX *tree)
          }
       }
    }
- 
+
    return 1;
 }
 
@@ -516,7 +544,7 @@ static int dot_lscmd(UAContext *ua, TREE_CTX *tree)
          ua->send_msg("%s%s\n", node->fname, tree_node_has_child(node)?"/":"");
       }
    }
- 
+
    return 1;
 }
 
@@ -608,8 +636,8 @@ static int lsmarkcmd(UAContext *ua, TREE_CTX *tree)
 /*
  * This is actually the long form used for "dir"
  */
-static void ls_output(guid_list *guid, char *buf, const char *fname, const 
char *tag, 
-                      struct stat *statp, bool dot_cmd) 
+static void ls_output(guid_list *guid, char *buf, const char *fname, const 
char *tag,
+                      struct stat *statp, bool dot_cmd)
 {
    char *p;
    const char *f;
@@ -623,7 +651,7 @@ static void ls_output(guid_list *guid, char *buf, const 
char *fname, const char
       *p++ = ',';
       n = sprintf(p, "%d,", (uint32_t)statp->st_nlink);
       p += n;
-      n = sprintf(p, "%s,%s,", 
+      n = sprintf(p, "%s,%s,",
                   guid->uid_to_name(statp->st_uid, en1, sizeof(en1)),
                   guid->gid_to_name(statp->st_gid, en2, sizeof(en2)));
       p += n;
@@ -636,7 +664,7 @@ static void ls_output(guid_list *guid, char *buf, const 
char *fname, const char
    } else {
       n = sprintf(p, "  %2d ", (uint32_t)statp->st_nlink);
       p += n;
-      n = sprintf(p, "%-8.8s %-8.8s", 
+      n = sprintf(p, "%-8.8s %-8.8s",
                   guid->uid_to_name(statp->st_uid, en1, sizeof(en1)),
                   guid->gid_to_name(statp->st_gid, en2, sizeof(en2)));
       p += n;
diff --git a/bacula/src/lib/tree.c b/bacula/src/lib/tree.c
index 008e25a..7414026 100644
--- a/bacula/src/lib/tree.c
+++ b/bacula/src/lib/tree.c
@@ -102,6 +102,8 @@ TREE_ROOT *new_tree(int count)
    root->cached_path = get_pool_memory(PM_FNAME);
    root->type = TN_ROOT;
    root->fname = "";
+   HL_ENTRY* entry = NULL;
+   root->hardlinks.init( entry, &entry->link, 0, 1 );
    return root;
 }
 
@@ -172,6 +174,7 @@ void free_tree(TREE_ROOT *root)
    struct s_mem *mem, *rel;
    uint32_t freed_blocks = 0;
 
+   root->hardlinks.destroy();
    for (mem=root->mem; mem; ) {
       rel = mem;
       mem = mem->next;
@@ -192,7 +195,7 @@ void free_tree(TREE_ROOT *root)
 void tree_add_delta_part(TREE_ROOT *root, TREE_NODE *node,
                          JobId_t JobId, int32_t FileIndex)
 {
-   struct delta_list *elt = 
+   struct delta_list *elt =
       (struct delta_list*) tree_alloc(root, sizeof(struct delta_list));
 
    elt->next = node->delta_list;
@@ -386,7 +389,7 @@ TREE_NODE *tree_cwd(char *path, TREE_ROOT *root, TREE_NODE 
*node)
    /* Handle relative path */
    if (path[0] == '.' && path[1] == '.' && (IsPathSeparator(path[2]) || 
path[2] == '\0')) {
       TREE_NODE *parent = node->parent ? node->parent : node;
-      if (path[2] == 0) { 
+      if (path[2] == 0) {
          return parent;
       } else {
          return tree_cwd(path+3, root, parent);
diff --git a/bacula/src/lib/tree.h b/bacula/src/lib/tree.h
index 3464cb1..1960a21 100644
--- a/bacula/src/lib/tree.h
+++ b/bacula/src/lib/tree.h
@@ -32,6 +32,8 @@
  *
 */
 
+#include "htable.h"
+
 struct s_mem {
    struct s_mem *next;                /* next buffer */
    int rem;                           /* remaining bytes */
@@ -112,9 +114,17 @@ struct s_tree_root {
    int cached_path_len;               /* length of cached path */
    char *cached_path;                 /* cached current path */
    TREE_NODE *cached_parent;          /* cached parent for above path */
+   htable hardlinks;                  /* references to first occurence of 
hardlinks */
 };
 typedef struct s_tree_root TREE_ROOT;
 
+/* hardlink hashtable entry */
+struct s_hl_entry {
+   uint64_t key;
+   hlink link;
+   TREE_NODE *node;
+};
+typedef struct s_hl_entry HL_ENTRY;
 
 /* type values */
 #define TN_ROOT    1                  /* root node */
@@ -130,7 +140,7 @@ TREE_NODE *insert_tree_node(char *path, char *fname, int 
type,
 TREE_NODE *make_tree_path(char *path, TREE_ROOT *root);
 TREE_NODE *tree_cwd(char *path, TREE_ROOT *root, TREE_NODE *node);
 TREE_NODE *tree_relcwd(char *path, TREE_ROOT *root, TREE_NODE *node);
-void tree_add_delta_part(TREE_ROOT *root, TREE_NODE *node, 
+void tree_add_delta_part(TREE_ROOT *root, TREE_NODE *node,
                          JobId_t JobId, int32_t FileIndex);
 void free_tree(TREE_ROOT *root);
 int tree_getpath(TREE_NODE *node, char *buf, int buf_size);
-- 
1.7.1


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Bacula-devel mailing list
Bacula-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bacula-devel

Reply via email to