- closes #130

Signed-off-by: Jakub Filak <[email protected]>
---
 src/include/internal_libreport.h | 10 ++++++
 src/lib/problem_data.c           | 75 ++++++++++++++++++++++++----------------
 2 files changed, 56 insertions(+), 29 deletions(-)

diff --git a/src/include/internal_libreport.h b/src/include/internal_libreport.h
index ed18c2d..1d40c00 100644
--- a/src/include/internal_libreport.h
+++ b/src/include/internal_libreport.h
@@ -718,7 +718,12 @@ struct dump_dir *open_directory_for_writing(
 #define FILENAME_ENVIRON      "environ"
 #define FILENAME_LIMITS       "limits"
 #define FILENAME_OPEN_FDS     "open_fds"
+
+/* Global problem identifier. It is usually generated by some "analyze_*"
+ * event, because obtaining a strong problem identification may take a lot
+ * of time. */
 #define FILENAME_DUPHASH      "duphash"
+
 // Name of the function where the application crashed.
 // Optional.
 #define FILENAME_CRASH_FUNCTION "crash_function"
@@ -744,7 +749,12 @@ struct dump_dir *open_directory_for_writing(
 // The file should contain a description of an alert
 #define FILENAME_DESCRIPTION  "description"
 
+/* Local problem identifier (weaker than global identifier) designed for fast
+ * local duplicate identification. This file is usually provided by the
+ * crashed application (problem creator).
+ */
 #define FILENAME_UUID         "uuid"
+
 #define FILENAME_COUNT        "count"
 /* Multi-line list of places problem was reported.
  * Recommended line format:
diff --git a/src/lib/problem_data.c b/src/lib/problem_data.c
index c3f240b..ac712e8 100644
--- a/src/lib/problem_data.c
+++ b/src/lib/problem_data.c
@@ -73,42 +73,59 @@ void problem_data_add_basics(problem_data_t *pd)
     /* If application didn't provide dupe hash, we generate it
      * from all components, so we at least eliminate the exact same
      * reports
+     *
+     * We don't want to generate the DUPHASH file here because it is usually
+     * generated later in some "analyze_*" event. DUPHASH was originally
+     * designed as a global problem identifier, and generating a global
+     * identifier requires more space and data. UUID, by contrast, was designed
+     * as a local problem identifier, which means it is weaker (e.g. a hash
+     * generated from a coredump without debuginfo - there can be many similar
+     * backtraces without line numbers and function names).
      */
-    if (problem_data_get_content_or_NULL(pd, FILENAME_DUPHASH) == NULL)
+    if (problem_data_get_content_or_NULL(pd, FILENAME_UUID) == NULL)
     {
-        /* start hash */
-        sha1_ctx_t sha1ctx;
-        sha1_begin(&sha1ctx);
-
-        /*
-         * To avoid spurious hash differences, sort keys so that elements are
-         * always processed in the same order:
+        /* If application provided DUPHASH, we should use it in UUID as well.
+         * Otherwise we compute hash from all problem's data.
          */
-        GList *list = g_hash_table_get_keys(pd);
-        list = g_list_sort(list, (GCompareFunc)strcmp);
-        GList *l = list;
-        while (l)
+        const char *const duphash = problem_data_get_content_or_NULL(pd, FILENAME_DUPHASH);
+        if (duphash != NULL)
+            problem_data_add_text_noteditable(pd, FILENAME_UUID, duphash);
+        else
         {
-            const char *key = l->data;
-            l = l->next;
-            struct problem_item *item = g_hash_table_lookup(pd, key);
-            /* do not hash items which are binary (item->flags & CD_FLAG_BIN).
-             * Their ->content is full file name, with path. Path is always
-             * different and will make hash differ even if files are the same.
+            /* start hash */
+            sha1_ctx_t sha1ctx;
+            sha1_begin(&sha1ctx);
+
+            /*
+             * To avoid spurious hash differences, sort keys so that elements are
+             * always processed in the same order:
              */
-            if (item->flags & CD_FLAG_BIN)
-                continue;
-            sha1_hash(&sha1ctx, item->content, strlen(item->content));
-        }
-        g_list_free(list);
+            GList *list = g_hash_table_get_keys(pd);
+            list = g_list_sort(list, (GCompareFunc)strcmp);
+            GList *l = list;
+            while (l)
+            {
+                const char *key = l->data;
+                l = l->next;
+                struct problem_item *item = g_hash_table_lookup(pd, key);
+                /* do not hash items which are binary (item->flags & CD_FLAG_BIN).
+                 * Their ->content is full file name, with path. Path is always
+                 * different and will make hash differ even if files are the same.
+                 */
+                if (item->flags & CD_FLAG_BIN)
+                    continue;
+                sha1_hash(&sha1ctx, item->content, strlen(item->content));
+            }
+            g_list_free(list);
 
-        /* end hash */
-        char hash_bytes[SHA1_RESULT_LEN];
-        sha1_end(&sha1ctx, hash_bytes);
-        char hash_str[SHA1_RESULT_LEN*2 + 1];
-        bin2hex(hash_str, hash_bytes, SHA1_RESULT_LEN)[0] = '\0';
+            /* end hash */
+            char hash_bytes[SHA1_RESULT_LEN];
+            sha1_end(&sha1ctx, hash_bytes);
+            char hash_str[SHA1_RESULT_LEN*2 + 1];
+            bin2hex(hash_str, hash_bytes, SHA1_RESULT_LEN)[0] = '\0';
 
-        problem_data_add_text_noteditable(pd, FILENAME_DUPHASH, hash_str);
+            problem_data_add_text_noteditable(pd, FILENAME_UUID, hash_str);
+        }
     }
 }
 
-- 
1.8.1.2

Reply via email to