- closes #130
Signed-off-by: Jakub Filak <[email protected]>
---
src/include/internal_libreport.h | 10 ++++++
src/lib/problem_data.c | 75 ++++++++++++++++++++++++----------------
2 files changed, 56 insertions(+), 29 deletions(-)
diff --git a/src/include/internal_libreport.h b/src/include/internal_libreport.h
index ed18c2d..1d40c00 100644
--- a/src/include/internal_libreport.h
+++ b/src/include/internal_libreport.h
@@ -718,7 +718,12 @@ struct dump_dir *open_directory_for_writing(
#define FILENAME_ENVIRON "environ"
#define FILENAME_LIMITS "limits"
#define FILENAME_OPEN_FDS "open_fds"
+
+/* Global problem identifier, usually generated by some "analyze_*" event
+ * because obtaining a strong problem identification may take a lot of
+ * time. */
#define FILENAME_DUPHASH "duphash"
+
// Name of the function where the application crashed.
// Optional.
#define FILENAME_CRASH_FUNCTION "crash_function"
@@ -744,7 +749,12 @@ struct dump_dir *open_directory_for_writing(
// The file should contain a description of an alert
#define FILENAME_DESCRIPTION "description"
+/* Local problem identifier (weaker than global identifier) designed for fast
+ * local duplicate identification. This file is usually provided by the
+ * crashed application (problem creator).
+ */
#define FILENAME_UUID "uuid"
+
#define FILENAME_COUNT "count"
/* Multi-line list of places problem was reported.
* Recommended line format:
diff --git a/src/lib/problem_data.c b/src/lib/problem_data.c
index c3f240b..ac712e8 100644
--- a/src/lib/problem_data.c
+++ b/src/lib/problem_data.c
@@ -73,42 +73,59 @@ void problem_data_add_basics(problem_data_t *pd)
/* If application didn't provide dupe hash, we generate it
* from all components, so we at least eliminate the exact same
* reports
+ *
+ * We don't want to generate the DUPHASH file here because it is usually
+ * generated later by some "analyze_*" event. DUPHASH was originally designed
+ * as a global problem identifier, and generating a global identifier requires
+ * more space and data. UUID, by contrast, was originally designed as a local
+ * problem identifier, which means it is weaker (e.g. a hash generated from
+ * a coredump without debuginfo - there can be many similar backtraces
+ * without line numbers and function names).
*/
- if (problem_data_get_content_or_NULL(pd, FILENAME_DUPHASH) == NULL)
+ if (problem_data_get_content_or_NULL(pd, FILENAME_UUID) == NULL)
{
- /* start hash */
- sha1_ctx_t sha1ctx;
- sha1_begin(&sha1ctx);
-
- /*
- * To avoid spurious hash differences, sort keys so that elements are
- * always processed in the same order:
+ /* If the application provided DUPHASH, reuse it as UUID as well.
+ * Otherwise compute a hash from the problem's non-binary data.
*/
- GList *list = g_hash_table_get_keys(pd);
- list = g_list_sort(list, (GCompareFunc)strcmp);
- GList *l = list;
- while (l)
+ const char *const duphash = problem_data_get_content_or_NULL(pd,
FILENAME_DUPHASH);
+ if (duphash != NULL)
+ problem_data_add_text_noteditable(pd, FILENAME_UUID, duphash);
+ else
{
- const char *key = l->data;
- l = l->next;
- struct problem_item *item = g_hash_table_lookup(pd, key);
- /* do not hash items which are binary (item->flags & CD_FLAG_BIN).
- * Their ->content is full file name, with path. Path is always
- * different and will make hash differ even if files are the same.
+ /* start hash */
+ sha1_ctx_t sha1ctx;
+ sha1_begin(&sha1ctx);
+
+ /*
+ * To avoid spurious hash differences, sort keys so that elements are
+ * always processed in the same order:
*/
- if (item->flags & CD_FLAG_BIN)
- continue;
- sha1_hash(&sha1ctx, item->content, strlen(item->content));
- }
- g_list_free(list);
+ GList *list = g_hash_table_get_keys(pd);
+ list = g_list_sort(list, (GCompareFunc)strcmp);
+ GList *l = list;
+ while (l)
+ {
+ const char *key = l->data;
+ l = l->next;
+ struct problem_item *item = g_hash_table_lookup(pd, key);
+ /* do not hash items which are binary (item->flags & CD_FLAG_BIN).
+ * Their ->content is full file name, with path. Path is always
+ * different and will make hash differ even if files are the same.
+ */
+ if (item->flags & CD_FLAG_BIN)
+ continue;
+ sha1_hash(&sha1ctx, item->content, strlen(item->content));
+ }
+ g_list_free(list);
- /* end hash */
- char hash_bytes[SHA1_RESULT_LEN];
- sha1_end(&sha1ctx, hash_bytes);
- char hash_str[SHA1_RESULT_LEN*2 + 1];
- bin2hex(hash_str, hash_bytes, SHA1_RESULT_LEN)[0] = '\0';
+ /* end hash */
+ char hash_bytes[SHA1_RESULT_LEN];
+ sha1_end(&sha1ctx, hash_bytes);
+ char hash_str[SHA1_RESULT_LEN*2 + 1];
+ bin2hex(hash_str, hash_bytes, SHA1_RESULT_LEN)[0] = '\0';
- problem_data_add_text_noteditable(pd, FILENAME_DUPHASH, hash_str);
+ problem_data_add_text_noteditable(pd, FILENAME_UUID, hash_str);
+ }
}
}
--
1.8.1.2