On 08/19/2013 09:10 PM, Jakub Hrozek wrote:
On Thu, Aug 15, 2013 at 06:42:53PM +0200, Michal Židek wrote:
On 08/15/2013 11:01 AM, Lukas Slebodnik wrote:
On (14/08/13 18:50), Michal Židek wrote:
On 08/14/2013 04:39 PM, Lukas Slebodnik wrote:
On (12/08/13 17:47), Michal Židek wrote:
Hello,

I think it could be useful to store the corrupted memory cache
before reset. We have very little info about what was really
wrong in the cache when it was in an inconsistent state. This way we
could ask users to send us a copy of the corrupted cache if they
hit this issue. It could provide some more answers.

Patch is attached. It stores the corrupted cache in
/var/lib/sss/mc/<cache_name>_corrupted.

Thanks
Michal

Please rebase patch on top of patches from thread
"[PATCH] mmap_cache: Check data->name value in client code"

It cannot be applied cleanly.

LS


Ok. New patch is attached. (Rebased on top of the patches in thread
[SSSD] [PATCH] mmap_cache: Check data->name value in client code)

Thanks
Michal

Backup of the memory-mapped cache was created (gdb cheating)

ACK

LS

Just sending rebased version.

Michal


The patch looks good to me and I was about to push it but I wonder if we
should call sss_log() to explain to the admin what happened and what are
these strange files sssd created?

Ok. When the copy of the memcache is successfully created, sss_log() is called with SSS_LOG_NOTICE. We already make enough noise in the sssd logs, so I think the NOTICE level in syslog is sufficient.

New patch attached.
Michal

>From 60321614ce4b9a1b3c920056a80419e8271e3012 Mon Sep 17 00:00:00 2001
From: Michal Zidek <[email protected]>
Date: Mon, 12 Aug 2013 16:23:59 +0200
Subject: [PATCH] mmap_cache: Store corrupted mmap cache before reset

This patch adds a function to store the corrupted mmap cache file to
disk for further analysis.
---
 src/responder/nss/nsssrv_mmap_cache.c | 65 +++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/src/responder/nss/nsssrv_mmap_cache.c b/src/responder/nss/nsssrv_mmap_cache.c
index 95a7fe9..d787be9 100644
--- a/src/responder/nss/nsssrv_mmap_cache.c
+++ b/src/responder/nss/nsssrv_mmap_cache.c
@@ -93,6 +93,67 @@ struct sss_mc_ctx {
     else used = false; \
 } while (0)
 
+/* Save a copy of the (corrupted) memory cache to "<mc_ctx->file>_corrupted"
+ * for later analysis. Best effort: every failure is only logged. */
+static void sss_mc_save_corrupted(struct sss_mc_ctx *mc_ctx)
+{
+    int err;
+    int fd = -1;
+    ssize_t written;
+    char *file = NULL;
+    TALLOC_CTX *tmp_ctx;
+
+    if (mc_ctx == NULL) {
+        DEBUG(SSSDBG_TRACE_FUNC,
+              ("Cannot store uninitialized cache. Nothing to do.\n"));
+        return;
+    }
+
+    tmp_ctx = talloc_new(NULL);
+    if (tmp_ctx == NULL) {
+        DEBUG(SSSDBG_CRIT_FAILURE, ("Out of memory.\n"));
+        return;
+    }
+
+    file = talloc_asprintf(tmp_ctx, "%s_%s", mc_ctx->file, "corrupted");
+    if (file == NULL) {
+        DEBUG(SSSDBG_CRIT_FAILURE, ("Out of memory.\n"));
+        goto done;
+    }
+
+    /* We will always store only the last problematic cache state */
+    fd = creat(file, 0600);
+    if (fd == -1) {
+        err = errno;
+        DEBUG(SSSDBG_CRIT_FAILURE,
+              ("Failed to open file '%s' [%d]: %s\n",
+               file, err, strerror(err)));
+        goto done;
+    }
+
+    written = write(fd, mc_ctx->mmap_base, mc_ctx->mmap_size);
+    if (written != mc_ctx->mmap_size) {
+        if (written == -1) {
+            err = errno;
+            DEBUG(SSSDBG_CRIT_FAILURE,
+                  ("write() failed [%d]: %s\n", err, strerror(err)));
+        } else {
+            DEBUG(SSSDBG_CRIT_FAILURE,
+                  ("write() returned %zd (expected (%zd))\n",
+                   written, mc_ctx->mmap_size));
+        }
+        goto done; /* Incomplete copy: do not announce it as stored. */
+    }
+
+    sss_log(SSS_LOG_NOTICE,
+            "Stored copy of corrupted mmap cache in file '%s'\n", file);
+done:
+    if (fd != -1) {
+        close(fd);
+    }
+    talloc_free(tmp_ctx);
+}
+
 static uint32_t sss_mc_hash(struct sss_mc_ctx *mcc,
                             const char *key, size_t len)
 {
@@ -421,6 +482,7 @@ static struct sss_mc_rec *sss_mc_find_record(struct sss_mc_ctx *mcc,
         if (!MC_SLOT_WITHIN_BOUNDS(slot, mcc->dt_size)) {
             DEBUG(SSSDBG_FATAL_FAILURE,
                   ("Corrupted fastcache. Slot number too big.\n"));
+            sss_mc_save_corrupted(mcc);
             sss_mmap_cache_reset(mcc);
             return NULL;
         }
@@ -437,6 +499,7 @@ static struct sss_mc_rec *sss_mc_find_record(struct sss_mc_ctx *mcc,
             || (uint8_t *)rec->data + strs_offset + strs_len > max_addr) {
             DEBUG(SSSDBG_FATAL_FAILURE,
                   ("Corrupted fastcache. name_ptr value is %u.\n", name_ptr));
+            sss_mc_save_corrupted(mcc);
             sss_mmap_cache_reset(mcc);
             return NULL;
         }
@@ -675,6 +738,7 @@ errno_t sss_mmap_cache_pw_invalidate_uid(struct sss_mc_ctx *mcc, uid_t uid)
     while (slot != MC_INVALID_VAL) {
         if (!MC_SLOT_WITHIN_BOUNDS(slot, mcc->dt_size)) {
             DEBUG(SSSDBG_FATAL_FAILURE, ("Corrupted fastcache.\n"));
+            sss_mc_save_corrupted(mcc);
             sss_mmap_cache_reset(mcc);
             ret = ENOENT;
             goto done;
@@ -813,6 +877,7 @@ errno_t sss_mmap_cache_gr_invalidate_gid(struct sss_mc_ctx *mcc, gid_t gid)
     while (slot != MC_INVALID_VAL) {
         if (!MC_SLOT_WITHIN_BOUNDS(slot, mcc->dt_size)) {
             DEBUG(SSSDBG_FATAL_FAILURE, ("Corrupted fastcache.\n"));
+            sss_mc_save_corrupted(mcc);
             sss_mmap_cache_reset(mcc);
             ret = ENOENT;
             goto done;
-- 
1.7.11.2

_______________________________________________
sssd-devel mailing list
[email protected]
https://lists.fedorahosted.org/mailman/listinfo/sssd-devel

Reply via email to