On 01/18/2016 09:22 PM, William A Rowe Jr wrote:
On Mon, Jan 18, 2016 at 5:13 AM, Jan Kaluža <jkal...@redhat.com
<mailto:jkal...@redhat.com>> wrote:

    On 01/08/2016 07:44 PM, William A Rowe Jr wrote:

        Do we have to repeat the softmagic call if checkzmagic resolves to
        x-gzip/x-deflate and the internal content type needs to be
        deciphered?


    That's true.

    I think that Yann's patch moving the zmagic call after the softmagic
    call would just mean that zmagic won't be called at all if the
    softmagic recognizes the file format.

    We would have to check the softmagic result by something similar to
    "magic_rsl_to_request" and if it's a type we want to decompress, we
    would have to run zmagic.

    Before really trying to do so, I want to ask whether I understand the
    reasoning correctly. Are we considering this approach because users
    could then remove x-gzip from the mime magic file and thereby disable
    the mod_mime_magic behaviour discussed in this thread?


Yes... and in a more flexible manner that allows us to override any
compression mode, not only deflate, by simply tweaking the magic entries.

Putting magic file entry overrides into the mod_mime_magic directives is
a lot more interesting than simply toggling compression recognition.

Okay... I have something half-way right now.

The attached patch changes the order of softmagic and zmagic. When softmagic recognizes a file, zmagic is called. zmagic checks whether the file has been recognized as "x-gzip"; if so, it tries to uncompress it and calls "tryit" again with the uncompressed data, as before.

I'm now playing with the idea of extending the magic file so that we wouldn't have to keep the "x-gzip" -> "gzip -dcq" relation hardcoded; it could be defined in the magic file instead.

William, is the patch close to the behaviour you were describing?

Regards,
Jan Kaluza

Index: modules/metadata/mod_mime_magic.c
===================================================================
--- modules/metadata/mod_mime_magic.c	(revision 1725450)
+++ modules/metadata/mod_mime_magic.c	(working copy)
@@ -463,6 +463,8 @@
     magic_rsl *head;          /* result string list */
     magic_rsl *tail;
     unsigned suf_recursion;   /* recursion depth in suffix check */
+    char *type;               /* Content-Type cached value */
+    char *encoding;           /* Content-Encoding cached value */
 } magic_req_rec;
 
 /*
@@ -534,6 +536,8 @@
                                                       sizeof(magic_req_rec));
 
     req_dat->head = req_dat->tail = (magic_rsl *) NULL;
+    req_dat->type = NULL;
+    req_dat->encoding = NULL;
     ap_set_module_config(r->request_config, &mime_magic_module, req_dat);
     return req_dat;
 }
@@ -656,13 +660,26 @@
     return result;
 }
 
-/* states for the state-machine algorithm in magic_rsl_to_request() */
+static int magic_rsl_clear(request_rec *r)
+{
+    magic_req_rec *req_dat = (magic_req_rec *)
+                    ap_get_module_config(r->request_config, &mime_magic_module);
+    req_dat->head = NULL;
+    req_dat->tail = NULL;
+    req_dat->type = NULL;
+    req_dat->encoding = NULL;
+
+    /* success */
+    return 0;
+}
+
+/* states for the state-machine algorithm in magic_rsl_parse() */
 typedef enum {
     rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding
 } rsl_states;
 
 /* process the RSL and set the MIME info in the request record */
-static int magic_rsl_to_request(request_rec *r)
+static int magic_rsl_parse(request_rec *r, char **type, char **encoding)
 {
     int cur_frag,         /* current fragment number/counter */
         cur_pos,          /* current position within fragment */
@@ -673,7 +690,6 @@
         encoding_pos,     /* content encoding starting point: position */
         encoding_len;     /* content encoding length */
 
-    char *tmp;
     magic_rsl *frag;      /* list-traversal pointer */
     rsl_states state;
 
@@ -686,6 +702,13 @@
         return DECLINED;
     }
 
+    /* check for cached values filled in by previous run */
+    if (req_dat->type) {
+        *type = req_dat->type;
+        *encoding = req_dat->encoding;
+        return OK;
+    }
+
     /* start searching for the type and encoding */
     state = rsl_leading_space;
     type_frag = type_pos = type_len = 0;
@@ -785,24 +808,24 @@
     }
 
     /* save the info in the request record */
-    tmp = rsl_strdup(r, type_frag, type_pos, type_len);
+    *type = rsl_strdup(r, type_frag, type_pos, type_len);
     /* XXX: this could be done at config time I'm sure... but I'm
      * confused by all this magic_rsl stuff. -djg */
-    ap_content_type_tolower(tmp);
-    ap_set_content_type(r, tmp);
+    ap_content_type_tolower(*type);
+    req_dat->type = *type;
 
     if (state == rsl_encoding) {
-        tmp = rsl_strdup(r, encoding_frag,
+        *encoding = rsl_strdup(r, encoding_frag,
                                          encoding_pos, encoding_len);
         /* XXX: this could be done at config time I'm sure... but I'm
          * confused by all this magic_rsl stuff. -djg */
-        ap_str_tolower(tmp);
-        r->content_encoding = tmp;
+        ap_str_tolower(*encoding);
+        req_dat->encoding = *encoding;
     }
 
     /* detect memory allocation or other errors */
-    if (!r->content_type ||
-        (state == rsl_encoding && !r->content_encoding)) {
+    if (!*type ||
+        (state == rsl_encoding && !*encoding)) {
         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01511)
                       MODNAME ": unexpected state %d; could be caused by bad "
                       "data in magic file",
@@ -814,6 +837,43 @@
     return OK;
 }
 
+/* process the RSL and set the MIME info in the request record */
+static int magic_rsl_to_request(request_rec *r)
+{
+    char *type = NULL;
+    char *encoding = NULL;
+    int ret;
+
+    if ((ret = magic_rsl_parse(r, &type, &encoding)) != OK) {
+        return ret;
+    }
+
+    if (type) {
+        ap_set_content_type(r, type);
+    }
+
+    if (encoding) {
+        r->content_encoding = encoding;
+    }
+
+    /* success! */
+    return OK;
+}
+
+static char *magic_rsl_encoding(request_rec *r) {
+    char *type = NULL;
+    char *encoding = NULL;
+    int ret;
+
+    if ((ret = magic_rsl_parse(r, &type, &encoding)) != OK) {
+        return NULL;
+    }
+
+    fprintf(stderr, "XXX %s %s\n", type, encoding);
+
+    return encoding;
+}
+
 /*
  * magic_process - process input file r        Apache API request record
  * (formerly called "process" in file command, prefix added for clarity) Opens
@@ -870,7 +930,6 @@
     }
 
     (void) apr_file_close(fd);
-    (void) magic_rsl_putchar(r, '\n');
 
     return OK;
 }
@@ -880,24 +939,23 @@
                  int checkzmagic)
 {
     /*
-     * Try compression stuff
+     * try tests in /etc/magic (or surrogate magic file)
      */
-    if (checkzmagic == 1) {
-        if (zmagic(r, buf, nb) == 1)
+    if (softmagic(r, buf, nb) == 1) {
+        (void) magic_rsl_putchar(r, '\n');
+        if (checkzmagic && zmagic(r, buf, nb) == 1) {
             return OK;
+        }
+        return OK;
     }
 
     /*
-     * try tests in /etc/magic (or surrogate magic file)
-     */
-    if (softmagic(r, buf, nb) == 1)
-        return OK;
-
-    /*
      * try known keywords, check for ascii-ness too.
      */
-    if (ascmagic(r, buf, nb) == 1)
+    if (ascmagic(r, buf, nb) == 1) {
+        (void) magic_rsl_putchar(r, '\n');
         return OK;
+    }
 
     /*
      * abandon hope, all ye who remain here
@@ -2045,11 +2103,8 @@
  */
 
 static struct {
-    char *magic;
-    apr_size_t maglen;
+    char *encoding;
     char *argv[3];
-    int silent;
-    char *encoding;  /* MUST be lowercase */
 } compr[] = {
 
     /* we use gzip here rather than uncompress because we have to pass
@@ -2057,24 +2112,10 @@
      * ending with .Z
      */
     {
-        "\037\235", 2, {
+        "x-gzip", {
             "gzip", "-dcq", NULL
-        }, 0, "x-compress"
-    },
-    {
-        "\037\213", 2, {
-            "gzip", "-dcq", NULL
-        }, 1, "x-gzip"
-    },
-    /*
-     * XXX pcat does not work, cause I don't know how to make it read stdin,
-     * so we use gzip
-     */
-    {
-        "\037\036", 2, {
-            "gzip", "-dcq", NULL
-        }, 0, "x-gzip"
-    },
+        }
+    }
 };
 
 static int ncompr = sizeof(compr) / sizeof(compr[0]);
@@ -2084,12 +2125,16 @@
     unsigned char *newbuf;
     int newsize;
     int i;
+    char *encoding;
 
+    if ((encoding = magic_rsl_encoding(r)) == NULL) {
+        return 0;
+    }
+
     for (i = 0; i < ncompr; i++) {
-        if (nbytes < compr[i].maglen)
-            continue;
-        if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0)
+        if (strcmp(encoding, compr[i].encoding) == 0) {
             break;
+        }
     }
 
     if (i == ncompr)
@@ -2096,6 +2141,8 @@
         return 0;
 
     if ((newsize = uncompress(r, i, &newbuf, HOWMANY)) > 0) {
+        magic_rsl_clear(r);
+
         /* set encoding type in the request record */
         r->content_encoding = compr[i].encoding;
 

Reply via email to