Ian has posted his mod_gz filter before, now I'd like to give it a +1.

I told him I'd look at it a while ago, but never got a chance to do 
so.  So, I spent this morning cleaning up the configuration and a bit 
of the code to fit our style (nothing major).

I'd like to add this to the modules/filters directory (which seems
like the most appropriate place).

Can I get two other +1s?  I've reviewed the code and can get it
confirmed working with Netscape 4.77 and Mozilla 0.9.3 by adding the 
following to httpd.conf:

<IfModule mod_gz.c>
    GZFilter On
    AddOutputFilter GZ html
</IfModule>

We could remove both the GZFilter directive, which really serves no
purpose, and the text/html check in mod_gz.  I'd like to commit something
that is close to what Ian originally submitted and then tweak it slightly.

(Interesting to note that Netscape 4.77 does not allow you to view
the source of a gzipped entity while Mozilla shows you the
decompressed entity.  Mozilla is getting cool...)

I'm sure we can do more analysis of its performance (what the
appropriate deflation settings should be), but I'd really like to get
this in first.  =-)  Please test and report back...  -- justin

Index: config.m4
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/filters/config.m4,v
retrieving revision 1.6
diff -u -r1.6 config.m4
--- config.m4   2001/05/12 03:48:31     1.6
+++ config.m4   2001/09/01 21:38:16
@@ -6,6 +6,55 @@
 
 APACHE_MODULE(include, Server Side Includes, , , yes)
 
+APACHE_MODULE(gz, GZip encoding support, "mod_gz.lo", , most, [
+  AC_ARG_WITH(z, [  --with-z=DIR          use a specific zlib library],
+  [
+    if test "x$withval" != "xyes" && test "x$withval" != "x"; then
+      ap_zlib_base="$withval"
+    fi
+  ])
+  if test "x$ap_zlib_base" = "x"; then
+    AC_MSG_CHECKING([for zlib location])
+    AC_CACHE_VAL(ap_cv_zlib,[
+      for dir in /usr/local /usr ; do
+        if test -d $dir && test -f $dir/include/zlib.h; then
+          ap_cv_zlib=$dir
+          break
+        fi
+      done
+    ])
+    ap_zlib_base=$ap_cv_zlib
+    if test "x$ap_zlib_base" = "x"; then
+      enable_gz=no
+      AC_MSG_RESULT([not found])
+    else
+      AC_MSG_RESULT([$ap_zlib_base])
+    fi
+  fi
+  if test "$enable_gz" != "no"; then
+    ap_save_includes=$INCLUDES  # same variable that is modified and restored below
+    ap_save_ldflags=$LDFLAGS
+    ap_save_libs=$LIBS
+    if test "$ap_zlib_base" != "/usr"; then
+      APR_ADDTO(INCLUDES, [-I${ap_zlib_base}/include])
+      APR_ADDTO(LDFLAGS, [-L${ap_zlib_base}/lib])
+      if test "x$ap_platform_runtime_link_flag" != "x"; then
+         APR_ADDTO(LDFLAGS, [$ap_platform_runtime_link_flag${ap_zlib_base}/lib])
+      fi
+    fi
+    APR_ADDTO(LIBS, [-lz])
+    AC_MSG_CHECKING([for zlib library])
+    AC_TRY_LINK([#include <zlib.h>], [return Z_OK;], 
+    [AC_MSG_RESULT(found) 
+     AC_CHECK_HEADERS(zutil.h)],
+    [AC_MSG_RESULT(not found)
+     enable_gz=no
+     INCLUDES=$ap_save_includes
+     LDFLAGS=$ap_save_ldflags
+     LIBS=$ap_save_libs])
+  fi
+])
+
 APR_ADDTO(LT_LDFLAGS,-export-dynamic)
 
 APACHE_MODPATH_FINISH

Index: mod_gz.c
===================================================================
/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000-2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact [EMAIL PROTECTED]
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 * Portions of this software are based upon public domain software
 * (zlib functions gz_open and gzwrite)
 */

/*
 * mod_gz.c: GZip's HTML content on the fly
 * 
 * Written by Ian Holsman ([EMAIL PROTECTED])
 * 
 */

#include "httpd.h"
#include "http_config.h"
#include "http_log.h"
#include "apr_strings.h"
#include "apr_general.h"
#include "util_filter.h"
#include "apr_buckets.h"
#include "http_request.h"

#include "zlib.h"
#ifdef HAVE_ZUTIL_H
#include "zutil.h"
#else
/* As part of the encoding process, we must send what our OS_CODE is
 * (or so it seems based on what I can tell of how gzip encoding works).
 * OS_CODE is written as the last byte of the 10-byte gzip header built
 * in GZFilterOutFilter.
 *
 * zutil.h is not always included with zlib distributions (it is a private
 * header), so this is straight from zlib 1.1.3's zutil.h.  - JRE
 */
#ifdef OS2
#  define OS_CODE  0x06
#endif

#ifdef WIN32 /* Windows 95 & Windows NT */
#  define OS_CODE  0x0b
#endif

#if defined(VAXC) || defined(VMS)
#  define OS_CODE  0x02
#endif

#ifdef AMIGA
#  define OS_CODE  0x01
#endif

#if defined(ATARI) || defined(atarist)
#  define OS_CODE  0x05
#endif

#if defined(MACOS) || defined(TARGET_OS_MAC)
#  define OS_CODE  0x07
#endif

#ifdef __50SERIES /* Prime/PRIMOS */
#  define OS_CODE  0x0F
#endif

#ifdef TOPS20
#  define OS_CODE  0x0a
#endif

/* None of the platform macros above matched: fall back to the Unix code. */
#ifndef OS_CODE
#  define OS_CODE  0x03  /* assume Unix */
#endif
#endif /* !HAVE_ZUTIL_H */

/* Name under which the output filter is registered and inserted. */
static const char s_szGZFilterName[] = "GZ";
/* Forward declaration; the module record is defined at the end of the file. */
module AP_MODULE_DECLARE_DATA gz_module;

/* Per-server settings for the GZ output filter. */
typedef struct
{
    int bEnabled;      /* non-zero when "GZFilter On" was given */
    int windowSize;    /* zlib window bits, stored negated (see GZSetWindowSize) */
    int memlevel;      /* zlib memory level handed to deflateInit2 */
    char *noteName;    /* request note receiving the compression ratio, or NULL */
} GZFilterConfig;
/* Default window size; the value is negative to suppress the Zlib header
 * (the filter writes its own gzip header instead). */
#define GZFILTER_DEFAULT_WINDOWSIZE -15        
/* Default memory level passed to deflateInit2. */
#define GZFILTER_DEFAULT_MEMLEVEL 9
/* Size in bytes of the per-request compression output buffer.
 * NOTE(review): 8096 may have been intended as 8192 -- confirm. */
#define GZFILTER_BUFFERSIZE 8096
/*#define GZFILTER_COMPRESSION "GZCOMPRESSION" */

/* Per-request filter state, stored in f->ctx between filter invocations. */
typedef struct gz_ctx
{
    z_stream strm;                      /* zlib deflate state */
    char buffer[GZFILTER_BUFFERSIZE];   /* staging area for deflate output */
    unsigned long crc;                  /* running CRC-32 of the input data */
} gz_ctx;

/* Stores x into string[0..3], least significant byte first.
 * Only the low 4 bytes of x are written, as required by the 32-bit
 * CRC and length fields of the GZ file format.
 */
static void putLong(char *string, unsigned long x)
{
    int i;

    for (i = 0; i < 4; i++) {
        string[i] = (char) ((x >> (8 * i)) & 0xff);
    }
}

/* Allocates a zero-filled GZFilterConfig from pool p and fills in the
 * defaults: filter disabled, default window size and memory level.
 * noteName stays NULL from the zeroing allocation.
 */
static void *GZFilterCreateServerConfig(apr_pool_t * p, server_rec * s)
{
    GZFilterConfig *conf;

    conf = apr_pcalloc(p, sizeof(GZFilterConfig));
    conf->windowSize = GZFILTER_DEFAULT_WINDOWSIZE;
    conf->memlevel = GZFILTER_DEFAULT_MEMLEVEL;
    conf->bEnabled = 0;

    return conf;
}
/* Handler for the GZWindowSize directive.
 * Accepts a value from 1 to 15 and stores it negated in the server
 * configuration; returns an error string for anything else, NULL on success.
 */
static const char *GZSetWindowSize(cmd_parms * cmd, void *dummy, const char* arg)
{
    GZFilterConfig *conf = ap_get_module_config(cmd->server->module_config,
                                                &gz_module);
    int bits = atoi(arg);

    if (bits >= 1 && bits <= 15) {
        /* kept negative so that zlib leaves out its own header */
        conf->windowSize = -bits;
        return NULL;
    }

    return "GZWindowSize must be between 1 and 15";
}

/* Handler for the GZFilterNote directive.
 * Stores a copy of arg (allocated from cmd->pool) as the name of the
 * request note that receives the compression ratio.  Always returns NULL.
 */
static const char *GZSetNote(cmd_parms * cmd, void *dummy, const char* arg)
{
    GZFilterConfig *conf;

    conf = ap_get_module_config(cmd->server->module_config, &gz_module);
    conf->noteName = apr_pstrdup(cmd->pool, arg);

    return NULL;
}

/* Handler for the GZMemLevel directive.
 * Accepts a value from 1 to 9 and stores it in the server configuration;
 * returns an error string for anything else, NULL on success.
 */
static const char *GZSetMemLevel(cmd_parms * cmd, void *dummy, const char* arg)
{
    GZFilterConfig *conf = ap_get_module_config(cmd->server->module_config,
                                                &gz_module);
    int level = atoi(arg);

    if (level >= 1 && level <= 9) {
        conf->memlevel = level;
        return NULL;
    }

    return "GZMemLevel must be between 1 and 9";
}

/* Handler for the GZFilter flag directive.
 * Records the flag value in the server configuration.  Always returns NULL.
 */
static const char *GZFilterEnable(cmd_parms * cmd, void *dummy, int arg)
{
    GZFilterConfig *conf;

    conf = ap_get_module_config(cmd->server->module_config, &gz_module);
    conf->bEnabled = arg;

    return NULL;
}

/* Adds the GZ output filter to request r when the filter is enabled in
 * the server configuration; does nothing otherwise.
 */
static void GZFilterInsertFilter(request_rec * r)
{
    GZFilterConfig *conf = ap_get_module_config(r->server->module_config,
                                                &gz_module);

    if (conf->bEnabled) {
        ap_add_output_filter(s_szGZFilterName, NULL, r, r->connection);
    }
}

/* First two bytes of the gzip header emitted by GZFilterOutFilter. */
static int gz_magic[2] = { 0x1f, 0x8b };        /* gzip magic header */

static apr_status_t GZFilterOutFilter(ap_filter_t * f,
                                      apr_bucket_brigade * pbbIn)
{
    apr_bucket *pbktIn;
    apr_bucket_brigade *pbbOut;
    apr_bucket *pbktOut;
    char *buf;
    char *b;
    const char *accepts;
    request_rec *r = f->r;
    gz_ctx *ctx;
    char *token = NULL;
    int zRC;
    GZFilterConfig *pConfig = ap_get_module_config(r->server->module_config,
                                                   &gz_module);

    if ( pConfig->bEnabled != 1 )
        return ap_pass_brigade(f->next, pbbIn);

    /* only work on main request/no subrequests */
    if (r->main)
        return ap_pass_brigade(f->next, pbbIn);

    /* GETs only (for the moment) */
    if (r->method_number != M_GET) {
        return ap_pass_brigade(f->next, pbbIn);
    }

    /* only compress text/html files */
    if (strncmp(r->content_type, "text/html", 9))
        return ap_pass_brigade(f->next, pbbIn);

    /* some browsers might have problems, so set no-gzip (with browsermatch) 
     * for them */
    if (apr_table_get(r->subprocess_env, "no-gzip")) {
        return ap_pass_brigade(f->next, pbbIn);
    }

    /* if they don't have the line, then they can't play */
    accepts = apr_table_get(r->headers_in, "Accept-Encoding");
    if (accepts == NULL) {
        return ap_pass_brigade(f->next, pbbIn);
    }

    token = ap_get_token(r->pool, &accepts, 0);
    while (token && token[0] && strcmp(token, "gzip")) {
        accepts++;                /* skip token */
        token = ap_get_token(r->pool, &accepts, 0);
    }

    if (token == NULL || token[0] == '\0') {
        return ap_pass_brigade(f->next, pbbIn);
    }

    pbbOut = apr_brigade_create(f->r->pool);
    if (!f->ctx) {
        f->ctx = apr_pcalloc(f->c->pool, sizeof(*ctx));
        ctx = f->ctx;
/*
        ctx->strm.zalloc = (alloc_func) 0;
        ctx->strm.zfree = (free_func) 0;
        ctx->strm.opaque = (voidpf) 0;
        ctx->crc = 0L;
*/
        zRC = deflateInit2(&ctx->strm, Z_BEST_SPEED, Z_DEFLATED,
                           pConfig->windowSize, pConfig->memlevel,
                           Z_DEFAULT_STRATEGY);
        if (zRC != Z_OK) {
            f->ctx = NULL;
            ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, r,
                        "unable to init Zlib: deflateInit2 returned %d: URL %s",
                        zRC, r->uri);
            return ap_pass_brigade(f->next, pbbIn);
        }
        buf = apr_psprintf(r->pool, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0],
                           gz_magic[1], Z_DEFLATED, 0 /*flags */ , 0, 0, 0,
                           0 /*time */ , 0 /*xflags */ , OS_CODE);
        pbktOut = apr_bucket_pool_create(buf, 10, r->pool);
        APR_BRIGADE_INSERT_TAIL(pbbOut, pbktOut);

        apr_table_setn(r->headers_out, "Content-Encoding", "gzip");
        apr_table_setn(r->headers_out, "Vary", "Accept-Encoding");
    }
    else {
        ctx = f->ctx;
    }

    APR_BRIGADE_FOREACH(pbktIn, pbbIn) {
        const char *data;
        apr_size_t lenin;
        unsigned int len;
        char *buf;
        char szCRC[4];
        char szLen[4];

        apr_size_t e_wrt;
        int done = 0;

        if (APR_BUCKET_IS_EOS(pbktIn)) {
            apr_bucket *pbktEOS = apr_bucket_eos_create();

            ctx->strm.avail_in = 0;        /* should be zero already anyway */
            for (;;) {
                len = GZFILTER_BUFFERSIZE - ctx->strm.avail_out;
                if (len != 0) {
                    pbktOut =
                        apr_bucket_heap_create(ctx->buffer, len, 1, &e_wrt);
                    APR_BRIGADE_INSERT_TAIL(pbbOut, pbktOut);
                    ctx->strm.next_out = ctx->buffer;
                    ctx->strm.avail_out = GZFILTER_BUFFERSIZE;
                }
                if (done)
                    break;

                zRC = deflate(&ctx->strm, Z_FINISH);
                if (len == 0 && zRC == Z_BUF_ERROR)
                    zRC = Z_OK;
                done = (ctx->strm.avail_out != 0 || zRC == Z_STREAM_END);
                if (zRC != Z_OK && zRC != Z_STREAM_END)
                    break;
            }
            putLong(szCRC, ctx->crc);
            putLong(szLen, ctx->strm.total_in);
            buf = apr_palloc(r->pool, 8);
            b=buf;
            *b++=szCRC[0];
            *b++=szCRC[1];
            *b++=szCRC[2];
            *b++=szCRC[3];
            *b++=szLen[0];
            *b++=szLen[1];
            *b++=szLen[2];
            *b++=szLen[3];

            pbktOut = apr_bucket_pool_create(buf, 8, r->pool);
            APR_BRIGADE_INSERT_TAIL(pbbOut, pbktOut);
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r,
                          "Zlib: Compressed %ld to %ld : URL %s",
                          ctx->strm.total_in, ctx->strm.total_out, r->uri);
            if (pConfig->noteName) {
                 if (ctx->strm.total_in >0) {
                    apr_table_setn(r->notes, pConfig->noteName,
                         apr_itoa(r->pool, 
                            (ctx->strm.total_out*100/ctx->strm.total_in)));
                    } else {
                        apr_table_setn(r->notes,pConfig->noteName,"-");
                    }
            }

            deflateEnd(&ctx->strm);

            f->ctx = NULL;
            APR_BRIGADE_INSERT_TAIL(pbbOut, pbktOut);
            APR_BRIGADE_INSERT_TAIL(pbbOut, pbktEOS);
            break;
        }

        if (APR_BUCKET_IS_FLUSH(pbktIn)) {
            /* XXX FIX: do we need the Content-Size set, or can we stream?  
             * we should be able to stream */
            /* ignore flush buckets for the moment.. we can't stream as we 
             * need the size ;( */
            continue;
        }
        /* read */
        apr_bucket_read(pbktIn, &data, &lenin, APR_BLOCK_READ);
        ctx->crc = crc32(ctx->crc, (const Bytef *) data, lenin);

        /* write */
        ctx->strm.next_in = (char *) data;
        ctx->strm.avail_in = lenin;
        ctx->strm.next_out = ctx->buffer;
        ctx->strm.avail_out = GZFILTER_BUFFERSIZE;

        while (ctx->strm.avail_in != 0) {
            if (ctx->strm.avail_out == 0) {
                ctx->strm.next_out = ctx->buffer;
                len = GZFILTER_BUFFERSIZE - ctx->strm.avail_out;

                pbktOut = apr_bucket_heap_create(ctx->buffer, len, 1, &e_wrt);
                APR_BRIGADE_INSERT_TAIL(pbbOut, pbktOut);
                ctx->strm.avail_out = GZFILTER_BUFFERSIZE;
            }
            zRC = deflate(&(ctx->strm), Z_NO_FLUSH);
            if (zRC != Z_OK)
                break;
        }
    }
    return ap_pass_brigade(f->next, pbbOut);
}

/* Registers GZFilterOutFilter as an output filter under the name "GZ".
 * The insert_filter hook below is left commented out, so
 * GZFilterInsertFilter is not hooked in by this function.
 */
static void GZFilterRegisterHooks(apr_pool_t * p)
{
/*  static const char * const aszPost[] = { "CHUNK", NULL };
 *  ap_hook_insert_filter(GZFilterInsertFilter, NULL, aszPost, APR_HOOK_LAST);
 */
    ap_register_output_filter(
        s_szGZFilterName,
        GZFilterOutFilter,
        AP_FTYPE_HTTP_HEADER);
}

/* Configuration directives understood by this module.  Each entry maps a
 * directive name to its handler above; all are declared with RSRC_CONF.
 * The table is terminated by a {NULL} entry.
 */
static const command_rec GZFilterCmds[] = {
    /* on/off switch, stored in GZFilterConfig.bEnabled */
    AP_INIT_FLAG("GZFilter", GZFilterEnable, NULL, RSRC_CONF,
                 "Run a GZ filter on this host"),
    /* name of the request note receiving the compression ratio */
    AP_INIT_TAKE1("GZFilterNote", GZSetNote, NULL, RSRC_CONF,
                 "Set a note to report on compression ratio"),

    /* zlib tuning parameters, range-checked by their handlers */
    AP_INIT_TAKE1("GZWindowSize", GZSetWindowSize, NULL, RSRC_CONF,
                  "Set the Zlib window size (1-15)"),
    AP_INIT_TAKE1("GZMemLevel", GZSetMemLevel, NULL, RSRC_CONF,
                  "Set the Zlib Memory Level (1-9)"),

    {NULL}
};

/* Module record tying together the server config creator, the directive
 * table and the hook registration function defined above.
 * NOTE(review): the NULL slots are presumably the per-directory config
 * creator/merger and the server config merger of the Apache 2.0 module
 * layout -- confirm against http_config.h.
 */
module AP_MODULE_DECLARE_DATA gz_module = {
    STANDARD20_MODULE_STUFF,
    NULL,
    NULL,
    GZFilterCreateServerConfig,   /* creates the GZFilterConfig defaults */
    NULL,
    GZFilterCmds,                 /* directive table */
    GZFilterRegisterHooks         /* registers the "GZ" output filter */
};

Reply via email to