GZ Filter.
This filter GZ-encodes files before they are sent out, resulting in fewer 
bytes on the network.
It has been coded to work only on HTML files (some browsers don't like 
gzipped JS/CSS files).
You can set the compression options (defaults to min CPU/max mem/max 
speed), and
it has the option of setting a note saying what compression level it 
achieved.

It has been tested on W2K and on Linux (functionality only; I didn't 
stress it), and it appears
to work OK.

2 Questions/Comments:
    * Currently you can enable the GZ Filter at the server level; 
should this be directory-specific?
        (mainly to get around filters' lack of being able to be turned 
off once set)
    * It uses AP_FTYPE_HTTP_HEADER, as it needs to run after all 
content has been created
       and before transcoding begins.
   
I'll create a documentation page later, but briefly, some notes on config.

(Settings are server-wide)

<IfModule mod_gz.c>
    GZFilter on    #enable GZ filtering on this server
    GZWindowSize 15    # zlib option
    GZMemLevel 9    # Zlib option
    GZFilterNote COMPRESS    #note to add to the custom log to show compression level
</IfModule>
<Location />
    SetOutputFilter GZ
</Location>


..Ian

(oh. I ran the code through indent with the 'approved' options as per 
the web page.. so the code should
follow the guidelines)
/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000-2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact [EMAIL PROTECTED]
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 * Portions of this software are based upon public domain software
 * (zlib functions gz_open and gzwrite)
 */

/*
 * mod_gz.c: GZip's HTML content on the fly
 * 
 * Written by Ian Holsman ([EMAIL PROTECTED])
 * 
 */


#include <stdlib.h>

#include "zlib.h"
#include "zutil.h"
#include "httpd.h"
#include "http_config.h"
#include "http_log.h"
#include "apr_strings.h"
#include "apr_general.h"
#include "util_filter.h"
#include "apr_buckets.h"
#include "http_request.h"


static const char s_szGZFilterName[] = "GZ";
module AP_MODULE_DECLARE_DATA gz_module;

/*
 * Per-server configuration for the GZ output filter.
 */
typedef struct {
    int   bEnabled;     /* set by the GZFilter flag directive */
    int   windowSize;   /* zlib window bits; stored negated by GZSetWindowSize */
    int   memlevel;     /* zlib memLevel; GZSetMemLevel accepts 1-9 */
    char *noteName;     /* request note that receives the ratio; NULL = none */
} GZFilterConfig;
/* default zlib window size; negative to suppress the Zlib header */
#define GZFILTER_DEFAULT_WINDOWSIZE (-15)
/* default zlib memory level */
#define GZFILTER_DEFAULT_MEMLEVEL   (9)
/* size in bytes of the deflate output buffer held in gz_ctx */
#define GZFILTER_BUFFERSIZE         (8096)
/*#define GZFILTER_COMPRESSION "GZCOMPRESSION" */

/*
 * State kept in f->ctx between invocations of the output filter.
 */
typedef struct gz_ctx {
    z_stream      strm;                         /* zlib deflate stream */
    char          buffer[GZFILTER_BUFFERSIZE];  /* deflate output staging area */
    unsigned long crc;                          /* running CRC-32 of the input */
} gz_ctx;

/* ===========================================================================
   Stores the low 32 bits of x into string[0..3], least significant byte first
*/
static void putLong(char *string, unsigned long x)
{
    int n = 0;

    while (n < 4) {
        string[n] = (int) (x & 0xff);
        x >>= 8;
        n++;
    }
}

/*
 * Server-config constructor: allocates a GZFilterConfig from p with the
 * filter disabled and the default zlib window size and memory level.
 * The server_rec argument is not used.
 */
static void *GZFilterCreateServerConfig(apr_pool_t * p, server_rec * s)
{
    GZFilterConfig *conf = apr_pcalloc(p, sizeof(GZFilterConfig));

    conf->windowSize = GZFILTER_DEFAULT_WINDOWSIZE;
    conf->memlevel = GZFILTER_DEFAULT_MEMLEVEL;
    conf->bEnabled = 0;

    return conf;
}
/*
 * Handler for the GZWindowSize directive.
 *
 * arg must be a decimal integer between 1 and 15 with no trailing
 * characters.  The value is stored negated, because a negative window
 * size is what suppresses the Zlib header (see
 * GZFILTER_DEFAULT_WINDOWSIZE).
 *
 * Returns NULL on success or an error message for the config parser.
 */
static const char *GZSetWindowSize(cmd_parms * cmd, void *dummy, const char* arg)
{
    GZFilterConfig *pConfig = ap_get_module_config(cmd->server->module_config,
                                                   &gz_module);
    char *end;
    long lWindowSize = strtol(arg, &end, 10);

    /* end == arg: no digits at all; *end != 0: junk after the number */
    if (end == arg || *end != '\0' || lWindowSize < 1 || lWindowSize > 15) {
        return "GZWindowSize must be between 1 and 15";
    }
    pConfig->windowSize = (int) -lWindowSize;

    return NULL;
}
/*
 * Handler for the GZFilterNote directive: stores a copy of arg, allocated
 * from cmd->pool, as the note name in the server's GZFilterConfig.
 * Always returns NULL.
 */
static const char *GZSetNote(cmd_parms * cmd, void *dummy, const char* arg)
{
    GZFilterConfig *conf;

    conf = ap_get_module_config(cmd->server->module_config, &gz_module);
    conf->noteName = apr_pstrdup(cmd->pool, arg);
    return NULL;
}
/*
 * Handler for the GZMemLevel directive.
 *
 * arg must be a decimal integer between 1 and 9 with no trailing
 * characters; it is stored as the memory level later passed to
 * deflateInit2().
 *
 * Returns NULL on success or an error message for the config parser.
 */
static const char *GZSetMemLevel(cmd_parms * cmd, void *dummy, const char* arg)
{
    GZFilterConfig *pConfig = ap_get_module_config(cmd->server->module_config,
                                                   &gz_module);
    char *end;
    long lMemLevel = strtol(arg, &end, 10);

    /* end == arg: no digits at all; *end != 0: junk after the number */
    if (end == arg || *end != '\0' || lMemLevel < 1 || lMemLevel > 9) {
        return "GZMemLevel must be between 1 and 9";
    }
    pConfig->memlevel = (int) lMemLevel;

    return NULL;
}
/*
 * Handler for the GZFilter flag directive: records arg as the enabled
 * state in the server's GZFilterConfig.  Always returns NULL.
 */
static const char *GZFilterEnable(cmd_parms * cmd, void *dummy, int arg)
{
    GZFilterConfig *conf;

    conf = ap_get_module_config(cmd->server->module_config, &gz_module);
    conf->bEnabled = arg;
    return NULL;
}

/*
 * Adds the GZ output filter to request r when the filter is enabled in
 * the server configuration; does nothing otherwise.
 */
static void GZFilterInsertFilter(request_rec * r)
{
    GZFilterConfig *conf = ap_get_module_config(r->server->module_config,
                                                &gz_module);

    if (conf->bEnabled) {
        ap_add_output_filter(s_szGZFilterName, NULL, r, r->connection);
    }
}

/* gzip magic header: the first two bytes of the stream (read-only) */
static const int gz_magic[2] = { 0x1f, 0x8b };

/*
 * Output filter that gzip-encodes the response body.
 *
 * The brigade is passed through untouched unless all of the following
 * hold: the filter is enabled for the server, this is a main request (not
 * a subrequest), the content type starts with "text/html", the method is
 * GET, the "no-gzip" environment variable is not set, and the client's
 * Accept-Encoding header contains the token "gzip".
 *
 * On the first compressible brigade the deflate stream is initialised, the
 * 10-byte gzip header is queued, Content-Encoding and Vary are set and the
 * original Content-Length is removed.  Data buckets are deflated through
 * ctx->buffer; compressed bytes that do not yet fill the buffer stay there
 * until a later bucket, a later invocation, or EOS.  On EOS the stream is
 * finished and the trailer (CRC-32 and input length, both LSB first) is
 * queued ahead of a new EOS bucket.  Flush buckets are dropped.
 *
 * f      the filter instance; f->ctx holds the gz_ctx between invocations
 * pbbIn  the brigade to compress
 *
 * Returns the status of ap_pass_brigade(), or the apr_bucket_read() status
 * if a bucket cannot be read.
 *
 * NOTE(review): the consumed input brigade is not destroyed here, and
 * deflateEnd() is only reached through EOS, so zlib's internal state is
 * not released if a request ends without one.
 */
static apr_status_t GZFilterOutFilter(ap_filter_t * f,
                                      apr_bucket_brigade * pbbIn)
{
    apr_bucket *pbktIn;
    apr_bucket_brigade *pbbOut;
    apr_bucket *pbktOut;
    char *buf;
    const char *accepts;
    request_rec *r = f->r;
    gz_ctx *ctx;
    char *token;
    int zRC;
    GZFilterConfig *pConfig = ap_get_module_config(r->server->module_config,
                                                   &gz_module);

    if (pConfig->bEnabled != 1)
        return ap_pass_brigade(f->next, pbbIn);

    /* only work on main request/no subrequests */
    if (r->main)
        return ap_pass_brigade(f->next, pbbIn);

    /* only compress text/html files; the content type may be unset */
    if (r->content_type == NULL
        || strncmp(r->content_type, "text/html", 9) != 0)
        return ap_pass_brigade(f->next, pbbIn);

    /* GETs only (for the moment) */
    if (r->method_number != M_GET) {
        return ap_pass_brigade(f->next, pbbIn);
    }
    /* some browsers might have problems, so set no-gzip (with browsermatch)
     * for them */
    if (apr_table_get(r->subprocess_env, "no-gzip")) {
        return ap_pass_brigade(f->next, pbbIn);
    }

    accepts = apr_table_get(r->headers_in, "Accept-Encoding");
    /* if they don't have the line, then they can't play */
    if (accepts == NULL) {
        return ap_pass_brigade(f->next, pbbIn);
    }

    token = ap_get_token(r->pool, &accepts, 0);
    while (token && token[0] && strcmp(token, "gzip")) {
        /* stop at the end of the header instead of stepping past its NUL */
        if (*accepts == '\0') {
            token = NULL;
            break;
        }
        /* skip the separator that ended the previous token */
        if (*accepts == ',' || *accepts == ';') {
            accepts++;
        }
        token = ap_get_token(r->pool, &accepts, 0);
    }
    if (token == NULL || token[0] == '\0') {
        return ap_pass_brigade(f->next, pbbIn);
    }

    pbbOut = apr_brigade_create(f->r->pool);
    if (!f->ctx) {
        /* the state is per request, so it must not outlive the request by
         * sitting in the connection pool across keep-alive requests */
        ctx = apr_pcalloc(r->pool, sizeof(*ctx));
        ctx->strm.zalloc = (alloc_func) 0;
        ctx->strm.zfree = (free_func) 0;
        ctx->strm.opaque = (voidpf) 0;
        ctx->crc = 0L;

        zRC = deflateInit2(&ctx->strm, Z_BEST_SPEED, Z_DEFLATED,
                           pConfig->windowSize, pConfig->memlevel,
                           Z_DEFAULT_STRATEGY);
        if (zRC != Z_OK) {
            ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, r,
                          "unable to init Zlib: deflateInit2 returned %d: URL %s",
                          zRC, r->uri);
            return ap_pass_brigade(f->next, pbbIn);
        }

        /* start with an empty output buffer; it is only rewound after its
         * contents have been copied into a bucket, so pending compressed
         * bytes are never overwritten and an EOS-only response emits
         * nothing but what deflate produces */
        ctx->strm.next_out = (Bytef *) ctx->buffer;
        ctx->strm.avail_out = GZFILTER_BUFFERSIZE;
        f->ctx = ctx;

        buf =
            apr_psprintf(r->pool, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0],
                         gz_magic[1], Z_DEFLATED, 0 /*flags */ , 0, 0, 0,
                         0 /*time */ , 0 /*xflags */ , OS_CODE);
        pbktOut = apr_bucket_pool_create(buf, 10, r->pool);
        APR_BRIGADE_INSERT_TAIL(pbbOut, pbktOut);

        apr_table_setn(r->headers_out, "Content-Encoding", "gzip");
        apr_table_setn(r->headers_out, "Vary", "Accept-Encoding");
        /* the length of the uncompressed body no longer applies */
        apr_table_unset(r->headers_out, "Content-Length");
    }
    else {
        ctx = f->ctx;
    }

    APR_BRIGADE_FOREACH(pbktIn, pbbIn) {
        const char *data;
        apr_size_t lenin;
        unsigned int len;
        char szCRC[4];
        char szLen[4];
        apr_size_t e_wrt;
        apr_status_t rv;
        int done = 0;

        if (APR_BUCKET_IS_EOS(pbktIn)) {
            apr_bucket *pbktEOS = apr_bucket_eos_create();

            ctx->strm.avail_in = 0;     /* should be zero already anyway */
            for (;;) {
                /* emit whatever is pending, then rewind the buffer */
                len = GZFILTER_BUFFERSIZE - ctx->strm.avail_out;
                if (len != 0) {
                    pbktOut =
                        apr_bucket_heap_create(ctx->buffer, len, 1, &e_wrt);
                    APR_BRIGADE_INSERT_TAIL(pbbOut, pbktOut);
                    ctx->strm.next_out = (Bytef *) ctx->buffer;
                    ctx->strm.avail_out = GZFILTER_BUFFERSIZE;
                }
                if (done)
                    break;

                zRC = deflate(&ctx->strm, Z_FINISH);
                /* no pending output and nothing more to produce is fine */
                if (len == 0 && zRC == Z_BUF_ERROR)
                    zRC = Z_OK;
                done = (ctx->strm.avail_out != 0 || zRC == Z_STREAM_END);
                if (zRC != Z_OK && zRC != Z_STREAM_END)
                    break;
            }

            /* gzip trailer: CRC-32 then uncompressed length */
            putLong(szCRC, ctx->crc);
            putLong(szLen, ctx->strm.total_in);
            buf =
                apr_psprintf(r->pool, "%c%c%c%c%c%c%c%c", szCRC[0], szCRC[1],
                             szCRC[2], szCRC[3], szLen[0], szLen[1], szLen[2],
                             szLen[3]);
            pbktOut = apr_bucket_pool_create(buf, 8, r->pool);
            APR_BRIGADE_INSERT_TAIL(pbbOut, pbktOut);
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r,
                          "Zlib: Compressed %lu to %lu : URL %s",
                          (unsigned long) ctx->strm.total_in,
                          (unsigned long) ctx->strm.total_out, r->uri);
            if (pConfig->noteName) {
                if (ctx->strm.total_in > 0) {
                    /* compressed size as a percentage of the original */
                    apr_table_setn(r->notes, pConfig->noteName,
                                   apr_itoa(r->pool,
                                            (int) (ctx->strm.total_out * 100 /
                                                   ctx->strm.total_in)));
                }
                else {
                    apr_table_setn(r->notes, pConfig->noteName, "-");
                }
            }

            deflateEnd(&ctx->strm);

            f->ctx = NULL;
            /* the trailer bucket is already in the brigade; only the EOS
             * remains to be appended */
            APR_BRIGADE_INSERT_TAIL(pbbOut, pbktEOS);
            break;
        }

        if (APR_BUCKET_IS_FLUSH(pbktIn)) {
            /* XXX FIX: do we need the Content-Size set, or can we stream?
             * we should be able to stream */
            /* ignore flush buckets for the moment.. we can't stream as we
             * need the size ;( */
            continue;
        }

        /* read */
        rv = apr_bucket_read(pbktIn, &data, &lenin, APR_BLOCK_READ);
        if (rv != APR_SUCCESS) {
            ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
                          "Zlib: unable to read bucket: URL %s", r->uri);
            return rv;
        }
        ctx->crc = crc32(ctx->crc, (const Bytef *) data, lenin);

        /* write */
        ctx->strm.next_in = (Bytef *) data;
        ctx->strm.avail_in = lenin;

        while (ctx->strm.avail_in != 0) {
            if (ctx->strm.avail_out == 0) {
                /* buffer full: hand it downstream and rewind */
                pbktOut = apr_bucket_heap_create(ctx->buffer,
                                                 GZFILTER_BUFFERSIZE, 1,
                                                 &e_wrt);
                APR_BRIGADE_INSERT_TAIL(pbbOut, pbktOut);
                ctx->strm.next_out = (Bytef *) ctx->buffer;
                ctx->strm.avail_out = GZFILTER_BUFFERSIZE;
            }
            zRC = deflate(&(ctx->strm), Z_NO_FLUSH);
            if (zRC != Z_OK) {
                ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, r,
                              "Zlib: deflate returned %d: URL %s",
                              zRC, r->uri);
                break;
            }
        }
    }

    return ap_pass_brigade(f->next, pbbOut);
}


/*
 * Registers GZFilterOutFilter under the name in s_szGZFilterName as an
 * AP_FTYPE_HTTP_HEADER output filter.  The insert_filter hook is left
 * disabled below, so this function does not add the filter to any request.
 */
static void GZFilterRegisterHooks(apr_pool_t * p)
{
    /*
     * static const char * const aszPost[] = { "CHUNK", NULL };
     * ap_hook_insert_filter(GZFilterInsertFilter, NULL, aszPost, APR_HOOK_LAST);
     */
    ap_register_output_filter(s_szGZFilterName,
                              GZFilterOutFilter,
                              AP_FTYPE_HTTP_HEADER);
}

/*
 * Configuration directives provided by this module.  Every entry is
 * declared RSRC_CONF and is handled by the function named in its second
 * argument; the table ends with a {NULL} entry.
 */
static const command_rec GZFilterCmds[] = {
    /* flag: stored in GZFilterConfig.bEnabled */
    AP_INIT_FLAG("GZFilter", GZFilterEnable, NULL, RSRC_CONF,
                 "Run a GZ filter on this host"),
    /* one argument: name of the request note that receives the ratio */
    AP_INIT_TAKE1("GZFilterNote", GZSetNote, NULL, RSRC_CONF,
                 "Set a note to report on compression ratio"),

    /* one argument each: zlib tuning values, range-checked by the handler */
    AP_INIT_TAKE1("GZWindowSize", GZSetWindowSize, NULL, RSRC_CONF,
                 "Set the Zlib window size (1-15)"),
    AP_INIT_TAKE1("GZMemLevel", GZSetMemLevel, NULL, RSRC_CONF,
                 "Set the Zlib Memory Level (1-9)"),

    {NULL}
};

/*
 * Module record.  Slots follow the Apache 2.0 module structure order:
 * per-directory create and merge, per-server create and merge, the
 * directive table, and the hook registration function.
 */
module AP_MODULE_DECLARE_DATA gz_module = {
    STANDARD20_MODULE_STUFF,
    NULL,                       /* no per-directory config */
    NULL,                       /* no per-directory merge */
    GZFilterCreateServerConfig, /* per-server config constructor */
    NULL,                       /* no per-server merge */
    GZFilterCmds,               /* directive table */
    GZFilterRegisterHooks       /* registers the output filter */
};
# Microsoft Developer Studio Project File - Name="mod_gz" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **

# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102

CFG=mod_gz - Win32 Release
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE 
!MESSAGE NMAKE /f "mod_gz.mak".
!MESSAGE 
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE 
!MESSAGE NMAKE /f "mod_gz.mak" CFG="mod_gz - Win32 Release"
!MESSAGE 
!MESSAGE Possible choices for configuration are:
!MESSAGE 
!MESSAGE "mod_gz - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library")
!MESSAGE "mod_gz - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library")
!MESSAGE 

# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
MTL=midl.exe
RSC=rc.exe

!IF  "$(CFG)" == "mod_gz - Win32 Release"

# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MD /W3 /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /FD /c
# ADD CPP /nologo /MD /W3 /O2 /I "..\..\include" /I "..\..\os\win32" /I "..\..\srclib\apr\include" /I "../../srclib/apr-util/include" /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /Fd"Release\mod_gz" /FD /c
# ADD BASE MTL /nologo /D "NDEBUG" /win32
# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
# ADD BASE RSC /l 0x809 /d "NDEBUG"
# ADD RSC /l 0x809 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib /nologo /subsystem:windows /dll /map /machine:I386 /out:"Release/mod_gz.so" /base:@..\..\os\win32\BaseAddr.ref,mod_gz
# ADD LINK32 kernel32.lib /nologo /subsystem:windows /dll /map /machine:I386 /out:"Release/mod_gz.so" /base:@..\..\os\win32\BaseAddr.ref,mod_gz

!ELSEIF  "$(CFG)" == "mod_gz - Win32 Debug"

# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MDd /W3 /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /FD /c
# ADD CPP /nologo /MDd /W3 /GX /ZI /Od /I "..\..\include" /I "..\..\os\win32" /I "..\..\srclib\apr\include" /I "../../srclib/apr-util/include" /I "c:\src\zlib" /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /FR /Fd"Debug\mod_gz" /FD /c
# ADD BASE MTL /nologo /D "_DEBUG" /win32
# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
# ADD BASE RSC /l 0x809 /d "_DEBUG"
# ADD RSC /l 0x809 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib /nologo /subsystem:windows /dll /incremental:no /map /debug /machine:I386 /out:"Debug/mod_gz.so" /base:@..\..\os\win32\BaseAddr.ref,mod_gz
# ADD LINK32 kernel32.lib /nologo /subsystem:windows /dll /incremental:no /map /debug /machine:I386 /out:"Debug/mod_gz.so" /base:@..\..\os\win32\BaseAddr.ref,mod_gz

!ENDIF 

# Begin Target

# Name "mod_gz - Win32 Release"
# Name "mod_gz - Win32 Debug"
# Begin Source File

SOURCE=.\mod_gz.c
# End Source File
# End Target
# End Project
dnl Build glue for mod_gz: zlib is located through --with-z.
APACHE_MODPATH_INIT(gz)

APACHE_MODULE(gz, GZip encoding support, "mod_gz.lo", , no, [
  AC_MSG_CHECKING(for Zlib library)
  AC_ARG_WITH(z,   [  --with-z=DIR          use a specific zlib library],
  [
      # A bare --with-z searches the usual prefixes for a zlib library.
      # Either the static or the shared library is accepted.
      if test x"$withval" = x"yes"; then
          for dir in /usr /usr/local
          do
              if test -d "$dir" && { test -f "$dir/lib/libz.a" || test -f "$dir/lib/libz.so"; }; then
                  withval=$dir
                  break
              fi
          done
          if test x"$withval" = x"yes"; then
              AC_MSG_ERROR(Use --with-z=DIR to specify the location of your Zlib installation)
          fi
      fi
      z_lib=unknown
      # Each entry is product|include subdir|library subdir|linker flags
      for params in \
        "zlib|/include/|/lib|-lz " 
      do
          prod=`IFS="|"; set -- $params; echo $1`
          incdir=`IFS="|"; set -- $params; echo $2`
          libdir=`IFS="|"; set -- $params; echo $3`
          libs=`IFS="|"; set -- $params; echo $4`
          searchfile="${withval}${incdir}/zlib.h"
          # The header decides whether this prefix is usable.
          if test -f "${searchfile}" ; then
              APR_ADDTO(INCLUDES, [-I${withval}${incdir}])
              APR_ADDTO(LIBS, [-L${withval}${libdir} ${libs}])
              z_lib="${prod}"
              break
          fi
      done
      if test x"${z_lib}" = x"unknown"; then
        AC_MSG_ERROR(--with-z given but no appropriate lib found)
      else
        AC_MSG_RESULT(found $z_lib)
      fi
  ],[
      AC_MSG_ERROR(--with-z not given)
  ] ) ] )

APACHE_MODPATH_FINISH
# A module's Makefile has no explicit targets -- they will be defined by
# whatever modules are enabled. Just include special.mk to deal with this.
include $(top_srcdir)/build/special.mk

Reply via email to