In mod_include, we use apr_pcalloc to create an include_ctx_t,
which consists of about 80 bytes of integer and pointer fields
and a pair of character buffers used to hold strings.  Each
of the character buffers is 8KB, so the whole structure is about 16KB.

calloc'ing all this space accounts for about 9% of the total
user-mode CPU time in httpd.

This patch zero-fills just the integer and pointer fields in
the structure, plus the first byte of each of the string buffers.
With the patch, I'm seeing a 19% improvement in SSI throughput
in informal testing (ab running a single request at a time,
over the loopback on Linux).

--Brian

Index: modules/filters/mod_include.h
===================================================================
RCS file: /home/cvspublic/httpd-2.0/modules/filters/mod_include.h,v
retrieving revision 1.22
diff -u -r1.22 mod_include.h
--- modules/filters/mod_include.h       2001/09/02 01:09:02     1.22
+++ modules/filters/mod_include.h       2001/11/10 04:57:38
@@ -156,10 +156,16 @@
     apr_size_t   tag_length;
 
     apr_size_t   error_length;
+    apr_bucket_brigade *ssi_tag_brigade;
+
+    /* NOTE: when this structure is allocated, in includes_filter() in
+     * mod_include.c, only fields before error_str are initialized to
+     * zero.  (error_str and time_str are huge, so it would be wasteful
+     * to zero-fill them.)  If any new fields are added to this struct,
+     * they should be added *before* error_str.
+     */
     char         error_str[MAX_STRING_LEN];
     char         time_str[MAX_STRING_LEN];
-
-    apr_bucket_brigade *ssi_tag_brigade;
 } include_ctx_t;
 
 /* These flags are used to set flag bits. */
Index: modules/filters/mod_include.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/modules/filters/mod_include.c,v
retrieving revision 1.150
diff -u -r1.150 mod_include.c
--- modules/filters/mod_include.c       2001/09/19 06:25:07     1.150
+++ modules/filters/mod_include.c       2001/11/10 04:57:39
@@ -3015,7 +3015,19 @@
     }
 
     if (!f->ctx) {
-        f->ctx = ctx = apr_pcalloc(f->c->pool, sizeof(*ctx));
+        /* Note that the include_ctx_t structure has a set of small
+         * fields followed by a couple of huge character buffers.
+         * Here we initialize the small fields to zero but just set
+         * the first byte of each of the large buffers to zero.
+         * This trick saves a lot of time, but it depends on these
+         * buffers being the last fields in the struct.  (See the
+         * accompanying warning in the struct declaration in
+         * mod_include.h)
+         */
+        f->ctx = ctx = apr_palloc(f->c->pool, sizeof(*ctx));
+        memset(f->ctx, 0, (void *)&(ctx->error_str) - (void *)ctx);
+        ctx->error_str[0] = 0;
+        ctx->time_str[0] = 0;
         if (ctx != NULL) {
             ctx->state = PRE_HEAD;
             ctx->flags = (FLAG_PRINTING | FLAG_COND_TRUE);

Reply via email to