In mod_include, we use apr_pcalloc to create an include_ctx_t,
which consists of about 80 bytes of integer and pointer fields
and a pair of character buffers used to hold strings. Each
of the character buffers is 8KB, so the structure is over 16KB.
Calloc'ing all this space accounts for about 9% of the total
user-mode CPU time in httpd.
This patch zero-fills just the integer and pointer fields in
the structure, plus the first byte of each of the string buffers.
With the patch, I'm seeing a 19% improvement in SSI throughput
in informal testing ("ab" running a single request at a time
over the loopback interface on Linux).
--Brian
Index: modules/filters/mod_include.h
===================================================================
RCS file: /home/cvspublic/httpd-2.0/modules/filters/mod_include.h,v
retrieving revision 1.22
diff -u -r1.22 mod_include.h
--- modules/filters/mod_include.h 2001/09/02 01:09:02 1.22
+++ modules/filters/mod_include.h 2001/11/10 04:57:38
@@ -156,10 +156,16 @@
apr_size_t tag_length;
apr_size_t error_length;
+ apr_bucket_brigade *ssi_tag_brigade;
+
+ /* NOTE: when this structure is allocated, in includes_filter() in
+ * mod_include.c, only fields before error_str are initialized to
+ * zero. (error_str and time_str are huge, so it would be wasteful
+ * to zero-fill them.) If any new fields are added to this struct,
+ * they should be added *before* error_str.
+ */
char error_str[MAX_STRING_LEN];
char time_str[MAX_STRING_LEN];
-
- apr_bucket_brigade *ssi_tag_brigade;
} include_ctx_t;
/* These flags are used to set flag bits. */
Index: modules/filters/mod_include.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/modules/filters/mod_include.c,v
retrieving revision 1.150
diff -u -r1.150 mod_include.c
--- modules/filters/mod_include.c 2001/09/19 06:25:07 1.150
+++ modules/filters/mod_include.c 2001/11/10 04:57:39
@@ -3015,7 +3015,19 @@
}
if (!f->ctx) {
- f->ctx = ctx = apr_pcalloc(f->c->pool, sizeof(*ctx));
+ /* Note that the include_ctx_t structure has a set of small
+ * fields followed by a couple of huge character buffers.
+ * Here we initialize the small fields to zero but just set
+ * the first byte of each of the large buffers to zero.
+ * This trick saves a lot of time, but it depends on these
+ * buffers being the last fields in the struct. (See the
+ * accompanying warning in the struct declaration in
+ * mod_include.h)
+ */
+ f->ctx = ctx = apr_palloc(f->c->pool, sizeof(*ctx));
+ memset(f->ctx, 0, (void *)&(ctx->error_str) - (void *)ctx);
+ ctx->error_str[0] = 0;
+ ctx->time_str[0] = 0;
if (ctx != NULL) {
ctx->state = PRE_HEAD;
ctx->flags = (FLAG_PRINTING | FLAG_COND_TRUE);