dgaudet 97/08/04 23:02:47
Modified: htdocs/manual vhosts-in-depth.html src CHANGES http_conf_globals.h http_config.c http_main.c http_protocol.c httpd.h Log: Hashed ip-vhosts, including some semantic changes to vhosts in general which should improve the vhost situation overall. Revision Changes Path 1.13 +4 -0 apache/htdocs/manual/vhosts-in-depth.html Index: vhosts-in-depth.html =================================================================== RCS file: /export/home/cvs/apache/htdocs/manual/vhosts-in-depth.html,v retrieving revision 1.12 retrieving revision 1.13 diff -u -r1.12 -r1.13 --- vhosts-in-depth.html 1997/07/06 17:18:57 1.12 +++ vhosts-in-depth.html 1997/08/05 06:02:38 1.13 @@ -186,6 +186,10 @@ <h3>Vhost Matching</h3> + +<p><strong>Apache 1.3 differs from what is documented +here, and documentation still has to be written.</strong> + <p> The server determines which vhost to use for a request as follows: 1.382 +14 -0 apache/src/CHANGES Index: CHANGES =================================================================== RCS file: /export/home/cvs/apache/src/CHANGES,v retrieving revision 1.381 retrieving revision 1.382 diff -u -r1.381 -r1.382 --- CHANGES 1997/08/02 22:43:50 1.381 +++ CHANGES 1997/08/05 06:02:39 1.382 @@ -1,5 +1,19 @@ Changes with Apache 1.3a2 + *) ip-based vhosts are stored and queried using a hashing function, which + has been shown to improve performance on servers with many ip-vhosts. 
+ Some other changes had to be made to accommodate this: + - the * address for vhosts now behaves like _default_ + - the matching process now is: + - match an ip-vhost directly via hash (possibly matches main + server) + - if that fails, just pretend it matched the main server + - if so far only the main server has been matched, perform + name-based lookups (ServerName, ServerAlias, ServerPath) + *only on name-based vhosts* + - if they fail, look for _default_ vhosts + [Dean Gaudet, Dave Hankins <[EMAIL PROTECTED]>] + *) dbmmanage overhaul: - merge dbmmanage and dbmmanage.new functionality, remove dbmmanage.new - tie() to AnyDBM_File which will use one of DB_File, NDBM_File or 1.17 +2 -1 apache/src/http_conf_globals.h Index: http_conf_globals.h =================================================================== RCS file: /export/home/cvs/apache/src/http_conf_globals.h,v retrieving revision 1.16 retrieving revision 1.17 diff -u -r1.16 -r1.17 --- http_conf_globals.h 1997/08/03 20:29:18 1.16 +++ http_conf_globals.h 1997/08/05 06:02:40 1.17 @@ -87,8 +87,9 @@ extern char server_root[MAX_STRING_LEN]; extern char server_confname[MAX_STRING_LEN]; +extern server_rec_chain *vhash_table[VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP]; + /* We want this to have the least chance of being corrupted if there * is some memory corruption, so we allocate it statically. */ extern char coredump_dir[MAX_STRING_LEN]; - 1.70 +5 -0 apache/src/http_config.c Index: http_config.c =================================================================== RCS file: /export/home/cvs/apache/src/http_config.c,v retrieving revision 1.69 retrieving revision 1.70 diff -u -r1.69 -r1.70 --- http_config.c 1997/08/03 20:29:18 1.69 +++ http_config.c 1997/08/05 06:02:40 1.70 @@ -1154,6 +1154,11 @@ bind_address.s_addr = htonl(INADDR_ANY); listeners = NULL; listenbacklog = DEFAULT_LISTENBACKLOG; + + /* Global virtual host hash bucket pointers. Init to null. 
*/ + memset (vhash_table, 0, + (VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP) * sizeof (vhash_table[0])); + strncpy(coredump_dir, server_root, sizeof(coredump_dir)-1); coredump_dir[sizeof(coredump_dir)-1] = '\0'; } 1.197 +160 -39 apache/src/http_main.c Index: http_main.c =================================================================== RCS file: /export/home/cvs/apache/src/http_main.c,v retrieving revision 1.196 retrieving revision 1.197 diff -u -r1.196 -r1.197 --- http_main.c 1997/08/04 09:21:16 1.196 +++ http_main.c 1997/08/05 06:02:41 1.197 @@ -191,6 +191,23 @@ listen_rec *listeners; static listen_rec *head_listener; +/* A (n) bucket hash table, each entry has a pointer to a server rec and + * a pointer to the other entries in that bucket. Each individual address, + * even for virtualhosts with multiple addresses, has an entry in this hash + * table. There are extra buckets for _default_, and name-vhost entries. + * + * The main_server's addresses appear in the main part of this table. + * They're differentiated from real vhosts by server->is_virtual == 0. + * + * The VHASH_DEFAULT_BUCKET is a list of all the _default_ server_addr_recs. + * + * The VHASH_MAIN_BUCKET is a list of one server_addr_rec from each name + * based vhost. At the moment none of the name based vhost code is hashed, + * and it's just more convenient to have a list of all the name-based vhosts + * rather than a list of all the names of name-based vhosts. + */ +server_rec_chain *vhash_table[VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP]; + char server_root[MAX_STRING_LEN]; char server_confname[MAX_STRING_LEN]; char coredump_dir[MAX_STRING_LEN]; @@ -1694,50 +1711,128 @@ return (suexec_enabled); } +/* This hashing function is designed to get good distribution in the cases + * where the server is handling entire "networks" of servers. i.e. a + * whack of /24s. This is probably the most common configuration for + * ISPs with large virtual servers. + * + * Hash function provided by David Hankins. 
+ */ +static inline unsigned hash_inaddr( unsigned key ) +{ + key ^= (key >> 16); + return ((key >> 8) ^ key) % VHASH_TABLE_SIZE; +} static server_rec *find_virtual_server (struct in_addr server_ip, unsigned port, server_rec *server) { - server_rec *virt; server_addr_rec *sar; - server_rec *def; + server_rec_chain *trav; + unsigned buk; - def = server; - for (virt = server->next; virt; virt = virt->next) { - for (sar = virt->addrs; sar; sar = sar->next) { - if ((virt->is_virtual == 1) && /* VirtualHost */ - (sar->host_addr.s_addr == htonl(INADDR_ANY) || - sar->host_addr.s_addr == server_ip.s_addr) && - (sar->host_port == 0 || sar->host_port == port)) { - return virt; - } else if ( sar->host_addr.s_addr == DEFAULT_VHOST_ADDR - && (sar->host_port == 0 || sar->host_port == port)) { - /* this is so that you can build a server that is the - "default" for any interface which isn't explicitly - specified. So that you can implement "deny anything - which isn't expressly permitted" -djg */ - def = virt; + /* scan the hash table for an exact match first */ + buk = hash_inaddr( server_ip.s_addr ); + for (trav = vhash_table[buk]; trav; trav = trav->next) { + sar = trav->sar; + if ((sar->host_addr.s_addr == server_ip.s_addr) + && (sar->host_port == 0 || sar->host_port == port)) { + if (trav->server->is_virtual) { + return trav->server; } + /* otherwise it's the "main server address", and we need + * to do _default_ handling + */ + break; } } - return def; + /* return the main server for now, might switch to a _default_ later */ + return server_conf; } -void default_server_hostnames(server_rec *s) + +static void add_to_vhash_bucket (unsigned buk, server_rec *s, + server_addr_rec *sar) +{ + server_rec_chain *hashme; + + hashme = palloc (pconf, sizeof (*hashme)); + hashme->server = s; + hashme->sar = sar; + hashme->next = vhash_table[buk]; + vhash_table[buk] = hashme; +} + + +/* hash table statistics, keep this in here for the beta period so + * we can find out if the hash 
function is ok + */ +#define VHASH_STATISTICS +#ifdef VHASH_STATISTICS +static int vhash_compare (const void *a, const void *b) +{ + return (*(const int *)b - *(const int *)a); +} + +static void dump_vhash_statistics (void) +{ + unsigned count[VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP]; + int i; + server_rec_chain *src; + unsigned total; + char buf[HUGE_STRING_LEN]; + char *p; + + total = 0; + for (i = 0; i < VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP; ++i) { + count[i] = 0; + for (src = vhash_table[i]; src; src = src->next) { + ++count[i]; + if (i < VHASH_TABLE_SIZE) { + /* don't count the slop buckets in the total */ + ++total; + } + } + } + qsort (count, VHASH_TABLE_SIZE, sizeof (count[0]), vhash_compare); + p = buf + ap_snprintf (buf, sizeof (buf), + "vhash: total hashed = %u, avg chain = %u, #default = %u, " + "#name-vhost = %u, chain lengths (count x len):", + total, total / VHASH_TABLE_SIZE, count [VHASH_DEFAULT_BUCKET], + count [VHASH_MAIN_BUCKET]); + total = 1; + for (i = 1; i < VHASH_TABLE_SIZE; ++i) { + if (count[i-1] != count[i]) { + p += ap_snprintf (p, sizeof (buf) - (p - buf), " %ux%u", + total, count[i-1]); + total = 1; + } else { + ++total; + } + } + p += ap_snprintf (p, sizeof (buf) - (p - buf), " %ux%u", + total, count[VHASH_TABLE_SIZE-1]); + log_error(buf, server_conf); +} +#endif + + +void default_server_hostnames(server_rec *main_s) { struct hostent *h; - struct in_addr *main_addr; - int num_addr; char *def_hostname; int n; server_addr_rec *sar; + server_addr_rec *main_sar; int has_default_vhost_addr; - unsigned mainport = s->port; int from_local=0; + server_rec *s; + int is_namevhost; /* Main host first */ - + s = main_s; + if (!s->server_hostname) { s->server_hostname = get_local_host(pconf); from_local = 1; @@ -1756,31 +1851,53 @@ }; exit(1); } - /* we need to use gethostbyaddr below... and since it shares a static - area with gethostbyname it'd clobber the value we just got. So - we need to make a copy. 
-djg */ - for (num_addr = 0; h->h_addr_list[num_addr] != NULL; num_addr++) { - /* nop */ - } - main_addr = palloc( pconf, sizeof( *main_addr ) * num_addr ); - for (n = 0; n < num_addr; n++) { - main_addr[n] = *(struct in_addr *)h->h_addr_list[n]; + + /* we fill in s->addrs for two reasons. One so that we have + * server_addr_recs for the hash table. And also because gethostbyname + * and gethostbyaddr share a static data area and our result would be + * clobbered here if we didn't copy it somewhere. -djg + */ + for (n = 0; h->h_addr_list[n] != NULL; n++) { + main_sar = pcalloc (pconf, sizeof (*main_sar)); + main_sar->host_addr = *(struct in_addr *)h->h_addr_list[n]; + main_sar->host_port = 0; /* we want this to match all ports */ + main_sar->virthost = s->server_hostname; + main_sar->next = s->addrs; + s->addrs = main_sar; + add_to_vhash_bucket (hash_inaddr (main_sar->host_addr.s_addr), + s, main_sar); } /* Then virtual hosts */ - + for (s = s->next; s; s = s->next) { /* Check to see if we might be a HTTP/1.1 virtual host - same IP */ has_default_vhost_addr = 0; - for (n = 0; n < num_addr; n++) { - for(sar = s->addrs; sar; sar = sar->next) { - if (sar->host_addr.s_addr == main_addr[n].s_addr && - s->port == mainport) + for(sar = s->addrs; sar; sar = sar->next) { + is_namevhost = 0; /* guess addr doesn't match main server */ + for (main_sar = main_s->addrs; main_sar; main_sar=main_sar->next) { + if (sar->host_addr.s_addr == main_sar->host_addr.s_addr + && s->port == main_s->port) { + add_to_vhash_bucket (VHASH_MAIN_BUCKET, s, sar); + /* XXX: only add it to the main bucket once since we're + * not optimizing name-vhosts yet */ s->is_virtual = 2; - if( sar->host_addr.s_addr == DEFAULT_VHOST_ADDR ) { - has_default_vhost_addr = 1; + is_namevhost = 1; + break; } } + if (sar->host_addr.s_addr == DEFAULT_VHOST_ADDR + || sar->host_addr.s_addr == INADDR_ANY) { + /* XXX: this probably isn't the best handling of INADDR_ANY */ + /* add it to default bucket for each appropriate 
sar + * since we need to do a port test + */ + has_default_vhost_addr = 1; + add_to_vhash_bucket (VHASH_DEFAULT_BUCKET, s, sar); + } else if (!is_namevhost) { + add_to_vhash_bucket (hash_inaddr (sar->host_addr.s_addr), + s, sar); + } } /* FIXME: some of this decision doesn't make a lot of sense in @@ -1819,6 +1936,10 @@ } } } + +#ifdef VHASH_STATISTICS + dump_vhash_statistics (); +#endif } conn_rec *new_connection (pool *p, server_rec *server, BUFF *inout, 1.150 +43 -13 apache/src/http_protocol.c Index: http_protocol.c =================================================================== RCS file: /export/home/cvs/apache/src/http_protocol.c,v retrieving revision 1.149 retrieving revision 1.150 diff -u -r1.149 -r1.150 --- http_protocol.c 1997/08/04 02:55:11 1.149 +++ http_protocol.c 1997/08/05 06:02:42 1.150 @@ -68,6 +68,7 @@ */ #include "util_date.h" /* For parseHTTPdate and BAD_DATE */ #include <stdarg.h> +#include "http_conf_globals.h" #define SET_BYTES_SENT(r) \ do { if (r->sent_bodyct) \ @@ -692,6 +693,7 @@ unsigned port = (*hostname) ? atoi(hostname) : 80; server_rec *s; int l; + server_rec_chain *src; if (port && (port != r->server->port)) return; @@ -703,15 +705,17 @@ r->hostname = host; - for (s = r->server->next; s; s = s->next) { + for (src = vhash_table[VHASH_MAIN_BUCKET]; src; src = src->next) { const char *names; server_addr_rec *sar; - if (s->addrs == NULL) { - /* this server has been disabled because of DNS screwups during - configuration */ - continue; - } + s = src->server; + + /* s->addrs != NULL because it's in a hash bucket */ + + /* Note that default_server_hostnames has ensured that each name-vhost + * appears only once in the VHASH_MAIN_BUCKET. 
+ */ if ((!strcasecmp(host, s->server_hostname)) && (port == s->port)) { r->server = r->connection->server = s; @@ -754,13 +758,15 @@ void check_serverpath (request_rec *r) { server_rec *s; + server_rec_chain *src; /* This is in conjunction with the ServerPath code in * http_core, so we get the right host attached to a non- * Host-sending request. */ - for (s = r->server->next; s; s = s->next) { + for (src = vhash_table[VHASH_MAIN_BUCKET]; src; src = src->next) { + s = src->server; if (s->addrs && s->path && !strncmp(r->uri, s->path, s->pathlen) && (s->path[s->pathlen - 1] == '/' || r->uri[s->pathlen] == '/' || @@ -769,6 +775,24 @@ } } + +static void check_default_server (request_rec *r) +{ + server_addr_rec *sar; + server_rec_chain *trav; + unsigned port; + + port = ntohs (r->connection->local_addr.sin_port); + for (trav = vhash_table[VHASH_DEFAULT_BUCKET]; trav; trav = trav->next) { + sar = trav->sar; + if (sar->host_port == 0 || sar->host_port == port) { + /* match! */ + r->server = r->connection->server = trav->server; + return; + } + } +} + request_rec *read_request (conn_rec *conn) { request_rec *r = (request_rec *)pcalloc (conn->pool, sizeof(request_rec)); @@ -815,12 +839,18 @@ r->status = HTTP_OK; /* Until further notice. 
*/ - /* handle Host header here, to get virtual server */ - - if (r->hostname || (r->hostname = table_get(r->headers_in, "Host"))) - check_hostalias(r); - else - check_serverpath(r); + /* if it's the main server so far, we have to do name-vhost style lookups */ + if (r->server->is_virtual == 0) { + if (r->hostname || (r->hostname = table_get(r->headers_in, "Host"))) + check_hostalias(r); + else + check_serverpath(r); + /* if that failed, then look for a default server */ + if (r->server->is_virtual == 0) { + check_default_server (r); + } + } + /* we have finished the search for a vhost */ /* we may have switched to another server */ r->per_dir_config = r->server->lookup_defaults; 1.137 +23 -1 apache/src/httpd.h Index: httpd.h =================================================================== RCS file: /export/home/cvs/apache/src/httpd.h,v retrieving revision 1.136 retrieving revision 1.137 diff -u -r1.136 -r1.137 --- httpd.h 1997/08/03 20:30:57 1.136 +++ httpd.h 1997/08/05 06:02:43 1.137 @@ -293,6 +293,19 @@ #define SCOREBOARD_MAINTENANCE_INTERVAL 1000000 #endif +/* This defines the size of the hash table used for hashing ip addresses + * of virtual hosts. It must be a power of two. + */ +#ifndef VHASH_TABLE_SIZE +#define VHASH_TABLE_SIZE 256 +#endif +/* bucket where _default_ entries are stored */ +#define VHASH_DEFAULT_BUCKET (VHASH_TABLE_SIZE) +/* bucket where name-vhosts are stored */ +#define VHASH_MAIN_BUCKET ((VHASH_TABLE_SIZE)+1) +/* number of magic buckets */ +#define VHASH_EXTRA_SLOP 2 + /* Number of requests to try to handle in a single process. If <= 0, * the children don't die off. That's the default here, since I'm still * interested in finding and stanching leaks. @@ -498,7 +511,6 @@ const struct htaccess_result *next; }; - typedef struct conn_rec conn_rec; typedef struct server_rec server_rec; typedef struct request_rec request_rec; @@ -677,6 +689,16 @@ char *virthost; /* The name given in <VirtualHost> */ }; +/* Meta linear list for hashes. 
Each server_rec can be in possibly multiple + * hash chains since it can have multiple ips + */ +typedef struct server_rec_chain server_rec_chain; +struct server_rec_chain { + server_rec_chain *next; + server_rec *server; + server_addr_rec *sar; /* the record causing it to be in + * this chain */ +}; struct server_rec {