Re: svn commit: r721952 - in /httpd/httpd/trunk: ./ modules/ modules/cluster/
On Mon, 01 Dec 2008 02:55:15 - [EMAIL PROTECTED] wrote: Author: pquerna Date: Sun Nov 30 18:55:14 2008 New Revision: 721952 URL: http://svn.apache.org/viewvc?rev=721952view=rev Log: Add two new modules, originally written at Joost, to handle load balancing across multiple apache servers within the same datacenter. mod_heartbeat generates multicast status messages with the current number of clients connected, but the formated can easily be extended to include other things. mod_heartmonitor collects these messages into a static file, which then can be used for other modules to make load balancing decisions on. Added: httpd/httpd/trunk/modules/cluster/ (with props) httpd/httpd/trunk/modules/cluster/Makefile.in (with props) httpd/httpd/trunk/modules/cluster/README.heartbeat httpd/httpd/trunk/modules/cluster/README.heartmonitor httpd/httpd/trunk/modules/cluster/config.m4 httpd/httpd/trunk/modules/cluster/mod_heartbeat.c (with props) httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c (with props) Modified: httpd/httpd/trunk/CHANGES httpd/httpd/trunk/modules/README [cut] Added: httpd/httpd/trunk/modules/cluster/mod_heartbeat.c URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/cluster/mod_heartbeat.c?rev=721952view=auto == --- httpd/httpd/trunk/modules/cluster/mod_heartbeat.c (added) +++ httpd/httpd/trunk/modules/cluster/mod_heartbeat.c Sun Nov 30 18:55:14 2008 @@ -0,0 +1,354 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more [cut] +typedef struct hb_ctx_t +{ +int active; +apr_sockaddr_t *mcast_addr; +int server_limit; +int thread_limit; +int status; [cut] + +static void *hb_worker(apr_thread_t *thd, void *data) Don't this need to be APR_THREAD_FUNC? +{ +hb_ctx_t *ctx = (hb_ctx_t *) data; +apr_status_t rv; + +apr_pool_t *pool = apr_thread_pool_get(thd); +apr_pool_tag(pool, heartbeat_worker); +ctx-status = 0; The meaning of status zero is unclear. [cut] +static void start_hb_worker(apr_pool_t *p, hb_ctx_t *ctx) +{ +apr_status_t rv; + +rv = apr_thread_mutex_create(ctx-start_mtx, APR_THREAD_MUTEX_UNNESTED, + p); + +if (rv) { +ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, + Heartbeat: apr_thread_cond_create failed); +ctx-status = rv; +return; +} status is apr_status_t? + +apr_thread_mutex_lock(ctx-start_mtx); + +apr_pool_cleanup_register(p, ctx, hb_pool_cleanup, apr_pool_cleanup_null); + +rv = apr_thread_create(ctx-thread, NULL, hb_worker, ctx, p); +if (rv) { +apr_pool_cleanup_kill(p, ctx, hb_pool_cleanup); +ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, + Heartbeat: apr_thread_create failed); +ctx-status = rv; +} Same above. + +apr_thread_mutex_lock(ctx-start_mtx); +apr_thread_mutex_unlock(ctx-start_mtx); +apr_thread_mutex_destroy(ctx-start_mtx); +} + +static void hb_child_init(apr_pool_t *p, server_rec *s) +{ +hb_ctx_t *ctx = ap_get_module_config(s-module_config, heartbeat_module); + +apr_proc_mutex_child_init(ctx-mutex, ctx-mutex_path, p); + +ctx-status = -1; I don't like this. status -1 is unclear. + +if (ctx-active) { +start_hb_worker(p, ctx); +if (ctx-status != 0) { Same above. Why not change the type of hb_ctx_t::status to apr_status_t ? [cut] +static const char *cmd_hb_address(cmd_parms *cmd, + void *dconf, const char *addr) +{ +apr_status_t rv; +char *host_str; +char *scope_id; +apr_port_t port = 0; +apr_pool_t *p = cmd-pool; +hb_ctx_t *ctx = +(hb_ctx_t *) ap_get_module_config(cmd-server-module_config, + heartbeat_module); +const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY); + +if (err != NULL) { +return err; +} + +ctx-active = 1; + +rv = apr_parse_addr_port(host_str, scope_id, port, addr, p); cmd-temp_pool is better than cmd-pool. + +if (rv) { +return HeartbeatAddress: Unable to parse address.; +} + +if (host_str == NULL) { +return HeartbeatAddress: No host provided in address; +} + +if (port == 0) { +return HeartbeatAddress: No port provided in address; +} + +rv = apr_sockaddr_info_get(ctx-mcast_addr, host_str, APR_INET, port, 0, + p); + +if (rv) { +return HeartbeatAddress: apr_sockaddr_info_get failed.; +} + +const char *tmpdir = NULL; +rv = apr_temp_dir_get(tmpdir, p); Same above. +if (rv) { +return HeartbeatAddress: unable to find temp directory.; +} + +char *path =
Re: svn commit: r721952 - in /httpd/httpd/trunk: ./ modules/ modules/cluster/
Takashi Sato wrote: On Mon, 01 Dec 2008 02:55:15 - [EMAIL PROTECTED] wrote: Author: pquerna Date: Sun Nov 30 18:55:14 2008 New Revision: 721952 + +static void *hb_worker(apr_thread_t *thd, void *data) Don't this need to be APR_THREAD_FUNC? Fixed in r724090. +{ +hb_ctx_t *ctx = (hb_ctx_t *) data; +apr_status_t rv; + +apr_pool_t *pool = apr_thread_pool_get(thd); +apr_pool_tag(pool, heartbeat_worker); +ctx-status = 0; The meaning of status zero is unclear. Fixed all of the ctx-status things to use apr_status_t values in r724091. [cut] +static const char *cmd_hb_address(cmd_parms *cmd, + void *dconf, const char *addr) +{ +apr_status_t rv; +char *host_str; +char *scope_id; +apr_port_t port = 0; +apr_pool_t *p = cmd-pool; +hb_ctx_t *ctx = +(hb_ctx_t *) ap_get_module_config(cmd-server-module_config, + heartbeat_module); +const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY); + +if (err != NULL) { +return err; +} + +ctx-active = 1; + +rv = apr_parse_addr_port(host_str, scope_id, port, addr, p); cmd-temp_pool is better than cmd-pool. Fixed up the config code to use the temp pool in r724092. Thanks, Paul
Re: svn commit: r721952 - in /httpd/httpd/trunk: ./ modules/ modules/cluster/
Ruediger Pluem wrote: +typedef struct hb_ctx_t +{ +int active; +apr_sockaddr_t *mcast_addr; +int server_limit; +int thread_limit; +int status; +int keep_running; Shouldn't this be volatile? Changed, r723660. +if (res == SERVER_READY ps-generation == ap_my_generation) { +ready++; +} +else if (res != SERVER_DEAD + res != SERVER_STARTING res != SERVER_IDLE_KILL) { +busy++; Is this correct even if ps-generation != ap_my_generation? Nope, this would over-report 'busy' servers. Fixed in r723661. + +apr_pool_t *tpool; +apr_pool_create(tpool, pool); +apr_pool_tag(tpool, heartbeat_worker_temp); +hb_monitor(ctx, tpool); +apr_pool_destroy(tpool); Why create / destroy and not simply create once and call apr_pool_clear in the loop? As this pool is around forever, but this is only ran every second, I don't think there is much advantage to clearing it at the small risk it keeps growing. +static void start_hb_worker(apr_pool_t *p, hb_ctx_t *ctx) +{ +apr_status_t rv; + +rv = apr_thread_mutex_create(ctx-start_mtx, APR_THREAD_MUTEX_UNNESTED, + p); + +if (rv) { +ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, + Heartbeat: apr_thread_cond_create failed); You create a thread mutex above, not a thread cond. Yeah, some old code used a thread cond for this, fixed in r723663. +rv = apr_thread_create(ctx-thread, NULL, hb_worker, ctx, p); +if (rv) { +apr_pool_cleanup_kill(p, ctx, hb_pool_cleanup); +ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, + Heartbeat: apr_thread_create failed); +ctx-status = rv; +} + +apr_thread_mutex_lock(ctx-start_mtx); +apr_thread_mutex_unlock(ctx-start_mtx); This may deserve some comment. As far as I understand the desire is to wait until the hb_worker thread is up. But to be honest I do not understand the need for the start_mutex at all. Added a comment in r723665. +rv = apr_proc_mutex_create(ctx-mutex, ctx-mutex_path, +#if APR_HAS_FCNTL_SERIALIZE + APR_LOCK_FCNTL, +#else +#if APR_HAS_FLOCK_SERIALIZE + APR_LOCK_FLOCK, +#else +#error port me to a non crap platform. +#endif +#endif + p); Is there any reason why we must use either APR_LOCK_FCNTL or APR_LOCK_FLOCK, wouldn't the default mutex work? The default lock mech on OSX is sysvsem. I couldn't get it to work properly after forking at all. Maybe I was doing something wrong, but switching it to flock/fcntl works pretty much everywhere, and pretty consistently works even if a child crashes. + +if (rv) { +ap_log_error(APLOG_MARK, APLOG_CRIT, rv, s, + Heartbeat: mutex failed creation at %s (type=%s), + ctx-mutex_path, apr_proc_mutex_defname()); And how do you know that apr_proc_mutex_defname is either APR_LOCK_FCNTL or APR_LOCK_FLOCK? Maybe the default mutex on this platform is something different. Fixed with r723666. Added: httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c . +typedef struct hm_ctx_t +{ +int active; +const char *storage_path; +apr_proc_mutex_t *mutex; +const char *mutex_path; +apr_sockaddr_t *mcast_addr; +int status; +int keep_running; Shouldn't this be volatile? Fixed in r723669. +apr_time_t last = apr_time_now(); +while (ctx-keep_running) { +int n; +apr_pool_t *p; +apr_pollfd_t pfd; +apr_interval_time_t timeout; +apr_pool_create(p, ctx-p); + +apr_time_t now = apr_time_now(); + +if (apr_time_sec((now - last)) 5) { Hardcoded 5 seconds? Bah!! Moved to a compile time define in r723672. + +apr_pool_destroy(p); Why not just clearing the pool? Because I don't trust pools ? :P +rv = apr_thread_mutex_create(ctx-start_mtx, APR_THREAD_MUTEX_UNNESTED, + p); + +if (rv) { +ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, + Heartmonitor: apr_thread_cond_create failed); You create a thread mutex above, not a thread cond. Fixed in r723674. + +apr_thread_mutex_lock(ctx-start_mtx); +apr_thread_mutex_unlock(ctx-start_mtx); This may deserve some comment. As far as I understand the desire is to wait until the hb_worker thread is up. But to be honest I do not understand the need for the start_mutex at all. Commented in r723675. ... +#if APR_HAS_FCNTL_SERIALIZE + +APR_LOCK_FCNTL, +#else +#if APR_HAS_FLOCK_SERIALIZE +APR_LOCK_FLOCK, +#else +#error port me to a non crap platform. +#endif +#endif +p);
Re: svn commit: r721952 - in /httpd/httpd/trunk: ./ modules/ modules/cluster/
On Dec 5, 2008, at 4:21 AM, Paul Querna wrote: Is there any reason why we must use either APR_LOCK_FCNTL or APR_LOCK_FLOCK, wouldn't the default mutex work? The default lock mech on OSX is sysvsem. I couldn't get it to work properly after forking at all. Maybe I was doing something wrong, but switching it to flock/fcntl works pretty much everywhere, and pretty consistently works even if a child crashes. This brings up something else I'h hoping to add in: the ability to change what the default mutex is. Many times the compile-time default isn't right and there are loads of places we simply use the default. Having a way within httpd/apr which overrules the default would be quite useful... It's on my TODO
Re: svn commit: r721952 - in /httpd/httpd/trunk: ./ modules/ modules/cluster/
Jim Jagielski schrieb: On Dec 5, 2008, at 4:21 AM, Paul Querna wrote: Is there any reason why we must use either APR_LOCK_FCNTL or APR_LOCK_FLOCK, wouldn't the default mutex work? The default lock mech on OSX is sysvsem. I couldn't get it to work properly after forking at all. Maybe I was doing something wrong, but switching it to flock/fcntl works pretty much everywhere, and pretty consistently works even if a child crashes. This brings up something else I'h hoping to add in: the ability to change what the default mutex is. Many times the compile-time default isn't right and there are loads of places we simply use the default. Having a way within httpd/apr which overrules the default would be quite useful... +1
Re: svn commit: r721952 - in /httpd/httpd/trunk: ./ modules/ modules/cluster/
On 12/01/2008 03:55 AM, [EMAIL PROTECTED] wrote: Author: pquerna Date: Sun Nov 30 18:55:14 2008 New Revision: 721952 URL: http://svn.apache.org/viewvc?rev=721952view=rev Log: Add two new modules, originally written at Joost, to handle load balancing across multiple apache servers within the same datacenter. mod_heartbeat generates multicast status messages with the current number of clients connected, but the formated can easily be extended to include other things. mod_heartmonitor collects these messages into a static file, which then can be used for other modules to make load balancing decisions on. Added: httpd/httpd/trunk/modules/cluster/ (with props) httpd/httpd/trunk/modules/cluster/Makefile.in (with props) httpd/httpd/trunk/modules/cluster/README.heartbeat httpd/httpd/trunk/modules/cluster/README.heartmonitor httpd/httpd/trunk/modules/cluster/config.m4 httpd/httpd/trunk/modules/cluster/mod_heartbeat.c (with props) httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c (with props) Modified: httpd/httpd/trunk/CHANGES httpd/httpd/trunk/modules/README Modified: httpd/httpd/trunk/CHANGES URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/CHANGES?rev=721952r1=721951r2=721952view=diff == --- httpd/httpd/trunk/CHANGES [utf-8] (original) +++ httpd/httpd/trunk/CHANGES [utf-8] Sun Nov 30 18:55:14 2008 @@ -2,6 +2,12 @@ Changes with Apache 2.3.0 [ When backported to 2.2.x, remove entry from this file ] + *) mod_heartmonitor: New module to collect heartbeats, and write out a file + so that other modules can load balance traffic as needed. [Paul Querna] + + *) mod_heartbeat: New module to genarate multicast heartbeats to konw if a + server is online. [Paul Querna] + s/konw/know/ In addition to the later adjusted svn log message I would propose that you add Sanders and Justins name to this change entry as well. Added: httpd/httpd/trunk/modules/cluster/mod_heartbeat.c URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/cluster/mod_heartbeat.c?rev=721952view=auto == --- httpd/httpd/trunk/modules/cluster/mod_heartbeat.c (added) +++ httpd/httpd/trunk/modules/cluster/mod_heartbeat.c Sun Nov 30 18:55:14 2008 @@ -0,0 +1,354 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the License); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include httpd.h +#include http_config.h +#include http_log.h +#include apr_strings.h + +#include ap_mpm.h +#include scoreboard.h + +#ifndef HEARTBEAT_INTERVAL +#define HEARTBEAT_INTERVAL (1) +#endif + +module AP_MODULE_DECLARE_DATA heartbeat_module; + +typedef struct hb_ctx_t +{ +int active; +apr_sockaddr_t *mcast_addr; +int server_limit; +int thread_limit; +int status; +int keep_running; Shouldn't this be volatile? +apr_proc_mutex_t *mutex; +const char *mutex_path; +apr_thread_mutex_t *start_mtx; +apr_thread_t *thread; +apr_file_t *lockf; +} hb_ctx_t; + +static const char *msg_format = v=%uready=%ubusy=%u; + +#define MSG_VERSION (1) + +static int hb_monitor(hb_ctx_t *ctx, apr_pool_t *p) +{ +int i, j; +apr_uint32_t ready = 0; +apr_uint32_t busy = 0; + +for (i = 0; i ctx-server_limit; i++) { +process_score *ps; +ps = ap_get_scoreboard_process(i); + +for (j = 0; j ctx-thread_limit; j++) { +worker_score *ws = NULL; + +ws = ap_scoreboard_image-servers[i][j]; + +int res = ws-status; + +if (res == SERVER_READY ps-generation == ap_my_generation) { +ready++; +} +else if (res != SERVER_DEAD + res != SERVER_STARTING res != SERVER_IDLE_KILL) { +busy++; Is this correct even if ps-generation != ap_my_generation? +} +} +} + +char buf[256]; +apr_size_t len = +apr_snprintf(buf, sizeof(buf), msg_format, MSG_VERSION, ready, busy); + +apr_socket_t *sock = NULL; +do { +apr_status_t rv;