On 07/30/2009 05:37 PM, [email protected] wrote:
> Author: jfclere
> Date: Thu Jul 30 15:37:22 2009
> New Revision: 799334
>
> URL: http://svn.apache.org/viewvc?rev=799334&view=rev
> Log:
> Add the file logic for the handler.
> (Next step add the slotmem logic for the multicast socket).
>
> Modified:
> httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c
>
> Modified: httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c
> URL:
> http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c?rev=799334&r1=799333&r2=799334&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c (original)
> +++ httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c Thu Jul 30 15:37:22
> 2009
> +static apr_status_t hm_file_update_stat(hm_ctx_t *ctx, hm_server_t *s,
> apr_pool_t *pool)
> +{
> + apr_status_t rv;
> + apr_file_t *fp;
> + apr_file_t *fpin;
> + apr_hash_index_t *hi;
> + apr_time_t now;
> + unsigned int fage;
> + apr_finfo_t fi;
> + int updated = 0;
> + char *path = apr_pstrcat(pool, ctx->storage_path, ".tmp.XXXXXX", NULL);
> +
> +
> + /* TODO: Update stats file (!) */
> + rv = apr_file_mktemp(&fp, path, APR_CREATE | APR_WRITE, pool);
> +
> + if (rv) {
> + ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ctx->s,
> + "Heartmonitor: Unable to open tmp file: %s", path);
> + return rv;
> + }
> + rv = apr_file_open(&fpin, ctx->storage_path,
> APR_READ|APR_BINARY|APR_BUFFERED,
> + APR_OS_DEFAULT, pool);
> +
> + now = apr_time_now();
> + if (rv == APR_SUCCESS) {
> + char *t;
> + apr_table_t *hbt = apr_table_make(pool, 10);
> + apr_bucket_alloc_t *ba = apr_bucket_alloc_create(pool);
> + apr_bucket_brigade *bb = apr_brigade_create(pool, ba);
> + apr_bucket_brigade *tmpbb = apr_brigade_create(pool, ba);
> + rv = apr_file_info_get(&fi, APR_FINFO_SIZE | APR_FINFO_MTIME, fpin);
> + if (rv) {
> + ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ctx->s,
> + "Heartmonitor: Unable to read file: %s",
> ctx->storage_path);
> + return rv;
> + }
> +
> + /* Read the file and update the line corresponding to the node */
> + ba = apr_bucket_alloc_create(pool);
> + bb = apr_brigade_create(pool, ba);
> + apr_brigade_insert_file(bb, fpin, 0, fi.size, pool);
> + tmpbb = apr_brigade_create(pool, ba);
> + fage = (unsigned int) apr_time_sec(now - fi.mtime);
> + do {
> + char buf[4096];
> + const char *ip;
> + apr_size_t bsize = sizeof(buf);
> + apr_brigade_cleanup(tmpbb);
> + if (APR_BRIGADE_EMPTY(bb)) {
> + break;
> + }
> + rv = apr_brigade_split_line(tmpbb, bb,
> + APR_BLOCK_READ, sizeof(buf));
> +
> + if (rv) {
> + ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ctx->s,
> + "Heartmonitor: Unable to read from file: %s",
> ctx->storage_path);
> + return rv;
> + }
> +
> + apr_brigade_flatten(tmpbb, buf, &bsize);
> + if (bsize == 0) {
> + break;
> + }
> + buf[bsize - 1] = 0;
> + t = strchr(buf, ' ');
> + if (t) {
> + ip = apr_pstrndup(pool, buf, t - buf);
> + } else {
> + ip = NULL;
> + }
> + if (!ip || buf[0] == '#') {
> + /* copy things we can't process */
> + apr_file_printf(fp, "%s\n", buf);
> + } else if (strcmp(ip, s->ip) !=0 ) {
> + hm_server_t node;
> + unsigned int seen;
> + /* Update seen time according to the last file modification
> */
> + apr_table_clear(hbt);
> + argstr_to_table(pool, apr_pstrdup(pool, t), hbt);
> + if (apr_table_get(hbt, "busy")) {
> + node.busy = atoi(apr_table_get(hbt, "busy"));
> + }
> +
> + if (apr_table_get(hbt, "ready")) {
> + node.ready = atoi(apr_table_get(hbt, "ready"));
> + }
> +
> + if (apr_table_get(hbt, "lastseen")) {
> + node.seen = atoi(apr_table_get(hbt, "lastseen"));
> + }
> + seen = fage + node.seen;
> + apr_file_printf(fp, "%s &ready=%u&busy=%u&lastseen=%u\n",
> + ip, node.ready, node.busy, (unsigned int)
> seen);
The above seems to be a lot of parsing back and forth for just updating the
lastseen parameter.
Couldn't we just keep the complete line until the 'lastseen=' and just update
the number afterwards?
Or is the order of ready, busy and lastseen not defined?
> + }
> +
> + rv = apr_file_rename(path, ctx->storage_path, pool);
We need to consider that multiple handlers run in parallel in different threads
/ processes.
If we are in bad luck this can lock out node A as in the following example.
Node A and Node B sent the request at the same time and the handlers processing
these requests are
running fairly in parallel.
If the handler processing Node B does the renaming one tick later then the
handler processing
Node A then the update of Node A is lost. If this happens over and over again
Node A is believed
to be dead although it is not.
I fear you need to use global locks here to avoid this.
Regards
RĂ¼diger