On 07/30/2009 05:37 PM, [email protected] wrote:
> Author: jfclere
> Date: Thu Jul 30 15:37:22 2009
> New Revision: 799334
> 
> URL: http://svn.apache.org/viewvc?rev=799334&view=rev
> Log:
> Add the file logic for the handler.
> (Next step add the slotmem logic for the multicast socket).
> 
> Modified:
>     httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c
> 
> Modified: httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c
> URL: 
> http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c?rev=799334&r1=799333&r2=799334&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c (original)
> +++ httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c Thu Jul 30 15:37:22 
> 2009

> +static apr_status_t hm_file_update_stat(hm_ctx_t *ctx, hm_server_t *s, 
> apr_pool_t *pool)
> +{
> +    apr_status_t rv;
> +    apr_file_t *fp;
> +    apr_file_t *fpin;
> +    apr_hash_index_t *hi;
> +    apr_time_t now;
> +    unsigned int fage;
> +    apr_finfo_t fi;
> +    int updated = 0;
> +    char *path = apr_pstrcat(pool, ctx->storage_path, ".tmp.XXXXXX", NULL);
> +
> +
> +    /* TODO: Update stats file (!) */
> +    rv = apr_file_mktemp(&fp, path, APR_CREATE | APR_WRITE, pool);
> +
> +    if (rv) {
> +        ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ctx->s,
> +                     "Heartmonitor: Unable to open tmp file: %s", path);
> +        return rv;
> +    }
> +    rv = apr_file_open(&fpin, ctx->storage_path, 
> APR_READ|APR_BINARY|APR_BUFFERED,
> +                       APR_OS_DEFAULT, pool);
> +
> +    now = apr_time_now();
> +    if (rv == APR_SUCCESS) {
> +        char *t;
> +        apr_table_t *hbt = apr_table_make(pool, 10);
> +        apr_bucket_alloc_t *ba = apr_bucket_alloc_create(pool);
> +        apr_bucket_brigade *bb = apr_brigade_create(pool, ba);
> +        apr_bucket_brigade *tmpbb = apr_brigade_create(pool, ba);
> +        rv = apr_file_info_get(&fi, APR_FINFO_SIZE | APR_FINFO_MTIME, fpin);
> +        if (rv) {
> +            ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ctx->s,
> +                         "Heartmonitor: Unable to read file: %s", 
> ctx->storage_path);
> +            return rv;
> +        }
> +
> +        /* Read the file and update the line corresponding to the node */
> +        ba = apr_bucket_alloc_create(pool);
> +        bb = apr_brigade_create(pool, ba);
> +        apr_brigade_insert_file(bb, fpin, 0, fi.size, pool);
> +        tmpbb = apr_brigade_create(pool, ba);
> +        fage = (unsigned int) apr_time_sec(now - fi.mtime);
> +        do {
> +            char buf[4096];
> +            const char *ip;
> +            apr_size_t bsize = sizeof(buf);
> +            apr_brigade_cleanup(tmpbb);
> +            if (APR_BRIGADE_EMPTY(bb)) {
> +                break;
> +            } 
> +            rv = apr_brigade_split_line(tmpbb, bb,
> +                                        APR_BLOCK_READ, sizeof(buf));
> +       
> +            if (rv) {
> +                ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ctx->s,
> +                             "Heartmonitor: Unable to read from file: %s", 
> ctx->storage_path);
> +                return rv;
> +            }
> +
> +            apr_brigade_flatten(tmpbb, buf, &bsize);
> +            if (bsize == 0) {
> +                break;
> +            }
> +            buf[bsize - 1] = 0;
> +            t = strchr(buf, ' ');
> +            if (t) {
> +                ip = apr_pstrndup(pool, buf, t - buf);
> +            } else {
> +                ip = NULL;
> +            }
> +            if (!ip || buf[0] == '#') {
> +                /* copy things we can't process */
> +                apr_file_printf(fp, "%s\n", buf);
> +            } else if (strcmp(ip, s->ip) !=0 ) {
> +                hm_server_t node; 
> +                unsigned int seen;
> +                /* Update seen time according to the last file modification 
> */
> +                apr_table_clear(hbt);
> +                argstr_to_table(pool, apr_pstrdup(pool, t), hbt);
> +                if (apr_table_get(hbt, "busy")) {
> +                    node.busy = atoi(apr_table_get(hbt, "busy"));
> +                }
> +
> +                if (apr_table_get(hbt, "ready")) {
> +                    node.ready = atoi(apr_table_get(hbt, "ready"));
> +                }
> +
> +                if (apr_table_get(hbt, "lastseen")) {
> +                    node.seen = atoi(apr_table_get(hbt, "lastseen"));
> +                } 
> +                seen = fage + node.seen;
> +                apr_file_printf(fp, "%s &ready=%u&busy=%u&lastseen=%u\n",
> +                                ip, node.ready, node.busy, (unsigned int) 
> seen);

The above seems to be a lot of parsing back and forth for just updating the 
lastseen parameter.
Couldn't we just keep the complete line until the 'lastseen=' and just update 
the number afterwards?
Or is the order of ready, busy and lastseen not defined?


> +    }
> +
> +    rv = apr_file_rename(path, ctx->storage_path, pool);

We need to consider that multiple handlers run in parallel in different threads 
/ processes.
If we are in bad luck this can lock out node A as in the following example.

Node A and Node B sent the request at the same time and the handlers processing 
these requests are
running fairly in parallel.
If the handler processing Node B does the renaming one tick later then the 
handler processing
Node A then the update of Node A is lost. If this happens over and over again 
Node A is believed
to be dead although it is not.
I fear you need to use global locks here to avoid this.

Regards

RĂ¼diger

Reply via email to