Doug MacEachern <[EMAIL PROTECTED]> writes: > > my $args = $q->param; # hash ref > > you mean parms() ? the Apache::Request::parms hash ref is tied, so > there are still method calls, but less than calling params(), which does > extra stuff to emulate CGI::params. I just looked at line 21 from Request.pm; it looks like $q->param() returns the same thing as $q->parms does; but surely $q->parms is even better! > parms() is going to be renamed (to something less like params()) and > documented as faster than using the params() wrapper, in the next release. A new libapreq release? Great news! Here's YAP for libapreq - I added Dave Mitchell's memmem in multipart_buffer.c for better portability, and made some minor changes to apache_request.c to eliminate some unnecessary copying. I'd be glad to send you a url to a production server, if you'd like to see it in action. HTH, and thanks again. -- Joe Schaefer [EMAIL PROTECTED] SunStar Systems, Inc.
diff -ur libapreq-0.31/c/apache_request.c libapreq/c/apache_request.c --- libapreq-0.31/c/apache_request.c Fri Jul 2 21:00:17 1999 +++ libapreq/c/apache_request.c Sun Sep 24 22:10:18 2000 @@ -64,8 +64,20 @@ for(x=0;str[x];x++) if(str[x] == '+') str[x] = ' '; } -static int util_read(ApacheRequest *req, const char **rbuf) +static int util_read(ApacheRequest *req, char **rbuf) { + /* could make pointer array (LoL) to capture growth + * p[1] -> [...........\0] + * p[2] -> [........\0] + * p[3] -> [......................\0] + * + * would need hi-tech flushing routine (per string) + * also need a hi-tech reader. is it really worth the trouble? + * + * + * + */ + request_rec *r = req->r; int rc = OK; @@ -84,9 +96,9 @@ return HTTP_REQUEST_ENTITY_TOO_LARGE; } - *rbuf = ap_pcalloc(r->pool, length + 1); + *rbuf = ap_pcalloc(r->pool, length + 1); /* can we improve this? */ - ap_hard_timeout("[libapreq] util_read", r); + ap_hard_timeout("[libapreq] util_parse", r); while ((len_read = ap_get_client_block(r, buff, sizeof(buff))) > 0) { @@ -234,56 +246,56 @@ return req; } -static int urlword_dlm[] = {'&', ';', 0}; +/* static int urlword_dlm[] = {'&', ';', 0}; */ -static char *my_urlword(pool *p, const char **line) +char *my_urlword(ApacheRequest *req, char **line) { - int i; - - for (i = 0; urlword_dlm[i]; i++) { - int stop = urlword_dlm[i]; - char *pos = strchr(*line, stop); - char *res; - - if (!pos) { - if (!urlword_dlm[i+1]) { - int len = strlen(*line); - res = ap_pstrndup(p, *line, len); - *line += len; - return res; - } - continue; - } + char dlm[] = "&;"; + long int len; + char *word, *dlm_ptr; - res = ap_pstrndup(p, *line, pos - *line); + if (! *line) { return NULL; } - while (*pos == stop) { - ++pos; - } + dlm_ptr = strpbrk(*line, dlm); - *line = pos; + if (!dlm_ptr) { + word = *line; + *line = NULL; + return word; - return res; + } else { + *dlm_ptr = '\0'; + word = *line; + *line = dlm_ptr + 1; + return word; } - - return NULL; } -static void split_to_parms(ApacheRequest *req, const char *data) +static void split_to_parms(ApacheRequest *req, char *data) { request_rec *r = req->r; - const char *val; - - while (*data && (val = my_urlword(r->pool, &data))) { - const char *key = ap_getword(r->pool, &val, '='); + char dlm[] = "&;"; + char *word; + char *val; + + do { + word = my_urlword(req, &data); /* modifies data */ + + if (!(val = strchr( word, '='))) { + continue; /* ignores words w/o an = sign */ + } + + *val = '\0'; + ++val; - req_plustospace((char*)key); - ap_unescape_url((char*)key); req_plustospace((char*)val); ap_unescape_url((char*)val); + req_plustospace((char*)word); + ap_unescape_url((char*)word); + + ap_table_add(req->parms, word, val); - ap_table_add(req->parms, key, val); - } + } while ( data ) ; } @@ -293,7 +305,8 @@ int rc = OK; if (r->method_number == M_POST) { - const char *data, *type; + char *data = NULL; + const char *type; type = ap_table_get(r->headers_in, "Content-Type"); @@ -304,12 +317,13 @@ return rc; } if (data) { - split_to_parms(req, data); + split_to_parms(req, data); } + } if (r->args) { - split_to_parms(req, r->args); + split_to_parms(req, ap_pstrdup(r->pool,r->args)); } return OK; @@ -439,7 +453,7 @@ } if (r->args) { - split_to_parms(req, r->args); + split_to_parms(req, ap_pstrdup(r->pool,r->args)); } return OK; diff -ur libapreq-0.31/c/multipart_buffer.c libapreq/c/multipart_buffer.c --- libapreq-0.31/c/multipart_buffer.c Sat May 1 02:44:28 1999 +++ libapreq/c/multipart_buffer.c Fri Sep 22 02:14:25 2000 @@ -55,134 +55,180 @@ * */ +/* JS: 6/30/2000 + * This should fix the memory allocation issues, and handle client disconnects + * gracefully. Comments in the code should explain what I think is going on. + */ + + #include "multipart_buffer.h" -#define FILLUNIT (1024 * 5) #ifndef CRLF #define CRLF "\015\012" #endif #define CRLF_CRLF "\015\012\015\012" +#define min(A, B) ((A) < (B) ? (A) : (B)) + +#define DEBUG 0 /* 0 = off, 1 = noisy */ static char *my_join(pool *p, char *one, int lenone, char *two, int lentwo) { - char *res; - int len = lenone+lentwo; - res = (char *)ap_palloc(p, len + 1); - memcpy(res, one, lenone); + char *res; + /* p is typically self->subp so we can free this at lines 229, 345 */ + res = (char *)ap_palloc(p, lenone + lentwo + 1); + memcpy(res, one, lenone); memcpy(res+lenone, two, lentwo); + *(res+lenone+lentwo+1) = '\0'; /* be nice to character strings */ return res; } -static char * -my_ninstr(register char *big, register char *bigend, char *little, char *lend) -{ - register char *s, *x; - register int first = *little; - register char *littleend = lend; +/* Dave Mitchell's memmem */ + +void *my_memmem( const void*buf, size_t buf_len, const void *s, size_t s_len) +{ + void *p, *q; + size_t n; + unsigned int c; - if (!first && little >= littleend) { - return big; - } - if (bigend - big < littleend - little) { - return NULL; - } - bigend -= littleend - little++; - while (big <= bigend) { - if (*big++ != first) { - continue; - } - for (x=big,s=little; s < littleend; /**/ ) { - if (*s++ != *x++) { - s--; - break; - } - } - if (s >= littleend) { - return big-1; - } + if (s_len > buf_len || s_len == 0 || buf_len == 0) return NULL; + p = (void *) buf; + c = *((unsigned char *) s); + for (n = buf_len - s_len + 1; n > 0; n--, p++) { + q = memchr(p,c,n); + if (!q) return NULL; + n -= (q-p); + p = q; + if (memcmp(p,s,s_len) == 0) return p; + } + return NULL; +} + +void multipart_buffer_flush(multipart_buffer *self) +{ + /* shifts all unread data in the buffer to the beginning of the buffer */ + + char *ptr = self->buffer; + while ( ptr < self->buffer + self->buffer_len) { + *ptr = *(ptr + self->buffer_off); + ++ptr; } - return NULL; -} + + self->buffer_off = 0; +} + /* * This fills up our internal buffer in such a way that the - * boundary is never split between reads + * boundary is never split between reads. Returns number of bytes read, + * -1 on dropped connection, 0 if EOF (same as ap_get_client_block) OR + * IF bytes == 0 ! */ -void multipart_buffer_fill(multipart_buffer *self, long bytes) + +int multipart_buffer_fill(multipart_buffer *self, long bytes) { - int len_read, length, bytes_to_read; - int buffer_len = self->buffer_len; + + int len_read=0, length, bytes_to_read, total=0; if (!self->length) { return; } - length = (bytes - buffer_len) + self->boundary_length + 2; - if (self->length < length) { - length = self->length; + + bytes_to_read = min( (BUFSIZE) - self->buffer_len, + min( bytes, self->length) ); + + if ( bytes_to_read + self->buffer_off + self->buffer_len > BUFSIZE ) + { /* overflow condition - need to flush stale data */ + multipart_buffer_flush(self); } - bytes_to_read = length; + +#if DEBUG == 1 + ap_log_rerror(MPB_ERROR, + "[libapreq]: BUFFER_FILL: SIZE=%d, OFF=%d, LENGTH=%d", + bytes_to_read, self->buffer_off, + self->buffer_len); +#endif /* DEBUG */ ap_hard_timeout("multipart_buffer_fill", self->r); + + total = bytes_to_read; + while (bytes_to_read > 0) { - /*XXX: we can optimize this loop*/ - char *buff = (char *)ap_pcalloc(self->subp, - sizeof(char) * bytes_to_read + 1); - len_read = ap_get_client_block(self->r, buff, bytes_to_read); + + char *buff = self->buffer + self->buffer_len + self->buffer_off; + len_read = ap_get_client_block(self->r, + buff, + bytes_to_read); if (len_read < 0) { ap_log_rerror(MPB_ERROR, "[libapreq] client dropped connection during read"); self->length = 0; - self->buffer = NULL; self->buffer_len = 0; - return; + return len_read; + } + + if (len_read == 0) { /* premature EOF */ + break; } - self->buffer = self->buffer ? - my_join(self->r->pool, - self->buffer, self->buffer_len, - buff, len_read) : - ap_pstrndup(self->r->pool, buff, len_read); - - self->total += len_read; self->buffer_len += len_read; - self->length -= len_read; - bytes_to_read -= len_read; + bytes_to_read -= len_read; ap_reset_timeout(self->r); } + + self->total += total; + self->length -= total; + ap_kill_timeout(self->r); - ap_clear_pool(self->subp); + + return total; } char *multipart_buffer_read(multipart_buffer *self, long bytes, int *blen) { int start = -1; - char *retval, *str; - - /* default number of bytes to read */ + char *mem, *retval; + + /* default number of bytes to read - max */ if (!bytes) { - bytes = FILLUNIT; + bytes = FILLUNIT; } /* * Fill up our internal buffer in such a way that the boundary * is never split between reads. */ - multipart_buffer_fill(self, bytes); - /* Find the boundary in the buffer (it may not be there). */ - if (self->buffer) { - if ((str = my_ninstr(self->buffer, - self->buffer+self->buffer_len, - self->boundary, - self->boundary+self->boundary_length))) - { - start = str - self->buffer; + while ( self->buffer_len < self->boundary_length + 2 ) { + if ( multipart_buffer_fill(self, bytes) <= 0 ) { + self->length = 0; /* prevent further read attempts */ + return NULL; } - } + } + + +#if DEBUG == 1 + ap_log_rerror(MPB_ERROR, + "[libapreq]: BUFFER_READ: SIZE=%d, OFF=%d, LENGTH=%d\n", + self->length, self->buffer_off, + self->buffer_len, self->buffer + self->buffer_off); +#endif /* DEBUG */ + + + + if (self->buffer_len == 0) { return NULL; } + + /* Find the boundary in the buffer (it may not be there). */ + + if (mem = my_memmem( self->buffer + self->buffer_off, self->buffer_len, + self->boundary, self->boundary_length ) ) + { + start = mem - ( self->buffer + self->buffer_off ); + } /* protect against malformed multipart POST operations */ + if (!(start >= 0 || self->length > 0)) { ap_log_rerror(MPB_ERROR, "[libapreq] malformed upload: start=%d, self->length=%d", @@ -195,109 +241,166 @@ * and return NULL. The +2 here is a fiendish plot to * remove the CR/LF pair at the end of the boundary. */ + if (start == 0) { - /* clear us out completely if we've hit the last boundary. */ - if (strEQ(self->buffer, self->boundary_end)) { - self->buffer = NULL; - self->buffer_len = 0; + if ( memcmp(self->buffer + self->buffer_off, + self->boundary_end, self->boundary_length+2) == 0 ) { self->length = 0; + self->buffer_len = 0; + return NULL; + } else { + self->buffer_off += (self->boundary_length + 2); + self->buffer_len -= (self->boundary_length + 2); return NULL; } - - /* otherwise just remove the boundary. */ - self->buffer += (self->boundary_length + 2); - self->buffer_len -= (self->boundary_length + 2); - return NULL; } - if (start > 0) { /* read up to the boundary */ - *blen = start > bytes ? bytes : start; - } - else { /* read the requested number of bytes */ - /* + /* read the requested number of bytes, and * leave enough bytes in the buffer to allow us to read * the boundary. Thanks to Kevin Hendrick for finding * this one. */ - *blen = bytes - (self->boundary_length + 1); + + *blen = (start > 0) ? start : + self->buffer_len - (self->boundary_length + 1); + + *blen = min(*blen, bytes); + + retval = self->buffer + self->buffer_off; + self->buffer_len -= *blen; + + if (!self->buffer_len) { + self->buffer_off = 0; /* no data left, so we reset buffer */ + } else { + self->buffer_off += *blen; } - - retval = ap_pstrndup(self->r->pool, self->buffer, *blen); - - self->buffer += *blen; - self->buffer_len -= *blen; /* If we hit the boundary, remove the CRLF from the end. */ + if (start > 0) { *blen -= 2; retval[*blen] = '\0'; } - return retval; + return retval; /* retval is volatile, and must be copied immediately. */ } table *multipart_buffer_headers(multipart_buffer *self) { - int end=0, ok=0, bad=0; + int end=-1, length=0, bytes=FILLUNIT; table *tab; char *header; + char *entry; + + /* This loop will ultimately allocate roughly T * (N+1) / 2 + * bytes of memory - here T is the total size of input data to be read, + * and N is the number of loops required to read it all in, i.e. + * N = T / bytes. As this is a header read, there's no good reason + * why T is more than 1KB in size, and hence this loop should + * only redo itself at most once. + */ + + do { + + char *mem; + + multipart_buffer_fill(self, bytes); + + if (mem = my_memmem( self->buffer + self->buffer_off, self->buffer_len, + CRLF_CRLF, 4)) + { + /* have final header 'fragment' in the buffer */ + + end = mem - ( self->buffer + self->buffer_off ); + + header = my_join( self->subp, + header, length, + self->buffer + self->buffer_off, end+2 ); + + length += end+2; + self->buffer_off += end+4; + self->buffer_len -= end+4; + + } else if (self->buffer_len > 3) { + /* fetch entire header fragment */ + + header = my_join( self->subp, + header, length, + self->buffer + self->buffer_off, self->buffer_len-3); + + length += self->buffer_len - 3; + self->buffer_off += self->buffer_len - 3; + self->buffer_len = 3; + + } else if (self->length <= 0) { + /* bad header read, check out early */ + + ap_log_rerror( MPB_ERROR, "[libapreq]: Bad header read near EOF." ); + return NULL; - do { - char *str; - multipart_buffer_fill(self, FILLUNIT); - if ((str = strstr(self->buffer, CRLF_CRLF))) { - ++ok; - end = str - self->buffer; - } - if (self->buffer == NULL || *self->buffer == '\0') { - ++ok; - } - if (!ok && self->length <= 0) { - ++bad; } - } while (!ok && !bad); - if (bad) { - return NULL; - } + } while (end < 0); + - header = ap_pstrndup(self->r->pool, self->buffer, end+2); - /*XXX: need to merge continuation lines here?*/ +#if DEBUG == 1 + ap_log_rerror(MPB_ERROR, + "[libapreq]: DONE READING HEADER: OFF=%d, LENGTH=%d\n%s", + self->buffer_off, + self->buffer_len, self->buffer + self->buffer_off); +#endif /* DEBUG */ tab = ap_make_table(self->r->pool, 10); - self->buffer += end+4; - self->buffer_len -= end+4; - { - char *entry; - while ((entry = ap_getword_nc(self->r->pool, &header, '\r')) && *header) { - char *key; - key = ap_getword_nc(self->r->pool, &entry, ':'); - while (ap_isspace(*entry)) { - ++entry; - } - if (*header == '\n') { - ++header; - } - ap_table_add(tab, key, entry); + while ((entry = ap_getword_nc(self->subp, &header, '\r')) && *header) { + char *key; + key = ap_getword_nc(self->subp, &entry, ':'); + while (ap_isspace(*entry)) { + ++entry; } + if (*header == '\n') { + ++header; + } + ap_table_add(tab, key, entry); /* copies key & entry strings to tab */ } + ap_clear_pool(self->subp); /* OK to clean up here */ + return tab; } char *multipart_buffer_read_body(multipart_buffer *self) { + /* This function is NOT used for file uploads! + See multipart_buffer_read instead. */ + char *data, *retval=NULL; - int blen = 0, old_len = 0; + int blen = 0, old_len = 0, bytes = FILLUNIT; - while ((data = multipart_buffer_read(self, 0, &blen))) { + /* This loop will ultimately allocate roughly T * (N+1) / 2 + * bytes of memory - here T is the total size of input data to be read, + * and N is the number of loops required to read it all in, i.e. + * N = T / bytes. Since we have no way of knowing T in advance (all we + * know is that T <= self->length), we'll just hope that T is at most + * a few KB, in which case N = 1 or 2. Since this memory is cleaned up + * before we call this function again, this should be fine. If you're + * going to allow users to POST large amounts of data, either make them + * upload it through a file or use enctype=x-www-form-urlencoded in your + * form. If you're still having memory troubles here, do a client-side + * check on the size of the form's values before submitting the data to + * the server and/or increase FILLUNIT and BUFSIZE in multipart_buffer.h + * as appropriate. Don't forget about your POST_MAX settings! + */ + + while ((data = multipart_buffer_read(self, bytes, &blen))) { retval = retval ? - my_join(self->r->pool, retval, old_len, data, blen) : - ap_pstrndup(self->r->pool, data, blen); - old_len = blen; + my_join(self->subp, retval, old_len, data, blen) : + ap_pstrndup(self->subp, data, blen); + old_len += blen; } + /* can't clear subp here w/o destroying retval */ + return retval; } @@ -307,14 +410,14 @@ multipart_buffer *self = (multipart_buffer *) ap_pcalloc(r->pool, sizeof(multipart_buffer)); + self->subp = ap_make_sub_pool(r->pool); self->r = r; self->length = length; self->boundary = ap_pstrcat(r->pool, "--", boundary, NULL); self->boundary_length = strlen(self->boundary); self->boundary_end = ap_pstrcat(r->pool, self->boundary, "--", NULL); - self->buffer = NULL; - self->buffer_len = 0; - self->subp = ap_make_sub_pool(self->r->pool); + self->buffer_off = 0; + self->buffer_len = 0; /* Read the preamble and the topmost (boundary) line plus the CRLF. */ (void)multipart_buffer_read(self, 0, &blen); @@ -325,3 +428,5 @@ return self; } + + diff -ur libapreq-0.31/c/multipart_buffer.h libapreq/c/multipart_buffer.h --- libapreq-0.31/c/multipart_buffer.h Sat May 1 02:44:28 1999 +++ libapreq/c/multipart_buffer.h Sat Jul 1 00:29:29 2000 @@ -1,24 +1,30 @@ -#include "apache_request.h" +#include "apache_request.h" /* JS: 6/30/2000 - more comments and fixes */ + +#define BUFSIZE (1024 * 8) /* >> maximum possible boundary_length */ +#define FILLUNIT (1024 * 4) /* < BUFSIZE (default byte read) */ typedef struct { request_rec *r; pool *subp; long length; - long total; - long boundary_length; - char *boundary; + long total; + long boundary_length; /* ~ 40 ; see BUFSIZE above */ + char *boundary; char *boundary_end; - char *buffer; - long buffer_len; + char buffer[BUFSIZE]; + long buffer_len; /* size of new data in buff[BUFSIZE] */ + long buffer_off; /* NEW: starting point of new data in buff[BUFSIZE] */ } multipart_buffer; #define multipart_buffer_eof(self) \ -(((self->buffer == NULL) || (*self->buffer == '\0')) && (self->length <= 0)) - +(((self->buffer_len == 0 ) || (*(self->buffer + self->buffer_off) == '\0')) \ +&& (self->length <= 0)) char *multipart_buffer_read_body(multipart_buffer *self); table *multipart_buffer_headers(multipart_buffer *self); -void multipart_buffer_fill(multipart_buffer *self, long bytes); +void multipart_buffer_flush(multipart_buffer *self); /* new */ +int multipart_buffer_fill(multipart_buffer *self, long bytes); char *multipart_buffer_read(multipart_buffer *self, long bytes, int *blen); multipart_buffer *multipart_buffer_new(char *boundary, long length, request_rec *r); - #define MPB_ERROR APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, self->r + +