Doug MacEachern <[EMAIL PROTECTED]> writes:

> >              my $args = $q->param; # hash ref
> 
> you mean parms() ?   the Apache::Request::parms hash ref is tied, so
> there are still method calls, but less than calling params(), which does
> extra stuff to emulate CGI::params.  

I just looked at line 21 from Request.pm; it looks like $q->param() returns
the same thing as $q->parms does; but surely $q->parms is even better!

> parms() is going to be renamed (to something less like params()) and 
> documented as faster than using the params() wrapper, in the next release.

A new libapreq release? Great news! Here's YAP for libapreq - I added Dave Mitchell's
memmem in multipart_buffer.c for better portability, and made some minor changes
to apache_request.c to eliminate some unnecessary copying. I'd be glad to send
you a url to a production server, if you'd like to see it in action.

HTH, and thanks again.
-- 
Joe Schaefer
[EMAIL PROTECTED]

SunStar Systems, Inc.

diff -ur libapreq-0.31/c/apache_request.c libapreq/c/apache_request.c
--- libapreq-0.31/c/apache_request.c	Fri Jul  2 21:00:17 1999
+++ libapreq/c/apache_request.c	Sun Sep 24 22:10:18 2000
@@ -64,8 +64,20 @@
     for(x=0;str[x];x++) if(str[x] == '+') str[x] = ' ';
 }
 
-static int util_read(ApacheRequest *req, const char **rbuf)
+static int util_read(ApacheRequest *req, char **rbuf)
 {
+    /* could make pointer array (LoL) to capture growth
+     * p[1] -> [...........\0]
+     * p[2] -> [........\0]
+     * p[3] -> [......................\0]
+     *
+     * would need hi-tech flushing routine (per string)
+     * also need a hi-tech reader. is it really worth the trouble?
+     *
+     *
+     *
+     */
+
     request_rec *r = req->r;
     int rc = OK;
 
@@ -84,9 +96,9 @@
 	    return HTTP_REQUEST_ENTITY_TOO_LARGE;
 	}
 
-	*rbuf = ap_pcalloc(r->pool, length + 1); 
+	*rbuf = ap_pcalloc(r->pool, length + 1); /* can we improve this? */
 
-	ap_hard_timeout("[libapreq] util_read", r);
+	ap_hard_timeout("[libapreq] util_parse", r);
 
 	while ((len_read =
 		ap_get_client_block(r, buff, sizeof(buff))) > 0) {
@@ -234,56 +246,56 @@
     return req;
 }
 
-static int urlword_dlm[] = {'&', ';', 0};
+/* static int urlword_dlm[] = {'&', ';', 0}; */
 
-static char *my_urlword(pool *p, const char **line)
+char *my_urlword(ApacheRequest *req, char **line)
 {
-    int i;
-
-    for (i = 0; urlword_dlm[i]; i++) {
-	int stop = urlword_dlm[i];
-	char *pos = strchr(*line, stop);
-	char *res;
-
-	if (!pos) {
-	    if (!urlword_dlm[i+1]) {
-		int len = strlen(*line);
-		res = ap_pstrndup(p, *line, len);
-		*line += len;
-		return res;
-	    }
-	    continue;
-	}
+    char dlm[] = "&;"; 
+    long int len;
+    char *word, *dlm_ptr;
 
-	res = ap_pstrndup(p, *line, pos - *line);
+    if (! *line) { return NULL; }
 
-	while (*pos == stop) {
-	    ++pos;
-	}
+    dlm_ptr = strpbrk(*line, dlm);
 
-	*line = pos;
+    if (!dlm_ptr) {
+	word = *line;
+	*line = NULL;
+	return word;
 
-	return res;
+    } else {
+	*dlm_ptr = '\0';
+	word = *line;
+	*line = dlm_ptr + 1; 
+	return word;
     }
-
-    return NULL;
 }
 
-static void split_to_parms(ApacheRequest *req, const char *data)
+static void split_to_parms(ApacheRequest *req, char *data)
 {
     request_rec *r = req->r; 
-    const char *val;
-
-    while (*data && (val = my_urlword(r->pool, &data))) {
-	const char *key = ap_getword(r->pool, &val, '=');
+    char dlm[] = "&;";
+    char *word;
+    char *val;
+
+    do { 
+	word = my_urlword(req, &data); /* modifies data */
+
+	if (!(val = strchr( word, '='))) {
+	    continue; /* ignores words w/o an = sign */
+	}	
+	    
+	*val = '\0';	
+	++val;
 
-	req_plustospace((char*)key);
-	ap_unescape_url((char*)key);
 	req_plustospace((char*)val);
 	ap_unescape_url((char*)val);
+	req_plustospace((char*)word);
+	ap_unescape_url((char*)word);
+	
+	ap_table_add(req->parms, word, val);
 
-	ap_table_add(req->parms, key, val);
-    }
+    } while ( data ) ;
 
 }
 
@@ -293,7 +305,8 @@
     int rc = OK;
 
     if (r->method_number == M_POST) { 
-	const char *data, *type;
+	char *data = NULL; 
+	const char *type;
 
 	type = ap_table_get(r->headers_in, "Content-Type");
 
@@ -304,12 +317,13 @@
 	    return rc;
 	}
 	if (data) {
-	    split_to_parms(req, data);
+            split_to_parms(req, data);
 	}
+
     }
 
     if (r->args) {
-	split_to_parms(req, r->args);
+	split_to_parms(req, ap_pstrdup(r->pool,r->args));
     }        
 
     return OK;
@@ -439,7 +453,7 @@
     }
 
     if (r->args) {
-	split_to_parms(req, r->args);
+	split_to_parms(req, ap_pstrdup(r->pool,r->args));
     }        
 
     return OK;
diff -ur libapreq-0.31/c/multipart_buffer.c libapreq/c/multipart_buffer.c
--- libapreq-0.31/c/multipart_buffer.c	Sat May  1 02:44:28 1999
+++ libapreq/c/multipart_buffer.c	Fri Sep 22 02:14:25 2000
@@ -55,134 +55,180 @@
  *
  */
 
+/* JS: 6/30/2000
+ * This should fix the memory allocation issues, and handle client disconnects
+ * gracefully. Comments in the code should explain what I think is going on. 
+ */
+
+
 #include "multipart_buffer.h"
 
-#define FILLUNIT (1024 * 5)
 #ifndef CRLF
 #define CRLF "\015\012"
 #endif
 #define CRLF_CRLF "\015\012\015\012"
+#define min(A, B) ((A) < (B) ? (A) : (B))
+
+#define DEBUG 0 /* 0 = off, 1 = noisy */
 
 static char *my_join(pool *p, char *one, int lenone, char *two, int lentwo)
 {
-    char *res; 
-    int len = lenone+lentwo;
-    res = (char *)ap_palloc(p, len + 1); 
-    memcpy(res, one, lenone);  
+    char *res;
+    /* p is typically self->subp so we can free this at lines 229, 345 */
+    res = (char *)ap_palloc(p, lenone + lentwo  + 1);
+    memcpy(res, one, lenone);
     memcpy(res+lenone, two, lentwo);
+    *(res+lenone+lentwo+1) = '\0'; /* be nice to character strings */
     return res;
 }
 
-static char * 
-my_ninstr(register char *big, register char *bigend, char *little, char *lend) 
-{ 
-    register char *s, *x; 
-    register int first = *little; 
-    register char *littleend = lend; 
+/* Dave Mitchell's memmem */
+
+void *my_memmem( const void*buf, size_t buf_len, const void *s, size_t s_len)
+{
+ 	void *p, *q;
+ 	size_t n;
+ 	unsigned int c;
  
-    if (!first && little >= littleend) {
-        return big; 
-    }
-    if (bigend - big < littleend - little) {
-        return NULL; 
-    }
-    bigend -= littleend - little++; 
-    while (big <= bigend) { 
-	if (*big++ != first) {
-	    continue; 
-	}
-	for (x=big,s=little; s < littleend; /**/ ) { 
-	    if (*s++ != *x++) { 
-		s--; 
-		break; 
-	    }
-	}
-	if (s >= littleend) {
-	    return big-1; 
-	}
+ 	if (s_len > buf_len || s_len == 0 || buf_len == 0) return NULL;
+ 	p = (void *) buf;
+ 	c = *((unsigned char *) s);
+ 	for (n = buf_len - s_len + 1; n > 0; n--, p++) {
+ 		q = memchr(p,c,n);
+ 		if (!q) return NULL;
+ 		n -= (q-p);
+ 		p = q;
+ 		if (memcmp(p,s,s_len) == 0) return p;
+ 	}
+ 	return NULL;
+}
+
+void multipart_buffer_flush(multipart_buffer *self)
+{
+    /* shifts all unread data in the buffer to the beginning of the buffer */
+
+    char *ptr = self->buffer;
+    while ( ptr < self->buffer + self->buffer_len) {
+	*ptr = *(ptr + self->buffer_off);
+	++ptr;
     }
-    return NULL; 
-} 
+
+    self->buffer_off = 0;
+}
+
 
 /*
  * This fills up our internal buffer in such a way that the
- * boundary is never split between reads
+ * boundary is never split between reads. Returns number of bytes read,
+ * -1 on dropped connection, 0 if EOF (same as ap_get_client_block) OR
+ * IF bytes == 0 !
  */
-void multipart_buffer_fill(multipart_buffer *self, long bytes)
+
+int multipart_buffer_fill(multipart_buffer *self, long bytes)
 {
-    int len_read, length, bytes_to_read;
-    int buffer_len = self->buffer_len;
+
+    int len_read=0, length, bytes_to_read, total=0;
 
     if (!self->length) {
 	return;
     }
-    length = (bytes - buffer_len) + self->boundary_length + 2;
-    if (self->length < length) {
-	length = self->length;
+
+    bytes_to_read = min( (BUFSIZE) - self->buffer_len,
+			 min( bytes, self->length) );
+
+    if ( bytes_to_read + self->buffer_off + self->buffer_len > BUFSIZE ) 
+    { /* overflow condition - need to flush stale data */
+	multipart_buffer_flush(self);
     }
-    bytes_to_read = length;
+
+#if DEBUG == 1
+    ap_log_rerror(MPB_ERROR,
+ 		  "[libapreq]: BUFFER_FILL: SIZE=%d, OFF=%d, LENGTH=%d", 
+ 		  bytes_to_read, self->buffer_off,
+  		  self->buffer_len);
+#endif /* DEBUG */
 
     ap_hard_timeout("multipart_buffer_fill", self->r);
+
+    total = bytes_to_read;
+
     while (bytes_to_read > 0) {
-	/*XXX: we can optimize this loop*/ 
-	char *buff = (char *)ap_pcalloc(self->subp,
-					sizeof(char) * bytes_to_read + 1);
-	len_read = ap_get_client_block(self->r, buff, bytes_to_read);
+
+	char *buff = self->buffer + self->buffer_len + self->buffer_off;
+	len_read = ap_get_client_block(self->r, 
+					  buff, 
+					  bytes_to_read);
 
 	if (len_read < 0) {
 	    ap_log_rerror(MPB_ERROR,
 			  "[libapreq] client dropped connection during read");
 	    self->length = 0;
-	    self->buffer = NULL;
 	    self->buffer_len = 0;
-	    return;
+	    return len_read;
+	}
+	
+	if (len_read == 0) { /* premature EOF */
+	    break;
 	}
 
-	self->buffer = self->buffer ? 
-	    my_join(self->r->pool, 
-		    self->buffer, self->buffer_len, 
-		    buff, len_read) :
-	    ap_pstrndup(self->r->pool, buff, len_read);
-
-	self->total      += len_read;
 	self->buffer_len += len_read;
-	self->length     -= len_read;
-	bytes_to_read    -= len_read;
+	bytes_to_read -= len_read;
 
 	ap_reset_timeout(self->r);
     }
+
+    self->total += total;
+    self->length -= total;
+
     ap_kill_timeout(self->r);
-    ap_clear_pool(self->subp);
+   
+    return total;
 }
 
 char *multipart_buffer_read(multipart_buffer *self, long bytes, int *blen)
 {
     int start = -1;
-    char *retval, *str;
-
-    /* default number of bytes to read */
+    char *mem, *retval;
+    
+    /* default number of bytes to read - max */
     if (!bytes) {
-	bytes = FILLUNIT;       
+	bytes = FILLUNIT;     
     }
 
     /*
      * Fill up our internal buffer in such a way that the boundary
      * is never split between reads.
      */
-    multipart_buffer_fill(self, bytes);
 
-    /* Find the boundary in the buffer (it may not be there). */
-    if (self->buffer) {
-	if ((str = my_ninstr(self->buffer, 
-			    self->buffer+self->buffer_len, 
-			    self->boundary, 
-			    self->boundary+self->boundary_length))) 
-	{
-	    start = str - self->buffer;
+    while ( self->buffer_len < self->boundary_length + 2 ) {
+	if ( multipart_buffer_fill(self, bytes) <= 0 ) {
+	    self->length = 0; /* prevent further read attempts */
+	    return NULL;
 	}
-    }
+    }	
+
+
+#if DEBUG == 1    
+    ap_log_rerror(MPB_ERROR,
+ 		  "[libapreq]: BUFFER_READ: SIZE=%d, OFF=%d, LENGTH=%d\n", 
+ 		  self->length, self->buffer_off,
+  		  self->buffer_len, self->buffer + self->buffer_off);
+#endif /* DEBUG */
+
+
+
+    if (self->buffer_len == 0) { return NULL; }
+
+    /* Find the boundary in the buffer (it may not be there). */
+
+    if (mem = my_memmem( self->buffer + self->buffer_off, self->buffer_len,
+		      self->boundary, self->boundary_length ) )
+    {
+ 	 start = mem - ( self->buffer + self->buffer_off );
+    } 
 
     /* protect against malformed multipart POST operations */
+
     if (!(start >= 0 || self->length > 0)) {
 	ap_log_rerror(MPB_ERROR, 
 		      "[libapreq] malformed upload: start=%d, self->length=%d", 
@@ -195,109 +241,166 @@
      * and return NULL.  The +2 here is a fiendish plot to
      * remove the CR/LF pair at the end of the boundary.
      */
+
     if (start == 0) {
-        /* clear us out completely if we've hit the last boundary. */
-	if (strEQ(self->buffer, self->boundary_end)) {
-	    self->buffer = NULL;
-	    self->buffer_len = 0;
+	if ( memcmp(self->buffer + self->buffer_off,
+		    self->boundary_end, self->boundary_length+2) == 0 ) {
 	    self->length = 0;
+	    self->buffer_len = 0;
+	    return NULL;
+	} else {
+	    self->buffer_off += (self->boundary_length + 2);
+	    self->buffer_len -= (self->boundary_length + 2);
 	    return NULL;
 	}
-
-        /* otherwise just remove the boundary. */
-	self->buffer += (self->boundary_length + 2);
-	self->buffer_len -= (self->boundary_length + 2);
-	return NULL;
     }
 
-    if (start > 0) {           /* read up to the boundary */
-	*blen = start > bytes ? bytes : start;
-    } 
-    else {    /* read the requested number of bytes */
-	/*
+        /* read the requested number of bytes, and
 	 * leave enough bytes in the buffer to allow us to read
 	 * the boundary.  Thanks to Kevin Hendrick for finding
 	 * this one.
 	 */
-	*blen = bytes - (self->boundary_length + 1);
+
+    *blen = (start > 0) ? start :
+	self->buffer_len - (self->boundary_length + 1); 
+
+    *blen = min(*blen, bytes);
+
+    retval = self->buffer + self->buffer_off;
+    self->buffer_len -= *blen; 
+
+    if (!self->buffer_len) { 
+	self->buffer_off = 0; /* no data left, so we reset buffer */
+    } else { 
+	self->buffer_off += *blen; 
     }
-    
-    retval = ap_pstrndup(self->r->pool, self->buffer, *blen); 
-    
-    self->buffer += *blen;
-    self->buffer_len -= *blen;
 
     /* If we hit the boundary, remove the CRLF from the end. */
+
     if (start > 0) {
 	*blen -= 2;
 	retval[*blen] = '\0';
     }
 
-    return retval;
+    return retval; /* retval is volatile, and must be copied immediately. */ 
 }
 
 table *multipart_buffer_headers(multipart_buffer *self)
 {
-    int end=0, ok=0, bad=0;
+    int end=-1, length=0, bytes=FILLUNIT;
     table *tab;
     char *header;
+    char *entry;
+
+    /* This loop will ultimately allocate roughly T * (N+1) / 2  
+     * bytes of memory - here T is the total size of input data to be read, 
+     * and N is the number of loops required to read it all in, i.e.
+     * N = T / bytes. As this is a header read, there's no good reason
+     * why T is more than 1KB in size, and hence this loop should
+     * only redo itself at most once.
+     */
+
+    do {	
+
+	char *mem;
+
+	multipart_buffer_fill(self, bytes);
+
+	if (mem = my_memmem( self->buffer + self->buffer_off, self->buffer_len, 
+			  CRLF_CRLF, 4)) 
+	{
+	    /* have final header 'fragment' in the buffer */
+
+	    end = mem - ( self->buffer + self->buffer_off );
+
+	    header = my_join( self->subp, 
+			      header, length,
+			      self->buffer + self->buffer_off, end+2 );
+
+	    length += end+2;
+	    self->buffer_off += end+4;
+	    self->buffer_len -= end+4;
+
+	} else if (self->buffer_len > 3) {
+	    /* fetch entire header fragment */
+
+	    header = my_join( self->subp, 
+			      header, length,
+			      self->buffer + self->buffer_off, self->buffer_len-3);
+
+	    length += self->buffer_len - 3;
+	    self->buffer_off += self->buffer_len - 3; 
+	    self->buffer_len = 3;
+
+	} else if (self->length <= 0) {
+	    /* bad header read, check out early */
+
+	    ap_log_rerror( MPB_ERROR, "[libapreq]: Bad header read near EOF." );
+	    return NULL;
 
-    do {
-	char *str;
-	multipart_buffer_fill(self, FILLUNIT);
-	if ((str = strstr(self->buffer, CRLF_CRLF))) {
-	    ++ok;
-	    end = str - self->buffer;
-	}
-	if (self->buffer == NULL || *self->buffer == '\0') {
-	    ++ok;
-	}
-	if (!ok && self->length <= 0) {
-	    ++bad;
 	}
-    } while (!ok && !bad);
 
-    if (bad) {
-	return NULL;
-    }
+    } while (end < 0);
+
 
-    header = ap_pstrndup(self->r->pool, self->buffer, end+2);
-    /*XXX: need to merge continuation lines here?*/
+#if DEBUG == 1 
+   ap_log_rerror(MPB_ERROR,
+ 		  "[libapreq]: DONE READING HEADER: OFF=%d, LENGTH=%d\n%s", 
+		 self->buffer_off,
+		 self->buffer_len, self->buffer + self->buffer_off);
+#endif /* DEBUG */
 
     tab = ap_make_table(self->r->pool, 10);
-    self->buffer += end+4;
-    self->buffer_len -= end+4;
 
-    {
-	char *entry;
-	while ((entry = ap_getword_nc(self->r->pool, &header, '\r')) && *header) {
-	    char *key;
-	    key = ap_getword_nc(self->r->pool, &entry, ':');
-	    while (ap_isspace(*entry)) {
-		++entry;
-	    }
-	    if (*header == '\n') {
-		++header;
-	    }
-	    ap_table_add(tab, key, entry);
+    while ((entry = ap_getword_nc(self->subp, &header, '\r')) && *header) {
+	char *key;
+	key = ap_getword_nc(self->subp, &entry, ':');
+	while (ap_isspace(*entry)) {
+	    ++entry;
 	}
+	if (*header == '\n') {
+	    ++header;
+	}
+	ap_table_add(tab, key, entry); /* copies key & entry strings to tab */
     }
 
+    ap_clear_pool(self->subp); /* OK to clean up here */
+
     return tab;
 }
 
 char *multipart_buffer_read_body(multipart_buffer *self) 
 {
+    /* This function is NOT used for file uploads! 
+       See multipart_buffer_read instead. */
+
     char *data, *retval=NULL;
-    int blen = 0, old_len = 0;
+    int blen = 0, old_len = 0, bytes = FILLUNIT;
 
-    while ((data = multipart_buffer_read(self, 0, &blen))) {
+    /* This loop will ultimately allocate roughly T * (N+1) / 2  
+     * bytes of memory - here T is the total size of input data to be read, 
+     * and N is the number of loops required to read it all in, i.e.
+     * N = T / bytes. Since we have no way of knowing T in advance (all we
+     * know is that T <= self->length), we'll just hope that T is at most
+     * a few KB, in which case N = 1 or 2. Since this memory is cleaned up
+     * before we call this function again, this should be fine.  If you're
+     * going to allow users to POST large amounts of data, either make them
+     * upload it through a file or use enctype=x-www-form-urlencoded in your 
+     * form.  If you're still having memory troubles here, do a client-side 
+     * check on the size of the form's values before submitting the data to
+     * the server and/or increase FILLUNIT and BUFSIZE in multipart_buffer.h
+     * as appropriate. Don't forget about your POST_MAX settings!
+     */
+
+    while ((data = multipart_buffer_read(self, bytes, &blen))) {
 	retval = retval ?
-	    my_join(self->r->pool, retval, old_len, data, blen) :
-	    ap_pstrndup(self->r->pool, data, blen);
-	old_len = blen;
+	    my_join(self->subp, retval, old_len, data, blen) :
+	    ap_pstrndup(self->subp, data, blen);
+	old_len += blen;
     }
 
+    /* can't clear subp here w/o destroying retval */	    
+
     return retval;
 }
 
@@ -307,14 +410,14 @@
     multipart_buffer *self = (multipart_buffer *)
 	ap_pcalloc(r->pool, sizeof(multipart_buffer));
 
+    self->subp = ap_make_sub_pool(r->pool);
     self->r = r;
     self->length = length;
     self->boundary = ap_pstrcat(r->pool, "--", boundary, NULL);
     self->boundary_length = strlen(self->boundary);
     self->boundary_end = ap_pstrcat(r->pool, self->boundary, "--", NULL);
-    self->buffer = NULL;
-    self->buffer_len = 0;
-    self->subp = ap_make_sub_pool(self->r->pool);
+    self->buffer_off = 0; 
+    self->buffer_len = 0; 
 
     /* Read the preamble and the topmost (boundary) line plus the CRLF. */
     (void)multipart_buffer_read(self, 0, &blen);
@@ -325,3 +428,5 @@
 
     return self;
 }
+
+
diff -ur libapreq-0.31/c/multipart_buffer.h libapreq/c/multipart_buffer.h
--- libapreq-0.31/c/multipart_buffer.h	Sat May  1 02:44:28 1999
+++ libapreq/c/multipart_buffer.h	Sat Jul  1 00:29:29 2000
@@ -1,24 +1,30 @@
-#include "apache_request.h"
+#include "apache_request.h"	/* JS: 6/30/2000 - more comments and fixes */
+
+#define BUFSIZE (1024 * 8)  	/* >> maximum possible boundary_length */ 
+#define FILLUNIT (1024 * 4) 	/* < BUFSIZE  (default byte read) */
 
 typedef struct {
     request_rec *r;
     pool *subp;
     long length;
-    long total;
-    long boundary_length;
-    char *boundary;
+    long total;	
+    long boundary_length;   	/* ~ 40 ; see BUFSIZE above */
+    char *boundary;	
     char *boundary_end;
-    char *buffer;
-    long buffer_len;
+    char buffer[BUFSIZE];
+    long buffer_len; 		/* size of new data in buff[BUFSIZE] */
+    long buffer_off; 		/* NEW: starting point of new data in buff[BUFSIZE] */
 } multipart_buffer;
 
 #define multipart_buffer_eof(self) \
-(((self->buffer == NULL) || (*self->buffer == '\0')) && (self->length <= 0))
-
+(((self->buffer_len == 0 ) || (*(self->buffer + self->buffer_off) == '\0')) \
+&& (self->length <= 0))
 char *multipart_buffer_read_body(multipart_buffer *self); 
 table *multipart_buffer_headers(multipart_buffer *self);
-void multipart_buffer_fill(multipart_buffer *self, long bytes);
+void multipart_buffer_flush(multipart_buffer *self); /* new */
+int multipart_buffer_fill(multipart_buffer *self, long bytes);
 char *multipart_buffer_read(multipart_buffer *self, long bytes, int *blen);
 multipart_buffer *multipart_buffer_new(char *boundary, long length, request_rec *r);
-
 #define MPB_ERROR APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, self->r
+
+



Reply via email to