Hello,

I have done my planned speedup for _ic_fetch() for 2.0.1. It is
attached. Note that this is a patch against 2.0, NOT cvs-head. (Whether
dbmysql.patch is applied or not doesn't matter).

I have switched to a more correct way of creating patches; apply with
patch -p1, inside the dbmail directory.

The code is extensively commented (search for "/*MR" to read my
comments), and has sanity checks in place that fall back to the old way.
After some testing and some "peer review" I think it could go into
2.0.1.

Unfortunately, while it is exactly what I was planning, and there is
only one query per message now, the speedup turned out to be way smaller
than I expected. Previously it fetched 20 messages per second on my
machine; now it fetches about 30 messages per second.

I have a further idea on fixing that, but it's a "HACK HACK HACK" thing.
I'll try it and then tell you more ;)

Yours, Mikhail Ramendik

diff -ur dbmail-orig/imapcommands.c dbmail-new/imapcommands.c
--- dbmail-orig/imapcommands.c	2004-10-26 14:39:45.000000000 +0400
+++ dbmail-new/imapcommands.c	2004-10-26 16:47:05.000000000 +0400
@@ -1,6 +1,8 @@
 /*
  Copyright (C) 1999-2004 IC & S  [EMAIL PROTECTED]
 
+ Modified by Mikhail Ramendik [EMAIL PROTECTED] (MR)
+
  This program is free software; you can redistribute it and/or 
  modify it under the terms of the GNU General Public License 
  as published by the Free Software Foundation; either 
@@ -2146,6 +2148,11 @@
 	int no_parsing_at_all = 1, getrfcsize = 0, getinternaldate =
 	    0, getflags = 0;
 	char *lastchar = NULL;
+/*MR added this*/        
+        int we_have_prefetched_data;
+        u64_t index_in_msginfo, index_in_msginfo_next;
+/*MR end of addition*/        
+        
 	memset(&fetch_list, 0, sizeof(fetch_list));
 	memset(&headermsg, 0, sizeof(headermsg));
 
@@ -2198,17 +2205,30 @@
 			return 1;
 		}
 
-		if (fetchitem.msgparse_needed) {
-			no_parsing_at_all = 0;
-		} else if (no_parsing_at_all) {
-			if (fetchitem.getFlags)
-				getflags = 1;
-			if (fetchitem.getSize)
-				getrfcsize = 1;
-			if (fetchitem.getInternalDate)
-				getinternaldate = 1;
-			/* the msgUID will be retrieved anyway so if it is requested, it will be fetched */
-		}
+/*MR here the changes start.
+
+     The idea is simple. The code that gets the items that don't require parsing - 
+     flags, rfcsize, internaldate - in one quesry was there before, for the 
+     no_parsing_at_all case. Now we use it for all cases, instead of getting this data
+     with separate queries for every message. I'll call this code "prefetch". 
+
+     These messages theoretically should not change between the prefetch and the message
+     fetch. We alreasy have a set of UIDs in ud->mailbox.seq_list, and it SHOULD be the same.
+     But just to make sure, we will do a sanity check anyway, and so the system will survive
+     an inserted or appended message. (And log the fact).                
+*/      
+
+		if (fetchitem.msgparse_needed) no_parsing_at_all = 0;
+                
+                /*MR as we use the prefetch code in all cases, we need the values for it in all cases as well*/
+		if (fetchitem.getFlags)
+			getflags = 1;
+		if (fetchitem.getSize)
+			getrfcsize = 1;
+		if (fetchitem.getInternalDate)
+			getinternaldate = 1;
+		/* the msgUID will be retrieved anyway so if it is requested, it will be fetched */
+		
 
 		if (fetchitem.msgparse_needed && only_main_header_parsing) {
 			/* check to see wether all the information can be retrieved from the
@@ -2334,43 +2354,49 @@
 			fetch_end--;
 		}
 
-		if (no_parsing_at_all) {
-			trace(TRACE_DEBUG,
-			      "ic_fetch(): no parsing at all");
 
-			/* all the info we need can be retrieved by a single
-			 * call to db_get_msginfo_range()
-			 */
-			if (!imapcommands_use_uid) {
-				/* find the msgUID's to use */
-				lo = ud->mailbox.seq_list[fetch_start];
-				hi = ud->mailbox.seq_list[fetch_end];
+/*MR here is the prefetch code. It was under if (no_parsing_at_all) ; it's not under it now.*/
 
-			} else {
-				lo = fetch_start;
-				hi = fetch_end;
-			}
 
-			/* (always retrieve uid) */
-			result =
-			    db_get_msginfo_range(lo, hi, ud->mailbox.uid,
-						 getflags, getinternaldate,
-						 getrfcsize, 1, &msginfo,
-						 &nmatching);
+		/* all the info we need can be retrieved by a single
+		 * call to db_get_msginfo_range()
+		 MR: we need this info anyway so let's retrieve it*/
+		if (!imapcommands_use_uid) {
+			/* find the msgUID's to use */
+			lo = ud->mailbox.seq_list[fetch_start];
+			hi = ud->mailbox.seq_list[fetch_end];
+		} else {
+			lo = fetch_start;
+			hi = fetch_end;
+		}
 
-			if (result == -1) {
-				list_freelist(&fetch_list.start);
-				ci_write(ci->tx,
-					"* BYE internal dbase error\r\n");
-				return -1;
-			}
+		/* (always retrieve uid) */
+                /*MR IMPORTANT WARNING. My code will depend on the msginfo data being ordered by UID, 
+                  ascending. This is what db_get_msginfo_range currently does. If it's not su, stuff 
+                  won't break but will fall back to the old method*/                  
+		result =
+		    db_get_msginfo_range(lo, hi, ud->mailbox.uid,
+					 getflags, getinternaldate,
+					 getrfcsize, 1, &msginfo,
+					 &nmatching);
+		if (result == -1) {
+		list_freelist(&fetch_list.start);
+			ci_write(ci->tx,
+				"* BYE internal dbase error\r\n");
+			return -1;
+		}
 
-			if (result == -2) {
-				list_freelist(&fetch_list.start);
-				ci_write(ci->tx, "* BYE out of memory\r\n");
-				return -1;
-			}
+		if (result == -2) {
+			list_freelist(&fetch_list.start);
+			ci_write(ci->tx, "* BYE out of memory\r\n");
+			return -1;
+		}
 
+/*MR The following code is indeed for the non-parsed case*/
+                if (no_parsing_at_all) {
+			trace(TRACE_DEBUG,
+			      "ic_fetch(): no parsing at all"); 
+                
 			for (i = 0; i < nmatching; i++) {
 				if (getrfcsize && msginfo[i].rfcsize == 0) {
 					/* parse the message to calc the size */
@@ -2519,11 +2545,31 @@
 		}
 
 		/* if there is no parsing at all, this loop is not needed */
+/*MR this is where my main changes lie. We have msginfo already, with the data for
+     all these messages. But, we introduce a sanity check here; if the uid in the
+     msginfo is wrong, we get the data the old way.
+     
+     Note: we depend on two things. First, that the data in msginfo is arranged by 
+     UID ascending - as the current code in db_get_msginfo_range() does. And second,
+     if we are fetching by sequential number not UID, then for a larger sequential
+     number the UID will also be larger. This is what RFC3501 says.
+     
+     If any of these dependencies break, the code is likely to work the old slow way. 
+     But it should not break anyway.
+*/
+                index_in_msginfo_next = 0; 
+/*MR index_in_msginfo will hold the current index in msginfo
+     index_in_msginfo_next will be the value for the next loop run. We could determine it at the
+     end of the previous loop run - but I really can't be sure where the loop ends in this code. 
+     I'd rather waste some RAM bytes than risk a hidden bug*/
+     
 		for (i = fetch_start; i <= fetch_end && !no_parsing_at_all;
 		     i++) {
 			thisnum =
 			    (imapcommands_use_uid ? i : ud->mailbox.
 			     seq_list[i]);
+                        /*MR the code was present before but is very useful for my purpose. 
+                             thisnum now holds the uid, so we'll use it for the sanity check*/
 			insert_rfcsize = 0;
 
 			if (imapcommands_use_uid) {
@@ -2557,8 +2603,34 @@
 			bad_response_send = 0;
 
 			isfirstfetchout = 1;
-
-			while (curr && !bad_response_send) {
+                        
+                        /*MR index_in_msginfo becomes what it should be for this loop run*/
+                        index_in_msginfo = index_in_msginfo_next;
+
+                        /*MR here is the sanity check*/
+			if (index_in_msginfo < nmatching) {
+                            /*MR at least the index is valid, let's check the UID*/
+                            if (thisnum == msginfo[index_in_msginfo].uid) {
+                                /*MR all OK*/
+                                we_have_prefetched_data = 1;
+                            } else {
+                                /*MR sanity check failed!*/
+                                trace(TRACE_DEBUG,
+                                      "Prefetched UID not the same as real UID, retrieving data separately for this message");
+                                we_have_prefetched_data = 0;
+                            };
+                        } else {
+                            /*MR the index is invalid */
+                            trace(TRACE_DEBUG,
+                                  "There are more real messages than prefetched data, retrieving data separately for this message");
+                            we_have_prefetched_data = 0;
+                        };
+                            
+                        /*MR if the current index_in_msginfo is used, then use the next one for the next loop run, 
+                             otherwise keep the same */
+                        index_in_msginfo_next = (we_have_prefetched_data ? index_in_msginfo+1 : index_in_msginfo);
+                        
+                        while (curr && !bad_response_send) {
 				fi = (fetch_items_t *) curr->data;
 				fflush(ci->tx);
 
@@ -2566,18 +2638,23 @@
 
 				/* check RFC822.SIZE request */
 				if (fi->getSize) {
-					/* ok, try to fetch size from dbase */
-					if (db_get_rfcsize
-					    (thisnum, ud->mailbox.uid,
-					     &rfcsize) == -1) {
-						ci_write(ci->tx,
-							"\r\n* BYE internal dbase error\r\n");
-						list_freelist(&fetch_list.
-							      start);
-						return -1;
-					}
-
-					if (rfcsize == 0) {
+					/*MR if we do have prefetched data, let's use it. If not, let's query*/
+                                        
+                                        if (we_have_prefetched_data) {
+                                            rfcsize = msginfo[index_in_msginfo].rfcsize;
+                                        } else {
+                                            /* ok, try to fetch size from dbase */
+    					    if (db_get_rfcsize
+					        (thisnum, ud->mailbox.uid,
+					        &rfcsize) == -1) {
+					        	ci_write(ci->tx,
+						        	"\r\n* BYE internal dbase error\r\n");
+						        list_freelist(&fetch_list.start);
+						        return -1;
+					        };
+                                        };
+                                        
+        				if (rfcsize == 0) {
 						/* field is empty in dbase, message needs 
 						   to be parsed */
 						fi->msgparse_needed = 1;
@@ -2701,17 +2778,21 @@
 				}
 
 				if (fi->getInternalDate) {
-					result =
-					    db_get_msgdate(ud->mailbox.uid,
+					/*MR if we do have prefetched data, let's use it. If not, let's query*/
+                                        if (we_have_prefetched_data) {
+                                            strncpy(date,msginfo[index_in_msginfo].internaldate,IMAP_INTERNALDATE_LEN); 
+                                        } else {                   
+                                            result =
+					        db_get_msgdate(ud->mailbox.uid,
 							   thisnum, date);
-					if (result == -1) {
-						ci_write(ci->tx,
-							"\r\n* BYE internal dbase error\r\n");
-						list_freelist(&fetch_list.
-							      start);
-						db_free_msg(&headermsg);
-						return -1;
-					}
+					    if (result == -1) {
+						    ci_write(ci->tx,
+					        	"\r\n* BYE internal dbase error\r\n");
+						    list_freelist(&fetch_list.start);
+						    db_free_msg(&headermsg);
+						    return -1;
+					    };
+                                        };
 
 					if (isfirstfetchout)
 						isfirstfetchout = 0;
@@ -3768,19 +3849,27 @@
 
 					isfirstout = 1;
 
-					result =
-					    db_get_msgflag_all(thisnum,
+					/*MR if we do have prefetched data, let's use it. If not, let's query*/
+                                        
+                                        if (we_have_prefetched_data) {
+                                            for (j = 0; j < IMAP_NFLAGS; j++) {
+                                                msgflags[j]=msginfo[index_in_msginfo].flags[j];
+                                            };
+                                        } else {                                        
+                                            result =
+					        db_get_msgflag_all(thisnum,
 							       ud->mailbox.
 							       uid,
 							       msgflags);
-					if (result == -1) {
-						ci_write(ci->tx,
-							"\r\n* BYE internal dbase error\r\n");
-						list_freelist(&fetch_list.
+					    if (result == -1) {
+						    ci_write(ci->tx,
+						    	"\r\n* BYE internal dbase error\r\n");
+						    list_freelist(&fetch_list.
 							      start);
-						db_free_msg(&headermsg);
-						return -1;
-					}
+						    db_free_msg(&headermsg);
+						    return -1;
+					    };
+                                        };
 
 					for (j = 0; j < IMAP_NFLAGS; j++) {
 						if (msgflags[j]) {

Reply via email to