Hello, I have done my planned speedup for _ic_fetch() for 2.0.1. It is attached. Note that this is a patch against 2.0, NOT cvs-head. (Whether dbmysql.patch is applied or not doesn't matter).
I have switched to a more correct way of creating patches; apply with patch -p1, inside the dbmail directory. The code is extensively commented (search for "/*MR" to read my comments), and has sanity checks in place that fall back to the old way. After some testing and some "peer review" I think it could go into 2.0.1. Unfortunately, while it is exactly what I was planning, and there is only one query per message now, the speedup turned out to be way smaller than I expected. Previously it fetched 20 messages per second on my machine; now it fetches about 30 messages per second. I have a further idea on fixing that, but it's a "HACK HACK HACK" thing. I'll try it and then tell you more ;) Yours, Mikhail Ramendik
diff -ur dbmail-orig/imapcommands.c dbmail-new/imapcommands.c --- dbmail-orig/imapcommands.c 2004-10-26 14:39:45.000000000 +0400 +++ dbmail-new/imapcommands.c 2004-10-26 16:47:05.000000000 +0400 @@ -1,6 +1,8 @@ /* Copyright (C) 1999-2004 IC & S [EMAIL PROTECTED] + Modified by Mikhail Ramendik [EMAIL PROTECTED] (MR) + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either @@ -2146,6 +2148,11 @@ int no_parsing_at_all = 1, getrfcsize = 0, getinternaldate = 0, getflags = 0; char *lastchar = NULL; +/*MR added this*/ + int we_have_prefetched_data; + u64_t index_in_msginfo, index_in_msginfo_next; +/*MR end of addition*/ + memset(&fetch_list, 0, sizeof(fetch_list)); memset(&headermsg, 0, sizeof(headermsg)); @@ -2198,17 +2205,30 @@ return 1; } - if (fetchitem.msgparse_needed) { - no_parsing_at_all = 0; - } else if (no_parsing_at_all) { - if (fetchitem.getFlags) - getflags = 1; - if (fetchitem.getSize) - getrfcsize = 1; - if (fetchitem.getInternalDate) - getinternaldate = 1; - /* the msgUID will be retrieved anyway so if it is requested, it will be fetched */ - } +/*MR here the changes start. + + The idea is simple. The code that gets the items that don't require parsing - + flags, rfcsize, internaldate - in one quesry was there before, for the + no_parsing_at_all case. Now we use it for all cases, instead of getting this data + with separate queries for every message. I'll call this code "prefetch". + + These messages theoretically should not change between the prefetch and the message + fetch. We alreasy have a set of UIDs in ud->mailbox.seq_list, and it SHOULD be the same. + But just to make sure, we will do a sanity check anyway, and so the system will survive + an inserted or appended message. (And log the fact). +*/ + + if (fetchitem.msgparse_needed) no_parsing_at_all = 0; + + /*MR as we use the prefetch code in all cases, we need the values for it in all cases as well*/ + if (fetchitem.getFlags) + getflags = 1; + if (fetchitem.getSize) + getrfcsize = 1; + if (fetchitem.getInternalDate) + getinternaldate = 1; + /* the msgUID will be retrieved anyway so if it is requested, it will be fetched */ + if (fetchitem.msgparse_needed && only_main_header_parsing) { /* check to see wether all the information can be retrieved from the @@ -2334,43 +2354,49 @@ fetch_end--; } - if (no_parsing_at_all) { - trace(TRACE_DEBUG, - "ic_fetch(): no parsing at all"); - /* all the info we need can be retrieved by a single - * call to db_get_msginfo_range() - */ - if (!imapcommands_use_uid) { - /* find the msgUID's to use */ - lo = ud->mailbox.seq_list[fetch_start]; - hi = ud->mailbox.seq_list[fetch_end]; +/*MR here is the prefetch code. It was under if (no_parsing_at_all) ; it's not under it now.*/ - } else { - lo = fetch_start; - hi = fetch_end; - } - /* (always retrieve uid) */ - result = - db_get_msginfo_range(lo, hi, ud->mailbox.uid, - getflags, getinternaldate, - getrfcsize, 1, &msginfo, - &nmatching); + /* all the info we need can be retrieved by a single + * call to db_get_msginfo_range() + MR: we need this info anyway so let's retrieve it*/ + if (!imapcommands_use_uid) { + /* find the msgUID's to use */ + lo = ud->mailbox.seq_list[fetch_start]; + hi = ud->mailbox.seq_list[fetch_end]; + } else { + lo = fetch_start; + hi = fetch_end; + } - if (result == -1) { - list_freelist(&fetch_list.start); - ci_write(ci->tx, - "* BYE internal dbase error\r\n"); - return -1; - } + /* (always retrieve uid) */ + /*MR IMPORTANT WARNING. My code will depend on the msginfo data being ordered by UID, + ascending. This is what db_get_msginfo_range currently does. If it's not su, stuff + won't break but will fall back to the old method*/ + result = + db_get_msginfo_range(lo, hi, ud->mailbox.uid, + getflags, getinternaldate, + getrfcsize, 1, &msginfo, + &nmatching); + if (result == -1) { + list_freelist(&fetch_list.start); + ci_write(ci->tx, + "* BYE internal dbase error\r\n"); + return -1; + } - if (result == -2) { - list_freelist(&fetch_list.start); - ci_write(ci->tx, "* BYE out of memory\r\n"); - return -1; - } + if (result == -2) { + list_freelist(&fetch_list.start); + ci_write(ci->tx, "* BYE out of memory\r\n"); + return -1; + } +/*MR The following code is indeed for the non-parsed case*/ + if (no_parsing_at_all) { + trace(TRACE_DEBUG, + "ic_fetch(): no parsing at all"); + for (i = 0; i < nmatching; i++) { if (getrfcsize && msginfo[i].rfcsize == 0) { /* parse the message to calc the size */ @@ -2519,11 +2545,31 @@ } /* if there is no parsing at all, this loop is not needed */ +/*MR this is where my main changes lie. We have msginfo already, with the data for + all these messages. But, we introduce a sanity check here; if the uid in the + msginfo is wrong, we get the data the old way. + + Note: we depend on two things. First, that the data in msginfo is arranged by + UID ascending - as the current code in db_get_msginfo_range() does. And second, + if we are fetching by sequential number not UID, then for a larger sequential + number the UID will also be larger. This is what RFC3501 says. + + If any of these dependencies break, the code is likely to work the old slow way. + But it should not break anyway. +*/ + index_in_msginfo_next = 0; +/*MR index_in_msginfo will hold the current index in msginfo + index_in_msginfo_next will be the value for the next loop run. We could determine it at the + end of the previous loop run - but I really can't be sure where the loop ends in this code. + I'd rather waste some RAM bytes than risk a hidden bug*/ + for (i = fetch_start; i <= fetch_end && !no_parsing_at_all; i++) { thisnum = (imapcommands_use_uid ? i : ud->mailbox. seq_list[i]); + /*MR the code was present before but is very useful for my purpose. + thisnum now holds the uid, so we'll use it for the sanity check*/ insert_rfcsize = 0; if (imapcommands_use_uid) { @@ -2557,8 +2603,34 @@ bad_response_send = 0; isfirstfetchout = 1; - - while (curr && !bad_response_send) { + + /*MR index_in_msginfo becomes what it should be for this loop run*/ + index_in_msginfo = index_in_msginfo_next; + + /*MR here is the sanity check*/ + if (index_in_msginfo < nmatching) { + /*MR at least the index is valid, let's check the UID*/ + if (thisnum == msginfo[index_in_msginfo].uid) { + /*MR all OK*/ + we_have_prefetched_data = 1; + } else { + /*MR sanity check failed!*/ + trace(TRACE_DEBUG, + "Prefetched UID not the same as real UID, retrieving data separately for this message"); + we_have_prefetched_data = 0; + }; + } else { + /*MR the index is invalid */ + trace(TRACE_DEBUG, + "There are more real messages than prefetched data, retrieving data separately for this message"); + we_have_prefetched_data = 0; + }; + + /*MR if the current index_in_msginfo is used, then use the next one for the next loop run, + otherwise keep the same */ + index_in_msginfo_next = (we_have_prefetched_data ? index_in_msginfo+1 : index_in_msginfo); + + while (curr && !bad_response_send) { fi = (fetch_items_t *) curr->data; fflush(ci->tx); @@ -2566,18 +2638,23 @@ /* check RFC822.SIZE request */ if (fi->getSize) { - /* ok, try to fetch size from dbase */ - if (db_get_rfcsize - (thisnum, ud->mailbox.uid, - &rfcsize) == -1) { - ci_write(ci->tx, - "\r\n* BYE internal dbase error\r\n"); - list_freelist(&fetch_list. - start); - return -1; - } - - if (rfcsize == 0) { + /*MR if we do have prefetched data, let's use it. If not, let's query*/ + + if (we_have_prefetched_data) { + rfcsize = msginfo[index_in_msginfo].rfcsize; + } else { + /* ok, try to fetch size from dbase */ + if (db_get_rfcsize + (thisnum, ud->mailbox.uid, + &rfcsize) == -1) { + ci_write(ci->tx, + "\r\n* BYE internal dbase error\r\n"); + list_freelist(&fetch_list.start); + return -1; + }; + }; + + if (rfcsize == 0) { /* field is empty in dbase, message needs to be parsed */ fi->msgparse_needed = 1; @@ -2701,17 +2778,21 @@ } if (fi->getInternalDate) { - result = - db_get_msgdate(ud->mailbox.uid, + /*MR if we do have prefetched data, let's use it. If not, let's query*/ + if (we_have_prefetched_data) { + strncpy(date,msginfo[index_in_msginfo].internaldate,IMAP_INTERNALDATE_LEN); + } else { + result = + db_get_msgdate(ud->mailbox.uid, thisnum, date); - if (result == -1) { - ci_write(ci->tx, - "\r\n* BYE internal dbase error\r\n"); - list_freelist(&fetch_list. - start); - db_free_msg(&headermsg); - return -1; - } + if (result == -1) { + ci_write(ci->tx, + "\r\n* BYE internal dbase error\r\n"); + list_freelist(&fetch_list.start); + db_free_msg(&headermsg); + return -1; + }; + }; if (isfirstfetchout) isfirstfetchout = 0; @@ -3768,19 +3849,27 @@ isfirstout = 1; - result = - db_get_msgflag_all(thisnum, + /*MR if we do have prefetched data, let's use it. If not, let's query*/ + + if (we_have_prefetched_data) { + for (j = 0; j < IMAP_NFLAGS; j++) { + msgflags[j]=msginfo[index_in_msginfo].flags[j]; + }; + } else { + result = + db_get_msgflag_all(thisnum, ud->mailbox. uid, msgflags); - if (result == -1) { - ci_write(ci->tx, - "\r\n* BYE internal dbase error\r\n"); - list_freelist(&fetch_list. + if (result == -1) { + ci_write(ci->tx, + "\r\n* BYE internal dbase error\r\n"); + list_freelist(&fetch_list. start); - db_free_msg(&headermsg); - return -1; - } + db_free_msg(&headermsg); + return -1; + }; + }; for (j = 0; j < IMAP_NFLAGS; j++) { if (msgflags[j]) {