Tõnu Samuel wrote:
>
> I found that the Changelog does not mention the serious speed
> improvement in the latest mnogosearch, although it exists in the code.
> Today I investigated the MySQL logs made by mnogosearch again and tried
> to eliminate the "INSERT INTO url ..." commands from them. I implemented
> this using a global buffer that holds the URLs to be inserted and is
> flushed when needed.
>
> The attached patch has worked nicely for me for 10 minutes :), so it may
> still contain bugs. I am not going to test it very carefully anyway, so
> you can experiment with it and, if you find it usable, use it.
>
Sorry, new patch included ;>
--
MySQL Development Team
__ ___ ___ ____ __
/ |/ /_ __/ __/ __ \/ / Tonu Samuel <[EMAIL PROTECTED]>
/ /|_/ / // /\ \/ /_/ / /__ MySQL AB, http://www.mysql.com/
/_/ /_/\_, /___/\___\_\___/ Tallinn, Estonia
<___/
diff -urN mnogosearch-3.1.11/include/udm_db.h mnogosearch-3.1.11-tonu/include/udm_db.h
--- mnogosearch-3.1.11/include/udm_db.h Tue Feb 20 13:12:36 2001
+++ mnogosearch-3.1.11-tonu/include/udm_db.h Fri Feb 23 16:48:30 2001
@@ -19,6 +19,8 @@
#define UDM_DB_SAPDB 11
#define UDM_DB_FILES 100
+#define UDM_MAXSQLCOMMANDLENGTH (64*1024) /* This means we TRY to keep command
+lengths below this value */
+
/* FIXME: hide this function into SQL.c */
extern char * UdmDBEscStr(int dbtype,char * dst,const char *src);
@@ -27,6 +29,7 @@
extern int UdmLoadServerTable(UDM_AGENT * Indexer, char * path,int flags);
extern int UdmFindURL(UDM_AGENT * Indexer,char *url);
extern int UdmAddURL(UDM_AGENT * Indexer,char *url,int referrer,int hops, char *
msg_id, UDM_SERVER * Server);
+extern int UdmFlushAddURL(UDM_AGENT *);
extern int UdmDeleteWordFromURL(UDM_AGENT* Indexer,int url_id);
extern int UdmDeleteCrossWordFromURL(UDM_AGENT* Indexer,int url_id,int ref_id);
extern int UdmDeleteUrl(UDM_AGENT* Indexer,int url_id);
diff -urN mnogosearch-3.1.11/src/hrefs.c mnogosearch-3.1.11-tonu/src/hrefs.c
--- mnogosearch-3.1.11/src/hrefs.c Mon Feb 19 14:55:44 2001
+++ mnogosearch-3.1.11-tonu/src/hrefs.c Fri Feb 23 16:48:30 2001
@@ -127,6 +127,10 @@
added++;
}
}
+ /* UdmFlushAddURL(Indexer); could be called here, but it is not required.
+ * Skipping it is faster; calling it would add some safety.
+ */
+
/* Remember last stored URL num */
/* Note that it will became 0 */
/* after next sort in AddUrl */
diff -urN mnogosearch-3.1.11/src/parser.c mnogosearch-3.1.11-tonu/src/parser.c
--- mnogosearch-3.1.11/src/parser.c Wed Feb 7 14:17:39 2001
+++ mnogosearch-3.1.11-tonu/src/parser.c Fri Feb 23 16:48:30 2001
@@ -250,7 +250,7 @@
arg2pos=strstr(parser->cmd,"$2");
/* Build temp file names and command line */
- tmpnam(fn0);strcpy(fn1,fn0);
+ mkstemp(fn0);strcpy(fn1,fn0);
fnames[0]=strcat(fn0,".in");
fnames[1]=strcat(fn1,".out");
UdmBuildParamStr(cmd,sizeof(cmd),parser->cmd,fnames,2);
diff -urN mnogosearch-3.1.11/src/sql.c mnogosearch-3.1.11-tonu/src/sql.c
--- mnogosearch-3.1.11/src/sql.c Tue Feb 20 11:16:41 2001
+++ mnogosearch-3.1.11-tonu/src/sql.c Fri Feb 23 16:58:40 2001
@@ -144,6 +144,8 @@
static const char udmver[]= PACKAGE "-" VERSION "/" DBVER;
static char emptystr[]="";
+static char * url_qb=NULL,*url_qe=NULL;
+static size_t url_len=0,url_recordno=0;
__INDLIB__ const char * UdmVersion(void){
return(udmver);
@@ -2635,6 +2637,7 @@
char qbuf[UDMSTRSIZ]="AddURL";
const char * tag="", * category="";
+
if(Server){
if(Server->tag)tag=Server->tag;
if(Server->category)category=Server->category;
@@ -2688,7 +2691,42 @@
case UDM_DB_MYSQL:
/* MySQL generates itself */
- default:
+
+ /* Initialize INSERT buffer if not done yet */
+ if(!url_qb) {
+ url_qb=(char*)malloc(UDM_MAXSQLCOMMANDLENGTH);
+#ifdef NEWS_EXT
+ sprintf(url_qb,"INSERT INTO url
+(url,referrer,hops,crc32,last_index_time,next_index_time,status,msg_id,tag,category)
+VALUES ");
+#else
+ sprintf(url_qb,"INSERT INTO url
+(url,referrer,hops,crc32,last_index_time,next_index_time,status,tag,category) VALUES
+");
+#endif
+ url_qe=url_qb+strlen(url_qb);
+ } else {
+ /* Buffer was initialized already. We just add to it */
+ if(url_recordno++>0) strcpy(url_qe++,",");
+#ifdef NEWS_EXT
+
+sprintf(url_qe,"('%s',%d,%d,next_url_id.nextval,0,%d,%d,0,'%s','%s','%s')",
+ e_url,referrer,hops,(int)now(),(int)now(),msg_id,tag,category);
+#else
+ sprintf(url_qe,"('%s',%d,%d,0,%d,%d,0,'%s','%s')",
+ e_url,referrer,hops,(int)now(),(int)now(),tag,category);
+#endif
+ url_qe=url_qe+strlen(url_qe);
+ url_len=url_qe-url_qb;
+ if (UDM_MAXSQLCOMMANDLENGTH < url_len+UDM_MAXWORDSIZE+100) { /* Danger!
+We need to flush buffer */
+ /*
+ * Let's flush it when command is 64K long.
+ * MySQL gives error if we exceed maximum packet size which is
+ * (if I remember properly) by default is 16Megs ;)
+ * But having some safety is not bad at all
+ *
+ * [EMAIL PROTECTED]
+ */
+ UdmFlushAddURL(Indexer);
+ }
+ }
+ break;
+ default:
#ifdef NEWS_EXT
sprintf(qbuf,"INSERT INTO url
(url,referrer,hops,crc32,last_index_time,next_index_time,status,msg_id,tag,category)
VALUES
('%s',%d,%d,0,%d,%d,0,'%s','%s','%s')",e_url,referrer,hops,(int)now(),(int)now(),msg_id,tag,category);
#else
@@ -2698,12 +2736,34 @@
}
/* Exec INSERT now */
+ if(Indexer->Conf->DBType != UDM_DB_MYSQL) {
sql_query(Indexer,qbuf);
if(UdmDBErrorCode(Indexer->db))
return(IND_ERROR);
else
return(IND_OK);
+ }
+ return(IND_OK);
}
+
+/*
+ * Send the buffered multi-row INSERT and free the buffer. Returns IND_ERROR
+ * if the query failed (queued rows are then dropped), otherwise IND_OK.
+ */
+int UdmFlushAddURL(UDM_AGENT *Indexer) {
+	int rc=IND_OK; /* Nothing buffered is not an error */
+	if (url_qb) { /* Can be already flushed */
+		if (url_recordno) { /* Can be incomplete INSERT statement */
+			sql_query(Indexer,url_qb);
+			if(UdmDBErrorCode(Indexer->db)) rc=IND_ERROR;
+		}
+		free(url_qb); /* Always reset, so a failed flush cannot overflow the buffer */
+		url_qe=url_qb=NULL;
+		url_recordno=url_len=0;
+	}
+	return(rc);
+}
+
int UdmDeleteUrl(UDM_AGENT *Indexer,int url_id){
char qbuf[UDMSTRSIZ];
int res;
@@ -3107,7 +3167,7 @@
strcat(sortstr,", hops ASC ");
}
}
-
+ UdmFlushAddURL(Indexer); /* Would be stupid if only URLs to index are remained
+in cache */
#ifdef HAVE_SQL_LIMIT
sprintf(qbuf,
"SELECT
url,rec_id,docsize,status,last_index_time,hops,crc32,last_mod_time FROM url WHERE
next_index_time<=%d %s%s%s%s%s%s LIMIT %d",