Re: UdmSearch: Webboard: Segfault (grrr)
Hello! We finally found a bug in cache.c. New version is in attachement. Everybody who has problems with splitter's crashes are welcome to test. Please, give feedback! #include "udm_config.h" #include stdio.h #include stdlib.h #include string.h #include errno.h #ifdef HAVE_UNISTD_H #include unistd.h #endif #include sys/types.h #include fcntl.h #include sys/stat.h #include dirent.h #include "udm_common.h" #include "udm_utils.h" #include "udm_charset.h" #include "udm_spell.h" #include "udm_cache.h" #include "udm_crc32.h" #include "udm_indexer.h" #include "udm_db.h" #include "udm_boolean.h" #include "udm_searchtool.h" #include "udm_agent.h" #include "udm_xmalloc.h" #include "udm_stopwords.h" #include "udm_proto.h" #ifdef HAVE_WINSOCK_H #include winsock.h #endif #ifdef HAVE_SYS_SOCKET_H #include sys/socket.h #endif #ifdef HAVE_NETINET_IN_H #include netinet/in.h #endif #ifdef HAVE_ARPA_INET_H #include arpa/inet.h #endif #ifdef HAVE_NETDB_H #include netdb.h #endif #ifndef INADDR_NONE #define INADDR_NONE ((unsigned long) -1) #endif #define DEBUG_SEARCH 1 static int open_host(char *hostname,int port, int timeout) { int net; struct hostent *host; struct sockaddr_in sa_in; bzero((char*)sa_in,sizeof(sa_in)); if (port){ sa_in.sin_port= htons((u_short)port); }else{ return(UDM_NET_ERROR); } if ((sa_in.sin_addr.s_addr=inet_addr(hostname)) != INADDR_NONE){ sa_in.sin_family=AF_INET; }else{ host=gethostbyname(hostname); if (host){ sa_in.sin_family=host-h_addrtype; memcpy(sa_in.sin_addr, host-h_addr, (size_t)host-h_length); }else{ return(UDM_NET_CANT_RESOLVE); } } net=socket(AF_INET, SOCK_STREAM, 0); if(connect(net, (struct sockaddr *)sa_in, sizeof (sa_in))) return(UDM_NET_CANT_CONNECT); return(net); } /***/ #define LOGDIR "raw" #define TREEDIR "tree" #define SPLDIR "splitter" /**/ typedef struct udm_cache_table { int wrd_id; int weight; int pos; int len; } UDM_CACHETABLE; typedef struct udm_cache_hheader { int ntables; int version; } UDM_CACHEHEADER; typedef struct 
udm_cacheword_struct { int url_id; #ifdef UDM_STORE_CACHE_WRDPOS int wrd_pos; #endif #ifdef UDM_STORE_CACHE_SITEID int site_id; #endif #ifdef UDM_STORE_CACHE_CATEGORY int category; #endif #ifdef UDM_STORE_CACHE_TAG int tag; #endif } UDM_CACHEWORD; / Convert category string into 32 bit number */ static void UdmDecodeCatStr(const char * cat_str, int * cat, int * mask){ unsigned int t[5]; char str[128]=""; strcpy(str,cat_str); strcat(str,""); str[10]=0; sscanf(str,"%02x%02x%02x%02x%02x",t+0,t+1,t+2,t+3,t+4); *mask= (int)( (t[0]?0x7F25:0) | (t[1]?0x7F18:0) | (t[2]?0x3F12:0) | (t[3]?0x3F6:0) | (t[4]?0x3F0:0)); *cat= (int)((t[0]25) | (t[1]18) | (t[2]12) | (t[3]6) | (t[4]0)); } /*** Sort functions **/ /* Sort SEARCHWORD by url_id order */ static int cmpurlid(const void *s1,const void *s2){ return(((const UDM_SEARCHWORD*)s1)-url_id-((const UDM_SEARCHWORD*)s2)-url_id); } /* Function to sort LOGWORD list in (WRD_ID,TIME_STAMP) order */ static int cmplog(const void *s1,const void *s2){ unsigned int n1,n2; n1=((const UDM_LOGWORD*)s1)-wrd_id; n2=((const UDM_LOGWORD*)s2)-wrd_id; if(n1==n2){ n1=((const UDM_LOGWORD*)s1)-url_id; n2=((const UDM_LOGWORD*)s2)-url_id; if(n1==n2){ n1=((const UDM_LOGWORD*)s2)-stamp; n2=((const UDM_LOGWORD*)s1)-stamp; } } if(n1n2)return(-1); if(n1n2)return(1); return(0); } /* Function to sort LOGDEL list in URL_ID order */ static int cmpurldellog(const void *s1,const void *s2){ unsigned int n1,n2; n1=((const UDM_LOGDEL*)s1)-url_id; n2=((const UDM_LOGDEL*)s2)-url_id; if(n1==n2){ n1=((const UDM_LOGDEL*)s2)-stamp; n2=((const UDM_LOGDEL*)s1)-stamp; } if(n1n2)return(-1); if(n1n2)return(1); return(0); } /* Function to sort CACHEWORD list in URL_ID order */ static int cmpcache(const void *s1,const void *s2){ int n1,n2; n1=((const UDM_LOGWORD*)s1)-wrd_id; n2=((const UDM_LOGWORD*)s2)-wrd_id; if(n1==n2){ n1=((const UDM_LOGWORD*)s1)-weight; n2=((const UDM_LOGWORD*)s2)-weight; if(n1==n2){ n1=((const UDM_LOGWORD*)s1)-url_id; n2=((const
Re: UdmSearch: Webboard: Segfault (grrr)
Caffeinate The World wrote: --- Alexander Barkov [EMAIL PROTECTED] wrote: Caffeinate The World wrote: --- Alexander Barkov [EMAIL PROTECTED] wrote: Hello! We finally found a bug in cache.c. New version is in attachment. Everybody who has problems with splitter's crashes is welcome to test. should the 'tree' directory be removed? can we split the raw log files we have thus far or is re-indexing necessary? I hope it should work without having to remove tree directory. But better to remove it. It is safe to use old /raw and /splitter files without having to reindex. ok. what exactly was the bug? There were actually two bugs. The first one was because of table[4096] in sql.c. Sometimes it may become larger. Now dynamic realloc'ing has been added. The second one was that the cmpcache() function passed to qsort gave an ordering slightly different from the intended one. BTW. We found this using your logs for NetBSD/Alpha. Thanks! __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
--- Alexander Barkov [EMAIL PROTECTED] wrote: Caffeinate The World wrote: --- Alexander Barkov [EMAIL PROTECTED] wrote: Hello! We finally found a bug in cache.c. New version is in attachement. Everybody who has problems with splitter's crashes are welcome to test. should the 'tree' directory be removed? can we split the raw log files we have thus far or is re-indexing necessary? I hope it should work without having to remove tree directory. But better to remove it. It is safe to use old /raw and /splitter files without having to reindex. ok. what exactly was the bug? __ Do You Yahoo!? Get personalized email addresses from Yahoo! Mail - only $35 a year! http://personal.mail.yahoo.com/ __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
There were actually two bugs. The first one was because of table[4096] in sql.c. Sometimes it may become larger. Now dynamic realloc'ing has been added. Oops. I mean in cache.c, not sql.c. The second one was that the cmpcache() function passed to qsort gave an ordering slightly different from the intended one. BTW. We found this using your logs for NetBSD/Alpha. Thanks! __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED] __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Alexander Barkov skrev: We finally found a bug in cache.c. New version is in attachement. Everybody who has problems with splitter's crashes are welcome to test. Please, give feedback! You guys are great! I'll re-compile and get back to you with reports. BTW, can I remove http://search.freewinds.cx/garbage_in_sbin.tar.gz now? Z -- oracle@everywhere: The ephemeral source of the eternal truth... __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Zenon Panoussis wrote: Alexander Barkov skrev: We finally found a bug in cache.c. New version is in attachement. Everybody who has problems with splitter's crashes are welcome to test. Please, give feedback! You guys are great! I'll re-compile and get back to you with reports. BTW, can I remove http://search.freewinds.cx/garbage_in_sbin.tar.gz now? I think yes. __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
--- Alexander Barkov [EMAIL PROTECTED] wrote: Hello! We finally found a bug in cache.c. New version is in attachement. Everybody who has problems with splitter's crashes are welcome to test. should the 'tree' directory be removed? can we split the raw log files we have thus far or is re-indexing necessary? __ Do You Yahoo!? Get personalized email addresses from Yahoo! Mail - only $35 a year! http://personal.mail.yahoo.com/ __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Alexander Barkov skrev: We finally found a bug in cache.c. New version is in attachement. Everybody who has problems with splitter's crashes are welcome to test. Please, give feedback! Oops. Something else is not OK: cache.c:687:87: warning: #ifdef with no argument cache.c:692:87: warning: #ifdef with no argument cache.c:697:87: warning: #ifdef with no argument cache.c:702:87: warning: #ifdef with no argument cache.c: In function `UdmFindCache': cache.c:969: parse error before `?' cache.c:982: `real_num' undeclared (first use in this function) cache.c:982: (Each undeclared identifier is reported only once cache.c:982: for each function it appears in.) cache.c:994: `fd1' undeclared (first use in this function) cache.c:996: `group' undeclared (first use in this function) cache.c:1000: `group_num' undeclared (first use in this function) cache.c: At top level: cache.c:1011: initializer element is not constant cache.c:1011: warning: data definition has no type or storage class cache.c:1012: parse error before string constant cache.c:1013: parse error before string constant cache.c:1013: warning: data definition has no type or storage class cache.c:1014: redefinition of `ticks' cache.c:1011: `ticks' previously defined here cache.c:1014: initializer element is not constant cache.c:1014: warning: data definition has no type or storage class cache.c:1015: parse error before string constant cache.c:1015: warning: data definition has no type or storage class cache.c:1024: `i' undeclared here (not in a function) cache.c:1024: parse error before `.' 
cache.c:1030: register name not specified for `p' cache.c:1032: parse error before `if' cache.c:1035: `pmerg' undeclared here (not in a function) cache.c:1035: `pmerg' undeclared here (not in a function) cache.c:1035: warning: data definition has no type or storage class cache.c:1036: parse error before `' cache.c:1043: `k' undeclared here (not in a function) cache.c:1043: warning: data definition has no type or storage class cache.c:1044: parse error before `}' cache.c:1046: conflicting types for `p' cache.c:1030: previous declaration of `p' cache.c:1046: `pmerg' undeclared here (not in a function) cache.c:1046: warning: data definition has no type or storage class cache.c:1047: parse error before `' cache.c:1048: parse error before `-' cache.c:1058: warning: initialization makes integer from pointer without a cast cache.c:1058: warning: data definition has no type or storage class cache.c:1058: parse error before `}' cache.c:1061: redefinition of `ticks' cache.c:1014: `ticks' previously defined here cache.c:1061: initializer element is not constant cache.c:1061: warning: data definition has no type or storage class cache.c:1063: parse error before string constant cache.c:1071: warning: parameter names (without types) in function declaration cache.c:1071: conflicting types for `UdmGroupByURL' ../include/udm_searchtool.h:7: previous declaration of `UdmGroupByURL' cache.c:1071: warning: data definition has no type or storage class cache.c:1072: parse error before `}' make[1]: *** [cache.lo] Error 1 make[1]: Leaving directory `/root/mnogosearch-3.1.10/src' make: *** [all-recursive] Error 1 -- oracle@everywhere: The ephemeral source of the eternal truth... __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Zenon Panoussis skrev: Oops. Something else is not OK: cache.c:687:87: warning: #ifdef with no argument [etc] I think that the mailer is responsible for this. There are lots of broken lines in the code, that shouldn't be broken. Perhaps it's better to attach the file in .gz format instead of text. Z -- oracle@everywhere: The ephemeral source of the eternal truth... __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
i didn't get this error on my NetBSD/Alpha. compile was fine. what system are you on? --- Zenon Panoussis [EMAIL PROTECTED] wrote: Alexander Barkov skrev: We finally found a bug in cache.c. New version is in attachement. Everybody who has problems with splitter's crashes are welcome to test. Please, give feedback! Oops. Something else is not OK: cache.c:687:87: warning: #ifdef with no argument cache.c:692:87: warning: #ifdef with no argument cache.c:697:87: warning: #ifdef with no argument cache.c:702:87: warning: #ifdef with no argument cache.c: In function `UdmFindCache': cache.c:969: parse error before `?' cache.c:982: `real_num' undeclared (first use in this function) cache.c:982: (Each undeclared identifier is reported only once cache.c:982: for each function it appears in.) cache.c:994: `fd1' undeclared (first use in this function) cache.c:996: `group' undeclared (first use in this function) cache.c:1000: `group_num' undeclared (first use in this function) cache.c: At top level: cache.c:1011: initializer element is not constant cache.c:1011: warning: data definition has no type or storage class cache.c:1012: parse error before string constant cache.c:1013: parse error before string constant cache.c:1013: warning: data definition has no type or storage class cache.c:1014: redefinition of `ticks' cache.c:1011: `ticks' previously defined here cache.c:1014: initializer element is not constant cache.c:1014: warning: data definition has no type or storage class cache.c:1015: parse error before string constant cache.c:1015: warning: data definition has no type or storage class cache.c:1024: `i' undeclared here (not in a function) cache.c:1024: parse error before `.' 
cache.c:1030: register name not specified for `p' cache.c:1032: parse error before `if' cache.c:1035: `pmerg' undeclared here (not in a function) cache.c:1035: `pmerg' undeclared here (not in a function) cache.c:1035: warning: data definition has no type or storage class cache.c:1036: parse error before `' cache.c:1043: `k' undeclared here (not in a function) cache.c:1043: warning: data definition has no type or storage class cache.c:1044: parse error before `}' cache.c:1046: conflicting types for `p' cache.c:1030: previous declaration of `p' cache.c:1046: `pmerg' undeclared here (not in a function) cache.c:1046: warning: data definition has no type or storage class cache.c:1047: parse error before `' cache.c:1048: parse error before `-' cache.c:1058: warning: initialization makes integer from pointer without a cast cache.c:1058: warning: data definition has no type or storage class cache.c:1058: parse error before `}' cache.c:1061: redefinition of `ticks' cache.c:1014: `ticks' previously defined here cache.c:1061: initializer element is not constant cache.c:1061: warning: data definition has no type or storage class cache.c:1063: parse error before string constant cache.c:1071: warning: parameter names (without types) in function declaration cache.c:1071: conflicting types for `UdmGroupByURL' ../include/udm_searchtool.h:7: previous declaration of `UdmGroupByURL' cache.c:1071: warning: data definition has no type or storage class cache.c:1072: parse error before `}' make[1]: *** [cache.lo] Error 1 make[1]: Leaving directory `/root/mnogosearch-3.1.10/src' make: *** [all-recursive] Error 1 -- oracle@everywhere: The ephemeral source of the eternal truth... __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED] __ Do You Yahoo!? Get personalized email addresses from Yahoo! Mail - only $35 a year! http://personal.mail.yahoo.com/ __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Is this file to be used with 3.1.9 sources, or 3.1.10? (Either is fine - I can adjust as necessary quite easily). Thanks for the fix. I have over a million urls inserted and climbing. :-) -- Dan On Thu, 15 Feb 2001, Alexander Barkov wrote: Dan,please take new cache.c and recompile everything. It should fix the problem. [EMAIL PROTECTED] wrote: I just have to put in my encounters here, because they seem very similar. I get a large amount of information indexed, but upon trying to run splitter, it will core dump somewhere midway through, and on one round left wierd directories in the $VAR/raw directory: [root@spider raw]# ls -al total 32988 drwxr-xr-x 5 root root 8192 Feb 14 04:13 . drwxr-xr-x 6 root root 4096 Feb 13 01:58 .. drwxr-xr-x 3 root root 4096 Feb 13 03:12 64 -rw--- 1 root root 33132544 Feb 14 04:13 core -rw-r--r-- 1 root root 8464 Feb 14 04:22 del.log -rw-r--r-- 1 root root 566272 Feb 14 04:22 wrd.log drwxr-xr-x 3 root root 4096 Feb 13 03:58 ??64 drwxr-xr-x 3 root root 4096 Feb 13 06:06 ??18 [root@spider raw]# Unfortunately I wasn't thinking and I deleted all the .done files, and not all of the logs were split. Well, back to indexing... I'm using 3.1.9 on Linux/Oracle -- Dan Hanks On Wed, 14 Feb 2001, Zenon Panoussis wrote: Zenon Panoussis skrev: By now, I have almost 1 GB of indexed files, 4 indexer crashes and one splitter crash. I'll do the debugging and post its output tomorrow. === # gdb indexer core.indexer.01 GNU gdb 5.0 Copyright 2000 Free Software Foundation, Inc. GDB is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions. Type "show copying" to see the conditions. There is absolutely no warranty for GDB. Type "show warranty" for details. This GDB was configured as "i386-redhat-linux"... Core was generated by `./indexer -m -s 200'. Program terminated with signal 11, Segmentation fault. Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done. 
Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10 Reading symbols from /lib/libm.so.6...done. Loaded symbols for /lib/libm.so.6 Reading symbols from /usr/lib/libz.so.1...done. Loaded symbols for /usr/lib/libz.so.1 Reading symbols from /lib/libc.so.6...done. Loaded symbols for /lib/libc.so.6 Reading symbols from /lib/libcrypt.so.1...done. Loaded symbols for /lib/libcrypt.so.1 Reading symbols from /lib/libnsl.so.1...done. Loaded symbols for /lib/libnsl.so.1 Reading symbols from /lib/ld-linux.so.2...done. Loaded symbols for /lib/ld-linux.so.2 Reading symbols from /lib/libnss_files.so.2...done. Loaded symbols for /lib/libnss_files.so.2 Reading symbols from /lib/libnss_nisplus.so.2...done. Loaded symbols for /lib/libnss_nisplus.so.2 Reading symbols from /lib/libnss_nis.so.2...done. Loaded symbols for /lib/libnss_nis.so.2 Reading symbols from /lib/libnss_dns.so.2...done. Loaded symbols for /lib/libnss_dns.so.2 Reading symbols from /lib/libresolv.so.2...done. Loaded symbols for /lib/libresolv.so.2 #0 0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at crc32.c:97 97 _CRC32_(crc, *p) ; (gdb) print crc $1 = 1928826335 (gdb) print p $2 = 0x40431000 Address 0x40431000 out of bounds === # gdb indexer core.indexer.02 snip loading #0 0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at crc32.c:97 97 _CRC32_(crc, *p) ; (gdb) print crc $1 = 835566978 (gdb) print p $2 = 0x40404000 Address 0x40404000 out of bounds === # gdb indexer core.indexer.03 snip loading #0 0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at crc32.c:97 97 _CRC32_(crc, *p) ; (gdb) print crc $1 = 2869617068 (gdb) print p $2 = 0x40404000 Address 0x40404000 out of bounds === # gdb indexer core.indexer.04 snip loading (gdb) print crc $1 = 1253677059 (gdb) print p $2 = 0x40431000 Address 0x40431000 out of bounds === And finally the splitter: # gdb splitter core.splitter.01 snip copyright This GDB was configured as "i386-redhat-linux"... 
Core was generated by `/usr/local/mnogo3110/sbin/splitter'. Program terminated with signal 11, Segmentation fault. Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done. Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10 Reading symbols from /lib/libm.so.6...done. Loaded symbols for /lib/libm.so.6 Reading symbols from /usr/lib/libz.so.1...done. Loaded symbols for /usr/lib/libz.so.1 Reading symbols
Re: UdmSearch: Webboard: Segfault (grrr)
Caffeinate The World skrev: i've been going through this and back again time and time again. what would really be nice is indexer save the logs in a format that's easy to use again. for instance, you can use the format re-index to sql etc. or if you want to reindex again, you don't have to crawl through all the external websites. saves a lot of time and we can debug faster. I'm not sure what you mean here. The Mirror statement does just that (and luckily, I had an almost complete mirror already). Z -- oracle@everywhere: The ephemeral source of the eternal truth... __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Zenon Panoussis wrote: Caffeinate The World skrev: i've been going through this and back again time and time again. what would really be nice is indexer save the logs in a format that's easy to use again. for instance, you can use the format re-index to sql etc. or if you want to reindex again, you don't have to crawl through all the external websites. saves a lot of time and we can debug faster. I'm not sure what you mean here. The Mirror statement does just that (and luckily, I had an almost complete mirror already). Yes, mirroring features are for the fast reindexing. __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
i completely forgot about this feature!!! i read about it when i first started using mnogosearch, but never bothered to use it. with mirror feature, wouldn't it be easy to implement Google's "cache" feature where the user can view a cache of the page from the last time you indexed. I think it's possible. Moreover, we may use zlib to compress those files, so they'll use less space. __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Alexander Barkov wrote: i completely forgot about this feature!!! i read about it when i first started using mnogosearch, but never bothered to use it. with mirror feature, wouldn't it be easy to implement Google's "cache" feature where the user can view a cache of the page from the last time you indexed. I think it's possible. Moreover, we may use zlib to compress those files, so they'll use less space. The only disadvantage is that it will not work on huge search engines with millions of documents. There is a limit on the total number of files per file system in most unixes. For example, my 30G /usr partition on a FreeBSD box can hold about 8 million files. __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
--- Alexander Barkov [EMAIL PROTECTED] wrote: Alexander Barkov wrote: i completely forgot about this feature!!! i read about it when i first started using mnogosearch, but never bothered to use it. with mirror feature, wouldn't it be easy to implement Google's "cache" feature where the user can view a cache of the page from the last time you indexed. I think it's possible. Moreover, we may use zlib to compress those files, so they'll use less space. The only one disadvantage is that it will not work on huge search engines with millions documents. There is a limit on total file number on file system in most unixes. For example, my 30G /usr partition on FreeBSD box can create about 8 mln files. is that a per file system limit or per unix box limit? __ Do You Yahoo!? Get personalized email addresses from Yahoo! Mail - only $35 a year! http://personal.mail.yahoo.com/ __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Caffeinate The World wrote: The only one disadvantage is that it will not work on huge search engines with millions documents. There is a limit on total file number on file system in most unixes. For example, my 30G /usr partition on FreeBSD box can create about 8 mln files. is that a per file system limit or per unix box limit? Per file system limit. __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
--- Alexander Barkov [EMAIL PROTECTED] wrote: Caffeinate The World wrote: The only one disadvantage is that it will not work on huge search engines with millions documents. There is a limit on total file number on file system in most unixes. For example, my 30G /usr partition on FreeBSD box can create about 8 mln files. is that a per file system limit or per unix box limit? Per file system limit. couldn't you do something like mount multiple FS: sd0a /data/part1 sd1a /data/part2 ... sdna /data/partn wouldn't that work? __ Do You Yahoo!? Get personalized email addresses from Yahoo! Mail - only $35 a year! http://personal.mail.yahoo.com/ __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
The only disadvantage is that it will not work on huge search engines with millions of documents. There is a limit on the total number of files per file system in most unixes. For example, my 30G /usr partition on a FreeBSD box can hold about 8 million files. is that a per file system limit or per unix box limit? Generally it's a limitation with how the file system has been created. Different parameters when creating the filesystem will yield different results. So the number of available inodes is really dependent on the parameters with which you create the filesystem. On Linux it's dependent on parameters for block_size, bytes_per_inode, etc. -- Dan = Daniel Hanks Network Administrator Web Services Group About The Human Internet http://about.com/ 1253 N. Research Way, Suite Q-2500. Orem, UT 84097 ph: 801-437-6023 fax: 801-437-6020 email: [EMAIL PROTECTED] __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Zenon Panoussis skrev: By now, I have almost 1 GB of indexed files, 4 indexer crashes and one splitter crash. I'll do the debugging and post its output tomorrow. === # gdb indexer core.indexer.01 GNU gdb 5.0 Copyright 2000 Free Software Foundation, Inc. GDB is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions. Type "show copying" to see the conditions. There is absolutely no warranty for GDB. Type "show warranty" for details. This GDB was configured as "i386-redhat-linux"... Core was generated by `./indexer -m -s 200'. Program terminated with signal 11, Segmentation fault. Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done. Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10 Reading symbols from /lib/libm.so.6...done. Loaded symbols for /lib/libm.so.6 Reading symbols from /usr/lib/libz.so.1...done. Loaded symbols for /usr/lib/libz.so.1 Reading symbols from /lib/libc.so.6...done. Loaded symbols for /lib/libc.so.6 Reading symbols from /lib/libcrypt.so.1...done. Loaded symbols for /lib/libcrypt.so.1 Reading symbols from /lib/libnsl.so.1...done. Loaded symbols for /lib/libnsl.so.1 Reading symbols from /lib/ld-linux.so.2...done. Loaded symbols for /lib/ld-linux.so.2 Reading symbols from /lib/libnss_files.so.2...done. Loaded symbols for /lib/libnss_files.so.2 Reading symbols from /lib/libnss_nisplus.so.2...done. Loaded symbols for /lib/libnss_nisplus.so.2 Reading symbols from /lib/libnss_nis.so.2...done. Loaded symbols for /lib/libnss_nis.so.2 Reading symbols from /lib/libnss_dns.so.2...done. Loaded symbols for /lib/libnss_dns.so.2 Reading symbols from /lib/libresolv.so.2...done. 
Loaded symbols for /lib/libresolv.so.2 #0 0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at crc32.c:97 97 _CRC32_(crc, *p) ; (gdb) print crc $1 = 1928826335 (gdb) print p $2 = 0x40431000 Address 0x40431000 out of bounds === # gdb indexer core.indexer.02 snip loading #0 0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at crc32.c:97 97 _CRC32_(crc, *p) ; (gdb) print crc $1 = 835566978 (gdb) print p $2 = 0x40404000 Address 0x40404000 out of bounds === # gdb indexer core.indexer.03 snip loading #0 0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at crc32.c:97 97 _CRC32_(crc, *p) ; (gdb) print crc $1 = 2869617068 (gdb) print p $2 = 0x40404000 Address 0x40404000 out of bounds === # gdb indexer core.indexer.04 snip loading (gdb) print crc $1 = 1253677059 (gdb) print p $2 = 0x40431000 Address 0x40431000 out of bounds === And finally the splitter: # gdb splitter core.splitter.01 snip copyright This GDB was configured as "i386-redhat-linux"... Core was generated by `/usr/local/mnogo3110/sbin/splitter'. Program terminated with signal 11, Segmentation fault. Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done. Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10 Reading symbols from /lib/libm.so.6...done. Loaded symbols for /lib/libm.so.6 Reading symbols from /usr/lib/libz.so.1...done. Loaded symbols for /usr/lib/libz.so.1 Reading symbols from /lib/libc.so.6...done. Loaded symbols for /lib/libc.so.6 Reading symbols from /lib/libcrypt.so.1...done. Loaded symbols for /lib/libcrypt.so.1 Reading symbols from /lib/libnsl.so.1...done. Loaded symbols for /lib/libnsl.so.1 Reading symbols from /lib/ld-linux.so.2...done. 
Loaded symbols for /lib/ld-linux.so.2 #0 0x8057d15 in UdmSplitCacheLog (log=118) at cache.c:635 635 logwords[count+j].wrd_id=table[w].wrd_id; (gdb) print count $1 = 13121220 (gdb) print count+j $2 = 13125316 (gdb) print logwords $3 = (UDM_LOGWORD *) 0x0 (gdb) print table[w] $4 = {wrd_id = 1918989871, weight = 1869507887, pos = 825454439, len = 1949249585} (gdb) print logwords[count+j] Cannot access memory at address 0x15e7bd70 === This time I'm keeping the core dumps, so let me know if there's anything else you want me to check. Apart from this, I got some garbage directories with misnamed splitter files in them in sbin: # pwd /usr/local/mnogo3110/sbin # ls -l snip normal stuff -rw-r--r--1 root root 457672 Feb 13 08:28 ??? drwxr-xr-x3 root root 4096 Feb 13 08:28 ???3F -rw-r--r--1 root root 487224 Feb 13 08:27 mE?56.tmp # ls -lR .: total 403708 -rwxr-xr-x1 root root67163 Feb 12 18:02 cachelogd -rw-r--r--1 root root0 Feb 14 06:18 cachelogd.out -rw---1 root root 3862528 Feb 13 02:39 core.indexer.01 -rw---1 root root 3416064 Feb 13 06:06 core.indexer.02 -rw---1 root root 2953216 Feb 13 06:57 core.indexer.03 -rw---1 root root 3235840 Feb 13 07:51 core.indexer.04
Re: UdmSearch: Webboard: Segfault (grrr)
I just have to put in my encounters here, because they seem very similar. I get a large amount of information indexed, but upon trying to run splitter, it will core dump somewhere midway through, and on one round left wierd directories in the $VAR/raw directory: [root@spider raw]# ls -al total 32988 drwxr-xr-x 5 root root 8192 Feb 14 04:13 . drwxr-xr-x 6 root root 4096 Feb 13 01:58 .. drwxr-xr-x 3 root root 4096 Feb 13 03:12 64 -rw--- 1 root root 33132544 Feb 14 04:13 core -rw-r--r-- 1 root root 8464 Feb 14 04:22 del.log -rw-r--r-- 1 root root 566272 Feb 14 04:22 wrd.log drwxr-xr-x 3 root root 4096 Feb 13 03:58 ??64 drwxr-xr-x 3 root root 4096 Feb 13 06:06 ??18 [root@spider raw]# Unfortunately I wasn't thinking and I deleted all the .done files, and not all of the logs were split. Well, back to indexing... I'm using 3.1.9 on Linux/Oracle -- Dan Hanks On Wed, 14 Feb 2001, Zenon Panoussis wrote: Zenon Panoussis skrev: By now, I have almost 1 GB of indexed files, 4 indexer crashes and one splitter crash. I'll do the debugging and post its output tomorrow. === # gdb indexer core.indexer.01 GNU gdb 5.0 Copyright 2000 Free Software Foundation, Inc. GDB is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions. Type "show copying" to see the conditions. There is absolutely no warranty for GDB. Type "show warranty" for details. This GDB was configured as "i386-redhat-linux"... Core was generated by `./indexer -m -s 200'. Program terminated with signal 11, Segmentation fault. Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done. Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10 Reading symbols from /lib/libm.so.6...done. Loaded symbols for /lib/libm.so.6 Reading symbols from /usr/lib/libz.so.1...done. Loaded symbols for /usr/lib/libz.so.1 Reading symbols from /lib/libc.so.6...done. Loaded symbols for /lib/libc.so.6 Reading symbols from /lib/libcrypt.so.1...done. 
Loaded symbols for /lib/libcrypt.so.1 Reading symbols from /lib/libnsl.so.1...done. Loaded symbols for /lib/libnsl.so.1 Reading symbols from /lib/ld-linux.so.2...done. Loaded symbols for /lib/ld-linux.so.2 Reading symbols from /lib/libnss_files.so.2...done. Loaded symbols for /lib/libnss_files.so.2 Reading symbols from /lib/libnss_nisplus.so.2...done. Loaded symbols for /lib/libnss_nisplus.so.2 Reading symbols from /lib/libnss_nis.so.2...done. Loaded symbols for /lib/libnss_nis.so.2 Reading symbols from /lib/libnss_dns.so.2...done. Loaded symbols for /lib/libnss_dns.so.2 Reading symbols from /lib/libresolv.so.2...done. Loaded symbols for /lib/libresolv.so.2 #0 0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at crc32.c:97 97 _CRC32_(crc, *p) ; (gdb) print crc $1 = 1928826335 (gdb) print p $2 = 0x40431000 Address 0x40431000 out of bounds === # gdb indexer core.indexer.02 snip loading #0 0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at crc32.c:97 97 _CRC32_(crc, *p) ; (gdb) print crc $1 = 835566978 (gdb) print p $2 = 0x40404000 Address 0x40404000 out of bounds === # gdb indexer core.indexer.03 snip loading #0 0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at crc32.c:97 97 _CRC32_(crc, *p) ; (gdb) print crc $1 = 2869617068 (gdb) print p $2 = 0x40404000 Address 0x40404000 out of bounds === # gdb indexer core.indexer.04 snip loading (gdb) print crc $1 = 1253677059 (gdb) print p $2 = 0x40431000 Address 0x40431000 out of bounds === And finally the splitter: # gdb splitter core.splitter.01 snip copyright This GDB was configured as "i386-redhat-linux"... Core was generated by `/usr/local/mnogo3110/sbin/splitter'. Program terminated with signal 11, Segmentation fault. Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done. Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10 Reading symbols from /lib/libm.so.6...done. Loaded symbols for /lib/libm.so.6 Reading symbols from /usr/lib/libz.so.1...done. 
Loaded symbols for /usr/lib/libz.so.1 Reading symbols from /lib/libc.so.6...done. Loaded symbols for /lib/libc.so.6 Reading symbols from /lib/libcrypt.so.1...done. Loaded symbols for /lib/libcrypt.so.1 Reading symbols from /lib/libnsl.so.1...done. Loaded symbols for /lib/libnsl.so.1 Reading symbols from /lib/ld-linux.so.2...done. Loaded symbols for /lib/ld-linux.so.2 #0 0x8057d15 in UdmSplitCacheLog (log=118) at cache.c:635 635 logwords[count+j].wrd_id=table[w].wrd_id; (gdb) print count $1 = 13121220 (gdb) print count+j $2 = 13125316 (gdb) print logwords $3 = (UDM_LOGWORD *) 0x0 (gdb) print table[w] $4 = {wrd_id = 1918989871, weight
Re: UdmSearch: Webboard: Segfault (grrr)
Zenon Panoussis skrev: Now for 31 MB adventures :) # ./run-splitter -k Sending -HUP signal to cachelogd... Done # ./run-splitter -p Preparing logs... Open dir '/var/mnogo3110/raw' Preparing word log 982024900 [ 42176 bytes] Preparing word log 982027284 [31465324 bytes] Preparing word log 982027618 [ 8815804 bytes] Preparing del log 982024900 Preparing del log 982027284 Preparing del log 982027618 Renaming logs... Done Running ./run-splitter on these worked fine. No problems at all. After that, I went on indexing and created 59920 Feb 13 06:05 982040748.del.done 31457740 Feb 13 06:05 982040748.wrd.done 1480 Feb 13 06:06 982040807.del.done 637240 Feb 13 06:06 982040807.wrd.done 51920 Feb 13 07:21 982045300.del.done 31469304 Feb 13 07:21 982045300.wrd.done 69248 Feb 13 07:51 982047843.del.done 30213344 Feb 13 07:51 982047843.wrd.done another two 31 MB files and two smaller ones. All of them were split without problems. [two days later] Indexing kept crashing (see separate posting) and splitting kept going fine until tonight, when the opposite occurred. By now, I have almost 1 GB of indexed files, 4 indexer crashes and one splitter crash. I'll do the debugging and post its output tomorrow. Z -- oracle@everywhere: The ephemeral source of the eternal truth... __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
i've been going through this and back again time and time again. what would really be nice is for indexer to save the logs in a format that's easy to use again. for instance, you could use the format to re-index to sql etc. or if you want to reindex again, you don't have to crawl through all the external websites. saves a lot of time and we can debug faster. --- Zenon Panoussis [EMAIL PROTECTED] wrote: Zenon Panoussis skrev: Now for 31 MB adventures :) # ./run-splitter -k Sending -HUP signal to cachelogd... Done # ./run-splitter -p Preparing logs... Open dir '/var/mnogo3110/raw' Preparing word log 982024900 [ 42176 bytes] Preparing word log 982027284 [31465324 bytes] Preparing word log 982027618 [ 8815804 bytes] Preparing del log 982024900 Preparing del log 982027284 Preparing del log 982027618 Renaming logs... Done Running ./run-splitter on these worked fine. No problems at all. After that, I went on indexing and created 59920 Feb 13 06:05 982040748.del.done 31457740 Feb 13 06:05 982040748.wrd.done 1480 Feb 13 06:06 982040807.del.done 637240 Feb 13 06:06 982040807.wrd.done 51920 Feb 13 07:21 982045300.del.done 31469304 Feb 13 07:21 982045300.wrd.done 69248 Feb 13 07:51 982047843.del.done 30213344 Feb 13 07:51 982047843.wrd.done another two 31 MB files and two smaller ones. All of them were split without problems. [two days later] Indexing kept crashing (see separate posting) and splitting kept going fine until tonight, when the opposite occurred. By now, I have almost 1 GB of indexed files, 4 indexer crashes and one splitter crash. I'll do the debugging and post its output tomorrow. Z -- oracle@everywhere: The ephemeral source of the eternal truth... __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED] __ Do You Yahoo!? Get personalized email addresses from Yahoo! Mail - only $35 a year! http://personal.mail.yahoo.com/ __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Hi! Can you guys give us a log file produced by splitter -p which caused crash? We can't reproduce crash :-( Caffeinate The World wrote: i reported this problems a while back. i believe it's being worked on. atleast the recently found the bug why it wasn't splitting out to FFF. the seg fault happens during the splitter process and not index. i've been splitter when the logs are at about 2 MB and i've not had splitter core dump on me yet. but before when i let the log file build up to about 15 to 30 MB, i had that core dump problem. i hope this will be resolved soon because it's a pain in the behind. ;-( --- Zenon Panoussis [EMAIL PROTECTED] wrote: Author: Zenon Panoussis Email: [EMAIL PROTECTED] Message: RH Linux 7.0, search 3.1.9, MySQL 3.23.29, cache mode, with the new patches for cache.c and sql.c. It happens all the time. It started happening when "maximum size" 31 MB log files were indexed, but by now it happens on any indexing, no matter how big or small the log file, as if the database somehow was corrupt: Delete from cache-file /var/mnogo319/tree/12/B/12BFD000 /var/mnogo319/tree/12/C/12C1 old: 69 new: 1 total: 70 ./run-splitter: line 118: 18790 Segmentation fault (core dumped) $SPLITTER For the same log file it always crashes at the same index file (e.g. every time I try to reindex 12345678.log it will crash at tree/12/3/4567000). If I delete the log file and start again with a new log file, it will crash at a different place, but it will still be consistent in crashing at the same place every time. And the backtrace: # gdb splitter core GNU gdb 5.0 [...] This GDB was configured as "i386-redhat-linux"... Core was generated by `/usr/local/mnogo319/sbin/splitter'. Program terminated with signal 11, Segmentation fault. Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done. Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10 Reading symbols from /lib/libm.so.6...done. Loaded symbols for /lib/libm.so.6 Reading symbols from /usr/lib/libz.so.1...done. 
Loaded symbols for /usr/lib/libz.so.1 Reading symbols from /lib/libc.so.6...done. Loaded symbols for /lib/libc.so.6 Reading symbols from /lib/libcrypt.so.1...done. Loaded symbols for /lib/libcrypt.so.1 Reading symbols from /lib/libnsl.so.1...done. Loaded symbols for /lib/libnsl.so.1 Reading symbols from /lib/ld-linux.so.2...done. Loaded symbols for /lib/ld-linux.so.2 #0 0x8059061 in UdmSplitCacheLog (log=300) at cache.c:552 552 logwords[count+j].wrd_id=table[w].wrd_id; (gdb) backtrace #0 0x8059061 in UdmSplitCacheLog (log=300) at cache.c:552 #1 0x8049e89 in main (argc=1, argv=0xba94) at splitter.c:70 #2 0x4009bbfc in __libc_start_main (main=0x8049d80 main, argc=1, ubp_av=0xba94, init=0x80495bc _init, fini=0x8065b7c _fini, rtld_fini=0x4000d674 _dl_fini, stack_end=0xba8c) at ../sysdeps/generic/libc-start.c:118 Since 3.1.10 is coming out today, I'll try it and see if things work better. If not, I'll post more bad news later ;) __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Alexander Barkov skrev: Can you guys give us a log file produced by splitter -p which caused crash? We can't reproduce crash :-( Huh? splitter doesn't accept the -v5 argument, so it won't give more detailed logs than the normal ones. The only log I had, that to stdout, is the one I included with my first posting in this thread: Delete from cache-file /var/mnogo319/tree/12/B/12BFD000 /var/mnogo319/tree/12/C/12C1 old: 69 new: 1 total: 70 ./run-splitter: line 118: 18790 Segmentation fault (core dumped) $SPLITTER Until this point everything was normal. Anyway, as I said, I strongly suspect corruption in the word database. On a previous occasion when this happened, I deleted the entire tree/* directory structure and started all over again. Splitter worked like a dream with both small and big log files until one of the following occured: 1. I stopped indexer with ^C and then run splitter or 2. Splitter had to work itself through some 31 MB files. (These files are not all the same size; they tend to get slightly bigger the more they are, i.e. something like this: 0001.log31.500.000 bytes 0002.log31.550.000 bytes 0003.log31.580.000 bytes sort of). Unfortunately I haven't been making notes, so I can't tell for sure which one of these two things happened before things stopped working. I tried splitter again today with ./splitter splitter.log . It went in a very normal way *almost* as far as yesterday, and then hang so badly that not even kill -9 could kill it. The log of this run looks like snip normal operation Delete from cache-file /var/mnogo319/tree/12/B/12B27000 Delete from cache-file /var/mnogo319/tree/12/B/12B2D000 Delete from cache-file /var/mnogo319/tree/12/B/12B3 Delete from cache-file /var/mnogo319/tree/12/B/12B31000 Delete from cache-file /var/mnogo319/tree/12/B/12B3 I am attaching the three files that could be involved, namely tree/12/B/12B31000, 12B32000 and 12B35000. I'll install 3.1.10 now, try it on the old word database and see what it does. 
If it doesn't work, I'll remove the word database and start again from scratch. I'll try to make detailed notes this time and report back. Z -- oracle@everywhere: The ephemeral source of the eternal truth... wordfiles.tar.gz
Re: UdmSearch: Webboard: Segfault (grrr)
in my tests your 3 little files wouldn't make a difference. he would have to run splitter -p and splitter on all the files starting from the first original RAW file, including all the 31 MB file. i believe in my case it was the original 31mb file which caused the problem. while processing the first 31mb file, it didn't core dump, but all the preceding files did cause core dumps at unpredictable times, but often at the same location initially (i.e. 77C3000...) therefore, in order to recreate the scenario, one would have to start from the first raw file. i've tar-ed up such a series of files for Alex. perhaps he'll be able to find out why. my hypothesis is an array or buffer overflow in splitter.c. --- Zenon Panoussis [EMAIL PROTECTED] wrote: Alexander Barkov skrev: Can you guys give us a log file produced by splitter -p which caused crash? We can't reproduce crash :-( Huh? splitter doesn't accept the -v5 argument, so it won't give more detailed logs than the normal ones. The only log I had, that to stdout, is the one I included with my first posting in this thread: Delete from cache-file /var/mnogo319/tree/12/B/12BFD000 /var/mnogo319/tree/12/C/12C1 old: 69 new: 1 total: 70 ./run-splitter: line 118: 18790 Segmentation fault (core dumped) $SPLITTER Until this point everything was normal. Anyway, as I said, I strongly suspect corruption in the word database. On a previous occasion when this happened, I deleted the entire tree/* directory structure and started all over again. Splitter worked like a dream with both small and big log files until one of the following occurred: 1. I stopped indexer with ^C and then run splitter or 2. Splitter had to work itself through some 31 MB files. (These files are not all the same size; they tend to get slightly bigger the more they are, i.e. something like this: 0001.log31.500.000 bytes 0002.log31.550.000 bytes 0003.log31.580.000 bytes sort of). 
Unfortunately I haven't been making notes, so I can't tell for sure which one of these two things happened before things stopped working. I tried splitter again today with ./splitter splitter.log . It went in a very normal way *almost* as far as yesterday, and then hang so badly that not even kill -9 could kill it. The log of this run looks like snip normal operation Delete from cache-file /var/mnogo319/tree/12/B/12B27000 Delete from cache-file /var/mnogo319/tree/12/B/12B2D000 Delete from cache-file /var/mnogo319/tree/12/B/12B3 Delete from cache-file /var/mnogo319/tree/12/B/12B31000 Delete from cache-file /var/mnogo319/tree/12/B/12B3 I am attaching the three files that could be involved, namely tree/12/B/12B31000, 12B32000 and 12B35000. I'll install 3.1.10 now, try it on the old word database and see what it does. If it doesn't work, I'll remove the word database and start again from scratch. I'll try to make detailed notes this time and report back. Z -- oracle@everywhere: The ephemeral source of the eternal truth... ATTACHMENT part 2 application/x-gzip name=wordfiles.tar.gz __ Do You Yahoo!? Get personalized email addresses from Yahoo! Mail - only $35 a year! http://personal.mail.yahoo.com/ __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Could you please put zipped /var/mnogo319/tree/12/B/12BFD000 and a file /splitter/XXX.wrd with correspondent XXX.del which produce crash somewhere on the net? Zenon Panoussis wrote: Alexander Barkov skrev: Can you guys give us a log file produced by splitter -p which caused crash? We can't reproduce crash :-( Huh? splitter doesn't accept the -v5 argument, so it won't give more detailed logs than the normal ones. The only log I had, that to stdout, is the one I included with my first posting in this thread: Delete from cache-file /var/mnogo319/tree/12/B/12BFD000 /var/mnogo319/tree/12/C/12C1 old: 69 new: 1 total: 70 ./run-splitter: line 118: 18790 Segmentation fault (core dumped) $SPLITTER Until this point everything was normal. Anyway, as I said, I strongly suspect corruption in the word database. On a previous occasion when this happened, I deleted the entire tree/* directory structure and started all over again. Splitter worked like a dream with both small and big log files until one of the following occured: 1. I stopped indexer with ^C and then run splitter or 2. Splitter had to work itself through some 31 MB files. (These files are not all the same size; they tend to get slightly bigger the more they are, i.e. something like this: 0001.log31.500.000 bytes 0002.log31.550.000 bytes 0003.log31.580.000 bytes sort of). Unfortunately I haven't been making notes, so I can't tell for sure which one of these two things happened before things stopped working. I tried splitter again today with ./splitter splitter.log . It went in a very normal way *almost* as far as yesterday, and then hang so badly that not even kill -9 could kill it. 
The log of this run looks like snip normal operation Delete from cache-file /var/mnogo319/tree/12/B/12B27000 Delete from cache-file /var/mnogo319/tree/12/B/12B2D000 Delete from cache-file /var/mnogo319/tree/12/B/12B3 Delete from cache-file /var/mnogo319/tree/12/B/12B31000 Delete from cache-file /var/mnogo319/tree/12/B/12B3 I am attaching the three files that could be involved, namely tree/12/B/12B31000, 12B32000 and 12B35000. I'll install 3.1.10 now, try it on the old word database and see what it does. If it doesn't work, I'll remove the word database and start again from scratch. I'll try to make detailed notes this time and report back. Z -- oracle@everywhere: The ephemeral source of the eternal truth... Name: wordfiles.tar.gz wordfiles.tar.gzType: Unix Tape Archive (application/x-tar) Encoding: base64 __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Zenon Panoussis wrote: Alexander Barkov skrev: Could you please put zipped /var/mnogo319/tree/12/B/12BFD000 and a file /splitter/XXX.wrd with correspondent XXX.del which produce crash somewhere on the net? http://search.freewinds.cx/logs/logs.tar.gz Not Found The requested URL /logs/logs.tar.gz was not found on this server. Apache/1.3.14 Server at search.freewinds.cx Port 80 __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Zenon Panoussis skrev: And a really HARD hang at the same place as before. So hard that I can't even kill splitter. BTW, although I couldn't kill splitter, I did find a core dump in sbin. Here's the backtrace: # gdb splitter core GNU gdb 5.0 snip copyright This GDB was configured as "i386-redhat-linux"... Core was generated by `./splitter'. Program terminated with signal 11, Segmentation fault. Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done. Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10 Reading symbols from /lib/libm.so.6...done. Loaded symbols for /lib/libm.so.6 Reading symbols from /usr/lib/libz.so.1...done. Loaded symbols for /usr/lib/libz.so.1 Reading symbols from /lib/libc.so.6...done. Loaded symbols for /lib/libc.so.6 Reading symbols from /lib/libcrypt.so.1...done. Loaded symbols for /lib/libcrypt.so.1 Reading symbols from /lib/libnsl.so.1...done. Loaded symbols for /lib/libnsl.so.1 Reading symbols from /lib/ld-linux.so.2...done. Loaded symbols for /lib/ld-linux.so.2 #0 0x8057d15 in UdmSplitCacheLog (log=300) at cache.c:635 635 logwords[count+j].wrd_id=table[w].wrd_id; (gdb) backtrace #0 0x8057d15 in UdmSplitCacheLog (log=300) at cache.c:635 #1 0x8049f29 in main (argc=1, argv=0xbac4) at splitter.c:74 #2 0x4009bbfc in __libc_start_main (main=0x8049e20 main, argc=1, ubp_av=0xbac4, init=0x8049630 _init, fini=0x8064f7c _fini, rtld_fini=0x4000d674 _dl_fini, stack_end=0xbabc) at ../sysdeps/generic/libc-start.c:118 Z -- oracle@everywhere: The ephemeral source of the eternal truth... __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
Zenon Panoussis skrev: I'll delete the entire tree directory and start re-indexing from scratch. I'll make and split a small file first, ca 5 MB, then a 31 MB file, if that works yet another 31 MB file, and so on until I run into problems again. Will report back later this evening. First step OK: - indexed for a while, created 2.8 MB log file - split successfully and even got the FFF directory: snip /var/mnogo3110/tree/FF/F/FFFE6000 old: 0 new: 2 total: 2 /var/mnogo3110/tree/FF/F/FFFE7000 old: 0 new: 24 total: 24 Now for 31 MB adventures :) Z -- oracle@everywhere: The ephemeral source of the eternal truth... -- oracle@everywhere: The ephemeral source of the eternal truth... __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]
Re: UdmSearch: Webboard: Segfault (grrr)
i reported this problems a while back. i believe it's being worked on. atleast the recently found the bug why it wasn't splitting out to FFF. the seg fault happens during the splitter process and not index. i've been splitter when the logs are at about 2 MB and i've not had splitter core dump on me yet. but before when i let the log file build up to about 15 to 30 MB, i had that core dump problem. i hope this will be resolved soon because it's a pain in the behind. ;-( --- Zenon Panoussis [EMAIL PROTECTED] wrote: Author: Zenon Panoussis Email: [EMAIL PROTECTED] Message: RH Linux 7.0, search 3.1.9, MySQL 3.23.29, cache mode, with the new patches for cache.c and sql.c. It happens all the time. It started happening when "maximum size" 31 MB log files were indexed, but by now it happens on any indexing, no matter how big or small the log file, as if the database somehow was corrupt: Delete from cache-file /var/mnogo319/tree/12/B/12BFD000 /var/mnogo319/tree/12/C/12C1 old: 69 new: 1 total: 70 ./run-splitter: line 118: 18790 Segmentation fault (core dumped) $SPLITTER For the same log file it always crashes at the same index file (e.g. every time I try to reindex 12345678.log it will crash at tree/12/3/4567000). If I delete the log file and start again with a new log file, it will crash at a different place, but it will still be consistent in crashing at the same place every time. And the backtrace: # gdb splitter core GNU gdb 5.0 [...] This GDB was configured as "i386-redhat-linux"... Core was generated by `/usr/local/mnogo319/sbin/splitter'. Program terminated with signal 11, Segmentation fault. Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done. Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10 Reading symbols from /lib/libm.so.6...done. Loaded symbols for /lib/libm.so.6 Reading symbols from /usr/lib/libz.so.1...done. Loaded symbols for /usr/lib/libz.so.1 Reading symbols from /lib/libc.so.6...done. 
Loaded symbols for /lib/libc.so.6 Reading symbols from /lib/libcrypt.so.1...done. Loaded symbols for /lib/libcrypt.so.1 Reading symbols from /lib/libnsl.so.1...done. Loaded symbols for /lib/libnsl.so.1 Reading symbols from /lib/ld-linux.so.2...done. Loaded symbols for /lib/ld-linux.so.2 #0 0x8059061 in UdmSplitCacheLog (log=300) at cache.c:552 552 logwords[count+j].wrd_id=table[w].wrd_id; (gdb) backtrace #0 0x8059061 in UdmSplitCacheLog (log=300) at cache.c:552 #1 0x8049e89 in main (argc=1, argv=0xba94) at splitter.c:70 #2 0x4009bbfc in __libc_start_main (main=0x8049d80 main, argc=1, ubp_av=0xba94, init=0x80495bc _init, fini=0x8065b7c _fini, rtld_fini=0x4000d674 _dl_fini, stack_end=0xba8c) at ../sysdeps/generic/libc-start.c:118 Since 3.1.10 is coming out today, I'll try it and see if things work better. If not, I'll post more bad news later ;) Z Reply: http://search.mnogo.ru/board/message.php?id=1320 __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED] __ Do You Yahoo!? Get personalized email addresses from Yahoo! Mail - only $35 a year! http://personal.mail.yahoo.com/ __ If you want to unsubscribe send "unsubscribe udmsearch" to [EMAIL PROTECTED]