Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-15 Thread Alexander Barkov

  Hello!

We finally found a bug in cache.c. The new version is attached.
Everybody who has problems with splitter crashes is welcome to test it.
Please give feedback!

#include "udm_config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <sys/types.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <dirent.h>

#include "udm_common.h"
#include "udm_utils.h"
#include "udm_charset.h"
#include "udm_spell.h"
#include "udm_cache.h"
#include "udm_crc32.h"
#include "udm_indexer.h"
#include "udm_db.h"
#include "udm_boolean.h"
#include "udm_searchtool.h"
#include "udm_agent.h"
#include "udm_xmalloc.h"
#include "udm_stopwords.h"
#include "udm_proto.h"

#ifdef HAVE_WINSOCK_H
#include <winsock.h>
#endif
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#ifdef HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif

/* Fallback for platforms whose headers do not provide INADDR_NONE.
 * open_host() compares the result of inet_addr() against this value to
 * decide whether the host string was a numeric address. */
#ifndef INADDR_NONE
#define INADDR_NONE ((unsigned long) -1)
#endif



/* Compile-time debug switch. NOTE(review): its consumers are not visible in
 * this excerpt - presumably it enables diagnostic output in the search
 * path; confirm before relying on it. */
#define DEBUG_SEARCH 1


static int open_host(char *hostname,int port, int timeout)
{
int net;
struct hostent *host;
struct sockaddr_in sa_in;

bzero((char*)sa_in,sizeof(sa_in));

if (port){
sa_in.sin_port= htons((u_short)port);
}else{
return(UDM_NET_ERROR);
}

if ((sa_in.sin_addr.s_addr=inet_addr(hostname)) != INADDR_NONE){
sa_in.sin_family=AF_INET;
}else{
host=gethostbyname(hostname);
if (host){
sa_in.sin_family=host-h_addrtype;
memcpy(sa_in.sin_addr, host-h_addr, (size_t)host-h_length);
}else{
return(UDM_NET_CANT_RESOLVE);
}
}
net=socket(AF_INET, SOCK_STREAM, 0);

if(connect(net, (struct sockaddr *)sa_in, sizeof (sa_in)))
return(UDM_NET_CANT_CONNECT);

return(net);
}


/***/

/* Directory names used by the cache code. NOTE(review): presumably these
 * are sub-directories of the var directory (the raw logs, the word tree
 * and the splitter output) - the code that builds the paths is not
 * visible here; confirm. */
#define LOGDIR  "raw"
#define TREEDIR "tree"
#define SPLDIR  "splitter"

/**/


/* One table entry of a cache file: four ints, in this order.
 * NOTE(review): pos/len presumably give the offset and length of the
 * entry's data inside the file - confirm against the reader/writer. */
struct udm_cache_table {
	int	wrd_id;		/* word identifier */
	int	weight;		/* word weight */
	int	pos;		/* presumably a file offset - TODO confirm */
	int	len;		/* presumably a data length - TODO confirm */
};
typedef struct udm_cache_table UDM_CACHETABLE;

/* Header record of a cache file: a table count followed by a version. */
struct udm_cache_hheader {
	int	ntables;	/* number of tables */
	int	version;	/* version number */
};
typedef struct udm_cache_hheader UDM_CACHEHEADER;

/* Per-URL record of a cached word. Only url_id is always present; each
 * additional field exists only when the matching UDM_STORE_CACHE_* macro
 * is defined at compile time, so the record size depends on the build. */
struct udm_cacheword_struct {
	int	url_id;
#if defined(UDM_STORE_CACHE_WRDPOS)
	int	wrd_pos;
#endif
#if defined(UDM_STORE_CACHE_SITEID)
	int	site_id;
#endif
#if defined(UDM_STORE_CACHE_CATEGORY)
	int	category;
#endif
#if defined(UDM_STORE_CACHE_TAG)
	int	tag;
#endif
};
typedef struct udm_cacheword_struct UDM_CACHEWORD;


/* Convert category string into 32 bit number.
 *
 * cat_str is read as up to five two-digit hexadecimal fields (only its
 * first 10 characters are used). The fields are packed, most significant
 * first, at bit offsets 25, 18, 12, 6 and 0.
 *
 *   cat   receives the packed value
 *   mask  receives a bit mask covering every field that is non-zero
 *         (7 bits for the first two fields, 6 bits for the other three)
 *
 * Fields missing from a short or empty string count as 0. A NULL cat_str
 * is treated like an empty string.
 */
static void UdmDecodeCatStr(const char * cat_str, int * cat, int * mask){
	unsigned int t[5] = {0, 0, 0, 0, 0};	/* unparsed fields stay 0 */
	char str[11] = "";

	/* Only 10 characters are ever parsed, so copy no more than that;
	 * this also keeps over-long input from overflowing the buffer. */
	if (cat_str)
		strncpy(str, cat_str, sizeof(str) - 1);
	str[sizeof(str) - 1] = 0;
	sscanf(str, "%02x%02x%02x%02x%02x", t+0, t+1, t+2, t+3, t+4);

	/* Unsigned constants: 0x7F<<25 does not fit in a signed int. */
	*mask = (int)( (t[0] ? 0x7Fu<<25 : 0u) | (t[1] ? 0x7Fu<<18 : 0u) |
		       (t[2] ? 0x3Fu<<12 : 0u) | (t[3] ? 0x3Fu<<6  : 0u) |
		       (t[4] ? 0x3Fu<<0  : 0u) );

	*cat = (int)( (t[0]<<25) | (t[1]<<18) | (t[2]<<12) | (t[3]<<6) | (t[4]<<0) );
}

/*** Sort functions **/
/* Sort SEARCHWORD by url_id order */
static int cmpurlid(const void *s1,const void *s2){
return(((const UDM_SEARCHWORD*)s1)-url_id-((const 
UDM_SEARCHWORD*)s2)-url_id);
}

/* Function to sort LOGWORD list in (WRD_ID,TIME_STAMP) order */
static int cmplog(const void *s1,const void *s2){
unsigned int n1,n2;
n1=((const UDM_LOGWORD*)s1)-wrd_id;
n2=((const UDM_LOGWORD*)s2)-wrd_id;
if(n1==n2){
n1=((const UDM_LOGWORD*)s1)-url_id;
n2=((const UDM_LOGWORD*)s2)-url_id;
if(n1==n2){
n1=((const UDM_LOGWORD*)s2)-stamp;
n2=((const UDM_LOGWORD*)s1)-stamp;
}
}
if(n1n2)return(-1);
if(n1n2)return(1);
return(0);
}

/* Function to sort LOGDEL list in URL_ID order */
static int cmpurldellog(const void *s1,const void *s2){
unsigned int n1,n2;
n1=((const UDM_LOGDEL*)s1)-url_id;
n2=((const UDM_LOGDEL*)s2)-url_id;
if(n1==n2){
n1=((const UDM_LOGDEL*)s2)-stamp;
n2=((const UDM_LOGDEL*)s1)-stamp;
}
if(n1n2)return(-1);
if(n1n2)return(1);
return(0);
}

/* Function to sort CACHEWORD list in URL_ID order */
static int cmpcache(const void *s1,const void *s2){
int n1,n2;
n1=((const UDM_LOGWORD*)s1)-wrd_id;
n2=((const UDM_LOGWORD*)s2)-wrd_id;
if(n1==n2){
n1=((const UDM_LOGWORD*)s1)-weight;
n2=((const UDM_LOGWORD*)s2)-weight;
if(n1==n2){
n1=((const UDM_LOGWORD*)s1)-url_id;
n2=((const 

Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-15 Thread Alexander Barkov

Caffeinate The World wrote:
 
 --- Alexander Barkov [EMAIL PROTECTED] wrote:
  Caffeinate The World wrote:
  
   --- Alexander Barkov [EMAIL PROTECTED] wrote:
  Hello!
   
We finally found a bug in cache.c. New version is in attachement.
Everybody who has problems with splitter's crashes are welcome to
test.
  
   should the 'tree' directory be removed? can we split the raw log
  files
   we have thus far or is re-indexing necessary?
 
 
  I hope it should work without having to remove tree directory.
  But better to remove it. It is safe to use old /raw  and /splitter
  files
  without having to reindex.
 
 ok. what exactly was the bug?


There were actually two bugs. The first one was caused by the fixed-size
table[4096] in sql.c: sometimes it needs to grow larger than that. Dynamic
reallocation has now been added.

The second one was that the cmpcache() function passed to qsort() produced
an ordering slightly different from the intended one.


BTW. We found this using your logs for NetBSD/Alpha. Thanks!
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-15 Thread Caffeinate The World


--- Alexander Barkov [EMAIL PROTECTED] wrote:
 Caffeinate The World wrote:
  
  --- Alexander Barkov [EMAIL PROTECTED] wrote:
 Hello!
  
   We finally found a bug in cache.c. New version is in attachement.
   Everybody who has problems with splitter's crashes are welcome to
   test.
  
  should the 'tree' directory be removed? can we split the raw log
 files
  we have thus far or is re-indexing necessary?
 
 
 I hope it should work without having to remove tree directory.
 But better to remove it. It is safe to use old /raw  and /splitter
 files
 without having to reindex.

ok. what exactly was the bug?

__
Do You Yahoo!?
Get personalized email addresses from Yahoo! Mail - only $35 
a year!  http://personal.mail.yahoo.com/
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-15 Thread Alexander Barkov

 
 There was actually two  bugs. The first  one because of table[4096]
 in sql.c. Sometimes it may became more large. Now dinamic realloc'ing
 has been added.

Oops. I meant cache.c, not sql.c.


 The second one that cmpcache() function passed to qsort gave ordering
 slightly different with supposed one.
 
 BTW. We found this using your logs for NetBSD/Alpha. Thanks!
 __
 If you want to unsubscribe send "unsubscribe udmsearch"
 to [EMAIL PROTECTED]
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-15 Thread Zenon Panoussis



Alexander Barkov skrev:
 

 We finally found a bug in cache.c. New version is in attachement.
 Everybody who has problems with splitter's crashes are welcome to test.
 Please, give feedback!

You guys are great! I'll re-compile and get back to you with 
reports. 

BTW, can I remove http://search.freewinds.cx/garbage_in_sbin.tar.gz 
now? 

Z


-- 
oracle@everywhere: The ephemeral source of the eternal truth...
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-15 Thread Alexander Barkov

Zenon Panoussis wrote:
 
 Alexander Barkov skrev:
 
 
  We finally found a bug in cache.c. New version is in attachement.
  Everybody who has problems with splitter's crashes are welcome to test.
  Please, give feedback!
 
 You guys are great! I'll re-compile and get back to you with
 reports.
 
 BTW, can I remove http://search.freewinds.cx/garbage_in_sbin.tar.gz
 now?


I think yes.
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-15 Thread Caffeinate The World


--- Alexander Barkov [EMAIL PROTECTED] wrote:
   Hello!
 
 We finally found a bug in cache.c. New version is in attachement.
 Everybody who has problems with splitter's crashes are welcome to
 test. 

should the 'tree' directory be removed? can we split the raw log files
we have thus far or is re-indexing necessary?

__
Do You Yahoo!?
Get personalized email addresses from Yahoo! Mail - only $35 
a year!  http://personal.mail.yahoo.com/
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-15 Thread Zenon Panoussis



Alexander Barkov skrev:
 

 We finally found a bug in cache.c. New version is in attachement.
 Everybody who has problems with splitter's crashes are welcome to test.
 Please, give feedback!

Oops. Something else is not OK: 

cache.c:687:87: warning: #ifdef with no argument
cache.c:692:87: warning: #ifdef with no argument
cache.c:697:87: warning: #ifdef with no argument
cache.c:702:87: warning: #ifdef with no argument
cache.c: In function `UdmFindCache':
cache.c:969: parse error before `?'
cache.c:982: `real_num' undeclared (first use in this function)
cache.c:982: (Each undeclared identifier is reported only once
cache.c:982: for each function it appears in.)
cache.c:994: `fd1' undeclared (first use in this function)
cache.c:996: `group' undeclared (first use in this function)
cache.c:1000: `group_num' undeclared (first use in this function)
cache.c: At top level:
cache.c:1011: initializer element is not constant
cache.c:1011: warning: data definition has no type or storage class
cache.c:1012: parse error before string constant
cache.c:1013: parse error before string constant
cache.c:1013: warning: data definition has no type or storage class
cache.c:1014: redefinition of `ticks'
cache.c:1011: `ticks' previously defined here
cache.c:1014: initializer element is not constant
cache.c:1014: warning: data definition has no type or storage class
cache.c:1015: parse error before string constant
cache.c:1015: warning: data definition has no type or storage class
cache.c:1024: `i' undeclared here (not in a function)
cache.c:1024: parse error before `.'
cache.c:1030: register name not specified for `p'
cache.c:1032: parse error before `if'
cache.c:1035: `pmerg' undeclared here (not in a function)
cache.c:1035: `pmerg' undeclared here (not in a function)
cache.c:1035: warning: data definition has no type or storage class
cache.c:1036: parse error before `'
cache.c:1043: `k' undeclared here (not in a function)
cache.c:1043: warning: data definition has no type or storage class
cache.c:1044: parse error before `}'
cache.c:1046: conflicting types for `p'
cache.c:1030: previous declaration of `p'
cache.c:1046: `pmerg' undeclared here (not in a function)
cache.c:1046: warning: data definition has no type or storage class
cache.c:1047: parse error before `'
cache.c:1048: parse error before `-'
cache.c:1058: warning: initialization makes integer from pointer without
a cast
cache.c:1058: warning: data definition has no type or storage class
cache.c:1058: parse error before `}'
cache.c:1061: redefinition of `ticks'
cache.c:1014: `ticks' previously defined here
cache.c:1061: initializer element is not constant
cache.c:1061: warning: data definition has no type or storage class
cache.c:1063: parse error before string constant
cache.c:1071: warning: parameter names (without types) in function
declaration
cache.c:1071: conflicting types for `UdmGroupByURL'
../include/udm_searchtool.h:7: previous declaration of `UdmGroupByURL'
cache.c:1071: warning: data definition has no type or storage class
cache.c:1072: parse error before `}'
make[1]: *** [cache.lo] Error 1
make[1]: Leaving directory `/root/mnogosearch-3.1.10/src'
make: *** [all-recursive] Error 1


-- 
oracle@everywhere: The ephemeral source of the eternal truth...
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-15 Thread Zenon Panoussis



Zenon Panoussis skrev:
 

 Oops. Something else is not OK:
 
 cache.c:687:87: warning: #ifdef with no argument
[etc]

I think that the mailer is responsible for this. There are 
lots of broken lines in the code, that shouldn't be broken. 
Perhaps it's better to attach the file in .gz format instead 
of text.

Z


-- 
oracle@everywhere: The ephemeral source of the eternal truth...
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-15 Thread Caffeinate The World

i didn't get this error on my NetBSD/Alpha. compile was fine.
what system are you on?

--- Zenon Panoussis [EMAIL PROTECTED] wrote:
 
 
 Alexander Barkov skrev:
  
 
  We finally found a bug in cache.c. New version is in attachement.
  Everybody who has problems with splitter's crashes are welcome to
 test.
  Please, give feedback!
 
 Oops. Something else is not OK: 
 
 cache.c:687:87: warning: #ifdef with no argument
 cache.c:692:87: warning: #ifdef with no argument
 cache.c:697:87: warning: #ifdef with no argument
 cache.c:702:87: warning: #ifdef with no argument
 cache.c: In function `UdmFindCache':
 cache.c:969: parse error before `?'
 cache.c:982: `real_num' undeclared (first use in this function)
 cache.c:982: (Each undeclared identifier is reported only once
 cache.c:982: for each function it appears in.)
 cache.c:994: `fd1' undeclared (first use in this function)
 cache.c:996: `group' undeclared (first use in this function)
 cache.c:1000: `group_num' undeclared (first use in this function)
 cache.c: At top level:
 cache.c:1011: initializer element is not constant
 cache.c:1011: warning: data definition has no type or storage class
 cache.c:1012: parse error before string constant
 cache.c:1013: parse error before string constant
 cache.c:1013: warning: data definition has no type or storage class
 cache.c:1014: redefinition of `ticks'
 cache.c:1011: `ticks' previously defined here
 cache.c:1014: initializer element is not constant
 cache.c:1014: warning: data definition has no type or storage class
 cache.c:1015: parse error before string constant
 cache.c:1015: warning: data definition has no type or storage class
 cache.c:1024: `i' undeclared here (not in a function)
 cache.c:1024: parse error before `.'
 cache.c:1030: register name not specified for `p'
 cache.c:1032: parse error before `if'
 cache.c:1035: `pmerg' undeclared here (not in a function)
 cache.c:1035: `pmerg' undeclared here (not in a function)
 cache.c:1035: warning: data definition has no type or storage class
 cache.c:1036: parse error before `'
 cache.c:1043: `k' undeclared here (not in a function)
 cache.c:1043: warning: data definition has no type or storage class
 cache.c:1044: parse error before `}'
 cache.c:1046: conflicting types for `p'
 cache.c:1030: previous declaration of `p'
 cache.c:1046: `pmerg' undeclared here (not in a function)
 cache.c:1046: warning: data definition has no type or storage class
 cache.c:1047: parse error before `'
 cache.c:1048: parse error before `-'
 cache.c:1058: warning: initialization makes integer from pointer
 without
 a cast
 cache.c:1058: warning: data definition has no type or storage class
 cache.c:1058: parse error before `}'
 cache.c:1061: redefinition of `ticks'
 cache.c:1014: `ticks' previously defined here
 cache.c:1061: initializer element is not constant
 cache.c:1061: warning: data definition has no type or storage class
 cache.c:1063: parse error before string constant
 cache.c:1071: warning: parameter names (without types) in function
 declaration
 cache.c:1071: conflicting types for `UdmGroupByURL'
 ../include/udm_searchtool.h:7: previous declaration of
 `UdmGroupByURL'
 cache.c:1071: warning: data definition has no type or storage class
 cache.c:1072: parse error before `}'
 make[1]: *** [cache.lo] Error 1
 make[1]: Leaving directory `/root/mnogosearch-3.1.10/src'
 make: *** [all-recursive] Error 1
 
 
 -- 
 oracle@everywhere: The ephemeral source of the eternal truth...
 __
 If you want to unsubscribe send "unsubscribe udmsearch"
 to [EMAIL PROTECTED]
 


__
Do You Yahoo!?
Get personalized email addresses from Yahoo! Mail - only $35 
a year!  http://personal.mail.yahoo.com/
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-15 Thread hanksdc

Is this file to be used with 3.1.9 sources, or 3.1.10? (Either is fine - I can adjust 
as necessary quite easily).

Thanks for the fix. I have over a million urls inserted and climbing. :-)

-- Dan


On Thu, 15 Feb 2001, Alexander Barkov wrote:

 Dan,please take new cache.c and recompile everything.
 It should fix the problem.


 [EMAIL PROTECTED] wrote:
 
  I just have to put in my encounters here, because they seem very similar. I get a 
large amount of information indexed, but upon trying to run splitter, it will core 
dump somewhere midway through, and on one round left wierd directories in the 
$VAR/raw directory:
 
  [root@spider raw]# ls -al
  total 32988
  drwxr-xr-x   5 root root 8192 Feb 14 04:13 .
  drwxr-xr-x   6 root root 4096 Feb 13 01:58 ..
  drwxr-xr-x   3 root root 4096 Feb 13 03:12 64
  -rw---   1 root root 33132544 Feb 14 04:13 core
  -rw-r--r--   1 root root 8464 Feb 14 04:22 del.log
  -rw-r--r--   1 root root   566272 Feb 14 04:22 wrd.log
  drwxr-xr-x   3 root root 4096 Feb 13 03:58 ??64
  drwxr-xr-x   3 root root 4096 Feb 13 06:06 ??18
  [root@spider raw]#
 
  Unfortunately I wasn't thinking and I deleted all the .done files, and not all of 
the logs were split. Well, back to indexing...
 
  I'm using 3.1.9 on Linux/Oracle
 
  -- Dan Hanks
 
  On Wed, 14 Feb 2001, Zenon Panoussis wrote:
 
  
  
   Zenon Panoussis skrev:
   
  
By now, I have almost 1 GB of indexed files, 4 indexer
crashes and one splitter crash. I'll do the debugging and
post its output tomorrow.
  
   ===
   # gdb indexer core.indexer.01
   GNU gdb 5.0
   Copyright 2000 Free Software Foundation, Inc.
   GDB is free software, covered by the GNU General Public License, and you
   are
   welcome to change it and/or distribute copies of it under certain
   conditions.
   Type "show copying" to see the conditions.
   There is absolutely no warranty for GDB.  Type "show warranty" for
   details.
   This GDB was configured as "i386-redhat-linux"...
   Core was generated by `./indexer -m -s 200'.
   Program terminated with signal 11, Segmentation fault.
   Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done.
   Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10
   Reading symbols from /lib/libm.so.6...done.
   Loaded symbols for /lib/libm.so.6
   Reading symbols from /usr/lib/libz.so.1...done.
   Loaded symbols for /usr/lib/libz.so.1
   Reading symbols from /lib/libc.so.6...done.
   Loaded symbols for /lib/libc.so.6
   Reading symbols from /lib/libcrypt.so.1...done.
   Loaded symbols for /lib/libcrypt.so.1
   Reading symbols from /lib/libnsl.so.1...done.
   Loaded symbols for /lib/libnsl.so.1
   Reading symbols from /lib/ld-linux.so.2...done.
   Loaded symbols for /lib/ld-linux.so.2
   Reading symbols from /lib/libnss_files.so.2...done.
   Loaded symbols for /lib/libnss_files.so.2
   Reading symbols from /lib/libnss_nisplus.so.2...done.
   Loaded symbols for /lib/libnss_nisplus.so.2
   Reading symbols from /lib/libnss_nis.so.2...done.
   Loaded symbols for /lib/libnss_nis.so.2
   Reading symbols from /lib/libnss_dns.so.2...done.
   Loaded symbols for /lib/libnss_dns.so.2
   Reading symbols from /lib/libresolv.so.2...done.
   Loaded symbols for /lib/libresolv.so.2
   #0  0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at
   crc32.c:97
   97  _CRC32_(crc, *p) ;
   (gdb) print crc
   $1 = 1928826335
   (gdb) print p
   $2 = 0x40431000 Address 0x40431000 out of bounds
  
   ===
  
   # gdb indexer core.indexer.02
   snip loading
   #0  0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at
   crc32.c:97
   97  _CRC32_(crc, *p) ;
   (gdb) print crc
   $1 = 835566978
   (gdb) print p
   $2 = 0x40404000 Address 0x40404000 out of bounds
  
   ===
  
   # gdb indexer core.indexer.03
   snip loading
   #0  0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at
   crc32.c:97
   97  _CRC32_(crc, *p) ;
   (gdb) print crc
   $1 = 2869617068
   (gdb) print p
   $2 = 0x40404000 Address 0x40404000 out of bounds
  
   ===
  
   # gdb indexer core.indexer.04
   snip loading
   (gdb) print crc
   $1 = 1253677059
   (gdb) print p
   $2 = 0x40431000 Address 0x40431000 out of bounds
  
   ===
  
   And finally the splitter:
  
   # gdb splitter core.splitter.01
   snip copyright
   This GDB was configured as "i386-redhat-linux"...
   Core was generated by `/usr/local/mnogo3110/sbin/splitter'.
   Program terminated with signal 11, Segmentation fault.
   Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done.
   Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10
   Reading symbols from /lib/libm.so.6...done.
   Loaded symbols for /lib/libm.so.6
   Reading symbols from /usr/lib/libz.so.1...done.
   Loaded symbols for /usr/lib/libz.so.1
   Reading symbols 

Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-14 Thread Zenon Panoussis



Caffeinate The World skrev:
 

 i've been going through this and back again time and time again. what
 would really be nice is indexer save the logs in a format that's easy
 to use again. for instance, you can use the format re-index to sql etc.
 
 or if you want to reindex again, you don't have to crawl through all
 the external websites. saves a lot of time and we can debug faster.

I'm not sure what you mean here. The Mirror statement does just that 
(and luckily, I had an almost complete mirror already). 

Z


-- 
oracle@everywhere: The ephemeral source of the eternal truth...
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-14 Thread Alexander Barkov

Zenon Panoussis wrote:
 
 Caffeinate The World skrev:
 
 
  i've been going through this and back again time and time again. what
  would really be nice is indexer save the logs in a format that's easy
  to use again. for instance, you can use the format re-index to sql etc.
 
  or if you want to reindex again, you don't have to crawl through all
  the external websites. saves a lot of time and we can debug faster.
 
 I'm not sure what you mean here. The Mirror statement does just that
 (and luckily, I had an almost complete mirror already).


Yes, the mirroring features are meant for fast reindexing.
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-14 Thread Alexander Barkov

 i completely forgot about this feature!!! i read about it when i first
 started using mnogosearch, but never bothered to use it.
 
 with mirror feature, wouldn't it be easy to implement Google's "cache"
 feature where the user can view a cache of the page from the last time
 you indexed.

I think it's possible. Moreover, we may use zlib to compress those
files,
so they'll use less space.
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-14 Thread Alexander Barkov

Alexander Barkov wrote:
 
  i completely forgot about this feature!!! i read about it when i first
  started using mnogosearch, but never bothered to use it.
 
  with mirror feature, wouldn't it be easy to implement Google's "cache"
  feature where the user can view a cache of the page from the last time
  you indexed.
 
 I think it's possible. Moreover, we may use zlib to compress those
 files,
 so they'll use less space.


The only disadvantage is that it will not work on huge
search engines with millions of documents. Most Unix file systems
limit the total number of files they can hold.
For example, my 30G /usr partition on a FreeBSD box can hold about
8 million files.
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-14 Thread Caffeinate The World


--- Alexander Barkov [EMAIL PROTECTED] wrote:
 Alexander Barkov wrote:
  
   i completely forgot about this feature!!! i read about it when i
 first
   started using mnogosearch, but never bothered to use it.
  
   with mirror feature, wouldn't it be easy to implement Google's
 "cache"
   feature where the user can view a cache of the page from the last
 time
   you indexed.
  
  I think it's possible. Moreover, we may use zlib to compress those
  files,
  so they'll use less space.
 
 
 The only one disadvantage is that it will not work on huge
 search engines with millions documents. There is a limit on total
 file number on file system in most unixes.
 For example, my 30G /usr partition on FreeBSD box can create about 8
 mln
 files.

is that a per file system limit or per unix box limit?

__
Do You Yahoo!?
Get personalized email addresses from Yahoo! Mail - only $35 
a year!  http://personal.mail.yahoo.com/
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-14 Thread Alexander Barkov

Caffeinate The World wrote:
  The only one disadvantage is that it will not work on huge
  search engines with millions documents. There is a limit on total
  file number on file system in most unixes.
  For example, my 30G /usr partition on FreeBSD box can create about 8
  mln
  files.
 
 is that a per file system limit or per unix box limit?
 

Per file system limit.
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-14 Thread Caffeinate The World


--- Alexander Barkov [EMAIL PROTECTED] wrote:
 Caffeinate The World wrote:
   The only one disadvantage is that it will not work on huge
   search engines with millions documents. There is a limit on total
   file number on file system in most unixes.
   For example, my 30G /usr partition on FreeBSD box can create
 about 8
   mln
   files.
  
  is that a per file system limit or per unix box limit?
  
 
 Per file system limit.

couldn't you do something like mount multiple FS:

sd0a /data/part1
sd1a /data/part2
...
sdna /data/partn

wouldn't that work?


__
Do You Yahoo!?
Get personalized email addresses from Yahoo! Mail - only $35 
a year!  http://personal.mail.yahoo.com/
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-14 Thread hanksdc

 
 
  The only one disadvantage is that it will not work on huge
  search engines with millions documents. There is a limit on total
  file number on file system in most unixes.
  For example, my 30G /usr partition on FreeBSD box can create about 8
  mln
  files.

 is that a per file system limit or per unix box limit?


Generally it's a limitation of how the file system was created. Different
parameters when creating the filesystem will yield different results, so the number of
available inodes really depends on the parameters with which you create the
filesystem. On Linux it depends on parameters such as block_size, bytes_per_inode, etc.

-- Dan
=
Daniel Hanks
Network Administrator
Web Services Group

About
The Human Internet

http://about.com/
1253 N. Research Way, Suite Q-2500.  Orem, UT 84097
ph: 801-437-6023fax: 801-437-6020
email: [EMAIL PROTECTED]



__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-14 Thread Zenon Panoussis



Zenon Panoussis skrev:
 

 By now, I have almost 1 GB of indexed files, 4 indexer
 crashes and one splitter crash. I'll do the debugging and
 post its output tomorrow.

===
# gdb indexer core.indexer.01
GNU gdb 5.0
Copyright 2000 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you
are
welcome to change it and/or distribute copies of it under certain
conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for
details.
This GDB was configured as "i386-redhat-linux"...
Core was generated by `./indexer -m -s 200'.
Program terminated with signal 11, Segmentation fault.
Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done.
Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10
Reading symbols from /lib/libm.so.6...done.
Loaded symbols for /lib/libm.so.6
Reading symbols from /usr/lib/libz.so.1...done.
Loaded symbols for /usr/lib/libz.so.1
Reading symbols from /lib/libc.so.6...done.
Loaded symbols for /lib/libc.so.6
Reading symbols from /lib/libcrypt.so.1...done.
Loaded symbols for /lib/libcrypt.so.1
Reading symbols from /lib/libnsl.so.1...done.
Loaded symbols for /lib/libnsl.so.1
Reading symbols from /lib/ld-linux.so.2...done.
Loaded symbols for /lib/ld-linux.so.2
Reading symbols from /lib/libnss_files.so.2...done.
Loaded symbols for /lib/libnss_files.so.2
Reading symbols from /lib/libnss_nisplus.so.2...done.
Loaded symbols for /lib/libnss_nisplus.so.2
Reading symbols from /lib/libnss_nis.so.2...done.
Loaded symbols for /lib/libnss_nis.so.2
Reading symbols from /lib/libnss_dns.so.2...done.
Loaded symbols for /lib/libnss_dns.so.2
Reading symbols from /lib/libresolv.so.2...done.
Loaded symbols for /lib/libresolv.so.2
#0  0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at
crc32.c:97
97  _CRC32_(crc, *p) ;
(gdb) print crc
$1 = 1928826335
(gdb) print p
$2 = 0x40431000 Address 0x40431000 out of bounds

===

# gdb indexer core.indexer.02
snip loading
#0  0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at
crc32.c:97
97  _CRC32_(crc, *p) ;
(gdb) print crc
$1 = 835566978
(gdb) print p
$2 = 0x40404000 Address 0x40404000 out of bounds

===

# gdb indexer core.indexer.03
snip loading
#0  0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at
crc32.c:97
97  _CRC32_(crc, *p) ;
(gdb) print crc
$1 = 2869617068
(gdb) print p
$2 = 0x40404000 Address 0x40404000 out of bounds

===

# gdb indexer core.indexer.04
snip loading
(gdb) print crc
$1 = 1253677059
(gdb) print p
$2 = 0x40431000 Address 0x40431000 out of bounds

===

And finally the splitter:

# gdb splitter core.splitter.01 
snip copyright
This GDB was configured as "i386-redhat-linux"...
Core was generated by `/usr/local/mnogo3110/sbin/splitter'.
Program terminated with signal 11, Segmentation fault.
Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done.
Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10
Reading symbols from /lib/libm.so.6...done.
Loaded symbols for /lib/libm.so.6
Reading symbols from /usr/lib/libz.so.1...done.
Loaded symbols for /usr/lib/libz.so.1
Reading symbols from /lib/libc.so.6...done.
Loaded symbols for /lib/libc.so.6
Reading symbols from /lib/libcrypt.so.1...done.
Loaded symbols for /lib/libcrypt.so.1
Reading symbols from /lib/libnsl.so.1...done.
Loaded symbols for /lib/libnsl.so.1
Reading symbols from /lib/ld-linux.so.2...done.
Loaded symbols for /lib/ld-linux.so.2
#0  0x8057d15 in UdmSplitCacheLog (log=118) at cache.c:635
635
logwords[count+j].wrd_id=table[w].wrd_id;
(gdb) print count
$1 = 13121220
(gdb) print count+j
$2 = 13125316
(gdb) print logwords
$3 = (UDM_LOGWORD *) 0x0
(gdb) print table[w]
$4 = {wrd_id = 1918989871, weight = 1869507887, pos = 825454439, len =
1949249585}
(gdb) print logwords[count+j]
Cannot access memory at address 0x15e7bd70

===

This time I'm keeping the core dumps, so let me know if there's 
anything else you want me to check.

Apart from this, I got some garbage directories with misnamed 
splitter files in them in sbin:

# pwd
/usr/local/mnogo3110/sbin
# ls -l
snip normal stuff
-rw-r--r--1 root root   457672 Feb 13 08:28 ???
drwxr-xr-x3 root root 4096 Feb 13 08:28 ???3F
-rw-r--r--1 root root   487224 Feb 13 08:27 mE?56.tmp

# ls -lR   
.:
total 403708
-rwxr-xr-x1 root root67163 Feb 12 18:02 cachelogd
-rw-r--r--1 root root0 Feb 14 06:18 cachelogd.out
-rw---1 root root  3862528 Feb 13 02:39 core.indexer.01
-rw---1 root root  3416064 Feb 13 06:06 core.indexer.02
-rw---1 root root  2953216 Feb 13 06:57 core.indexer.03
-rw---1 root root  3235840 Feb 13 07:51 core.indexer.04

Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-14 Thread hanksdc

I just have to put in my encounters here, because they seem very similar. I get a 
large amount of information indexed, but upon trying to run splitter, it will core 
dump somewhere midway through, and on one round it left weird directories in the $VAR/raw 
directory:

[root@spider raw]# ls -al
total 32988
drwxr-xr-x   5 root root 8192 Feb 14 04:13 .
drwxr-xr-x   6 root root 4096 Feb 13 01:58 ..
drwxr-xr-x   3 root root 4096 Feb 13 03:12 64
-rw---   1 root root 33132544 Feb 14 04:13 core
-rw-r--r--   1 root root 8464 Feb 14 04:22 del.log
-rw-r--r--   1 root root   566272 Feb 14 04:22 wrd.log
drwxr-xr-x   3 root root 4096 Feb 13 03:58 ??64
drwxr-xr-x   3 root root 4096 Feb 13 06:06 ??18
[root@spider raw]#

Unfortunately I wasn't thinking and I deleted all the .done files, and not all of the 
logs were split. Well, back to indexing...

I'm using 3.1.9 on Linux/Oracle

-- Dan Hanks


On Wed, 14 Feb 2001, Zenon Panoussis wrote:



 Zenon Panoussis skrev:
 

  By now, I have almost 1 GB of indexed files, 4 indexer
  crashes and one splitter crash. I'll do the debugging and
  post its output tomorrow.

 ===
 # gdb indexer core.indexer.01
 GNU gdb 5.0
 Copyright 2000 Free Software Foundation, Inc.
 GDB is free software, covered by the GNU General Public License, and you
 are
 welcome to change it and/or distribute copies of it under certain
 conditions.
 Type "show copying" to see the conditions.
 There is absolutely no warranty for GDB.  Type "show warranty" for
 details.
 This GDB was configured as "i386-redhat-linux"...
 Core was generated by `./indexer -m -s 200'.
 Program terminated with signal 11, Segmentation fault.
 Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done.
 Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10
 Reading symbols from /lib/libm.so.6...done.
 Loaded symbols for /lib/libm.so.6
 Reading symbols from /usr/lib/libz.so.1...done.
 Loaded symbols for /usr/lib/libz.so.1
 Reading symbols from /lib/libc.so.6...done.
 Loaded symbols for /lib/libc.so.6
 Reading symbols from /lib/libcrypt.so.1...done.
 Loaded symbols for /lib/libcrypt.so.1
 Reading symbols from /lib/libnsl.so.1...done.
 Loaded symbols for /lib/libnsl.so.1
 Reading symbols from /lib/ld-linux.so.2...done.
 Loaded symbols for /lib/ld-linux.so.2
 Reading symbols from /lib/libnss_files.so.2...done.
 Loaded symbols for /lib/libnss_files.so.2
 Reading symbols from /lib/libnss_nisplus.so.2...done.
 Loaded symbols for /lib/libnss_nisplus.so.2
 Reading symbols from /lib/libnss_nis.so.2...done.
 Loaded symbols for /lib/libnss_nis.so.2
 Reading symbols from /lib/libnss_dns.so.2...done.
 Loaded symbols for /lib/libnss_dns.so.2
 Reading symbols from /lib/libresolv.so.2...done.
 Loaded symbols for /lib/libresolv.so.2
 #0  0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at
 crc32.c:97
 97  _CRC32_(crc, *p) ;
 (gdb) print crc
 $1 = 1928826335
 (gdb) print p
 $2 = 0x40431000 Address 0x40431000 out of bounds

 ===

 # gdb indexer core.indexer.02
 snip loading
 #0  0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at
 crc32.c:97
 97  _CRC32_(crc, *p) ;
 (gdb) print crc
 $1 = 835566978
 (gdb) print p
 $2 = 0x40404000 Address 0x40404000 out of bounds

 ===

 # gdb indexer core.indexer.03
 snip loading
 #0  0x805e5fa in UdmCRC32 (buf=0x4021b03e "", size=4294967295) at
 crc32.c:97
 97  _CRC32_(crc, *p) ;
 (gdb) print crc
 $1 = 2869617068
 (gdb) print p
 $2 = 0x40404000 Address 0x40404000 out of bounds

 ===

 # gdb indexer core.indexer.04
 snip loading
 (gdb) print crc
 $1 = 1253677059
 (gdb) print p
 $2 = 0x40431000 Address 0x40431000 out of bounds

 ===

 And finally the splitter:

 # gdb splitter core.splitter.01
 snip copyright
 This GDB was configured as "i386-redhat-linux"...
 Core was generated by `/usr/local/mnogo3110/sbin/splitter'.
 Program terminated with signal 11, Segmentation fault.
 Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done.
 Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10
 Reading symbols from /lib/libm.so.6...done.
 Loaded symbols for /lib/libm.so.6
 Reading symbols from /usr/lib/libz.so.1...done.
 Loaded symbols for /usr/lib/libz.so.1
 Reading symbols from /lib/libc.so.6...done.
 Loaded symbols for /lib/libc.so.6
 Reading symbols from /lib/libcrypt.so.1...done.
 Loaded symbols for /lib/libcrypt.so.1
 Reading symbols from /lib/libnsl.so.1...done.
 Loaded symbols for /lib/libnsl.so.1
 Reading symbols from /lib/ld-linux.so.2...done.
 Loaded symbols for /lib/ld-linux.so.2
 #0  0x8057d15 in UdmSplitCacheLog (log=118) at cache.c:635
 635
 logwords[count+j].wrd_id=table[w].wrd_id;
 (gdb) print count
 $1 = 13121220
 (gdb) print count+j
 $2 = 13125316
 (gdb) print logwords
 $3 = (UDM_LOGWORD *) 0x0
 (gdb) print table[w]
 $4 = {wrd_id = 1918989871, weight 

Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-13 Thread Zenon Panoussis



Zenon Panoussis skrev:
 
 
 Now for 31 MB adventures :)

# ./run-splitter -k
Sending -HUP signal to cachelogd...
Done
# ./run-splitter -p
Preparing logs...
Open dir '/var/mnogo3110/raw'
Preparing word log 982024900  [   42176 bytes]
Preparing word log 982027284  [31465324 bytes]
Preparing word log 982027618  [ 8815804 bytes]
Preparing del log 982024900  
Preparing del log 982027284
Preparing del log 982027618
Renaming logs...
Done

Running ./run-splitter on these worked fine. No problems at all. 
After that, I went on indexing and created 

59920 Feb 13 06:05 982040748.del.done
 31457740 Feb 13 06:05 982040748.wrd.done
 1480 Feb 13 06:06 982040807.del.done
   637240 Feb 13 06:06 982040807.wrd.done
51920 Feb 13 07:21 982045300.del.done
 31469304 Feb 13 07:21 982045300.wrd.done
69248 Feb 13 07:51 982047843.del.done
 30213344 Feb 13 07:51 982047843.wrd.done

another two 31 MB files and two smaller ones. All of them were 
split without problems.

[two days later] 

Indexing kept crashing (see separate posting) and splitting 
kept going fine until tonight, when the opposite occurred. 
By now, I have almost 1 GB of indexed files, 4 indexer 
crashes and one splitter crash. I'll do the debugging and 
post its output tomorrow. 

Z


-- 
oracle@everywhere: The ephemeral source of the eternal truth...
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-13 Thread Caffeinate The World

i've been going through this and back again time and time again. what
would really be nice is if indexer saved the logs in a format that's easy
to use again. for instance, you could use that format to re-index to sql etc.

or if you want to reindex again, you don't have to crawl through all
the external websites. saves a lot of time and we can debug faster.

--- Zenon Panoussis [EMAIL PROTECTED] wrote:
 
 
 Zenon Panoussis skrev:
  
  
  Now for 31 MB adventures :)
 
 # ./run-splitter -k
 Sending -HUP signal to cachelogd...
 Done
 # ./run-splitter -p
 Preparing logs...
 Open dir '/var/mnogo3110/raw'
 Preparing word log 982024900  [   42176 bytes]
 Preparing word log 982027284  [31465324 bytes]
 Preparing word log 982027618  [ 8815804 bytes]
 Preparing del log 982024900  
 Preparing del log 982027284
 Preparing del log 982027618
 Renaming logs...
 Done
 
 Running ./run-splitter on these worked fine. No problems at all. 
 After that, I went on indexing and created 
 
 59920 Feb 13 06:05 982040748.del.done
  31457740 Feb 13 06:05 982040748.wrd.done
  1480 Feb 13 06:06 982040807.del.done
637240 Feb 13 06:06 982040807.wrd.done
 51920 Feb 13 07:21 982045300.del.done
  31469304 Feb 13 07:21 982045300.wrd.done
 69248 Feb 13 07:51 982047843.del.done
  30213344 Feb 13 07:51 982047843.wrd.done
 
 another two 31 MB files and two smaller ones. All of them were 
 splitted without problems.
 
 [two days later] 
 
 Indexing kept crashing (see separate posting) and splitting 
 kept going fine until tonight, when the opposite occured. 
 By now, I have almost 1 GB of indexed files, 4 indexer 
 crashes and one splitter crash. I'll do the debugging and 
 post its output tomorrow. 
 
 Z
 
 
 -- 
 oracle@everywhere: The ephemeral source of the eternal truth...
 __
 If you want to unsubscribe send "unsubscribe udmsearch"
 to [EMAIL PROTECTED]
 


__
Do You Yahoo!?
Get personalized email addresses from Yahoo! Mail - only $35 
a year!  http://personal.mail.yahoo.com/
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-12 Thread Alexander Barkov

Hi!

Can you guys give us a log file produced by splitter -p which caused
the crash? We can't reproduce the crash :-(



Caffeinate The World wrote:
 
 i reported this problems a while back. i believe it's being worked on.
 atleast the recently found the bug why it wasn't splitting out to FFF.
 the seg fault happens during the splitter process and not index. i've
 been splitter when the logs are at about  2 MB and i've not had
 splitter core dump on me yet. but before when i let the log file build
 up to about 15 to 30 MB, i had that core dump problem.
 
 i hope this will be resolved soon because it's a pain in the behind.
 ;-(
 --- Zenon Panoussis [EMAIL PROTECTED] wrote:
  Author: Zenon Panoussis
  Email: [EMAIL PROTECTED]
  Message:
  RH Linux 7.0, search 3.1.9, MySQL 3.23.29, cache mode, with the
  new patches for cache.c and sql.c.
 
  It happens all the time. It started happening when "maximum size"
  31 MB log files were indexed, but by now it happens on any indexing,
  no matter how big or small the log file, as if the database somehow
  was corrupt:
 
Delete from cache-file /var/mnogo319/tree/12/B/12BFD000
/var/mnogo319/tree/12/C/12C1 old:  69 new:   1 total:  70
./run-splitter: line 118: 18790 Segmentation fault  (core
  dumped) $SPLITTER
 
  For the same log file it always crashes at the same index file
  (e.g. every time I try to reindex 12345678.log it will crash
  at tree/12/3/4567000). If I delete the log file and start again
  with a new log file, it will crash at a different place, but it
  will still be consistent in crashing at the same place every time.
 
  And the backtrace:
 
  # gdb splitter core
  GNU gdb 5.0
  [...]
  This GDB was configured as "i386-redhat-linux"...
  Core was generated by `/usr/local/mnogo319/sbin/splitter'.
  Program terminated with signal 11, Segmentation fault.
  Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done.
  Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10
  Reading symbols from /lib/libm.so.6...done.
  Loaded symbols for /lib/libm.so.6
  Reading symbols from /usr/lib/libz.so.1...done.
  Loaded symbols for /usr/lib/libz.so.1
  Reading symbols from /lib/libc.so.6...done.
  Loaded symbols for /lib/libc.so.6
  Reading symbols from /lib/libcrypt.so.1...done.
  Loaded symbols for /lib/libcrypt.so.1
  Reading symbols from /lib/libnsl.so.1...done.
  Loaded symbols for /lib/libnsl.so.1
  Reading symbols from /lib/ld-linux.so.2...done.
  Loaded symbols for /lib/ld-linux.so.2
  #0  0x8059061 in UdmSplitCacheLog (log=300) at cache.c:552
  552
logwords[count+j].wrd_id=table[w].wrd_id;
 
  (gdb) backtrace
  #0  0x8059061 in UdmSplitCacheLog (log=300) at cache.c:552
  #1  0x8049e89 in main (argc=1, argv=0xba94) at splitter.c:70
  #2  0x4009bbfc in __libc_start_main (main=0x8049d80 main, argc=1,
  ubp_av=0xba94,
  init=0x80495bc _init, fini=0x8065b7c _fini,
  rtld_fini=0x4000d674 _dl_fini, stack_end=0xba8c)
  at ../sysdeps/generic/libc-start.c:118
 
  Since 3.1.10 is coming out today, I'll try it and see if things
  work better. If not, I'll post more bad news later ;)
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-12 Thread Zenon Panoussis



Alexander Barkov skrev:
 

 Can you guys give us a log file produced by splitter -p which caused
 crash? We can't reproduce crash :-(

Huh? splitter doesn't accept the -v5 argument, so it won't give 
more detailed logs than the normal ones. The only log I had, that 
to stdout, is the one I included with my first posting in this 
thread: 

  Delete from cache-file /var/mnogo319/tree/12/B/12BFD000 
/var/mnogo319/tree/12/C/12C1 old: 69 new: 1 total: 70 
./run-splitter: line 118: 18790 Segmentation fault (core dumped) $SPLITTER 

Until this point everything was normal. 

Anyway, as I said, I strongly suspect corruption in the word 
database. On a previous occasion when this happened, I deleted 
the entire tree/* directory structure and started all over again. 
Splitter worked like a dream with both small and big log files 
until one of the following occurred:

1. I stopped indexer with ^C and then ran splitter 
   or
2. Splitter had to work itself through some 31 MB files. (These 
   files are not all the same size; they tend to get slightly 
   bigger the more they are, i.e. something like this:
 0001.log31.500.000 bytes
 0002.log31.550.000 bytes 
 0003.log31.580.000 bytes
   sort of). 

Unfortunately I haven't been making notes, so I can't tell for 
sure which one of these two things happened before things stopped 
working. 

I tried splitter again today with ./splitter splitter.log . It 
went in a very normal way *almost* as far as yesterday, and then 
hung so badly that not even kill -9 could kill it. The log of 
this run looks like 

snip normal operation
Delete from cache-file /var/mnogo319/tree/12/B/12B27000
Delete from cache-file /var/mnogo319/tree/12/B/12B2D000
Delete from cache-file /var/mnogo319/tree/12/B/12B3
Delete from cache-file /var/mnogo319/tree/12/B/12B31000
Delete from cache-file /var/mnogo319/tree/12/B/12B3

I am attaching the three files that could be involved, 
namely tree/12/B/12B31000, 12B32000 and 12B35000. 


I'll install 3.1.10 now, try it on the old word database and see 
what it does. If it doesn't work, I'll remove the word database 
and start again from scratch. I'll try to make detailed notes this 
time and report back. 

Z


-- 
oracle@everywhere: The ephemeral source of the eternal truth...
 wordfiles.tar.gz


Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-12 Thread Caffeinate The World

in my tests your 3 little files wouldn't make a difference. he would
have to run splitter -p and splitter on all the files starting from the
first original RAW file, including all the 31 MB file. i believe in my
case it was the original 31mb file which caused the problem. 

while processing the first 31mb file, it didn't core dump, but all the
preceding files did cause core dumps at unpredictable times, but often
at the same location initially (i.e. 77C3000...)

therefore, in order to recreate the scenario, one would have to start
from the first raw file. i've tar-ed up such a series of files for Alex.
perhaps he'll be able to find out why. my hypothesis is an array or
buffer overflow in splitter.c.




--- Zenon Panoussis [EMAIL PROTECTED] wrote:
 
 
 Alexander Barkov skrev:
  
 
  Can you guys give us a log file produced by splitter -p which
 caused
  crash? We can't reproduce crash :-(
 
 Huh? splitter doesn't accept the -v5 argument, so it won't give 
 more detailed logs than the normal ones. The only log I had, that 
 to stdout, is the one I included with my first posting in this 
 thread: 
 
   Delete from cache-file /var/mnogo319/tree/12/B/12BFD000 
 /var/mnogo319/tree/12/C/12C1 old: 69 new: 1 total: 70 
 ./run-splitter: line 118: 18790 Segmentation fault (core
 dumped) $SPLITTER 
 
 Until this point everything was normal. 
 
 Anyway, as I said, I strongly suspect corruption in the word 
 database. On a previous occasion when this happened, I deleted 
 the entire tree/* directory structure and started all over again. 
 Splitter worked like a dream with both small and big log files 
 until one of the following occured:
 
 1. I stopped indexer with ^C and then run splitter 
or
 2. Splitter had to work itself through some 31 MB files. (These 
files are not all the same size; they tend to get slightly 
bigger the more they are, i.e. something like this:
  0001.log31.500.000 bytes
  0002.log31.550.000 bytes 
  0003.log31.580.000 bytes
sort of). 
 
 Unfortunately I haven't been making notes, so I can't tell for 
 sure which one of these two things happened before things stopped 
 working. 
 
 I tried splitter again today with ./splitter splitter.log . It 
 went in a very normal way *almost* as far as yesterday, and then 
 hang so badly that not even kill -9 could kill it. The log of 
 this run looks like 
 
 snip normal operation
 Delete from cache-file /var/mnogo319/tree/12/B/12B27000
 Delete from cache-file /var/mnogo319/tree/12/B/12B2D000
 Delete from cache-file /var/mnogo319/tree/12/B/12B3
 Delete from cache-file /var/mnogo319/tree/12/B/12B31000
 Delete from cache-file /var/mnogo319/tree/12/B/12B3
 
 I am attaching the three files that could be involved, 
 namely tree/12/B/12B31000, 12B32000 and 12B35000. 
 
 
 I'll install 3.1.10 now, try it on the old word database and see 
 what it does. If it doesn't work, I'll remove the word database 
 and start again from scratch. I'll try to make detailed notes this 
 time and report back. 
 
 Z
 
 
 -- 
 oracle@everywhere: The ephemeral source of the eternal truth...

 ATTACHMENT part 2 application/x-gzip name=wordfiles.tar.gz



__
Do You Yahoo!?
Get personalized email addresses from Yahoo! Mail - only $35 
a year!  http://personal.mail.yahoo.com/
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-12 Thread Alexander Barkov

Could you please put a zipped /var/mnogo319/tree/12/B/12BFD000 and
a file /splitter/XXX.wrd with the corresponding XXX.del which produce
the crash somewhere on the net?




Zenon Panoussis wrote:
 
 Alexander Barkov skrev:
 
 
  Can you guys give us a log file produced by splitter -p which caused
  crash? We can't reproduce crash :-(
 
 Huh? splitter doesn't accept the -v5 argument, so it won't give
 more detailed logs than the normal ones. The only log I had, that
 to stdout, is the one I included with my first posting in this
 thread:
 
   Delete from cache-file /var/mnogo319/tree/12/B/12BFD000
 /var/mnogo319/tree/12/C/12C1 old: 69 new: 1 total: 70
 ./run-splitter: line 118: 18790 Segmentation fault (core dumped) $SPLITTER
 
 Until this point everything was normal.
 
 Anyway, as I said, I strongly suspect corruption in the word
 database. On a previous occasion when this happened, I deleted
 the entire tree/* directory structure and started all over again.
 Splitter worked like a dream with both small and big log files
 until one of the following occured:
 
 1. I stopped indexer with ^C and then run splitter
or
 2. Splitter had to work itself through some 31 MB files. (These
files are not all the same size; they tend to get slightly
bigger the more they are, i.e. something like this:
  0001.log31.500.000 bytes
  0002.log31.550.000 bytes
  0003.log31.580.000 bytes
sort of).
 
 Unfortunately I haven't been making notes, so I can't tell for
 sure which one of these two things happened before things stopped
 working.
 
 I tried splitter again today with ./splitter splitter.log . It
 went in a very normal way *almost* as far as yesterday, and then
 hang so badly that not even kill -9 could kill it. The log of
 this run looks like
 
 snip normal operation
 Delete from cache-file /var/mnogo319/tree/12/B/12B27000
 Delete from cache-file /var/mnogo319/tree/12/B/12B2D000
 Delete from cache-file /var/mnogo319/tree/12/B/12B3
 Delete from cache-file /var/mnogo319/tree/12/B/12B31000
 Delete from cache-file /var/mnogo319/tree/12/B/12B3
 
 I am attaching the three files that could be involved,
 namely tree/12/B/12B31000, 12B32000 and 12B35000.
 
 I'll install 3.1.10 now, try it on the old word database and see
 what it does. If it doesn't work, I'll remove the word database
 and start again from scratch. I'll try to make detailed notes this
 time and report back.
 
 Z
 
 --
 oracle@everywhere: The ephemeral source of the eternal truth...
 
   
Name: wordfiles.tar.gz
wordfiles.tar.gzType: Unix Tape Archive (application/x-tar)
Encoding: base64
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-12 Thread Alexander Barkov

Zenon Panoussis wrote:
 
 Alexander Barkov skrev:
 
 
  Could you please put zipped /var/mnogo319/tree/12/B/12BFD000 and
  a file /splitter/XXX.wrd with correspondent XXX.del which produce
  crash somewhere on the net?
 
 http://search.freewinds.cx/logs/logs.tar.gz


Not Found
The requested URL /logs/logs.tar.gz was not found on this server.
Apache/1.3.14 Server at search.freewinds.cx Port 80
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-12 Thread Zenon Panoussis


Zenon Panoussis skrev:
 

And a really HARD hang at the same place as before. So hard
that I can't even kill splitter.

BTW, although I couldn't kill splitter, I did find a core dump 
in sbin. Here's the backtrace:


# gdb splitter core
GNU gdb 5.0
snip copyright
This GDB was configured as "i386-redhat-linux"...
Core was generated by `./splitter'.
Program terminated with signal 11, Segmentation fault.
Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done.
Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10
Reading symbols from /lib/libm.so.6...done.
Loaded symbols for /lib/libm.so.6
Reading symbols from /usr/lib/libz.so.1...done.
Loaded symbols for /usr/lib/libz.so.1
Reading symbols from /lib/libc.so.6...done.
Loaded symbols for /lib/libc.so.6
Reading symbols from /lib/libcrypt.so.1...done.
Loaded symbols for /lib/libcrypt.so.1
Reading symbols from /lib/libnsl.so.1...done.
Loaded symbols for /lib/libnsl.so.1
Reading symbols from /lib/ld-linux.so.2...done.
Loaded symbols for /lib/ld-linux.so.2
#0  0x8057d15 in UdmSplitCacheLog (log=300) at cache.c:635
635
 logwords[count+j].wrd_id=table[w].wrd_id;
(gdb) backtrace
#0  0x8057d15 in UdmSplitCacheLog (log=300) at cache.c:635
#1  0x8049f29 in main (argc=1, argv=0xbac4) at splitter.c:74
#2  0x4009bbfc in __libc_start_main (main=0x8049e20 main, argc=1, ubp_av=0xbac4, 
init=0x8049630 _init, fini=0x8064f7c _fini, rtld_fini=0x4000d674 _dl_fini, 
stack_end=0xbabc)
at ../sysdeps/generic/libc-start.c:118

Z


-- 
oracle@everywhere: The ephemeral source of the eternal truth...
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-12 Thread Zenon Panoussis



Zenon Panoussis skrev:
 

 I'll delete the entire tree directory and start re-indexing from
 scratch. I'll make and split a small file first, ca 5 MB, then a
 31 MB file, if that works yet another 31 MB file, and so on until
 I get in problems again. Will report back later this evening.

First step OK: 

- indexed for a while, created 2.8 MB log file
- split successfully and even got the FFF directory:
  snip
  /var/mnogo3110/tree/FF/F/FFFE6000 old:   0 new:   2 total:   2
  /var/mnogo3110/tree/FF/F/FFFE7000 old:   0 new:  24 total:  24

Now for 31 MB adventures :)

Z


-- 
oracle@everywhere: The ephemeral source of the eternal truth...


-- 
oracle@everywhere: The ephemeral source of the eternal truth...
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




UdmSearch: Webboard: Segfault (grrr)

2001-02-11 Thread Zenon Panoussis

Author: Zenon Panoussis
Email: [EMAIL PROTECTED]
Message:
RH Linux 7.0, search 3.1.9, MySQL 3.23.29, cache mode, with the 
new patches for cache.c and sql.c. 

It happens all the time. It started happening when "maximum size" 
31 MB log files were indexed, but by now it happens on any indexing, 
no matter how big or small the log file, as if the database somehow 
was corrupt:

  Delete from cache-file /var/mnogo319/tree/12/B/12BFD000
  /var/mnogo319/tree/12/C/12C1 old:  69 new:   1 total:  70
  ./run-splitter: line 118: 18790 Segmentation fault  (core dumped) $SPLITTER

For the same log file it always crashes at the same index file 
(e.g. every time I try to reindex 12345678.log it will crash 
at tree/12/3/4567000). If I delete the log file and start again 
with a new log file, it will crash at a different place, but it 
will still be consistent in crashing at the same place every time. 

And the backtrace:

# gdb splitter core
GNU gdb 5.0
[...]
This GDB was configured as "i386-redhat-linux"...
Core was generated by `/usr/local/mnogo319/sbin/splitter'.
Program terminated with signal 11, Segmentation fault.
Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done.
Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10
Reading symbols from /lib/libm.so.6...done.
Loaded symbols for /lib/libm.so.6
Reading symbols from /usr/lib/libz.so.1...done.
Loaded symbols for /usr/lib/libz.so.1
Reading symbols from /lib/libc.so.6...done.
Loaded symbols for /lib/libc.so.6
Reading symbols from /lib/libcrypt.so.1...done.
Loaded symbols for /lib/libcrypt.so.1
Reading symbols from /lib/libnsl.so.1...done.
Loaded symbols for /lib/libnsl.so.1
Reading symbols from /lib/ld-linux.so.2...done.
Loaded symbols for /lib/ld-linux.so.2
#0  0x8059061 in UdmSplitCacheLog (log=300) at cache.c:552
552
 logwords[count+j].wrd_id=table[w].wrd_id;

(gdb) backtrace
#0  0x8059061 in UdmSplitCacheLog (log=300) at cache.c:552
#1  0x8049e89 in main (argc=1, argv=0xba94) at splitter.c:70
#2  0x4009bbfc in __libc_start_main (main=0x8049d80 main, argc=1, ubp_av=0xba94, 
init=0x80495bc _init, fini=0x8065b7c _fini, rtld_fini=0x4000d674 _dl_fini, 
stack_end=0xba8c)
at ../sysdeps/generic/libc-start.c:118

Since 3.1.10 is coming out today, I'll try it and see if things 
work better. If not, I'll post more bad news later ;)

Z



Reply: http://search.mnogo.ru/board/message.php?id=1320

__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]




Re: UdmSearch: Webboard: Segfault (grrr)

2001-02-11 Thread Caffeinate The World

i reported this problem a while back. i believe it's being worked on.
at least they recently found the bug that kept it from splitting out to FFF.
the seg fault happens during the splitter process and not during indexing. i've
been running splitter when the logs are at about 2 MB and i've not had
splitter core dump on me yet. but before, when i let the log file build
up to about 15 to 30 MB, i had that core dump problem.

i hope this will be resolved soon because it's a pain in the behind.
;-(
--- Zenon Panoussis [EMAIL PROTECTED] wrote:
 Author: Zenon Panoussis
 Email: [EMAIL PROTECTED]
 Message:
 RH Linux 7.0, search 3.1.9, MySQL 3.23.29, cache mode, with the 
 new patches for cache.c and sql.c. 
 
 It happens all the time. It started happening when "maximum size" 
 31 MB log files were indexed, but by now it happens on any indexing, 
 no matter how big or small the log file, as if the database somehow 
 was corrupt:
 
   Delete from cache-file /var/mnogo319/tree/12/B/12BFD000
   /var/mnogo319/tree/12/C/12C1 old:  69 new:   1 total:  70
   ./run-splitter: line 118: 18790 Segmentation fault  (core
 dumped) $SPLITTER
 
 For the same log file it always crashes at the same index file 
 (e.g. every time I try to reindex 12345678.log it will crash 
 at tree/12/3/4567000). If I delete the log file and start again 
 with a new log file, it will crash at a different place, but it 
 will still be consistent in crashing at the same place every time. 
 
 And the backtrace:
 
 # gdb splitter core
 GNU gdb 5.0
 [...]
 This GDB was configured as "i386-redhat-linux"...
 Core was generated by `/usr/local/mnogo319/sbin/splitter'.
 Program terminated with signal 11, Segmentation fault.
 Reading symbols from /usr/lib/mysql/libmysqlclient.so.10...done.
 Loaded symbols for /usr/lib/mysql/libmysqlclient.so.10
 Reading symbols from /lib/libm.so.6...done.
 Loaded symbols for /lib/libm.so.6
 Reading symbols from /usr/lib/libz.so.1...done.
 Loaded symbols for /usr/lib/libz.so.1
 Reading symbols from /lib/libc.so.6...done.
 Loaded symbols for /lib/libc.so.6
 Reading symbols from /lib/libcrypt.so.1...done.
 Loaded symbols for /lib/libcrypt.so.1
 Reading symbols from /lib/libnsl.so.1...done.
 Loaded symbols for /lib/libnsl.so.1
 Reading symbols from /lib/ld-linux.so.2...done.
 Loaded symbols for /lib/ld-linux.so.2
 #0  0x8059061 in UdmSplitCacheLog (log=300) at cache.c:552
 552  
   logwords[count+j].wrd_id=table[w].wrd_id;
 
 (gdb) backtrace
 #0  0x8059061 in UdmSplitCacheLog (log=300) at cache.c:552
 #1  0x8049e89 in main (argc=1, argv=0xba94) at splitter.c:70
 #2  0x4009bbfc in __libc_start_main (main=0x8049d80 main, argc=1,
 ubp_av=0xba94, 
 init=0x80495bc _init, fini=0x8065b7c _fini,
 rtld_fini=0x4000d674 _dl_fini, stack_end=0xba8c)
 at ../sysdeps/generic/libc-start.c:118
 
 Since 3.1.10 is coming out today, I'll try it and see if things 
 work better. If not, I'll post more bad news later ;)
 
 Z
 
 
 
 Reply: http://search.mnogo.ru/board/message.php?id=1320
 
 __
 If you want to unsubscribe send "unsubscribe udmsearch"
 to [EMAIL PROTECTED]
 


__
Do You Yahoo!?
Get personalized email addresses from Yahoo! Mail - only $35 
a year!  http://personal.mail.yahoo.com/
__
If you want to unsubscribe send "unsubscribe udmsearch"
to [EMAIL PROTECTED]