- - - - - - - - - - - - - - - - - - - - - - - - - - - -
Name: Roman
Subject: Re: Храним в одной БД (посте) индексируем другую(мускул).
Может, я неправильно составил конфиг: на FreeBSD не хочет искать русские слова и
удалять дубликаты:
---indexer.conf:
DBAddr pgsql://a:b@/search/?dbmode=cache
Include itogo.conf
LocalCharset windows-1251
CrossWords yes
CollectLinks yes
Include stopwords.conf
Include langmap.conf
MinWordLength 1
MaxWordLength 32
MaxDocSize 1048576
Disallow *.b *.sh *.md5 *.rpm
Disallow *.arj *.tar *.zip *.tgz *.gz *.z *.bz2
Disallow *.lha *.lzh *.rar *.zoo *.ha *.tar.Z
Disallow *.gif *.jpg *.jpeg *.bmp *.tiff *.tif *.xpm *.xbm *.pcx
Disallow *.vdo *.mpeg *.mpe *.mpg *.avi *.movie *.mov *.dat
Disallow *.mid *.mp3 *.rm *.ram *.wav *.aiff *.ra
Disallow *.vrml *.wrl *.png *.psd
Disallow *.exe *.com *.cab *.dll *.bin *.class *.ex_
Disallow *.tex *.texi *.xls *.doc *.texinfo
Disallow *.rtf *.pdf *.cdf *.ps
Disallow *.ai *.eps *.ppt *.hqx
Disallow *.cpt *.bms *.oda *.tcl
Disallow *.o *.a *.la *.so
Disallow *.pat *.pm *.m4 *.am *.css
Disallow *.map *.aif *.sit *.sea
Disallow *.m3u *.qt *.mov
Disallow Regex \.r[0-9][0-9]$ \.a[0-9][0-9]$ \.so\.[0-9]$
ReverseAlias regex ^(.*)[&\?][a-zA-Z;]+=[a-zA-Z0-9]{32}$ $1
ReverseAlias regex ^(.*)&[a-zA-Z;]+=[a-zA-Z0-9]{32}(.*) $1$2
ReverseAlias regex ^(.*)\?[a-zA-Z;]+=[a-zA-Z0-9]{32}&(.*) $1?$2
ReverseAlias regex ^(.*)\?[a-zA-Z;]+=[a-zA-Z0-9]{32}&(.*) $1?$2
ReverseAlias regex ^(.*)\?[a-zA-Z;]+=[a-zA-Z0-9]{32}(.*) $1?$2
ReverseAlias regex ^(.*)&[a-zA-Z;]+=[a-zA-Z0-9]{32}(.*) $1$2
ReverseAlias regex ^(.*)[&\?][a-zA-Z;]+=[a-zA-Z0-9]{16}$ $1
ReverseAlias regex ^(.*)&[a-zA-Z;]+=[a-zA-Z0-9]{16}(.*) $1$2
ReverseAlias regex ^(.*)\?[a-zA-Z;]+=[a-zA-Z0-9]{16}&(.*) $1?$2
ReverseAlias regex ^(.*)\?[a-zA-Z;]+=[a-zA-Z0-9]{16}&(.*) $1?$2
ReverseAlias regex ^(.*)\?[a-zA-Z;]+=[a-zA-Z0-9]{16}(.*) $1?$2
ReverseAlias regex ^(.*)&[a-zA-Z;]+=[a-zA-Z0-9]{16}(.*) $1$2
DeleteOlder 90d
UseRemoteContentType yes
AddType image/gif *.gif
AddType text/plain *.txt *.pl *.js *.h *.c *.pm *.e
AddType text/html *.html *.htm
AddType application/msword *.doc
AddType application/unknown *.*
ParserTimeOut 300
MaxHops 256
TrackHops yes
ReadTimeOut 30s
DocTimeOut 60s
Cookies yes
PopRankMethod Neo
RemoteCharset windows-1251
HTDBAddr mysql://a:[EMAIL PROTECTED]/news/
HTDBLimit 50
HTDBList "SELECT concat('http://www.site.ru/news/', id) FROM newsline"
HTDBDoc "SELECT concat('HTTP/1.0 200 OK\\r\\n','Content-type: text/html; charset: windows-1251\\r\\n','Last-Modified: ',Date_format(time,'%Y-%m-%d %T'),' MSK\\r\\n','\\r\\n','<html><head><title>',title,'</title></head><body>',bodytext,'</body></html>') FROM newsline WHERE id='$1'"
Server htdb:/
Realm Allow http://www.site.ru/news/*
Alias http://www.site.ru/news/ htdb:/
---search_mod.conf:
LogLevel 5
DBAddr cache://localhost/?dbmode=cache
Include obc.conf
Cache yes
HoldCache 7m
ResultsLimit 900
LocalCharset windows-1251
BrowserCharset windows-1251
StopwordFile stopwords/en.sl
StopwordFile stopwords/ru.sl
Spell en iso-8859-1 ispell/english.dict
Affix en iso-8859-1 ispell/english.aff
Affix ru koi8-r ispell/russian.aff
Spell ru koi8-r ispell/russian.dict
MinWordLength 1
MaxWordLength 32
PagesPerScreen 10
Synonym synonym/english.syn
Synonym synonym/russian.syn
HlBeg <b>
HlEnd </b>
Log2stderr yes
ExcerptSize 450
---itogo.conf:
VarDir /var
DoStore yes
URLInfoSQL no
UseCRC32URLId yes
WrdFiles 800
URLDataFiles 800
AccentExtensions yes
IndexDocSizeLimit 65536
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
Read the full topic here:
http://www.dataparksearch.org/cgi-bin/simpleforum.cgi?fid=05;topic_id=1157640166;page=3