Hi, what is a stopword? What is it used for? Seems cool, linked to Lucene... Thanks. On 02/02/2011 07:45 PM, rud...@apache.org wrote: > Author: rudolf > Date: Wed Feb 2 18:45:29 2011 > New Revision: 1066576 > > URL: http://svn.apache.org/viewvc?rev=1066576&view=rev > Log: > add russian and arabic stopword files > > Added: > > lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Arabic_ar.xml > Modified: > > lenya/branches/BRANCH_2_1_X/src/modules/lucene/config/cocoon-xconf/analyzer_manager.xconf > > lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Russian_ru.xml > > Modified: > lenya/branches/BRANCH_2_1_X/src/modules/lucene/config/cocoon-xconf/analyzer_manager.xconf > URL: > http://svn.apache.org/viewvc/lenya/branches/BRANCH_2_1_X/src/modules/lucene/config/cocoon-xconf/analyzer_manager.xconf?rev=1066576&r1=1066575&r2=1066576&view=diff > ============================================================================== > --- > lenya/branches/BRANCH_2_1_X/src/modules/lucene/config/cocoon-xconf/analyzer_manager.xconf > (original) > +++ > lenya/branches/BRANCH_2_1_X/src/modules/lucene/config/cocoon-xconf/analyzer_manager.xconf > Wed Feb 2 18:45:29 2011 > @@ -21,6 +21,7 @@ > > <analyzer_manager logger="core.search.analyzer_manager"> > <analyzer id="standard" > class="org.apache.lucene.analysis.standard.StandardAnalyzer"/> > + <analyzer id="stopword_ar" > class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer" > configfile="context://WEB-INF/analyzer/stopword/Arabic_ar.xml" > checkupdate="true"/> > <analyzer id="stopword_da" > class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer" > configfile="context://WEB-INF/analyzer/stopword/Danish_da.xml" > checkupdate="true"/> > <analyzer id="stopword_nl" > class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer" > configfile="context://WEB-INF/analyzer/stopword/Dutch_nl.xml" > checkupdate="true"/> > <analyzer id="stopword_en" > 
class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer" > configfile="context://WEB-INF/analyzer/stopword/English_en.xml" > checkupdate="true"/> > @@ -31,9 +32,7 @@ > <analyzer id="stopword_no" > class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer" > configfile="context://WEB-INF/analyzer/stopword/Norwegian_no.xml" > checkupdate="true"/> > <analyzer id="stopword_pl" > class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer" > configfile="context://WEB-INF/analyzer/stopword/Polish_pl.xml" > checkupdate="true"/> > <analyzer id="stopword_pt" > class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer" > configfile="context://WEB-INF/analyzer/stopword/Portuguese_pt.xml" > checkupdate="true"/> > - <!-- > <analyzer id="stopword_ru" > class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer" > configfile="context://WEB-INF/analyzer/stopword/Russian_ru.xml" > checkupdate="true"/> > - --> > <analyzer id="stopword_es" > class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer" > configfile="context://WEB-INF/analyzer/stopword/Spanish_es.xml" > checkupdate="true"/> > <analyzer id="stopword_sv" > class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer" > configfile="context://WEB-INF/analyzer/stopword/Swedish_sv.xml" > checkupdate="true"/> > <analyzer id="analyzer_research" > class="org.apache.cocoon.components.search.analyzer.ConfigurablePerFieldAnalyzer" > configfile="context://WEB-INF/analyzer/perfieldconf.xml" > checkupdate="false"/> > > Added: > lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Arabic_ar.xml > URL: > http://svn.apache.org/viewvc/lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Arabic_ar.xml?rev=1066576&view=auto > ============================================================================== > --- > 
lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Arabic_ar.xml > (added) > +++ > lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Arabic_ar.xml > Wed Feb 2 18:45:29 2011 > @@ -0,0 +1,124 @@ > +<?xml version="1.0" encoding="UTF-8"?> > + > +<stopwords> > + <stopword>Ù Ù</stopword> > + <stopword>ÙÙ Ù</stopword> > + <stopword>Ù ÙÙØ§</stopword> > + <stopword>Ù ÙÙ</stopword> > + <stopword>ÙÙ</stopword> > + <stopword>ÙÙÙ</stopword> > + <stopword>ÙÙÙØ§</stopword> > + <stopword>ÙÙÙ</stopword> > + <stopword>Ù</stopword> > + <stopword>Ù</stopword> > + <stopword>ث٠</stopword> > + <stopword>اÙ</stopword> > + <stopword>Ø£Ù</stopword> > + <stopword>ب</stopword> > + <stopword>Ø¨ÙØ§</stopword> > + <stopword>بÙ</stopword> > + <stopword>ا</stopword> > + <stopword>Ø£</stopword> > + <stopword>اÙ</stopword> > + <stopword>اÙ</stopword> > + <stopword>Ø£Ù</stopword> > + <stopword>Ø£Ù</stopword> > + <stopword>ÙØ§</stopword> > + <stopword>ÙÙØ§</stopword> > + <stopword>Ø§ÙØ§</stopword> > + <stopword>Ø£ÙØ§</stopword> > + <stopword>Ø¥ÙØ§</stopword> > + <stopword>ÙÙÙ</stopword> > + <stopword>٠ا</stopword> > + <stopword>Ù٠ا</stopword> > + <stopword>Ù٠ا</stopword> > + <stopword>Ù٠ا</stopword> > + <stopword>عÙ</stopword> > + <stopword>٠ع</stopword> > + <stopword>اذا</stopword> > + <stopword>إذا</stopword> > + <stopword>اÙ</stopword> > + <stopword>Ø£Ù</stopword> > + <stopword>Ø¥Ù</stopword> > + <stopword>اÙÙØ§</stopword> > + <stopword>Ø£ÙÙØ§</stopword> > + <stopword>Ø¥ÙÙØ§</stopword> > + <stopword>اÙÙ</stopword> > + <stopword>Ø£ÙÙ</stopword> > + <stopword>Ø¥ÙÙ</stopword> > + <stopword>باÙ</stopword> > + <stopword>بأÙ</stopword> > + <stopword>ÙØ§Ù</stopword> > + <stopword>ÙØ£Ù</stopword> > + <stopword>ÙØ§Ù</stopword> > + <stopword>ÙØ£Ù</stopword> > + <stopword>ÙØ¥Ù</stopword> > + <stopword>Ø§ÙØªÙ</stopword> > + <stopword>Ø§ÙØªÙ</stopword> > + <stopword>Ø§ÙØ°Ù</stopword> > + <stopword>Ø§ÙØ°Ù</stopword> > + <stopword>Ø§ÙØ°ÙÙ</stopword> > + <stopword>اÙÙ</stopword> > + 
<stopword>اÙÙ</stopword> > + <stopword>Ø¥ÙÙ</stopword> > + <stopword>Ø¥ÙÙ</stopword> > + <stopword>عÙÙ</stopword> > + <stopword>عÙÙÙØ§</stopword> > + <stopword>عÙÙÙ</stopword> > + <stopword>ا٠ا</stopword> > + <stopword>أ٠ا</stopword> > + <stopword>إ٠ا</stopword> > + <stopword>Ø§ÙØ¶Ø§</stopword> > + <stopword>Ø£ÙØ¶Ø§</stopword> > + <stopword>ÙÙ</stopword> > + <stopword>ÙÙÙ</stopword> > + <stopword>ÙÙ </stopword> > + <stopword>ÙÙÙ </stopword> > + <stopword>ÙÙ</stopword> > + <stopword>ÙÙÙ</stopword> > + <stopword>ÙÙ</stopword> > + <stopword>ÙÙ</stopword> > + <stopword>ÙÙ</stopword> > + <stopword>ÙÙÙ</stopword> > + <stopword>ÙÙÙ</stopword> > + <stopword>ÙÙÙ</stopword> > + <stopword>ÙÙÙ</stopword> > + <stopword>ÙÙÙ</stopword> > + <stopword>ÙÙÙ</stopword> > + <stopword>Ø§ÙØª</stopword> > + <stopword>Ø£ÙØª</stopword> > + <stopword>ÙÙ</stopword> > + <stopword>ÙÙØ§</stopword> > + <stopword>ÙÙ</stopword> > + <stopword>ÙØ°Ù</stopword> > + <stopword>ÙØ°Ø§</stopword> > + <stopword>تÙÙ</stopword> > + <stopword>ذÙÙ</stopword> > + <stopword>ÙÙØ§Ù</stopword> > + <stopword>ÙØ§Ùت</stopword> > + <stopword>ÙØ§Ù</stopword> > + <stopword>ÙÙÙÙ</stopword> > + <stopword>تÙÙÙ</stopword> > + <stopword>ÙÙØ§Ùت</stopword> > + <stopword>ÙÙØ§Ù</stopword> > + <stopword>ØºÙØ±</stopword> > + <stopword>بعض</stopword> > + <stopword>ÙØ¯</stopword> > + <stopword>ÙØÙ</stopword> > + <stopword>بÙÙ</stopword> > + <stopword>بÙÙ٠ا</stopword> > + <stopword>Ù ÙØ°</stopword> > + <stopword>ض٠Ù</stopword> > + <stopword>ØÙØ«</stopword> > + <stopword>Ø§ÙØ§Ù</stopword> > + <stopword>Ø§ÙØ¢Ù</stopword> > + <stopword>Ø®ÙØ§Ù</stopword> > + <stopword>بعد</stopword> > + <stopword>ÙØ¨Ù</stopword> > + <stopword>ØØªÙ</stopword> > + <stopword>Ø¹ÙØ¯</stopword> > + <stopword>Ø¹ÙØ¯Ù ا</stopword> > + <stopword>ÙØ¯Ù</stopword> > + <stopword>Ø¬Ù ÙØ¹</stopword> > +</stopwords> > + > > Modified: > lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Russian_ru.xml > URL: > 
http://svn.apache.org/viewvc/lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Russian_ru.xml?rev=1066576&r1=1066575&r2=1066576&view=diff > ============================================================================== > --- > lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Russian_ru.xml > (original) > +++ > lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Russian_ru.xml > Wed Feb 2 18:45:29 2011 > @@ -1,101 +1,164 @@ > <?xml version="1.0" encoding="UTF-8"?> > + > <stopwords> > -<stopword>а</stopword> > -<stopword>без</stopword> > -<stopword>более</stopword> > -<stopword>бÑ</stopword> > -<stopword>бÑл</stopword> > -<stopword>бÑла</stopword> > -<stopword>бÑли</stopword> > -<stopword>бÑло</stopword> > -<stopword>бÑÑÑ</stopword> > -<stopword>в</stopword> > -<stopword>вам</stopword> > -<stopword>ваÑ?</stopword> > -<stopword>веÑ?Ñ</stopword> > -<stopword>во</stopword> > -<stopword>воÑ</stopword> > -<stopword>вÑ?е</stopword> > -<stopword>вÑ?его</stopword> > -<stopword>вÑ?ÐµÑ </stopword> > -<stopword>вÑ</stopword> > -<stopword>где</stopword> > -<stopword>да</stopword> > -<stopword>даже</stopword> > -<stopword>длÑ?</stopword> > -<stopword>до</stopword> > -<stopword>его</stopword> > -<stopword>ее</stopword> > -<stopword>еÑ?ли</stopword> > -<stopword>еÑ?ÑÑ</stopword> > -<stopword>еÑе</stopword> > -<stopword>же</stopword> > -<stopword>за</stopword> > -<stopword>здеÑ?Ñ</stopword> > -<stopword>и</stopword> > -<stopword>из</stopword> > -<stopword>или</stopword> > -<stopword>им</stopword> > -<stopword>Ð¸Ñ </stopword> > -<stopword>к</stopword> > -<stopword>как</stopword> > -<stopword>ко</stopword> > -<stopword>когда</stopword> > -<stopword>кÑо</stopword> > -<stopword>ли</stopword> > -<stopword>либо</stopword> > -<stopword>мне</stopword> > -<stopword>можеÑ</stopword> > -<stopword>мÑ</stopword> > -<stopword>на</stopword> > -<stopword>надо</stopword> > -<stopword>наÑ</stopword> > -<stopword>не</stopword> > -<stopword>него</stopword> > 
-<stopword>нее</stopword> > -<stopword>неÑ</stopword> > -<stopword>ни</stopword> > -<stopword>Ð½Ð¸Ñ </stopword> > -<stopword>но</stopword> > -<stopword>нÑ</stopword> > -<stopword>о</stopword> > -<stopword>об</stopword> > -<stopword>однако</stopword> > -<stopword>он</stopword> > -<stopword>она</stopword> > -<stopword>они</stopword> > -<stopword>оно</stopword> > -<stopword>оÑ</stopword> > -<stopword>оÑенÑ</stopword> > -<stopword>по</stopword> > -<stopword>под</stopword> > -<stopword>пÑи</stopword> > -<stopword>Ñ?</stopword> > -<stopword>Ñ?о</stopword> > -<stopword>Ñак</stopword> > -<stopword>Ñакже</stopword> > -<stopword>Ñакой</stopword> > -<stopword>Ñам</stopword> > -<stopword>Ñе</stopword> > -<stopword>Ñем</stopword> > -<stopword>Ñо</stopword> > -<stopword>Ñого</stopword> > -<stopword>Ñоже</stopword> > -<stopword>Ñой</stopword> > -<stopword>ÑолÑко</stopword> > -<stopword>Ñом</stopword> > -<stopword>ÑÑ</stopword> > -<stopword>Ñ</stopword> > -<stopword>Ñже</stopword> > -<stopword>Ñ Ð¾ÑÑ?</stopword> > -<stopword>Ñего</stopword> > -<stopword>Ñей</stopword> > -<stopword>Ñем</stopword> > -<stopword>ÑÑо</stopword> > -<stopword>ÑÑобÑ</stopword> > -<stopword>ÑÑе</stopword> > -<stopword>ÑÑÑ?</stopword> > -<stopword>Ñ?Ñа</stopword> > -<stopword>Ñ?Ñи</stopword> > -<stopword>Ñ?Ñо</stopword> > -<stopword>Ñ?</stopword></stopwords> > + <stopword>и</stopword> > + <stopword>в</stopword> > + <stopword>во</stopword> > + <stopword>не</stopword> > + <stopword>ÑÑо</stopword> > + <stopword>он</stopword> > + <stopword>на</stopword> > + <stopword>Ñ</stopword> > + <stopword>Ñ</stopword> > + <stopword>Ñо</stopword> > + <stopword>как</stopword> > + <stopword>а</stopword> > + <stopword>Ñо</stopword> > + <stopword>вÑе</stopword> > + <stopword>она</stopword> > + <stopword>Ñак</stopword> > + <stopword>его</stopword> > + <stopword>но</stopword> > + <stopword>да</stopword> > + <stopword>ÑÑ</stopword> > + <stopword>к</stopword> > + <stopword>Ñ</stopword> > + <stopword>же</stopword> > + 
<stopword>вÑ</stopword> > + <stopword>за</stopword> > + <stopword>бÑ</stopword> > + <stopword>по</stopword> > + <stopword>ÑолÑко</stopword> > + <stopword>ее</stopword> > + <stopword>мне</stopword> > + <stopword>бÑло</stopword> > + <stopword>воÑ</stopword> > + <stopword>оÑ</stopword> > + <stopword>менÑ</stopword> > + <stopword>еÑе</stopword> > + <stopword>неÑ</stopword> > + <stopword>о</stopword> > + <stopword>из</stopword> > + <stopword>емÑ</stopword> > + <stopword>ÑепеÑÑ</stopword> > + <stopword>когда</stopword> > + <stopword>даже</stopword> > + <stopword>нÑ</stopword> > + <stopword>вдÑÑг</stopword> > + <stopword>ли</stopword> > + <stopword>еÑли</stopword> > + <stopword>Ñже</stopword> > + <stopword>или</stopword> > + <stopword>ни</stopword> > + <stopword>бÑÑÑ</stopword> > + <stopword>бÑл</stopword> > + <stopword>него</stopword> > + <stopword>до</stopword> > + <stopword>ваÑ</stopword> > + <stopword>нибÑдÑ</stopword> > + <stopword>опÑÑÑ</stopword> > + <stopword>Ñж</stopword> > + <stopword>вам</stopword> > + <stopword>Ñказал</stopword> > + <stopword>ведÑ</stopword> > + <stopword>Ñам</stopword> > + <stopword>поÑом</stopword> > + <stopword>ÑебÑ</stopword> > + <stopword>ниÑего</stopword> > + <stopword>ей</stopword> > + <stopword>можеÑ</stopword> > + <stopword>они</stopword> > + <stopword>ÑÑÑ</stopword> > + <stopword>где</stopword> > + <stopword>еÑÑÑ</stopword> > + <stopword>надо</stopword> > + <stopword>ней</stopword> > + <stopword>длÑ</stopword> > + <stopword>мÑ</stopword> > + <stopword>ÑебÑ</stopword> > + <stopword>Ð¸Ñ </stopword> > + <stopword>Ñем</stopword> > + <stopword>бÑла</stopword> > + <stopword>Ñам</stopword> > + <stopword>ÑÑоб</stopword> > + <stopword>без</stopword> > + <stopword>бÑдÑо</stopword> > + <stopword>Ñеловек</stopword> > + <stopword>Ñего</stopword> > + <stopword>Ñаз</stopword> > + <stopword>Ñоже</stopword> > + <stopword>Ñебе</stopword> > + <stopword>под</stopword> > + <stopword>жизнÑ</stopword> > + <stopword>бÑдеÑ</stopword> > + 
<stopword>ж</stopword> > + <stopword>Ñогда</stopword> > + <stopword>кÑо</stopword> > + <stopword>ÑÑоÑ</stopword> > + <stopword>говоÑил</stopword> > + <stopword>Ñого</stopword> > + <stopword>поÑомÑ</stopword> > + <stopword>ÑÑого</stopword> > + <stopword>какой</stopword> > + <stopword>ÑовÑем</stopword> > + <stopword>ним</stopword> > + <stopword>здеÑÑ</stopword> > + <stopword>ÑÑом</stopword> > + <stopword>один</stopword> > + <stopword>поÑÑи</stopword> > + <stopword>мой</stopword> > + <stopword>Ñем</stopword> > + <stopword>ÑÑобÑ</stopword> > + <stopword>нее</stopword> > + <stopword>кажеÑÑÑ</stopword> > + <stopword>ÑейÑаÑ</stopword> > + <stopword>бÑли</stopword> > + <stopword>кÑда</stopword> > + <stopword>заÑем</stopword> > + <stopword>ÑказаÑÑ</stopword> > + <stopword>вÑÐµÑ </stopword> > + <stopword>никогда</stopword> > + <stopword>ÑегоднÑ</stopword> > + <stopword>можно</stopword> > + <stopword>пÑи</stopword> > + <stopword>наконеÑ</stopword> > + <stopword>два</stopword> > + <stopword>об</stopword> > + <stopword>дÑÑгой</stopword> > + <stopword>Ñ Ð¾ÑÑ</stopword> > + <stopword>поÑле</stopword> > + <stopword>над</stopword> > + <stopword>болÑÑе</stopword> > + <stopword>ÑоÑ</stopword> > + <stopword>ÑеÑез</stopword> > + <stopword>ÑÑи</stopword> > + <stopword>наÑ</stopword> > + <stopword>пÑо</stopword> > + <stopword>вÑего</stopword> > + <stopword>Ð½Ð¸Ñ </stopword> > + <stopword>какаÑ</stopword> > + <stopword>много</stopword> > + <stopword>Ñазве</stopword> > + <stopword>Ñказала</stopword> > + <stopword>ÑÑи</stopword> > + <stopword>ÑÑÑ</stopword> > + <stopword>моÑ</stopword> > + <stopword>впÑоÑем</stopword> > + <stopword>Ñ Ð¾ÑоÑо</stopword> > + <stopword>ÑвоÑ</stopword> > + <stopword>ÑÑой</stopword> > + <stopword>пеÑед</stopword> > + <stopword>иногда</stopword> > + <stopword>лÑÑÑе</stopword> > + <stopword>ÑÑÑÑ</stopword> > + <stopword>Ñом</stopword> > + <stopword>нелÑзÑ</stopword> > + <stopword>Ñакой</stopword> > + <stopword>им</stopword> > + <stopword>более</stopword> > + 
<stopword>вÑегда</stopword> > + <stopword>конеÑно</stopword> > + <stopword>вÑÑ</stopword> > + <stopword>междÑ</stopword> > +</stopwords> > + > > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: commits-unsubscr...@lenya.apache.org > For additional commands, e-mail: commits-h...@lenya.apache.org >
--------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lenya.apache.org For additional commands, e-mail: dev-h...@lenya.apache.org