Hi,
What is a stopword? What is it used for?
Seems cool, linked to Lucene...
Thanks.

On 02/02/2011 07:45 PM, rud...@apache.org wrote:
> Author: rudolf
> Date: Wed Feb  2 18:45:29 2011
> New Revision: 1066576
> 
> URL: http://svn.apache.org/viewvc?rev=1066576&view=rev
> Log:
> add russian and arabic stopword files
> 
> Added:
>     
> lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Arabic_ar.xml
> Modified:
>     
> lenya/branches/BRANCH_2_1_X/src/modules/lucene/config/cocoon-xconf/analyzer_manager.xconf
>     
> lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Russian_ru.xml
> 
> Modified: 
> lenya/branches/BRANCH_2_1_X/src/modules/lucene/config/cocoon-xconf/analyzer_manager.xconf
> URL: 
> http://svn.apache.org/viewvc/lenya/branches/BRANCH_2_1_X/src/modules/lucene/config/cocoon-xconf/analyzer_manager.xconf?rev=1066576&r1=1066575&r2=1066576&view=diff
> ==============================================================================
> --- 
> lenya/branches/BRANCH_2_1_X/src/modules/lucene/config/cocoon-xconf/analyzer_manager.xconf
>  (original)
> +++ 
> lenya/branches/BRANCH_2_1_X/src/modules/lucene/config/cocoon-xconf/analyzer_manager.xconf
>  Wed Feb  2 18:45:29 2011
> @@ -21,6 +21,7 @@
>  
>       <analyzer_manager logger="core.search.analyzer_manager">
>               <analyzer id="standard" 
> class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
> +       <analyzer id="stopword_ar" 
> class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer"
>  configfile="context://WEB-INF/analyzer/stopword/Arabic_ar.xml" 
> checkupdate="true"/>
>         <analyzer id="stopword_da" 
> class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer"
>  configfile="context://WEB-INF/analyzer/stopword/Danish_da.xml" 
> checkupdate="true"/>
>         <analyzer id="stopword_nl" 
> class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer"
>  configfile="context://WEB-INF/analyzer/stopword/Dutch_nl.xml" 
> checkupdate="true"/>
>         <analyzer id="stopword_en" 
> class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer"
>  configfile="context://WEB-INF/analyzer/stopword/English_en.xml" 
> checkupdate="true"/>
> @@ -31,9 +32,7 @@
>         <analyzer id="stopword_no" 
> class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer"
>  configfile="context://WEB-INF/analyzer/stopword/Norwegian_no.xml" 
> checkupdate="true"/>
>         <analyzer id="stopword_pl" 
> class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer"
>  configfile="context://WEB-INF/analyzer/stopword/Polish_pl.xml" 
> checkupdate="true"/>
>         <analyzer id="stopword_pt" 
> class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer"
>  configfile="context://WEB-INF/analyzer/stopword/Portuguese_pt.xml" 
> checkupdate="true"/>
> -       <!--
>         <analyzer id="stopword_ru" 
> class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer"
>  configfile="context://WEB-INF/analyzer/stopword/Russian_ru.xml" 
> checkupdate="true"/>
> -       -->
>         <analyzer id="stopword_es" 
> class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer"
>  configfile="context://WEB-INF/analyzer/stopword/Spanish_es.xml" 
> checkupdate="true"/>
>         <analyzer id="stopword_sv" 
> class="org.apache.cocoon.components.search.analyzer.ConfigurableStopwordAnalyzer"
>  configfile="context://WEB-INF/analyzer/stopword/Swedish_sv.xml" 
> checkupdate="true"/>
>         <analyzer id="analyzer_research" 
> class="org.apache.cocoon.components.search.analyzer.ConfigurablePerFieldAnalyzer"
>  configfile="context://WEB-INF/analyzer/perfieldconf.xml" 
> checkupdate="false"/>
> 
> Added: 
> lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Arabic_ar.xml
> URL: 
> http://svn.apache.org/viewvc/lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Arabic_ar.xml?rev=1066576&view=auto
> ==============================================================================
> --- 
> lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Arabic_ar.xml
>  (added)
> +++ 
> lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Arabic_ar.xml
>  Wed Feb  2 18:45:29 2011
> @@ -0,0 +1,124 @@
> +<?xml version="1.0" encoding="UTF-8"?>
> +
> +<stopwords>
> +     <stopword>من</stopword>
> +     <stopword>ومن</stopword>
> +     <stopword>منها</stopword>
> +     <stopword>منه</stopword>
> +     <stopword>في</stopword>
> +     <stopword>وفي</stopword>
> +     <stopword>فيها</stopword>
> +     <stopword>فيه</stopword>
> +     <stopword>و</stopword>
> +     <stopword>ف</stopword>
> +     <stopword>ثم</stopword>
> +     <stopword>او</stopword>
> +     <stopword>أو</stopword>
> +     <stopword>ب</stopword>
> +     <stopword>بها</stopword>
> +     <stopword>به</stopword>
> +     <stopword>ا</stopword>
> +     <stopword>أ</stopword>
> +     <stopword>اى</stopword>
> +     <stopword>اي</stopword>
> +     <stopword>أي</stopword>
> +     <stopword>أى</stopword>
> +     <stopword>لا</stopword>
> +     <stopword>ولا</stopword>
> +     <stopword>الا</stopword>
> +     <stopword>ألا</stopword>
> +     <stopword>إلا</stopword>
> +     <stopword>لكن</stopword>
> +     <stopword>ما</stopword>
> +     <stopword>وما</stopword>
> +     <stopword>كما</stopword>
> +     <stopword>فما</stopword>
> +     <stopword>عن</stopword>
> +     <stopword>مع</stopword>
> +     <stopword>اذا</stopword>
> +     <stopword>إذا</stopword>
> +     <stopword>ان</stopword>
> +     <stopword>أن</stopword>
> +     <stopword>إن</stopword>
> +     <stopword>انها</stopword>
> +     <stopword>أنها</stopword>
> +     <stopword>إنها</stopword>
> +     <stopword>انه</stopword>
> +     <stopword>أنه</stopword>
> +     <stopword>إنه</stopword>
> +     <stopword>بان</stopword>
> +     <stopword>بأن</stopword>
> +     <stopword>فان</stopword>
> +     <stopword>فأن</stopword>
> +     <stopword>وان</stopword>
> +     <stopword>وأن</stopword>
> +     <stopword>وإن</stopword>
> +     <stopword>التى</stopword>
> +     <stopword>التي</stopword>
> +     <stopword>الذى</stopword>
> +     <stopword>الذي</stopword>
> +     <stopword>الذين</stopword>
> +     <stopword>الى</stopword>
> +     <stopword>الي</stopword>
> +     <stopword>إلى</stopword>
> +     <stopword>إلي</stopword>
> +     <stopword>على</stopword>
> +     <stopword>عليها</stopword>
> +     <stopword>عليه</stopword>
> +     <stopword>اما</stopword>
> +     <stopword>أما</stopword>
> +     <stopword>إما</stopword>
> +     <stopword>ايضا</stopword>
> +     <stopword>أيضا</stopword>
> +     <stopword>كل</stopword>
> +     <stopword>وكل</stopword>
> +     <stopword>لم</stopword>
> +     <stopword>ولم</stopword>
> +     <stopword>لن</stopword>
> +     <stopword>ولن</stopword>
> +     <stopword>هى</stopword>
> +     <stopword>هي</stopword>
> +     <stopword>هو</stopword>
> +     <stopword>وهى</stopword>
> +     <stopword>وهي</stopword>
> +     <stopword>وهو</stopword>
> +     <stopword>فهى</stopword>
> +     <stopword>فهي</stopword>
> +     <stopword>فهو</stopword>
> +     <stopword>انت</stopword>
> +     <stopword>أنت</stopword>
> +     <stopword>لك</stopword>
> +     <stopword>لها</stopword>
> +     <stopword>له</stopword>
> +     <stopword>هذه</stopword>
> +     <stopword>هذا</stopword>
> +     <stopword>تلك</stopword>
> +     <stopword>ذلك</stopword>
> +     <stopword>هناك</stopword>
> +     <stopword>كانت</stopword>
> +     <stopword>كان</stopword>
> +     <stopword>يكون</stopword>
> +     <stopword>تكون</stopword>
> +     <stopword>وكانت</stopword>
> +     <stopword>وكان</stopword>
> +     <stopword>غير</stopword>
> +     <stopword>بعض</stopword>
> +     <stopword>قد</stopword>
> +     <stopword>نحو</stopword>
> +     <stopword>بين</stopword>
> +     <stopword>بينما</stopword>
> +     <stopword>منذ</stopword>
> +     <stopword>ضمن</stopword>
> +     <stopword>حيث</stopword>
> +     <stopword>الان</stopword>
> +     <stopword>الآن</stopword>
> +     <stopword>خلال</stopword>
> +     <stopword>بعد</stopword>
> +     <stopword>قبل</stopword>
> +     <stopword>حتى</stopword>
> +     <stopword>عند</stopword>
> +     <stopword>عندما</stopword>
> +     <stopword>لدى</stopword>
> +     <stopword>جميع</stopword>
> +</stopwords>
> +
> 
> Modified: 
> lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Russian_ru.xml
> URL: 
> http://svn.apache.org/viewvc/lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Russian_ru.xml?rev=1066576&r1=1066575&r2=1066576&view=diff
> ==============================================================================
> --- 
> lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Russian_ru.xml
>  (original)
> +++ 
> lenya/branches/BRANCH_2_1_X/src/webapp/WEB-INF/analyzer/stopword/Russian_ru.xml
>  Wed Feb  2 18:45:29 2011
> @@ -1,101 +1,164 @@
>  <?xml version="1.0" encoding="UTF-8"?>
> +
>  <stopwords>
> -<stopword>а</stopword>
> -<stopword>без</stopword>
> -<stopword>более</stopword>
> -<stopword>бы</stopword>
> -<stopword>был</stopword>
> -<stopword>была</stopword>
> -<stopword>были</stopword>
> -<stopword>было</stopword>
> -<stopword>быть</stopword>
> -<stopword>в</stopword>
> -<stopword>вам</stopword>
> -<stopword>ваÑ?</stopword>
> -<stopword>веÑ?ь</stopword>
> -<stopword>во</stopword>
> -<stopword>вот</stopword>
> -<stopword>вÑ?е</stopword>
> -<stopword>вÑ?его</stopword>
> -<stopword>вÑ?ех</stopword>
> -<stopword>вы</stopword>
> -<stopword>где</stopword>
> -<stopword>да</stopword>
> -<stopword>даже</stopword>
> -<stopword>длÑ?</stopword>
> -<stopword>до</stopword>
> -<stopword>его</stopword>
> -<stopword>ее</stopword>
> -<stopword>еÑ?ли</stopword>
> -<stopword>еÑ?ть</stopword>
> -<stopword>еще</stopword>
> -<stopword>же</stopword>
> -<stopword>за</stopword>
> -<stopword>здеÑ?ь</stopword>
> -<stopword>и</stopword>
> -<stopword>из</stopword>
> -<stopword>или</stopword>
> -<stopword>им</stopword>
> -<stopword>их</stopword>
> -<stopword>к</stopword>
> -<stopword>как</stopword>
> -<stopword>ко</stopword>
> -<stopword>когда</stopword>
> -<stopword>кто</stopword>
> -<stopword>ли</stopword>
> -<stopword>либо</stopword>
> -<stopword>мне</stopword>
> -<stopword>может</stopword>
> -<stopword>мы</stopword>
> -<stopword>на</stopword>
> -<stopword>надо</stopword>
> -<stopword>наш</stopword>
> -<stopword>не</stopword>
> -<stopword>него</stopword>
> -<stopword>нее</stopword>
> -<stopword>нет</stopword>
> -<stopword>ни</stopword>
> -<stopword>них</stopword>
> -<stopword>но</stopword>
> -<stopword>ну</stopword>
> -<stopword>о</stopword>
> -<stopword>об</stopword>
> -<stopword>однако</stopword>
> -<stopword>он</stopword>
> -<stopword>она</stopword>
> -<stopword>они</stopword>
> -<stopword>оно</stopword>
> -<stopword>от</stopword>
> -<stopword>очень</stopword>
> -<stopword>по</stopword>
> -<stopword>под</stopword>
> -<stopword>при</stopword>
> -<stopword>Ñ?</stopword>
> -<stopword>Ñ?о</stopword>
> -<stopword>так</stopword>
> -<stopword>также</stopword>
> -<stopword>такой</stopword>
> -<stopword>там</stopword>
> -<stopword>те</stopword>
> -<stopword>тем</stopword>
> -<stopword>то</stopword>
> -<stopword>того</stopword>
> -<stopword>тоже</stopword>
> -<stopword>той</stopword>
> -<stopword>только</stopword>
> -<stopword>том</stopword>
> -<stopword>ты</stopword>
> -<stopword>у</stopword>
> -<stopword>уже</stopword>
> -<stopword>хотÑ?</stopword>
> -<stopword>чего</stopword>
> -<stopword>чей</stopword>
> -<stopword>чем</stopword>
> -<stopword>что</stopword>
> -<stopword>чтобы</stopword>
> -<stopword>чье</stopword>
> -<stopword>чьÑ?</stopword>
> -<stopword>Ñ?та</stopword>
> -<stopword>Ñ?ти</stopword>
> -<stopword>Ñ?то</stopword>
> -<stopword>Ñ?</stopword></stopwords>
> +     <stopword>и</stopword>
> +     <stopword>в</stopword>
> +     <stopword>во</stopword>
> +     <stopword>не</stopword>
> +     <stopword>что</stopword>
> +     <stopword>он</stopword>
> +     <stopword>на</stopword>
> +     <stopword>я</stopword>
> +     <stopword>с</stopword>
> +     <stopword>со</stopword>
> +     <stopword>как</stopword>
> +     <stopword>а</stopword>
> +     <stopword>то</stopword>
> +     <stopword>все</stopword>
> +     <stopword>она</stopword>
> +     <stopword>так</stopword>
> +     <stopword>его</stopword>
> +     <stopword>но</stopword>
> +     <stopword>да</stopword>
> +     <stopword>ты</stopword>
> +     <stopword>к</stopword>
> +     <stopword>у</stopword>
> +     <stopword>же</stopword>
> +     <stopword>вы</stopword>
> +     <stopword>за</stopword>
> +     <stopword>бы</stopword>
> +     <stopword>по</stopword>
> +     <stopword>только</stopword>
> +     <stopword>ее</stopword>
> +     <stopword>мне</stopword>
> +     <stopword>было</stopword>
> +     <stopword>вот</stopword>
> +     <stopword>от</stopword>
> +     <stopword>меня</stopword>
> +     <stopword>еще</stopword>
> +     <stopword>нет</stopword>
> +     <stopword>о</stopword>
> +     <stopword>из</stopword>
> +     <stopword>ему</stopword>
> +     <stopword>теперь</stopword>
> +     <stopword>когда</stopword>
> +     <stopword>даже</stopword>
> +     <stopword>ну</stopword>
> +     <stopword>вдруг</stopword>
> +     <stopword>ли</stopword>
> +     <stopword>если</stopword>
> +     <stopword>уже</stopword>
> +     <stopword>или</stopword>
> +     <stopword>ни</stopword>
> +     <stopword>быть</stopword>
> +     <stopword>был</stopword>
> +     <stopword>него</stopword>
> +     <stopword>до</stopword>
> +     <stopword>вас</stopword>
> +     <stopword>нибудь</stopword>
> +     <stopword>опять</stopword>
> +     <stopword>уж</stopword>
> +     <stopword>вам</stopword>
> +     <stopword>сказал</stopword>
> +     <stopword>ведь</stopword>
> +     <stopword>там</stopword>
> +     <stopword>потом</stopword>
> +     <stopword>себя</stopword>
> +     <stopword>ничего</stopword>
> +     <stopword>ей</stopword>
> +     <stopword>может</stopword>
> +     <stopword>они</stopword>
> +     <stopword>тут</stopword>
> +     <stopword>где</stopword>
> +     <stopword>есть</stopword>
> +     <stopword>надо</stopword>
> +     <stopword>ней</stopword>
> +     <stopword>для</stopword>
> +     <stopword>мы</stopword>
> +     <stopword>тебя</stopword>
> +     <stopword>их</stopword>
> +     <stopword>чем</stopword>
> +     <stopword>была</stopword>
> +     <stopword>сам</stopword>
> +     <stopword>чтоб</stopword>
> +     <stopword>без</stopword>
> +     <stopword>будто</stopword>
> +     <stopword>человек</stopword>
> +     <stopword>чего</stopword>
> +     <stopword>раз</stopword>
> +     <stopword>тоже</stopword>
> +     <stopword>себе</stopword>
> +     <stopword>под</stopword>
> +     <stopword>жизнь</stopword>
> +     <stopword>будет</stopword>
> +     <stopword>ж</stopword>
> +  <stopword>тогда</stopword>
> +     <stopword>кто</stopword>
> +     <stopword>этот</stopword>
> +     <stopword>говорил</stopword>
> +     <stopword>того</stopword>
> +     <stopword>потому</stopword>
> +     <stopword>этого</stopword>
> +     <stopword>какой</stopword>
> +     <stopword>совсем</stopword>
> +     <stopword>ним</stopword>
> +     <stopword>здесь</stopword>
> +     <stopword>этом</stopword>
> +     <stopword>один</stopword>
> +     <stopword>почти</stopword>
> +     <stopword>мой</stopword>
> +     <stopword>тем</stopword>
> +     <stopword>чтобы</stopword>
> +     <stopword>нее</stopword>
> +     <stopword>кажется</stopword>
> +     <stopword>сейчас</stopword>
> +     <stopword>были</stopword>
> +     <stopword>куда</stopword>
> +     <stopword>зачем</stopword>
> +     <stopword>сказать</stopword>
> +     <stopword>всех</stopword>
> +     <stopword>никогда</stopword>
> +     <stopword>сегодня</stopword>
> +     <stopword>можно</stopword>
> +     <stopword>при</stopword>
> +     <stopword>наконец</stopword>
> +     <stopword>два</stopword>
> +     <stopword>об</stopword>
> +     <stopword>другой</stopword>
> +     <stopword>хоть</stopword>
> +     <stopword>после</stopword>
> +     <stopword>над</stopword>
> +     <stopword>больше</stopword>
> +     <stopword>тот</stopword>
> +     <stopword>через</stopword>
> +     <stopword>эти</stopword>
> +     <stopword>нас</stopword>
> +     <stopword>про</stopword>
> +     <stopword>всего</stopword>
> +     <stopword>них</stopword>
> +     <stopword>какая</stopword>
> +     <stopword>много</stopword>
> +     <stopword>разве</stopword>
> +     <stopword>сказала</stopword>
> +     <stopword>три</stopword>
> +     <stopword>эту</stopword>
> +  <stopword>моя</stopword>
> +     <stopword>впрочем</stopword>
> +     <stopword>хорошо</stopword>
> +     <stopword>свою</stopword>
> +     <stopword>этой</stopword>
> +     <stopword>перед</stopword>
> +     <stopword>иногда</stopword>
> +     <stopword>лучше</stopword>
> +     <stopword>чуть</stopword>
> +     <stopword>том</stopword>
> +     <stopword>нельзя</stopword>
> +     <stopword>такой</stopword>
> +     <stopword>им</stopword>
> +     <stopword>более</stopword>
> +     <stopword>всегда</stopword>
> +     <stopword>конечно</stopword>
> +     <stopword>всю</stopword>
> +     <stopword>между</stopword>
> +</stopwords>
> +
> 
> 
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: commits-unsubscr...@lenya.apache.org
> For additional commands, e-mail: commits-h...@lenya.apache.org
> 

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@lenya.apache.org
For additional commands, e-mail: dev-h...@lenya.apache.org

Reply via email to