Author: jnioche
Date: Fri Feb 5 11:52:57 2010
New Revision: 906907

URL: http://svn.apache.org/viewvc?rev=906907&view=rev
Log:
NUTCH-786
Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/conf/domain-suffixes.xml Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=906907&r1=906906&r2=906907&view=diff ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Fri Feb 5 11:52:57 2010 @@ -2,6 +2,8 @@ Unreleased Changes +* NUTCH-786 Improvement to the list of suffix domains (jnioche) + * NUTCH-775 Enhance searcher interface (siren) * NUTCH-781 Update Tika to v0.6 (jnioche) Modified: lucene/nutch/trunk/conf/domain-suffixes.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/conf/domain-suffixes.xml?rev=906907&r1=906906&r2=906907&view=diff ============================================================================== --- lucene/nutch/trunk/conf/domain-suffixes.xml (original) +++ lucene/nutch/trunk/conf/domain-suffixes.xml Fri Feb 5 11:52:57 2010 @@ -1744,6 +1744,16 @@ <suffix domain="retina.ar" /> <suffix domain="uba.ar" /> + <suffix domain="com.ar" /> + <suffix domain="edu.ar" /> + <suffix domain="gob.ar" /> + <suffix domain="gov.ar" /> + <suffix domain="int.ar" /> + <suffix domain="mil.ar" /> + <suffix domain="net.ar" /> + <suffix domain="org.ar" /> + <suffix domain="tur.ar" /> + <!-- arpa : http://en.wikipedia.org/wiki/.arpa--> <suffix domain="e164.arpa" /> <suffix domain="in-addr.arpa" /> @@ -1955,6 +1965,14 @@ <!-- co : http://en.wikipedia.org/wiki/.co--> + <suffix domain="com.co" /> + <suffix domain="org.co" /> + <suffix domain="edu.co" /> + <suffix domain="gov.co" /> + <suffix domain="net.co" /> + <suffix domain="mil.co" /> + <suffix domain="nom.co" /> + <!-- com : http://en.wikipedia.org/wiki/.com--> <!-- coop : http://en.wikipedia.org/wiki/.coop--> @@ -2215,9 +2233,26 @@ <!-- id : http://en.wikipedia.org/wiki/.id--> + <suffix domain="ac.id" /> + <suffix domain="co.id" /> + <suffix domain="net.id" /> + <suffix domain="or.id" /> 
+ <suffix domain="web.id" /> + <suffix domain="sch.id" /> + <suffix domain="mil.id" /> + <suffix domain="go.id" /> + <!-- ie : http://en.wikipedia.org/wiki/.ie--> <!-- il : http://en.wikipedia.org/wiki/.il--> + <suffix domain="ac.il" /> + <suffix domain="co.il" /> + <suffix domain="org.il" /> + <suffix domain="net.il" /> + <suffix domain="k12.il" /> + <suffix domain="gov.il" /> + <suffix domain="muni.il" /> + <suffix domain="idf.il" /> <!-- im : https://www.nic.im/pdfs/imfaqs.pdf--> <suffix domain="co.im" /> @@ -2854,6 +2889,11 @@ <suffix domain="org.mw" /> <!-- mx : http://www.nic.mx/--> + <suffix domain="com.mx" /> + <suffix domain="edu.mx" /> + <suffix domain="gob.mx" /> + <suffix domain="net.mx" /> + <suffix domain="org.mx" /> <!-- my : http://www.mynic.net.my/--> @@ -3661,6 +3701,19 @@ <!-- nu : http://en.wikipedia.org/wiki/.nu--> <!-- nz : http://en.wikipedia.org/wiki/.nz--> + <suffix domain="ac.nz" /> + <suffix domain="co.nz" /> + <suffix domain="cri.nz" /> + <suffix domain="geek.nz" /> + <suffix domain="gen.nz" /> + <suffix domain="govt.nz" /> + <suffix domain="iwi.nz" /> + <suffix domain="maori.nz" /> + <suffix domain="mil.nz" /> + <suffix domain="net.nz" /> + <suffix domain="org.nz" /> + <suffix domain="parliament.nz" /> + <suffix domain="school.nz" /> <!-- om : http://en.wikipedia.org/wiki/.om--> @@ -4344,7 +4397,28 @@ <!-- yu : http://www.nic.yu/pravilnik-e.html--> - <!-- za : http://www.zadna.org.za/slds.html--> + <!-- za : http://www.zadna.org.za/slds.html + http://en.wikipedia.org/wiki/.za + --> + <suffix domain="ac.za" /> + <suffix domain="city.za" /> + <suffix domain="co.za" /> + <suffix domain="edu.za" /> + <suffix domain="gov.za" /> + <suffix domain="law.za" /> + <suffix domain="mil.za" /> + <suffix domain="nom.za" /> + <suffix domain="org.za" /> + <suffix domain="school.za" /> + <suffix domain="ecape.school.za" /> + <suffix domain="fs.school.za" /> + <suffix domain="gp.school.za" /> + <suffix domain="kzn.school.za" />
+ <suffix domain="mpm.school.za" /> + <suffix domain="ncape.school.za" /> + <suffix domain="lp.school.za" /> + <suffix domain="nw.school.za" /> + <suffix domain="wcape.school.za" /> <!-- zm : http://en.wikipedia.org/wiki/.zm-->