Update of /cvsroot/nutch/nutch/src/plugin/index-more/src/java/net/nutch/indexer/more
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21410
Modified Files:
    MoreIndexingFilter.java
Log Message:
Try a bit harder to parse the dates.

Index: MoreIndexingFilter.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/plugin/index-more/src/java/net/nutch/indexer/more/MoreIndexingFilter.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** MoreIndexingFilter.java 3 Oct 2004 22:23:31 -0000 1.1
--- MoreIndexingFilter.java 22 Nov 2004 16:42:57 -0000 1.2
***************
*** 19,23 ****
--- 19,26 ----
  import net.nutch.net.protocols.HttpDateFormat;
+
+ import java.text.DateFormat;
  import java.text.ParseException;
+ import java.text.SimpleDateFormat;
  import net.nutch.parse.Parse;
***************
*** 34,37 ****
--- 37,41 ----
  import java.util.logging.Logger;
+ import java.util.Date;
  import java.util.Enumeration;
  import java.util.Properties;
***************
*** 106,115 ****
  // index/store it as long value
! HttpDateFormat format = new HttpDateFormat();
  try {
!   lastModified = new Long(format.toLong(lastModified)).toString();
  } catch (ParseException e) {
!   LOG.warning(url+": can't parse erroneous last-modified: "+lastModified);
!   lastModified = null;
  }
--- 110,125 ----
  // index/store it as long value
! DateFormat df = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy zzz");
  try {
!   lastModified = new Long(HttpDateFormat.toLong(lastModified)).toString();
  } catch (ParseException e) {
!   // try to parse it as date in alternative format
!   try {
!     Date d = df.parse(lastModified);
!     lastModified = new Long(d.getTime()).toString();
!   } catch (Exception e1) {
!     LOG.fine(url+": can't parse erroneous last-modified: "+lastModified);
!     lastModified = null;
!   }
  }

-------------------------------------------------------
SF email is sponsored by - The IT Product Guide
Read honest & candid reviews on hundreds of IT Products from real users.
Discover which products truly live up to the hype. Start reading now.
http://productguide.itmanagersjournal.com/
_______________________________________________
Nutch-cvs mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs