Update of /cvsroot/nutch/nutch/src/plugin/index-more/src/java/net/nutch/indexer/more
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21410

Modified Files:
        MoreIndexingFilter.java 
Log Message:
Try a bit harder to parse the dates.



Index: MoreIndexingFilter.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/plugin/index-more/src/java/net/nutch/indexer/more/MoreIndexingFilter.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** MoreIndexingFilter.java     3 Oct 2004 22:23:31 -0000       1.1
--- MoreIndexingFilter.java     22 Nov 2004 16:42:57 -0000      1.2
***************
*** 19,23 ****
--- 19,26 ----
  
  import net.nutch.net.protocols.HttpDateFormat;
+ 
+ import java.text.DateFormat;
  import java.text.ParseException;
+ import java.text.SimpleDateFormat;
  
  import net.nutch.parse.Parse;
***************
*** 34,37 ****
--- 37,41 ----
  import java.util.logging.Logger;
  
+ import java.util.Date;
  import java.util.Enumeration;
  import java.util.Properties;
***************
*** 106,115 ****
  
      // index/store it as long value
!     HttpDateFormat format = new HttpDateFormat();
      try {
!       lastModified = new Long(format.toLong(lastModified)).toString();
      } catch  (ParseException e) {
!       LOG.warning(url+": can't parse erroneous last-modified: "+lastModified);
!       lastModified = null;
      }
  
--- 110,125 ----
  
      // index/store it as long value
!     DateFormat df = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy zzz");
      try {
!       lastModified = new Long(HttpDateFormat.toLong(lastModified)).toString();
      } catch  (ParseException e) {
!       // try to parse it as date in alternative format
!       try {
!         Date d = df.parse(lastModified);
!         lastModified = new Long(d.getTime()).toString();
!       } catch (Exception e1) {
!         LOG.fine(url+": can't parse erroneous last-modified: "+lastModified);
!         lastModified = null;
!       }
      }
  



-------------------------------------------------------
SF email is sponsored by - The IT Product Guide
Read honest & candid reviews on hundreds of IT Products from real users.
Discover which products truly live up to the hype. Start reading now. 
http://productguide.itmanagersjournal.com/
_______________________________________________
Nutch-cvs mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

Reply via email to