apac...

jerome Wed, 08 Feb 2006 13:54:04 -0800

Modified: 
lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java
 Wed Feb  8 13:48:52 2006
@@ -20,6 +20,7 @@
 import org.apache.poi.poifs.eventfilesystem.*;
 import org.apache.poi.poifs.filesystem.*;
 import org.apache.poi.util.LittleEndian;
+import org.apache.nutch.metadata.Metadata;
 
 import java.util.*;
 import java.io.*;
@@ -33,8 +34,7 @@
  * code to extract all msword properties.
  *
  */
-public class WordExtractor
-{
+public class WordExtractor {
 
   /**
    * Constructor
@@ -276,39 +276,40 @@
       /*Dates are being stored in millis since the epoch to aid
       localization*/
       if(title != null)
-        properties.setProperty("Title", title);
+        properties.setProperty(Metadata.TITLE, title);
       if(applicationName != null)
-        properties.setProperty("Application-Name", applicationName);
+        properties.setProperty(Metadata.APPLICATION_NAME, applicationName);
       if(author != null)
-        properties.setProperty("Author", author);
+        properties.setProperty(Metadata.AUTHOR, author);
       if(charCount != 0)
-        properties.setProperty("Character Count", charCount + "");
+        properties.setProperty(Metadata.CHARACTER_COUNT, charCount + "");
       if(comments != null)
-        properties.setProperty("Comments", comments);
+        properties.setProperty(Metadata.COMMENTS, comments);
       if(createDateTime != null)
-        properties.setProperty("Creation-Date", createDateTime.getTime() + "");
+        properties.setProperty(Metadata.DATE,
+                               Metadata.DATE_FORMAT.format(createDateTime));
       if(editTime != 0)
-        properties.setProperty("Edit-Time", editTime + "");
+        properties.setProperty(Metadata.LAST_MODIFIED, editTime + "");
       if(keywords != null)
-        properties.setProperty("Keywords", keywords);
+        properties.setProperty(Metadata.KEYWORDS, keywords);
       if(lastAuthor != null)
-        properties.setProperty("Last-Author", lastAuthor);
+        properties.setProperty(Metadata.LAST_AUTHOR, lastAuthor);
       if(lastPrinted != null)
-        properties.setProperty("Last-Printed", lastPrinted.getTime() + "");
+        properties.setProperty(Metadata.LAST_PRINTED, lastPrinted.getTime() + 
"");
       if(lastSaveDateTime != null)
-        properties.setProperty("Last-Save-Date", lastSaveDateTime.getTime() + 
"");
+        properties.setProperty(Metadata.LAST_SAVED, lastSaveDateTime.getTime() 
+ "");
       if(pageCount != 0)
-        properties.setProperty("Page-Count", pageCount + "");
+        properties.setProperty(Metadata.PAGE_COUNT, pageCount + "");
       if(revNumber != null)
-        properties.setProperty("Revision-Number", revNumber);
+        properties.setProperty(Metadata.REVISION_NUMBER, revNumber);
       if(security != 0)
-        properties.setProperty("Security", security + "");
+        properties.setProperty(Metadata.RIGHTS, security + "");
       if(subject != null)
-        properties.setProperty("Subject", subject);
+        properties.setProperty(Metadata.SUBJECT, subject);
       if(template != null)
-        properties.setProperty("Template", template);
+        properties.setProperty(Metadata.TEMPLATE, template);
       if(wordCount != 0)
-        properties.setProperty("Word-Count", wordCount + "");
+        properties.setProperty(Metadata.WORD_COUNT, wordCount + "");
       propertiesBroker.setProperties(properties);
 
       //si.getThumbnail(); // can't think of a sensible way of turning this 
into a string.


Modified: 
lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
 Wed Feb  8 13:48:52 2006
@@ -26,7 +26,8 @@
 import org.pdfbox.exceptions.InvalidPasswordException;
 
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.parse.ParseStatus;
@@ -89,12 +90,13 @@
 
     String text = null;
     String title = null;
+    Metadata metadata = new Metadata();
 
     try {
 
       byte[] raw = content.getContent();
 
-      String contentLength = content.get("Content-Length");
+      String contentLength = 
content.getMetadata().get(Response.CONTENT_LENGTH);
       if (contentLength != null
             && raw.length != Integer.parseInt(contentLength)) {
           return new ParseStatus(ParseStatus.FAILED, 
ParseStatus.FAILED_TRUNCATED,
@@ -102,8 +104,7 @@
             +" bytes. Parser can't handle incomplete pdf 
file.").getEmptyParse(getConf());
       }
 
-      PDFParser parser = new PDFParser(
-        new ByteArrayInputStream(raw));
+      PDFParser parser = new PDFParser(new ByteArrayInputStream(raw));
       parser.parse();
 
       pdf = parser.getPDDocument();
@@ -122,15 +123,18 @@
       PDDocumentInformation info = pdf.getDocumentInformation();
       title = info.getTitle();
       // more useful info, currently not used. please keep them for future use.
-      // pdf.getPageCount();
-      // info.getAuthor()
-      // info.getSubject()
-      // info.getKeywords()
-      // info.getCreator()
-      // info.getProducer()
-      // info.getTrapped()
-      // formatDate(info.getCreationDate())
-      // formatDate(info.getModificationDate())
+      metadata.add(Metadata.PAGE_COUNT, String.valueOf(pdf.getPageCount()));
+      metadata.add(Metadata.AUTHOR, info.getAuthor());
+      metadata.add(Metadata.SUBJECT, info.getSubject());
+      metadata.add(Metadata.KEYWORDS, info.getKeywords());
+      metadata.add(Metadata.CREATOR, info.getCreator());
+      metadata.add(Metadata.PUBLISHER, info.getProducer());
+      
+      //TODO: Figure out why we get a java.io.IOException: Error converting 
date:1-Jan-3 18:15PM
+      //error here
+      
+      //metadata.put(DATE, 
dcDateFormatter.format(info.getCreationDate().getTime()));
+      //metadata.put(LAST_MODIFIED, 
dcDateFormatter.format(info.getModificationDate().getTime()));
 
     } catch (CryptographyException e) {
       return new ParseStatus(ParseStatus.FAILED,
@@ -139,6 +143,8 @@
       return new ParseStatus(ParseStatus.FAILED,
               "Can't decrypt document - invalid password. " + 
e).getEmptyParse(getConf());
     } catch (Exception e) { // run time exception
+        LOG.warning("General exception in PDF parser: "+e.getMessage());
+        e.printStackTrace();        
       return new ParseStatus(ParseStatus.FAILED,
               "Can't be handled as pdf document. " + 
e).getEmptyParse(getConf());
     } finally {
@@ -159,11 +165,9 @@
     // collect outlink
     Outlink[] outlinks = OutlinkExtractor.getOutlinks(text, getConf());
 
-    // collect meta data
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
-
-    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title, 
outlinks, metadata);
+    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title,
+                                        outlinks, content.getMetadata(),
+                                        metadata);
     parseData.setConf(this.conf);
     return new ParseImpl(text, parseData);
     // any filter?

Modified: 
lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java
 Wed Feb  8 13:48:52 2006
@@ -18,7 +18,16 @@
 
 import org.apache.nutch.parse.*;
 import org.apache.nutch.protocol.Content;
+<<<<<<< .mine
+<<<<<<< .mine
+import org.apache.nutch.util.MetadataNames;
+
+=======
+import org.apache.nutch.util.NutchConf;
+=======
 import org.apache.hadoop.conf.Configuration;
+>>>>>>> .r374853
+>>>>>>> .r373941
 import java.io.ByteArrayInputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
@@ -31,7 +40,7 @@
  * 
  * @author Andy Hedges
  */
-public class RTFParseFactory implements Parser {
+public class RTFParseFactory implements Parser, MetadataNames {
 
   private Configuration conf;
 
@@ -53,10 +62,13 @@
     Properties metadata = new Properties();
     metadata.putAll(content.getMetadata());
     metadata.putAll(delegate.getMetaData());
-    String title = metadata.getProperty("title");
+    String title = metadata.getProperty(TITLE);
 
     if (title != null) {
-      metadata.remove(title);
+        //(CM): Why remove the title metadata property here? Even 
+        //though it's stored in the ParseData, it still might be useful
+        //to have via this properties object?
+        //metadata.remove(title);
     } else {
       title = "";
     }

Modified: 
lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java
 Wed Feb  8 13:48:52 2006
@@ -22,18 +22,20 @@
 import java.util.List;
 import java.util.Properties;
 
+import org.apache.nutch.util.MetadataNames;
+
 /**
  * A parser delegate for handling rtf events.
  * @author Andy Hedges
  */
-public class RTFParserDelegateImpl implements RTFParserDelegate {
+public class RTFParserDelegateImpl implements RTFParserDelegate, MetadataNames 
{
 
   String tabs = "";
   Properties metadata = new Properties();
 
-  String[] META_NAMES_TEXT = {"title", "subject", "author", "manager",
-                              "company", "operator", "category", "keywords",
-                              "comment", "doccomm", "hlinkbase"};
+  String[] META_NAMES_TEXT = {TITLE, SUBJECT, AUTHOR, "manager",
+                              "company", "operator", "category", KEYWORDS,
+                              COMMENTS, "doccomm", "hlinkbase"};
   String[] META_NAMES_DATE = {"creatim", "creatim", "printim", "buptim"};
 
   String metaName = "";

Modified: 
lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
 Wed Feb  8 13:48:52 2006
@@ -25,7 +25,15 @@
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.ProtocolException;
 import org.apache.nutch.protocol.ProtocolFactory;
+<<<<<<< .mine
+<<<<<<< .mine
+import org.apache.nutch.util.MetadataNames;
+=======
+import org.apache.nutch.util.NutchConf;
+=======
 import org.apache.hadoop.conf.Configuration;
+>>>>>>> .r374853
+>>>>>>> .r373941
 
 import java.util.Properties;
 
@@ -34,7 +42,7 @@
  *
  * @author Andy Hedges
  */
-public class TestRTFParser extends TestCase {
+public class TestRTFParser extends TestCase implements MetadataNames {
 
   private String fileSeparator = System.getProperty("file.separator");
   // This system property is defined in ./src/plugin/build-plugin.xml
@@ -73,7 +81,7 @@
     String title = parse.getData().getTitle();
     Properties meta = parse.getData().getMetadata();
     assertEquals("test rft document", title);
-    assertEquals("tests", meta.getProperty("subject"));
+    assertEquals("tests", meta.getProperty(SUBJECT));
 
 
 

Modified: 
lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
 Wed Feb  8 13:48:52 2006
@@ -21,9 +21,10 @@
 import java.util.*;
 import java.util.logging.Logger;
 
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.parse.*;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
@@ -60,19 +61,17 @@
   public Parse getParse(Content content) {
 
     String text = null;
-    // collect meta data
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
     Vector outlinks = new Vector();
 
     try {
 
       byte[] raw = content.getContent();
 
-      String contentLength = content.get("Content-Length");
+      String contentLength = 
content.getMetadata().get(Response.CONTENT_LENGTH);
       if (contentLength != null && raw.length != 
Integer.parseInt(contentLength)) {
-        return new ParseStatus(ParseStatus.FAILED, 
ParseStatus.FAILED_TRUNCATED, "Content truncated at " + raw.length
-                + " bytes. Parser can't handle incomplete 
files.").getEmptyParse(conf);
+        return new ParseStatus(ParseStatus.FAILED, 
ParseStatus.FAILED_TRUNCATED,
+                               "Content truncated at " + raw.length +
+                               " bytes. Parser can't handle incomplete 
files.").getEmptyParse(conf);
       }
       ExtractText extractor = new ExtractText();
 
@@ -106,7 +105,8 @@
     if (text == null) text = "";
 
     Outlink[] links = (Outlink[]) outlinks.toArray(new 
Outlink[outlinks.size()]);
-    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "", links, 
metadata);
+    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "", links,
+                                        content.getMetadata());
     return new ParseImpl(text, parseData);
   }
 
@@ -119,8 +119,10 @@
     byte[] buf = new byte[in.available()];
     in.read(buf);
     SWFParser parser = new SWFParser();
-    Parse p = parser.getParse(new Content("file:" + args[0], "file:" + 
args[0], buf, "application/x-shockwave-flash",
-            new ContentProperties(), NutchConfiguration.create()));
+    Parse p = parser.getParse(new Content("file:" + args[0], "file:" + args[0],
+                                          buf, "application/x-shockwave-flash",
+                                          new Metadata(),
+                                          NutchConfiguration.create()));
     System.out.println("Parse Text:");
     System.out.println(p.getText());
     System.out.println("Parse Data:");

Modified: 
lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
 Wed Feb  8 13:48:52 2006
@@ -17,7 +17,6 @@
 package org.apache.nutch.parse.text;
 
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.parse.*;
 import org.apache.nutch.util.*;
 
@@ -27,9 +26,6 @@
   private Configuration conf;
 
   public Parse getParse(Content content) {
-    // copy content meta data through
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata());
 
     // ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "", new
     // Outlink[0], metadata);
@@ -49,7 +45,7 @@
       text = new String(content.getContent()); // use default encoding
     }
     ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "",
-        OutlinkExtractor.getOutlinks(text, getConf()), metadata);
+        OutlinkExtractor.getOutlinks(text, getConf()), content.getMetadata());
     parseData.setConf(this.conf);
     return new ParseImpl(text, parseData);
     

Modified: 
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
 Wed Feb  8 13:48:52 2006
@@ -23,6 +23,8 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
@@ -30,7 +32,6 @@
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 
@@ -59,7 +60,7 @@
     Properties properties = null;
 
     try {
-      final String contentLen = content.get("Content-Length");
+      final String contentLen = 
content.getMetadata().get(Response.CONTENT_LENGTH);
       final int len = Integer.parseInt(contentLen);
       System.out.println("ziplen: " + len);
       final byte[] contentInBytes = content.getContent();
@@ -86,10 +87,6 @@
           "Can't be handled as Zip document. " + e).getEmptyParse(getConf());
     }
 
-    // collect meta data
-    final ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
-
     if (resultText == null) {
       resultText = "";
     }
@@ -100,7 +97,8 @@
 
     outlinks = (Outlink[]) outLinksList.toArray(new Outlink[0]);
     final ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
-        resultTitle, outlinks, metadata);
+                                              resultTitle, outlinks,
+                                              content.getMetadata());
     parseData.setConf(this.conf);
 
     LOG.finest("Zip file parsed sucessfully !!");

Modified: 
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
 Wed Feb  8 13:48:52 2006
@@ -26,13 +26,14 @@
 import java.net.URL;
 
 // Nutch imports
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseData;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.mime.MimeTypes;
@@ -87,9 +88,9 @@
           // Trying to resolve the Mime-Type
           String contentType = MIME.getMimeType(fname).getName();
           try {
-            ContentProperties metadata = new ContentProperties();
-            metadata.setProperty("Content-Length", 
Long.toString(entry.getSize()));
-            metadata.setProperty("Content-Type", contentType);
+            Metadata metadata = new Metadata();
+            metadata.set(Response.CONTENT_LENGTH, 
Long.toString(entry.getSize()));
+            metadata.set(Response.CONTENT_TYPE, contentType);
             Content content = new Content(newurl, base, b, contentType, 
metadata, this.conf);
             Parse parse = new ParseUtil(this.conf).parse(content);
             ParseData theParseData = parse.getData();

Modified: 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
 Wed Feb  8 13:48:52 2006
@@ -19,7 +19,9 @@
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.hadoop.io.UTF8;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.net.protocols.HttpDateFormat;
+import org.apache.nutch.net.protocols.Response;
 
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
@@ -144,8 +146,10 @@
     Content content = file.getProtocolOutput(new UTF8(urlString), new 
CrawlDatum()).getContent();
 
     System.err.println("Content-Type: " + content.getContentType());
-    System.err.println("Content-Length: " + content.get("Content-Length"));
-    System.err.println("Last-Modified: " + content.get("Last-Modified"));
+    System.err.println("Content-Length: " +
+                       content.getMetadata().get(Response.CONTENT_LENGTH));
+    System.err.println("Last-Modified: " +
+                       content.getMetadata().get(Response.LAST_MODIFIED));
     if (dumpContent) {
       System.out.print(new String(content.getContent()));
     }

Modified: 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
 Wed Feb  8 13:48:52 2006
@@ -18,6 +18,7 @@
 
 // JDK imports
 import java.net.URL;
+import java.util.Date;
 import java.util.TreeMap;
 import java.util.logging.Level;
 import java.io.IOException;
@@ -25,7 +26,10 @@
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
+
+// Hadoop imports
 import org.apache.hadoop.conf.Configuration;
 
 
@@ -61,7 +65,7 @@
   private String base;
   private byte[] content;
   private int code;
-  private ContentProperties headers = new ContentProperties();
+  private Metadata headers = new Metadata();
 
   private final File file;
   private Configuration conf;
@@ -71,17 +75,17 @@
 
   /** Returns the value of a named header. */
   public String getHeader(String name) {
-    return (String)headers.get(name);
+    return headers.get(name);
   }
 
   public byte[] getContent() { return content; }
 
   public Content toContent() {
     return new Content(orig, base, content,
-                       getHeader("Content-Type"),
+                       getHeader(Response.CONTENT_TYPE),
                        headers, this.conf);
   }
-
+  
   public FileResponse(URL url, CrawlDatum datum, File file, Configuration conf)
     throws FileException, IOException {
 
@@ -124,10 +128,8 @@
       // where case is insensitive
       if (!f.equals(f.getCanonicalFile())) {
         // set headers
-        TreeMap hdrs = new TreeMap(String.CASE_INSENSITIVE_ORDER);
         //hdrs.put("Location", f.getCanonicalFile().toURI());
-        hdrs.put("Location", f.getCanonicalFile().toURL().toString());
-        this.headers.putAll(hdrs);
+        headers.set(Response.LOCATION, 
f.getCanonicalFile().toURL().toString());
 
         this.code = 300;  // http redirect
         return;
@@ -181,16 +183,10 @@
     is.close(); 
 
     // set headers
-    TreeMap hdrs = new TreeMap(String.CASE_INSENSITIVE_ORDER);
-
-    hdrs.put("Content-Length", new Long(size).toString());
-
-    hdrs.put("Last-Modified",
+    headers.set(Response.CONTENT_LENGTH, new Long(size).toString());
+    headers.set(Response.LAST_MODIFIED,
       this.file.httpDateFormat.toString(f.lastModified()));
-
-    hdrs.put("Content-Type", "");   // No Content-Type at file protocol level
-
-    this.headers.putAll(hdrs);
+    headers.set(Response.CONTENT_TYPE, "");   // No Content-Type at file 
protocol level
 
     // response code
     this.code = 200; // http OK
@@ -204,17 +200,11 @@
     this.content = list2html(f.listFiles(), path, "/".equals(path) ? false : 
true);
 
     // set headers
-    TreeMap hdrs = new TreeMap(String.CASE_INSENSITIVE_ORDER);
-
-    hdrs.put("Content-Length",
+    headers.set(Response.CONTENT_LENGTH,
       new Integer(this.content.length).toString());
-
-    hdrs.put("Content-Type", "text/html");
-
-    hdrs.put("Last-Modified",
+    headers.set(Response.CONTENT_TYPE, "text/html");
+    headers.set(Response.LAST_MODIFIED,
       this.file.httpDateFormat.toString(f.lastModified()));
-
-    this.headers.putAll(hdrs);
 
     // response code
     this.code = 200; // http OK

Modified: 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
 Wed Feb  8 13:48:52 2006
@@ -22,6 +22,7 @@
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.hadoop.io.UTF8;
 import org.apache.nutch.net.protocols.HttpDateFormat;
+import org.apache.nutch.net.protocols.Response;
 
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
@@ -212,8 +213,10 @@
     Content content = ftp.getProtocolOutput(new UTF8(urlString), new 
CrawlDatum()).getContent();
 
     System.err.println("Content-Type: " + content.getContentType());
-    System.err.println("Content-Length: " + content.get("Content-Length"));
-    System.err.println("Last-Modified: " + content.get("Last-Modified"));
+    System.err.println("Content-Length: " +
+                       content.getMetadata().get(Response.CONTENT_LENGTH));
+    System.err.println("Last-Modified: " +
+                      content.getMetadata().get(Response.LAST_MODIFIED));
     if (dumpContent) {
       System.out.print(new String(content.getContent()));
     }

Modified: 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
 Wed Feb  8 13:48:52 2006
@@ -26,7 +26,9 @@
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
+
 import org.apache.hadoop.conf.Configuration;
 
 import java.net.InetAddress;
@@ -34,11 +36,9 @@
 
 import java.util.List;
 import java.util.LinkedList;
-
 import java.util.logging.Level;
 
 import java.io.ByteArrayOutputStream;
-//import java.io.InputStream;
 import java.io.IOException;
 
 
@@ -61,7 +61,7 @@
   private String base;
   private byte[] content;
   private int code;
-  private ContentProperties headers = new ContentProperties();
+  private Metadata headers = new Metadata();
 
   private final Ftp ftp;
   private Configuration conf;
@@ -71,14 +71,14 @@
 
   /** Returns the value of a named header. */
   public String getHeader(String name) {
-    return (String)headers.get(name);
+    return headers.get(name);
   }
 
   public byte[] getContent() { return content; }
 
   public Content toContent() {
     return new Content(orig, base, content,
-                       getHeader("Content-Type"),
+                       getHeader(Response.CONTENT_TYPE),
                        headers, this.conf);
   }
 
@@ -294,11 +294,11 @@
       ftp.client.retrieveFile(path, os, ftp.maxContentLength);
 
       FTPFile ftpFile = (FTPFile) list.get(0);
-      this.headers.put("Content-Length",
-        new Long(ftpFile.getSize()).toString());
+      this.headers.set(Response.CONTENT_LENGTH,
+                       new Long(ftpFile.getSize()).toString());
       //this.headers.put("content-type", "text/html");
-      this.headers.put("Last-Modified",
-        ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
+      this.headers.set(Response.LAST_MODIFIED,
+                       ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
       this.content = os.toByteArray();
 
 //      // approximate bytes sent and read
@@ -330,11 +330,11 @@
       }
 
       FTPFile ftpFile = (FTPFile) list.get(0);
-      this.headers.put("Content-Length",
-        new Long(ftpFile.getSize()).toString());
+      this.headers.set(Response.CONTENT_LENGTH,
+                       new Long(ftpFile.getSize()).toString());
       //this.headers.put("content-type", "text/html");
-      this.headers.put("Last-Modified",
-        ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
+      this.headers.set(Response.LAST_MODIFIED,
+                      ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
       this.content = os.toByteArray();
 
 //      // approximate bytes sent and read
@@ -349,7 +349,7 @@
 
       if (FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
       // it is not a file, but dir, so redirect as a dir
-        this.headers.put("Location", path + "/");
+        this.headers.set(Response.LOCATION, path + "/");
         this.code = 300;  // http redirect
         // fixme, should we do ftp.client.cwd("/"), back to top dir?
       } else {
@@ -386,9 +386,9 @@
 
       ftp.client.retrieveList(null, list, ftp.maxContentLength, ftp.parser);
       this.content = list2html(list, path, "/".equals(path) ? false : true);
-      this.headers.put("Content-Length",
-        new Integer(this.content.length).toString());
-      this.headers.put("Content-Type", "text/html");
+      this.headers.set(Response.CONTENT_LENGTH,
+                       new Integer(this.content.length).toString());
+      this.headers.set(Response.CONTENT_TYPE, "text/html");
       // this.headers.put("Last-Modified", null);
 
 //      // approximate bytes sent and read
@@ -408,9 +408,9 @@
       ftp.client = null;
 
       this.content = list2html(list, path, "/".equals(path) ? false : true);
-      this.headers.put("Content-Length",
-        new Integer(this.content.length).toString());
-      this.headers.put("Content-Type", "text/html");
+      this.headers.set(Response.CONTENT_LENGTH,
+                       new Integer(this.content.length).toString());
+      this.headers.set(Response.CONTENT_TYPE, "text/html");
       // this.headers.put("Last-Modified", null);
 
 //      // approximate bytes sent and read

Modified: 
lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
 Wed Feb  8 13:48:52 2006
@@ -28,12 +28,13 @@
 import java.net.URL;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.Date;
 import java.util.logging.Level;
 
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.net.protocols.Response;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.protocol.ProtocolException;
 import org.apache.nutch.protocol.http.api.HttpBase;
 import org.apache.nutch.protocol.http.api.HttpException;
@@ -49,7 +50,7 @@
   private String base;
   private byte[] content;
   private int code;
-  private ContentProperties headers = new ContentProperties();
+  private Metadata headers = new Metadata();
 
 
   public HttpResponse(HttpBase http, URL url, CrawlDatum datum)
@@ -141,13 +142,13 @@
         // parse status code line
         this.code = parseStatusLine(in, line); 
         // parse headers
-        headers.putAll(parseHeaders(in, line));
+        parseHeaders(in, line);
         haveSeenNonContinueStatus= code != 100; // 100 is "Continue"
       }
 
       readPlainContent(in);
 
-      String contentEncoding= getHeader("Content-Encoding");
+      String contentEncoding = getHeader(Response.CONTENT_ENCODING);
       if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
         Http.LOG.fine("uncompressing....");
         byte[] compressed = content;
@@ -187,10 +188,10 @@
   }
 
   public String getHeader(String name) {
-    return (String) headers.get(name);
+    return headers.get(name);
   }
   
-  public ContentProperties getHeaders() {
+  public Metadata getHeaders() {
     return headers;
   }
 
@@ -207,7 +208,7 @@
     throws HttpException, IOException {
 
     int contentLength = Integer.MAX_VALUE;    // get content length
-    String contentLengthString = (String)headers.get("Content-Length");
+    String contentLengthString = headers.get(Response.CONTENT_LENGTH);
     if (contentLengthString != null) {
       contentLengthString = contentLengthString.trim();
       try {
@@ -333,8 +334,9 @@
   }
 
 
-  private void processHeaderLine(StringBuffer line, TreeMap headers)
+  private void processHeaderLine(StringBuffer line)
     throws IOException, HttpException {
+
     int colonIndex = line.indexOf(":");       // key is up to colon
     if (colonIndex == -1) {
       int i;
@@ -355,20 +357,14 @@
       valueStart++;
     }
     String value = line.substring(valueStart);
-
-    headers.put(key, value);
+    headers.set(key, value);
   }
 
-  private Map parseHeaders(PushbackInputStream in, StringBuffer line)
-    throws IOException, HttpException {
-    TreeMap headers = new TreeMap(String.CASE_INSENSITIVE_ORDER);
-    return parseHeaders(in, line, headers);
-  }
 
-  // Adds headers to an existing TreeMap
-  private Map parseHeaders(PushbackInputStream in, StringBuffer line,
-                           TreeMap headers)
+  // Adds headers to our headers Metadata
+  private void parseHeaders(PushbackInputStream in, StringBuffer line)
     throws IOException, HttpException {
+
     while (readLine(in, line, true) != 0) {
 
       // handle HTTP responses with missing blank line after headers
@@ -381,18 +377,21 @@
         line.setLength(pos);
 
         try {
-          processHeaderLine(line, headers);
+            //TODO: (CM) We don't know the header names here
+            //since we're just handling them generically. It would
+            //be nice to provide some sort of mapping function here
+            //for the returned header names to the standard metadata
+            //names in the ParseData class
+          processHeaderLine(line);
         } catch (Exception e) {
           // fixme:
           e.printStackTrace();
         }
-
-        return headers;
+        return;
       }
 
-      processHeaderLine(line, headers);
+      processHeaderLine(line);
     }
-    return headers;
   }
 
   private static int readLine(PushbackInputStream in, StringBuffer line,

Modified: 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
 Wed Feb  8 13:48:52 2006
@@ -11,7 +11,8 @@
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configurable;
@@ -35,7 +36,7 @@
      * The HTTP Authentication (WWW-Authenticate) header which is returned 
      * by a webserver requiring authentication.
      */
-    public static final String AUTH_HEADER = "WWW-Authenticate";
+    public static final String WWW_AUTHENTICATE = "WWW-Authenticate";
        
     public static final Logger LOG =
                
LogFormatter.getLogger(HttpAuthenticationFactory.class.getName());
@@ -72,13 +73,14 @@
      * ---------------------------------- */
 
 
-    public HttpAuthentication findAuthentication(ContentProperties header) {
+    public HttpAuthentication findAuthentication(Metadata header) {
+
         if (header == null) return null;
         
        try {
                        Collection challenge = null;
-                       if (header instanceof ContentProperties) {
-                               Object o = header.get(AUTH_HEADER);
+                       if (header instanceof Metadata) {
+                               Object o = header.get(WWW_AUTHENTICATE);
                                if (o instanceof Collection) {
                                        challenge = (Collection) o;
                                } else {
@@ -86,7 +88,7 @@
                                        challenge.add(o.toString());
                                }
                        } else {
-                               String challengeString = 
header.getProperty(AUTH_HEADER); 
+                               String challengeString = 
header.get(WWW_AUTHENTICATE); 
                                if (challengeString != null) {
                                        challenge = new ArrayList();
                                        challenge.add(challengeString);

Modified: 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
 Wed Feb  8 13:48:52 2006
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
+import java.util.Date;
 
 // HTTP Client imports
 import org.apache.commons.httpclient.Header;
@@ -30,8 +31,8 @@
 
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.net.protocols.Response;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.protocol.http.api.HttpBase;
 
 
@@ -52,7 +53,7 @@
 
   private int code;
 
-  private ContentProperties headers = new ContentProperties();
+  private Metadata headers = new Metadata();
 
   
   public HttpResponse(HttpBase http, URL url, CrawlDatum datum) throws 
IOException {
@@ -85,8 +86,9 @@
       Header[] heads = get.getResponseHeaders();
 
       for (int i = 0; i < heads.length; i++) {
-        headers.setProperty(heads[i].getName(), heads[i].getValue());
+        headers.set(heads[i].getName(), heads[i].getValue());
       }
+      
       // always read content. Sometimes content is useful to find a cause
       // for error.
       try {
@@ -131,10 +133,10 @@
   }
 
   public String getHeader(String name) {
-    return (String) headers.get(name);
+    return headers.get(name);
   }
   
-  public ContentProperties getHeaders() {
+  public Metadata getHeaders() {
     return headers;
   }
 

Added: lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java?rev=376089&view=auto
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java 
(added)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java Wed 
Feb  8 13:48:52 2006
@@ -0,0 +1,268 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.metadata;
+
+// JDK imports
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Properties;
+import junit.framework.Test;
+
+// JUnit imports
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+
+// Nutch imports
+import org.apache.nutch.metadata.Metadata;
+
+
+/**
+ * JUnit based tests of class [EMAIL PROTECTED] 
org.apache.nutch.metadata.Metadata}.
+ *
+ * @author Chris Mattmann
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public class TestMetadata extends TestCase {
+
+  
+  public TestMetadata(String testName) {
+    super(testName);
+  }
+  
+  public static Test suite() {
+    return new TestSuite(TestMetadata.class);
+  }
+  
+  public static void main(String[] args) {
+    TestRunner.run(suite());
+  }
+  
+
+  /** Test for the <code>getNormalizedName(String)</code> method. */
+  public void testGetNormalizedName() {
+    assertEquals("Content-Type", Metadata.getNormalizedName("Content-Type"));
+    assertEquals("Content-Type", Metadata.getNormalizedName("ContentType"));
+    assertEquals("Content-Type", Metadata.getNormalizedName("Content-type"));
+    assertEquals("Content-Type", Metadata.getNormalizedName("contenttype"));
+    assertEquals("Content-Type", Metadata.getNormalizedName("contentype"));
+    assertEquals("Content-Type", Metadata.getNormalizedName("contntype"));
+  }
+
+  /** Test for the <code>add(String, String)</code> method. */
+  public void testAdd() {
+    String[] values = null;
+    Metadata meta = new Metadata();
+
+    values = meta.getValues("contentype");
+    assertEquals(0, values.length);
+
+    meta.add("contentype", "value1");
+    values = meta.getValues("contentype");
+    assertEquals(1, values.length);
+    assertEquals("value1", values[0]);
+    
+    meta.add("Content-Type", "value2");
+    values = meta.getValues("contentype");
+    assertEquals(2, values.length);
+    assertEquals("value1", values[0]);
+    assertEquals("value2", values[1]);
+
+    // NOTE : For now, the same value can be added many times.
+    //        Should it be changed?
+    meta.add("ContentType", "value1");
+    values = meta.getValues("Content-Type");
+    assertEquals(3, values.length);
+    assertEquals("value1", values[0]);
+    assertEquals("value2", values[1]);
+    assertEquals("value1", values[2]);
+  }
+
+  /** Test for the <code>set(String, String)</code> method. */
+  public void testSet() {
+    String[] values = null;
+    Metadata meta = new Metadata();
+
+    values = meta.getValues("contentype");
+    assertEquals(0, values.length);
+
+    meta.set("contentype", "value1");
+    values = meta.getValues("contentype");
+    assertEquals(1, values.length);
+    assertEquals("value1", values[0]);
+    
+    meta.set("Content-Type", "value2");
+    values = meta.getValues("contentype");
+    assertEquals(1, values.length);
+    assertEquals("value2", values[0]);
+    
+    meta.set("contenttype", "new value 1");
+    meta.add("contenttype", "new value 2");
+    values = meta.getValues("contentype");
+    assertEquals(2, values.length);
+    assertEquals("new value 1", values[0]);
+    assertEquals("new value 2", values[1]);
+  }
+  
+  /** Test for <code>setAll(Properties)</code> method */
+  public void testSetProperties() {
+    String[] values = null;
+    Metadata meta = new Metadata();
+    Properties props = new Properties();
+    
+    meta.setAll(props);
+    assertEquals(0, meta.size());
+    
+    props.setProperty("name-one", "value1.1");
+    meta.setAll(props);
+    assertEquals(1, meta.size());
+    values = meta.getValues("name-one");
+    assertEquals(1, values.length);
+    assertEquals("value1.1", values[0]);
+    
+    props.setProperty("name-two", "value2.1");
+    meta.setAll(props);
+    assertEquals(2, meta.size());
+    values = meta.getValues("name-one");
+    assertEquals(1, values.length);
+    assertEquals("value1.1", values[0]);
+    values = meta.getValues("name-two");
+    assertEquals(1, values.length);
+    assertEquals("value2.1", values[0]);
+  }
+    
+  /** Test for <code>get(String)</code> method */
+  public void testGet() {
+    String[] values = null;
+    Metadata meta = new Metadata();
+    assertNull(meta.get("a-name"));
+    
+    meta.add("a-name", "value-1");
+    assertEquals("value-1", meta.get("a-name"));
+    meta.add("a-name", "value-2");
+    assertEquals("value-1", meta.get("a-name"));
+  }
+    
+  /** Test for <code>isMultiValued()</code> method */
+  public void testIsMultiValued() {
+    Metadata meta = new Metadata();
+    assertFalse(meta.isMultiValued("key"));
+    meta.add("key", "value1");
+    assertFalse(meta.isMultiValued("key"));
+    meta.add("key", "value2");
+    assertTrue(meta.isMultiValued("key"));
+  }
+
+  /** Test for <code>names</code> method */
+  public void testNames() {
+    String[] names = null;
+    Metadata meta = new Metadata();
+    names = meta.names();
+    assertEquals(0, names.length);
+    
+    meta.add("name-one", "value");
+    names = meta.names();
+    assertEquals(1, names.length);
+    assertEquals("name-one", names[0]);
+    meta.add("name-two", "value");
+    names = meta.names();
+    assertEquals(2, names.length);
+  }
+  
+  /** Test for <code>remove(String)</code> method */
+  public void testRemove() {
+    Metadata meta = new Metadata();
+    meta.remove("name-one");
+    assertEquals(0, meta.size());
+    meta.add("name-one", "value-1.1");
+    meta.add("name-one", "value-1.2");
+    meta.add("name-two", "value-2.2");
+    assertEquals(2, meta.size());
+    assertNotNull(meta.get("name-one"));
+    assertNotNull(meta.get("name-two"));
+    meta.remove("name-one");
+    assertEquals(1, meta.size());
+    assertNull(meta.get("name-one"));
+    assertNotNull(meta.get("name-two"));
+    meta.remove("name-two");
+    assertEquals(0, meta.size());
+    assertNull(meta.get("name-one"));
+    assertNull(meta.get("name-two"));
+  }
+
+  /** Test for <code>equals(Object)</code> method */
+  public void testObject() {
+    Metadata meta1 = new Metadata();
+    Metadata meta2 = new Metadata();
+    assertFalse(meta1.equals(null));
+    assertFalse(meta1.equals("String"));
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-one", "value-1.1");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-one", "value-1.1");
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-one", "value-1.2");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-one", "value-1.2");
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-two", "value-2.1");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-two", "value-2.1");
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-two", "value-2.2");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-two", "value-2.x");
+    assertFalse(meta1.equals(meta2));
+  }
+  
+  /** Test for <code>Writable</code> implementation */
+  public void testWritable() {
+    Metadata result = null;
+    Metadata meta = new Metadata();
+    result = writeRead(meta);
+    assertEquals(0, result.size());
+    meta.add("name-one", "value-1.1");
+    result = writeRead(meta);
+    assertEquals(1, result.size());
+    assertEquals(1, result.getValues("name-one").length);
+    assertEquals("value-1.1", result.get("name-one"));
+    meta.add("name-two", "value-2.1");
+    meta.add("name-two", "value-2.2");
+    result = writeRead(meta);
+    assertEquals(2, result.size());
+    assertEquals(1, result.getValues("name-one").length);
+    assertEquals("value-1.1", result.getValues("name-one")[0]);
+    assertEquals(2, result.getValues("name-two").length);
+    assertEquals("value-2.1", result.getValues("name-two")[0]);
+    assertEquals("value-2.2", result.getValues("name-two")[1]);
+  }
+  
+  private Metadata writeRead(Metadata meta) {
+    Metadata readed = new Metadata();
+    try {
+      ByteArrayOutputStream out = new ByteArrayOutputStream();
+      meta.write(new DataOutputStream(out));
+      readed.readFields(new DataInputStream(new 
ByteArrayInputStream(out.toByteArray())));
+    } catch (IOException ioe) {
+      fail(ioe.toString());
+    }
+    return readed;
+  }
+       
+}

Propchange: 
lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java 
(original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java Wed 
Feb  8 13:48:52 2006
@@ -20,7 +20,7 @@
 import org.apache.nutch.util.NutchConfiguration;
 
 import org.apache.nutch.util.WritableTestUtils;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
 
 import junit.framework.TestCase;
 
@@ -41,9 +41,9 @@
       new Outlink("http://bar.com/";, "Bar", conf)
     };
 
-    ContentProperties metaData = new ContentProperties();
-    metaData.put("Language", "en/us");
-    metaData.put("Charset", "UTF-8");
+    Metadata metaData = new Metadata();
+    metaData.add("Language", "en/us");
+    metaData.add("Charset", "UTF-8");
 
     ParseData r = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, 
metaData);
     r.setConf(conf);

Modified: lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java 
(original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java Wed 
Feb  8 13:48:52 2006
@@ -16,13 +16,14 @@
 
 package org.apache.nutch.protocol;
 
+import org.apache.nutch.metadata.Metadata;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.NutchConfiguration;
-
 import org.apache.nutch.util.WritableTestUtils;
 
 import junit.framework.TestCase;
 
+
 /** Unit tests for Content. */
 
 public class TestContent extends TestCase {
@@ -37,9 +38,9 @@
 
     String url = "http://www.foo.com/";;
 
-    ContentProperties metaData = new ContentProperties();
-    metaData.put("Host", "www.foo.com");
-    metaData.put("Content-Type", "text/html");
+    Metadata metaData = new Metadata();
+    metaData.add("Host", "www.foo.com");
+    metaData.add("Content-Type", "text/html");
 
     Content r = new Content(url, url, page.getBytes("UTF8"), "text/html",
                             metaData, conf);
@@ -47,12 +48,13 @@
     WritableTestUtils.testWritable(r);
     assertEquals("text/html", r.getMetadata().get("Content-Type"));
     assertEquals("text/html", r.getMetadata().get("content-type"));
+    assertEquals("text/html", r.getMetadata().get("CONTENTYPE"));
   }
 
   /** Unit tests for getContentType(String, String, byte[]) method. */
   public void testGetContentType() throws Exception {
     Content c = null;
-    ContentProperties p = new ContentProperties();
+    Metadata p = new Metadata();
 
     c = new Content("http://www.foo.com/";,
                     "http://www.foo.com/";,

Modified: lucene/nutch/trunk/src/web/jsp/cached.jsp
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/web/jsp/cached.jsp?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- lucene/nutch/trunk/src/web/jsp/cached.jsp (original)
+++ lucene/nutch/trunk/src/web/jsp/cached.jsp Wed Feb  8 13:48:52 2006
@@ -6,7 +6,7 @@
 
   import="org.apache.nutch.searcher.*"
   import="org.apache.nutch.parse.ParseData"
-  import="org.apache.nutch.protocol.ContentProperties"
+  import="org.apache.nutch.metadata.Metadata"
   import="org.apache.hadoop.conf.Configuration"
   import="org.apache.nutch.util.NutchConfiguration"
 %><%
@@ -26,10 +26,10 @@
     ResourceBundle.getBundle("org.nutch.jsp.cached", request.getLocale())
     .getLocale().getLanguage();
 
-  ContentProperties metaData = bean.getParseData(details).getMetadata();
+  Metadata metaData = bean.getParseData(details).getContentMeta();
 
   String content = null;
-  String contentType = (String) metaData.get("Content-Type");
+  String contentType = (String) metaData.get(Metadata.CONTENT_TYPE);
   if (contentType.startsWith("text/html")) {
     // FIXME : it's better to emit the original 'byte' sequence 
     // with 'charset' set to the value of 'CharEncoding',

svn commit: r376089 [2/2] - in /lucene/nutch/trunk: ./ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/metadata/ src/java/org/apache/nutch/net/protocols/ src/java/org/apache/nutch/parse/ src/java/org/apac...

Reply via email to