apac...

jerome Wed, 08 Feb 2006 13:55:01 -0800

Modified: 
lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java
 Wed Feb  8 13:48:52 2006
@@ -20,6 +20,7 @@
 import org.apache.poi.poifs.eventfilesystem.*;
 import org.apache.poi.poifs.filesystem.*;
 import org.apache.poi.util.LittleEndian;
+import org.apache.nutch.metadata.Metadata;
 
 import java.util.*;
 import java.io.*;
@@ -33,8 +34,7 @@
  * code to extract all msword properties.
  *
  */
-public class WordExtractor
-{
+public class WordExtractor {
 
   /**
    * Constructor
@@ -276,39 +276,40 @@
       /*Dates are being stored in millis since the epoch to aid
       localization*/
       if(title != null)
-        properties.setProperty("Title", title);
+        properties.setProperty(Metadata.TITLE, title);
       if(applicationName != null)
-        properties.setProperty("Application-Name", applicationName);
+        properties.setProperty(Metadata.APPLICATION_NAME, applicationName);
       if(author != null)
-        properties.setProperty("Author", author);
+        properties.setProperty(Metadata.AUTHOR, author);
       if(charCount != 0)
-        properties.setProperty("Character Count", charCount + "");
+        properties.setProperty(Metadata.CHARACTER_COUNT, charCount + "");
       if(comments != null)
-        properties.setProperty("Comments", comments);
+        properties.setProperty(Metadata.COMMENTS, comments);
       if(createDateTime != null)
-        properties.setProperty("Creation-Date", createDateTime.getTime() + "");
+        properties.setProperty(Metadata.DATE,
+                               Metadata.DATE_FORMAT.format(createDateTime));
       if(editTime != 0)
-        properties.setProperty("Edit-Time", editTime + "");
+        properties.setProperty(Metadata.LAST_MODIFIED, editTime + "");
       if(keywords != null)
-        properties.setProperty("Keywords", keywords);
+        properties.setProperty(Metadata.KEYWORDS, keywords);
       if(lastAuthor != null)
-        properties.setProperty("Last-Author", lastAuthor);
+        properties.setProperty(Metadata.LAST_AUTHOR, lastAuthor);
       if(lastPrinted != null)
-        properties.setProperty("Last-Printed", lastPrinted.getTime() + "");
+        properties.setProperty(Metadata.LAST_PRINTED, lastPrinted.getTime() + "");
       if(lastSaveDateTime != null)
-        properties.setProperty("Last-Save-Date", lastSaveDateTime.getTime() + "");
+        properties.setProperty(Metadata.LAST_SAVED, lastSaveDateTime.getTime() + "");
       if(pageCount != 0)
-        properties.setProperty("Page-Count", pageCount + "");
+        properties.setProperty(Metadata.PAGE_COUNT, pageCount + "");
       if(revNumber != null)
-        properties.setProperty("Revision-Number", revNumber);
+        properties.setProperty(Metadata.REVISION_NUMBER, revNumber);
       if(security != 0)
-        properties.setProperty("Security", security + "");
+        properties.setProperty(Metadata.RIGHTS, security + "");
       if(subject != null)
-        properties.setProperty("Subject", subject);
+        properties.setProperty(Metadata.SUBJECT, subject);
       if(template != null)
-        properties.setProperty("Template", template);
+        properties.setProperty(Metadata.TEMPLATE, template);
       if(wordCount != 0)
-        properties.setProperty("Word-Count", wordCount + "");
+        properties.setProperty(Metadata.WORD_COUNT, wordCount + "");
       propertiesBroker.setProperties(properties);
 
       //si.getThumbnail(); // can't think of a sensible way of turning this into a string.


Modified: 
lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
 Wed Feb  8 13:48:52 2006
@@ -26,7 +26,8 @@
 import org.pdfbox.exceptions.InvalidPasswordException;
 
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.parse.ParseStatus;
@@ -89,12 +90,13 @@
 
     String text = null;
     String title = null;
+    Metadata metadata = new Metadata();
 
     try {
 
       byte[] raw = content.getContent();
 
-      String contentLength = content.get("Content-Length");
+      String contentLength = content.getMetadata().get(Response.CONTENT_LENGTH);
       if (contentLength != null
             && raw.length != Integer.parseInt(contentLength)) {
           return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
@@ -102,8 +104,7 @@
             +" bytes. Parser can't handle incomplete pdf file.").getEmptyParse(getConf());
       }
 
-      PDFParser parser = new PDFParser(
-        new ByteArrayInputStream(raw));
+      PDFParser parser = new PDFParser(new ByteArrayInputStream(raw));
       parser.parse();
 
       pdf = parser.getPDDocument();
@@ -122,15 +123,18 @@
       PDDocumentInformation info = pdf.getDocumentInformation();
       title = info.getTitle();
       // more useful info, currently not used. please keep them for future use.
-      // pdf.getPageCount();
-      // info.getAuthor()
-      // info.getSubject()
-      // info.getKeywords()
-      // info.getCreator()
-      // info.getProducer()
-      // info.getTrapped()
-      // formatDate(info.getCreationDate())
-      // formatDate(info.getModificationDate())
+      metadata.add(Metadata.PAGE_COUNT, String.valueOf(pdf.getPageCount()));
+      metadata.add(Metadata.AUTHOR, info.getAuthor());
+      metadata.add(Metadata.SUBJECT, info.getSubject());
+      metadata.add(Metadata.KEYWORDS, info.getKeywords());
+      metadata.add(Metadata.CREATOR, info.getCreator());
+      metadata.add(Metadata.PUBLISHER, info.getProducer());
+      
+      //TODO: Figure out why we get a java.io.IOException: Error converting date:1-Jan-3 18:15PM
+      //error here
+      
+      //metadata.put(DATE, dcDateFormatter.format(info.getCreationDate().getTime()));
+      //metadata.put(LAST_MODIFIED, dcDateFormatter.format(info.getModificationDate().getTime()));
 
     } catch (CryptographyException e) {
       return new ParseStatus(ParseStatus.FAILED,
@@ -139,6 +143,8 @@
       return new ParseStatus(ParseStatus.FAILED,
               "Can't decrypt document - invalid password. " + e).getEmptyParse(getConf());
     } catch (Exception e) { // run time exception
+        LOG.warning("General exception in PDF parser: "+e.getMessage());
+        e.printStackTrace();        
       return new ParseStatus(ParseStatus.FAILED,
               "Can't be handled as pdf document. " + e).getEmptyParse(getConf());
     } finally {
@@ -159,11 +165,9 @@
     // collect outlink
     Outlink[] outlinks = OutlinkExtractor.getOutlinks(text, getConf());
 
-    // collect meta data
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
-
-    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metadata);
+    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title,
+                                        outlinks, content.getMetadata(),
+                                        metadata);
     parseData.setConf(this.conf);
     return new ParseImpl(text, parseData);
     // any filter?

Modified: 
lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java
 Wed Feb  8 13:48:52 2006
@@ -18,7 +18,16 @@
 
 import org.apache.nutch.parse.*;
 import org.apache.nutch.protocol.Content;
+<<<<<<< .mine
+<<<<<<< .mine
+import org.apache.nutch.util.MetadataNames;
+
+=======
+import org.apache.nutch.util.NutchConf;
+=======
 import org.apache.hadoop.conf.Configuration;
+>>>>>>> .r374853
+>>>>>>> .r373941
 import java.io.ByteArrayInputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
@@ -31,7 +40,7 @@
  * 
  * @author Andy Hedges
  */
-public class RTFParseFactory implements Parser {
+public class RTFParseFactory implements Parser, MetadataNames {
 
   private Configuration conf;
 
@@ -53,10 +62,13 @@
     Properties metadata = new Properties();
     metadata.putAll(content.getMetadata());
     metadata.putAll(delegate.getMetaData());
-    String title = metadata.getProperty("title");
+    String title = metadata.getProperty(TITLE);
 
     if (title != null) {
-      metadata.remove(title);
+        //(CM): Why remove the title metadata property here? Even 
+        //though it's stored in the ParseData, it still might be useful
+        //to have via this properties object?
+        //metadata.remove(title);
     } else {
       title = "";
     }

Modified: 
lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java
 Wed Feb  8 13:48:52 2006
@@ -22,18 +22,20 @@
 import java.util.List;
 import java.util.Properties;
 
+import org.apache.nutch.util.MetadataNames;
+
 /**
  * A parser delegate for handling rtf events.
  * @author Andy Hedges
  */
-public class RTFParserDelegateImpl implements RTFParserDelegate {
+public class RTFParserDelegateImpl implements RTFParserDelegate, MetadataNames {
 
   String tabs = "";
   Properties metadata = new Properties();
 
-  String[] META_NAMES_TEXT = {"title", "subject", "author", "manager",
-                              "company", "operator", "category", "keywords",
-                              "comment", "doccomm", "hlinkbase"};
+  String[] META_NAMES_TEXT = {TITLE, SUBJECT, AUTHOR, "manager",
+                              "company", "operator", "category", KEYWORDS,
+                              COMMENTS, "doccomm", "hlinkbase"};
   String[] META_NAMES_DATE = {"creatim", "creatim", "printim", "buptim"};
 
   String metaName = "";

Modified: 
lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
 Wed Feb  8 13:48:52 2006
@@ -25,7 +25,15 @@
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.ProtocolException;
 import org.apache.nutch.protocol.ProtocolFactory;
+<<<<<<< .mine
+<<<<<<< .mine
+import org.apache.nutch.util.MetadataNames;
+=======
+import org.apache.nutch.util.NutchConf;
+=======
 import org.apache.hadoop.conf.Configuration;
+>>>>>>> .r374853
+>>>>>>> .r373941
 
 import java.util.Properties;
 
@@ -34,7 +42,7 @@
  *
  * @author Andy Hedges
  */
-public class TestRTFParser extends TestCase {
+public class TestRTFParser extends TestCase implements MetadataNames {
 
   private String fileSeparator = System.getProperty("file.separator");
   // This system property is defined in ./src/plugin/build-plugin.xml
@@ -73,7 +81,7 @@
     String title = parse.getData().getTitle();
     Properties meta = parse.getData().getMetadata();
     assertEquals("test rft document", title);
-    assertEquals("tests", meta.getProperty("subject"));
+    assertEquals("tests", meta.getProperty(SUBJECT));
 
 
 

Modified: 
lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
 Wed Feb  8 13:48:52 2006
@@ -21,9 +21,10 @@
 import java.util.*;
 import java.util.logging.Logger;
 
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.parse.*;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
@@ -60,19 +61,17 @@
   public Parse getParse(Content content) {
 
     String text = null;
-    // collect meta data
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
     Vector outlinks = new Vector();
 
     try {
 
       byte[] raw = content.getContent();
 
-      String contentLength = content.get("Content-Length");
+      String contentLength = content.getMetadata().get(Response.CONTENT_LENGTH);
       if (contentLength != null && raw.length != Integer.parseInt(contentLength)) {
-        return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED, "Content truncated at " + raw.length
-                + " bytes. Parser can't handle incomplete files.").getEmptyParse(conf);
+        return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
+                               "Content truncated at " + raw.length +
+                               " bytes. Parser can't handle incomplete files.").getEmptyParse(conf);
       }
       ExtractText extractor = new ExtractText();
 
@@ -106,7 +105,8 @@
     if (text == null) text = "";
 
     Outlink[] links = (Outlink[]) outlinks.toArray(new Outlink[outlinks.size()]);
-    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "", links, metadata);
+    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "", links,
+                                        content.getMetadata());
     return new ParseImpl(text, parseData);
   }
 
@@ -119,8 +119,10 @@
     byte[] buf = new byte[in.available()];
     in.read(buf);
     SWFParser parser = new SWFParser();
-    Parse p = parser.getParse(new Content("file:" + args[0], "file:" + args[0], buf, "application/x-shockwave-flash",
-            new ContentProperties(), NutchConfiguration.create()));
+    Parse p = parser.getParse(new Content("file:" + args[0], "file:" + args[0],
+                                          buf, "application/x-shockwave-flash",
+                                          new Metadata(),
+                                          NutchConfiguration.create()));
     System.out.println("Parse Text:");
     System.out.println(p.getText());
     System.out.println("Parse Data:");

Modified: 
lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
 Wed Feb  8 13:48:52 2006
@@ -17,7 +17,6 @@
 package org.apache.nutch.parse.text;
 
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.parse.*;
 import org.apache.nutch.util.*;
 
@@ -27,9 +26,6 @@
   private Configuration conf;
 
   public Parse getParse(Content content) {
-    // copy content meta data through
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata());
 
     // ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "", new
     // Outlink[0], metadata);
@@ -49,7 +45,7 @@
       text = new String(content.getContent()); // use default encoding
     }
     ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "",
-        OutlinkExtractor.getOutlinks(text, getConf()), metadata);
+        OutlinkExtractor.getOutlinks(text, getConf()), content.getMetadata());
     parseData.setConf(this.conf);
     return new ParseImpl(text, parseData);
     

Modified: 
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
 Wed Feb  8 13:48:52 2006
@@ -23,6 +23,8 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
@@ -30,7 +32,6 @@
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 
@@ -59,7 +60,7 @@
     Properties properties = null;
 
     try {
-      final String contentLen = content.get("Content-Length");
+      final String contentLen = content.getMetadata().get(Response.CONTENT_LENGTH);
       final int len = Integer.parseInt(contentLen);
       System.out.println("ziplen: " + len);
       final byte[] contentInBytes = content.getContent();
@@ -86,10 +87,6 @@
           "Can't be handled as Zip document. " + e).getEmptyParse(getConf());
     }
 
-    // collect meta data
-    final ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
-
     if (resultText == null) {
       resultText = "";
     }
@@ -100,7 +97,8 @@
 
     outlinks = (Outlink[]) outLinksList.toArray(new Outlink[0]);
     final ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
-        resultTitle, outlinks, metadata);
+                                              resultTitle, outlinks,
+                                              content.getMetadata());
     parseData.setConf(this.conf);
 
     LOG.finest("Zip file parsed sucessfully !!");

Modified: 
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
 Wed Feb  8 13:48:52 2006
@@ -26,13 +26,14 @@
 import java.net.URL;
 
 // Nutch imports
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseData;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.mime.MimeTypes;
@@ -87,9 +88,9 @@
           // Trying to resolve the Mime-Type
           String contentType = MIME.getMimeType(fname).getName();
           try {
-            ContentProperties metadata = new ContentProperties();
-            metadata.setProperty("Content-Length", Long.toString(entry.getSize()));
-            metadata.setProperty("Content-Type", contentType);
+            Metadata metadata = new Metadata();
+            metadata.set(Response.CONTENT_LENGTH, Long.toString(entry.getSize()));
+            metadata.set(Response.CONTENT_TYPE, contentType);
             Content content = new Content(newurl, base, b, contentType, metadata, this.conf);
             Parse parse = new ParseUtil(this.conf).parse(content);
             ParseData theParseData = parse.getData();

Modified: 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
 Wed Feb  8 13:48:52 2006
@@ -19,7 +19,9 @@
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.hadoop.io.UTF8;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.net.protocols.HttpDateFormat;
+import org.apache.nutch.net.protocols.Response;
 
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
@@ -144,8 +146,10 @@
     Content content = file.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
 
     System.err.println("Content-Type: " + content.getContentType());
-    System.err.println("Content-Length: " + content.get("Content-Length"));
-    System.err.println("Last-Modified: " + content.get("Last-Modified"));
+    System.err.println("Content-Length: " +
+                       content.getMetadata().get(Response.CONTENT_LENGTH));
+    System.err.println("Last-Modified: " +
+                       content.getMetadata().get(Response.LAST_MODIFIED));
     if (dumpContent) {
       System.out.print(new String(content.getContent()));
     }

Modified: 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
 Wed Feb  8 13:48:52 2006
@@ -18,6 +18,7 @@
 
 // JDK imports
 import java.net.URL;
+import java.util.Date;
 import java.util.TreeMap;
 import java.util.logging.Level;
 import java.io.IOException;
@@ -25,7 +26,10 @@
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
+
+// Hadoop imports
 import org.apache.hadoop.conf.Configuration;
 
 
@@ -61,7 +65,7 @@
   private String base;
   private byte[] content;
   private int code;
-  private ContentProperties headers = new ContentProperties();
+  private Metadata headers = new Metadata();
 
   private final File file;
   private Configuration conf;
@@ -71,17 +75,17 @@
 
   /** Returns the value of a named header. */
   public String getHeader(String name) {
-    return (String)headers.get(name);
+    return headers.get(name);
   }
 
   public byte[] getContent() { return content; }
 
   public Content toContent() {
     return new Content(orig, base, content,
-                       getHeader("Content-Type"),
+                       getHeader(Response.CONTENT_TYPE),
                        headers, this.conf);
   }
-
+  
   public FileResponse(URL url, CrawlDatum datum, File file, Configuration conf)
     throws FileException, IOException {
 
@@ -124,10 +128,8 @@
       // where case is insensitive
       if (!f.equals(f.getCanonicalFile())) {
         // set headers
-        TreeMap hdrs = new TreeMap(String.CASE_INSENSITIVE_ORDER);
         //hdrs.put("Location", f.getCanonicalFile().toURI());
-        hdrs.put("Location", f.getCanonicalFile().toURL().toString());
-        this.headers.putAll(hdrs);
+        headers.set(Response.LOCATION, f.getCanonicalFile().toURL().toString());
 
         this.code = 300;  // http redirect
         return;
@@ -181,16 +183,10 @@
     is.close(); 
 
     // set headers
-    TreeMap hdrs = new TreeMap(String.CASE_INSENSITIVE_ORDER);
-
-    hdrs.put("Content-Length", new Long(size).toString());
-
-    hdrs.put("Last-Modified",
+    headers.set(Response.CONTENT_LENGTH, new Long(size).toString());
+    headers.set(Response.LAST_MODIFIED,
       this.file.httpDateFormat.toString(f.lastModified()));
-
-    hdrs.put("Content-Type", "");   // No Content-Type at file protocol level
-
-    this.headers.putAll(hdrs);
+    headers.set(Response.CONTENT_TYPE, "");   // No Content-Type at file protocol level
 
     // response code
     this.code = 200; // http OK
@@ -204,17 +200,11 @@
     this.content = list2html(f.listFiles(), path, "/".equals(path) ? false : true);
 
     // set headers
-    TreeMap hdrs = new TreeMap(String.CASE_INSENSITIVE_ORDER);
-
-    hdrs.put("Content-Length",
+    headers.set(Response.CONTENT_LENGTH,
       new Integer(this.content.length).toString());
-
-    hdrs.put("Content-Type", "text/html");
-
-    hdrs.put("Last-Modified",
+    headers.set(Response.CONTENT_TYPE, "text/html");
+    headers.set(Response.LAST_MODIFIED,
       this.file.httpDateFormat.toString(f.lastModified()));
-
-    this.headers.putAll(hdrs);
 
     // response code
     this.code = 200; // http OK

Modified: 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
 Wed Feb  8 13:48:52 2006
@@ -22,6 +22,7 @@
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.hadoop.io.UTF8;
 import org.apache.nutch.net.protocols.HttpDateFormat;
+import org.apache.nutch.net.protocols.Response;
 
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
@@ -212,8 +213,10 @@
     Content content = ftp.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
 
     System.err.println("Content-Type: " + content.getContentType());
-    System.err.println("Content-Length: " + content.get("Content-Length"));
-    System.err.println("Last-Modified: " + content.get("Last-Modified"));
+    System.err.println("Content-Length: " +
+                       content.getMetadata().get(Response.CONTENT_LENGTH));
+    System.err.println("Last-Modified: " +
+                      content.getMetadata().get(Response.LAST_MODIFIED));
     if (dumpContent) {
       System.out.print(new String(content.getContent()));
     }

Modified: 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
 Wed Feb  8 13:48:52 2006
@@ -26,7 +26,9 @@
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
+
 import org.apache.hadoop.conf.Configuration;
 
 import java.net.InetAddress;
@@ -34,11 +36,9 @@
 
 import java.util.List;
 import java.util.LinkedList;
-
 import java.util.logging.Level;
 
 import java.io.ByteArrayOutputStream;
-//import java.io.InputStream;
 import java.io.IOException;
 
 
@@ -61,7 +61,7 @@
   private String base;
   private byte[] content;
   private int code;
-  private ContentProperties headers = new ContentProperties();
+  private Metadata headers = new Metadata();
 
   private final Ftp ftp;
   private Configuration conf;
@@ -71,14 +71,14 @@
 
   /** Returns the value of a named header. */
   public String getHeader(String name) {
-    return (String)headers.get(name);
+    return headers.get(name);
   }
 
   public byte[] getContent() { return content; }
 
   public Content toContent() {
     return new Content(orig, base, content,
-                       getHeader("Content-Type"),
+                       getHeader(Response.CONTENT_TYPE),
                        headers, this.conf);
   }
 
@@ -294,11 +294,11 @@
       ftp.client.retrieveFile(path, os, ftp.maxContentLength);
 
       FTPFile ftpFile = (FTPFile) list.get(0);
-      this.headers.put("Content-Length",
-        new Long(ftpFile.getSize()).toString());
+      this.headers.set(Response.CONTENT_LENGTH,
+                       new Long(ftpFile.getSize()).toString());
       //this.headers.put("content-type", "text/html");
-      this.headers.put("Last-Modified",
-        ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
+      this.headers.set(Response.LAST_MODIFIED,
+                       ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
       this.content = os.toByteArray();
 
 //      // approximate bytes sent and read
@@ -330,11 +330,11 @@
       }
 
       FTPFile ftpFile = (FTPFile) list.get(0);
-      this.headers.put("Content-Length",
-        new Long(ftpFile.getSize()).toString());
+      this.headers.set(Response.CONTENT_LENGTH,
+                       new Long(ftpFile.getSize()).toString());
       //this.headers.put("content-type", "text/html");
-      this.headers.put("Last-Modified",
-        ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
+      this.headers.set(Response.LAST_MODIFIED,
+                      ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
       this.content = os.toByteArray();
 
 //      // approximate bytes sent and read
@@ -349,7 +349,7 @@
 
       if (FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
       // it is not a file, but dir, so redirect as a dir
-        this.headers.put("Location", path + "/");
+        this.headers.set(Response.LOCATION, path + "/");
         this.code = 300;  // http redirect
         // fixme, should we do ftp.client.cwd("/"), back to top dir?
       } else {
@@ -386,9 +386,9 @@
 
       ftp.client.retrieveList(null, list, ftp.maxContentLength, ftp.parser);
       this.content = list2html(list, path, "/".equals(path) ? false : true);
-      this.headers.put("Content-Length",
-        new Integer(this.content.length).toString());
-      this.headers.put("Content-Type", "text/html");
+      this.headers.set(Response.CONTENT_LENGTH,
+                       new Integer(this.content.length).toString());
+      this.headers.set(Response.CONTENT_TYPE, "text/html");
       // this.headers.put("Last-Modified", null);
 
 //      // approximate bytes sent and read
@@ -408,9 +408,9 @@
       ftp.client = null;
 
       this.content = list2html(list, path, "/".equals(path) ? false : true);
-      this.headers.put("Content-Length",
-        new Integer(this.content.length).toString());
-      this.headers.put("Content-Type", "text/html");
+      this.headers.set(Response.CONTENT_LENGTH,
+                       new Integer(this.content.length).toString());
+      this.headers.set(Response.CONTENT_TYPE, "text/html");
       // this.headers.put("Last-Modified", null);
 
 //      // approximate bytes sent and read

Modified: 
lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
 Wed Feb  8 13:48:52 2006
@@ -28,12 +28,13 @@
 import java.net.URL;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.Date;
 import java.util.logging.Level;
 
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.net.protocols.Response;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.protocol.ProtocolException;
 import org.apache.nutch.protocol.http.api.HttpBase;
 import org.apache.nutch.protocol.http.api.HttpException;
@@ -49,7 +50,7 @@
   private String base;
   private byte[] content;
   private int code;
-  private ContentProperties headers = new ContentProperties();
+  private Metadata headers = new Metadata();
 
 
   public HttpResponse(HttpBase http, URL url, CrawlDatum datum)
@@ -141,13 +142,13 @@
         // parse status code line
         this.code = parseStatusLine(in, line); 
         // parse headers
-        headers.putAll(parseHeaders(in, line));
+        parseHeaders(in, line);
         haveSeenNonContinueStatus= code != 100; // 100 is "Continue"
       }
 
       readPlainContent(in);
 
-      String contentEncoding= getHeader("Content-Encoding");
+      String contentEncoding = getHeader(Response.CONTENT_ENCODING);
       if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
         Http.LOG.fine("uncompressing....");
         byte[] compressed = content;
@@ -187,10 +188,10 @@
   }
 
   public String getHeader(String name) {
-    return (String) headers.get(name);
+    return headers.get(name);
   }
   
-  public ContentProperties getHeaders() {
+  public Metadata getHeaders() {
     return headers;
   }
 
@@ -207,7 +208,7 @@
     throws HttpException, IOException {
 
     int contentLength = Integer.MAX_VALUE;    // get content length
-    String contentLengthString = (String)headers.get("Content-Length");
+    String contentLengthString = headers.get(Response.CONTENT_LENGTH);
     if (contentLengthString != null) {
       contentLengthString = contentLengthString.trim();
       try {
@@ -333,8 +334,9 @@
   }
 
 
-  private void processHeaderLine(StringBuffer line, TreeMap headers)
+  private void processHeaderLine(StringBuffer line)
     throws IOException, HttpException {
+
     int colonIndex = line.indexOf(":");       // key is up to colon
     if (colonIndex == -1) {
       int i;
@@ -355,20 +357,14 @@
       valueStart++;
     }
     String value = line.substring(valueStart);
-
-    headers.put(key, value);
+    headers.set(key, value);
   }
 
-  private Map parseHeaders(PushbackInputStream in, StringBuffer line)
-    throws IOException, HttpException {
-    TreeMap headers = new TreeMap(String.CASE_INSENSITIVE_ORDER);
-    return parseHeaders(in, line, headers);
-  }
 
-  // Adds headers to an existing TreeMap
-  private Map parseHeaders(PushbackInputStream in, StringBuffer line,
-                           TreeMap headers)
+  // Adds headers to our headers Metadata
+  private void parseHeaders(PushbackInputStream in, StringBuffer line)
     throws IOException, HttpException {
+
     while (readLine(in, line, true) != 0) {
 
       // handle HTTP responses with missing blank line after headers
@@ -381,18 +377,21 @@
         line.setLength(pos);
 
         try {
-          processHeaderLine(line, headers);
+            //TODO: (CM) We don't know the header names here
+            //since we're just handling them generically. It would
+            //be nice to provide some sort of mapping function here
+            //for the returned header names to the standard metadata
+            //names in the ParseData class
+          processHeaderLine(line);
         } catch (Exception e) {
           // fixme:
           e.printStackTrace();
         }
-
-        return headers;
+        return;
       }
 
-      processHeaderLine(line, headers);
+      processHeaderLine(line);
     }
-    return headers;
   }
 
   private static int readLine(PushbackInputStream in, StringBuffer line,

Modified: 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
 Wed Feb  8 13:48:52 2006
@@ -11,7 +11,8 @@
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configurable;
@@ -35,7 +36,7 @@
      * The HTTP Authentication (WWW-Authenticate) header which is returned 
      * by a webserver requiring authentication.
      */
-    public static final String AUTH_HEADER = "WWW-Authenticate";
+    public static final String WWW_AUTHENTICATE = "WWW-Authenticate";
        
     public static final Logger LOG =
                
LogFormatter.getLogger(HttpAuthenticationFactory.class.getName());
@@ -72,13 +73,14 @@
      * ---------------------------------- */
 
 
-    public HttpAuthentication findAuthentication(ContentProperties header) {
+    public HttpAuthentication findAuthentication(Metadata header) {
+
         if (header == null) return null;
         
        try {
                        Collection challenge = null;
-                       if (header instanceof ContentProperties) {
-                               Object o = header.get(AUTH_HEADER);
+                       if (header instanceof Metadata) {
+                               Object o = header.get(WWW_AUTHENTICATE);
                                if (o instanceof Collection) {
                                        challenge = (Collection) o;
                                } else {
@@ -86,7 +88,7 @@
                                        challenge.add(o.toString());
                                }
                        } else {
-                               String challengeString = header.getProperty(AUTH_HEADER);
+                               String challengeString = header.get(WWW_AUTHENTICATE);
                                if (challengeString != null) {
                                        challenge = new ArrayList();
                                        challenge.add(challengeString);

Modified: 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
 Wed Feb  8 13:48:52 2006
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
+import java.util.Date;
 
 // HTTP Client imports
 import org.apache.commons.httpclient.Header;
@@ -30,8 +31,8 @@
 
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.net.protocols.Response;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.protocol.http.api.HttpBase;
 
 
@@ -52,7 +53,7 @@
 
   private int code;
 
-  private ContentProperties headers = new ContentProperties();
+  private Metadata headers = new Metadata();
 
   
   public HttpResponse(HttpBase http, URL url, CrawlDatum datum) throws IOException {
@@ -85,8 +86,9 @@
       Header[] heads = get.getResponseHeaders();
 
       for (int i = 0; i < heads.length; i++) {
-        headers.setProperty(heads[i].getName(), heads[i].getValue());
+        headers.set(heads[i].getName(), heads[i].getValue());
       }
+      
       // always read content. Sometimes content is useful to find a cause
       // for error.
       try {
@@ -131,10 +133,10 @@
   }
 
   public String getHeader(String name) {
-    return (String) headers.get(name);
+    return headers.get(name);
   }
   
-  public ContentProperties getHeaders() {
+  public Metadata getHeaders() {
     return headers;
   }
 

Added: lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java?rev=376089&view=auto
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java 
(added)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java Wed 
Feb  8 13:48:52 2006
@@ -0,0 +1,268 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.metadata;
+
+// JDK imports
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Properties;
+import junit.framework.Test;
+
+// JUnit imports
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+
+// Nutch imports
+import org.apache.nutch.metadata.Metadata;
+
+
+/**
+ * JUnit based tests of class {@link org.apache.nutch.metadata.Metadata}.
+ *
+ * @author Chris Mattmann
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public class TestMetadata extends TestCase {
+
+  
+  public TestMetadata(String testName) {
+    super(testName);
+  }
+  
+  public static Test suite() {
+    return new TestSuite(TestMetadata.class);
+  }
+  
+  public static void main(String[] args) {
+    TestRunner.run(suite());
+  }
+  
+
+  /** Test for the <code>getNormalizedName(String)</code> method. */
+  public void testGetNormalizedName() {
+    assertEquals("Content-Type", Metadata.getNormalizedName("Content-Type"));
+    assertEquals("Content-Type", Metadata.getNormalizedName("ContentType"));
+    assertEquals("Content-Type", Metadata.getNormalizedName("Content-type"));
+    assertEquals("Content-Type", Metadata.getNormalizedName("contenttype"));
+    assertEquals("Content-Type", Metadata.getNormalizedName("contentype"));
+    assertEquals("Content-Type", Metadata.getNormalizedName("contntype"));
+  }
+
+  /** Test for the <code>add(String, String)</code> method. */
+  public void testAdd() {
+    String[] values = null;
+    Metadata meta = new Metadata();
+
+    values = meta.getValues("contentype");
+    assertEquals(0, values.length);
+
+    meta.add("contentype", "value1");
+    values = meta.getValues("contentype");
+    assertEquals(1, values.length);
+    assertEquals("value1", values[0]);
+    
+    meta.add("Content-Type", "value2");
+    values = meta.getValues("contentype");
+    assertEquals(2, values.length);
+    assertEquals("value1", values[0]);
+    assertEquals("value2", values[1]);
+
+    // NOTE : For now, the same value can be added many times.
+    //        Should it be changed?
+    meta.add("ContentType", "value1");
+    values = meta.getValues("Content-Type");
+    assertEquals(3, values.length);
+    assertEquals("value1", values[0]);
+    assertEquals("value2", values[1]);
+    assertEquals("value1", values[2]);
+  }
+
+  /** Test for the <code>set(String, String)</code> method. */
+  public void testSet() {
+    String[] values = null;
+    Metadata meta = new Metadata();
+
+    values = meta.getValues("contentype");
+    assertEquals(0, values.length);
+
+    meta.set("contentype", "value1");
+    values = meta.getValues("contentype");
+    assertEquals(1, values.length);
+    assertEquals("value1", values[0]);
+    
+    meta.set("Content-Type", "value2");
+    values = meta.getValues("contentype");
+    assertEquals(1, values.length);
+    assertEquals("value2", values[0]);
+    
+    meta.set("contenttype", "new value 1");
+    meta.add("contenttype", "new value 2");
+    values = meta.getValues("contentype");
+    assertEquals(2, values.length);
+    assertEquals("new value 1", values[0]);
+    assertEquals("new value 2", values[1]);
+  }
+  
+  /** Test for <code>setAll(Properties)</code> method */
+  public void testSetProperties() {
+    String[] values = null;
+    Metadata meta = new Metadata();
+    Properties props = new Properties();
+    
+    meta.setAll(props);
+    assertEquals(0, meta.size());
+    
+    props.setProperty("name-one", "value1.1");
+    meta.setAll(props);
+    assertEquals(1, meta.size());
+    values = meta.getValues("name-one");
+    assertEquals(1, values.length);
+    assertEquals("value1.1", values[0]);
+    
+    props.setProperty("name-two", "value2.1");
+    meta.setAll(props);
+    assertEquals(2, meta.size());
+    values = meta.getValues("name-one");
+    assertEquals(1, values.length);
+    assertEquals("value1.1", values[0]);
+    values = meta.getValues("name-two");
+    assertEquals(1, values.length);
+    assertEquals("value2.1", values[0]);
+  }
+    
+  /** Test for <code>get(String)</code> method */
+  public void testGet() {
+    String[] values = null;
+    Metadata meta = new Metadata();
+    assertNull(meta.get("a-name"));
+    
+    meta.add("a-name", "value-1");
+    assertEquals("value-1", meta.get("a-name"));
+    meta.add("a-name", "value-2");
+    assertEquals("value-1", meta.get("a-name"));
+  }
+    
+  /** Test for <code>isMultiValued()</code> method */
+  public void testIsMultiValued() {
+    Metadata meta = new Metadata();
+    assertFalse(meta.isMultiValued("key"));
+    meta.add("key", "value1");
+    assertFalse(meta.isMultiValued("key"));
+    meta.add("key", "value2");
+    assertTrue(meta.isMultiValued("key"));
+  }
+
+  /** Test for <code>names</code> method */
+  public void testNames() {
+    String[] names = null;
+    Metadata meta = new Metadata();
+    names = meta.names();
+    assertEquals(0, names.length);
+    
+    meta.add("name-one", "value");
+    names = meta.names();
+    assertEquals(1, names.length);
+    assertEquals("name-one", names[0]);
+    meta.add("name-two", "value");
+    names = meta.names();
+    assertEquals(2, names.length);
+  }
+  
+  /** Test for <code>remove(String)</code> method */
+  public void testRemove() {
+    Metadata meta = new Metadata();
+    meta.remove("name-one");
+    assertEquals(0, meta.size());
+    meta.add("name-one", "value-1.1");
+    meta.add("name-one", "value-1.2");
+    meta.add("name-two", "value-2.2");
+    assertEquals(2, meta.size());
+    assertNotNull(meta.get("name-one"));
+    assertNotNull(meta.get("name-two"));
+    meta.remove("name-one");
+    assertEquals(1, meta.size());
+    assertNull(meta.get("name-one"));
+    assertNotNull(meta.get("name-two"));
+    meta.remove("name-two");
+    assertEquals(0, meta.size());
+    assertNull(meta.get("name-one"));
+    assertNull(meta.get("name-two"));
+  }
+
+  /** Test for <code>equals(Object)</code> method */
+  public void testObject() {
+    Metadata meta1 = new Metadata();
+    Metadata meta2 = new Metadata();
+    assertFalse(meta1.equals(null));
+    assertFalse(meta1.equals("String"));
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-one", "value-1.1");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-one", "value-1.1");
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-one", "value-1.2");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-one", "value-1.2");
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-two", "value-2.1");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-two", "value-2.1");
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-two", "value-2.2");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-two", "value-2.x");
+    assertFalse(meta1.equals(meta2));
+  }
+  
+  /** Test for <code>Writable</code> implementation */
+  public void testWritable() {
+    Metadata result = null;
+    Metadata meta = new Metadata();
+    result = writeRead(meta);
+    assertEquals(0, result.size());
+    meta.add("name-one", "value-1.1");
+    result = writeRead(meta);
+    assertEquals(1, result.size());
+    assertEquals(1, result.getValues("name-one").length);
+    assertEquals("value-1.1", result.get("name-one"));
+    meta.add("name-two", "value-2.1");
+    meta.add("name-two", "value-2.2");
+    result = writeRead(meta);
+    assertEquals(2, result.size());
+    assertEquals(1, result.getValues("name-one").length);
+    assertEquals("value-1.1", result.getValues("name-one")[0]);
+    assertEquals(2, result.getValues("name-two").length);
+    assertEquals("value-2.1", result.getValues("name-two")[0]);
+    assertEquals("value-2.2", result.getValues("name-two")[1]);
+  }
+  
+  private Metadata writeRead(Metadata meta) {
+    Metadata readed = new Metadata();
+    try {
+      ByteArrayOutputStream out = new ByteArrayOutputStream();
+      meta.write(new DataOutputStream(out));
+      readed.readFields(new DataInputStream(new ByteArrayInputStream(out.toByteArray())));
+    } catch (IOException ioe) {
+      fail(ioe.toString());
+    }
+    return readed;
+  }
+       
+}

Propchange: 
lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java 
(original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java Wed 
Feb  8 13:48:52 2006
@@ -20,7 +20,7 @@
 import org.apache.nutch.util.NutchConfiguration;
 
 import org.apache.nutch.util.WritableTestUtils;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
 
 import junit.framework.TestCase;
 
@@ -41,9 +41,9 @@
       new Outlink("http://bar.com/", "Bar", conf)
     };
 
-    ContentProperties metaData = new ContentProperties();
-    metaData.put("Language", "en/us");
-    metaData.put("Charset", "UTF-8");
+    Metadata metaData = new Metadata();
+    metaData.add("Language", "en/us");
+    metaData.add("Charset", "UTF-8");
 
     ParseData r = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metaData);
     r.setConf(conf);

Modified: lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java 
(original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java Wed 
Feb  8 13:48:52 2006
@@ -16,13 +16,14 @@
 
 package org.apache.nutch.protocol;
 
+import org.apache.nutch.metadata.Metadata;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.NutchConfiguration;
-
 import org.apache.nutch.util.WritableTestUtils;
 
 import junit.framework.TestCase;
 
+
 /** Unit tests for Content. */
 
 public class TestContent extends TestCase {
@@ -37,9 +38,9 @@
 
     String url = "http://www.foo.com/";
 
-    ContentProperties metaData = new ContentProperties();
-    metaData.put("Host", "www.foo.com");
-    metaData.put("Content-Type", "text/html");
+    Metadata metaData = new Metadata();
+    metaData.add("Host", "www.foo.com");
+    metaData.add("Content-Type", "text/html");
 
     Content r = new Content(url, url, page.getBytes("UTF8"), "text/html",
                             metaData, conf);
@@ -47,12 +48,13 @@
     WritableTestUtils.testWritable(r);
     assertEquals("text/html", r.getMetadata().get("Content-Type"));
     assertEquals("text/html", r.getMetadata().get("content-type"));
+    assertEquals("text/html", r.getMetadata().get("CONTENTYPE"));
   }
 
   /** Unit tests for getContentType(String, String, byte[]) method. */
   public void testGetContentType() throws Exception {
     Content c = null;
-    ContentProperties p = new ContentProperties();
+    Metadata p = new Metadata();
 
     c = new Content("http://www.foo.com/",
                     "http://www.foo.com/",

Modified: lucene/nutch/trunk/src/web/jsp/cached.jsp
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/web/jsp/cached.jsp?rev=376089&r1=376088&r2=376089&view=diff
==============================================================================
--- lucene/nutch/trunk/src/web/jsp/cached.jsp (original)
+++ lucene/nutch/trunk/src/web/jsp/cached.jsp Wed Feb  8 13:48:52 2006
@@ -6,7 +6,7 @@
 
   import="org.apache.nutch.searcher.*"
   import="org.apache.nutch.parse.ParseData"
-  import="org.apache.nutch.protocol.ContentProperties"
+  import="org.apache.nutch.metadata.Metadata"
   import="org.apache.hadoop.conf.Configuration"
   import="org.apache.nutch.util.NutchConfiguration"
 %><%
@@ -26,10 +26,10 @@
     ResourceBundle.getBundle("org.nutch.jsp.cached", request.getLocale())
     .getLocale().getLanguage();
 
-  ContentProperties metaData = bean.getParseData(details).getMetadata();
+  Metadata metaData = bean.getParseData(details).getContentMeta();
 
   String content = null;
-  String contentType = (String) metaData.get("Content-Type");
+  String contentType = (String) metaData.get(Metadata.CONTENT_TYPE);
   if (contentType.startsWith("text/html")) {
     // FIXME : it's better to emit the original 'byte' sequence 
     // with 'charset' set to the value of 'CharEncoding',




-------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc. Do you grep through log files
for problems?  Stop!  Download the new AJAX search engine that makes
searching your log files as easy as surfing the  web.  DOWNLOAD SPLUNK!
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=103432&bid=230486&dat=121642
_______________________________________________
Nutch-cvs mailing list
Nutch-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

[Nutch-cvs] svn commit: r376089 [2/2] - in /lucene/nutch/trunk: ./ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/metadata/ src/java/org/apache/nutch/net/protocols/ src/java/org/apache/nutch/parse/ src/java/org/apac...

Reply via email to