luetzkendorf    2005/01/14 10:34:14

  Modified:    src/share/org/apache/slide/extractor
                        AbstractContentExtractor.java
                        AbstractPropertyExtractor.java Extractor.java
                        ExtractorManager.java MSExcelExtractor.java
                        MSPowerPointExtractor.java MSWordExtractor.java
                        OfficeExtractor.java PDFExtractor.java
                        SimpleXmlExtractor.java TextContentExtractor.java
                        XmlContentExtractor.java
  Log:
  Improvements by Eirikur S. Hrafnsson for Content-Type handling in
  the extractor framework (see Bugzilla 33065)
  
  Revision  Changes    Path
  1.5       +20 -4     
jakarta-slide/src/share/org/apache/slide/extractor/AbstractContentExtractor.java
  
  Index: AbstractContentExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/AbstractContentExtractor.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- AbstractContentExtractor.java     29 Sep 2004 15:28:06 -0000      1.4
  +++ AbstractContentExtractor.java     14 Jan 2005 18:34:13 -0000      1.5
  @@ -47,10 +47,26 @@
   
       public abstract Reader extract(InputStream content) throws 
ExtractorException;
   
  +    /* (non-Javadoc)
  +     * @see org.apache.slide.extractor.Extractor#getContentType()
  +     */
       public String getContentType() {
           return contentType;
       }
  -
  +    
  +     /**
  +      * Default implementation returns true if getContentType() contains the 
fileToIndexContentType<br/>
  +      * OR if getContentType() returns null.
  +      * @param fileToIndexContentType The content type of the file we want 
to index.
  +      */
  +     public boolean isAcceptableContentType(String fileToIndexContentType) {
  +             if(getContentType()!=null){
  +                     //return true if the contentType string contains 
fileToIndexContentType
  +                     return 
(getContentType().indexOf(fileToIndexContentType)>=0);
  +             }
  +             return true;
  +     }
  +     
       public String getUri() {
           return uri;
       }
  
  
  
  1.4       +20 -4     
jakarta-slide/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
  
  Index: AbstractPropertyExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- AbstractPropertyExtractor.java    29 Sep 2004 15:28:06 -0000      1.3
  +++ AbstractPropertyExtractor.java    14 Jan 2005 18:34:13 -0000      1.4
  @@ -23,8 +23,8 @@
   
   package org.apache.slide.extractor;
   
  -import java.util.Map;
   import java.io.InputStream;
  +import java.util.Map;
   
   /**
    * The AbstractPropertyExtractor class
  @@ -47,6 +47,9 @@
   
       public abstract Map extract(InputStream content) throws 
ExtractorException;
   
  +    /* (non-Javadoc)
  +     * @see org.apache.slide.extractor.Extractor#getContentType()
  +     */
       public String getContentType() {
           return contentType;
       }
  @@ -58,5 +61,18 @@
       public String getNamespace() {
           return namespace;
       }
  +    
  +     /**
  +      * Default implementation returns true if getContentType() contains the 
fileToIndexContentType<br/>
  +      * OR if getContentType() returns null.
  +      * @param fileToIndexContentType The content type of the file we want 
to index.
  +      */
  +     public boolean isAcceptableContentType(String fileToIndexContentType) {
  +             if(getContentType()!=null){
  +                     //return true if the contentType string contains 
fileToIndexContentType
  +                     return 
(getContentType().indexOf(fileToIndexContentType)>=0);
  +             }
  +             return true;
  +     }
   
   }
  
  
  
  1.6       +26 -6     
jakarta-slide/src/share/org/apache/slide/extractor/Extractor.java
  
  Index: Extractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/Extractor.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- Extractor.java    29 Sep 2004 15:28:06 -0000      1.5
  +++ Extractor.java    14 Jan 2005 18:34:13 -0000      1.6
  @@ -24,15 +24,35 @@
   package org.apache.slide.extractor;
   
   /**
  - * The Extractor interface
  - * 
  + * The Extractor interface.<br/>
  + * Default init parameters in Domain.xml include:<br/>
  + * <li>"classname" - The extractor class.</li>
  + * <li>"uri" - The URI the extractor handles.</li>
  + * <li>"content-type" - A comma separated list of supported content 
types</li>
  + * <li>"namespace" - The namespace the extractor handles.</li>
    */
   public interface Extractor {
   
  -    public String getContentType();
  +     /**
  +      * @param contentType of the file to index
  +      * @return true if this extractor can handle indexing a file of the 
supplied contentType, otherwise false.
  +      */
  +    public boolean isAcceptableContentType(String contentType);
   
  +    /**
  +     * @return a comma separated list of content types this extractor is 
registered to handle.<br/>
  +     * May also return null if it handles any type of file.
  +     */
  +    public String getContentType();
  +    
  +    /**
  +     * @return The URI this extractor is registered to handle.
  +     */
       public String getUri();
   
  +    /**
  +     * @return The namespace this extractor is registered to handle.
  +     */
       public String getNamespace();
   
   }
  
  
  
  1.7       +15 -15    
jakarta-slide/src/share/org/apache/slide/extractor/ExtractorManager.java
  
  Index: ExtractorManager.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/ExtractorManager.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- ExtractorManager.java     8 Nov 2004 09:37:43 -0000       1.6
  +++ ExtractorManager.java     14 Jan 2005 18:34:13 -0000      1.7
  @@ -23,17 +23,20 @@
   
   package org.apache.slide.extractor;
   
  +import java.lang.reflect.Constructor;
  +import java.util.ArrayList;
  +import java.util.Enumeration;
  +import java.util.Iterator;
  +import java.util.List;
  +import org.apache.slide.content.NodeRevisionDescriptor;
  +import org.apache.slide.content.NodeRevisionDescriptors;
   import org.apache.slide.util.conf.Configurable;
   import org.apache.slide.util.conf.Configuration;
   import org.apache.slide.util.conf.ConfigurationException;
  -import org.apache.slide.content.NodeRevisionDescriptors;
  -import org.apache.slide.content.NodeRevisionDescriptor;
  -
  -import java.util.*;
  -import java.lang.reflect.Constructor;
   
   /**
    * The ExtractorManager class
  + * 
    */
   public class ExtractorManager implements Configurable {
       private final static ExtractorManager manager = new ExtractorManager();
  @@ -103,8 +106,7 @@
       {
           for ( Iterator i = extractors.iterator(); i.hasNext(); ) {
               Extractor extractor = (Extractor)i.next();
  -            if ( extractor instanceof ContentExtractor && 
  -                    matches(extractor, namespace, uri, descriptor)) {
  +            if ( extractor instanceof ContentExtractor && matches(extractor, 
namespace, uri, descriptor)) {
                   return true;
               }
           }
  @@ -113,7 +115,7 @@
   
       static boolean matches(Extractor extractor, String namespace, 
NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
           boolean matching = true;
  -        if ( descriptor != null && extractor.getContentType() != null && 
!descriptor.getContentType().equals(extractor.getContentType()) ) {
  +        if ( descriptor != null && 
!extractor.isAcceptableContentType(descriptor.getContentType())) {
               matching = false;
           }
           if ( descriptors != null && extractor.getUri() != null && 
!descriptors.getUri().startsWith(extractor.getUri()) ) {
  @@ -125,10 +127,8 @@
           return matching;
       }
       
  -    static boolean matches(Extractor extractor, String namespace, String 
uri, 
  -            NodeRevisionDescriptor descriptor) 
  -    {
  -        if ( descriptor != null && 
!descriptor.getContentType().equals(extractor.getContentType()) ) {
  +    static boolean matches(Extractor extractor, String namespace, String 
uri, NodeRevisionDescriptor descriptor) {
  +        if ( descriptor != null && 
!extractor.isAcceptableContentType(descriptor.getContentType()) ) {
               return false;
           }
           if ( extractor.getUri() != null && 
!uri.startsWith(extractor.getUri()) ) {
  
  
  
  1.3       +31 -17    
jakarta-slide/src/share/org/apache/slide/extractor/MSExcelExtractor.java
  
  Index: MSExcelExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSExcelExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- MSExcelExtractor.java     29 Sep 2004 15:28:06 -0000      1.2
  +++ MSExcelExtractor.java     14 Jan 2005 18:34:13 -0000      1.3
  @@ -23,23 +23,27 @@
   
   package org.apache.slide.extractor;
   
  -/**
  - * Author: Ryan Rhodes
  - * Date: Jun 26, 2004
  - * Time: 1:53:31 AM
  - */
  -
  -import java.io.*;
  +import java.io.CharArrayReader;
  +import java.io.CharArrayWriter;
  +import java.io.FileInputStream;
  +import java.io.InputStream;
  +import java.io.Reader;
   import java.util.Iterator;
  -
  -import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  -import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  -import org.apache.poi.hssf.usermodel.HSSFSheet;
  -import org.apache.poi.hssf.usermodel.HSSFRow;
   import org.apache.poi.hssf.usermodel.HSSFCell;
  +import org.apache.poi.hssf.usermodel.HSSFRow;
  +import org.apache.poi.hssf.usermodel.HSSFSheet;
  +import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  +import org.apache.poi.poifs.filesystem.POIFSFileSystem;
   
  -public class MSExcelExtractor  extends AbstractContentExtractor
  -{
  +/**
  + * Content extractor for Microsoft Excel documents.
  + */
  +public class MSExcelExtractor extends AbstractContentExtractor {
  +     
  +     static final String CONTENT_TYPE_EXCEL_1 = "application/msexcel";
  +     static final String CONTENT_TYPE_EXCEL_2 = "application/vnd.ms-excel";
  +     static final String CONTENT_TYPE_EXCEL_ALL_CSV = 
CONTENT_TYPE_EXCEL_1+","+CONTENT_TYPE_EXCEL_2;
  +     
       public MSExcelExtractor(String uri, String contentType, String 
namespace) {
         super(uri, contentType, namespace);
       }
  @@ -104,4 +108,14 @@
           }
           while(c != -1);
       }
  +    
  +     /* (non-Javadoc)
  +      * @see org.apache.slide.extractor.Extractor#getContentType()
  +      */
  +     public String getContentType() {
  +             if(super.getContentType()==null){
  +                     return CONTENT_TYPE_EXCEL_ALL_CSV;
  +             }
  +             return super.getContentType();
  +     }
   }
  
  
  
  1.4       +29 -13    
jakarta-slide/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
  
  Index: MSPowerPointExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- MSPowerPointExtractor.java        29 Sep 2004 15:28:06 -0000      1.3
  +++ MSPowerPointExtractor.java        14 Jan 2005 18:34:13 -0000      1.4
  @@ -23,21 +23,27 @@
   
   package org.apache.slide.extractor;
   
  -import org.apache.poi.util.LittleEndian;
  -import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
  -import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
  +import java.io.ByteArrayInputStream;
  +import java.io.ByteArrayOutputStream;
  +import java.io.FileInputStream;
  +import java.io.InputStream;
  +import java.io.InputStreamReader;
  +import java.io.Reader;
   import org.apache.poi.poifs.eventfilesystem.POIFSReader;
  +import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
  +import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
   import org.apache.poi.poifs.filesystem.DocumentInputStream;
  -
  -import java.io.*;
  +import org.apache.poi.util.LittleEndian;
   
   /**
  - * Author: Ryan Rhodes
  - * Date: Jun 27, 2004
  - * Time: 3:45:39 AM
  + * Content extractor for Microsoft PowerPoint documents.
    */
  -public class MSPowerPointExtractor extends AbstractContentExtractor 
implements POIFSReaderListener
  -{
  +public class MSPowerPointExtractor extends AbstractContentExtractor 
implements POIFSReaderListener{
  +
  +    static final String CONTENT_TYPE_POWERPOINT_1 = 
"application/mspowerpoint";
  +    static final String CONTENT_TYPE_POWERPOINT_2 = 
"application/vnd.ms-powerpoint";
  +    static final String CONTENT_TYPE_POWERPOINT_ALL_CSV = 
CONTENT_TYPE_POWERPOINT_1+","+CONTENT_TYPE_POWERPOINT_2;
  +     
       private ByteArrayOutputStream writer = new ByteArrayOutputStream();
   
       public MSPowerPointExtractor(String uri, String contentType, String 
namespace) {
  @@ -104,4 +110,14 @@
           }
           while( c != -1 );
       }
  +    
  +     /* (non-Javadoc)
  +      * @see org.apache.slide.extractor.Extractor#getContentType()
  +      */
  +     public String getContentType() {
  +             if(super.getContentType()==null){
  +                     return CONTENT_TYPE_POWERPOINT_ALL_CSV;
  +             }
  +             return super.getContentType();
  +     }
   }
  
  
  
  1.3       +26 -13    
jakarta-slide/src/share/org/apache/slide/extractor/MSWordExtractor.java
  
  Index: MSWordExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSWordExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- MSWordExtractor.java      29 Sep 2004 15:28:06 -0000      1.2
  +++ MSWordExtractor.java      14 Jan 2005 18:34:13 -0000      1.3
  @@ -23,17 +23,20 @@
   
   package org.apache.slide.extractor;
   
  -/**
  - * Author: Ryan Rhodes
  - * Date: Jun 26, 2004
  - * Time: 12:34:29 AM
  - */
  -
  -import java.io.*;
  -
  +import java.io.FileInputStream;
  +import java.io.InputStream;
  +import java.io.Reader;
  +import java.io.StringReader;
   import org.textmining.text.extraction.WordExtractor;
   
  +/**
  + * Content extractor for Microsoft Word documents.
  + */
   public class MSWordExtractor extends AbstractContentExtractor {
  +     
  +    static final String CONTENT_TYPE_WORD_1 = "application/msword";
  +    static final String CONTENT_TYPE_WORD_2 = "application/vnd.ms-word";
  +    static final String CONTENT_TYPE_WORD_ALL_CSV = 
CONTENT_TYPE_WORD_1+","+CONTENT_TYPE_WORD_2;
   
       public MSWordExtractor(String uri, String contentType, String namespace) 
{
           super(uri, contentType, namespace);
  @@ -41,8 +44,7 @@
   
       public Reader extract(InputStream content)  throws ExtractorException {
           try {
  -            WordExtractor  extractor =
  -                    new WordExtractor();
  +            WordExtractor  extractor = new WordExtractor();
               String text = extractor.extractText(content);          
   
               StringReader reader = new StringReader(text);
  @@ -70,4 +72,15 @@
               }
               while( c != -1 );
           }
  +        
  +     /* (non-Javadoc)
  +      * @see org.apache.slide.extractor.Extractor#getContentType()
  +      */
  +     public String getContentType() {
  +             if(super.getContentType()==null){
  +                     return CONTENT_TYPE_WORD_ALL_CSV;
  +             }
  +             return super.getContentType();
  +     }
  +     
   }
  
  
  
  1.4       +27 -6     
jakarta-slide/src/share/org/apache/slide/extractor/OfficeExtractor.java
  
  Index: OfficeExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/OfficeExtractor.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- OfficeExtractor.java      29 Sep 2004 15:28:06 -0000      1.3
  +++ OfficeExtractor.java      14 Jan 2005 18:34:13 -0000      1.4
  @@ -1,21 +1,31 @@
   package org.apache.slide.extractor;
   
   import java.io.InputStream;
  -import java.util.*;
  -
  -import org.apache.poi.hpsf.*;
  -import org.apache.poi.poifs.eventfilesystem.*;
  +import java.util.ArrayList;
  +import java.util.Enumeration;
  +import java.util.HashMap;
  +import java.util.Iterator;
  +import java.util.List;
  +import java.util.Map;
  +import org.apache.poi.hpsf.NoPropertySetStreamException;
  +import org.apache.poi.hpsf.Property;
  +import org.apache.poi.hpsf.PropertySet;
  +import org.apache.poi.hpsf.PropertySetFactory;
  +import org.apache.poi.hpsf.Section;
  +import org.apache.poi.poifs.eventfilesystem.POIFSReader;
  +import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
  +import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
   import org.apache.slide.util.conf.Configurable;
   import org.apache.slide.util.conf.Configuration;
   import org.apache.slide.util.conf.ConfigurationException;
   
   /**
  - * The OfficeExtractor class
  - * 
  + * Property extractor for Microsoft Office documents.
    */
   public class OfficeExtractor extends AbstractPropertyExtractor implements 
Configurable {
        protected List instructions = new ArrayList();
        protected Map propertyMap = new HashMap();
  +     static final String CONTENT_TYPE_MS_OFFICE_ALL_CSV = 
MSWordExtractor.CONTENT_TYPE_WORD_ALL_CSV+","+MSExcelExtractor.CONTENT_TYPE_EXCEL_ALL_CSV+","+MSPowerPointExtractor.CONTENT_TYPE_POWERPOINT_ALL_CSV;
        
        public OfficeExtractor(String uri, String contentType, String 
namespace) {
                super(uri, contentType, namespace);
  @@ -81,4 +91,15 @@
                        propertyMap.put(id, property);
           }
        }
  +     
  +     /* (non-Javadoc)
  +      * @see org.apache.slide.extractor.Extractor#getContentType()
  +      */
  +     public String getContentType() {
  +             if(super.getContentType()==null){
  +                     return CONTENT_TYPE_MS_OFFICE_ALL_CSV;
  +             }
  +             return super.getContentType();
  +     }
  +     
   }
  
  
  
  1.3       +27 -11    
jakarta-slide/src/share/org/apache/slide/extractor/PDFExtractor.java
  
  Index: PDFExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/PDFExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- PDFExtractor.java 29 Sep 2004 15:28:06 -0000      1.2
  +++ PDFExtractor.java 14 Jan 2005 18:34:13 -0000      1.3
  @@ -23,19 +23,24 @@
   
   package org.apache.slide.extractor;
   
  -import org.pdfbox.util.PDFTextStripper;
  +import java.io.CharArrayReader;
  +import java.io.CharArrayWriter;
  +import java.io.FileInputStream;
  +import java.io.InputStream;
  +import java.io.Reader;
   import org.pdfbox.pdfparser.PDFParser;
   import org.pdfbox.pdmodel.PDDocument;
  -
  -import java.io.*;
  +import org.pdfbox.util.PDFTextStripper;
   
   /**
  - * Author: Ryan Rhodes
  - * Date: Jun 26, 2004
  - * Time: 4:03:00 AM
  + * Content extractor for PDF documents.
    */
  -public class PDFExtractor extends AbstractContentExtractor
  -{
  +public class PDFExtractor extends AbstractContentExtractor{
  +     
  +    private static final String CONTENT_TYPE_PDF_1 = "application/pdf";
  +    private static final String CONTENT_TYPE_PDF_2 = "application/x-pdf";
  +    private static final String CONTENT_TYPE_PDF_ALL_CSV = 
CONTENT_TYPE_PDF_1+","+CONTENT_TYPE_PDF_2;
  +
   
       public PDFExtractor(String uri, String contentType, String namespace)
       {
  @@ -84,4 +89,15 @@
           }
           while(c != -1);
       }
  +    
  +     /* (non-Javadoc)
  +      * @see org.apache.slide.extractor.Extractor#getContentType()
  +      */
  +     public String getContentType() {
  +             if(super.getContentType()==null){
  +                     return CONTENT_TYPE_PDF_ALL_CSV;
  +             }       
  +             return super.getContentType();
  +     }
  +     
   }
  
  
  
  1.11      +28 -8     
jakarta-slide/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
  
  Index: SimpleXmlExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- SimpleXmlExtractor.java   29 Nov 2004 18:44:30 -0000      1.10
  +++ SimpleXmlExtractor.java   14 Jan 2005 18:34:13 -0000      1.11
  @@ -23,6 +23,14 @@
   
   package org.apache.slide.extractor;
   
  +import java.io.IOException;
  +import java.io.InputStream;
  +import java.util.ArrayList;
  +import java.util.Enumeration;
  +import java.util.HashMap;
  +import java.util.Iterator;
  +import java.util.List;
  +import java.util.Map;
   import org.apache.slide.common.PropertyName;
   import org.apache.slide.util.conf.Configurable;
   import org.apache.slide.util.conf.Configuration;
  @@ -34,16 +42,18 @@
   import org.jdom.input.SAXBuilder;
   import org.jdom.xpath.XPath;
   
  -import java.io.IOException;
  -import java.io.InputStream;
  -import java.util.*;
  -
   /**
    * The SimpleXmlExtractor class
    * 
    */
   public class SimpleXmlExtractor extends AbstractPropertyExtractor implements 
Configurable {
  -
  +     
  +     static final String CONTENT_TYPE_XML = "text/xml";
  +     static final String CONTENT_TYPE_XHTML = "application/xhtml+xml";
  +     //html is included as well because xhtml can be, and most often is, served with the html content type
  +     static final String CONTENT_TYPE_HTML = "text/html";
  +     static final String CONTENT_TYPE_XML_ALL_CSV = 
CONTENT_TYPE_XML+","+CONTENT_TYPE_XHTML+","+CONTENT_TYPE_HTML;
  +     
       protected List instructions = new ArrayList();
   
       public SimpleXmlExtractor(String uri, String contentType, String 
namespace) {
  @@ -133,4 +143,14 @@
               return propertyName;
           }
       }
  +
  +     /* (non-Javadoc)
  +      * @see org.apache.slide.extractor.Extractor#getContentType()
  +      */
  +     public String getContentType() {
  +             if(super.getContentType()==null){
  +                     return CONTENT_TYPE_XML_ALL_CSV;
  +             }       
  +             return super.getContentType();
  +     }
   }
  
  
  
  1.3       +16 -5     
jakarta-slide/src/share/org/apache/slide/extractor/TextContentExtractor.java
  
  Index: TextContentExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/TextContentExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- TextContentExtractor.java 29 Nov 2004 18:43:09 -0000      1.2
  +++ TextContentExtractor.java 14 Jan 2005 18:34:13 -0000      1.3
  @@ -30,8 +30,9 @@
   /**
    * Content extractor that simply returns the content. 
    */
  -public class TextContentExtractor extends AbstractContentExtractor
  -{
  +public class TextContentExtractor extends AbstractContentExtractor{
  +     
  +     private static final String CONTENT_TYPE_TEXT = "text/plain";
   
       public TextContentExtractor(String uri, String contentType)
       {
  @@ -46,5 +47,15 @@
       {
           return new InputStreamReader(content);
       }
  +    
  +     /* (non-Javadoc)
  +      * @see org.apache.slide.extractor.Extractor#getContentType()
  +      */
  +     public String getContentType() {
  +             if(super.getContentType()==null){
  +                     return CONTENT_TYPE_TEXT;
  +             }       
  +             return super.getContentType();
  +     }
   
   }
  
  
  
  1.2       +13 -3     
jakarta-slide/src/share/org/apache/slide/extractor/XmlContentExtractor.java
  
  Index: XmlContentExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/XmlContentExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- XmlContentExtractor.java  29 Nov 2004 18:43:21 -0000      1.1
  +++ XmlContentExtractor.java  14 Jan 2005 18:34:13 -0000      1.2
  @@ -124,4 +124,14 @@
               return new InputSource(new StringReader(""));
           }
       }
  +
  +     /* (non-Javadoc)
  +      * @see org.apache.slide.extractor.Extractor#getContentType()
  +      */
  +     public String getContentType() {
  +             if(super.getContentType()==null){
  +                     return SimpleXmlExtractor.CONTENT_TYPE_XML_ALL_CSV;
  +             }       
  +             return super.getContentType();
  +     }
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to