unico       2004/09/29 08:01:27

  Modified:    src/share/org/apache/slide/extractor Tag:
                        SLIDE_2_1_RELEASE_BRANCH MSExcelExtractor.java
                        PDFExtractor.java MSPowerPointExtractor.java
                        Extractor.java ExtractorManager.java
                        OfficeExtractor.java AbstractContentExtractor.java
                        AbstractPropertyExtractor.java
                        SimpleXmlExtractor.java MSWordExtractor.java
                        PropertyExtractorTrigger.java
               src/stores/org/apache/slide/index Tag:
                        SLIDE_2_1_RELEASE_BRANCH TextContentIndexer.java
               src/share/org/apache/slide/common Tag:
                        SLIDE_2_1_RELEASE_BRANCH Domain.java Namespace.java
  Log:
  Per-namespace extractor configuration.
  Extractors can now be declared for each namespace separately,
  in addition to the global extractor configuration for the whole domain.
  
  Revision  Changes    Path
  No                   revision
  No                   revision
  1.1.2.1   +6 -6      
jakarta-slide/src/share/org/apache/slide/extractor/MSExcelExtractor.java
  
  Index: MSExcelExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSExcelExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.1.2.1
  diff -u -r1.1 -r1.1.2.1
  --- MSExcelExtractor.java     29 Jun 2004 08:10:57 -0000      1.1
  +++ MSExcelExtractor.java     29 Sep 2004 15:01:26 -0000      1.1.2.1
  @@ -40,8 +40,8 @@
   
   public class MSExcelExtractor  extends AbstractContentExtractor
   {
  -    public MSExcelExtractor(String uri, String contentType) {
  -      super(uri, contentType);
  +    public MSExcelExtractor(String uri, String contentType, String namespace) {
  +      super(uri, contentType, namespace);
       }
   
       public Reader extract(InputStream content)  throws ExtractorException
  @@ -92,7 +92,7 @@
       {
           FileInputStream in = new FileInputStream(args[0]);
   
  -        MSExcelExtractor ex = new MSExcelExtractor(null, null);
  +        MSExcelExtractor ex = new MSExcelExtractor(null, null, null);
   
           Reader reader = ex.extract(in);
   
  
  
  
  1.1.2.1   +6 -6      
jakarta-slide/src/share/org/apache/slide/extractor/PDFExtractor.java
  
  Index: PDFExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/PDFExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.1.2.1
  diff -u -r1.1 -r1.1.2.1
  --- PDFExtractor.java 29 Jun 2004 08:10:57 -0000      1.1
  +++ PDFExtractor.java 29 Sep 2004 15:01:26 -0000      1.1.2.1
  @@ -37,9 +37,9 @@
   public class PDFExtractor extends AbstractContentExtractor
   {
   
  -    public PDFExtractor(String uri, String contentType)
  +    public PDFExtractor(String uri, String contentType, String namespace)
       {
  -        super(uri, contentType);
  +        super(uri, contentType, namespace);
       }
   
       public Reader extract(InputStream content)  throws ExtractorException
  @@ -72,7 +72,7 @@
       {
           FileInputStream in = new FileInputStream(args[0]);
   
  -        PDFExtractor ex = new PDFExtractor(null, null);
  +        PDFExtractor ex = new PDFExtractor(null, null, null);
   
           Reader reader = ex.extract(in);
   
  
  
  
  1.2.2.1   +6 -6      
jakarta-slide/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
  
  Index: MSPowerPointExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.2.2.1
  diff -u -r1.2 -r1.2.2.1
  --- MSPowerPointExtractor.java        14 Jul 2004 08:01:31 -0000      1.2
  +++ MSPowerPointExtractor.java        29 Sep 2004 15:01:26 -0000      1.2.2.1
  @@ -40,8 +40,8 @@
   {
       private ByteArrayOutputStream writer = new ByteArrayOutputStream();
   
  -    public MSPowerPointExtractor(String uri, String contentType) {
  -        super(uri, contentType);
  +    public MSPowerPointExtractor(String uri, String contentType, String namespace) {
  +        super(uri, contentType, namespace);
       }
   
       public Reader extract(InputStream content)  throws ExtractorException {
  @@ -91,7 +91,7 @@
       {
           FileInputStream in = new FileInputStream(args[0]);
   
  -        MSPowerPointExtractor ex = new MSPowerPointExtractor(null, null);
  +        MSPowerPointExtractor ex = new MSPowerPointExtractor(null, null, null);
   
           Reader reader = ex.extract(in);
   
  
  
  
  1.4.2.1   +7 -3      
jakarta-slide/src/share/org/apache/slide/extractor/Extractor.java
  
  Index: Extractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/Extractor.java,v
  retrieving revision 1.4
  retrieving revision 1.4.2.1
  diff -u -r1.4 -r1.4.2.1
  --- Extractor.java    28 Jul 2004 09:36:01 -0000      1.4
  +++ Extractor.java    29 Sep 2004 15:01:26 -0000      1.4.2.1
  @@ -28,7 +28,11 @@
    * 
    */
   public interface Extractor {
  +
       public String getContentType();
   
       public String getUri();
  +
  +    public String getNamespace();
  +
   }
  
  
  
  1.4.2.1   +16 -13    
jakarta-slide/src/share/org/apache/slide/extractor/ExtractorManager.java
  
  Index: ExtractorManager.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/ExtractorManager.java,v
  retrieving revision 1.4
  retrieving revision 1.4.2.1
  diff -u -r1.4 -r1.4.2.1
  --- ExtractorManager.java     28 Jul 2004 09:36:01 -0000      1.4
  +++ ExtractorManager.java     29 Sep 2004 15:01:26 -0000      1.4.2.1
  @@ -34,7 +34,6 @@
   
   /**
    * The ExtractorManager class
  - * 
    */
   public class ExtractorManager implements Configurable {
       private final static ExtractorManager manager = new ExtractorManager();
  @@ -51,11 +50,11 @@
           extractors.add(extractor);
       }
   
  -    public PropertyExtractor[] getPropertyExtractors(NodeRevisionDescriptors 
descriptors, NodeRevisionDescriptor descriptor) {
  +    public PropertyExtractor[] getPropertyExtractors(String namespace, 
NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
           List matchingExtractors = new ArrayList();
           for ( Iterator i = extractors.iterator(); i.hasNext(); ) {
               Extractor extractor = (Extractor)i.next();
  -            if ( extractor instanceof PropertyExtractor && matches(extractor,  
descriptors, descriptor)) {
  +            if ( extractor instanceof PropertyExtractor && matches(extractor, 
namespace, descriptors, descriptor)) {
                   matchingExtractors.add(extractor);
               }
           }
  @@ -63,11 +62,11 @@
           return (PropertyExtractor [])matchingExtractors.toArray(extractors);
       };
   
  -    public ContentExtractor[] getContentExtractors(NodeRevisionDescriptors 
descriptors, NodeRevisionDescriptor descriptor) {
  +    public ContentExtractor[] getContentExtractors(String namespace, 
NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
           List matchingExtractors = new ArrayList();
           for ( Iterator i = extractors.iterator(); i.hasNext(); ) {
               Extractor extractor = (Extractor)i.next();
  -            if ( extractor instanceof ContentExtractor && matches(extractor,  
descriptors, descriptor)) {
  +            if ( extractor instanceof ContentExtractor && matches(extractor, 
namespace, descriptors, descriptor)) {
                   matchingExtractors.add(extractor);
               }
           }
  @@ -75,11 +74,11 @@
           return (ContentExtractor [])matchingExtractors.toArray(extractors);
       };
   
  -    public Extractor[] getExtractors(NodeRevisionDescriptors descriptors, 
NodeRevisionDescriptor descriptor) {
  +    public Extractor[] getExtractors(String namespace, NodeRevisionDescriptors 
descriptors, NodeRevisionDescriptor descriptor) {
           List matchingExtractors = new ArrayList();
           for ( Iterator i = extractors.iterator(); i.hasNext(); ) {
               Extractor extractor = (Extractor)i.next();
  -            if ( matches(extractor,  descriptors, descriptor)) {
  +            if ( matches(extractor, namespace, descriptors, descriptor)) {
                   matchingExtractors.add(extractor);
               }
           }
  @@ -87,7 +86,7 @@
           return (Extractor [])matchingExtractors.toArray(extractors);
       };
   
  -    public boolean matches(Extractor extractor, NodeRevisionDescriptors 
descriptors, NodeRevisionDescriptor descriptor) {
  +    public boolean matches(Extractor extractor, String namespace, 
NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
           boolean matching = true;
           if ( descriptor != null && extractor.getContentType() != null && 
!descriptor.getContentType().equals(extractor.getContentType()) ) {
               matching = false;
  @@ -95,6 +94,9 @@
           if ( descriptors != null && extractor.getUri() != null && 
!descriptors.getUri().startsWith(extractor.getUri()) ) {
               matching = false;
           }
  +        if ( descriptors != null && extractor.getNamespace() != null && 
!extractor.getNamespace().equals(namespace)) {
  +            matching = false;
  +        }
           return matching;
       }
   
  @@ -105,11 +107,12 @@
               String classname = extractorConfig.getAttribute("classname");
               String uri = extractorConfig.getAttribute("uri", null);
               String contentType = extractorConfig.getAttribute("content-type", null);
  +            String namespace = extractorConfig.getAttribute("namespace", null);
               try {
                   Class extractorClass = Class.forName(classname);
                   Extractor extractor = null;
  -                Constructor extractorConstructor = 
extractorClass.getConstructor(new Class[] { String.class, String.class } );
  -                extractor = (Extractor)extractorConstructor.newInstance(new 
String[] { uri, contentType });
  +                Constructor extractorConstructor = 
extractorClass.getConstructor(new Class[] { String.class, String.class, String.class } 
);
  +                extractor = (Extractor)extractorConstructor.newInstance(new 
String[] { uri, contentType, namespace });
                   if ( extractor instanceof Configurable ) {
                       
((Configurable)extractor).configure(extractorConfig.getConfiguration("configuration"));
                   }
  
  
  
  1.2.2.1   +83 -83    
jakarta-slide/src/share/org/apache/slide/extractor/OfficeExtractor.java
  
  Index: OfficeExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/OfficeExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.2.2.1
  diff -u -r1.2 -r1.2.2.1
  --- OfficeExtractor.java      28 Jul 2004 09:36:01 -0000      1.2
  +++ OfficeExtractor.java      29 Sep 2004 15:01:26 -0000      1.2.2.1
  @@ -1,84 +1,84 @@
  -package org.apache.slide.extractor;

  -

  -import java.io.InputStream;

  -import java.util.*;

  -

  -import org.apache.poi.hpsf.*;

  -import org.apache.poi.poifs.eventfilesystem.*;

  -import org.apache.slide.util.conf.Configurable;

  -import org.apache.slide.util.conf.Configuration;

  -import org.apache.slide.util.conf.ConfigurationException;

  -

  -/**

  - * The OfficeExtractor class

  - * 

  - */

  -public class OfficeExtractor extends AbstractPropertyExtractor implements 
Configurable {

  -     protected List instructions = new ArrayList();

  -     protected Map propertyMap = new HashMap();

  -     

  -     public OfficeExtractor(String uri, String contentType) {

  -             super(uri, contentType);

  -     }

  -

  -     public Map extract(InputStream content) throws ExtractorException {

  -             OfficePropertiesListener listener = new OfficePropertiesListener();

  -             try {

  -                     POIFSReader r = new POIFSReader();

  -                     r.registerListener(listener);

  -                     r.read(content);

  -             } catch (Exception e) {

  -                     throw new ExtractorException("Exception while extracting 
properties in OfficeExtractor");

  -             }

  -             return listener.getProperties();

  -     }

  -

  -     class OfficePropertiesListener implements POIFSReaderListener {

  -

  -             private HashMap properties = new HashMap();

  -

  -             public Map getProperties() {

  -                             return properties;

  -             }

  -

  -             public void processPOIFSReaderEvent(POIFSReaderEvent event) {

  -                     PropertySet ps = null;

  -                     try {

  -                             ps = PropertySetFactory.create(event.getStream());

  -                     } catch (NoPropertySetStreamException ex) {

  -                             return;

  -                     } catch (Exception ex) {

  -                             throw new RuntimeException("Property set stream \"" + 
event.getPath() + event.getName() + "\": " + ex);

  -                     }

  -                     String eventName = event.getName().trim();

  -                     final long sectionCount = ps.getSectionCount();

  -                     List sections = ps.getSections();

  -                     int nr = 0;

  -                     for (Iterator i = sections.iterator(); i.hasNext();) {

  -                             Section sec = (Section) i.next();

  -                             int propertyCount = sec.getPropertyCount();

  -                             Property[] props = sec.getProperties();

  -                             for (int i2 = 0; i2 < props.length; i2++) {

  -                                     Property p = props[i2];

  -                                     int id = p.getID();

  -                                     long type = p.getType();

  -                                     Object value = p.getValue();

  -                                     String key = eventName + "-" + nr + "-" + id; 

  -                                     if ( propertyMap.containsKey(key) ) {

  -                                             properties.put(propertyMap.get(key), 
value);

  -                                     }

  -                             }

  -                     }

  -             }

  -     }

  -

  -     public void configure(Configuration configuration) throws 
ConfigurationException {

  -        Enumeration instructions = configuration.getConfigurations("instruction");

  -        while (instructions.hasMoreElements()) {

  -            Configuration extract = (Configuration)instructions.nextElement();

  -            String property = extract.getAttribute("property");

  -            String id = extract.getAttribute("id");

  -                     propertyMap.put(id, property);

  -        }

  -     }

  +package org.apache.slide.extractor;
  +
  +import java.io.InputStream;
  +import java.util.*;
  +
  +import org.apache.poi.hpsf.*;
  +import org.apache.poi.poifs.eventfilesystem.*;
  +import org.apache.slide.util.conf.Configurable;
  +import org.apache.slide.util.conf.Configuration;
  +import org.apache.slide.util.conf.ConfigurationException;
  +
  +/**
  + * The OfficeExtractor class
  + * 
  + */
  +public class OfficeExtractor extends AbstractPropertyExtractor implements 
Configurable {
  +     protected List instructions = new ArrayList();
  +     protected Map propertyMap = new HashMap();
  +     
  +     public OfficeExtractor(String uri, String contentType, String namespace) {
  +             super(uri, contentType, namespace);
  +     }
  +
  +     public Map extract(InputStream content) throws ExtractorException {
  +             OfficePropertiesListener listener = new OfficePropertiesListener();
  +             try {
  +                     POIFSReader r = new POIFSReader();
  +                     r.registerListener(listener);
  +                     r.read(content);
  +             } catch (Exception e) {
  +                     throw new ExtractorException("Exception while extracting 
properties in OfficeExtractor");
  +             }
  +             return listener.getProperties();
  +     }
  +
  +     class OfficePropertiesListener implements POIFSReaderListener {
  +
  +             private HashMap properties = new HashMap();
  +
  +             public Map getProperties() {
  +                             return properties;
  +             }
  +
  +             public void processPOIFSReaderEvent(POIFSReaderEvent event) {
  +                     PropertySet ps = null;
  +                     try {
  +                             ps = PropertySetFactory.create(event.getStream());
  +                     } catch (NoPropertySetStreamException ex) {
  +                             return;
  +                     } catch (Exception ex) {
  +                             throw new RuntimeException("Property set stream \"" + 
event.getPath() + event.getName() + "\": " + ex);
  +                     }
  +                     String eventName = event.getName().trim();
  +                     final long sectionCount = ps.getSectionCount();
  +                     List sections = ps.getSections();
  +                     int nr = 0;
  +                     for (Iterator i = sections.iterator(); i.hasNext();) {
  +                             Section sec = (Section) i.next();
  +                             int propertyCount = sec.getPropertyCount();
  +                             Property[] props = sec.getProperties();
  +                             for (int i2 = 0; i2 < props.length; i2++) {
  +                                     Property p = props[i2];
  +                                     int id = p.getID();
  +                                     long type = p.getType();
  +                                     Object value = p.getValue();
  +                                     String key = eventName + "-" + nr + "-" + id; 
  +                                     if ( propertyMap.containsKey(key) ) {
  +                                             properties.put(propertyMap.get(key), 
value);
  +                                     }
  +                             }
  +                     }
  +             }
  +     }
  +
  +     public void configure(Configuration configuration) throws 
ConfigurationException {
  +        Enumeration instructions = configuration.getConfigurations("instruction");
  +        while (instructions.hasMoreElements()) {
  +            Configuration extract = (Configuration)instructions.nextElement();
  +            String property = extract.getAttribute("property");
  +            String id = extract.getAttribute("id");
  +                     propertyMap.put(id, property);
  +        }
  +     }
   }
  
  
  
  1.3.2.1   +13 -6     
jakarta-slide/src/share/org/apache/slide/extractor/AbstractContentExtractor.java
  
  Index: AbstractContentExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/AbstractContentExtractor.java,v
  retrieving revision 1.3
  retrieving revision 1.3.2.1
  diff -u -r1.3 -r1.3.2.1
  --- AbstractContentExtractor.java     28 Jul 2004 09:36:01 -0000      1.3
  +++ AbstractContentExtractor.java     29 Sep 2004 15:01:26 -0000      1.3.2.1
  @@ -28,15 +28,17 @@
   
   /**
    * The AbstractPropertyExtractor class
  - * 
    */
   public abstract class AbstractContentExtractor implements ContentExtractor {
  +
       private String contentType;
       private String uri;
  +    private String namespace;
   
  -    public AbstractContentExtractor(String uri, String contentType) {
  +    public AbstractContentExtractor(String uri, String contentType, String 
namespace) {
           this.contentType = contentType;
           this.uri = uri;
  +        this.namespace = namespace;
       }
   
       public abstract Reader extract(InputStream content) throws ExtractorException;
  @@ -48,4 +50,9 @@
       public String getUri() {
           return uri;
       }
  -}
  \ No newline at end of file
  +    
  +    public String getNamespace() {
  +        return namespace;
  +    }
  +
  +}
  
  
  
  1.2.2.1   +13 -6     
jakarta-slide/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
  
  Index: AbstractPropertyExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.2.2.1
  diff -u -r1.2 -r1.2.2.1
  --- AbstractPropertyExtractor.java    28 Jul 2004 09:36:01 -0000      1.2
  +++ AbstractPropertyExtractor.java    29 Sep 2004 15:01:26 -0000      1.2.2.1
  @@ -28,15 +28,17 @@
   
   /**
    * The AbstractPropertyExtractor class
  - * 
    */
   public abstract class AbstractPropertyExtractor implements PropertyExtractor {
  +
       private String contentType;
       private String uri;
  +    private String namespace;
   
  -    public AbstractPropertyExtractor(String uri, String contentType) {
  +    public AbstractPropertyExtractor(String uri, String contentType, String 
namespace) {
           this.contentType = contentType;
           this.uri = uri;
  +        this.namespace = namespace;
       }
   
       public abstract Map extract(InputStream content) throws ExtractorException;
  @@ -48,4 +50,9 @@
       public String getUri() {
           return uri;
       }
  -}
  \ No newline at end of file
  +
  +    public String getNamespace() {
  +        return namespace;
  +    }
  +
  +}
  
  
  
  1.7.2.1   +6 -5      
jakarta-slide/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
  
  Index: SimpleXmlExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java,v
  retrieving revision 1.7
  retrieving revision 1.7.2.1
  diff -u -r1.7 -r1.7.2.1
  --- SimpleXmlExtractor.java   28 Jul 2004 09:36:01 -0000      1.7
  +++ SimpleXmlExtractor.java   29 Sep 2004 15:01:26 -0000      1.7.2.1
  @@ -42,10 +42,11 @@
    * 
    */
   public class SimpleXmlExtractor extends AbstractPropertyExtractor implements 
Configurable {
  +
       protected List instructions = new ArrayList();
   
  -    public SimpleXmlExtractor(String uri, String contentType) {
  -        super(uri, contentType);
  +    public SimpleXmlExtractor(String uri, String contentType, String namespace) {
  +        super(uri, contentType, namespace);
       }
   
       public Map extract(InputStream content) throws ExtractorException {
  
  
  
  1.1.2.1   +6 -6      
jakarta-slide/src/share/org/apache/slide/extractor/MSWordExtractor.java
  
  Index: MSWordExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSWordExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.1.2.1
  diff -u -r1.1 -r1.1.2.1
  --- MSWordExtractor.java      29 Jun 2004 08:10:57 -0000      1.1
  +++ MSWordExtractor.java      29 Sep 2004 15:01:26 -0000      1.1.2.1
  @@ -35,8 +35,8 @@
   
   public class MSWordExtractor extends AbstractContentExtractor {
   
  -    public MSWordExtractor(String uri, String contentType) {
  -        super(uri, contentType);
  +    public MSWordExtractor(String uri, String contentType, String namespace) {
  +        super(uri, contentType, namespace);
       }
   
       public Reader extract(InputStream content)  throws ExtractorException {
  @@ -57,7 +57,7 @@
           {
               FileInputStream in = new FileInputStream(args[0]);
   
  -            MSWordExtractor ex = new MSWordExtractor(null, null);
  +            MSWordExtractor ex = new MSWordExtractor(null, null, null);
   
               Reader reader = ex.extract(in);
   
  
  
  
  1.5.2.1   +6 -5      
jakarta-slide/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
  
  Index: PropertyExtractorTrigger.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java,v
  retrieving revision 1.5
  retrieving revision 1.5.2.1
  diff -u -r1.5 -r1.5.2.1
  --- PropertyExtractorTrigger.java     28 Jul 2004 09:36:01 -0000      1.5
  +++ PropertyExtractorTrigger.java     29 Sep 2004 15:01:26 -0000      1.5.2.1
  @@ -45,11 +45,12 @@
   
       public void store(ContentEvent event) throws VetoException {
           try {
  +            String namespaceName = event.getNamespace() == null ? null : 
event.getNamespace().getName();
               NodeRevisionDescriptors descriptors = event.getRevisionDescriptors();
               NodeRevisionDescriptor descriptor = event.getRevisionDescriptor();
               NodeRevisionContent content = event.getRevisionContent();
               if ( content != null && descriptor != null ) {
  -                PropertyExtractor[] extractor = 
ExtractorManager.getInstance().getPropertyExtractors(descriptors, descriptor);
  +                PropertyExtractor[] extractor = 
ExtractorManager.getInstance().getPropertyExtractors(namespaceName, descriptors, 
descriptor);
                   for ( int i = 0; i < extractor.length; i++ ) {
                       Map extractedProperties = extractor[i].extract(new 
ByteArrayInputStream(content.getContentBytes()));
                       for ( Iterator j = extractedProperties.entrySet().iterator(); 
j.hasNext(); ) {
  
  
  
  No                   revision
  No                   revision
  1.5.2.2   +5 -5      
jakarta-slide/src/stores/org/apache/slide/index/TextContentIndexer.java
  
  Index: TextContentIndexer.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/TextContentIndexer.java,v
  retrieving revision 1.5.2.1
  retrieving revision 1.5.2.2
  diff -u -r1.5.2.1 -r1.5.2.2
  --- TextContentIndexer.java   13 Sep 2004 16:52:25 -0000      1.5.2.1
  +++ TextContentIndexer.java   29 Sep 2004 15:01:26 -0000      1.5.2.2
  @@ -142,7 +142,7 @@
            doc.add(Field.Text(CONTENT_TEXT, readContent(revisionDescriptor, 
revisionContent)));
   
            if ( revisionContent != null && revisionDescriptor != null ) {
  -            ContentExtractor[] extractor = 
ExtractorManager.getInstance().getContentExtractors(null, revisionDescriptor);
  +            ContentExtractor[] extractor = 
ExtractorManager.getInstance().getContentExtractors(uri.getNamespace().getName(), 
null, revisionDescriptor);
                    for ( int i = 0; i < extractor.length; i++ ) {
                         Reader reader = extractor[i].extract(new 
ByteArrayInputStream(revisionContent.getContentBytes()));
                         doc.add(Field.Text(CONTENT_TEXT, reader));
  @@ -217,7 +217,7 @@
               doc.add(Field.Text(CONTENT_TEXT, readContent(revisionDescriptor, 
revisionContent)));
   
               if ( revisionContent != null && revisionDescriptor != null ) {
  -                 ContentExtractor[] extractor = 
ExtractorManager.getInstance().getContentExtractors(null, revisionDescriptor);
  +                 ContentExtractor[] extractor = 
ExtractorManager.getInstance().getContentExtractors(uri.getNamespace().getName(), 
null, revisionDescriptor);
                    for ( int i = 0; i < extractor.length; i++ ) {
                         Reader reader = extractor[i].extract(new 
ByteArrayInputStream(revisionContent.getContentBytes()));
                         doc.add(Field.Text(CONTENT_TEXT, reader));
  
  
  
  No                   revision
  No                   revision
  1.48.2.1  +13 -8     jakarta-slide/src/share/org/apache/slide/common/Domain.java
  
  Index: Domain.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/common/Domain.java,v
  retrieving revision 1.48
  retrieving revision 1.48.2.1
  diff -u -r1.48 -r1.48.2.1
  --- Domain.java       28 Jul 2004 09:38:20 -0000      1.48
  +++ Domain.java       29 Sep 2004 15:01:26 -0000      1.48.2.1
  @@ -848,10 +848,15 @@
               }
               
               namespace.loadConfiguration(namespaceConfigurationDefinition);
  -            
  -            
  -            
  -            
  +
  +            try {
  +                Configuration namespaceExtractorsDefinition = 
  +                    configuration.getConfiguration("extractors");
  +                namespace.loadExtractors(namespaceExtractorsDefinition);
  +            } catch (ConfigurationException e) {
  +                // ignore
  +            }
  +
               // preparation to add services, please ignore now
               try {
                   Configuration services = configuration.getConfiguration("services");
  
  
  
  1.65.2.1  +39 -4     jakarta-slide/src/share/org/apache/slide/common/Namespace.java
  
  Index: Namespace.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/common/Namespace.java,v
  retrieving revision 1.65
  retrieving revision 1.65.2.1
  diff -u -r1.65 -r1.65.2.1
  --- Namespace.java    5 Aug 2004 15:42:32 -0000       1.65
  +++ Namespace.java    29 Sep 2004 15:01:26 -0000      1.65.2.1
  @@ -23,6 +23,7 @@
   
   package org.apache.slide.common;
   
  +import java.lang.reflect.Constructor;
   import java.lang.reflect.Method;
   import java.util.Enumeration;
   import java.util.Hashtable;
  @@ -32,6 +33,8 @@
   import javax.transaction.TransactionManager;
   import org.apache.slide.authenticate.CredentialsToken;
   import org.apache.slide.content.ContentInterceptor;
  +import org.apache.slide.extractor.Extractor;
  +import org.apache.slide.extractor.ExtractorManager;
   import org.apache.slide.store.ContentStore;
   import org.apache.slide.store.DefaultIndexer;
   import org.apache.slide.store.IndexStore;
  @@ -45,6 +48,7 @@
   import org.apache.slide.structure.ObjectAlreadyExistsException;
   import org.apache.slide.structure.SubjectNode;
   import org.apache.slide.transaction.SlideTransactionManager;
  +import org.apache.slide.util.conf.Configurable;
   import org.apache.slide.util.conf.Configuration;
   import org.apache.slide.util.conf.ConfigurationException;
   import org.apache.slide.util.logger.Logger;
  @@ -888,6 +892,37 @@
           
       }
       
  +    
  +    void loadExtractors(Configuration namespaceExtractorsDefinition) 
  +     throws SlideException {
  +        
  +        getLogger().log("Loading namespace " + getName() + " 
extractors",LOG_CHANNEL,Logger.INFO);
  +        
  +        Enumeration extractorConfigs = 
namespaceExtractorsDefinition.getConfigurations("extractor");
  +        while (extractorConfigs.hasMoreElements()) {
  +            Configuration extractorConfig = (Configuration) 
extractorConfigs.nextElement();
  +            String classname = extractorConfig.getAttribute("classname");
  +            String uri = extractorConfig.getAttribute("uri", null);
  +            String contentType = extractorConfig.getAttribute("content-type", null);
  +            String namespace = getName();
  +            try {
  +                Class extractorClass = Class.forName(classname);
  +                Extractor extractor = null;
  +                Constructor extractorConstructor = 
extractorClass.getConstructor(new Class[] { String.class, String.class, String.class } 
);
  +                extractor = (Extractor)extractorConstructor.newInstance(new 
String[] { uri, contentType, namespace });
  +                if ( extractor instanceof Configurable ) {
  +                    
((Configurable)extractor).configure(extractorConfig.getConfiguration("configuration"));
  +                }
  +                ExtractorManager.getInstance().addExtractor(extractor);
  +            } catch (ClassCastException e) {
  +                throw new ConfigurationException("Extractor '"+classname+"' is not 
of type Extractor", namespaceExtractorsDefinition);
  +            } catch (ConfigurationException e) {
  +                throw e;
  +            } catch (Exception e) {
  +                throw new ConfigurationException("Extractor '"+classname+"' could 
not be loaded", namespaceExtractorsDefinition);
  +            }
  +        }
  +    }
       
       // -------------------------------------------------------- Private Methods
       
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to