tdraier     2005/11/30 20:24:17 CET

  Modified files:
    core/src/java/org/jahia/hibernate/dao JahiaIndexingJobDAO.java 
    core/src/java/org/jahia/services/fileextraction
                                                    FileExtractor.java
                                                    JahiaFileExtractionService.java
                                                    JahiaFileExtractionServiceImpl.java
                                                    JahiaMSExcelExtractor.java
                                                    JahiaMSPowerPointExtractor.java
                                                    JahiaMSWordExtractor.java
                                                    JahiaOfficeExtractor.java
                                                    MP3Extractor.java
                                                    PDFExtractor.java
                                                    TextExtractor.java
    core/src/java/org/jahia/services/search/lucene/jdbc
                                                        LuceneJDBCSearchIndexer.java
    core/src/java/org/jahia/services/search/lucene
                                                   AbstractLuceneSearchIndexer.java
    core/src/java/org/jahia/services/search/valves
                                                   FieldSearchIndexProcessValveImpl.java
                                                   FileFieldIndexingThread.java
    core/src/java/org/jahia/services/search
                                            JahiaSearchBaseService.java
  Log:
  A few optimizations: removed the useless charset detection, and check whether
  an indexing job already exists before enqueuing a new one.
  
  Revision  Changes    Path
  1.4       +26 -1     
jahia/core/src/java/org/jahia/hibernate/dao/JahiaIndexingJobDAO.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/hibernate/dao/JahiaIndexingJobDAO.java.diff?r1=1.3&r2=1.4&f=h
  1.2       +7 -33     
jahia/core/src/java/org/jahia/services/fileextraction/FileExtractor.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/fileextraction/FileExtractor.java.diff?r1=1.1&r2=1.2&f=h
  1.3       +9 -27     
jahia/core/src/java/org/jahia/services/fileextraction/JahiaFileExtractionService.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/fileextraction/JahiaFileExtractionService.java.diff?r1=1.2&r2=1.3&f=h
  1.6       +10 -33    
jahia/core/src/java/org/jahia/services/fileextraction/JahiaFileExtractionServiceImpl.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/fileextraction/JahiaFileExtractionServiceImpl.java.diff?r1=1.5&r2=1.6&f=h
  1.2       +1 -18     
jahia/core/src/java/org/jahia/services/fileextraction/JahiaMSExcelExtractor.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/fileextraction/JahiaMSExcelExtractor.java.diff?r1=1.1&r2=1.2&f=h
  1.2       +1 -18     
jahia/core/src/java/org/jahia/services/fileextraction/JahiaMSPowerPointExtractor.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/fileextraction/JahiaMSPowerPointExtractor.java.diff?r1=1.1&r2=1.2&f=h
  1.2       +1 -18     
jahia/core/src/java/org/jahia/services/fileextraction/JahiaMSWordExtractor.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/fileextraction/JahiaMSWordExtractor.java.diff?r1=1.1&r2=1.2&f=h
  1.5       +10 -39    
jahia/core/src/java/org/jahia/services/fileextraction/JahiaOfficeExtractor.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/fileextraction/JahiaOfficeExtractor.java.diff?r1=1.4&r2=1.5&f=h
  1.4       +4 -37     
jahia/core/src/java/org/jahia/services/fileextraction/MP3Extractor.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/fileextraction/MP3Extractor.java.diff?r1=1.3&r2=1.4&f=h
  1.5       +13 -70    
jahia/core/src/java/org/jahia/services/fileextraction/PDFExtractor.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/fileextraction/PDFExtractor.java.diff?r1=1.4&r2=1.5&f=h
  1.2       +32 -49    
jahia/core/src/java/org/jahia/services/fileextraction/TextExtractor.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/fileextraction/TextExtractor.java.diff?r1=1.1&r2=1.2&f=h
  1.33      +8 -5      
jahia/core/src/java/org/jahia/services/search/JahiaSearchBaseService.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/search/JahiaSearchBaseService.java.diff?r1=1.32&r2=1.33&f=h
  1.9       +2 -2      
jahia/core/src/java/org/jahia/services/search/lucene/AbstractLuceneSearchIndexer.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/search/lucene/AbstractLuceneSearchIndexer.java.diff?r1=1.8&r2=1.9&f=h
  1.3       +1 -1      
jahia/core/src/java/org/jahia/services/search/lucene/jdbc/LuceneJDBCSearchIndexer.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/search/lucene/jdbc/LuceneJDBCSearchIndexer.java.diff?r1=1.2&r2=1.3&f=h
  1.3       +1 -7      
jahia/core/src/java/org/jahia/services/search/valves/FieldSearchIndexProcessValveImpl.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/search/valves/FieldSearchIndexProcessValveImpl.java.diff?r1=1.2&r2=1.3&f=h
  1.3       +2 -8      
jahia/core/src/java/org/jahia/services/search/valves/FileFieldIndexingThread.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/search/valves/FileFieldIndexingThread.java.diff?r1=1.2&r2=1.3&f=h
  
  
  
  Index: JahiaIndexingJobDAO.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/core/src/java/org/jahia/hibernate/dao/JahiaIndexingJobDAO.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- JahiaIndexingJobDAO.java  30 Nov 2005 16:23:56 -0000      1.3
  +++ JahiaIndexingJobDAO.java  30 Nov 2005 19:24:15 -0000      1.4
  @@ -3,7 +3,9 @@
    */
   package org.jahia.hibernate.dao;
   
  -import org.jahia.hibernate.model.indexingjob.JahiaIndexingJob;
  +import org.jahia.hibernate.model.indexingjob.*;
  +import org.jahia.services.search.indexingscheduler.IndexingJob;
  +import org.jahia.services.search.indexingscheduler.impl.*;
   import org.springframework.orm.hibernate3.HibernateTemplate;
   import org.springframework.orm.hibernate3.support.HibernateDaoSupport;
   
  @@ -67,6 +69,29 @@
   
       public synchronized void save(JahiaIndexingJob job) {
           HibernateTemplate template = getHibernateTemplate();
  +
  +        List old = null;
  +        if ( job instanceof JahiaFieldIndexingJob ){
  +            String query = "from JahiaFieldIndexingJob as indexingJob where 
indexingJob.fieldId = ?";
  +            old = template.find(query, 
((JahiaFieldIndexingJob)job).getFieldId());
  +        } else if ( job instanceof JahiaContainerIndexingJob ) {
  +            String query = "from JahiaContainerIndexingJob as indexingJob 
where indexingJob.ctnId = ?";
  +            old = template.find(query, 
((JahiaContainerIndexingJob)job).getCtnId());
  +        } else if ( job instanceof JahiaContainerListIndexingJob ) {
  +            String query = "from JahiaContainerListIndexingJob as 
indexingJob where indexingJob.ctnListId = ?";
  +            old = template.find(query, 
((JahiaContainerListIndexingJob)job).getCtnListId());
  +        } else if ( job instanceof JahiaPageIndexingJob ) {
  +            String query = "from JahiaPageIndexingJob as indexingJob where 
indexingJob.pageId = ?";
  +            old = template.find(query, 
((JahiaPageIndexingJob)job).getPageId());
  +        } else if ( job instanceof JahiaRemoveFromIndexJob ){
  +            String query = "from JahiaRemoveFromIndexJob as indexingJob 
where indexingJob.keyFieldName = ? and indexingJob.keyFieldValue = ?";
  +            old = template.find(query, new Object[] 
{((JahiaRemoveFromIndexJob)job).getKeyFieldName(), 
((JahiaRemoveFromIndexJob)job).getKeyFieldValue()});
  +        }
  +
  +        if (old != null && !old.isEmpty()) {
  +            job.setId(((JahiaIndexingJob)old.iterator().next()).getId());
  +        }
  +
           template.merge(job);
       }
   }
  
  
  
  Index: FileExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/core/src/java/org/jahia/services/fileextraction/FileExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- FileExtractor.java        14 Feb 2005 14:22:39 -0000      1.1
  +++ FileExtractor.java        30 Nov 2005 19:24:15 -0000      1.2
  @@ -5,34 +5,20 @@
   
   public interface FileExtractor {
   
  +
      /**
  -    *
  -    * @param path String
  +   *
  +   * @param path String
       * @param lastModified long
       * @param fileStream InputStream
       * @throws Exception
       * @return String
  -    */
  -   public abstract ExtractedDocument getExtractedDocument(
  -                                                  String path,
  -                                             long lastModified,
  -                                             InputStream fileStream)
  -   throws Exception;
  -
  -   /**
  -   *
  -   * @param path String
  -   * @param lastModified long
  -   * @param fileStream InputStream
  -   * @param charSet String
  -   * @throws Exception
  -   * @return String
      */
     public abstract ExtractedDocument getExtractedDocument(
  -                                            String path,
  -                                            long lastModified,
  -                                            InputStream fileStream,
  -                                            String charSet)
  +           String path,
  +           long lastModified,
  +           InputStream fileStream
  +   )
     throws Exception;
      
       /**
  @@ -48,16 +34,4 @@
                                                 InputStream fileStream)
       throws Exception;
   
  -    /**
  -     *
  -     * @param path String
  -     * @param lastModified long
  -     * @param fileStream InputStream
  -     * @return String
  -     */
  -    public abstract String getContentAsString(String path,
  -                                              long lastModified,
  -                                              InputStream fileStream,
  -                                              String charSet) throws 
Exception;
  -
   }
  
  
  
  Index: JahiaFileExtractionService.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/core/src/java/org/jahia/services/fileextraction/JahiaFileExtractionService.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- JahiaFileExtractionService.java   30 Sep 2005 15:34:01 -0000      1.2
  +++ JahiaFileExtractionService.java   30 Nov 2005 19:24:15 -0000      1.3
  @@ -36,22 +36,6 @@
       public abstract FileExtractor getFileExtractor(String contentType)
           throws JahiaException;
   
  -    /**
  -     *  
  -     * @param contentType
  -     * @param cacheKey , a unique key used to cache the extraction ( 
serializing on disk )
  -     * @param lastModified
  -     * @param allowCache if true, use a cached extractio if any
  -     * @param fileStream
  -     * @return
  -     * @throws Exception
  -     */
  -    public abstract ExtractedDocument getExtractedDocument( String 
contentType,
  -                                                   String cacheKey,
  -                                                   long lastModified,
  -                                                   boolean allowCache,
  -                                                   InputStream fileStream)
  -    throws Exception;
   
       /**
        *  
  @@ -60,29 +44,27 @@
        * @param lastModified
        * @param allowCache if true, use a cached extractio if any
        * @param fileStream
  -     * @param charSet
        * @return
        * @throws Exception
        */
  -    public abstract ExtractedDocument getExtractedDocument( String 
contentType,
  -                                                   String cacheKey,
  -                                                   long lastModified,
  -                                                   boolean allowCache,
  -                                                   InputStream fileStream,
  -                                                   String charSet)
  +    public abstract ExtractedDocument getExtractedDocument(String 
contentType,
  +                                                           String cacheKey,
  +                                                           long lastModified,
  +                                                           boolean 
allowCache,
  +                                                           InputStream 
fileStream
  +    )
       throws Exception;
   
       /**
        *
        * @param contentType
        * @param cacheKey , a unique key used to cache the extraction ( 
serializing on disk )
  -     * @param charSet
        * @return
        * @throws Exception
        */
  -    public abstract ExtractedDocument getAlreadyExtractedDocument(  String 
contentType,
  -                                                                    String 
cacheKey,
  -                                                                    String 
charSet )
  +    public abstract ExtractedDocument getAlreadyExtractedDocument(String 
contentType,
  +                                                                  String 
cacheKey
  +    )
       throws Exception;
   
   
  
  
  
  Index: JahiaFileExtractionServiceImpl.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/core/src/java/org/jahia/services/fileextraction/JahiaFileExtractionServiceImpl.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- JahiaFileExtractionServiceImpl.java       30 Sep 2005 15:34:01 -0000      
1.5
  +++ JahiaFileExtractionServiceImpl.java       30 Nov 2005 19:24:15 -0000      
1.6
  @@ -21,11 +21,9 @@
   import java.io.ObjectOutput;
   import java.io.ObjectOutputStream;
   import java.util.Properties;
  -import java.util.HashMap;
   
   import org.jahia.exceptions.JahiaException;
   import org.jahia.exceptions.JahiaInitializationException;
  -import org.jahia.settings.SettingsBean;
   import org.jahia.utils.JahiaTools;
   
   /**
  @@ -126,39 +124,19 @@
       /**
        *  
        * @param contentType
  -     * @param cacheKey a unique key used to cache the extraction ( 
serializing on disk )
  -     * @param lastModified
  -     * @param allowCache if true, use a cached extractio if any
  -     * @param fileStream
  -     * @return
  -     * @throws Exception
  -     */
  -    public ExtractedDocument getExtractedDocument( String contentType,
  -                                                   String cacheKey,
  -                                                   long lastModified,
  -                                                   boolean allowCache,
  -                                                   InputStream fileStream)
  -    throws Exception {
  -        return getExtractedDocument(contentType, cacheKey,lastModified, 
allowCache, fileStream, null);
  -    }
  -
  -    /**
  -     *  
  -     * @param contentType
        * @param cacheKey , a unique key used to cache the extraction ( 
serializing on disk )
        * @param lastModified
        * @param allowCache if true, use a cached extractio if any
        * @param fileStream
  -     * @param charSet
        * @return
        * @throws Exception
        */
  -    public ExtractedDocument getExtractedDocument( String contentType,
  -                                                   String cacheKey,
  -                                                   long lastModified,
  -                                                   boolean allowCache,
  -                                                   InputStream fileStream,
  -                                                   String charSet )
  +    public ExtractedDocument getExtractedDocument(String contentType,
  +                                                  String cacheKey,
  +                                                  long lastModified,
  +                                                  boolean allowCache,
  +                                                  InputStream fileStream
  +    )
       throws Exception {
   
           ExtractedDocument extDoc = null;
  @@ -194,7 +172,7 @@
                       }
                   }
                   if ( extDoc == null ){
  -                    extDoc = 
extractor.getExtractedDocument(cacheKey,lastModified,fileStream,charSet);
  +                    extDoc = 
extractor.getExtractedDocument(cacheKey,lastModified,fileStream);
                       if ( extDoc == null ){
                           extDoc = new ExtractedDocumentImpl();
                       }
  @@ -221,13 +199,12 @@
        *
        * @param contentType
        * @param cacheKey , a unique key used to cache the extraction ( 
serializing on disk )
  -     * @param charSet
        * @return
        * @throws Exception
        */
  -    public ExtractedDocument getAlreadyExtractedDocument(   String 
contentType,
  -                                                            String cacheKey,
  -                                                            String charSet )
  +    public ExtractedDocument getAlreadyExtractedDocument(String contentType,
  +                                                         String cacheKey
  +    )
       throws Exception {
   
           ExtractedDocument extDoc = null;
  
  
  
  Index: JahiaMSExcelExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/core/src/java/org/jahia/services/fileextraction/JahiaMSExcelExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- JahiaMSExcelExtractor.java        14 Feb 2005 14:22:39 -0000      1.1
  +++ JahiaMSExcelExtractor.java        30 Nov 2005 19:24:15 -0000      1.2
  @@ -26,22 +26,6 @@
   
       public JahiaMSExcelExtractor(){
       }
  -   
  -    /**
  -     * This method returns all the text of an HDF file.
  -     *
  -     * @param path String
  -     * @param lastModified long
  -     * @param fileStream InputStream
  -     * @throws IOException thrown if there was an error while parsing the
  -     * file format, notably if the file is an RTF file instead of a HDF file.
  -     * @return String
  -     */
  -    public String getContentAsString(String path, long lastModified,
  -                                     InputStream fileStream)
  -    throws IOException {
  -        return getContentAsString(path, lastModified, fileStream, null);
  -    }
   
       /**
        * This method returns all the text of an HDF file.
  @@ -49,12 +33,11 @@
        * @param path String
        * @param lastModified long
        * @param fileStream InputStream
  -     * @param charSet String
        * @throws IOException thrown if there was an error while parsing the 
file
        * @return String
        */
       public String getContentAsString(String path, long lastModified,
  -                                     InputStream fileStream, String charSet)
  +                                     InputStream fileStream)
       throws IOException {
        MSExcelExtractor ex = new MSExcelExtractor("","","");
           try {
  
  
  
  Index: JahiaMSPowerPointExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/core/src/java/org/jahia/services/fileextraction/JahiaMSPowerPointExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- JahiaMSPowerPointExtractor.java   14 Feb 2005 14:22:39 -0000      1.1
  +++ JahiaMSPowerPointExtractor.java   30 Nov 2005 19:24:15 -0000      1.2
  @@ -34,28 +34,11 @@
        * @param path String
        * @param lastModified long
        * @param fileStream InputStream
  -     * @throws IOException thrown if there was an error while parsing the
  -     * file format, notably if the file is an RTF file instead of a HDF file.
  -     * @return String
  -     */
  -    public String getContentAsString(String path, long lastModified,
  -                                     InputStream fileStream)
  -    throws IOException {
  -        return getContentAsString(path, lastModified, fileStream, null);
  -    }
  -
  -    /**
  -     * This method returns all the text of an HDF file.
  -     *
  -     * @param path String
  -     * @param lastModified long
  -     * @param fileStream InputStream
  -     * @param charSet String
        * @throws IOException thrown if there was an error while parsing the 
file
        * @return String
        */
       public String getContentAsString(String path, long lastModified,
  -                                     InputStream fileStream, String charSet)
  +                                     InputStream fileStream)
       throws IOException {
        MSPowerPointExtractor ex = new MSPowerPointExtractor("","","");
           try {
  
  
  
  Index: JahiaMSWordExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/core/src/java/org/jahia/services/fileextraction/JahiaMSWordExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- JahiaMSWordExtractor.java 14 Feb 2005 14:22:39 -0000      1.1
  +++ JahiaMSWordExtractor.java 30 Nov 2005 19:24:15 -0000      1.2
  @@ -33,28 +33,11 @@
        * @param path String
        * @param lastModified long
        * @param fileStream InputStream
  -     * @throws IOException thrown if there was an error while parsing the
  -     * file format, notably if the file is an RTF file instead of a HDF file.
  -     * @return String
  -     */
  -    public String getContentAsString(String path, long lastModified,
  -                                     InputStream fileStream)
  -    throws IOException {
  -        return getContentAsString(path, lastModified, fileStream, null);
  -    }
  -
  -    /**
  -     * This method returns all the text of an HDF file.
  -     *
  -     * @param path String
  -     * @param lastModified long
  -     * @param fileStream InputStream
  -     * @param charSet String
        * @throws IOException thrown if there was an error while parsing the 
file
        * @return String
        */
       public String getContentAsString(String path, long lastModified,
  -                                     InputStream fileStream, String charSet)
  +                                     InputStream fileStream)
       throws IOException {
        MSWordExtractor ex = new MSWordExtractor("","","");
           try {
  
  
  
  Index: JahiaOfficeExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/core/src/java/org/jahia/services/fileextraction/JahiaOfficeExtractor.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- JahiaOfficeExtractor.java 11 Oct 2005 14:17:31 -0000      1.4
  +++ JahiaOfficeExtractor.java 30 Nov 2005 19:24:15 -0000      1.5
  @@ -9,7 +9,6 @@
   
   import org.apache.commons.io.CopyUtils;
   import org.apache.slide.extractor.OfficeExtractor;
  -import org.apache.slide.common.Domain;
   import org.apache.slide.common.PropertyName;
   import org.jahia.services.sites.JahiaSitesSlideService;
   
  @@ -20,36 +19,20 @@
           org.apache.log4j.Logger.getLogger (JahiaOfficeExtractor.class);
           
   
  +
      /**
  -    *
  -    * @param path String
  +   *
  +   * @param path String
       * @param lastModified long
       * @param fileStream InputStream
       * @throws Exception
       * @return String
  -    */
  -   public ExtractedDocument getExtractedDocument(
  -                                                  String path,
  -                                             long lastModified,
  -                                             InputStream fileStream)
  -   throws Exception{
  -         return this.getExtractedDocument(path,lastModified,fileStream,null);
  -   }
  -
  -   /**
  -   *
  -   * @param path String
  -   * @param lastModified long
  -   * @param fileStream InputStream
  -   * @param charSet String
  -   * @throws Exception
  -   * @return String
      */
      public ExtractedDocument getExtractedDocument(
  -                                            String path,
  -                                            long lastModified,
  -                                            InputStream fileStream,
  -                                            String charSet)
  +           String path,
  +           long lastModified,
  +           InputStream fileStream
  +   )
      throws Exception{
   
          //create a tmp output stream with the size of the content.
  @@ -58,23 +41,11 @@
          out.flush();
          byte[] contents = out.toByteArray();            
          ExtractedDocument extDoc = this.getPropertiesExtractedDocument(new 
ByteArrayInputStream(contents));
  -       extDoc.setContent(this.getContentAsString(path,lastModified,new 
ByteArrayInputStream(contents),charSet));
  +       extDoc.setContent(this.getContentAsString(path,lastModified,new 
ByteArrayInputStream(contents)));
       return extDoc;
      
      }
      
  -    /**
  -     *
  -     * @param path String
  -     * @param lastModified long
  -     * @param fileStream InputStream
  -     * @throws Exception
  -     * @return String
  -     */
  -    public abstract String getContentAsString(String path,
  -                                              long lastModified,
  -                                              InputStream fileStream)
  -    throws Exception;
   
       /**
        *
  @@ -85,8 +56,8 @@
        */
       public abstract String getContentAsString(String path,
                                                 long lastModified,
  -                                              InputStream fileStream,
  -                                              String charSet) throws 
Exception;
  +                                              InputStream fileStream
  +    ) throws Exception;
   
       public Map extract(InputStream content){
           Map map = null;
  
  
  
  Index: MP3Extractor.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/core/src/java/org/jahia/services/fileextraction/MP3Extractor.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- MP3Extractor.java 25 Feb 2005 12:50:45 -0000      1.3
  +++ MP3Extractor.java 30 Nov 2005 19:24:15 -0000      1.4
  @@ -39,30 +39,13 @@
        * @param path String
        * @param lastModified long
        * @param fileStream InputStream
  -     * @throws Exception
  -     * @return String
  -     * @todo Implement this org.jahia.services.fileextraction.FileExtractor
  -     *   method
  -     */
  -    public String getContentAsString (String path, long lastModified,
  -                                      InputStream fileStream)
  -        throws Exception {
  -        return getContentAsString(path, lastModified, fileStream, null);
  -    }
  -
  -    /**
  -     *
  -     * @param path String
  -     * @param lastModified long
  -     * @param fileStream InputStream
  -     * @param charSet String
        * @return String
        * @throws Exception
        * @todo Implement this org.jahia.services.fileextraction.FileExtractor
        *   method
        */
  -    public String getContentAsString (String path, long lastModified,
  -                                      InputStream fileStream, String charSet)
  +    public String getContentAsString(String path, long lastModified,
  +                                     InputStream fileStream)
           throws Exception {
   
           StringBuffer contentBuf = new StringBuffer();
  @@ -106,35 +89,19 @@
           return contentBuf.toString();
       }
   
  -    /**
  -     *
  -     * @param path String
  -     * @param lastModified long
  -     * @param fileStream InputStream
  -     * @throws Exception
  -     * @return String
  -     * @todo Implement this org.jahia.services.fileextraction.FileExtractor
  -     *   method
  -     */
  -    public ExtractedDocument getExtractedDocument (String path,
  -        long lastModified, InputStream fileStream)
  -        throws Exception {
  -        return getExtractedDocument(path, lastModified, fileStream, null);
  -    }
   
       /**
        *
        * @param path String
        * @param lastModified long
        * @param fileStream InputStream
  -     * @param charSet String
        * @throws Exception
        * @return String
        * @todo Implement this org.jahia.services.fileextraction.FileExtractor
        *   method
        */
  -    public ExtractedDocument getExtractedDocument (String path,
  -        long lastModified, InputStream fileStream, String charSet)
  +    public ExtractedDocument getExtractedDocument(String path,
  +                                                  long lastModified, 
InputStream fileStream)
           throws Exception {
   
           ExtractedDocumentImpl extDoc = new ExtractedDocumentImpl();
  
  
  
  Index: PDFExtractor.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/core/src/java/org/jahia/services/fileextraction/PDFExtractor.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- PDFExtractor.java 8 Apr 2005 12:48:40 -0000       1.4
  +++ PDFExtractor.java 30 Nov 2005 19:24:15 -0000      1.5
  @@ -6,13 +6,8 @@
   import org.jahia.utils.*;
   import org.jahia.services.sites.JahiaSitesSlideService;
   import org.pdfbox.cos.COSDictionary;
  -import org.pdfbox.cos.COSDocument;
   import org.pdfbox.cos.COSName;
   import org.pdfbox.cos.COSString;
  -import org.pdfbox.encryption.DocumentEncryption;
  -import org.pdfbox.exceptions.CryptographyException;
  -import org.pdfbox.exceptions.InvalidPasswordException;
  -import org.pdfbox.pdfparser.PDFParser;
   import org.pdfbox.util.PDFTextStripper;
   import org.pdfbox.util.*;
   import org.pdfbox.pdmodel.PDDocument;
  @@ -52,10 +47,10 @@
        * @return String
        */
       public synchronized ExtractedDocument getExtractedDocument(
  -                                            String path,
  -                                            long lastModified,
  -                                            InputStream fileStream,
  -                                            String charSet)
  +            String path,
  +            long lastModified,
  +            InputStream fileStream
  +    )
       throws Exception{
   
           ExtractedDocumentImpl extDoc = new ExtractedDocumentImpl();
  @@ -72,22 +67,13 @@
               }
   
               //create a tmp output stream with the size of the content.
  -            ByteArrayOutputStream out = new ByteArrayOutputStream();
  -            OutputStreamWriter writer = null;
  -            if ( charSet != null ) {
  -                writer = new OutputStreamWriter(out,charSet);
  -            } else {
  -                writer = new OutputStreamWriter(out);
  -            }
  +            StringWriter writer = new StringWriter();
  +
               PDFTextStripper stripper = new PDFTextStripper();
               stripper.writeText(pdfDocument, writer);
               writer.close();
   
  -            String content = out.toString(charSet);
  -            if ( content == null ){
  -                content = "";
  -            }
  -            extDoc.setContent(content);
  +            extDoc.setContent(writer.toString());
   
               PDDocumentInformation info = pdfDocument.getDocumentInformation();
               /*
  @@ -168,40 +154,9 @@
        * @throws Exception
        * @return String
        */
  -    public synchronized ExtractedDocument getExtractedDocument(
  -                                             String path,
  -                                             long lastModified,
  -                                             InputStream fileStream)
  -    throws Exception{
  -        return this.getExtractedDocument(path, lastModified, fileStream, null);
  -    }
  -
  -    /**
  -     *
  -     * @param path String
  -     * @param lastModified long
  -     * @param fileStream InputStream
  -     * @throws Exception
  -     * @return String
  -     */
       public String getContentAsString(String path, long lastModified,
  -                                     InputStream fileStream)
  -    throws Exception {
  -       return getContentAsString(path, lastModified, fileStream, null);
  -    }
  -
  -    /**
  -     *
  -     * @param path String
  -     * @param lastModified long
  -     * @param fileStream InputStream
  -     * @param charSet String
  -     * @throws Exception
  -     * @return String
  -     */
  -    public String getContentAsString(String path, long lastModified,
  -                                     InputStream fileStream,
  -                                     String charSet) throws Exception {
  +                                     InputStream fileStream
  +    ) throws Exception {
           this.path = path;
           this.lastModifed = lastModified;
           String strVal = null;
  @@ -210,7 +165,7 @@
                Reader pdfReader = null;
                try {
                    long startTime = System.currentTimeMillis();
  -                 pdfReader = this.getPDFReader(fileStream, charSet);
  +                 pdfReader = this.getPDFReader(fileStream);
                    long elapsedTime = System.currentTimeMillis() - startTime;
                    logger.info("Finished pdf extraction with PDFBox in " +
                                elapsedTime + "ms.");
  @@ -238,12 +193,7 @@
            return strVal;
       }
   
  -    public Reader getPDFReader(InputStream fileStream) throws IOException {
  -        return getPDFReader(fileStream, null);
  -    }
  -
  -    public Reader getPDFReader(InputStream fileStream,
  -                               String charSet) throws IOException
  +    public Reader getPDFReader(InputStream fileStream) throws IOException
       {
           Reader reader = null;
           PDDocument pdfDocument = null;
  @@ -254,18 +204,11 @@
                   pdfDocument.decrypt("");
               }
               //create a tmp output stream with the size of the content.
  -            ByteArrayOutputStream out = new ByteArrayOutputStream();
  -            OutputStreamWriter writer = new OutputStreamWriter(out);
  +            StringWriter writer = new StringWriter();
               PDFTextStripper stripper = new PDFTextStripper();
               stripper.writeText(pdfDocument, writer);
               writer.close();
  -            byte[] contents = out.toByteArray();
  -            if ( charSet != null ){
  -                reader = new InputStreamReader(new ByteArrayInputStream(contents),
  -                                             charSet);
  -            } else {
  -                reader = new InputStreamReader(new ByteArrayInputStream(contents));
  -            }
  +            reader = new StringReader(writer.toString());
           }
           catch( Throwable t )
           {
  
  
  
  Index: TextExtractor.java
  ===================================================================
  RCS file: /home/cvs/repository/jahia/core/src/java/org/jahia/services/fileextraction/TextExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- TextExtractor.java        14 Feb 2005 14:22:39 -0000      1.1
  +++ TextExtractor.java        30 Nov 2005 19:24:15 -0000      1.2
  @@ -3,8 +3,11 @@
   import java.io.IOException;
   import java.io.InputStream;
   import java.io.InputStreamReader;
  +import java.io.ByteArrayOutputStream;
   
   import org.jahia.utils.FileUtils;
  +import org.mozilla.intl.chardet.nsDetector;
  +import org.mozilla.intl.chardet.nsPSMDetector;
   
   
   /**
  @@ -27,39 +30,22 @@
       /**
       *
       * @param path String
  -    * @param lastModified long
  -    * @param fileStream InputStream
  -    * @throws Exception
  -    * @return String
  -    */
  -   public ExtractedDocument getExtractedDocument(
  -                                             String path,
  -                                             long lastModified,
  -                                             InputStream fileStream)
  -   throws Exception{
  -         return getExtractedDocument(path,lastModified,fileStream,null);
  -   }
  -
  -    /**
  -    *
  -    * @param path String
  -    * @param lastModified long
  -    * @param fileStream InputStream
  -    * @param charSet String
  -    * @throws Exception
  -    * @return String
  +     * @param lastModified long
  +     * @param fileStream InputStream
  +     * @throws Exception
  +     * @return String
       */
      public ExtractedDocument getExtractedDocument(
  -                                             String path,
  -                                             long lastModified,
  -                                             InputStream fileStream,
  -                                             String charSet)
  +            String path,
  +            long lastModified,
  +            InputStream fileStream
  +    )
      throws Exception{
  -        ExtractedDocumentImpl extDoc = new ExtractedDocumentImpl();
  -       extDoc.setContent(this.getContentAsString(path,lastModified,fileStream,charSet));
  +          ExtractedDocumentImpl extDoc = new ExtractedDocumentImpl();
  +       extDoc.setContent(this.getContentAsString(path,lastModified,fileStream));
          return extDoc;
      }
  -    
  +
       /**
        * This method returns all the text of an HDF file.
        *
  @@ -73,29 +59,26 @@
       public String getContentAsString(String path, long lastModified,
                                        InputStream fileStream)
       throws IOException {
  -        return getContentAsString(path, lastModified, fileStream, null);
  -    }
   
  -    /**
  -     * This method returns all the text of an HDF file.
  -     *
  -     * @param path String
  -     * @param lastModified long
  -     * @param fileStream InputStream
  -     * @param charSet String
  -     * @throws IOException thrown if there was an error while parsing the
  -     * file format, notably if the file is an RTF file instead of a HDF file.
  -     * @return String
  -     */
  -    public String getContentAsString(String path, long lastModified,
  -                                     InputStream fileStream, String charSet)
  -    throws IOException {
  -        InputStreamReader reader = null;
  -        if ( charSet != null ){
  -            reader = new InputStreamReader(fileStream, charSet);
  +
  +        ByteArrayOutputStream baos = new ByteArrayOutputStream();
  +        byte[] b = new byte[1024];
  +        int l = 0;
  +        while ((l = fileStream.read(b)) > 0) {
  +            baos.write(b,0,l);
  +        }
  +        nsDetector det = new nsDetector(nsPSMDetector.ALL);
  +        byte[] bytes = baos.toByteArray();
  +        det.Init(null);
  +        det.DoIt(bytes, bytes.length, true);
  +        det.DataEnd();
  +
  +        // charset detection does not seem to work very well
  +        String[]charSets = det.getProbableCharsets();
  +        if (charSets.length>0) {
  +            return new String(bytes,charSets[0]);
           } else {
  -            reader = new InputStreamReader(fileStream);
  +            return new String(bytes);
           }
  -        return FileUtils.readerToString(reader);
       }
   }
  
  
  
  Index: LuceneJDBCSearchIndexer.java
  ===================================================================
  RCS file: /home/cvs/repository/jahia/core/src/java/org/jahia/services/search/lucene/jdbc/LuceneJDBCSearchIndexer.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- LuceneJDBCSearchIndexer.java      27 Apr 2005 13:37:58 -0000      1.2
  +++ LuceneJDBCSearchIndexer.java      30 Nov 2005 19:24:16 -0000      1.3
  @@ -26,7 +26,7 @@
   /**
    * Created by IntelliJ IDEA.
    * User: hollis
  - * Date: 15 févr. 2005
  + * Date: 15 f�vr. 2005
    * Time: 13:05:57
    * To change this template use File | Settings | File Templates.
    */
  
  
  
  Index: AbstractLuceneSearchIndexer.java
  ===================================================================
  RCS file: /home/cvs/repository/jahia/core/src/java/org/jahia/services/search/lucene/AbstractLuceneSearchIndexer.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- AbstractLuceneSearchIndexer.java  29 Nov 2005 13:12:27 -0000      1.8
  +++ AbstractLuceneSearchIndexer.java  30 Nov 2005 19:24:16 -0000      1.9
  @@ -24,7 +24,7 @@
   /**
    * Created by IntelliJ IDEA.
    * User: hollis
  - * Date: 15 févr. 2005
  + * Date: 15 f�vr. 2005
    * Time: 13:05:57
    * To change this template use File | Settings | File Templates.
    */
  @@ -859,4 +859,4 @@
           }
       }
   
  -}
  +}
  \ No newline at end of file
  
  
  
  Index: FieldSearchIndexProcessValveImpl.java
  ===================================================================
  RCS file: /home/cvs/repository/jahia/core/src/java/org/jahia/services/search/valves/FieldSearchIndexProcessValveImpl.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- FieldSearchIndexProcessValveImpl.java     11 Oct 2005 14:18:43 -0000      1.2
  +++ FieldSearchIndexProcessValveImpl.java     30 Nov 2005 19:24:17 -0000      1.3
  @@ -150,16 +150,10 @@
                       doc.setFieldValue(JahiaSearchConstant.FILE_CONTENT_TYPE,contentType);
                       if (contentType != null && !file.getPath().equals("#")) {
                           try {
  -                            InputStream ins = file.downloadFile();
  -                            String charSet = null; // by default open as ascii
  -                            CharsetDetection charsetDet = new CharsetDetection();
  -                            charsetDet.charsetDetection(ins);
  -                            charSet = charsetDet.getCharset();
  -                            ins.close();
                               ExtractedDocument extDoc = ServicesRegistry
                                       .getInstance().getFileExtractionService()
                                       .getAlreadyExtractedDocument(
  -                                            contentType,file.getPath(),charSet);
  +                                            contentType,file.getPath());
                               if ( extDoc != null ){
                                   strVal = extDoc.getContentAsString();
                                   doc.addFieldValue(JahiaSearchConstant.FILE_CONTENT_FULLTEXT_SEARCH_FIELD,strVal);
  
  
  
  Index: FileFieldIndexingThread.java
  ===================================================================
  RCS file: /home/cvs/repository/jahia/core/src/java/org/jahia/services/search/valves/FileFieldIndexingThread.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- FileFieldIndexingThread.java      24 Oct 2005 12:42:29 -0000      1.2
  +++ FileFieldIndexingThread.java      30 Nov 2005 19:24:17 -0000      1.3
  @@ -52,12 +52,6 @@
                       String contentType = fField.getType();
                       if (contentType != null && !file.getPath().equals("#")) {
                           try {
  -                            InputStream ins = file.downloadFile();
  -                            String charSet = null; // by default open as ascii
  -                            CharsetDetection charsetDet = new CharsetDetection();
  -                            charsetDet.charsetDetection(ins);
  -                            charSet = charsetDet.getCharset();
  -                            ins.close();
                               long lastModifiedDate = System.currentTimeMillis();
                               try {
                                   lastModifiedDate = file.getJahiaFileField()
  @@ -65,10 +59,10 @@
                               } catch (Throwable t) {
                                   logger.debug(t);
                               }
  -                            ins = file.downloadFile();
  +                            InputStream ins = file.downloadFile();
                               ServicesRegistry.getInstance().getFileExtractionService()
                                       .getExtractedDocument(contentType, file.getPath(), lastModifiedDate,
  -                                            true, ins, charSet);
  +                                            true, ins);
                               ins.close();
                               ServicesRegistry.getInstance().getJahiaSearchService()
                                       .indexField(field.getID(), this.context.getUser(),false, false);
  
  
  
  Index: JahiaSearchBaseService.java
  ===================================================================
  RCS file: /home/cvs/repository/jahia/core/src/java/org/jahia/services/search/JahiaSearchBaseService.java,v
  retrieving revision 1.32
  retrieving revision 1.33
  diff -u -r1.32 -r1.33
  --- JahiaSearchBaseService.java       30 Nov 2005 16:25:16 -0000      1.32
  +++ JahiaSearchBaseService.java       30 Nov 2005 19:24:17 -0000      1.33
  @@ -47,7 +47,6 @@
   import org.jahia.services.scheduler.SchedulerService;
   import org.jahia.services.search.indexingscheduler.IndexingJob;
   import org.jahia.services.search.indexingscheduler.IndexingJobConsummer;
  -import org.jahia.services.search.indexingscheduler.IndexingJobTools;
   import org.jahia.services.search.indexingscheduler.impl.ContainerIndexingJob;
   import org.jahia.services.search.indexingscheduler.impl.FieldIndexingJob;
   import org.jahia.services.search.indexingscheduler.impl.PageIndexingJob;
  @@ -290,16 +289,14 @@
           while ( !this.disabled && this.localIndexing ){
               cachedAdminUsers = new HashMap();
               try {
  -                Thread.sleep(2000);
  +                Thread.sleep(20000);
               } catch ( InterruptedException inte ){
               }
               if ( sitesServ == null || !sitesServ.isStarted() ){
                   continue;
               }
               try {
  -                jobsList = IndexingJobTools.resolveIndexingJobs(
  -                    this.indJobMgr.getIndexingJobsAfter(this.getLastIndexingJobTime(),
  -                            false));
  +                jobsList = this.indJobMgr.getIndexingJobsAfter(this.getLastIndexingJobTime(),false);
                   Iterator jobs = jobsList.iterator();
                   if ( jobsList.size()>0 ){
                       while (jobs.hasNext()) {
  @@ -552,11 +549,13 @@
                   "".equals(keyFieldName.trim()))
               return;
   
  +
           RemoveFromIndexJob job = null;
           if (allowQueuing || (!allowQueuing && notifyCluster)) {
               job = new RemoveFromIndexJob(
                       siteId, keyFieldName, keyFieldValue, user.getUserKey(),
                       System.currentTimeMillis());
  +
           }
   
           if (!allowQueuing) {
  @@ -816,6 +815,7 @@
               if (allowQueuing || (!allowQueuing && notifyCluster)) {
                   indJob =
                           new ContainerIndexingJob(ctnId, user.getUserKey(), System.currentTimeMillis());
  +
               }
               if (!allowQueuing) {
                   if (notifyCluster) {
  @@ -912,6 +912,8 @@
               if (allowQueuing || (!allowQueuing && notifyCluster)) {
                   indJob =
                           new PageIndexingJob(pageId, user.getUserKey(), System.currentTimeMillis());
  +
  +
               }
               if (!allowQueuing) {
                   if (notifyCluster) {
  @@ -1011,6 +1013,7 @@
               if (allowQueuing || (!allowQueuing && notifyCluster)) {
                   indJob =
                           new FieldIndexingJob(fieldID, user.getUserKey(), System.currentTimeMillis());
  +
               }
               if (!allowQueuing) {
                   if (notifyCluster) {
  

Reply via email to