knguyen     2005/04/27 11:11:36 CEST

  Modified files:        (Branch: JAHIA-4-1-BRANCH)
    src/java/org/jahia/services/search AddedField.java 
                                       JahiaIndexableDocument.java 
                                       JahiaIndexableDocumentImpl.java 
                                       JahiaSearchBaseService.java 
  Added files:           (Branch: JAHIA-4-1-BRANCH)
    src/java/org/jahia/services/search IndexationJobDetail.java 
                                       ScheduledIndexationJob.java 
  Log:
  - Move indexation of files into a separate scheduled job so that the main 
indexation thread can continue indexing all other fields.
  
  Revision       Changes    Path
  1.14.2.8.2.4   +29 -0     
jahia/src/java/org/jahia/services/search/AddedField.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/AddedField.java.diff?r1=1.14.2.8.2.3&r2=1.14.2.8.2.4&f=h
  1.1.2.1        +51 -0     
jahia/src/java/org/jahia/services/search/IndexationJobDetail.java (new)
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/IndexationJobDetail.java?rev=1.1.2.1&content-type=text/plain
  1.5.4.1        +16 -3     
jahia/src/java/org/jahia/services/search/JahiaIndexableDocument.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/JahiaIndexableDocument.java.diff?r1=1.5&r2=1.5.4.1&f=h
  1.5.2.1.2.1    +20 -8     
jahia/src/java/org/jahia/services/search/JahiaIndexableDocumentImpl.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/JahiaIndexableDocumentImpl.java.diff?r1=1.5.2.1&r2=1.5.2.1.2.1&f=h
  1.42.2.15.2.2  +56 -1     
jahia/src/java/org/jahia/services/search/JahiaSearchBaseService.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/JahiaSearchBaseService.java.diff?r1=1.42.2.15.2.1&r2=1.42.2.15.2.2&f=h
  1.1.2.1        +40 -0     
jahia/src/java/org/jahia/services/search/ScheduledIndexationJob.java (new)
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/ScheduledIndexationJob.java?rev=1.1.2.1&content-type=text/plain
  
  
  
  
  
  
  
  
  
  Index: AddedField.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/src/java/org/jahia/services/search/Attic/AddedField.java,v
  retrieving revision 1.14.2.8.2.3
  retrieving revision 1.14.2.8.2.4
  diff -u -r1.14.2.8.2.3 -r1.14.2.8.2.4
  --- AddedField.java   8 Apr 2005 13:05:30 -0000       1.14.2.8.2.3
  +++ AddedField.java   27 Apr 2005 09:11:35 -0000      1.14.2.8.2.4
  @@ -15,6 +15,8 @@
   import org.jahia.services.fileextraction.ExtractedDocument;
   import org.jahia.utils.fileparsers.*;
   import org.jahia.utils.JahiaTools;
  +import org.jahia.tools.files.FileWatcherJob;
  +import org.quartz.*;
   
   /**
    * <p>Title: This class represents a field wrapper used by search engine</p>
  @@ -348,4 +350,31 @@
   
           this.setField (this.getKeyFieldName() , this.getKey ());
       }
  +
  +    /**
  +     * If this document can be scheduled ( indexation can be delayed, i.e in 
case of File Field Document which can take a lot
  +     * of time when parsing pdf file ), it should return true and add itself 
in the indexationJob
  +     *
  +     * By default, return false.
  +     *
  +     * @param indexationJobDetail
  +     * @return
  +     */
  +    public boolean scheduled(IndexationJobDetail indexationJobDetail){
  +        if ( this.getType() == FieldTypes.FILE &&
  +                !this.alreadyLoadedValues.booleanValue()
  +                && indexationJobDetail != null ){
  +            indexationJobDetail.addDocument(this);
  +            return true;
  +        }
  +        return false;
  +    }
  +
  +    /**
  +     * load all datas
  +     */
  +    public void doScheduledLoad(){
  +        this.getFields();
  +    }
  +
   }
  
  
  
  Index: JahiaIndexableDocument.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/src/java/org/jahia/services/search/Attic/JahiaIndexableDocument.java,v
  retrieving revision 1.5
  retrieving revision 1.5.4.1
  diff -u -r1.5 -r1.5.4.1
  --- JahiaIndexableDocument.java       10 Feb 2004 19:39:11 -0000      1.5
  +++ JahiaIndexableDocument.java       27 Apr 2005 09:11:35 -0000      1.5.4.1
  @@ -1,5 +1,7 @@
   package org.jahia.services.search;
   
  +import org.quartz.Job;
  +
   import java.util.Hashtable;
   //import org.apache.jetspeed.services.search.ParsedObject;
   
  @@ -150,9 +152,20 @@
       public abstract boolean isFieldUnStored (String key);
   
       /**
  -     * Return a Jetspeed ParsedObject instance
  -     * @return ParsedObject
  +     * If this document can be scheduled ( indexation can be delayed, i.e in 
case of File Field Document which can take a lot
  +     * of time when parsing pdf file ), it should return true and add itself 
in the indexationJob
  +     *
  +     * @param indexationJobDetail
  +     * @return
        */
  -   // public abstract ParsedObject getParsedObject();
  +    public abstract boolean scheduled(IndexationJobDetail 
indexationJobDetail);
  +
  +
  +    /**
  +     * This method is called when the document add itself in the 
ScheduledIndexationJob's vector of schedulable documents
  +     *
  +     */
  +    public abstract void doScheduledLoad();
  +
   
   }
  
  
  
  Index: JahiaIndexableDocumentImpl.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/src/java/org/jahia/services/search/Attic/JahiaIndexableDocumentImpl.java,v
  retrieving revision 1.5.2.1
  retrieving revision 1.5.2.1.2.1
  diff -u -r1.5.2.1 -r1.5.2.1.2.1
  --- JahiaIndexableDocumentImpl.java   26 Aug 2004 16:29:41 -0000      1.5.2.1
  +++ JahiaIndexableDocumentImpl.java   27 Apr 2005 09:11:35 -0000      
1.5.2.1.2.1
  @@ -1,5 +1,7 @@
   package org.jahia.services.search;
   
  +import org.quartz.Job;
  +
   import java.io.*;
   import java.util.*;
   
  @@ -248,15 +250,25 @@
       }
   
       /**
  -     * Return a Jetspeed ParsedObject instance
  -     * @return ParsedObject
  +     * If this document can be scheduled ( indexation can be delayed, i.e in 
case of File Field Document which can take a lot
  +     * of time when parsing pdf file ), it should return true and add itself 
in the indexationJob
  +     *
  +     * By default, return false.
  +     *
  +     * @param indexationJobDetail
  +     * @return
        */
  -    /*
  -    public ParsedObject getParsedObject(){
  -        BaseParsedObject parsedObject = new BaseParsedObject();
  -        
parsedObject.setContent((String)this.getFields().get(JahiaSearchConstant.FIELD_VALUE));
  +    public boolean scheduled(IndexationJobDetail indexationJobDetail){
  +        return false;
  +    }
   
  -        return parsedObject;
  -    }*/
  +
  +    /**
  +     * This method is called when the document add itself in the 
ScheduledIndexationJob's vector of schedulable documents
  +     *
  +     */
  +    public void doScheduledLoad(){
  +        // by default do nothing
  +    }
   
   }
  
  
  
  Index: JahiaSearchBaseService.java
  ===================================================================
  RCS file: 
/home/cvs/repository/jahia/src/java/org/jahia/services/search/Attic/JahiaSearchBaseService.java,v
  retrieving revision 1.42.2.15.2.1
  retrieving revision 1.42.2.15.2.2
  diff -u -r1.42.2.15.2.1 -r1.42.2.15.2.2
  --- JahiaSearchBaseService.java       22 Mar 2005 11:46:30 -0000      
1.42.2.15.2.1
  +++ JahiaSearchBaseService.java       27 Apr 2005 09:11:35 -0000      
1.42.2.15.2.2
  @@ -43,6 +43,10 @@
   import org.springframework.beans.factory.xml.XmlBeanFactory;
   import org.springframework.beans.factory.BeanFactory;
   import org.jahia.utils.fileparsers.FileExtractorsConfig;
  +import org.quartz.JobDetail;
  +import org.quartz.Scheduler;
  +import org.quartz.JobDataMap;
  +import org.quartz.SimpleTrigger;
   
   /**
    * Search Service based on Lucene engine.
  @@ -82,6 +86,9 @@
       // list of all removed/added fields to add/remove from the index in 
background
       private Vector indexOrders; // les ordres de add/remove
   
  +    private JobDetail scheduledIndexationJob;
  +    private long scheduledIndexationInterval = 5000;
  +
       private Thread backgroundIndexingThread;
       private boolean indexingThreadActivated = true;
   
  @@ -190,6 +197,18 @@
                                                           + 
this.fileExtractorsConfigFilePath, t);
               }
   
  +            val = org.jahia.bin.Jahia.getStaticServletConfig()
  +                
.getInitParameter("org.jahia.services.search.scheduledJobInterval");
  +            if ( val != null ){
  +                try {
  +                    this.scheduledIndexationInterval = Long.parseLong(val);
  +                } catch ( Throwable t) {
  +                }
  +            }
  +
  +            // launch indexation Job
  +            this.startScheduledIndexationJob();
  +
               if ( this.indexingThreadActivated ){
                   backgroundIndexingThread = new Thread(theObject,
                       "Background content indexing");
  @@ -211,6 +230,29 @@
           }
       }
   
  +    private void startScheduledIndexationJob(){
  +
  +        this.scheduledIndexationJob = new 
IndexationJobDetail("scheduledIndexation" + "_Job", Scheduler.DEFAULT_GROUP,
  +                                  ScheduledIndexationJob.class);
  +        JobDataMap jobDataMap = new JobDataMap();
  +        this.scheduledIndexationJob.setJobDataMap(jobDataMap);
  +
  +        SimpleTrigger trigger = new SimpleTrigger("scheduledIndexation" + 
"_Trigger",
  +                                    Scheduler.DEFAULT_GROUP,
  +                                    SimpleTrigger.REPEAT_INDEFINITELY,
  +                                    this.scheduledIndexationInterval);
  +
  +        try {
  +            
ServicesRegistry.getInstance().getSchedulerService().unscheduleJob(trigger.getName(),
  +                    Scheduler.DEFAULT_GROUP);
  +            ServicesRegistry.getInstance().getSchedulerService().scheduleJob(
  +                this.scheduledIndexationJob, trigger);
  +        } catch (JahiaException je) {
  +            logger.error("Error while scheduling search indexation", je);
  +        }
  +
  +    }
  +
       private void loadFileExtractors() throws JahiaException {
           java.io.InputStream is = null;
           try {
  @@ -867,14 +909,19 @@
           while (indexingThreadActivated) {
   
               Vector v = new Vector();
  +            Vector lowPriorityQueue = new Vector();
  +
               HashMap toBeAdded = new HashMap();
               HashMap toBeRemoved = new HashMap();
               JahiaIndexableDocument doc = null;
               synchronized (this) {
   
  -                // 1. separate docs that are going to be added or removed
  +                // 1.Separate docs that are going to be added or removed
                   for ( int i=0; i<indexOrders.size(); i++ ){
                       doc = (JahiaIndexableDocument)indexOrders.get(i);
  +                    if ( 
doc.scheduled((IndexationJobDetail)this.scheduledIndexationJob) ){
  +                        continue;
  +                    }
                       if ( doc.toBeAdded() ){
                           toBeAdded.put(doc.getKeyFieldName() + "_" + 
doc.getKey(),new Integer(i));
                       } else {
  @@ -918,9 +965,17 @@
   
                   // okay now we have the next added/removed field, we process 
it!
                   if (nextObject != null) {
  +
                       siteId = new Integer(nextObject.getSiteId());
   
                       if (nextObject.toBeAdded()) {
  +                        synchronized(this.indexOrders) {
  +                            if ( this.indexOrders.size()>0 && nextObject 
instanceof AddedField
  +                                    && ((AddedField)nextObject).getType() == 
FieldTypes.FILE ) {
  +                                // as long as there are
  +                            }
  +                        }
  +
                           // close cached reader first
                           closeCachedReader(siteId);
   
  

Reply via email to