knguyen 2005/04/27 11:11:36 CEST
Modified files: (Branch: JAHIA-4-1-BRANCH)
src/java/org/jahia/services/search AddedField.java
JahiaIndexableDocument.java
JahiaIndexableDocumentImpl.java
JahiaSearchBaseService.java
Added files: (Branch: JAHIA-4-1-BRANCH)
src/java/org/jahia/services/search IndexationJobDetail.java
ScheduledIndexationJob.java
Log:
- Move indexation of files into a separate scheduled job so that the main
indexation thread can continue indexing all other fields.
Revision Changes Path
1.14.2.8.2.4 +29 -0
jahia/src/java/org/jahia/services/search/AddedField.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/AddedField.java.diff?r1=1.14.2.8.2.3&r2=1.14.2.8.2.4&f=h
1.1.2.1 +51 -0
jahia/src/java/org/jahia/services/search/IndexationJobDetail.java (new)
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/IndexationJobDetail.java?rev=1.1.2.1&content-type=text/plain
1.5.4.1 +16 -3
jahia/src/java/org/jahia/services/search/JahiaIndexableDocument.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/JahiaIndexableDocument.java.diff?r1=1.5&r2=1.5.4.1&f=h
1.5.2.1.2.1 +20 -8
jahia/src/java/org/jahia/services/search/JahiaIndexableDocumentImpl.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/JahiaIndexableDocumentImpl.java.diff?r1=1.5.2.1&r2=1.5.2.1.2.1&f=h
1.42.2.15.2.2 +56 -1
jahia/src/java/org/jahia/services/search/JahiaSearchBaseService.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/JahiaSearchBaseService.java.diff?r1=1.42.2.15.2.1&r2=1.42.2.15.2.2&f=h
1.1.2.1 +40 -0
jahia/src/java/org/jahia/services/search/ScheduledIndexationJob.java (new)
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/ScheduledIndexationJob.java?rev=1.1.2.1&content-type=text/plain
Index: AddedField.java
===================================================================
RCS file:
/home/cvs/repository/jahia/src/java/org/jahia/services/search/Attic/AddedField.java,v
retrieving revision 1.14.2.8.2.3
retrieving revision 1.14.2.8.2.4
diff -u -r1.14.2.8.2.3 -r1.14.2.8.2.4
--- AddedField.java 8 Apr 2005 13:05:30 -0000 1.14.2.8.2.3
+++ AddedField.java 27 Apr 2005 09:11:35 -0000 1.14.2.8.2.4
@@ -15,6 +15,8 @@
import org.jahia.services.fileextraction.ExtractedDocument;
import org.jahia.utils.fileparsers.*;
import org.jahia.utils.JahiaTools;
+import org.jahia.tools.files.FileWatcherJob;
+import org.quartz.*;
/**
* <p>Title: This class represents a field wrapper used by search engine</p>
@@ -348,4 +350,31 @@
this.setField (this.getKeyFieldName() , this.getKey ());
}
+
+ /**
+ * If this document can be scheduled ( indexation can be delayed, i.e in
case of File Field Document which can take a lot
+ * of time when parsing pdf file ), it should return true and add itself
in the indexationJob
+ *
+ * By default, return false.
+ *
+ * @param indexationJobDetail
+ * @return
+ */
+ public boolean scheduled(IndexationJobDetail indexationJobDetail){
+ if ( this.getType() == FieldTypes.FILE &&
+ !this.alreadyLoadedValues.booleanValue()
+ && indexationJobDetail != null ){
+ indexationJobDetail.addDocument(this);
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * load all datas
+ */
+ public void doScheduledLoad(){
+ this.getFields();
+ }
+
}
Index: JahiaIndexableDocument.java
===================================================================
RCS file:
/home/cvs/repository/jahia/src/java/org/jahia/services/search/Attic/JahiaIndexableDocument.java,v
retrieving revision 1.5
retrieving revision 1.5.4.1
diff -u -r1.5 -r1.5.4.1
--- JahiaIndexableDocument.java 10 Feb 2004 19:39:11 -0000 1.5
+++ JahiaIndexableDocument.java 27 Apr 2005 09:11:35 -0000 1.5.4.1
@@ -1,5 +1,7 @@
package org.jahia.services.search;
+import org.quartz.Job;
+
import java.util.Hashtable;
//import org.apache.jetspeed.services.search.ParsedObject;
@@ -150,9 +152,20 @@
public abstract boolean isFieldUnStored (String key);
/**
- * Return a Jetspeed ParsedObject instance
- * @return ParsedObject
+ * If this document can be scheduled ( indexation can be delayed, i.e in
case of File Field Document which can take a lot
+ * of time when parsing pdf file ), it should return true and add itself
in the indexationJob
+ *
+ * @param indexationJobDetail
+ * @return
*/
- // public abstract ParsedObject getParsedObject();
+ public abstract boolean scheduled(IndexationJobDetail
indexationJobDetail);
+
+
+ /**
+ * This method is called when the document add itself in the
ScheduledIndexationJob's vector of schedulable documents
+ *
+ */
+ public abstract void doScheduledLoad();
+
}
Index: JahiaIndexableDocumentImpl.java
===================================================================
RCS file:
/home/cvs/repository/jahia/src/java/org/jahia/services/search/Attic/JahiaIndexableDocumentImpl.java,v
retrieving revision 1.5.2.1
retrieving revision 1.5.2.1.2.1
diff -u -r1.5.2.1 -r1.5.2.1.2.1
--- JahiaIndexableDocumentImpl.java 26 Aug 2004 16:29:41 -0000 1.5.2.1
+++ JahiaIndexableDocumentImpl.java 27 Apr 2005 09:11:35 -0000
1.5.2.1.2.1
@@ -1,5 +1,7 @@
package org.jahia.services.search;
+import org.quartz.Job;
+
import java.io.*;
import java.util.*;
@@ -248,15 +250,25 @@
}
/**
- * Return a Jetspeed ParsedObject instance
- * @return ParsedObject
+ * If this document can be scheduled ( indexation can be delayed, i.e in
case of File Field Document which can take a lot
+ * of time when parsing pdf file ), it should return true and add itself
in the indexationJob
+ *
+ * By default, return false.
+ *
+ * @param indexationJobDetail
+ * @return
*/
- /*
- public ParsedObject getParsedObject(){
- BaseParsedObject parsedObject = new BaseParsedObject();
-
parsedObject.setContent((String)this.getFields().get(JahiaSearchConstant.FIELD_VALUE));
+ public boolean scheduled(IndexationJobDetail indexationJobDetail){
+ return false;
+ }
- return parsedObject;
- }*/
+
+ /**
+ * This method is called when the document add itself in the
ScheduledIndexationJob's vector of schedulable documents
+ *
+ */
+ public void doScheduledLoad(){
+ // by default do nothing
+ }
}
Index: JahiaSearchBaseService.java
===================================================================
RCS file:
/home/cvs/repository/jahia/src/java/org/jahia/services/search/Attic/JahiaSearchBaseService.java,v
retrieving revision 1.42.2.15.2.1
retrieving revision 1.42.2.15.2.2
diff -u -r1.42.2.15.2.1 -r1.42.2.15.2.2
--- JahiaSearchBaseService.java 22 Mar 2005 11:46:30 -0000
1.42.2.15.2.1
+++ JahiaSearchBaseService.java 27 Apr 2005 09:11:35 -0000
1.42.2.15.2.2
@@ -43,6 +43,10 @@
import org.springframework.beans.factory.xml.XmlBeanFactory;
import org.springframework.beans.factory.BeanFactory;
import org.jahia.utils.fileparsers.FileExtractorsConfig;
+import org.quartz.JobDetail;
+import org.quartz.Scheduler;
+import org.quartz.JobDataMap;
+import org.quartz.SimpleTrigger;
/**
* Search Service based on Lucene engine.
@@ -82,6 +86,9 @@
// list of all removed/added fields to add/remove from the index in
background
private Vector indexOrders; // les ordres de add/remove
+ private JobDetail scheduledIndexationJob;
+ private long scheduledIndexationInterval = 5000;
+
private Thread backgroundIndexingThread;
private boolean indexingThreadActivated = true;
@@ -190,6 +197,18 @@
+
this.fileExtractorsConfigFilePath, t);
}
+ val = org.jahia.bin.Jahia.getStaticServletConfig()
+
.getInitParameter("org.jahia.services.search.scheduledJobInterval");
+ if ( val != null ){
+ try {
+ this.scheduledIndexationInterval = Long.parseLong(val);
+ } catch ( Throwable t) {
+ }
+ }
+
+ // launch indexation Job
+ this.startScheduledIndexationJob();
+
if ( this.indexingThreadActivated ){
backgroundIndexingThread = new Thread(theObject,
"Background content indexing");
@@ -211,6 +230,29 @@
}
}
+ private void startScheduledIndexationJob(){
+
+ this.scheduledIndexationJob = new
IndexationJobDetail("scheduledIndexation" + "_Job", Scheduler.DEFAULT_GROUP,
+ ScheduledIndexationJob.class);
+ JobDataMap jobDataMap = new JobDataMap();
+ this.scheduledIndexationJob.setJobDataMap(jobDataMap);
+
+ SimpleTrigger trigger = new SimpleTrigger("scheduledIndexation" +
"_Trigger",
+ Scheduler.DEFAULT_GROUP,
+ SimpleTrigger.REPEAT_INDEFINITELY,
+ this.scheduledIndexationInterval);
+
+ try {
+
ServicesRegistry.getInstance().getSchedulerService().unscheduleJob(trigger.getName(),
+ Scheduler.DEFAULT_GROUP);
+ ServicesRegistry.getInstance().getSchedulerService().scheduleJob(
+ this.scheduledIndexationJob, trigger);
+ } catch (JahiaException je) {
+ logger.error("Error while scheduling search indexation", je);
+ }
+
+ }
+
private void loadFileExtractors() throws JahiaException {
java.io.InputStream is = null;
try {
@@ -867,14 +909,19 @@
while (indexingThreadActivated) {
Vector v = new Vector();
+ Vector lowPriorityQueue = new Vector();
+
HashMap toBeAdded = new HashMap();
HashMap toBeRemoved = new HashMap();
JahiaIndexableDocument doc = null;
synchronized (this) {
- // 1. separate docs that are going to be added or removed
+ // 1.Separate docs that are going to be added or removed
for ( int i=0; i<indexOrders.size(); i++ ){
doc = (JahiaIndexableDocument)indexOrders.get(i);
+ if (
doc.scheduled((IndexationJobDetail)this.scheduledIndexationJob) ){
+ continue;
+ }
if ( doc.toBeAdded() ){
toBeAdded.put(doc.getKeyFieldName() + "_" +
doc.getKey(),new Integer(i));
} else {
@@ -918,9 +965,17 @@
// okay now we have the next added/removed field, we process
it!
if (nextObject != null) {
+
siteId = new Integer(nextObject.getSiteId());
if (nextObject.toBeAdded()) {
+ synchronized(this.indexOrders) {
+ if ( this.indexOrders.size()>0 && nextObject
instanceof AddedField
+ && ((AddedField)nextObject).getType() ==
FieldTypes.FILE ) {
+ // as long as there are
+ }
+ }
+
// close cached reader first
closeCachedReader(siteId);