jford       2004/09/28 13:42:20

  Added:       components/search/src/java/org/apache/jetspeed/search
                        AbstractObjectHandler.java BaseParsedObject.java
               components/search/src/java/org/apache/jetspeed/search/lucene
                        SearchEngineImpl.java
               components/search/src/java/org/apache/jetspeed/search/handlers
                        URLToDocHandler.java HandlerFactoryImpl.java
  Log:
  Added a Lucene based implementation of the search component
  
  Revision  Changes    Path
  1.1                  
jakarta-jetspeed-2/components/search/src/java/org/apache/jetspeed/search/AbstractObjectHandler.java
  
  Index: AbstractObjectHandler.java
  ===================================================================
  /*
   * Copyright 2000-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
   
  package org.apache.jetspeed.search;
  
  import java.util.HashSet;
  import java.util.Set;
  
  /**
   * Abstract handler that new handlers can derive from.  It holds the two
   * name sets that are handed out through {@link #getFields()} and
   * {@link #getKeywords()}; this class itself never adds anything to them.
   * 
   * @author <a href="mailto:[EMAIL PROTECTED]">Jeremy Ford</a>
   * @version $Id: AbstractObjectHandler.java,v 1.1 2004/09/28 20:42:19 jford Exp $
   */
  public abstract class AbstractObjectHandler implements ObjectHandler
  {
      /** Field names exposed by {@link #getFields()}; presumably filled by subclasses. */
      protected final HashSet fields = new HashSet();

      /** Keyword names exposed by {@link #getKeywords()}; presumably filled by subclasses. */
      protected final HashSet keywords = new HashSet();

      /**
       * Returns the field name set.  The set itself is returned, not a copy.
       *
       * @return the set held in {@link #fields}
       * @see org.apache.jetspeed.search.ObjectHandler#getFields()
       */
      public Set getFields()
      {
          return this.fields;
      }

      /**
       * Returns the keyword name set.  The set itself is returned, not a copy.
       *
       * @return the set held in {@link #keywords}
       * @see org.apache.jetspeed.search.ObjectHandler#getKeywords()
       */
      public Set getKeywords()
      {
          return this.keywords;
      }

  }
  
  
  
  1.1                  
jakarta-jetspeed-2/components/search/src/java/org/apache/jetspeed/search/BaseParsedObject.java
  
  Index: BaseParsedObject.java
  ===================================================================
  /*
   * Copyright 2000-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  package org.apache.jetspeed.search;
  
  import java.util.Map;
  import java.net.URL;
  
  /**
   * Plain value holder implementing {@link ParsedObject}.  Every property is
   * stored as given and returned as stored; no copying or validation is done.
   *
   * @author <a href="mailto:[EMAIL PROTECTED]">Mark Orciuch</a>
   * @version $Id: BaseParsedObject.java,v 1.1 2004/09/28 20:42:19 jford Exp $
   */
  public class BaseParsedObject implements ParsedObject
  {

      private String key;
      private String type;
      private String title;
      private String description;
      private String content;
      private String language;
      private URL url;
      private String[] keywords;
      private Map keywordsMap;
      private Map fields;
      private float score;
      private String className;

      /**
       * @return the key of this parsed object
       */
      public String getKey()
      {
          return key;
      }

      /**
       * @param key the key of this parsed object
       */
      public void setKey(String key)
      {
          this.key = key;
      }

      /**
       * @return the type of this parsed object
       */
      public String getType()
      {
          return type;
      }

      /**
       * @param type the type of this parsed object
       */
      public void setType(String type)
      {
          this.type = type;
      }

      /**
       * @return the title of this parsed object
       */
      public String getTitle()
      {
          return title;
      }

      /**
       * @param title the title of this parsed object
       */
      public void setTitle(String title)
      {
          this.title = title;
      }

      /**
       * @return the description of this parsed object
       */
      public String getDescription()
      {
          return description;
      }

      /**
       * @param description the description of this parsed object
       */
      public void setDescription(String description)
      {
          this.description = description;
      }

      /**
       * @return the content of this parsed object
       */
      public String getContent()
      {
          return content;
      }

      /**
       * @param content the content of this parsed object
       */
      public void setContent(String content)
      {
          this.content = content;
      }

      /**
       * @return the language of this parsed object
       */
      public String getLanguage()
      {
          return language;
      }

      /**
       * @param language the language of this parsed object
       */
      public void setLanguage(String language)
      {
          this.language = language;
      }

      /**
       * @return the URL of this parsed object
       */
      public URL getURL()
      {
          return url;
      }

      /**
       * @param url the URL of this parsed object
       */
      public void setURL(URL url)
      {
          this.url = url;
      }

      /**
       * @return the keyword array exactly as it was set (not a copy)
       */
      public String[] getKeywords()
      {
          return keywords;
      }

      /**
       * @param keywords the keyword array; stored without copying
       */
      public void setKeywords(String[] keywords)
      {
          this.keywords = keywords;
      }

      /**
       * @return the keywords map exactly as it was set
       * @see org.apache.jetspeed.search.ParsedObject#getKeywordsMap()
       */
      public Map getKeywordsMap()
      {
          return keywordsMap;
      }

      /**
       * @param keywordsMap the keywords map; stored without copying
       * @see org.apache.jetspeed.search.ParsedObject#setKeywordsMap(java.util.Map)
       */
      public void setKeywordsMap(Map keywordsMap)
      {
          this.keywordsMap = keywordsMap;
      }

      /**
       * @return the searchable fields map exactly as it was set
       */
      public Map getFields()
      {
          return fields;
      }

      /**
       * @param fields the searchable fields map; stored without copying
       */
      public void setFields(Map fields)
      {
          this.fields = fields;
      }

      /**
       * @return value of property score
       */
      public float getScore()
      {
          return score;
      }

      /**
       * @param score new value of property score
       */
      public void setScore(float score)
      {
          this.score = score;
      }

      /**
       * @return value of property className
       */
      public String getClassName()
      {
          return className;
      }

      /**
       * @param className new value of property className
       */
      public void setClassName(String className)
      {
          this.className = className;
      }

  }
  
  
  
  
  1.1                  
jakarta-jetspeed-2/components/search/src/java/org/apache/jetspeed/search/lucene/SearchEngineImpl.java
  
  Index: SearchEngineImpl.java
  ===================================================================
  /*
   * Copyright 2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  package org.apache.jetspeed.search.lucene;
  
  import java.io.File;
  import java.io.IOException;
  import java.net.URL;
  import java.util.ArrayList;
  import java.util.Collection;
  import java.util.HashMap;
  import java.util.Iterator;
  import java.util.Map;
  import java.util.Set;
  
  import org.apache.commons.collections.MultiHashMap;
  import org.apache.commons.collections.MultiMap;
  import org.apache.jetspeed.search.BaseParsedObject;
  import org.apache.jetspeed.search.HandlerFactory;
  import org.apache.jetspeed.search.ObjectHandler;
  import org.apache.jetspeed.search.ParsedObject;
  import org.apache.jetspeed.search.SearchEngine;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.queryParser.MultiFieldQueryParser;
  import org.apache.lucene.queryParser.ParseException;
  import org.apache.lucene.search.Hits;
  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.search.Searcher;
  
  /**
   * @author <a href="mailto: [EMAIL PROTECTED]">Jeremy Ford</a>
   *
   */
  public class SearchEngineImpl implements SearchEngine
  {
      // Configuration handed to the constructor.  Each field is assigned
      // exactly once there and never changed afterwards, hence final.

      /** Directory that holds the Lucene index. */
      private final File rootIndexDir;
      /** Class name of the analyzer to instantiate; see newAnalyzer(). */
      private final String analyzerClassName;
      /** Whether add and remove optimize the index once they are done. */
      private final boolean optimizeAfterUpdate;
      /** Factory used to look up the handler for an object or class name. */
      private final HandlerFactory handlerFactory;
      
      /** Type selector for addFieldsToDocument: store values as keyword fields. */
      private static final int KEYWORD = 0;
      /** Type selector for addFieldsToDocument: store values as text fields. */
      private static final int TEXT = 1;
      
      /**
       * Creates a search engine backed by the Lucene index in the given
       * directory.  The directory is created when it is missing, and an empty
       * index is created in it when it does not contain one yet.  An existing
       * index is never overwritten.
       *
       * @param indexRoot path of the index directory; assumed to be a full path for now
       * @param analyzerClassName class name of the analyzer to instantiate for Lucene
       * @param optimzeAfterUpdate whether the index is optimized after it was modified
       * @param handlerFactory factory that supplies the handler for an object
       */
      public SearchEngineImpl(String indexRoot, String analyzerClassName,
              boolean optimzeAfterUpdate, HandlerFactory handlerFactory)
      {
          //assume it's full path for now
          rootIndexDir = new File(indexRoot);
          if(!rootIndexDir.exists())
          {
              rootIndexDir.mkdirs();
          }
          
          this.analyzerClassName = analyzerClassName;
          this.optimizeAfterUpdate = optimzeAfterUpdate;
          this.handlerFactory = handlerFactory;
          
          // Ask Lucene whether an index is present instead of treating every
          // failure to open a searcher as "no index": opening can also fail
          // for an existing index, and creating a writer with create == true
          // would then erase it.
          if(!IndexReader.indexExists(rootIndexDir))
          {
              try
              {
                  IndexWriter indexWriter = new IndexWriter(rootIndexDir, newAnalyzer(), true);
                  indexWriter.close();
                  //logger.info("Created Lucene Index in " + rootIndexDir.getPath());
              }
              catch (Exception e)
              {
                  // Deliberately best effort, as before: construction still
                  // succeeds and later operations report the failure through
                  // their return values.
                  //logger.error(this.getClass().getName() + ".initConfiguration - Getting or creating IndexSearcher", e);
                  //throw new InitializationException("Getting or creating Index Searcher");
              }
          }
      }
  
      /**
       * Indexes a single object by wrapping it in a one-element collection and
       * handing that to {@link #add(Collection)}.
       *
       * @param o the object to index
       * @return whatever {@link #add(Collection)} returns for that collection
       * @see org.apache.jetspeed.search.SearchEngine#add(java.lang.Object)
       */
      public boolean add(Object o)
      {
          Collection single = new ArrayList(1);
          single.add(o);
          return add(single);
      }
  
      /* (non-Javadoc)
       * @see org.apache.jetspeed.search.SearchEnging#add(java.util.Collection)
       */
      public boolean add(Collection objects)
      {
          boolean result = false;
          
          IndexWriter indexWriter;
          try
          {
              indexWriter = new IndexWriter(rootIndexDir, newAnalyzer(), false);
          }
          catch (IOException e)
          {
              //logger.error("Error while creating index writer. Skipping add...", e);
              return result;
          }
  
          Iterator it = objects.iterator();
          while (it.hasNext()) 
          {
              Object o = it.next();
              // Look up appropriate handler
              ObjectHandler handler = null;
              try
              {
                  handler = handlerFactory.getHandler(o);
              }
              catch (Exception e)
              {
                  //logger.error("Failed to create hanlder for object " + 
o.getClass().getName());
                  continue;
              }
  
              // Parse the object
              ParsedObject parsedObject = handler.parseObject(o);
  
              // Create document
              Document doc = new Document();
  
              // Populate document from the parsed object
              if (parsedObject.getKey() != null)
              {
                  doc.add(Field.Keyword(ParsedObject.FIELDNAME_KEY, 
parsedObject.getKey()));
              }
              if (parsedObject.getType() != null)
              {
                  doc.add(Field.Text(ParsedObject.FIELDNAME_TYPE, 
parsedObject.getType()));
              }
              if (parsedObject.getTitle() != null)
              {
                  doc.add(Field.Text(ParsedObject.FIELDNAME_TITLE, 
parsedObject.getTitle()));
              }
              if (parsedObject.getDescription() != null)
              {
                  doc.add(Field.Text(ParsedObject.FIELDNAME_DESCRIPTION, 
parsedObject.getDescription()));
              }
              if (parsedObject.getContent() != null)
              {
                  doc.add(Field.Text(ParsedObject.FIELDNAME_CONTENT, 
parsedObject.getContent()));
              }
              if (parsedObject.getLanguage() != null)
              {
                  doc.add(Field.Text(ParsedObject.FIELDNAME_LANGUAGE, 
parsedObject.getLanguage()));   
              }
              if (parsedObject.getURL() != null)
              {
                  doc.add(Field.Text(ParsedObject.FIELDNAME_URL, 
parsedObject.getURL().toString()));
              }
              if(parsedObject.getClassName() != null)
              {
                  doc.add(Field.Text(ParsedObject.FIELDNAME_CLASSNAME, 
parsedObject.getClassName()));
              }
  
              Map keywords = parsedObject.getKeywordsMap();
              addFieldsToDocument(doc, keywords, KEYWORD);
              
              Map fields = parsedObject.getFields();
              addFieldsToDocument(doc, fields, TEXT);
   
              // Add the document to search index
              try
              {
                  indexWriter.addDocument(doc);
              }
              catch (IOException e)
              {
                 //logger.error("Error adding document to index.", e);
              }
              //logger.debug("Index Document Count = " + indexWriter.docCount());
              //logger.info("Added '" + parsedObject.getTitle() + "' to index");
              result = true;
          }
  
          try
          {
                if(optimizeAfterUpdate)
              {
                  indexWriter.optimize();
              }
          }
          catch (IOException e)
          {
              //logger.error("Error while trying to optimize index.");
          }
          finally
          {
              try
              {
                  indexWriter.close();
              }
              catch (IOException e)
              {
                 //logger.error("Error while closing index writer.", e);
              }
          }
          
          return result;
      }
  
      /**
       * Removes a single object from the index by wrapping it in a one-element
       * collection and handing that to {@link #remove(Collection)}.
       *
       * @param o the object to remove
       * @return whatever {@link #remove(Collection)} returns for that collection
       * @see org.apache.jetspeed.search.SearchEngine#remove(java.lang.Object)
       */
      public boolean remove(Object o)
      {
          Collection single = new ArrayList(1);
          single.add(o);
          return remove(single);
      }
  
      /* (non-Javadoc)
       * @see org.apache.jetspeed.search.SearchEnging#remove(java.util.Collection)
       */
      public boolean remove(Collection objects)
      {
          boolean result = false;
          
          try 
          {
              IndexReader indexReader = IndexReader.open(this.rootIndexDir);
  
              Iterator it = objects.iterator();
              while (it.hasNext()) 
              {
                  Object o = it.next();
                  // Look up appropriate handler
                  ObjectHandler handler = handlerFactory.getHandler(o);
  
                  // Parse the object
                  ParsedObject parsedObject = handler.parseObject(o);
  
                  // Create term
                  Term term = null;
  
                  if (parsedObject.getKey() != null)
                  {
                      term = new Term(ParsedObject.FIELDNAME_KEY, 
parsedObject.getKey());
                      // Remove the document from search index
                      int rc = indexReader.delete(term);
                      //logger.info("Attempted to delete '" + term.toString() + "' 
from index, documents deleted = " + rc);
                      //System.out.println("Attempted to delete '" + term.toString() + 
"' from index, documents deleted = " + rc);
                      result = rc > 0;
                  }
              }
  
              indexReader.close();
  
              if(optimizeAfterUpdate)
              {
                  optimize();
              }
  
          }
          catch (Exception e)
          {
              //logger.error("Exception", e);
              result = false;
          }
  
          return result;
      }
  
      /**
       * Updates a single object in the index by wrapping it in a one-element
       * collection and handing that to {@link #update(Collection)}.
       *
       * @param o the object to update
       * @return whatever {@link #update(Collection)} returns for that collection
       * @see org.apache.jetspeed.search.SearchEngine#update(java.lang.Object)
       */
      public boolean update(Object o)
      {
          Collection single = new ArrayList(1);
          single.add(o);
          return update(single);
      }
  
      /**
       * Updates the given objects in the index by removing their current
       * documents and adding them again.
       * <p>
       * The outcome of the removal is ignored on purpose: an object that was
       * not in the index yet is still updated successfully by adding it.  The
       * outcome of the add, however, decides the result.
       *
       * @param objects the objects to update; null yields false
       * @return true when re-adding the objects succeeded (or the collection
       *         was empty), false when it failed or a runtime exception occurred
       * @see org.apache.jetspeed.search.SearchEngine#update(java.util.Collection)
       */
      public boolean update(Collection objects)
      {
          if (objects == null)
          {
              return false;
          }

          try
          {
              // Delete entries from index
              remove(objects);

              // Add entries to index.  add() reports false for an empty
              // collection, which is not a failure for an update.
              boolean added = add(objects);
              return added || objects.isEmpty();
          }
          catch (RuntimeException e)
          {
              // Errors are no longer swallowed; only ordinary runtime failures
              // are turned into a false result.
              //logger.error("Exception",  e);
              return false;
          }
      }
  
      /**
       * Optimizes the index.  The index writer opened for this is closed on
       * every path, including when optimizing fails.
       *
       * @return true when the index was optimized and the writer closed without
       *         an I/O error, false otherwise
       * @see org.apache.jetspeed.search.SearchEngine#optimize()
       */
      public boolean optimize()
      {
          boolean result = false;
          IndexWriter indexWriter = null;

          try
          {
              indexWriter = new IndexWriter(rootIndexDir, newAnalyzer(), false);
              indexWriter.optimize();
              result = true;
          }
          catch (IOException e)
          {
              //logger.error("Error while trying to optimize index.");
          }
          finally
          {
              if (indexWriter != null)
              {
                  try
                  {
                      indexWriter.close();
                  }
                  catch (IOException e)
                  {
                      // A failed close counts as a failed optimize, as before.
                      //logger.error("Error while closing index writer.", e);
                      result = false;
                  }
              }
          }

          return result;
      }
  
      /* (non-Javadoc)
       * @see org.apache.jetspeed.search.SearchEngine#search(java.lang.String)
       */
      public Iterator search(String queryString)
      {        
          Searcher searcher = null;
          Hits hits = null;
          
          try
          {
              searcher = new IndexSearcher(rootIndexDir.getPath());
          }
          catch (IOException e)
          {
              //logger.error("Failed to create index search using path " + 
rootDir.getPath());
              return null;
          }
          
          Analyzer analyzer = newAnalyzer();
          
          String[] searchFields = {ParsedObject.FIELDNAME_CONTENT, 
ParsedObject.FIELDNAME_DESCRIPTION, ParsedObject.FIELDNAME_FIELDS,
                             ParsedObject.FIELDNAME_KEY, 
ParsedObject.FIELDNAME_KEYWORDS, ParsedObject.FIELDNAME_LANGUAGE,
                             ParsedObject.FIELDNAME_SCORE, 
ParsedObject.FIELDNAME_TITLE, ParsedObject.FIELDNAME_TYPE,
                             ParsedObject.FIELDNAME_URL, 
ParsedObject.FIELDNAME_CLASSNAME};
                              
          Query query= null;
          try
          {
              query = MultiFieldQueryParser.parse(queryString, searchFields, analyzer);
  //          Query query = QueryParser.parse(searchString, 
ParsedObject.FIELDNAME_CONTENT, analyzer);
          }
          catch (ParseException e)
          {
              //logger.info("Failed to parse query " + query);
              return null;
          }
          
          try
          {
              hits = searcher.search(query);
          }
          catch (IOException e)
          {
             //logger.error("Error while peforming search.", e);
             return null;
          }
  
          int hitNum = hits.length();
          ArrayList results = new ArrayList(hitNum);
          for(int i=0; i<hitNum; i++)
          {
              ParsedObject result = new BaseParsedObject();
              try
              {
                    Document doc = hits.doc(i);
                
                        addFieldsToParsedObject(doc, result);
                        
                        result.setScore(hits.score(i));
                        Field type = doc.getField(ParsedObject.FIELDNAME_TYPE);
                        if(type != null)
                        {
                            result.setType(type.stringValue());
                        }
                        
                        Field key = doc.getField(ParsedObject.FIELDNAME_KEY);
                        if(key != null)
                        {
                            result.setKey(key.stringValue());
                        }
                        
                        Field description = 
doc.getField(ParsedObject.FIELDNAME_DESCRIPTION);
                        if(description != null)
                        {
                            result.setDescription(description.stringValue());
                        }
                        
                        Field title = doc.getField(ParsedObject.FIELDNAME_TITLE);
                        if(title != null)
                        {
                            result.setTitle(title.stringValue());
                        }
                        
                        Field content = doc.getField(ParsedObject.FIELDNAME_CONTENT);
                        if(content != null)
                        {
                            result.setContent(content.stringValue());
                        }
                        
                        Field language = doc.getField(ParsedObject.FIELDNAME_LANGUAGE);
                        if (language != null)
                        {
                                result.setLanguage(language.stringValue());
                        }
                        
                        Field classname = 
doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
                        if (classname != null)
                        {
                                result.setClassName(classname.stringValue());
                        }
                        
                        Field url = doc.getField(ParsedObject.FIELDNAME_URL);
                        if (url != null)
                        {
                            result.setURL(new URL(url.stringValue()));
                        }
                        
                        results.add(i, result);
              }
              catch(IOException e)
              {
                  //logger
              }
          }
  
          if (searcher != null)
          {
              try
              {
                  searcher.close();
              }
              catch (IOException ioe)
              {
                  //logger.error("Closing Searcher", ioe);
              }
          }
          return results.iterator();
      }
      
      /**
       * Creates the analyzer handed to Lucene.  When an analyzer class name is
       * configured, that class is loaded and instantiated through its no-arg
       * constructor; when no name is configured, or the class cannot be found,
       * instantiated or accessed, a StandardAnalyzer is used instead.
       *
       * @return a new analyzer instance, never null
       */
      private Analyzer newAnalyzer()
      {
          if (analyzerClassName != null)
          {
              try
              {
                  return (Analyzer) Class.forName(analyzerClassName).newInstance();
              }
              catch (InstantiationException e)
              {
                  //logger.error("InstantiationException", e);
              }
              catch (ClassNotFoundException e)
              {
                  //logger.error("ClassNotFoundException", e);
              }
              catch (IllegalAccessException e)
              {
                  //logger.error("IllegalAccessException", e);
              }
          }

          // Nothing configured, or the configured class was unusable.
          return new StandardAnalyzer();
      }
  
      /**
       * Copies the entries of a map into the document.  The key's string form
       * becomes the field name.  A value that is a collection contributes one
       * field per non-null element, any other value contributes a single
       * field.  Null maps, null keys, null values and null elements are ignored.
       *
       * @param doc the document to add the fields to
       * @param fields map of field name to value or collection of values; may be null
       * @param type TEXT to store text fields, anything else to store keyword fields
       */
      private void addFieldsToDocument(Document doc, Map fields, int type)
      {
          if (fields == null)
          {
              return;
          }

          final boolean asText = (type == TEXT);

          for (Iterator names = fields.keySet().iterator(); names.hasNext();)
          {
              Object name = names.next();
              if (name == null)
              {
                  continue;
              }

              Object held = fields.get(name);
              if (held == null)
              {
                  continue;
              }

              if (!(held instanceof Collection))
              {
                  // Single value: one field
                  doc.add(asText
                          ? Field.Text(name.toString(), held.toString())
                          : Field.Keyword(name.toString(), held.toString()));
                  continue;
              }

              // Multiple values: one field per non-null element
              for (Iterator members = ((Collection) held).iterator(); members.hasNext();)
              {
                  Object member = members.next();
                  if (member != null)
                  {
                      doc.add(asText
                              ? Field.Text(name.toString(), member.toString())
                              : Field.Keyword(name.toString(), member.toString()));
                  }
              }
          }
      }
      
      /**
       * Fills the handler specific parts of a parsed object from a document.
       * When the document has a class name field, the class name is set on the
       * object and the handler for that class name decides which document
       * fields are copied into the keywords map (a multi map, so every value is
       * kept) and into the fields map (a plain map, so the last value of a
       * field name wins).  Without a class name field both maps are set empty.
       * <p>
       * Any exception is swallowed; in that case the maps are not set on the
       * object at all.
       *
       * @param doc the document to read from
       * @param o the parsed object to fill
       */
      private void addFieldsToParsedObject(Document doc, ParsedObject o)
      {
          try
          {
              MultiMap multiKeywords = new MultiHashMap();
              // A multi map of the fields used to be built and set here as
              // well, but it was replaced by this plain map straight away, so
              // only the map that actually ends up on the object is built.
              HashMap fieldMap = new HashMap();
              
              Field classNameField = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
              if(classNameField != null)
              {
                  String className = classNameField.stringValue();
                  o.setClassName(className);
                  ObjectHandler handler = handlerFactory.getHandler(className);
                  
                  addFieldsToMap(doc, handler.getFields(), fieldMap);
                  addFieldsToMap(doc, handler.getKeywords(), multiKeywords);
              }
              
              o.setKeywordsMap(multiKeywords);
              o.setFields(fieldMap);
          }
          catch(Exception e)
          {
              //logger.error("Error trying to add fields to parsed object.", e);
          }
      }
      
      /**
       * Copies the string values of the named document fields into a map.
       * Every non-null field found under a name is put into the map under that
       * name, in document order; how repeated puts for one name are treated is
       * up to the map that was passed in.
       *
       * @param doc the document to read from
       * @param fieldNames names of the fields to copy; each element is cast to String
       * @param fields the map receiving name to string value entries
       */
      private void addFieldsToMap(Document doc, Set fieldNames, Map fields)
      {
          for (Iterator names = fieldNames.iterator(); names.hasNext();)
          {
              String name = (String) names.next();
              Field[] stored = doc.getFields(name);
              if (stored == null)
              {
                  continue;
              }

              for (int idx = 0; idx < stored.length; idx++)
              {
                  if (stored[idx] != null)
                  {
                      fields.put(name, stored[idx].stringValue());
                  }
              }
          }
      }
  }
  
  
  
  1.1                  
jakarta-jetspeed-2/components/search/src/java/org/apache/jetspeed/search/handlers/URLToDocHandler.java
  
  Index: URLToDocHandler.java
  ===================================================================
  /*
   * Copyright 2000-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  package org.apache.jetspeed.search.handlers;
  
  // Java imports
  import java.io.IOException;
  import java.net.URL;
  
  import org.apache.commons.httpclient.HttpClient;
  import org.apache.commons.httpclient.HttpException;
  import org.apache.commons.httpclient.methods.GetMethod;
  import org.apache.jetspeed.search.AbstractObjectHandler;
  import org.apache.jetspeed.search.BaseParsedObject;
  
  /**
   * This object handler deals with URLs.
   * 
   * @author <a href="mailto:[EMAIL PROTECTED]">Mark Orciuch</a>
   * @version $Id: URLToDocHandler.java,v 1.1 2004/09/28 20:42:19 jford Exp $
   */
  public class URLToDocHandler extends AbstractObjectHandler
  {
      /**
       * Static initialization of the logger for this class
       */    
      //private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(URLToDocHandler.class.getName());
      
      /**
       * Parses a specific object into a document suitable for index placement
       * 
       * @param o
       * @return 
       */
      public org.apache.jetspeed.search.ParsedObject parseObject(Object o)
      {
          org.apache.jetspeed.search.ParsedObject result = new BaseParsedObject();
  
          if ((o instanceof URL) == false)
          {
              //logger.error("URLToDocHandler: invalid object type: " + o);
              return null;
          }
  
          URL pageToAdd = (URL) o;
  
          HttpClient client = new HttpClient();
          client.startSession(pageToAdd);
          GetMethod method = new GetMethod(pageToAdd.getPath());
          method.setFollowRedirects(true);
          int statusCode = -1;
          int attempt = 0;
  
          // We will retry up to 3 times.
          while (statusCode == -1 && attempt < 3)
          {
              try
              {
                  // execute the method.
                  client.executeMethod(method);
                  statusCode = method.getStatusCode();
                  //if (logger.isDebugEnabled())
                  {
                      //logger.debug("URL = " + pageToAdd.toString() + "Status code = 
" + statusCode);
                  }
              }
              catch (HttpException e)
              {
                  // We will retry
                  attempt++;
              }
              catch (IOException e)
              {
                  return null;
              }
          }
          // Check that we didn't run out of retries.
          if (statusCode != -1)
          {
              String content = null;
              try
              {
                  content = method.getResponseBodyAsString();
              }
              catch (Exception ioe)
              {
                  //logger.error("Getting content for " + pageToAdd.toString(), ioe);
              }
  
              if (content != null)
              {
                  try
                  {
                      result.setKey(java.net.URLEncoder.encode(pageToAdd.toString()));
                      
result.setType(org.apache.jetspeed.search.ParsedObject.OBJECT_TYPE_URL);
                      // TODO: We should extract the <title> tag here.
                      result.setTitle(pageToAdd.toString());
                      result.setContent(content);
                      result.setDescription("");
                      result.setLanguage("");
                      result.setURL(pageToAdd);
                      result.setClassName(o.getClass().getName());
                      //logger.info("Parsed '" + pageToAdd.toString() + "'");
                  }
                  catch (Exception e)
                  {
                      e.printStackTrace();
                      //logger.error("Adding document to index", e);
                  }
              }
          }
          try
          {
              client.endSession();
          }
          catch (IOException ioe)
          {
              ioe.printStackTrace();
              //logger.error("Ending session to " + pageToAdd.toString(), ioe);
          }
  
          return result;
  
      }
  }
  
  
  
  
  1.1                  
jakarta-jetspeed-2/components/search/src/java/org/apache/jetspeed/search/handlers/HandlerFactoryImpl.java
  
  Index: HandlerFactoryImpl.java
  ===================================================================
  /*
   * Copyright 2000-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.jetspeed.search.handlers;
  
  import java.util.HashMap;
  import java.util.Map;
  
  import org.apache.jetspeed.search.HandlerFactory;
  import org.apache.jetspeed.search.ObjectHandler;
  
  /**
   * Search object handler factory
   *
   * @author <a href="mailto: [EMAIL PROTECTED]">Mark Orciuch</a>
   * @author <a href="mailto: [EMAIL PROTECTED]">Jeremy Ford</a>
   * 
   * @version $Id: HandlerFactoryImpl.java,v 1.1 2004/09/28 20:42:19 jford Exp $
   */
  public class HandlerFactoryImpl implements HandlerFactory
  {
      private final Map handlerCache = new HashMap();
      private Map classNameMapping = new HashMap();
      
      public HandlerFactoryImpl(Map classNameMapping)
      {
          this.classNameMapping = classNameMapping;
      }
      
      public void addClassNameMapping(String className, String handlerClassName)
      {
          classNameMapping.put(className, handlerClassName);
      }
      
      /**
       * Returns parsed object handler for specific object
       * 
       * @param obj
       * @return 
       */
      public ObjectHandler getHandler(Object obj) throws Exception
      {
          return getHandler(obj.getClass().getName());
  
      }
      
      /**
      * Returns parsed object handler for specific object
      * 
      * @param obj
      * @return 
      */
      public ObjectHandler getHandler(String className) throws Exception
      {
          ObjectHandler handler = null;
          
          if(handlerCache.containsKey(className))
          {
              handler = (ObjectHandler)handlerCache.get(className);
          }
          else
          {
              String handlerClass = (String) classNameMapping.get(className);
      
              if (handlerClass == null)
              {
                  throw new Exception("No handler was found for document type: " + 
className);
              }
      
              ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
              
              handler = (ObjectHandler) 
classLoader.loadClass(handlerClass).newInstance();
              handlerCache.put(className, handler);
          }
          //System.out.println("HandlerFactory: returning handler " + handler + " for 
" + obj);
  
          return handler;
      }
  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to