knguyen     2004/10/26 13:17:40 CEST

  Modified files:        (Branch: JAHIA-4-0-BRANCH)
    src/java/org/jahia/services/search AddedField.java 
    src/java/org/jahia/utils/fileparsers PDFExtractor.java 
  Log:
  - Serialize file extraction results for other file types too.
  
  Revision  Changes    Path
  1.14.2.7  +54 -14    jahia/src/java/org/jahia/services/search/AddedField.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/AddedField.java.diff?r1=1.14.2.6&r2=1.14.2.7&f=h
  1.3.2.6   +5 -0      jahia/src/java/org/jahia/utils/fileparsers/PDFExtractor.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/utils/fileparsers/PDFExtractor.java.diff?r1=1.3.2.5&r2=1.3.2.6&f=h
  
  
  
  Index: AddedField.java
  ===================================================================
  RCS file: /home/cvs/repository/jahia/src/java/org/jahia/services/search/Attic/AddedField.java,v
  retrieving revision 1.14.2.6
  retrieving revision 1.14.2.7
  diff -u -r1.14.2.6 -r1.14.2.7
  --- AddedField.java   20 Oct 2004 15:08:55 -0000      1.14.2.6
  +++ AddedField.java   26 Oct 2004 11:17:39 -0000      1.14.2.7
  @@ -12,11 +12,8 @@
   import org.jahia.services.containers.*;
   import org.jahia.services.sites.*;
   import org.jahia.services.webdav.*;
  -import org.jahia.utils.*;
   import org.jahia.utils.fileparsers.*;
  -
  -
  -import org.springframework.web.servlet.view.document.AbstractPdfView;
  +import org.jahia.utils.JahiaTools;
   
   /**
    * <p>Title: This class represents a field wrapper used by search engine</p>
  @@ -198,11 +195,8 @@
                                               InputStream ins = file.downloadFile();
                                               String charSet = null; // by default 
open as ascii
                                               CharsetDetection charsetDet = new 
CharsetDetection();
  -                                            int charsetDetection = 
charsetDet.charsetDetection(ins);
  -                                            if ( charsetDetection == 0 ){
  -                                                // not ascii only
  -                                                charSet = charsetDet.getCharset();
  -                                            }
  +                                            charsetDet.charsetDetection(ins);
  +                                            charSet = charsetDet.getCharset();
                                               long lastModifiedDate = 
System.currentTimeMillis();
                                               try {
                                                   lastModifiedDate = 
file.getJahiaFileField()
  @@ -210,11 +204,57 @@
                                               } catch ( Throwable t ){
                                                   logger.debug(t);
                                               }
  -                                            strVal = fileExt
  -                                                .getContentAsString(file.getPath(),
  -                                                                    
lastModifiedDate,
  -                                                                    
file.downloadFile(),
  -                                                                    charSet);
  +
  +                                            // try to load previously extracted 
data if the file has not changed
  +                                            String formattedPath = 
JahiaTools.replacePattern(file.getPath(),"/","\\");
  +                                            formattedPath = 
JahiaTools.replacePattern(formattedPath,"\\","_");
  +
  +                                            String tmpFilePath = 
ServicesRegistry.getInstance()
  +                                                
.getJahiaSearchService().getSearchIndexRootDir()
  +                                                + File.separator + 
"jahia_pdf_tmpfile_" + formattedPath;
  +
  +                                            try {
  +                                                // Deserialize from a file
  +                                                File f = new File(tmpFilePath);
  +                                                if ( f.exists() && 
f.lastModified()>lastModifiedDate ){
  +                                                    ObjectInputStream in = new
  +                                                        ObjectInputStream(new
  +                                                        FileInputStream(f));
  +                                                    // Deserialize the object
  +                                                    strVal = (String) in.
  +                                                        readObject();
  +                                                    in.close();
  +                                                    logger.info(
  +                                                        "Use previous extracted pdf 
tmp file " +
  +                                                        tmpFilePath);
  +                                                }
  +                                            } catch (ClassNotFoundException e) {
  +                                                logger.debug(e);
  +                                            } catch (IOException e) {
  +                                                //logger.debug(e); file could not 
exist and it's not an error
  +                                            }
  +
  +                                            if ( strVal == null ){
  +                                                strVal = fileExt
  +                                                    .getContentAsString(file.
  +                                                    getPath(),
  +                                                    lastModifiedDate,
  +                                                    file.downloadFile(),
  +                                                    charSet);
  +
  +                                                if ( strVal == null ){
  +                                                    strVal = "";
  +                                                }
  +                                                try {
  +                                                    // Serialize to a file
  +                                                    ObjectOutput out = new 
ObjectOutputStream(new
  +                                                        
FileOutputStream(tmpFilePath));
  +                                                    out.writeObject(strVal);
  +                                                    out.close();
  +                                                }
  +                                                catch (IOException e) {
  +                                                }
  +                                            }
                                           }
                                       } catch ( Throwable t ){
                                           logger.debug(t);
  
  
  
  Index: PDFExtractor.java
  ===================================================================
  RCS file: /home/cvs/repository/jahia/src/java/org/jahia/utils/fileparsers/Attic/PDFExtractor.java,v
  retrieving revision 1.3.2.5
  retrieving revision 1.3.2.6
  diff -u -r1.3.2.5 -r1.3.2.6
  --- PDFExtractor.java 25 Oct 2004 16:30:42 -0000      1.3.2.5
  +++ PDFExtractor.java 26 Oct 2004 11:17:39 -0000      1.3.2.6
  @@ -110,6 +110,11 @@
                   pdfDocument.close();
               } catch ( Throwable t ){
               }
  +            try {
  +                bufFileStream.close();
  +            } catch ( Throwable t ){
  +            }
  +
               if ( charSet != null ){
                   return new InputStreamReader(new ByteArrayInputStream(contents),
                                                charSet);
  

Reply via email to