knguyen 2004/10/26 13:17:40 CEST
Modified files: (Branch: JAHIA-4-0-BRANCH)
src/java/org/jahia/services/search AddedField.java
src/java/org/jahia/utils/fileparsers PDFExtractor.java
Log:
- Serialize (cache) the extracted file content for other file types too, not only PDF.
Revision Changes Path
1.14.2.7 +54 -14 jahia/src/java/org/jahia/services/search/AddedField.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/services/search/AddedField.java.diff?r1=1.14.2.6&r2=1.14.2.7&f=h
1.3.2.6 +5 -0 jahia/src/java/org/jahia/utils/fileparsers/PDFExtractor.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/src/java/org/jahia/utils/fileparsers/PDFExtractor.java.diff?r1=1.3.2.5&r2=1.3.2.6&f=h
Index: AddedField.java
===================================================================
RCS file: /home/cvs/repository/jahia/src/java/org/jahia/services/search/Attic/AddedField.java,v
retrieving revision 1.14.2.6
retrieving revision 1.14.2.7
diff -u -r1.14.2.6 -r1.14.2.7
--- AddedField.java 20 Oct 2004 15:08:55 -0000 1.14.2.6
+++ AddedField.java 26 Oct 2004 11:17:39 -0000 1.14.2.7
@@ -12,11 +12,8 @@
import org.jahia.services.containers.*;
import org.jahia.services.sites.*;
import org.jahia.services.webdav.*;
-import org.jahia.utils.*;
import org.jahia.utils.fileparsers.*;
-
-
-import org.springframework.web.servlet.view.document.AbstractPdfView;
+import org.jahia.utils.JahiaTools;
/**
* <p>Title: This class represents a field wrapper used by search engine</p>
@@ -198,11 +195,8 @@
InputStream ins = file.downloadFile();
String charSet = null; // by default open as ascii
CharsetDetection charsetDet = new
CharsetDetection();
- int charsetDetection =
charsetDet.charsetDetection(ins);
- if ( charsetDetection == 0 ){
- // not ascii only
- charSet = charsetDet.getCharset();
- }
+ charsetDet.charsetDetection(ins);
+ charSet = charsetDet.getCharset();
long lastModifiedDate =
System.currentTimeMillis();
try {
lastModifiedDate =
file.getJahiaFileField()
@@ -210,11 +204,57 @@
} catch ( Throwable t ){
logger.debug(t);
}
- strVal = fileExt
- .getContentAsString(file.getPath(),
-
lastModifiedDate,
-
file.downloadFile(),
- charSet);
+
+ // try to load previously extracted data if the file has not changed
+ String formattedPath =
JahiaTools.replacePattern(file.getPath(),"/","\\");
+ formattedPath =
JahiaTools.replacePattern(formattedPath,"\\","_");
+
+ String tmpFilePath =
ServicesRegistry.getInstance()
+
.getJahiaSearchService().getSearchIndexRootDir()
+ + File.separator +
"jahia_pdf_tmpfile_" + formattedPath;
+
+ try {
+ // Deserialize from a file
+ File f = new File(tmpFilePath);
+ if ( f.exists() &&
f.lastModified()>lastModifiedDate ){
+ ObjectInputStream in = new
+ ObjectInputStream(new
+ FileInputStream(f));
+ // Deserialize the object
+ strVal = (String) in.
+ readObject();
+ in.close();
+ logger.info(
+ "Use previous extracted pdf
tmp file " +
+ tmpFilePath);
+ }
+ } catch (ClassNotFoundException e) {
+ logger.debug(e);
+ } catch (IOException e) {
+ //logger.debug(e); file could not exist and it's not an error
+ }
+
+ if ( strVal == null ){
+ strVal = fileExt
+ .getContentAsString(file.
+ getPath(),
+ lastModifiedDate,
+ file.downloadFile(),
+ charSet);
+
+ if ( strVal == null ){
+ strVal = "";
+ }
+ try {
+ // Serialize to a file
+ ObjectOutput out = new
ObjectOutputStream(new
+
FileOutputStream(tmpFilePath));
+ out.writeObject(strVal);
+ out.close();
+ }
+ catch (IOException e) {
+ }
+ }
}
} catch ( Throwable t ){
logger.debug(t);
Index: PDFExtractor.java
===================================================================
RCS file: /home/cvs/repository/jahia/src/java/org/jahia/utils/fileparsers/Attic/PDFExtractor.java,v
retrieving revision 1.3.2.5
retrieving revision 1.3.2.6
diff -u -r1.3.2.5 -r1.3.2.6
--- PDFExtractor.java 25 Oct 2004 16:30:42 -0000 1.3.2.5
+++ PDFExtractor.java 26 Oct 2004 11:17:39 -0000 1.3.2.6
@@ -110,6 +110,11 @@
pdfDocument.close();
} catch ( Throwable t ){
}
+ try {
+ bufFileStream.close();
+ } catch ( Throwable t ){
+ }
+
if ( charSet != null ){
return new InputStreamReader(new ByteArrayInputStream(contents),
charSet);