NATARAJAN THILLAI wrote:
Hi Sergiu,
I am Natarajan from India, and I am currently working on a search engine project. I saw your article on the net (http://article.gmane.org/gmane.comp.jakarta.poi.user/4851). It is very nice and useful to me.


I want to index exe files, so please send me your "com.configworks.cwk.share.Utils" file.

Thanks in advance.

Regards
Natarajan.


------------------------------------------------------------------------

Hi,

I'm glad to find that the code I submitted (I cannot claim it is mine) is useful for other programmers.
I can send you the Utils class, no problem. But we are not indexing exe files. The ExeConverterImpl is an external converter that converts different
file formats to text in batch mode. For example, antiword is such a converter.
Also, the ppt converter I submitted throws an OutOfMemoryError. I'll send the code with the bugfix.


  I wish you luck in your work, and here are the classes:

 Sergiu

/******* Util class *******/
package com.configworks.cwk.share;

import com.configworks.cwk.be.system.CwkConfigurationFactory;
import com.configworks.cwk.be.system.Debug;
import com.configworks.cwk.be.system.ICwkConfiguration;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.turbine.services.intake.model.Field;


/**
 * KM-Portal utilities: assorted helper methods intended to make programming against the
 * KM-Portal easier.
 *
 * <p>{@link #DATE_FORMAT} and {@link #DATE_TIME_FORMAT} are shared {@link SimpleDateFormat}
 * instances, which are not thread-safe. The methods of this class synchronize on the
 * formatter before using it; code that uses the public constants directly should do the same.
 *
 * @author Christine Keim
 * @version 1
 */
public class Utils {

    private static final Log logger = LogFactory.getLog(Utils.class.getName());

    /** Pattern used for plain dates. */
    public static final String DATE_PATTERN = "dd.MM.yyyy";

    /** Pattern used for dates including the time of day. */
    public static final String DATE_TIME_PATTERN = "dd.MM.yyyy HH:mm:ss";

    /** Shared formatter for {@link #DATE_PATTERN}; synchronize on it before use. */
    public static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat(DATE_PATTERN);

    /** Shared formatter for {@link #DATE_TIME_PATTERN}; synchronize on it before use. */
    public final static SimpleDateFormat DATE_TIME_FORMAT = new SimpleDateFormat(DATE_TIME_PATTERN);

    // Read from the file configuration when the class is loaded. Not referenced by any method
    // of this class as shown here, but kept because the lookup happens at class initialization.
    private static final int MAX_FILENAME_LENGTH = Integer.parseInt(
            CwkConfigurationFactory.getConfigurationInstance(
                    CwkConfigurationFactory.PROP_FILE_CONFIGURATION)
                    .getProperty(ICwkConfiguration.FILENAMES_LENGTH_MAX));

    /** Image file names, indexed by the numeric type passed to {@link #getTypeImage(int)}. */
    private static final String[] TYPE_IMAGES = {
        "pdf.gif", "audio.gif", "video.gif", "image.gif",
        "office.gif", "data.gif", "archive.gif", "link.gif",
        "unknown.gif", "word.gif", "excel.gif", "powerpoint.gif"
    };

    /** Image returned by {@link #getTypeImage(int)} for a type outside the table. */
    private static final String UNKNOWN_TYPE_IMAGE = "unknown.gif";

    /**
     * Rating images in half-star steps; index {@code n} is the image for a rating in the
     * interval {@code (n - 1, n]}, index 0 is the image for exactly zero.
     */
    private static final String[] RATING_IMAGES = {
        "stars-0-0.gif", "stars-0-5.gif", "stars-1-0.gif", "stars-1-5.gif",
        "stars-2-0.gif", "stars-2-5.gif", "stars-3-0.gif", "stars-3-5.gif",
        "stars-4-0.gif", "stars-4-5.gif", "stars-5-0.gif"
    };

    /**
     * Returns the date pattern as an instance method (the value of {@link #DATE_PATTERN}).
     *
     * @return {@link #DATE_PATTERN}
     */
    public String getDatePattern() {
        return DATE_PATTERN;
    }

    /**
     * Checks whether the given string is null, empty, or consists only of whitespace.
     *
     * @param v string to check
     * @return true if the string is null or blank, else false
     */
    public static boolean isEmpty(String v) {
        return v == null || v.trim().length() == 0;
    }

    /**
     * Maps a rating to the name of its star image.
     *
     * @param current the rating; negative means "not rated"
     * @return {@code "unrated.gif"} for a negative rating, {@code "stars-0-0.gif"} for zero,
     *         otherwise the half-star image for the rating rounded up, capped at
     *         {@code "stars-5-0.gif"} for anything above 9
     * @see com.configworks.cwk.fe.tools.CwkToolkit#getRatingImage(float)
     * @deprecated Use cwktoolkit.getRatingImage instead
     */
    public static String getRatingImage(float current) {
        if (current < 0) {
            return "unrated.gif";
        }
        if (current == 0) {
            return RATING_IMAGES[0];
        }
        // Steps 1..9 each cover (step - 1, step]; everything else (including NaN, for which
        // every comparison is false) falls through to the top image, as before.
        for (int step = 1; step < RATING_IMAGES.length - 1; step++) {
            if (current <= step) {
                return RATING_IMAGES[step];
            }
        }
        return RATING_IMAGES[RATING_IMAGES.length - 1];
    }

    /**
     * Maps a numeric document type to the name of its icon.
     *
     * @param type index into the icon table
     * @return the icon name, or {@code "unknown.gif"} if {@code type} is outside the table
     * @see com.configworks.cwk.fe.tools.CwkToolkit#getTypeImage(int)
     * @deprecated use cwktoolkit.getTypeImage instead
     */
    public static String getTypeImage(int type) {
        if (type < 0 || type >= TYPE_IMAGES.length) {
            return UNKNOWN_TYPE_IMAGE;
        }
        return TYPE_IMAGES[type];
    }

    /**
     * Expresses {@code quality} as a rounded percentage of {@code maxvalue}.
     *
     * @param quality the value to express as a percentage
     * @param maxvalue the value that corresponds to 100 percent
     * @return the rounded percentage as a string; negative results are reported as
     *         {@code "0"}, and so is a {@code maxvalue} of zero, for which no percentage
     *         exists
     */
    public static String calculatePercentage(float quality, float maxvalue) {
        // Dividing by zero would produce Infinity, which rounds to Integer.MAX_VALUE.
        if (maxvalue == 0) {
            return "0";
        }
        float result = (quality * 100) / maxvalue;
        if (result < 0) {
            result = 0;
        }
        return String.valueOf(Math.round(result));
    }

    /**
     * Upper-cases the first character of a string.
     *
     * @param s the string to capitalize
     * @return {@code s} with its first character upper-cased; null and the empty string are
     *         returned unchanged
     */
    public static String capitalize(String s) {
        if (s == null || s.length() == 0) {
            return s;
        }
        return s.substring(0, 1).toUpperCase() + s.substring(1);
    }

    /**
     * Strips the directory part from a file name. Both {@code /} and {@code \} are accepted
     * as separators, regardless of how the path starts.
     *
     * @param fn a file name, possibly with a leading path
     * @return the part after the last separator, {@code fn} itself if it contains no
     *         separator, or null if {@code fn} is null
     */
    public static String cutFileName(String fn) {
        if (fn == null) {
            return null;
        }
        int lastSeparator = Math.max(fn.lastIndexOf('/'), fn.lastIndexOf('\\'));
        // lastSeparator is -1 when there is no separator, which keeps the whole name.
        return fn.substring(lastSeparator + 1);
    }

    /**
     * Cuts the class name from a packagePath+className string;
     * com.configworks.cwk.share.om.Tutorial becomes Tutorial.
     *
     * @param fn string to cut the class name from
     * @return the part after the last dot, {@code fn} itself if it contains no dot, or null
     *         if {@code fn} is null
     */
    public static String cutClassName(String fn) {
        if (fn == null) {
            return null;
        }
        return fn.substring(fn.lastIndexOf('.') + 1);
    }

    /**
     * Formats a date with {@link #DATE_PATTERN}.
     *
     * @param date the date to format, may be null
     * @return the formatted date, or null if {@code date} is null
     * @see com.configworks.cwk.fe.tools.CwkToolkit#formatDate(Date, Locale)
     * @deprecated use cwktoolkit.formatDate instead
     */
    public static String dateToString(Date date) {
        if (date == null) {
            return null;
        }
        // SimpleDateFormat keeps mutable state while formatting; guard the shared instance.
        synchronized (DATE_FORMAT) {
            return DATE_FORMAT.format(date);
        }
    }

    /**
     * Formats a date with the given {@link SimpleDateFormat} pattern.
     *
     * @param date the date to format, may be null
     * @param format the pattern to format with
     * @return the formatted date, or null if {@code date} is null
     * @deprecated use cwktoolkit.formatDate or cwktoolkit.formatDateTime instead
     */
    public static String dateToString(Date date, String format) {
        if (date == null) {
            return null;
        }
        return new SimpleDateFormat(format).format(date);
    }

    /**
     * Formats the value of an intake field with {@link #DATE_PATTERN}.
     *
     * @param date the field whose value is formatted, may be null
     * @return the formatted value, or null if the field or its value is null
     * @deprecated use cwktoolkit.formatDate or cwktoolkit.formatDateTime instead
     */
    public static String dateToString(Field date) {
        if (date == null) {
            return null;
        }
        Object value = date.getValue();
        if (value == null) {
            return null;
        }
        // SimpleDateFormat keeps mutable state while formatting; guard the shared instance.
        synchronized (DATE_FORMAT) {
            return DATE_FORMAT.format(value);
        }
    }

    /**
     * Converts a Java-style name to a Torque underscore name: the first character is
     * lower-cased and every later upper-case character becomes {@code _} followed by its
     * lower-case form.
     *
     * @param javaname the Java-style name
     * @return the underscore name; null and the empty string are returned unchanged
     */
    public static String jToU(String javaname) {
        if (javaname == null || javaname.length() == 0) {
            return javaname;
        }
        char[] chars = javaname.toCharArray();
        StringBuffer underscore = new StringBuffer();
        underscore.append(Character.toLowerCase(chars[0]));

        for (int i = 1; i < chars.length; i++) {
            if (Character.isUpperCase(chars[i])) {
                underscore.append("_");
                underscore.append(Character.toLowerCase(chars[i]));
            } else {
                underscore.append(chars[i]);
            }
        }

        Debug.println(javaname + " =>" + underscore);

        return underscore.toString();
    }

    /**
     * Replaces every line feed with an HTML {@code <br>} tag.
     *
     * @param in the text to convert
     * @return the converted text, or null if {@code in} is null
     */
    public static String nl2br(String in) {
        if (in == null) {
            return null;
        }
        return in.replaceAll("\n", "<br>");
    }

    /**
     * Prepares text for output; currently identical to {@link #nl2br(String)}.
     *
     * @param in the text to convert
     * @return the converted text, or null if {@code in} is null
     * @see com.configworks.cwk.fe.tools.CwkToolkit#htmlEncode(String)
     * @deprecated use cwktoolkit.htmlencode instead
     */
    public static String out(String in) {
        return nl2br(in);
    }

    /**
     * Converts a Torque underscore name to a Java-style name: each underscore is removed and
     * the character following it is upper-cased.
     *
     * @param uname the underscore name
     * @return the Java-style name, or null if {@code uname} is null
     */
    public static String uToJ(String uname) {
        if (uname == null) {
            return null;
        }
        char[] chars = uname.toCharArray();
        StringBuffer java = new StringBuffer();

        for (int i = 0; i < chars.length; i++) {
            if (chars[i] != '_') {
                java.append(chars[i]);
            } else if (i + 1 < chars.length) {
                i++;
                java.append(Character.toUpperCase(chars[i]));
            }
            // A trailing underscore has no character left to capitalize and is dropped.
        }

        Debug.println(uname + " =>" + java);

        return java.toString();
    }

    /**
     * Executes an operating-system command built from a template. The template must contain
     * the placeholders {@code <source>} and {@code <destination>} (in either order); the
     * first occurrence of each is replaced by the corresponding path.
     *
     * <p>On platforms whose file separator is {@code \}, every {@code /} in the finished
     * command (including {@code params}) is replaced by {@code \}.
     *
     * @param execPath the command template (path to the executable plus placeholders)
     * @param sourcePath the source path (path to the input file)
     * @param destinationPath the destination path (path to the output file)
     * @param params additional parameters (ignored if null or blank)
     * @return the created process, or null if {@code execPath} is null or lacks one of the
     *         placeholders
     * @throws IOException if the command cannot be started
     */
    public static Process executeOSCommand(String execPath, String sourcePath,
            String destinationPath, String params)
            throws IOException {
        final String source = "<source>";
        final String destination = "<destination>";

        if (execPath == null) {
            if (logger.isErrorEnabled()) {
                logger.error("Execution command is not specified!");
            }
            return null;
        }

        int sourceIndex = execPath.indexOf(source);
        int destinationIndex = execPath.indexOf(destination);
        if (sourceIndex < 0 || destinationIndex < 0) {
            if (logger.isWarnEnabled()) {
                logger.warn("Execution command lacks the " + source + " or " + destination
                        + " placeholder: " + execPath);
            }
            return null;
        }

        // Splice the template around whichever placeholder comes first, so that a template
        // naming the destination before the source no longer fails in substring().
        StringBuffer command = new StringBuffer();
        if (sourceIndex < destinationIndex) {
            command.append(execPath.substring(0, sourceIndex));
            command.append(sourcePath);
            command.append(execPath.substring(sourceIndex + source.length(), destinationIndex));
            command.append(destinationPath);
            command.append(execPath.substring(destinationIndex + destination.length()));
        } else {
            command.append(execPath.substring(0, destinationIndex));
            command.append(destinationPath);
            command.append(execPath.substring(destinationIndex + destination.length(),
                    sourceIndex));
            command.append(sourcePath);
            command.append(execPath.substring(sourceIndex + source.length()));
        }

        if (notEmptyString(params)) {
            command.append(" ").append(params);
        }

        String execCommand = command.toString();
        // For Windows change all / in the path to \ otherwise the command cannot be executed.
        if (File.separator.equals("\\")) {
            execCommand = execCommand.replace('/', '\\');
        }

        if (logger.isTraceEnabled()) {
            logger.trace("Executing command: " + execCommand);
        }
        return Runtime.getRuntime().exec(execCommand);
    }

    /**
     * Negation of {@link #isEmpty(String)}.
     *
     * @param s string to check
     * @return true if the string is neither null nor blank
     * @see #isEmpty(String)
     * @since CWK 1.4.0
     */
    public static boolean notEmptyString(String s) {
        return !isEmpty(s);
    }

    /**
     * Checks whether the string is null or {@code ""}. Unlike {@link #isEmpty(String)}, a
     * string consisting only of whitespace is not considered empty here.
     *
     * @param s string to check
     * @return true if {@code s} is null or has length zero
     * @see #isEmpty(String)
     * @since CWK 1.4.0
     * @deprecated use isEmpty instead
     */
    public static boolean isEmptyString(String s) {
        return s == null || s.length() == 0;
    }

}

/****************************** ppt converter implementation ***************/

/* @(#) CWK 1.5 23.06.2004
*
* Copyright 2003-2005 ConfigWorks Informationssysteme & Consulting GmbH
* Universitätsstr. 94/7 9020 Klagenfurt Austria
* www.configworks.com
* All rights reserved.
*/

package com.configworks.cwk.be.search.converters;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.poi.util.LittleEndian;

/**
* Class description
*
* @author sergiu
* @version 1.0
* @since CWK 1.5
*/
public class PPTConverterImpl extends Converter {

static final String lineSeparator = System.getProperty("line.separator");
private BufferedOutputStream txtFileWriter = null;
File dest = null;
/* (non-Javadoc)
* @see com.configworks.cwk.be.search.converters.Converter#convertSource(java.io.File)
*/
public Reader convertSource(File source) {
if (source == null)
return null;
Reader reader = null;
InputStream inputStream = null;
try {
String filename = source.getName();
filename = filename.replace('.', '_');
filename += ".txt";
File tmpDir = new File(_config.getTempDirectory());
tmpDir.mkdirs();
dest = new File(tmpDir.getPath(), filename);
boolean created = dest.createNewFile(); //create the input and output streams
txtFileWriter = new BufferedOutputStream(
new FileOutputStream(dest));
inputStream = new FileInputStream(source);
extractText(inputStream);
if (!dest.exists())
return null;
dest.deleteOnExit();
reader = new BufferedReader(new FileReader(dest));


} catch (Exception e) {
getLogger().error("JavaDocumentConverter cannot convert the source file: "
+ source.getAbsolutePath(), e);
reader = null;
}finally{
try {
if(txtFileWriter != null)
txtFileWriter.close();
if(inputStream != null)
inputStream.close();
} catch (IOException ex) {
if(getLogger().isDebugEnabled())
getLogger().error("Cannot close the stream: " + ex);
}
}
return reader;


}
/**
* Extract the text from a number of presentations.
*/
public boolean extractText(InputStream reader) throws IOException{
if(txtFileWriter == null)
throw new IOException("Writer Not Iititalized!");
POIFSReader r = new POIFSReader();


/* Register a listener for *all* documents. */
PptDocReaderListener listener = new PptDocReaderListener(txtFileWriter);
r.registerListener(listener, "PowerPoint Document");
r.read(reader);
//if no exception was trown, consider that the conversion was successful return true;
} class PptDocReaderListener implements POIFSReaderListener{
private BufferedOutputStream writer = null;
public PptDocReaderListener(){
}
public PptDocReaderListener(BufferedOutputStream writer){
this.writer = writer;
}


public void processPOIFSReaderEvent(POIFSReaderEvent event) {
try{
org.apache.poi.poifs.filesystem.DocumentInputStream dis = null;
if(!event.getName().equalsIgnoreCase("PowerPoint Document"))
return;
dis=event.getStream();
byte btoWrite[]= new byte[12];
dis.read(btoWrite);
btoWrite = new byte[dis.available()];
dis.read(btoWrite, 0, dis.available());


long type = 0;
long size = 0;
int offset = 0;
int length = 0;
for(int i=0; i<btoWrite.length-20; i++){


type=LittleEndian.getUShort(btoWrite,i+2);
size=LittleEndian.getUInt(btoWrite,i+4);
if (type==4008){
length = (int)size+3;
offset = i+4+1;
writer.write(btoWrite, offset, length);
//skip the bytes that were already read i = Math.max(i, (offset + length));
}
}
PropertySetFactory.create(event.getStream());
}catch (Exception e){
String msg = "Cannot index ppt file: \n";
if(getLogger().isErrorEnabled())
getLogger().error(msg + e);
} } }


/* (non-Javadoc)
* @see com.configworks.cwk.be.search.converters.JavaDocumentConverter#initWriter(java.io.File)
*/
public void initWriter(File dest) throws IOException {
if (txtFileWriter == null)
txtFileWriter = new BufferedOutputStream(new FileOutputStream(dest));
}


/* (non-Javadoc)
* @see com.configworks.cwk.be.search.converters.JavaDocumentConverter#closeWriter()
*/
public void closeWriter() throws IOException {
if(txtFileWriter != null)
txtFileWriter.close();
else
throw new IOException("Cannot close the writer, the object is Null!");
}
}






Do you Yahoo!?
Take Yahoo! Mail with you! <http://us.rd.yahoo.com/mail_us/taglines/mobile/*http://mobile.yahoo.com/maildemo> Get it on your mobile phone.




---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]



Reply via email to