Power Point Extractor Source Codes

Koundinya \(Sudhakar Chavali\) Sun, 01 Aug 2004 13:45:43 -0700

Hello All,

This was my first contribution to the Jakarta team:
http://wiki.apache.org/jakarta-lucene-data/attachments/PowerPoint/attachments/PPT2Text.java
It seems another expert (Ryan Rhodes - [EMAIL PROTECTED]) has already
started working on it, based on that first contribution.


That sounds great to me.

So, in order to speed up development of the PowerPoint extractor, I wanted to
contribute our team's work on it.

Authors :- Sudhakar Chavali ([EMAIL PROTECTED]) and Hari Shanker Goud
([EMAIL PROTECTED])


Please have a look at the source code below.


Regards
Sudhakar

____________________________________________________________________________________

/**
 * Title: DocumentParserException class
 * Description: Root checked exception for the errors that the different
 * document parsers can raise while reading or converting a document.
 * @author Sudhakar
 * @version 1.0
 */
public class DocumentParserException extends Exception {

  /**
   * Creates an exception that carries neither a detail message nor a cause.
   */
  public DocumentParserException() {
    super();
  }

  /**
   * Creates an exception that carries only a detail message.
   * @param message text describing the failure
   */
  public DocumentParserException(String message) {
    super(message);
  }

  /**
   * Creates an exception that carries a detail message and the underlying cause.
   * @param message text describing the failure
   * @param cause the throwable that triggered this exception
   */
  public DocumentParserException(String message, Throwable cause) {
    super(message, cause);
  }

}
_____________________________________________________________________________________

import java.io.*;

/**
 * Title: Summary Base
 * Description: Generic contract for reading a document's summary information
 * and exposing it field by field, as XML, or as plain text.
 * @author Sudhakar Chavali
 * @version 1.0
 */
public interface SummaryBase {

  /**
   * Gives the author recorded for the document.
   * @return String
   */
  String getDocAuthor();

  /**
   * Gives the creation date recorded for the document.
   * @return String
   */
  String getDocCreatedDate();

  /**
   * Gives the keywords recorded for the document.
   * @return String
   */
  String getDocKeywords();

  /**
   * Gives the comments recorded for the document.
   * @return String
   */
  String getDocComments();

  /**
   * Gives the name of the document.
   * @return String
   */
  String getDocName();

  /**
   * Gives the subject recorded for the document.
   * @return String
   */
  String getDocSubject();

  /**
   * Gives the title recorded for the document.
   * @return String
   */
  String getDocTitle();

  /**
   * Reads the summary information of the document.
   * @throws DocumentParserException
   */
  void read() throws DocumentParserException;

  /**
   * Writes the summary information, formatted as XML, into a file.
   * @param strXMLFile the file to write the XML into
   * @throws DocumentParserException
   */
  void write(String strXMLFile) throws DocumentParserException;

  /**
   * Writes the summary information, formatted as XML, into an OutputStream.
   * @param out the stream to write the XML into
   * @throws DocumentParserException
   */
  void write(OutputStream out) throws DocumentParserException;

  /**
   * Gives the summary information as an XML String.
   * @return String
   * @throws DocumentParserException
   */
  String getSummaryAsXML() throws DocumentParserException;

  /**
   * Gives the summary information as plain text.
   * @return String
   * @throws DocumentParserException
   */
  String getSummaryAsText() throws DocumentParserException;
}

______________________________________________________________________________________

import java.io.*;

/**
 * Generic contract for a document whose content is read and parsed into plain
 * text, and whose summary information can be obtained as text or XML.
 */
public interface Document {

  /**
   * Gives the document text produced by parsing. Call {@link #read()} first.
   * @return String
   * @see #read()
   * @throws DocumentParserException
   */
  String getText() throws DocumentParserException;

  /**
   * Gives the parsed document text as a byte array. Call {@link #read()} first.
   * @return byte[]
   * @throws DocumentParserException
   */
  byte[] getBytes() throws DocumentParserException;

  /**
   * Writes the parsed document text into the given OutputStream. Call
   * {@link #read()} first.
   * @param out the stream that receives the text
   * @throws DocumentParserException
   */
  void write(OutputStream out) throws DocumentParserException, Exception;

  /**
   * Reads the document and parses it into plain text.
   * @throws DocumentParserException
   */
  void read() throws DocumentParserException, Exception;

  /**
   * Gives the document summary information as plain text.
   * @return String
   */
  String getDocumentSummaryAsText();

  /**
   * Gives the document summary information as an XML String.
   * @return String
   */
  String getDocumentSummaryAsXML();

  /**
   * Gives an InputStream over the document text.
   * @return InputStream
   * @throws DocumentParserException
   */
  InputStream getDocumentTextStream() throws DocumentParserException;

  /**
   * Gives an InputStream over the document summary information.
   * NOTE(review): this was originally described as the XML form of the summary,
   * but PPTDocument streams the plain-text summary - confirm which is intended.
   * @return InputStream
   * @throws DocumentParserException
   */
  InputStream getDocumentSummaryStream() throws DocumentParserException;

}


______________________________________________________________________________________

/**
 * <p>Title: PPTSlide</p>
 * <p>Description: A class that holds the Powerpoint slide</p>
 * @author Hari Shanker, Sudhakar Chavali
 * @version 1.0
 */

import java.util.*;
import java.io.*;

class PPTSlide {

  /**
   * Number identifying this slide
   */
  protected long slideNumber;

  /**
   * Text fragments collected for this slide, in insertion order
   */
  protected Vector contents;

  /**
   * Creates an empty holder for the contents of one Power Point slide
   * @param number the slide number to remember
   */
  public PPTSlide(long number) {
    this.slideNumber = number;
    this.contents = new Vector();
  }

  /**
   * Appends one piece of content to this slide
   * @param content the text to append
   */
  public void addContent(String content) {
    this.contents.add(content);
  }

  /**
   * Gives the (live, not copied) vector that holds the slide contents
   * @return Vector
   */
  public Vector getContent() {
    return this.contents;
  }

  /**
   * Gives the number this slide was created with
   * @return long
   */
  public long getSlideNumber() {
    return this.slideNumber;
  }

}

_____________________________________________________________________________________





import java.io.OutputStream;
import java.io.InputStream;
import java.io.*;

/**
 * <p>Title: Parsers</p>
 * <p>Description: Class that parses the Power Point Document Content and it's summary 
to text</p>
 * @author Sudhakar Chavali
 * @version 1.0
 */

public class PPTDocument
    implements Document {
  /**
   * Checks whether the summary information was already read or not
   */

  private boolean bRead = false;

  /**
   * Holds the Powerpoint Document Summary Content
   */
  private SummaryBase summary = null;

  /**
   * Holds the Powerpoint Document Summary Content
   */
  private String docName = null;

  /**
   * Holds the Powerpoint Document   Text
   */
  private String pptText = "";

  /**
   * Constructor for setting  the PowerPoint document path for initialising the POI 
object
   * @param docName
   */
  public PPTDocument(String docName) {
    this.docName = docName;
    summary = new MSDocumentSummary(docName);

  }

  /**
   * returns the parsed Powerpoint Document text
   * @return String
   */
  public String getText() {

    return pptText;

  }

  /**
   * returns the Powerpoint Document text as bytes
   * @return byte[]
   */
  public byte[] getBytes() {
    return getText().getBytes();
  }

  /**
   * Writes the Powerpoint Document Text into OutputStream object
   * @param out
   * @throws MSPowerPointDocumentParserException
   */
  public void write(OutputStream out) throws
      MSPowerPointDocumentParserException {
    try {
      out.write(getText().getBytes());
    }
    catch (Throwable _docError) {
      throw new MSPowerPointDocumentParserException(
          "Error Raised while writing the text into OutputStream Object \nError Cause 
: " +
          _docError, _docError);
    }

  }

  /**
   * Reads the Powerpoint document for getting the text from it.
   * @throws MSPowerPointDocumentParserException
   */
  public void read() throws MSPowerPointDocumentParserException {
    PPT2Text ppt2Text = new PPT2Text(docName);
    ppt2Text.read();
    pptText = ppt2Text.getText();

    pptText = pptText.replaceAll("\r", "\n");
    pptText = pptText.replaceAll("\n", "\r\n");

  }

  /**
   * returns the document summary as tab delimited text
   * @return
   */
  public String getDocumentSummaryAsText() {
    try {
      if (!bRead) {
        summary.read(); //read the summary object
        bRead = true; //ensure that summary information was read only one time
      }
      return summary.getSummaryAsText(); //Build and return the Summary as Normal text

    }
    catch (Exception ex) {
      return "";
    }

  }

  /**
   * returns the document summary as xml
   * @return
   */
  public String getDocumentSummaryAsXML() {
    try {
      if (!bRead) {
        summary.read(); //read the summary information of a document
        bRead = true; //ensure that summary information was read only one time
      }
      return summary.getSummaryAsXML(); //build annd return the summary as XML string
    }
    catch (Exception ex) {
      return "";
    }

  }

  /**
   * returns the document text as Stream Object
   * @return InputStream
   * @throws MSPowerPointDocumentParserException
   */
  public InputStream getDocumentTextStream() throws
      MSPowerPointDocumentParserException {
    try {
      ByteArrayInputStream in = new ByteArrayInputStream(this.getBytes()); //Write the 
Document
Text in InputStream Object
      return (InputStream) in; //return InputStream Object
    }
    catch (Throwable _documentError) {
      //EXCEPTION RAISED WHILE CREATING THE InputStream OBJECT
      throw new MSPowerPointDocumentParserException(
          "Unable to return the document text as an InputStream\n\tException Root :" +
          _documentError);
    }

  }

  /**
   * returns the document summary as stream object
   * @return InputStream
   * @throws MSPowerPointDocumentParserException
   */
  public InputStream getDocumentSummaryStream() throws
      MSPowerPointDocumentParserException {
    try {
      /*
       A method that reads the Document Summary Stream in  InputStream Object
       */
      ByteArrayInputStream in = new ByteArrayInputStream(
          getDocumentSummaryAsText().getBytes()); //Write the Summary Information into 
the
InputStream Object
      return (InputStream) in; //return InputStream Object
    }
    catch (Throwable _documentError) {
      //error raised while creating the document summary info stream object
      // throw it
      throw new MSPowerPointDocumentParserException(
          "Unable to get Document Summary Information as Stream\n\tException Root:  " +
          _documentError);
    }

  }

}
________________________________________________________________________________________________



/**
 * <p>Title: PPTConstants</p>
 * <p>Description: Record type and id values that PPT2Text compares against
 * while scanning the bytes of a Powerpoint document.</p>
 * @author Sudhakar Chavali
 * @version 1.0
 */

public interface PPTConstants {

  /** Record type (0x03F3) at which PPT2Text starts collecting a new slide. */
  int PPT_SLIDEPERSISTANT_ATOM = 1011;

  /** Record type (0xF008) that PPT2Text treats as the start of a drawing group. */
  int PPT_DIAGRAMGROUP_ATOM = 61448;

  /** Record type (0x0FA0) whose payload PPT2Text decodes as two-byte characters. */
  int PPT_TEXTCHAR_ATOM = 4000;

  /** Record type (0x0FA8) whose payload PPT2Text decodes byte by byte. */
  int PPT_TEXTBYTE_ATOM = 4008;

  /** Record type (0x0FF5) from which PPT2Text reads the edit and directory offsets. */
  int PPT_USEREDIT_ATOM = 4085;

  /** Drawing group id (0x0400) whose text PPT2Text skips as master slide content. */
  int PPT_MASTERSLIDE = 1024;
}

_________________________________________________________________________________________________






/**
 * <p>Title: PPTClientTextBox</p>
 * <p>Description:  A class that holds the PowerPoint client text box content</p>
 * @author Hari Shanker, Sudhakar Chavali
 * @version 1.0
 */

import java.util.*;
import java.io.*;

public class PPTClientTextBox {

  /**
   * Id this client text box was created with
   */
  protected long currentID;

  /**
   * Text currently stored for this client text box
   */
  protected String content;

  /**
   * Creates a client text box with the given id and empty content
   * @param number the id of the text box
   */
  public PPTClientTextBox(long number) {
    this(number, "");
  }

  /**
   * Creates a client text box with the given id and content
   * @param number the id of the text box
   * @param content the initial text
   */
  public PPTClientTextBox(long number, String content) {
    this.currentID = number;
    this.content = content;
  }

  /**
   * Replaces the stored text of this client text box
   * @param content the new text
   */
  public void setContent(String content) {
    this.content = content;
  }

  /**
   * Gives the stored text of this client text box
   * @return String
   */
  public String getContent() {
    return this.content;
  }

  /**
   * Gives the id of this client text box
   * @return long
   */
  public long getID() {
    return this.currentID;
  }
}
_________________________________________________________________________________________________



import java.io.*;
import java.util.*;
import org.apache.poi.hpsf.*;
import org.apache.poi.poifs.eventfilesystem.*;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.hdf.extractor.*;

/**
 * <p>Title: PPT2Text</p>
 * <p>Description: A class that parses the Powerpoint document content to text </p>
 * @author Hari Shanker, Sudhakar Chavali
 * @version 1.0
 */

public class PPT2Text
    implements PPTConstants {
  /**
   *
   * <p>Title: PPTListener</p>
   * <p>Description:  Class that used to handle the Power Point Events</p>
   * @author Hari Shanker,Sudhakar Chavali
   * @version 1.0
   */

  class PPTListener
      implements POIFSReaderListener {

    public void processPOIFSReaderEvent(POIFSReaderEvent event) {

      try {

        org.apache.poi.poifs.filesystem.DocumentInputStream _documentStream = null;

        // Checking for PowerPoint Document Stream
        if (!event.getName().startsWith("PowerPoint Document")) {
          return;
        }

        _documentStream = event.getStream();

        byte pptdata[] = new byte[_documentStream.available()];
        _documentStream.read(pptdata, 0, _documentStream.available());

        int sNum = 0;

        long offset = 0, offsetEnd = 0;
        long offsetPD = 0, oldoffsetPD = 0, docRef = 0, maxPresist = 0;

        // Traverse Bytearray to get CurrentUserEditAtom

        // Call to extract the Text in all PlaceHolders

        // To hold PPTClientTextBox objects for mapping into Slide Objects
        java.util.Hashtable _containerClientTextBox = new java.util.Hashtable();

        // Traverse ByteArray to identiy edit paths of ClientTextBoxes
        for (long i = 0; i < pptdata.length - 20; i++) {
          long type = LittleEndian.getUShort(pptdata, (int) i + 2);
          long size = LittleEndian.getUInt(pptdata, (int) i + 4);

          if (type == PPT_USEREDIT_ATOM) { // Checking the Record Header (UserEditAtom)
            long lastSlideID = LittleEndian.getInt(pptdata, (int) i + 8);
            long version = LittleEndian.getUInt(pptdata, (int) i + 12);
            offset = LittleEndian.getUInt(pptdata, (int) i + 16);
            offsetPD = LittleEndian.getUInt(pptdata,
                                            (int) i + 20);

            // Call to extract ClientTextBox text in each UserEditAtom
            _containerClientTextBox = extractClientTextBoxes(
                _containerClientTextBox, offset, pptdata, offsetPD);

          }
        }

        Vector slides = extractPlaceHoders(offset, pptdata, offsetPD);

        if (slides.size() == 0) {
          slides.addElement(new PPTSlide(256));
        }

        PPTSlide _slide = (PPTSlide) slides.get(slides.size() - 1);

        for (Enumeration enum = _containerClientTextBox.elements();
             enum.hasMoreElements(); ) {
          PPTClientTextBox _clientTextBox = (PPTClientTextBox) enum.nextElement();
          _slide.addContent(_clientTextBox.getContent());

        }

        //Merging ClientTextBox data with Slide Data

        // Printing the text from Slides vector object (need further modification)

        for (int i = 0; i < slides.size(); i++) {

          _slide = (PPTSlide) slides.get(i);

          Vector scontent = _slide.getContent();
//          StringBuffer _stringbuffer = new StringBuffer();
          for (int j = 0; j < scontent.size(); j++) {
            pptTextBuffer.append(scontent.get(j).toString());
          }

        }

      }
      catch (Throwable ex) {

        return;
      }
    }
  }

  /**
   * Method that returns the client text boxes of a slide
   * @param clientTextBoxContainer
   * @param offset
   * @param pptBytes
   * @param offsetPD
   * @return Hashtable
   * @throws MSPowerPointDocumentParserException
   */

  public java.util.Hashtable extractClientTextBoxes(java.util.Hashtable
      _containerClientTextBox, long offset, byte[] pptdata, long offsetPD) throws
      Throwable {

    //To hold temparary data
    ByteArrayOutputStream _outStream = new ByteArrayOutputStream();

    PPTClientTextBox _clientTextBox = null;

    // Traversing the bytearray upto Presist directory position
    for (long i = offset; i < offsetPD - 20; i++) {

      try {

        long rinfo = LittleEndian.getUShort(pptdata, (int) i);
        long type = LittleEndian.getUShort(pptdata, (int) i + 2); // Record Type
        long size = LittleEndian.getUInt(pptdata, (int) i + 4); // Record Size

        if (type == PPT_DIAGRAMGROUP_ATOM) { //Record type is of Drawing Group

          long shapeCount = LittleEndian.getUInt(pptdata, (int) i + 8); // Total 
number of objects
          long _currentID = LittleEndian.getInt(pptdata, (int) i + 12); // Group 
ID+number of
objects

          _currentID = ( (int) (_currentID / 1024)) * 1024;

          if (_currentID == PPT_MASTERSLIDE) { // Ignore Master Slide objects
            i++;
            continue;
          }

          //Check for the ClientTextBox GroupID existence
          if (!_containerClientTextBox.containsKey(new Long(_currentID))) {
            _clientTextBox = new PPTClientTextBox(_currentID);
            _containerClientTextBox.put(new Long(_currentID), _clientTextBox);
          }
          else {
            // If exists get Client Textbox Group
            _clientTextBox = (PPTClientTextBox) _containerClientTextBox.get(new
                Long(_currentID));
            _clientTextBox.setContent("");
          }

          // Iterating the bytearray for TextCharAtoms and TextBytesAtom
          for (long j = i + 8; j < offsetPD - 20; j++) {
            try {
              long nrinfo = LittleEndian.getUShort(pptdata, (int) j);
              long ntype = LittleEndian.getUShort(pptdata, (int) j + 2); //Record Type
              long nsize = LittleEndian.getUInt(pptdata, (int) j + 4); // Record size

              if (ntype == PPT_DIAGRAMGROUP_ATOM) { // Break the loop if next GroupID 
found
                i = j - 1;
                break;
              }
              else if (ntype == PPT_TEXTBYTE_ATOM) { //TextByteAtom record

                _outStream = new ByteArrayOutputStream();
                long ii = 0;
                for (ii = j + 6; ii <= j + 6 + nsize; ii++) { // For loop to changed 
to a function
                  short ch = Utils.convertBytesToShort(pptdata, (int) ii + 2);
                  if (ch == 0 || ch == 16 || ch == 13 || ch == 10) {
                    _outStream.write( (byte) '\r');

                  }
                  else if (ch == 0x201c) { // for left double quote
                    _outStream.write( (byte) 147);
                  }
                  else if (ch == 0x201d) { // for right double quote
                    _outStream.write( (byte) 148);

                  }
                  else if (ch == 0x2019) { // for right single quote
                    _outStream.write( (byte) 146);
                  }
                  else if (ch == 0x2018) { // for left single quote
                    _outStream.write( (byte) 145);
                  }
                  else if (ch == 0x2013) { // for '-' character
                    _outStream.write( (byte) 150);
                  }
                  else {
                    _outStream.write( (byte) ch);
                  }
                }

                // Setting the identified text for Current groupID
                _clientTextBox.setContent(_clientTextBox.getContent() +
                                          new String(_outStream.toByteArray()));

              }
              else if (ntype == PPT_TEXTCHAR_ATOM) { // TextCharAtom record
                _outStream = new ByteArrayOutputStream();
                String strTempContent = new String(pptdata, (int) j + 6,
                    (int) (nsize) + 2);
                byte bytes[] = strTempContent.getBytes();

                for (int ii = 0; ii < bytes.length - 1; ii += 2) { // For loop to 
changed to a
function
                  short ch = Utils.convertBytesToShort(bytes, ii);
                  if (ch == 0 || ch == 16 || ch == 13 || ch == 10) {
                    _outStream.write( (byte) '\r');

                  }

                  else if (ch == 0x201c) {
                    _outStream.write( (byte) 147);

                  }
                  else if (ch == 0x201d) {
                    _outStream.write( (byte) 148);

                  }
                  else if (ch == 0x2019) {
                    _outStream.write( (byte) 146);
                  }
                  else if (ch == 0x2018) {
                    _outStream.write( (byte) 145);
                  }
                  else if (ch == 0x2013) { // for - character
                    _outStream.write( (byte) 150);
                  }

                  else {
                    _outStream.write( (byte) ch);
                  }
                }

                // Setting the identified text for Current groupID
                _clientTextBox.setContent(_clientTextBox.getContent() +
                                          new String(_outStream.toByteArray()));
              }
            }
            catch (Throwable e) {
              break;
            }
          }
        }
      }
      catch (Throwable ee) {
        return _containerClientTextBox;
      }
    }
    return _containerClientTextBox;
  }

  /**
   * Method that returns the Powerpoint place holders
   * @param offset
   * @param pptBytes
   * @param offsetPD
   * @return Vector
   * @throws MSPowerPointDocumentParserException
   */

  public Vector extractPlaceHoders(long offset, byte[] pptdata, long offsetPD) throws
      Throwable {

    int sNum = 0;

    Vector slides = new Vector(); // To All Slides data

    PPTSlide currentSlide = null; // Object to hold current slide data

    ByteArrayOutputStream _outStream = new ByteArrayOutputStream(); // To store data 
found in
TextCharAtoms and TextBytesAtoms

    for (long i = offset; i < pptdata.length - 20; i++) {
      try {
        long rinfo = LittleEndian.getUShort(pptdata, (int) i);
        long type = LittleEndian.getUShort(pptdata, (int) i + 2);
        long size = LittleEndian.getUInt(pptdata, (int) i + 4);

        if (type == PPT_TEXTBYTE_ATOM) { //TextByteAtom record

          _outStream = new ByteArrayOutputStream();
          long ii = 0;
          for (ii = i + 6; ii <= i + 6 + size; ii++) {
            short ch = Utils.convertBytesToShort(pptdata, (int) ii + 2);

            if (ch == 0 || ch == 16 || ch == 13 || ch == 10) {
              _outStream.write( (byte) '\r');

            }
            else if (ch == 0x201c) { // for left double quote
              _outStream.write( (byte) 147);
            }
            else if (ch == 0x201d) { // for right double quote
              _outStream.write( (byte) 148);
            }
            else if (ch == 0x2019) { // for right single quote
              _outStream.write( (byte) 146);
            }
            else if (ch == 0x2018) { // for left single quote
              _outStream.write( (byte) 145);
            }
            else if (ch == 0x2013) { // for '-' character
              _outStream.write( (byte) 150);
            }
            else {
              _outStream.write(ch);
            }
          }

          // Setting the identified text for Current Slide
          currentSlide.addContent(_outStream.toString());

        }
        else if (type == PPT_TEXTCHAR_ATOM) { //TextCharAtom record
          _outStream = new ByteArrayOutputStream();
          String strTempContent = new String(pptdata, (int) i + 6,
                                             (int) (size) + 2);
          byte bytes[] = strTempContent.getBytes();

          for (int ii = 0; ii < bytes.length - 1; ii += 2) {
            short ch = Utils.convertBytesToShort(bytes, ii);

            if (ch == 0 || ch == 16 || ch == 13 || ch == 10) {
              _outStream.write( (byte) '\r');
            }
            else if (ch == 0x201c) {
              _outStream.write( (byte) 147);
            }
            else if (ch == 0x201d) {
              _outStream.write( (byte) 148);
            }
            else if (ch == 0x2019) {
              _outStream.write( (byte) 146);
            }
            else if (ch == 0x2018) {
              _outStream.write( (byte) 145);
            }
            else if (ch == 0x2013) { // for - character
              _outStream.write( (byte) 150);
            }

            else {
              _outStream.write( (byte) ch);
            }
          }

          // Setting the identified text for Current Slide
          currentSlide.addContent(_outStream.toString());
        }
        else if (type == PPT_SLIDEPERSISTANT_ATOM) { // SlidePresistAtom Record type
          if (sNum != 0) {
            _outStream = new ByteArrayOutputStream();

            long slideID = LittleEndian.getUInt(pptdata,
                                                (int) i + 20);

            currentSlide = new PPTSlide(slideID);
            //currentSlide.addContent(_outStream.toString());
            slides.addElement(currentSlide);
          }
          sNum++;
        }
        else
        if (type == PPT_DIAGRAMGROUP_ATOM) { //DG
          break;
        }
      }
      catch (Throwable ee) {

      }

      /*******************************************************************/

    }

    return slides;
  }

  /**
   * Constructor that takes a Powerpoint document name as an argument for getting the 
text
   * @param fileName
   */
  public PPT2Text(String fileName) {
    this.docName = fileName;
  }

  /**
   * Method that reads the Powerpoint document for parsing the text
   * @throws MSPowerPointDocumentParserException
   */
  public void read() throws MSPowerPointDocumentParserException {
    try {
      POIFSReader reader = new POIFSReader();
      reader.registerListener(new PPTListener());
      reader.read(new FileInputStream(docName));
    }
    catch (Throwable _docError) {
      throw new MSPowerPointDocumentParserException(
          "Unable to read the PPT Document \nError Cause : " + _docError,
          _docError);
    }

  }

  /**
   * returns the PowerPoint text
   * @return String
   */
  public String getText() {

    return pptTextBuffer.toString();
  }

  /**
   * Holds the Powerpoint document name
   */
  private String docName;

  /**
   * Holds the parsed Powerpoint Text
   */
  private StringBuffer pptTextBuffer = new StringBuffer();

}
_________________________________________________________________________________________________







/**
 * Title: MSPowerPointDocumentParserException
 * Description: Exception raised by the Powerpoint document classes when a
 * document cannot be read, parsed or written.
 * @author Sudhakar Chavali
 * @see PPTDocument
 * @version 1.0
 */
public class MSPowerPointDocumentParserException extends DocumentParserException {

  /**
   * Builds the exception from a detail message only.
   * @param message text describing the failure
   */
  public MSPowerPointDocumentParserException(String message) {
    super(message);
  }

  /**
   * Builds the exception from a detail message and the underlying cause.
   * @param message text describing the failure
   * @param cause the throwable that triggered this exception
   */
  public MSPowerPointDocumentParserException(String message, Throwable cause) {
    super(message, cause);
  }

}





=====
"No one can earn a million dollars honestly."- William Jennings Bryan (1860-1925) 

"Make everything as simple as possible, but not simpler."- Albert Einstein (1879-1955)

"It is dangerous to be sincere unless you are also stupid."- George Bernard Shaw 
(1856-1950)


        
                
__________________________________
Do you Yahoo!?
New and Improved Yahoo! Mail - 100MB free storage!
http://promotions.yahoo.com/new_mail 

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Power Point Extractor Source Codes

Reply via email to