Hello,
Why does document.get("contents") return null?
Thanks,
Günter
private static void addContent(PortalServlet servlet, Document document, InputStream
is, String documentLocation ) throws IOException {
try {
PDFParser parser = new PDFParser( is );
parser.parse();
COSDocument pdfDocument = parser.getDocument();
if( pdfDocument.isEncrypted() ) {
DecryptDocument decryptor = new DecryptDocument( pdfDocument );
/*Just try using the default password and move on */
decryptor.decryptDocument( "" );
}
/*create a tmp output stream with the size of the content.*/
ByteArrayOutputStream out = new ByteArrayOutputStream();
PDFTextStripper stripper = new PDFTextStripper();
stripper.writeText( pdfDocument, new OutputStreamWriter( out ) );
byte[] contents = out.toByteArray();
InputStreamReader input = new InputStreamReader( new ByteArrayInputStream(
contents ) );
// Add the tag-stripped contents as a Reader-valued Text field so it will
// get tokenized and indexed.
document.add(Field.Text("contents", input ));
servlet.log("documentstripper: "+stripper.getText(pdfDocument));
servlet.log("documentLocation: "+documentLocation);
servlet.log("contents: "+input+" doc: "+document.get("contents"));
servlet.log("document: "+document);
}
catch( CryptographyException e ) {
throw new IOException( "Error decrypting document(" + documentLocation +
"): " + e );
}
catch( InvalidPasswordException e ) {
throw new IOException( "Error: The document(" + documentLocation + ") is
encrypted and will not be indexed." );
}
finally {
if( is != null ) {
is.close();
}
}
}