As far as PDF 1.7 (ISO 32000-1) is concerned, that will handle both of the
defined locations for embedded files – though not necessarily the other places
where embedded file streams can be used.
For PDF 2.0 (ISO 32000-2), there are many more potential places that embedded
file streams can exist.
Leonard
From: "F. E." <exler7...@gmail.com>
Date: Sunday, June 18, 2017 at 12:51 PM
To: "podofo-users@lists.sourceforge.net" <podofo-users@lists.sourceforge.net>
Subject: [Podofo-users] Removing Embedded Attachments and File Annotations
Hello Podofo Users,
I'm using Podofo to remove Embedded Files and File Annotations. My code works
fine so far: no files can be seen using a PDF Viewer, and I even managed to
remove the dependent filestreams (otherwise the files are still "hidden" inside
the pdf and take up space).
What I'm not sure about is whether my removal code is complete (removing all
data) and safe (e.g. no double frees). So I'd like to submit my code here and
ask whether you think the code is correct.
On removing Embedded Files:
There's code somewhere on how to extract embedded files. I refactored this code
and added Clear instructions whenever I thought them necessary for the task.
// Check if given ptr is a reference, if so, resolve ptr reference and
store previous ptr to ref_ptr
template <typename T>
void CheckReference( const PdfMemDocument* doc, T* &ptr, T* &ref_ptr )
{
if ( IS_NULL( ptr ) || !ptr->IsReference() ) { return; }
ref_ptr = ptr;
ptr = doc->GetObjects().GetObject( ref_ptr->GetReference() );
}
// Null-tolerant wrapper around T::Clear(): forwards to ptr->Clear() when ptr
// is set and does nothing when ptr is null.
template <typename T>
void Clear( T* ptr )
{
    if ( !ptr )
    {
        return;
    }
    ptr->Clear();
}
// Wipes a single object taken from the EmbeddedFiles "Names" array.
//
// pObj may be a direct object or an indirect reference; a reference is
// resolved through m_pDocument first. If the resolved object is a dictionary
// with an "EF" entry, the object stored under "EF"/"F" is cleared, then the
// "EF" dictionary and the outer dictionary are emptied. Finally the object
// itself (and the reference object that pointed to it, if any) is cleared.
// Objects that are not dictionaries are simply cleared.
void RemoveEmbeddedFileData( PdfObject *pObj ) const
{
    // After this call pObj is the resolved object and pRefObj (if set) is the
    // reference object that pointed to it.
    PdfObject *pRefObj = NULL;
    CheckReference( m_pDocument, pObj, pRefObj );

    if ( !IS_NULL( pObj ) && pObj->GetDataType() == ePdfDataType_Dictionary )
    {
        PdfDictionary &outerDict = pObj->GetDictionary();

        // "EF" can be stored either directly or as an indirect reference, so it
        // is resolved and type-checked before being treated as a dictionary.
        // GetKey() yielding NULL for a missing key is handled by the IS_NULL
        // checks below.
        PdfObject *pEfObj = outerDict.GetKey("EF");
        PdfObject *pEfRefObj = NULL; // owned by outerDict, released by outerDict.Clear()
        CheckReference( m_pDocument, pEfObj, pEfRefObj );

        if ( !IS_NULL( pEfObj ) && pEfObj->GetDataType() == ePdfDataType_Dictionary )
        {
            PdfDictionary &innerDict = pEfObj->GetDictionary();

            // Clear the object stored under "F" and, when it was referenced
            // indirectly, the reference object as well. Both Clear() calls
            // are no-ops when "F" is absent.
            PdfObject *pStreamObj = innerDict.GetKey("F");
            PdfObject *pStreamRefObj = NULL;
            CheckReference( m_pDocument, pStreamObj, pStreamRefObj );
            Clear( pStreamObj );
            Clear( pStreamRefObj );

            innerDict.Clear();
            // innerDict must not be touched after this line: clearing the
            // object releases the dictionary it refers to.
            Clear( pEfObj );
        }
        outerDict.Clear();
    }
    Clear( pObj );
    Clear( pRefObj );
}
void RemoveEmbeddedFiles()
{
PdfObject *pEmbFilesObj = NULL;
{
const PdfNamesTree *pNamesTree = m_pDocument->GetNamesTree( true );
RETURN_ON_NULL( pNamesTree );
const PdfObject *pNamesTreeObj = pNamesTree->GetObject();
RETURN_ON_NULL( pNamesTreeObj );
pEmbFilesObj = pNamesTreeObj->GetIndirectKey("EmbeddedFiles");
RETURN_ON_NULL( pEmbFilesObj );
}
PdfObject *pEmbFilesRefObj = NULL;
CheckReference( m_pDocument, pEmbFilesObj, pEmbFilesRefObj );
RETURN_ON_NULL( pEmbFilesObj );
PdfObject *pEmbFilesNamesObj = pEmbFilesObj->GetIndirectKey("Names");
RETURN_ON_NULL( pEmbFilesNamesObj );
PdfArray* pEmbFilesNamesArray = &pEmbFilesNamesObj->GetArray();
PdfArray::iterator it = pEmbFilesNamesArray->begin(), it_end =
pEmbFilesNamesArray->end();
for (; it != it_end; ++it ) { RemoveEmbeddedFileData( &(*it) ); }
Clear( pEmbFilesNamesArray );
Clear( pEmbFilesNamesObj );
// Removing 'EmbeddedFiles' object seems not to be required!
//Clear( pEmbFilesObj );
//Clear( pEmbFilesRefObj );
}
On removing File Annotation Data:
bool HasFileStream( const PdfDictionary &dict )
{
return dict.HasKey("EF") &&
dict.GetKey("EF")->GetDictionary().HasKey("F");
}
// Returns the object stored under "EF" -> "F" in dict, or NULL when
// HasFileStream( dict ) reports that no such entry exists.
PdfObject* GetFileStream( PdfDictionary &dict )
{
    if ( !HasFileStream( dict ) )
    {
        return NULL;
    }
    PdfDictionary &efDict = dict.GetKey("EF")->GetDictionary();
    return efDict.GetKey("F");
}
void RemoveFileAttachment
(
const PdfMemDocument *pDoc,
const PdfAnnotation *pAnnot
)
{
if ( IS_NULL( pAnnot ) || !pAnnot->HasFileAttachement() ) { return; }
PdfFileSpec *pFileSpec = pAnnot->GetFileAttachement();
RETURN_ON_NULL( pFileSpec );
PdfObject *pFileSpecObj = pFileSpec->GetObject();
if ( IS_NULL( pFileSpecObj ) || pFileSpecObj->GetDataType() !=
ePdfDataType_Dictionary ) { return; }
PdfDictionary &fileDict = pFileSpecObj->GetDictionary();
PdfObject *pFileStreamObj = GetFileStream( fileDict ),
*pFileStreamRefObj = NULL;
CheckReference( pDoc, pFileStreamObj, pFileStreamRefObj );
RETURN_ON_NULL( pFileStreamObj );
Clear( pFileStreamObj );
Clear( pFileStreamRefObj );
fileDict.Clear();
Clear( pFileSpecObj );
}
void RemoveAnnotationsImpl
(
const PdfMemDocument *pDoc
)
{
int pageCount = pDoc->GetPageCount();
for ( int p = 0; p < pageCount; ++p )
{
PdfPage* pPage = pDoc->GetPage( p );
CONTINUE_ON_NULL( pPage );
int annotCount = pPage->GetNumAnnots();
for ( int a = annotCount - 1; a >= 0; --a )
{
const PdfAnnotation* pAnnot = pPage->GetAnnotation( a );
if ( IS_NULL( pAnnot ) || pAnnot->GetType() !=
ePdfAnnotation_FileAttachement ) { continue; }
RemoveFileAttachment( pDoc, pAnnot );
pPage->DeleteAnnotation( a );
}
}
}
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Podofo-users mailing list
Podofo-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/podofo-users