import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import
org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
import
org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList;
import
org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentGroup;
import
org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentProperties;
import org.apache.pdfbox.text.PDFMarkedContentExtractor;
public class ExtractMarkedContent extends PDFMarkedContentExtractor
{
public ExtractMarkedContent() throws IOException
{
}
public static void main(String[] args) throws IOException
{
PDDocument doc = PDDocument.load(new File("C......\\PDFBox
reactor\\pdfbox\\target\\test-output","ocg-generation.pdf"));
PDOptionalContentProperties ocp =
doc.getDocumentCatalog().getOCProperties();
System.out.println("Group names in document catalog: " +
Arrays.toString(ocp.getGroupNames()));
for (String groupName : ocp.getGroupNames())
{
PDOptionalContentGroup group = ocp.getGroup(groupName);
System.out.println(group.getCOSObject());
}
ExtractMarkedContent extractMarkedContent = new ExtractMarkedContent();
PDPage page = doc.getPage(0);
System.out.println("Property names in page resources: " +
page.getResources().getPropertiesNames());
extractMarkedContent.processPage(page);
List<PDMarkedContent> markedContents =
extractMarkedContent.getMarkedContents();
System.out.println("Extracted contents: ");
for (PDMarkedContent mc : markedContents)
{
PDPropertyList propertyList =
page.getResources().getProperties(COSName.getPDFName(mc.getTag()));
String propName =
propertyList.getCOSObject().getString(COSName.NAME);
System.out.println(mc.getTag() + " (" + propName + "): " +
mc.getContents());
}
doc.close();
}
}
The output is:
Group names in document catalog: [background, enabled, disabled]
COSDictionary{(COSName{Type}:COSName{OCG}) (COSName{Name}:COSString{background}) }
COSDictionary{(COSName{Type}:COSName{OCG}) (COSName{Name}:COSString{enabled}) }
COSDictionary{(COSName{Type}:COSName{OCG}) (COSName{Name}:COSString{disabled}) }
Property names in page resources: [COSName{oc1}, COSName{oc2}, COSName{oc3}]
Extracted contents:
oc1 (background): [P, D, F, , 1, ., 5, :, , O, p, t, i, o, n, a, l, , C, o,
n, t, e, n, t, , G, r, o, u, p, s, Y, o, u, , s, h, o, u, l, d, , s, e, e, ,
a, , g, r, e, e, n, , t, e, x, t, l, i, n, e, ,, , b, u, t, , n, o, , r, e,
d, , t, e, x, t, , l, i, n, e, .]
oc2 (enabled): [T, h, i, s, , i, s, , f, r, o, m, , a, n, , e, n, a, b, l,
e, d, , l, a, y, e, r, ., , I, f, , y, o, u, , s, e, e, , t, h, i, s, ,, ,
t, h, a, t, ', s, , g, o, o, d, .]
oc3 (disabled): [T, h, i, s, , i, s, , f, r, o, m, , a, , d, i, s, a, b, l,
e, d, , l, a, y, e, r, ., , I, f, , y, o, u, , s, e, e, , t, h, i, s, ,, ,
t, h, a, t, ', s, , N, O, T, , g, o, o, d, !]