This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 36b6af2a1 TIKA-4434 -- extract more info out of ppt/pptx (#2243)
36b6af2a1 is described below

commit 36b6af2a1ba02a7522c68c11b90810e8dec16486
Author: Tim Allison <[email protected]>
AuthorDate: Thu Jun 5 17:02:55 2025 -0400

    TIKA-4434 -- extract more info out of ppt/pptx (#2243)
    
    * TIKA-4434 -- extract more info out of ppt/pptx
---
 .../main/java/org/apache/tika/metadata/Office.java |   5 +
 .../tika/parser/microsoft/HSLFExtractor.java       |  60 ++++++-
 .../ooxml/OOXMLWordAndPowerPointTextHandler.java   |  13 ++
 .../ooxml/SXSLFPowerPointExtractorDecorator.java   |  10 +-
 .../ooxml/XSLFPowerPointExtractorDecorator.java    | 188 +++++++++++++--------
 5 files changed, 194 insertions(+), 82 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Office.java b/tika-core/src/main/java/org/apache/tika/metadata/Office.java
index 7883df999..4f0146aeb 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/Office.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Office.java
@@ -175,4 +175,9 @@ public interface Office {
 
     Property COMMENT_PERSONS = Property.internalTextBag("msoffice:comment-person-display-name");
 
+    Property HAS_HIDDEN_SLIDES = Property.internalBoolean("msoffice:ppt:has-hidden-slides");
+
+    Property NUM_HIDDEN_SLIDES = Property.internalInteger("msoffice:ppt:num-hidden-slides");
+
+    Property HAS_ANIMATIONS = Property.internalBoolean("msoffice:ppt:has-animations");
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
index 31588d1c9..2fe46014d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
@@ -28,6 +28,7 @@ import org.apache.poi.common.usermodel.Hyperlink;
 import org.apache.poi.hslf.exceptions.EncryptedPowerPointFileException;
 import org.apache.poi.hslf.model.HeadersFooters;
 import org.apache.poi.hslf.record.DocInfoListContainer;
+import org.apache.poi.hslf.record.RecordContainer;
 import org.apache.poi.hslf.record.RecordTypes;
 import org.apache.poi.hslf.record.VBAInfoAtom;
 import org.apache.poi.hslf.record.VBAInfoContainer;
@@ -59,6 +60,7 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.sax.XHTMLContentHandler;
@@ -66,6 +68,15 @@ import org.apache.tika.utils.StringUtils;
 
 public class HSLFExtractor extends AbstractPOIFSExtractor {
 
+    //This is from Andreas: https://stackoverflow.com/a/45664920
+    private static final int[] TIMING_RECORD_PATH = {
+            RecordTypes.ProgTags.typeID,
+            RecordTypes.ProgBinaryTag.typeID,
+            RecordTypes.BinaryTagData.typeID
+    };
+
+    private static final int EXT_TIME_NODE_CONTAINER = 0xf144;
+
     public HSLFExtractor(ParseContext context, Metadata metadata) {
         super(context, metadata);
     }
@@ -93,6 +104,8 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
             xhtml.startElement("div", "class", "slideShow");
 
             /* Iterate over slides and extract text */
+            int hiddenSlides = 0;
+            Set<String> commentAuthors = new HashSet<>();
             for (HSLFSlide slide : _slides) {
                 xhtml.startElement("div", "class", "slide");
                 HeadersFooters slideHeaderFooters =
@@ -144,7 +157,7 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
                         xhtml.endElement("p");
                     }
                 }
-                handleComments(slide, xhtml);
+                handleComments(slide, xhtml, commentAuthors);
                 handleNotes(slide, notesHeadersFooters, xhtml);
 
                 // Now any embedded resources
@@ -152,8 +165,15 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
 
                 // Slide complete
                 xhtml.endElement("div");
+                if (slide.isHidden()) {
+                    hiddenSlides++;
+                }
+                findAnimations(slide);
+            }
+            if (hiddenSlides > 0) {
+                parentMetadata.set(Office.NUM_HIDDEN_SLIDES, hiddenSlides);
+                parentMetadata.set(Office.HAS_HIDDEN_SLIDES, true);
             }
-
             handleSlideEmbeddedPictures(ss, xhtml);
             handleShowEmbeddedResources(ss, xhtml, true);
 
@@ -167,6 +187,24 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
         xhtml.endElement("div");
     }
 
+    private void findAnimations(HSLFSlide slide) {
+        if (parentMetadata.get(Office.HAS_ANIMATIONS) != null) {
+            return;
+        }
+        RecordContainer lastRecord = slide.getSheetContainer();
+        for (int ri : TIMING_RECORD_PATH) {
+            if (lastRecord == null) {
+                return;
+            }
+            lastRecord = (RecordContainer) lastRecord.findFirstOfType(ri);
+
+        }
+
+        if (lastRecord != null && lastRecord.findFirstOfType(EXT_TIME_NODE_CONTAINER) != null) {
+            parentMetadata.set(Office.HAS_ANIMATIONS, true);
+        }
+    }
+
     /**
      * This is the catch-all for embedded objects.  If we didn't come across
      * them in the shapes in the slides, headers/footers, etc, try to
@@ -216,10 +254,11 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
         }
     }
 
-    private void handleComments(HSLFSlide slide, XHTMLContentHandler xhtml) throws SAXException {
-        if (slide.getComments() == null || slide.getComments().size() == 0) {
+    private void handleComments(HSLFSlide slide, XHTMLContentHandler xhtml, Set<String> commentAuthors) throws SAXException {
+        if (slide.getComments() == null || slide.getComments().isEmpty()) {
             return;
         }
+
         xhtml.startElement("div", "class", "slide-comments");
 
         // Comments, if present
@@ -228,16 +267,21 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
             authorStringBuilder.setLength(0);
             xhtml.startElement("p", "class", "slide-comment");
 
-            if (comment.getAuthor() != null) {
-                authorStringBuilder.append(comment.getAuthor());
+            if (! StringUtils.isBlank(comment.getAuthor())) {
+                String author = comment.getAuthor();
+                authorStringBuilder.append(author);
+                if (! commentAuthors.contains(author)) {
+                    parentMetadata.add(Office.COMMENT_PERSONS, comment.getAuthor());
+                    commentAuthors.add(author);
+                }
             }
             if (comment.getAuthorInitials() != null) {
-                if (authorStringBuilder.length() > 0) {
+                if (! authorStringBuilder.isEmpty()) {
                     authorStringBuilder.append(" ");
                 }
                 authorStringBuilder.append("(").append(comment.getAuthorInitials()).append(")");
             }
-            if (authorStringBuilder.length() > 0) {
+            if (! authorStringBuilder.isEmpty()) {
                 if (comment.getText() != null) {
                     authorStringBuilder.append(" - ");
                 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLWordAndPowerPointTextHandler.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLWordAndPowerPointTextHandler.java
index 77d088701..8137e6967 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLWordAndPowerPointTextHandler.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLWordAndPowerPointTextHandler.java
@@ -85,6 +85,8 @@ public class OOXMLWordAndPowerPointTextHandler extends DefaultHandler {
     private final static String RUBY = "ruby"; //phonetic section
     private final static String RT = "rt"; //phonetic run
     private static final String VAL = "val";
+    private static final String SLIDE = "sld";
+    private static final String SHOW = "show";
     private final static String MC_NS =
             "http://schemas.openxmlformats.org/markup-compatibility/2006";
     private final static String O_NS = "urn:schemas-microsoft-com:office:office";
@@ -148,6 +150,8 @@ public class OOXMLWordAndPowerPointTextHandler extends DefaultHandler {
             OOXMLWordAndPowerPointTextHandler.EditType.NONE;
     private DateUtils dateUtils = new DateUtils();
 
+    private boolean hiddenSlide = false;
+
     public OOXMLWordAndPowerPointTextHandler(XWPFBodyContentsHandler bodyContentsHandler,
                                              Map<String, String> hyperlinks) {
         this(bodyContentsHandler, hyperlinks, true, true);
@@ -333,6 +337,11 @@ public class OOXMLWordAndPowerPointTextHandler extends DefaultHandler {
             inV = true;
         } else if (RT.equals(localName)) {
             inRt = true;
+        } else if (SLIDE.equals(localName)) {
+            String val = atts.getValue("show");
+            if ("0".equals(val) || "false".equals(val)) {
+                hiddenSlide = true;
+            }
         }
 
     }
@@ -571,4 +580,8 @@ public class OOXMLWordAndPowerPointTextHandler extends DefaultHandler {
 
         void endBookmark(String id) throws SAXException;
     }
+
+    public boolean isHiddenSlide() {
+        return hiddenSlide;
+    }
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
index c036f086f..a95abf95f 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
@@ -41,6 +41,7 @@ import org.xml.sax.helpers.DefaultHandler;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.microsoft.ooxml.xslf.XSLFEventBasedPowerPointExtractor;
@@ -178,10 +179,13 @@ public class SXSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
 //        Map<String, String> hyperlinks = loadHyperlinkRelationships(packagePart);
         xhtml.startElement("div", "class", "slide-content");
         try (InputStream stream = slidePart.getInputStream()) {
+            OOXMLWordAndPowerPointTextHandler wordAndPPTHandler = new OOXMLWordAndPowerPointTextHandler(
+                    new OOXMLTikaBodyPartHandler(xhtml), linkedRelationships);
             XMLReaderUtils.parseSAX(CloseShieldInputStream.wrap(stream),
-                    new EmbeddedContentHandler(new OOXMLWordAndPowerPointTextHandler(
-                            new OOXMLTikaBodyPartHandler(xhtml), linkedRelationships)), context);
-
+                    new EmbeddedContentHandler(wordAndPPTHandler), context);
+            if (wordAndPPTHandler.isHiddenSlide()) {
+                metadata.set(Office.HAS_HIDDEN_SLIDES, true);
+            }
         } catch (TikaException | IOException e) {
             metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING,
                     ExceptionUtils.getStackTrace(e));
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
index 38e9c8aac..d292f5c57 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
@@ -19,7 +19,10 @@ package org.apache.tika.parser.microsoft.ooxml;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
 import javax.xml.namespace.QName;
 
 import org.apache.poi.common.usermodel.Hyperlink;
@@ -55,7 +58,10 @@ import org.apache.poi.xslf.usermodel.XSLFTextRun;
 import org.apache.poi.xslf.usermodel.XSLFTextShape;
 import org.apache.xmlbeans.XmlException;
 import org.apache.xmlbeans.XmlObject;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentAuthor;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentAuthorList;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTPicture;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdList;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
 import org.xml.sax.SAXException;
@@ -63,8 +69,10 @@ import org.xml.sax.helpers.AttributesImpl;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.sax.XHTMLContentHandler;
+import org.apache.tika.utils.StringUtils;
 
 public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
 
@@ -85,92 +93,130 @@ public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
      */
     protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, IOException {
         XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument();
-        XSLFCommentAuthors commentAuthors = slideShow.getCommentAuthors();
+        handleCommentAuthors(slideShow);
 
         List<XSLFSlide> slides = slideShow.getSlides();
+        AtomicInteger hiddenSlideCounter = new AtomicInteger(0);
         for (XSLFSlide slide : slides) {
-            String slideDesc;
-            if (slide.getPackagePart() != null && slide.getPackagePart().getPartName() != null) {
-                slideDesc = getJustFileName(slide.getPackagePart().getPartName().toString());
-                slideDesc += "_";
-            } else {
-                slideDesc = null;
-            }
+            handleSlide(slide, xhtml, hiddenSlideCounter);
+        }
+        if (hiddenSlideCounter.get() > 0) {
+            metadata.set(Office.HAS_HIDDEN_SLIDES, true);
+            metadata.set(Office.NUM_HIDDEN_SLIDES, hiddenSlideCounter.get());
+        }
+    }
+
+    private void handleSlide(XSLFSlide slide, XHTMLContentHandler xhtml, AtomicInteger hiddenSlideCounter) throws SAXException {
+        String slideDesc;
+        if (slide.getPackagePart() != null && slide.getPackagePart().getPartName() != null) {
+            slideDesc = getJustFileName(slide.getPackagePart().getPartName().toString());
+            slideDesc += "_";
+        } else {
+            slideDesc = null;
+        }
+
+        if (slide.isHidden()) {
+            hiddenSlideCounter.incrementAndGet();
+        }
+
+        // slide content
+        xhtml.startElement("div", "class", "slide-content");
+        extractContent(slide.getShapes(), false, xhtml, slideDesc);
+        xhtml.endElement("div");
 
-            // slide content
-            xhtml.startElement("div", "class", "slide-content");
-            extractContent(slide.getShapes(), false, xhtml, slideDesc);
+        if (config.isIncludeSlideMasterContent()) {
+            // slide layout which is the master sheet for this slide
+            xhtml.startElement("div", "class", "slide-master-content");
+            XSLFSlideLayout slideLayout = slide.getMasterSheet();
+            extractContent(slideLayout.getShapes(), true, xhtml, null);
             xhtml.endElement("div");
 
-            if (config.isIncludeSlideMasterContent()) {
-                // slide layout which is the master sheet for this slide
-                xhtml.startElement("div", "class", "slide-master-content");
-                XSLFSlideLayout slideLayout = slide.getMasterSheet();
-                extractContent(slideLayout.getShapes(), true, xhtml, null);
+            // slide master which is the master sheet for all text layouts
+            XSLFSheet slideMaster = slideLayout.getMasterSheet();
+            extractContent(slideMaster.getShapes(), true, xhtml, null);
+        }
+        if (config.isIncludeSlideNotes()) {
+            // notes (if present)
+            XSLFNotes slideNotes = slide.getNotes();
+            if (slideNotes != null) {
+                xhtml.startElement("div", "class", "slide-notes");
+
+                extractContent(slideNotes.getShapes(), false, xhtml, slideDesc);
+
+                // master sheet for this notes
+                XSLFNotesMaster notesMaster = slideNotes.getMasterSheet();
+                if (notesMaster != null) {
+                    extractContent(notesMaster.getShapes(), true, xhtml, null);
+                }
                 xhtml.endElement("div");
-
-                // slide master which is the master sheet for all text layouts
-                XSLFSheet slideMaster = slideLayout.getMasterSheet();
-                extractContent(slideMaster.getShapes(), true, xhtml, null);
             }
-            if (config.isIncludeSlideNotes()) {
-                // notes (if present)
-                XSLFNotes slideNotes = slide.getNotes();
-                if (slideNotes != null) {
-                    xhtml.startElement("div", "class", "slide-notes");
-
-                    extractContent(slideNotes.getShapes(), false, xhtml, slideDesc);
-
-                    // master sheet for this notes
-                    XSLFNotesMaster notesMaster = slideNotes.getMasterSheet();
-                    if (notesMaster != null) {
-                        extractContent(notesMaster.getShapes(), true, xhtml, null);
+        }
+
+        // comments (if present)
+        List<XSLFComment> comments = slide.getComments();
+        if (comments != null) {
+            StringBuilder authorStringBuilder = new StringBuilder();
+            for (XSLFComment comment : comments) {
+                authorStringBuilder.setLength(0);
+                xhtml.startElement("p", "class", "slide-comment");
+                if (comment.getAuthor() != null) {
+                    authorStringBuilder.append(comment.getAuthor());
+                }
+                if (comment.getAuthorInitials() != null) {
+                    if (authorStringBuilder.length() > 0) {
+                        authorStringBuilder.append(" ");
                     }
-                    xhtml.endElement("div");
+                    authorStringBuilder.append("(").append(comment.getAuthorInitials()).append(")");
+                }
+                if (comment.getText() != null && authorStringBuilder.length() > 0) {
+                    authorStringBuilder.append(" - ");
                 }
+                if (authorStringBuilder.length() > 0) {
+                    xhtml.startElement("b");
+                    xhtml.characters(authorStringBuilder.toString());
+                    xhtml.endElement("b");
+                }
+
+                xhtml.characters(comment.getText());
+                xhtml.endElement("p");
             }
+        }
+        //now dump diagram data
+        handleGeneralTextContainingPart(RELATION_DIAGRAM_DATA, "diagram-data",
+                slide.getPackagePart(), metadata,
+                new OOXMLWordAndPowerPointTextHandler(new OOXMLTikaBodyPartHandler(xhtml),
+                        new HashMap<>()//empty
+                ));
+        //now dump chart data
+        handleGeneralTextContainingPart(XSLFRelation.CHART.getRelation(), "chart",
+                slide.getPackagePart(), metadata,
+                new OOXMLWordAndPowerPointTextHandler(new OOXMLTikaBodyPartHandler(xhtml),
+                        new HashMap<>()//empty
+                ));
+
+        CTSlide ctSlide = slide.getXmlObject();
+        if (ctSlide.isSetTiming()) {
+            //perhaps require more, like: ctSlide.getTiming()?.getTnLst()?.getParArray()?.length
+            metadata.set(Office.HAS_ANIMATIONS, true);
+        }
+    }
 
-            // comments (if present)
-            List<XSLFComment> comments = slide.getComments();
-            if (comments != null) {
-                StringBuilder authorStringBuilder = new StringBuilder();
-                for (XSLFComment comment : comments) {
-                    authorStringBuilder.setLength(0);
-                    xhtml.startElement("p", "class", "slide-comment");
-                    if (comment.getAuthor() != null) {
-                        authorStringBuilder.append(comment.getAuthor());
-                    }
-                    if (comment.getAuthorInitials() != null) {
-                        if (authorStringBuilder.length() > 0) {
-                            authorStringBuilder.append(" ");
-                        }
-                        authorStringBuilder.append("(").append(comment.getAuthorInitials()).append(")");
-                    }
-                    if (comment.getText() != null && authorStringBuilder.length() > 0) {
-                        authorStringBuilder.append(" - ");
-                    }
-                    if (authorStringBuilder.length() > 0) {
-                        xhtml.startElement("b");
-                        xhtml.characters(authorStringBuilder.toString());
-                        xhtml.endElement("b");
+    private void handleCommentAuthors(XMLSlideShow slideShow) {
+        XSLFCommentAuthors commentAuthors = slideShow.getCommentAuthors();
+        if (commentAuthors != null) {
+            CTCommentAuthorList ctAuthorList = commentAuthors.getCTCommentAuthorsList();
+            CTCommentAuthor[] ctAuthorArray = ctAuthorList.getCmAuthorArray();
+            if (ctAuthorArray != null) {
+                Set<String> names = new HashSet<>();
+                for (CTCommentAuthor ctCommentAuthor : ctAuthorArray) {
+                    String n = ctCommentAuthor.getName();
+                    if (StringUtils.isBlank(n) || names.contains(n)) {
+                        continue;
                     }
-
-                    xhtml.characters(comment.getText());
-                    xhtml.endElement("p");
+                    metadata.add(Office.COMMENT_PERSONS, n);
+                    names.add(n);
                 }
             }
-            //now dump diagram data
-            handleGeneralTextContainingPart(RELATION_DIAGRAM_DATA, "diagram-data",
-                    slide.getPackagePart(), metadata,
-                    new OOXMLWordAndPowerPointTextHandler(new OOXMLTikaBodyPartHandler(xhtml),
-                            new HashMap<>()//empty
-                    ));
-            //now dump chart data
-            handleGeneralTextContainingPart(XSLFRelation.CHART.getRelation(), "chart",
-                    slide.getPackagePart(), metadata,
-                    new OOXMLWordAndPowerPointTextHandler(new OOXMLTikaBodyPartHandler(xhtml),
-                            new HashMap<>()//empty
-                    ));
         }
     }
 

Reply via email to