This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 82f26b63dc7aa6ccc23a25e57377ab9d851db448
Author: Tim Allison <[email protected]>
AuthorDate: Tue Feb 3 06:32:51 2026 -0500

    TIKA-4646 -- extract hyperlinks from instrText and other areas in 
ooxml(#2578)
    
    (cherry picked from commit bef2d336b1e4e52e3ca262d656f93ee4d3145b5f)
---
 .../main/java/org/apache/tika/metadata/Office.java |  51 +++
 .../microsoft/ooxml/AbstractOOXMLExtractor.java    |  26 ++
 .../microsoft/ooxml/FieldHyperlinkTracker.java     | 168 +++++++++
 .../microsoft/ooxml/OOXMLTikaBodyPartHandler.java  |  25 ++
 .../ooxml/OOXMLWordAndPowerPointTextHandler.java   | 187 +++++++++-
 .../ooxml/SXWPFWordExtractorDecorator.java         | 179 +++++++++-
 .../ooxml/XSSFExcelExtractorDecorator.java         | 390 +++++++++++++++++++++
 .../ooxml/XWPFWordExtractorDecorator.java          |  95 ++++-
 .../xslf/XSLFEventBasedPowerPointExtractor.java    |   5 +
 .../ooxml/xwpf/XWPFEventBasedWordExtractor.java    |   5 +
 .../tika/parser/microsoft/ExcelParserTest.java     |  43 +++
 .../parser/microsoft/ooxml/OOXMLParserTest.java    |  39 +++
 .../parser/microsoft/ooxml/SXWPFExtractorTest.java | 109 ++++++
 .../parser/microsoft/pst/OutlookPSTParserTest.java |   3 +
 .../test-documents/testAttachedTemplate.docx       | Bin 0 -> 2284 bytes
 .../test-documents/testDataConnections.xlsx        | Bin 0 -> 2967 bytes
 .../test/resources/test-documents/testDdeLink.xlsx | Bin 0 -> 3030 bytes
 .../resources/test-documents/testExternalRefs.docx | Bin 0 -> 2125 bytes
 .../resources/test-documents/testFrameset.docx     | Bin 0 -> 2328 bytes
 .../resources/test-documents/testHoverAndVml.docx  | Bin 0 -> 2270 bytes
 .../resources/test-documents/testInstrLink.docx    | Bin 0 -> 14464 bytes
 .../resources/test-documents/testMailMerge.docx    | Bin 0 -> 2306 bytes
 .../resources/test-documents/testSubdocument.docx  | Bin 0 -> 1980 bytes
 23 files changed, 1322 insertions(+), 3 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Office.java 
b/tika-core/src/main/java/org/apache/tika/metadata/Office.java
index 39607445f6..6e9a20e70b 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/Office.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Office.java
@@ -201,8 +201,59 @@ public interface Office {
 
     Property WORKBOOK_CODENAME = 
Property.internalText("msoffice:excel:workbook-codename");
 
+    Property HAS_DATA_CONNECTIONS = 
Property.internalBoolean("msoffice:excel:has-data-connections");
+
+    Property HAS_EXTERNAL_LINKS = 
Property.internalBoolean("msoffice:excel:has-external-links");
+
+    Property HAS_WEB_QUERIES = 
Property.internalBoolean("msoffice:excel:has-web-queries");
+
+    Property HAS_EXTERNAL_OLE_OBJECTS = 
Property.internalBoolean("msoffice:has-external-ole-objects");
+
+    Property HAS_FIELD_HYPERLINKS = 
Property.internalBoolean("msoffice:has-field-hyperlinks");
+
+    Property HAS_HOVER_HYPERLINKS = 
Property.internalBoolean("msoffice:has-hover-hyperlinks");
+
+    Property HAS_VML_HYPERLINKS = 
Property.internalBoolean("msoffice:has-vml-hyperlinks");
+
     Property HAS_COMMENTS = Property.internalBoolean("msoffice:has-comments");
 
     Property COMMENT_PERSONS = 
Property.internalTextBag("msoffice:comment-person-display-name");
 
+    Property HAS_HIDDEN_SLIDES = 
Property.internalBoolean("msoffice:ppt:has-hidden-slides");
+
+    Property NUM_HIDDEN_SLIDES = 
Property.internalInteger("msoffice:ppt:num-hidden-slides");
+
+    Property HAS_ANIMATIONS = 
Property.internalBoolean("msoffice:ppt:has-animations");
+
+    //w:vanish or isVanish or isFldVanish
+    Property HAS_HIDDEN_TEXT = 
Property.internalBoolean("msoffice:doc:has-hidden-text");
+
+    Property HAS_TRACK_CHANGES = 
Property.internalBoolean("msoffice:has-track-changes");
+
+    // Security-relevant: DDE (Dynamic Data Exchange) links can execute 
commands
+    Property HAS_DDE_LINKS = 
Property.internalBoolean("msoffice:excel:has-dde-links");
+
+    // Security-relevant: Mail merge can reference external data sources
+    Property HAS_MAIL_MERGE = 
Property.internalBoolean("msoffice:doc:has-mail-merge");
+
+    // Security-relevant: Attached templates can be fetched from external URLs
+    Property HAS_ATTACHED_TEMPLATE = 
Property.internalBoolean("msoffice:doc:has-attached-template");
+
+    // Security-relevant: SubDocuments reference external documents in master 
docs
+    Property HAS_SUBDOCUMENTS = 
Property.internalBoolean("msoffice:doc:has-subdocuments");
+
+    // Security-relevant: Pivot tables can reference external OLAP/database 
sources
+    Property HAS_EXTERNAL_PIVOT_DATA = 
Property.internalBoolean("msoffice:excel:has-external-pivot-data");
+
+    // Security-relevant: Power Query can contain URLs and connection strings
+    Property HAS_POWER_QUERY = 
Property.internalBoolean("msoffice:excel:has-power-query");
+
+    // Security-relevant: OLE objects can link to external files (vs embedded)
+    Property HAS_LINKED_OLE_OBJECTS = 
Property.internalBoolean("msoffice:has-linked-ole-objects");
+
+    // Security-relevant: Charts can reference external workbook data
+    Property HAS_EXTERNAL_CHART_DATA = 
Property.internalBoolean("msoffice:has-external-chart-data");
+
+    // Security-relevant: Framesets can load external URLs
+    Property HAS_FRAMESETS = 
Property.internalBoolean("msoffice:doc:has-framesets");
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
index c7cfecfa49..a8d65cd895 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
@@ -265,6 +265,16 @@ public abstract class AbstractOOXMLExtractor implements 
OOXMLExtractor {
             sourceDesc = "";
         }
         if (rel.getTargetMode() != TargetMode.INTERNAL) {
+            // External target - emit as external reference for security 
analysis
+            String type = rel.getRelationshipType();
+            if (POIXMLDocument.OLE_OBJECT_REL_TYPE.equals(type)) {
+                emitExternalRef(xhtml, "externalOleObject", 
targetURI.toString());
+                parentMetadata.set(Office.HAS_EXTERNAL_OLE_OBJECTS, true);
+            } else if (PackageRelationshipTypes.IMAGE_PART.equals(type)) {
+                emitExternalRef(xhtml, "externalImage", targetURI.toString());
+            } else {
+                emitExternalRef(xhtml, "externalResource", 
targetURI.toString());
+            }
             return;
         }
         PackagePart target;
@@ -489,6 +499,22 @@ public abstract class AbstractOOXMLExtractor implements 
OOXMLExtractor {
         metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
     }
 
+    /**
+     * Emits an external reference as an anchor element with appropriate class.
+     * Used for detecting external resources that could be security risks.
+     */
+    private void emitExternalRef(XHTMLContentHandler xhtml, String refType, 
String url)
+            throws SAXException {
+        if (url == null || url.isEmpty()) {
+            return;
+        }
+        AttributesImpl attrs = new AttributesImpl();
+        attrs.addAttribute("", "class", "class", "CDATA", "external-ref-" + 
refType);
+        attrs.addAttribute("", "href", "href", "CDATA", url);
+        xhtml.startElement("a", attrs);
+        xhtml.endElement("a");
+    }
+
     /**
      * Populates the {@link XHTMLContentHandler} object received as parameter.
      */
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/FieldHyperlinkTracker.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/FieldHyperlinkTracker.java
new file mode 100644
index 0000000000..951711d99f
--- /dev/null
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/FieldHyperlinkTracker.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Tracks field hyperlink state across multiple runs within a paragraph.
+ * Field codes span multiple runs: begin -> instrText -> separate -> text runs 
-> end
+ * <p>
+ * This class handles HYPERLINK field codes as well as other external 
references
+ * like INCLUDEPICTURE, INCLUDETEXT, IMPORT, and LINK.
+ */
+class FieldHyperlinkTracker {
+
+    // Patterns for extracting URLs from field codes
+    private static final Pattern HYPERLINK_PATTERN =
+            Pattern.compile("HYPERLINK\\s{1,100}\"([^\"]{1,10000})\"", 
Pattern.CASE_INSENSITIVE);
+    private static final Pattern INCLUDEPICTURE_PATTERN =
+            Pattern.compile("INCLUDEPICTURE\\s{1,100}\"([^\"]{1,10000})\"", 
Pattern.CASE_INSENSITIVE);
+    private static final Pattern INCLUDETEXT_PATTERN =
+            Pattern.compile("INCLUDETEXT\\s{1,100}\"([^\"]{1,10000})\"", 
Pattern.CASE_INSENSITIVE);
+    private static final Pattern IMPORT_PATTERN =
+            Pattern.compile("IMPORT\\s{1,100}\"([^\"]{1,10000})\"", 
Pattern.CASE_INSENSITIVE);
+    private static final Pattern LINK_PATTERN =
+            
Pattern.compile("LINK\\s{1,100}[\\w.]{1,50}\\s{1,100}\"([^\"]{1,10000})\"",
+                    Pattern.CASE_INSENSITIVE);
+
+    private boolean inField = false;
+    private boolean inFieldHyperlink = false;
+    private final StringBuilder instrTextBuffer = new StringBuilder();
+    private String lastExternalRefType = null;
+    private String lastExternalRefUrl = null;
+
+    void startField() {
+        inField = true;
+        instrTextBuffer.setLength(0);
+        lastExternalRefType = null;
+        lastExternalRefUrl = null;
+    }
+
+    void addInstrText(String text) {
+        if (inField && text != null) {
+            instrTextBuffer.append(text);
+        }
+    }
+
+    /**
+     * Called when fldChar separate is encountered.
+     *
+     * @return the hyperlink URL if this is a HYPERLINK field, null otherwise
+     */
+    String separate() {
+        if (inField) {
+            String url = 
parseHyperlinkFromInstrText(instrTextBuffer.toString());
+            if (url != null) {
+                inFieldHyperlink = true;
+                return url;
+            }
+            // Check for other external refs (INCLUDEPICTURE, INCLUDETEXT, 
IMPORT, LINK)
+            StringBuilder fieldType = new StringBuilder();
+            String extUrl = 
parseExternalRefFromInstrText(instrTextBuffer.toString(), fieldType);
+            if (extUrl != null) {
+                lastExternalRefType = fieldType.toString();
+                lastExternalRefUrl = extUrl;
+            }
+        }
+        return null;
+    }
+
+    void endField() {
+        inField = false;
+        inFieldHyperlink = false;
+        instrTextBuffer.setLength(0);
+        lastExternalRefType = null;
+        lastExternalRefUrl = null;
+    }
+
+    boolean isInFieldHyperlink() {
+        return inFieldHyperlink;
+    }
+
+    String getLastExternalRefType() {
+        return lastExternalRefType;
+    }
+
+    String getLastExternalRefUrl() {
+        return lastExternalRefUrl;
+    }
+
+    void clearExternalRef() {
+        lastExternalRefType = null;
+        lastExternalRefUrl = null;
+    }
+
+    /**
+     * Parses a HYPERLINK URL from instrText field code content.
+     *
+     * @param instrText the accumulated instrText content
+     * @return the URL if found, or null
+     */
+    private static String parseHyperlinkFromInstrText(String instrText) {
+        if (instrText == null || instrText.isEmpty()) {
+            return null;
+        }
+        Matcher m = HYPERLINK_PATTERN.matcher(instrText.trim());
+        if (m.find()) {
+            return m.group(1);
+        }
+        return null;
+    }
+
+    /**
+     * Parses external reference URLs from instrText field codes
+     * (INCLUDEPICTURE, INCLUDETEXT, IMPORT, LINK).
+     *
+     * @param instrText the accumulated instrText content
+     * @param fieldType output parameter - will contain the field type if found
+     * @return the URL if found, or null
+     */
+    private static String parseExternalRefFromInstrText(String instrText, 
StringBuilder fieldType) {
+        if (instrText == null || instrText.isEmpty()) {
+            return null;
+        }
+        String trimmed = instrText.trim();
+
+        Matcher m = INCLUDEPICTURE_PATTERN.matcher(trimmed);
+        if (m.find()) {
+            fieldType.append("INCLUDEPICTURE");
+            return m.group(1);
+        }
+
+        m = INCLUDETEXT_PATTERN.matcher(trimmed);
+        if (m.find()) {
+            fieldType.append("INCLUDETEXT");
+            return m.group(1);
+        }
+
+        m = IMPORT_PATTERN.matcher(trimmed);
+        if (m.find()) {
+            fieldType.append("IMPORT");
+            return m.group(1);
+        }
+
+        m = LINK_PATTERN.matcher(trimmed);
+        if (m.find()) {
+            fieldType.append("LINK");
+            return m.group(1);
+        }
+
+        return null;
+    }
+}
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLTikaBodyPartHandler.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLTikaBodyPartHandler.java
index 8ff630da14..4bc445fb5e 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLTikaBodyPartHandler.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLTikaBodyPartHandler.java
@@ -321,6 +321,19 @@ public class OOXMLTikaBodyPartHandler
         xhtml.endElement("div");
     }
 
+    @Override
+    public void linkedOLERef(String relId) throws SAXException {
+        if (relId == null) {
+            return;
+        }
+        // Emit as an external reference anchor - linked OLE objects reference 
external files
+        AttributesImpl attributes = new AttributesImpl();
+        attributes.addAttribute("", "class", "class", "CDATA", 
"external-ref-linkedOle");
+        attributes.addAttribute("", "id", "id", "CDATA", relId);
+        xhtml.startElement("a", attributes);
+        xhtml.endElement("a");
+    }
+
     @Override
     public void embeddedPicRef(String picFileName, String picDescription) 
throws SAXException {
 
@@ -338,6 +351,18 @@ public class OOXMLTikaBodyPartHandler
 
     }
 
+    @Override
+    public void externalRef(String fieldType, String url) throws SAXException {
+        if (url == null || url.isEmpty()) {
+            return;
+        }
+        AttributesImpl attr = new AttributesImpl();
+        attr.addAttribute("", "class", "class", "CDATA", "external-ref-" + 
fieldType);
+        attr.addAttribute("", "href", "href", "CDATA", url);
+        xhtml.startElement("a", attr);
+        xhtml.endElement("a");
+    }
+
     @Override
     public void startBookmark(String id, String name) throws SAXException {
         //skip bookmarks within hyperlinks
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLWordAndPowerPointTextHandler.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLWordAndPowerPointTextHandler.java
index 6e355b8ff9..9e7110f773 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLWordAndPowerPointTextHandler.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLWordAndPowerPointTextHandler.java
@@ -19,12 +19,16 @@ package org.apache.tika.parser.microsoft.ooxml;
 
 import java.util.Date;
 import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.poi.xwpf.usermodel.UnderlinePatterns;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
 
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
 import org.apache.tika.utils.DateUtils;
 
 /**
@@ -106,12 +110,34 @@ public class OOXMLWordAndPowerPointTextHandler extends 
DefaultHandler {
     private final static String MOVE_TO = "moveTo";
     private final static String ENDNOTE_REFERENCE = "endnoteReference";
     private static final String TEXTBOX = "textbox";
+    private final static String FLD_CHAR = "fldChar";
+    private final static String INSTR_TEXT = "instrText";
+    private final static String FLD_CHAR_TYPE = "fldCharType";
+    // DrawingML hyperlinks on shapes/pictures
+    private final static String HLINK_HOVER = "hlinkHover";
+    private final static String C_NV_PR = "cNvPr";
+    // VML shape hyperlinks
+    private final static String SHAPE = "shape";
+    private final static String HREF = "href";
+
+    // Patterns for extracting URLs from field codes
+    private static final Pattern HYPERLINK_PATTERN =
+            Pattern.compile("HYPERLINK\\s{1,100}\"([^\"]{1,10000})\"", 
Pattern.CASE_INSENSITIVE);
+    private static final Pattern INCLUDEPICTURE_PATTERN =
+            Pattern.compile("INCLUDEPICTURE\\s{1,100}\"([^\"]{1,10000})\"", 
Pattern.CASE_INSENSITIVE);
+    private static final Pattern INCLUDETEXT_PATTERN =
+            Pattern.compile("INCLUDETEXT\\s{1,100}\"([^\"]{1,10000})\"", 
Pattern.CASE_INSENSITIVE);
+    private static final Pattern IMPORT_PATTERN =
+            Pattern.compile("IMPORT\\s{1,100}\"([^\"]{1,10000})\"", 
Pattern.CASE_INSENSITIVE);
+    private static final Pattern LINK_PATTERN =
+            
Pattern.compile("LINK\\s{1,100}[\\w.]{1,50}\\s{1,100}\"([^\"]{1,10000})\"", 
Pattern.CASE_INSENSITIVE);
     private final XWPFBodyContentsHandler bodyContentsHandler;
     private final Map<String, String> linkedRelationships;
     private final RunProperties currRunProperties = new RunProperties();
     private final ParagraphProperties currPProperties = new 
ParagraphProperties();
     private final boolean includeTextBox;
     private final boolean concatenatePhoneticRuns;
+    private final Metadata metadata;
     private final StringBuilder runBuffer = new StringBuilder();
     private final StringBuilder rubyBuffer = new StringBuilder();
     private boolean inR = false;
@@ -143,22 +169,34 @@ public class OOXMLWordAndPowerPointTextHandler extends 
DefaultHandler {
     private boolean inHlinkClick = false;
     private boolean inTextBox = false;
     private boolean inV = false; //in c:v in chart file
+    // Field code tracking for instrText-based hyperlinks
+    private boolean inField = false;
+    private boolean inInstrText = false;
+    private boolean inFieldHyperlink = false;
+    private final StringBuilder instrTextBuffer = new StringBuilder();
     private OOXMLWordAndPowerPointTextHandler.EditType editType =
             OOXMLWordAndPowerPointTextHandler.EditType.NONE;
     private DateUtils dateUtils = new DateUtils();
 
     public OOXMLWordAndPowerPointTextHandler(XWPFBodyContentsHandler 
bodyContentsHandler,
                                              Map<String, String> hyperlinks) {
-        this(bodyContentsHandler, hyperlinks, true, true);
+        this(bodyContentsHandler, hyperlinks, true, true, null);
     }
 
     public OOXMLWordAndPowerPointTextHandler(XWPFBodyContentsHandler 
bodyContentsHandler,
                                              Map<String, String> hyperlinks, 
boolean includeTextBox,
                                              boolean concatenatePhoneticRuns) {
+        this(bodyContentsHandler, hyperlinks, includeTextBox, 
concatenatePhoneticRuns, null);
+    }
+
+    public OOXMLWordAndPowerPointTextHandler(XWPFBodyContentsHandler 
bodyContentsHandler,
+                                             Map<String, String> hyperlinks, 
boolean includeTextBox,
+                                             boolean concatenatePhoneticRuns, 
Metadata metadata) {
         this.bodyContentsHandler = bodyContentsHandler;
         this.linkedRelationships = hyperlinks;
         this.includeTextBox = includeTextBox;
         this.concatenatePhoneticRuns = concatenatePhoneticRuns;
+        this.metadata = metadata;
     }
 
     @Override
@@ -322,6 +360,12 @@ public class OOXMLWordAndPowerPointTextHandler extends 
DefaultHandler {
             }
             if ("Embed".equals(type)) {
                 bodyContentsHandler.embeddedOLERef(refId);
+            } else if ("Link".equals(type)) {
+                // Linked OLE object - references external file
+                bodyContentsHandler.linkedOLERef(refId);
+                if (metadata != null) {
+                    metadata.set(Office.HAS_LINKED_OLE_OBJECTS, true);
+                }
             }
         } else if (CR.equals(localName)) {
             runBuffer.append(NEWLINE);
@@ -332,6 +376,65 @@ public class OOXMLWordAndPowerPointTextHandler extends 
DefaultHandler {
             inV = true;
         } else if (RT.equals(localName)) {
             inRt = true;
+        } else if (FLD_CHAR.equals(localName)) {
+            String fldCharType = atts.getValue(W_NS, FLD_CHAR_TYPE);
+            if ("begin".equals(fldCharType)) {
+                inField = true;
+                instrTextBuffer.setLength(0);
+            } else if ("separate".equals(fldCharType)) {
+                // Parse instrText for HYPERLINK
+                String url = 
parseHyperlinkFromInstrText(instrTextBuffer.toString());
+                if (url != null) {
+                    bodyContentsHandler.hyperlinkStart(url);
+                    inFieldHyperlink = true;
+                    if (metadata != null) {
+                        metadata.set(Office.HAS_FIELD_HYPERLINKS, true);
+                    }
+                } else {
+                    // Check for external reference fields (INCLUDEPICTURE, 
INCLUDETEXT, etc.)
+                    StringBuilder fieldType = new StringBuilder();
+                    String extUrl = 
parseExternalRefFromInstrText(instrTextBuffer.toString(), fieldType);
+                    if (extUrl != null) {
+                        bodyContentsHandler.externalRef(fieldType.toString(), 
extUrl);
+                        if (metadata != null) {
+                            metadata.set(Office.HAS_FIELD_HYPERLINKS, true);
+                        }
+                    }
+                }
+            } else if ("end".equals(fldCharType)) {
+                if (inFieldHyperlink) {
+                    bodyContentsHandler.hyperlinkEnd();
+                    inFieldHyperlink = false;
+                }
+                inField = false;
+                instrTextBuffer.setLength(0);
+            }
+        } else if (INSTR_TEXT.equals(localName)) {
+            inInstrText = true;
+        } else if (HLINK_HOVER.equals(localName)) {
+            // DrawingML hover hyperlink on shapes/pictures
+            String hyperlinkId = atts.getValue(OFFICE_DOC_RELATIONSHIP_NS, 
"id");
+            if (hyperlinkId != null) {
+                String hyperlink = linkedRelationships.get(hyperlinkId);
+                if (hyperlink != null) {
+                    bodyContentsHandler.externalRef("hlinkHover", hyperlink);
+                    if (metadata != null) {
+                        metadata.set(Office.HAS_HOVER_HYPERLINKS, true);
+                    }
+                }
+            }
+        } else if (SHAPE.equals(localName) && V_NS.equals(uri)) {
+            // VML shape with href attribute
+            String href = atts.getValue(HREF);
+            if (href == null) {
+                href = atts.getValue(O_NS, HREF);
+            }
+            if (href != null && !href.isEmpty()) {
+                bodyContentsHandler.externalRef("vml-shape-href", href);
+                if (metadata != null) {
+                    metadata.set(Office.HAS_VML_HYPERLINKS, true);
+                }
+            }
         }
 
     }
@@ -367,6 +470,65 @@ public class OOXMLWordAndPowerPointTextHandler extends 
DefaultHandler {
         return -1;
     }
 
+    /**
+     * Parses a HYPERLINK URL from instrText field code content.
+     * Field codes like: HYPERLINK "https://example.com"
+     *
+     * @param instrText the accumulated instrText content
+     * @return the URL if found, or null
+     */
+    private String parseHyperlinkFromInstrText(String instrText) {
+        if (instrText == null || instrText.isEmpty()) {
+            return null;
+        }
+        Matcher m = HYPERLINK_PATTERN.matcher(instrText.trim());
+        if (m.find()) {
+            return m.group(1);
+        }
+        return null;
+    }
+
+    /**
+     * Parses URLs from instrText field codes that reference external 
resources.
+     * This includes INCLUDEPICTURE, INCLUDETEXT, IMPORT, and LINK fields.
+     *
+     * @param instrText the accumulated instrText content
+     * @param fieldType output parameter - will contain the field type if found
+     * @return the URL if found, or null
+     */
+    private String parseExternalRefFromInstrText(String instrText, 
StringBuilder fieldType) {
+        if (instrText == null || instrText.isEmpty()) {
+            return null;
+        }
+        String trimmed = instrText.trim();
+
+        Matcher m = INCLUDEPICTURE_PATTERN.matcher(trimmed);
+        if (m.find()) {
+            fieldType.append("INCLUDEPICTURE");
+            return m.group(1);
+        }
+
+        m = INCLUDETEXT_PATTERN.matcher(trimmed);
+        if (m.find()) {
+            fieldType.append("INCLUDETEXT");
+            return m.group(1);
+        }
+
+        m = IMPORT_PATTERN.matcher(trimmed);
+        if (m.find()) {
+            fieldType.append("IMPORT");
+            return m.group(1);
+        }
+
+        m = LINK_PATTERN.matcher(trimmed);
+        if (m.find()) {
+            fieldType.append("LINK");
+            return m.group(1);
+        }
+
+        return null;
+    }
+
     @Override
     public void endElement(String uri, String localName, String qName) throws 
SAXException {
 
@@ -432,6 +594,8 @@ public class OOXMLWordAndPowerPointTextHandler extends 
DefaultHandler {
             inRt = false;
         } else if (RUBY.equals(localName)) {
             handleEndOfRuby();
+        } else if (INSTR_TEXT.equals(localName)) {
+            inInstrText = false;
         }
     }
 
@@ -489,6 +653,9 @@ public class OOXMLWordAndPowerPointTextHandler extends 
DefaultHandler {
         } else if (inV) {
             appendToBuffer(ch, start, length);
             appendToBuffer(TAB_CHAR, 0, 1);
+        } else if (inInstrText && inField) {
+            // Accumulate instrText content for field code parsing (e.g., 
HYPERLINK)
+            instrTextBuffer.append(ch, start, length);
         }
     }
 
@@ -564,10 +731,28 @@ public class OOXMLWordAndPowerPointTextHandler extends 
DefaultHandler {
 
         void embeddedOLERef(String refId) throws SAXException;
 
+        /**
+         * Called when a linked (vs embedded) OLE object is found.
+         * These reference external files and are a security concern.
+         */
+        void linkedOLERef(String refId) throws SAXException;
+
         void embeddedPicRef(String picFileName, String picDescription) throws 
SAXException;
 
         void startBookmark(String id, String name) throws SAXException;
 
         void endBookmark(String id) throws SAXException;
+
+        /**
+         * Called when an external reference URL is found in a field code
+         * (INCLUDEPICTURE, INCLUDETEXT, IMPORT, LINK) or in a DrawingML/VML
+         * hyperlink on a shape.
+         *
+         * @param fieldType the type of field (e.g., "INCLUDEPICTURE", 
"hlinkHover", "vml-shape-href")
+         * @param url the external URL
+         */
+        default void externalRef(String fieldType, String url) throws 
SAXException {
+            // Default no-op implementation for backward compatibility
+        }
     }
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java
index 2b4c52748c..fbe16d51a2 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXWPFWordExtractorDecorator.java
@@ -30,14 +30,18 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.openxml4j.opc.PackagePart;
 import org.apache.poi.openxml4j.opc.PackageRelationship;
 import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.TargetMode;
 import org.apache.poi.xssf.usermodel.XSSFRelation;
 import org.apache.poi.xwpf.usermodel.XWPFNumbering;
 import org.apache.poi.xwpf.usermodel.XWPFRelation;
 import org.apache.xmlbeans.XmlException;
+import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor;
@@ -69,6 +73,16 @@ public class SXWPFWordExtractorDecorator extends 
AbstractOOXMLExtractor {
                     
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes",
                     
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"};
 
+    // Relationship types for Word settings.
+    // These are looked up on the main document part by detectSecurityFeatures().
+    private static final String SETTINGS_RELATION =
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings";
+    private static final String WEB_SETTINGS_RELATION =
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings";
+    private static final String ATTACHED_TEMPLATE_RELATION =
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/attachedTemplate";
+    private static final String SUBDOCUMENT_RELATION =
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/subDocument";
+
     //a docx file should have one of these "main story" parts
     private final static String[] MAIN_STORY_PART_RELATIONS =
             new String[]{XWPFRelation.DOCUMENT.getContentType(),
@@ -116,6 +130,106 @@ public class SXWPFWordExtractorDecorator extends 
AbstractOOXMLExtractor {
                 xhtml.endElement("div");
             }
         }
+
+        // Detect security-relevant features in main document
+        pps = getStoryDocumentParts();
+        if (pps != null && !pps.isEmpty()) {
+            PackagePart mainDoc = pps.get(0);
+            detectSecurityFeatures(mainDoc, xhtml);
+        }
+    }
+
+    /**
+     * Detects security-relevant features like mail merge, attached templates,
+     * subdocuments, and framesets.
+     * <p>
+     * Four independent checks are run against the relationships of the given part:
+     * attached template, subdocuments, settings (mail merge) and webSettings
+     * (framesets). Each check sets the matching {@link Office} metadata flag. For
+     * attached-template and subdocument relationships whose target mode is
+     * external, the target URI is also emitted through {@code emitExternalRef}.
+     * Every check has its own try block and swallows its exceptions, so a failure
+     * in one check does not prevent the remaining checks from running.
+     *
+     * @param documentPart the main document part whose relationships are inspected
+     * @param xhtml handler that receives the external-reference anchors
+     */
+    private void detectSecurityFeatures(PackagePart documentPart, 
XHTMLContentHandler xhtml) {
+        // Check for attached template (external template reference)
+        try {
+            PackageRelationshipCollection templateRels =
+                    
documentPart.getRelationshipsByType(ATTACHED_TEMPLATE_RELATION);
+            if (templateRels != null && templateRels.size() > 0) {
+                metadata.set(Office.HAS_ATTACHED_TEMPLATE, true);
+                for (PackageRelationship rel : templateRels) {
+                    if (rel.getTargetMode() == TargetMode.EXTERNAL) {
+                        emitExternalRef(xhtml, "attachedTemplate", 
rel.getTargetURI().toString());
+                    }
+                }
+            }
+        } catch (InvalidFormatException | SAXException e) {
+            // swallow
+        }
+
+        // Check for subdocuments (master document with external subdocs)
+        try {
+            PackageRelationshipCollection subDocRels =
+                    documentPart.getRelationshipsByType(SUBDOCUMENT_RELATION);
+            if (subDocRels != null && subDocRels.size() > 0) {
+                metadata.set(Office.HAS_SUBDOCUMENTS, true);
+                for (PackageRelationship rel : subDocRels) {
+                    if (rel.getTargetMode() == TargetMode.EXTERNAL) {
+                        emitExternalRef(xhtml, "subDocument", 
rel.getTargetURI().toString());
+                    }
+                }
+            }
+        } catch (InvalidFormatException | SAXException e) {
+            // swallow
+        }
+
+        // Check settings.xml for mail merge (only the first settings relationship is read)
+        try {
+            PackageRelationshipCollection settingsRels =
+                    documentPart.getRelationshipsByType(SETTINGS_RELATION);
+            if (settingsRels != null && settingsRels.size() > 0) {
+                PackagePart settingsPart = 
documentPart.getRelatedPart(settingsRels.getRelationship(0));
+                if (settingsPart != null) {
+                    try (InputStream is = settingsPart.getInputStream()) {
+                        WordSettingsHandler handler = new 
WordSettingsHandler(xhtml);
+                        XMLReaderUtils.parseSAX(is, handler, context);
+                        if (handler.hasMailMerge()) {
+                            metadata.set(Office.HAS_MAIL_MERGE, true);
+                        }
+                    }
+                }
+            }
+        } catch (InvalidFormatException | IOException | TikaException | 
SAXException e) {
+            // swallow
+        }
+
+        // Check webSettings.xml for framesets (only the first webSettings relationship is read)
+        try {
+            PackageRelationshipCollection webSettingsRels =
+                    documentPart.getRelationshipsByType(WEB_SETTINGS_RELATION);
+            if (webSettingsRels != null && webSettingsRels.size() > 0) {
+                PackagePart webSettingsPart = 
documentPart.getRelatedPart(webSettingsRels.getRelationship(0));
+                if (webSettingsPart != null) {
+                    try (InputStream is = webSettingsPart.getInputStream()) {
+                        WebSettingsHandler handler = new 
WebSettingsHandler(xhtml);
+                        XMLReaderUtils.parseSAX(is, handler, context);
+                        if (handler.hasFrameset()) {
+                            metadata.set(Office.HAS_FRAMESETS, true);
+                        }
+                    }
+                }
+            }
+        } catch (InvalidFormatException | IOException | TikaException | 
SAXException e) {
+            // swallow
+        }
+    }
+
+    /**
+     * Writes an empty {@code <a>} element that records an external reference.
+     * The anchor's {@code class} attribute is {@code "external-ref-" + refType}
+     * and its {@code href} is the given URL. Nothing is written when the URL is
+     * null or empty.
+     */
+    private void emitExternalRef(XHTMLContentHandler xhtml, String refType, String url)
+            throws SAXException {
+        boolean hasUrl = url != null && !url.isEmpty();
+        if (hasUrl) {
+            String cssClass = "external-ref-" + refType;
+            org.xml.sax.helpers.AttributesImpl anchorAtts =
+                    new org.xml.sax.helpers.AttributesImpl();
+            anchorAtts.addAttribute("", "class", "class", "CDATA", cssClass);
+            anchorAtts.addAttribute("", "href", "href", "CDATA", url);
+            xhtml.startElement("a", anchorAtts);
+            xhtml.endElement("a");
+        }
     }
 
     private void handleDocumentPart(PackagePart documentPart, 
XHTMLContentHandler xhtml)
@@ -195,7 +309,7 @@ public class SXWPFWordExtractorDecorator extends 
AbstractOOXMLExtractor {
                     new EmbeddedContentHandler(new 
OOXMLWordAndPowerPointTextHandler(
                             new OOXMLTikaBodyPartHandler(xhtml, styles, 
listManager, config),
                             linkedRelationships, 
config.isIncludeShapeBasedContent(),
-                            config.isConcatenatePhoneticRuns())), context);
+                            config.isConcatenatePhoneticRuns(), metadata)), 
context);
         } catch (TikaException | IOException e) {
             metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING,
                     ExceptionUtils.getStackTrace(e));
@@ -299,4 +413,67 @@ public class SXWPFWordExtractorDecorator extends 
AbstractOOXMLExtractor {
         }
         return new ArrayList<>();
     }
+
+    /**
+     * Handler for parsing Word settings.xml to detect mail merge.
+     * The only state it records is whether a {@code mailMerge} element was seen.
+     */
+    private static class WordSettingsHandler extends DefaultHandler {
+        // Not written to by this handler; kept so the constructor signature is unchanged.
+        private final XHTMLContentHandler xhtml;
+        private boolean hasMailMerge = false;
+
+        WordSettingsHandler(XHTMLContentHandler xhtml) {
+            this.xhtml = xhtml;
+        }
+
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes atts)
+                throws SAXException {
+            // Mail merge element indicates document has mail merge data source
+            if ("mailMerge".equals(localName)) {
+                hasMailMerge = true;
+            }
+            // NOTE: dataSource and query elements reference their target through a
+            // relationship id; that relationship is not resolved here, so no URL is
+            // emitted for them.
+        }
+
+        /**
+         * @return true once a {@code mailMerge} element has been encountered
+         */
+        boolean hasMailMerge() {
+            return hasMailMerge;
+        }
+    }
+
+    /**
+     * SAX handler for Word's webSettings.xml that flags frameset usage.
+     * The flag is raised by a {@code frameset} element, or by a {@code frame}
+     * element that has a non-empty {@code src} attribute.
+     */
+    private static class WebSettingsHandler extends DefaultHandler {
+        private final XHTMLContentHandler xhtml;
+        private boolean hasFrameset = false;
+
+        WebSettingsHandler(XHTMLContentHandler xhtml) {
+            this.xhtml = xhtml;
+        }
+
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes atts)
+                throws SAXException {
+            if ("frameset".equals(localName)) {
+                // The element itself is enough to mark the document
+                hasFrameset = true;
+                return;
+            }
+            if (!"frame".equals(localName)) {
+                return;
+            }
+            // A frame only counts when it actually names a source
+            String frameSrc = atts.getValue("src");
+            boolean namesSource = frameSrc != null && !frameSrc.isEmpty();
+            if (namesSource) {
+                hasFrameset = true;
+            }
+        }
+
+        /**
+         * @return true if a frameset, or a frame with a source, was seen
+         */
+        boolean hasFrameset() {
+            return hasFrameset;
+        }
+    }
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
index 873242927f..8b41630f32 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
@@ -79,6 +79,20 @@ import org.apache.tika.utils.StringUtils;
 import org.apache.tika.utils.XMLReaderUtils;
 
 public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
+
+    // Relationship types for external data sources.
+    // The first, second and fourth are looked up on the workbook part; the
+    // queryTable relationship is looked up on each sheet part.
+    private static final String EXTERNAL_LINK_RELATION =
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/externalLink";
+    private static final String CONNECTIONS_RELATION =
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/connections";
+    private static final String QUERY_TABLE_RELATION =
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/queryTable";
+    private static final String PIVOT_CACHE_DEFINITION_RELATION =
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotCacheDefinition";
+    // Power Query stores data in customData parts
+    private static final String POWER_QUERY_CONTENT_TYPE =
+            "application/vnd.ms-excel.customDataProperties+xml";
+
     /**
      * Allows access to headers/footers from raw xml strings
      */
@@ -223,6 +237,382 @@ public class XSSFExcelExtractorDecorator extends 
AbstractOOXMLExtractor {
             //swallow
         }
 
+        // Extract external data sources (HIGH security risk - can hide 
malicious URLs)
+        try {
+            extractExternalDataSources(container, xhtml);
+        } catch (InvalidFormatException | TikaException | IOException | 
SAXException e) {
+            //swallow
+        }
+
+    }
+
+    /**
+     * Extracts external data sources from the workbook including:
+     * - External workbook links
+     * - Data connections (database, web queries)
+     * - Query tables (one pass per entry in {@code sheetParts})
+     * - Pivot cache definitions that use external data
+     * - Power Query / Data Mashup presence
+     * <p>
+     * Returns without doing anything when the package has no core-document
+     * relationship or the workbook part cannot be resolved.
+     *
+     * @param container the OPC package being parsed
+     * @param xhtml handler that receives the external-reference anchors
+     */
+    private void extractExternalDataSources(OPCPackage container, 
XHTMLContentHandler xhtml)
+            throws InvalidFormatException, TikaException, IOException, 
SAXException {
+
+        PackageRelationship coreDocRelationship = 
container.getRelationshipsByType(
+                PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
+        if (coreDocRelationship == null) {
+            return;
+        }
+        PackagePart workbookPart = container.getPart(coreDocRelationship);
+        if (workbookPart == null) {
+            return;
+        }
+
+        // Extract external workbook links
+        extractExternalLinks(workbookPart, xhtml);
+
+        // Extract connections (database, ODBC, web queries)
+        extractConnections(workbookPart, xhtml);
+
+        // Extract query tables from each sheet
+        for (PackagePart sheetPart : sheetParts) {
+            extractQueryTables(sheetPart, xhtml);
+        }
+
+        // Detect pivot cache with external data sources
+        extractPivotCacheExternalData(workbookPart, xhtml);
+
+        // Detect Power Query / Data Mashup
+        detectPowerQuery(container);
+    }
+
+    /**
+     * Scans every pivotCacheDefinition part related to the workbook and sets
+     * {@link Office#HAS_EXTERNAL_PIVOT_DATA} when a part reports external data
+     * (OLAP, databases). Parts that cannot be read or parsed are skipped.
+     */
+    private void extractPivotCacheExternalData(
+            PackagePart workbookPart, XHTMLContentHandler xhtml)
+            throws InvalidFormatException {
+        PackageRelationshipCollection pivotRels =
+                workbookPart.getRelationshipsByType(PIVOT_CACHE_DEFINITION_RELATION);
+        if (pivotRels == null || pivotRels.isEmpty()) {
+            return;
+        }
+        for (PackageRelationship pivotRel : pivotRels) {
+            try {
+                PackagePart cachePart = workbookPart.getRelatedPart(pivotRel);
+                if (cachePart == null) {
+                    continue;
+                }
+                PivotCacheHandler cacheHandler = new PivotCacheHandler(xhtml);
+                try (InputStream in = cachePart.getInputStream()) {
+                    XMLReaderUtils.parseSAX(in, cacheHandler, parseContext);
+                }
+                if (cacheHandler.hasExternalData()) {
+                    metadata.set(Office.HAS_EXTERNAL_PIVOT_DATA, true);
+                }
+            } catch (IOException | TikaException | SAXException e) {
+                // swallow
+            }
+        }
+    }
+
+    /**
+     * Detects Power Query / Data Mashup presence.
+     * <p>
+     * Sets {@link Office#HAS_POWER_QUERY} when the package has a part with the
+     * Power Query content type, or a part whose name contains
+     * {@code /customData/} or {@code /dataMashup}. An
+     * {@link InvalidFormatException} while listing parts is ignored.
+     *
+     * @param container the OPC package being parsed
+     */
+    private void detectPowerQuery(OPCPackage container) {
+        // Power Query data is stored in customData parts with specific content type
+        // or in xl/customData/ folder
+        try {
+            List<PackagePart> customDataParts =
+                    container.getPartsByContentType(POWER_QUERY_CONTENT_TYPE);
+            if (customDataParts != null && !customDataParts.isEmpty()) {
+                metadata.set(Office.HAS_POWER_QUERY, true);
+                // Flag is already set; the part-name scan below could not add anything.
+                return;
+            }
+            // Also check for customData folder parts
+            for (PackagePart part : container.getParts()) {
+                String partName = part.getPartName().getName();
+                if (partName.contains("/customData/") || partName.contains("/dataMashup")) {
+                    metadata.set(Office.HAS_POWER_QUERY, true);
+                    break;
+                }
+            }
+        } catch (InvalidFormatException e) {
+            // swallow
+        }
+    }
+
+    /**
+     * Extracts external workbook links from externalLink parts.
+     * <p>
+     * Sets {@link Office#HAS_EXTERNAL_LINKS} when the workbook has at least one
+     * externalLink relationship. Relationships with an external target mode are
+     * emitted directly; internal externalLink parts are parsed with
+     * {@link ExternalLinkHandler}, and {@link Office#HAS_DDE_LINKS} is set when
+     * that handler reports a DDE link. I/O and Tika failures on a single part
+     * are ignored.
+     *
+     * @param workbookPart the workbook part whose relationships are inspected
+     * @param xhtml handler that receives the external-reference anchors
+     */
+    private void extractExternalLinks(PackagePart workbookPart, XHTMLContentHandler xhtml)
+            throws InvalidFormatException, SAXException {
+        PackageRelationshipCollection coll =
+                workbookPart.getRelationshipsByType(EXTERNAL_LINK_RELATION);
+        if (coll == null || coll.isEmpty()) {
+            return;
+        }
+        // The collection is known to be non-empty here, so the flag is set unconditionally.
+        metadata.set(Office.HAS_EXTERNAL_LINKS, true);
+        for (PackageRelationship rel : coll) {
+            if (rel.getTargetMode() == TargetMode.EXTERNAL) {
+                // Direct external reference
+                emitExternalRef(xhtml, "externalLink", rel.getTargetURI().toString());
+            } else {
+                // Internal part that contains external reference - parse it
+                try {
+                    PackagePart externalLinkPart = workbookPart.getRelatedPart(rel);
+                    if (externalLinkPart != null) {
+                        ExternalLinkHandler handler = new ExternalLinkHandler(xhtml);
+                        try (InputStream is = externalLinkPart.getInputStream()) {
+                            XMLReaderUtils.parseSAX(is, handler, parseContext);
+                        }
+                        if (handler.hasDdeLink()) {
+                            metadata.set(Office.HAS_DDE_LINKS, true);
+                        }
+                    }
+                } catch (IOException | TikaException e) {
+                    // swallow
+                }
+            }
+        }
+    }
+
+    /**
+     * Reads every connections part related to the workbook (connections.xml).
+     * Sets {@link Office#HAS_DATA_CONNECTIONS} and {@link Office#HAS_WEB_QUERIES}
+     * according to what {@link ConnectionsHandler} reports. Parts that fail with
+     * an I/O or Tika error are skipped.
+     */
+    private void extractConnections(PackagePart workbookPart, XHTMLContentHandler xhtml)
+            throws InvalidFormatException, SAXException {
+        PackageRelationshipCollection connectionRels =
+                workbookPart.getRelationshipsByType(CONNECTIONS_RELATION);
+        if (connectionRels == null || connectionRels.isEmpty()) {
+            return;
+        }
+        for (PackageRelationship connectionRel : connectionRels) {
+            try {
+                PackagePart part = workbookPart.getRelatedPart(connectionRel);
+                if (part == null) {
+                    continue;
+                }
+                ConnectionsHandler connHandler = new ConnectionsHandler(xhtml);
+                try (InputStream in = part.getInputStream()) {
+                    XMLReaderUtils.parseSAX(in, connHandler, parseContext);
+                }
+                if (connHandler.hasConnections()) {
+                    metadata.set(Office.HAS_DATA_CONNECTIONS, true);
+                }
+                if (connHandler.hasWebQueries()) {
+                    metadata.set(Office.HAS_WEB_QUERIES, true);
+                }
+            } catch (IOException | TikaException e) {
+                // swallow
+            }
+        }
+    }
+
+    /**
+     * Extracts query table external sources.
+     * <p>
+     * Parses each queryTable part related to the given sheet with a
+     * {@link QueryTableHandler}. I/O and Tika failures on a single part are
+     * ignored; no metadata is set by this method itself.
+     *
+     * @param sheetPart the sheet part whose queryTable relationships are read
+     * @param xhtml handler passed on to the {@link QueryTableHandler}
+     */
+    private void extractQueryTables(PackagePart sheetPart, XHTMLContentHandler 
xhtml)
+            throws InvalidFormatException, SAXException {
+        PackageRelationshipCollection coll = 
sheetPart.getRelationshipsByType(QUERY_TABLE_RELATION);
+        if (coll == null || coll.isEmpty()) {
+            return;
+        }
+        for (PackageRelationship rel : coll) {
+            try {
+                PackagePart queryTablePart = sheetPart.getRelatedPart(rel);
+                if (queryTablePart != null) {
+                    try (InputStream is = queryTablePart.getInputStream()) {
+                        XMLReaderUtils.parseSAX(is, new 
QueryTableHandler(xhtml), parseContext);
+                    }
+                }
+            } catch (IOException | TikaException e) {
+                // swallow
+            }
+        }
+    }
+
+    /**
+     * Writes an empty {@code <a>} element that records an external reference.
+     * The anchor's {@code class} attribute is {@code "external-ref-" + refType}
+     * and its {@code href} is the given URL. Nothing is written when the URL is
+     * null or empty.
+     */
+    private void emitExternalRef(XHTMLContentHandler xhtml, String refType, String url)
+            throws SAXException {
+        boolean hasUrl = url != null && !url.isEmpty();
+        if (hasUrl) {
+            String cssClass = "external-ref-" + refType;
+            org.xml.sax.helpers.AttributesImpl anchorAtts =
+                    new org.xml.sax.helpers.AttributesImpl();
+            anchorAtts.addAttribute("", "class", "class", "CDATA", cssClass);
+            anchorAtts.addAttribute("", "href", "href", "CDATA", url);
+            xhtml.startElement("a", anchorAtts);
+            xhtml.endElement("a");
+        }
+    }
+
+    /**
+     * Handler for parsing externalLink XML to extract external workbook references.
+     * <p>
+     * Emits an anchor for {@code file} elements with an {@code href}, for
+     * {@code oleLink} elements with a relationship id, and for {@code ddeLink}
+     * elements with a service or topic. It also remembers whether any
+     * {@code ddeLink} element was seen.
+     */
+    private class ExternalLinkHandler extends DefaultHandler {
+        private final XHTMLContentHandler xhtml;
+        private boolean foundDdeLink = false;
+
+        ExternalLinkHandler(XHTMLContentHandler xhtml) {
+            this.xhtml = xhtml;
+        }
+
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes atts)
+                throws SAXException {
+            // NOTE: externalBook elements point at their target through a relationship
+            // id; the actual URL is in the relationship, not in this XML, so nothing
+            // is emitted for them here.
+
+            // Look for file element with href attribute (older format)
+            if ("file".equals(localName)) {
+                String href = atts.getValue("href");
+                if (href != null && !href.isEmpty()) {
+                    emitExternalRef(xhtml, "externalWorkbook", href);
+                }
+            }
+            // Look for oleLink with r:id (OLE links to external files)
+            if ("oleLink".equals(localName)) {
+                String rId = atts.getValue(
+                        "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
+                        "id");
+                if (rId != null) {
+                    // Only the relationship id is known here, so it is emitted as-is
+                    emitExternalRef(xhtml, "oleLink", "relationship:" + rId);
+                }
+            }
+            // DDE links - security risk: can execute commands
+            if ("ddeLink".equals(localName)) {
+                foundDdeLink = true;
+                String ddeService = atts.getValue("ddeService");
+                String ddeTopic = atts.getValue("ddeTopic");
+                if (ddeService != null || ddeTopic != null) {
+                    // Emitted as "service|topic"; a missing half becomes an empty string
+                    String ddeRef = (ddeService != null ? ddeService : "") + "|" +
+                            (ddeTopic != null ? ddeTopic : "");
+                    emitExternalRef(xhtml, "ddeLink", ddeRef);
+                }
+            }
+        }
+
+        /**
+         * @return true if a {@code ddeLink} element was encountered
+         */
+        boolean hasDdeLink() {
+            return foundDdeLink;
+        }
+    }
+
+    /**
+     * Handler for parsing connections.xml to extract external data connections.
+     * <p>
+     * Emits an anchor for the {@code connection} attribute of {@code dbPr} and
+     * {@code olapPr}, the {@code url} attribute of {@code webPr}, and the
+     * {@code sourceFile} attribute of {@code textPr}. It also records whether any
+     * {@code connection} element and any {@code webPr} element was seen.
+     */
+    private class ConnectionsHandler extends DefaultHandler {
+        private final XHTMLContentHandler xhtml;
+        private boolean foundConnection = false;
+        private boolean foundWebQuery = false;
+
+        ConnectionsHandler(XHTMLContentHandler xhtml) {
+            this.xhtml = xhtml;
+        }
+
+        @Override
+        public void startElement(String uri, String localName, String qName, 
Attributes atts)
+                throws SAXException {
+            if ("connection".equals(localName)) {
+                foundConnection = true;
+            }
+            // Database connection string
+            if ("dbPr".equals(localName)) {
+                String connection = atts.getValue("connection");
+                if (connection != null && !connection.isEmpty()) {
+                    emitExternalRef(xhtml, "dbConnection", connection);
+                }
+            }
+            // Web query
+            if ("webPr".equals(localName)) {
+                foundWebQuery = true;
+                String url = atts.getValue("url");
+                if (url != null && !url.isEmpty()) {
+                    emitExternalRef(xhtml, "webQuery", url);
+                }
+            }
+            // OLAP connection
+            if ("olapPr".equals(localName)) {
+                String connection = atts.getValue("connection");
+                if (connection != null && !connection.isEmpty()) {
+                    emitExternalRef(xhtml, "olapConnection", connection);
+                }
+            }
+            // Text file import
+            if ("textPr".equals(localName)) {
+                String sourceFile = atts.getValue("sourceFile");
+                if (sourceFile != null && !sourceFile.isEmpty()) {
+                    emitExternalRef(xhtml, "textFileImport", sourceFile);
+                }
+            }
+        }
+
+        boolean hasConnections() {
+            return foundConnection;
+        }
+
+        boolean hasWebQueries() {
+            return foundWebQuery;
+        }
+    }
+
+    /**
+     * Handler for parsing queryTable XML to extract web query sources.
+     * <p>
+     * Currently a placeholder: it emits nothing and records no state.
+     */
+    private class QueryTableHandler extends DefaultHandler {
+        private final XHTMLContentHandler xhtml;
+
+        QueryTableHandler(XHTMLContentHandler xhtml) {
+            this.xhtml = xhtml;
+        }
+
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes atts)
+                throws SAXException {
+            // Intentionally empty. A queryTable element only carries a connectionId
+            // (connection details are in connections.xml) and queryTableRefresh only
+            // contains refresh settings, so there is nothing to emit from this part.
+        }
+    }
+
+    /**
+     * Handler for parsing pivotCacheDefinition XML to detect external data sources.
+     * <p>
+     * The flag is raised for a {@code cacheSource} whose type is "external" or
+     * "consolidation", for a {@code worksheetSource} that carries a relationship
+     * id, and for any {@code consolidation} or {@code rangeSets} element.
+     */
+    private class PivotCacheHandler extends DefaultHandler {
+        private final XHTMLContentHandler xhtml;
+        private boolean hasExternalData = false;
+
+        PivotCacheHandler(XHTMLContentHandler xhtml) {
+            this.xhtml = xhtml;
+        }
+
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes atts)
+                throws SAXException {
+            // cacheSource with type="external" (or "consolidation") indicates external data
+            if ("cacheSource".equals(localName)) {
+                String type = atts.getValue("type");
+                if ("external".equals(type) || "consolidation".equals(type)) {
+                    hasExternalData = true;
+                }
+            }
+            // worksheetSource can have external references
+            if ("worksheetSource".equals(localName)) {
+                String rId = atts.getValue(
+                        "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
+                        "id");
+                // If there's a relationship ID, it likely points to external workbook
+                if (rId != null) {
+                    hasExternalData = true;
+                }
+            }
+            // consolidation source (multiple ranges, possibly external)
+            if ("consolidation".equals(localName) || "rangeSets".equals(localName)) {
+                hasExternalData = true;
+            }
+        }
+
+        /**
+         * @return true if any element suggesting an external data source was seen
+         */
+        boolean hasExternalData() {
+            return hasExternalData;
+        }
     }
 
     private void getThreadedComments(OPCPackage container, PackagePart 
sheetPart, XHTMLContentHandler xhtml) throws TikaException,
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
index 922cdbd01d..a44f4525d7 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
@@ -61,13 +61,18 @@ import org.apache.xmlbeans.XmlCursor;
 import org.apache.xmlbeans.XmlException;
 import org.apache.xmlbeans.XmlObject;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFldChar;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTObject;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.STFldCharType;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.AttributesImpl;
 
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.microsoft.EMFParser;
 import org.apache.tika.parser.microsoft.FormattingUtils;
@@ -83,7 +88,6 @@ public class XWPFWordExtractorDecorator extends 
AbstractOOXMLExtractor {
     // Part 3, Step 3
     private static final String LIST_DELIMITER = " ";
 
-
     //include all parts that might have embedded objects
     private final static String[] MAIN_PART_RELATIONS =
             new String[]{XWPFRelation.HEADER.getRelation(), 
XWPFRelation.FOOTER.getRelation(),
@@ -233,8 +237,54 @@ public class XWPFWordExtractorDecorator extends 
AbstractOOXMLExtractor {
         //hyperlinks may or may not have hyperlink ids
         String lastHyperlinkId = null;
         boolean inHyperlink = false;
+        // Track field-based hyperlinks (using instrText/fldChar)
+        FieldHyperlinkTracker fieldTracker = new FieldHyperlinkTracker();
+        boolean inFieldHyperlink = false;
+
         // Do the iruns
         for (IRunElement run : paragraph.getIRuns()) {
+            // Check for field-based hyperlinks first (instrText HYPERLINK)
+            if (run instanceof XWPFRun) {
+                XWPFRun xwpfRun = (XWPFRun) run;
+                boolean wasInFieldHyperlink = 
fieldTracker.isInFieldHyperlink();
+                String fieldUrl = extractFieldLinks(xwpfRun, fieldTracker);
+
+                // If we just entered a field hyperlink, open the anchor tag
+                if (fieldUrl != null && !inFieldHyperlink) {
+                    // Close any existing relationship-based hyperlink first
+                    if (inHyperlink) {
+                        FormattingUtils.closeStyleTags(xhtml, formattingState);
+                        xhtml.endElement("a");
+                        inHyperlink = false;
+                        lastHyperlinkId = null;
+                    }
+                    FormattingUtils.closeStyleTags(xhtml, formattingState);
+                    xhtml.startElement("a", "href", fieldUrl);
+                    inFieldHyperlink = true;
+                    metadata.set(Office.HAS_FIELD_HYPERLINKS, true);
+                }
+
+                // If we just exited a field hyperlink, close the anchor tag
+                if (wasInFieldHyperlink && !fieldTracker.isInFieldHyperlink() 
&& inFieldHyperlink) {
+                    FormattingUtils.closeStyleTags(xhtml, formattingState);
+                    xhtml.endElement("a");
+                    inFieldHyperlink = false;
+                }
+
+                // Emit any external refs (INCLUDEPICTURE, INCLUDETEXT, 
IMPORT, LINK) as anchors
+                if (fieldTracker.getLastExternalRefUrl() != null) {
+                    AttributesImpl extRefAtts = new AttributesImpl();
+                    extRefAtts.addAttribute("", "class", "class", "CDATA",
+                            "external-ref-" + 
fieldTracker.getLastExternalRefType());
+                    extRefAtts.addAttribute("", "href", "href", "CDATA",
+                            fieldTracker.getLastExternalRefUrl());
+                    xhtml.startElement("a", extRefAtts);
+                    xhtml.endElement("a");
+                    metadata.set(Office.HAS_FIELD_HYPERLINKS, true);
+                    fieldTracker.clearExternalRef();
+                }
+            }
+
             if (run instanceof XWPFHyperlinkRun) {
                 XWPFHyperlinkRun hyperlinkRun = (XWPFHyperlinkRun) run;
                 if (hyperlinkRun.getHyperlinkId() == null ||
@@ -278,6 +328,9 @@ public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
         if (inHyperlink) {
             xhtml.endElement("a");
         }
+        if (inFieldHyperlink) {
+            xhtml.endElement("a");
+        }
 
 
         // Now do any comments for the paragraph
@@ -463,6 +516,46 @@ public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
         xhtml.characters(run.getContent().getText());
     }
 
+    /**
+     * Extracts field-based hyperlinks from a run by examining fldChar and instrText elements.
+     * This handles HYPERLINK field codes that are not relationship-based.
+     *
+     * @param run the run to examine
+     * @param tracker the field hyperlink tracker maintaining state across runs
+     * @return the hyperlink URL if this run starts a hyperlink, null otherwise
+     */
+    private String extractFieldLinks(XWPFRun run, FieldHyperlinkTracker tracker) {
+        CTR ctr = run.getCTR();
+        try (XmlCursor cursor = ctr.newCursor()) {
+            if (cursor.toFirstChild()) {
+                do {
+                    String localName = cursor.getName().getLocalPart();
+                    if ("fldChar".equals(localName)) {
+                        XmlObject obj = cursor.getObject();
+                        if (obj instanceof CTFldChar) {
+                            CTFldChar fldChar = (CTFldChar) obj;
+                            STFldCharType.Enum fldType = fldChar.getFldCharType();
+                            if (fldType == STFldCharType.BEGIN) {
+                                tracker.startField();
+                            } else if (fldType == STFldCharType.SEPARATE) {
+                                return tracker.separate();
+                            } else if (fldType == STFldCharType.END) {
+                                tracker.endField();
+                            }
+                        }
+                    } else if ("instrText".equals(localName)) {
+                        XmlObject obj = cursor.getObject();
+                        if (obj instanceof CTText) {
+                            CTText text = (CTText) obj;
+                            tracker.addInstrText(text.getStringValue());
+                        }
+                    }
+                } while (cursor.toNextSibling());
+            }
+        }
+        return null;
+    }
+
     private void extractTable(XWPFTable table, XWPFListManager listManager,
                               XHTMLContentHandler xhtml)
             throws SAXException, XmlException, IOException {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xslf/XSLFEventBasedPowerPointExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xslf/XSLFEventBasedPowerPointExtractor.java
index 72767fa15c..2950e46be3 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xslf/XSLFEventBasedPowerPointExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xslf/XSLFEventBasedPowerPointExtractor.java
@@ -201,6 +201,11 @@ public class XSLFEventBasedPowerPointExtractor implements POIXMLTextExtractor {
             //no-op
         }
 
+        @Override
+        public void linkedOLERef(String refId) {
+            //no-op
+        }
+
         @Override
         public void embeddedPicRef(String picFileName, String picDescription) {
             //no-op
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java
index 26cda41385..2fb45ca7fd 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java
@@ -363,6 +363,11 @@ public class XWPFEventBasedWordExtractor implements POIXMLTextExtractor {
             //no-op
         }
 
+        @Override
+        public void linkedOLERef(String refId) {
+            //no-op
+        }
+
         @Override
         public void embeddedPicRef(String picFileName, String picDescription) {
             //no-op
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
index a90d79445f..1cced50eeb 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
@@ -592,4 +592,47 @@ public class ExcelParserTest extends TikaTest {
         assertEquals("true", m.get(Office.HAS_COMMENTS));
         assertEquals("true", m.get(Office.HAS_HIDDEN_COLUMNS));
     }
+
+    /**
+     * Test extraction of external data connections from XLSX files.
+     * These can be used to exfiltrate data or load malicious content.
+     */
+    @Test
+    public void testDataConnections() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testDataConnections.xlsx");
+        Metadata m = metadataList.get(0);
+        // Check metadata flags are set
+        assertEquals("true", m.get(Office.HAS_DATA_CONNECTIONS));
+        assertEquals("true", m.get(Office.HAS_WEB_QUERIES));
+
+        String xml = getXML("testDataConnections.xlsx").xml;
+        // Test web query extraction
+        assertContains("class=\"external-ref-webQuery\"", xml);
+        assertContains("http://example.com/data.html", xml);
+        // Test database connection extraction
+        assertContains("class=\"external-ref-dbConnection\"", xml);
+        assertContains("db.example.org", xml);
+        // Test text file import
+        assertContains("class=\"external-ref-textFileImport\"", xml);
+        assertContains("http://example.net/data.csv", xml);
+    }
+
+    /**
+     * Test detection of DDE links in Excel files.
+     * DDE (Dynamic Data Exchange) links are a security risk as they can execute commands.
+     */
+    @Test
+    public void testDdeLinks() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testDdeLink.xlsx");
+        Metadata m = metadataList.get(0);
+        // Check DDE link metadata flag is set
+        assertEquals("true", m.get(Office.HAS_DDE_LINKS));
+        // Also check external links flag since DDE is in externalLinks
+        assertEquals("true", m.get(Office.HAS_EXTERNAL_LINKS));
+
+        String xml = getXML("testDdeLink.xlsx").xml;
+        // Test DDE link extraction (service|topic format)
+        assertContains("class=\"external-ref-ddeLink\"", xml);
+        assertContains("cmd|", xml);
+    }
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index 718c0e07c9..d75c7a20a8 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -1812,4 +1812,43 @@ public class OOXMLParserTest extends MultiThreadedTikaTest {
         String content = getText("testRecordSizeExceeded.xlsx");
         assertContains("Repetitive content pattern 3 for compression test row 1", content);
     }
+
+    /**
+     * Test extraction of field-based hyperlinks using instrText/fldChar.
+     * These are hyperlinks embedded as field codes rather than relationship-based hyperlinks.
+     * Uses the DOM-based XWPFWordExtractorDecorator.
+     */
+    @Test
+    public void testInstrTextHyperlink() throws Exception {
+        String xml = getXML("testInstrLink.docx").xml;
+        // The document contains a HYPERLINK field code in instrText
+        assertContains("<a href=\"https://exmaple.com/file\">", xml);
+        assertContains("Access Document(s)", xml);
+    }
+
+    /**
+     * Test extraction of external reference field codes (INCLUDEPICTURE, INCLUDETEXT, IMPORT, LINK).
+     * These can be used to hide malicious URLs in documents.
+     */
+    @Test
+    public void testExternalRefFieldCodes() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testExternalRefs.docx");
+        Metadata m = metadataList.get(0);
+        // Check metadata flag is set
+        assertEquals("true", m.get(Office.HAS_FIELD_HYPERLINKS));
+
+        String xml = getXML("testExternalRefs.docx").xml;
+        // Test INCLUDEPICTURE field code
+        assertContains("class=\"external-ref-INCLUDEPICTURE\"", xml);
+        assertContains("http://example.com/tracking.png", xml);
+        // Test INCLUDETEXT field code
+        assertContains("class=\"external-ref-INCLUDETEXT\"", xml);
+        assertContains("http://example.org/payload.txt", xml);
+        // Test IMPORT field code
+        assertContains("class=\"external-ref-IMPORT\"", xml);
+        assertContains("http://example.net/exploit.wmf", xml);
+        // Test LINK field code
+        assertContains("class=\"external-ref-LINK\"", xml);
+        assertContains("http://test.invalid/cmd.docx", xml);
+    }
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
index 91cb801ed1..c0482bd304 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
@@ -850,4 +850,113 @@ public class SXWPFExtractorTest extends TikaTest {
         assertContainsCount("inside-text", xml, 1);
     }
 
+    /**
+     * Test extraction of field-based hyperlinks using instrText/fldChar.
+     * These are hyperlinks embedded as field codes rather than relationship-based hyperlinks.
+     */
+    @Test
+    public void testInstrTextHyperlink() throws Exception {
+        String xml = getXML("testInstrLink.docx", parseContext).xml;
+        // The document contains a HYPERLINK field code in instrText
+        assertContains("<a href=\"https://exmaple.com/file\">", xml);
+        assertContains("Access Document(s)", xml);
+    }
+
+    /**
+     * Test extraction of external reference field codes (INCLUDEPICTURE, INCLUDETEXT, IMPORT, LINK).
+     * These can be used to hide malicious URLs in documents.
+     */
+    @Test
+    public void testExternalRefFieldCodes() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testExternalRefs.docx", parseContext);
+        Metadata m = metadataList.get(0);
+        // Check metadata flag is set
+        assertEquals("true", m.get(Office.HAS_FIELD_HYPERLINKS));
+
+        String xml = getXML("testExternalRefs.docx", parseContext).xml;
+        // Test INCLUDEPICTURE field code
+        assertContains("class=\"external-ref-INCLUDEPICTURE\"", xml);
+        assertContains("http://example.com/tracking.png", xml);
+        // Test INCLUDETEXT field code
+        assertContains("class=\"external-ref-INCLUDETEXT\"", xml);
+        assertContains("http://example.org/payload.txt", xml);
+        // Test IMPORT field code
+        assertContains("class=\"external-ref-IMPORT\"", xml);
+        assertContains("http://example.net/exploit.wmf", xml);
+        // Test LINK field code
+        assertContains("class=\"external-ref-LINK\"", xml);
+        assertContains("http://test.invalid/cmd.docx", xml);
+    }
+
+    /**
+     * Test extraction of hlinkHover (hover hyperlinks) and VML shape hrefs.
+     * These are sneaky ways to hide malicious URLs.
+     */
+    @Test
+    public void testHoverAndVmlHyperlinks() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testHoverAndVml.docx", parseContext);
+        Metadata m = metadataList.get(0);
+        // Check metadata flags are set
+        assertEquals("true", m.get(Office.HAS_HOVER_HYPERLINKS));
+        assertEquals("true", m.get(Office.HAS_VML_HYPERLINKS));
+
+        String xml = getXML("testHoverAndVml.docx", parseContext).xml;
+        // Test hlinkHover (activates on mouse hover, not click)
+        assertContains("class=\"external-ref-hlinkHover\"", xml);
+        assertContains("http://hover.example.com/phishing", xml);
+        // Test VML shape href
+        assertContains("class=\"external-ref-vml-shape-href\"", xml);
+        assertContains("http://vml.example.org/shape-link", xml);
+    }
+
+    /**
+     * Test detection of mail merge in Word documents.
+     * Mail merge can reference external data sources.
+     */
+    @Test
+    public void testMailMerge() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testMailMerge.docx", parseContext);
+        Metadata m = metadataList.get(0);
+        assertEquals("true", m.get(Office.HAS_MAIL_MERGE));
+    }
+
+    /**
+     * Test detection of attached external template.
+     * Templates can be fetched from malicious URLs.
+     */
+    @Test
+    public void testAttachedTemplate() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testAttachedTemplate.docx", parseContext);
+        Metadata m = metadataList.get(0);
+        assertEquals("true", m.get(Office.HAS_ATTACHED_TEMPLATE));
+
+        String xml = getXML("testAttachedTemplate.docx", parseContext).xml;
+        assertContains("class=\"external-ref-attachedTemplate\"", xml);
+        assertContains("example.com/templates", xml);
+    }
+
+    /**
+     * Test detection of subdocuments (master document linking external docs).
+     */
+    @Test
+    public void testSubdocument() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testSubdocument.docx", parseContext);
+        Metadata m = metadataList.get(0);
+        assertEquals("true", m.get(Office.HAS_SUBDOCUMENTS));
+
+        String xml = getXML("testSubdocument.docx", parseContext).xml;
+        assertContains("class=\"external-ref-subDocument\"", xml);
+        assertContains("example.org/chapters", xml);
+    }
+
+    /**
+     * Test detection of framesets (HTML frames loading external URLs).
+     */
+    @Test
+    public void testFrameset() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testFrameset.docx", parseContext);
+        Metadata m = metadataList.get(0);
+        assertEquals("true", m.get(Office.HAS_FRAMESETS));
+    }
+
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/pst/OutlookPSTParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/pst/OutlookPSTParserTest.java
index 6e9a6d6d1b..7cff052cf8 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/pst/OutlookPSTParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/pst/OutlookPSTParserTest.java
@@ -67,6 +67,9 @@ public class OutlookPSTParserTest extends TikaTest {
         assertEquals(10, metadataList.size());
 
         Metadata m1 = metadataList.get(1);
+        assertEquals("application/x-tika-pst-mail-item", m1.get(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE));
+        assertEquals("application/x-tika-pst-mail-item", m1.get(Metadata.CONTENT_TYPE));
+
         assertEquals("Jörn Kottmann", m1.get(Message.MESSAGE_FROM_NAME));
         assertEquals("Jörn Kottmann", m1.get(TikaCoreProperties.CREATOR));
         assertEquals("Re: Feature Generators", m1.get(TikaCoreProperties.TITLE));
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAttachedTemplate.docx b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAttachedTemplate.docx
new file mode 100644
index 0000000000..768258ad11
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAttachedTemplate.docx differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDataConnections.xlsx b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDataConnections.xlsx
new file mode 100644
index 0000000000..af76b99347
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDataConnections.xlsx differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDdeLink.xlsx b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDdeLink.xlsx
new file mode 100644
index 0000000000..be4912b4b5
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDdeLink.xlsx differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testExternalRefs.docx b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testExternalRefs.docx
new file mode 100644
index 0000000000..8b8d3c1adc
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testExternalRefs.docx differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testFrameset.docx b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testFrameset.docx
new file mode 100644
index 0000000000..d19070fe07
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testFrameset.docx differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testHoverAndVml.docx b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testHoverAndVml.docx
new file mode 100644
index 0000000000..2b43e1e047
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testHoverAndVml.docx differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testInstrLink.docx b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testInstrLink.docx
new file mode 100644
index 0000000000..3b2fc9257b
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testInstrLink.docx differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testMailMerge.docx b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testMailMerge.docx
new file mode 100644
index 0000000000..e0c8f00b03
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testMailMerge.docx differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testSubdocument.docx b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testSubdocument.docx
new file mode 100644
index 0000000000..7bf396e35b
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testSubdocument.docx differ

Reply via email to