This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4361
in repository https://gitbox.apache.org/repos/asf/tika.git

commit bef786ed0cd777cc966384f7948a2c1d039e5dcd
Author: tallison <[email protected]>
AuthorDate: Wed Dec 4 15:35:37 2024 -0500

    TIKA-4361
---
 .../tika/parser/microsoft/rtf/TextExtractor.java   | 44 ++++++++++++----------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/TextExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/TextExtractor.java
index 83abb1ae6..e5a6ae6b4 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/TextExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/TextExtractor.java
@@ -941,6 +941,7 @@ final class TextExtractor {
         } else {
             // In document
             if (equals("b")) {
+                //TIKA-4361 -- need to make sure we're not in an href?
                 // b0
                 assert param == 0;
                 if (groupState.bold) {
@@ -1085,6 +1086,9 @@ final class TextExtractor {
     }
 
     private void end(String tag) throws IOException, SAXException, TikaException {
+        if ("b".equals(tag)) {
+            System.out.println("ending b");
+        }
         out.endElement(XHTML, tag, tag);
     }
 
@@ -1479,29 +1483,31 @@ final class TextExtractor {
         if (groupStates.size() > 0) {
             // Restore group state:
             final GroupState outerGroupState = groupStates.removeLast();
-
-            // Close italic, if outer does not have italic or
-            // bold changed:
-            if (groupState.italic) {
-                if (!outerGroupState.italic || groupState.bold != outerGroupState.bold) {
-                    end("i");
-                    groupState.italic = false;
+            //only modify styles if we're not in a hyperlink
+            if (fieldState == 0) {
+                // Close italic, if outer does not have italic or
+                // bold changed:
+                if (groupState.italic) {
+                    if (!outerGroupState.italic || groupState.bold != outerGroupState.bold) {
+                        end("i");
+                        groupState.italic = false;
+                    }
                 }
-            }
 
-            // Close bold
-            if (groupState.bold && !outerGroupState.bold) {
-                end("b");
-            }
+                // Close bold
+                if (groupState.bold && !outerGroupState.bold) {
+                    end("b");
+                }
 
-            // Open bold
-            if (!groupState.bold && outerGroupState.bold) {
-                start("b");
-            }
+                // Open bold
+                if (!groupState.bold && outerGroupState.bold) {
+                    start("b");
+                }
 
-            // Open italic
-            if (!groupState.italic && outerGroupState.italic) {
-                start("i");
+                // Open italic
+                if (!groupState.italic && outerGroupState.italic) {
+                    start("i");
+                }
             }
             groupState = outerGroupState;
         }

Reply via email to