On 4/8/08, Yury Batrakov <[EMAIL PROTECTED]> wrote:
> Thanks for your reply, I'll look to the code.
>

Looks like I am done with that. I'm not sure if my code is useful and
good enough to submit to the POI source tree :). It still uses
low-level stuff and needs to be wrapped in something like
XWPFHyperlink. I attach my patch here for those who may be interested
in it.

Also, there is a bug in the latest openxml4j snapshot that prevents my code
from working :). I reported it on the openxml4j dev forum here:
https://sourceforge.net/forum/forum.php?thread_id=2000813&forum_id=603903
Index: src/ooxml/java/org/apache/poi/xwpf/XWPFDocument.java
===================================================================
--- src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFDocument.java	(revision 645870)
+++ src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFDocument.java	(working copy)
@@ -24,6 +24,8 @@
 import org.openxml4j.exceptions.OpenXML4JException;
 import org.openxml4j.opc.Package;
 import org.openxml4j.opc.PackagePart;
+import org.openxml4j.opc.PackageRelationshipCollection;
+import org.openxml4j.opc.PackageRelationshipTypes;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
@@ -47,6 +49,7 @@
 	public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
 	public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
 	public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";;
+	public static final String HREF_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";; 
 	
 	private DocumentDocument wordDoc;
 	
@@ -89,4 +92,11 @@
 			StylesDocument.Factory.parse(parts[0].getInputStream());
 		return sd.getStyles();
 	}
+
+	public PackageRelationshipCollection getHrefRelations() throws InvalidFormatException
+	{
+	       return getCorePart().getRelationshipsByType(HREF_RELATION_TYPE);
+ 
+	}
 }
+


Index: src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
===================================================================
--- src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java	(revision 645870)
+++ src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java	(working copy)
@@ -25,6 +25,10 @@
 import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.OpenXML4JException;
 import org.openxml4j.opc.Package;
+import org.openxml4j.opc.internal.PackagePropertiesPart;
+import org.openxml4j.opc.PackageRelationship;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
+import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
@@ -60,12 +64,31 @@
 	public String getText() {
 		CTBody body = document.getDocumentBody();
 		StringBuffer text = new StringBuffer();
+
+		try
+		{
+		    PackagePropertiesPart props = document.getCoreProperties();
+		    CTProperties extprops = document.getExtendedProperties();
+		    System.out.println("Title: " + props.getTitleProperty().getValue());
+		    System.out.println("Author: " + props.getCreatorProperty().getValue());
+		    System.out.println("Subject: " + props.getSubjectProperty().getValue());
+		    System.out.println("Keywords: " + props.getKeywordsProperty().getValue());
+		    System.out.println("Category: " + props.getCategoryProperty().getValue());
+		    System.out.println("Status: " + props.getContentStatusProperty().getValue());
+		    System.out.println("Description: " + props.getDescriptionProperty().getValue());
+		}
+		catch(Exception e)
+		{
+		    System.out.println(e);		    
+		}
 		
 		// Loop over paragraphs
 		CTP[] ps = body.getPArray();
 		for (int i = 0; i < ps.length; i++) {
+			CTR[] rs = ps[i].getRArray();
+			CTHyperlink[] hl = ps[i].getHyperlinkArray();
+
 			// Loop over ranges
-			CTR[] rs = ps[i].getRArray();
 			for (int j = 0; j < rs.length; j++) {
 				// Loop over text runs
 				CTText[] texts = rs[j].getTArray();
@@ -75,6 +98,37 @@
 					);
 				}
 			}
+
+			if(hl.length > 0)
+			    text.append("\n");
+			
+	        // Loop over ranges
+			for(int j=0; j<hl.length; j++)
+			{
+			    CTR[] hlrs = hl[j].getRArray();
+			    
+			    for (int k = 0; k < hlrs.length; k++) {
+			        // Loop over text runs
+			        CTText[] texts = hlrs[k].getTArray();
+			        for (int l = 0; l < texts.length; l++) {
+			            text.append(
+			                    texts[l].getStringValue()
+			            );
+			        }
+			    }
+			    
+			    try
+			    {
+			        PackageRelationship rel = 
+			            document.getHrefRelations().getRelationshipByID(hl[j].getId());
+			        text.append(" ("+rel.getTargetURI()+")\n");
+			    }
+			    catch(Exception e)
+			    {
+			        System.err.println(e);
+			    }
+			}
+			
 			// New line after each paragraph.
 			text.append("\n");
 		}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to