On 4/8/08, Yury Batrakov <[EMAIL PROTECTED]> wrote: > Thanks for your reply, I'll look to the code. >
Looks like I am done with that. I'm not sure if my code is useful and good enough to submit to the POI source tree :). It still uses low-level stuff and needs to be wrapped in something like XWPFHyperlink. I attach my patch here for those who may be interested in it. Also, there is a bug in the latest openxml4j snapshot that prevents my code from working :). I reported it on the openxml4j dev forum here: https://sourceforge.net/forum/forum.php?thread_id=2000813&forum_id=603903
Index: src/ooxml/java/org/apache/poi/xwpf/XWPFDocument.java =================================================================== --- src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFDocument.java (revision 645870) +++ src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFDocument.java (working copy) @@ -24,6 +24,8 @@ import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.opc.Package; import org.openxml4j.opc.PackagePart; +import org.openxml4j.opc.PackageRelationshipCollection; +import org.openxml4j.opc.PackageRelationshipTypes; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles; @@ -47,6 +49,7 @@ public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"; public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"; public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"; + public static final String HREF_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"; private DocumentDocument wordDoc; @@ -89,4 +92,11 @@ StylesDocument.Factory.parse(parts[0].getInputStream()); return sd.getStyles(); } + + public PackageRelationshipCollection getHrefRelations() throws InvalidFormatException + { + return getCorePart().getRelationshipsByType(HREF_RELATION_TYPE); + + } } + Index: src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java =================================================================== --- src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java (revision 645870) +++ src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java (working copy) @@ -25,6 +25,10 @@ import org.apache.xmlbeans.XmlException; import 
org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.opc.Package; +import org.openxml4j.opc.internal.PackagePropertiesPart; +import org.openxml4j.opc.PackageRelationship; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink; +import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; @@ -60,12 +64,31 @@ public String getText() { CTBody body = document.getDocumentBody(); StringBuffer text = new StringBuffer(); + + try + { + PackagePropertiesPart props = document.getCoreProperties(); + CTProperties extprops = document.getExtendedProperties(); + System.out.println("Title: " + props.getTitleProperty().getValue()); + System.out.println("Author: " + props.getCreatorProperty().getValue()); + System.out.println("Subject: " + props.getSubjectProperty().getValue()); + System.out.println("Keywords: " + props.getKeywordsProperty().getValue()); + System.out.println("Category: " + props.getCategoryProperty().getValue()); + System.out.println("Status: " + props.getContentStatusProperty().getValue()); + System.out.println("Description: " + props.getDescriptionProperty().getValue()); + } + catch(Exception e) + { + System.out.println(e); + } // Loop over paragraphs CTP[] ps = body.getPArray(); for (int i = 0; i < ps.length; i++) { + CTR[] rs = ps[i].getRArray(); + CTHyperlink[] hl = ps[i].getHyperlinkArray(); + // Loop over ranges - CTR[] rs = ps[i].getRArray(); for (int j = 0; j < rs.length; j++) { // Loop over text runs CTText[] texts = rs[j].getTArray(); @@ -75,6 +98,37 @@ ); } } + + if(hl.length > 0) + text.append("\n"); + + // Loop over ranges + for(int j=0; j<hl.length; j++) + { + CTR[] hlrs = hl[j].getRArray(); + + for (int k = 0; k < hlrs.length; k++) { + // Loop over text runs + CTText[] texts = 
hlrs[k].getTArray(); + for (int l = 0; l < texts.length; l++) { + text.append( + texts[l].getStringValue() + ); + } + } + + try + { + PackageRelationship rel = + document.getHrefRelations().getRelationshipByID(hl[j].getId()); + text.append(" ("+rel.getTargetURI()+")\n"); + } + catch(Exception e) + { + System.err.println(e); + } + } + // New line after each paragraph. text.append("\n"); }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
