Just in case anyone else needs this, here is the code.
The key to it all was finding out that the section information is bound in
two places; the final paragraph in a section holds the section information
whilst the information for the final or only section is bound to the
document. This code focuses on the column widths, but there is a lot more
information available once you have got your hands on the CTSectPr instance.
Yours
Mark B
import java.io.File;
import java.io.FileInputStream;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.util.List;
import java.util.Iterator;
import java.text.DecimalFormat;
import java.math.BigInteger;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTColumn;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTColumns;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;
/**
 * Illustrates how to recover information about a document's sections using
 * the classes defined within the underlying openxml4j layer.
 *
 * @author Mark B
 * @version 1.00 21st April 2011
 */
public class XWPFSectionTest {
private static final long TO_POINTS_DIVISOR = 20;
private static final long TO_INCHES_DIVISOR = 72;
private static final double TO_CM_MULTIPLIER = 2.54;
/**
* Create an instance of the XWPFSectionTest class using the following
* parameter(s).
*
* @param filename An instance of the String class that encapsulates the
* path to and name of a valie Word 2007 file (.docx).
* @throws java.io.IOException Thrown if a problem occurs in the
underlying
* file system.
*/
public XWPFSectionTest(String filename) throws IOException {
File file = null;
FileInputStream fis = null;
BufferedInputStream bis = null;
XWPFDocument document = null;
XWPFParagraph paragraph = null;
XWPFRun run = null;
List<XWPFParagraph> paraList = null;
Iterator<XWPFParagraph> paraListIter = null;
CTPPr ctPPr = null;
CTSectPr sectPr = null;
DecimalFormat formatter = null;
try {
// The DataFormat object is used simply to format the column
width
// figures for display.
formatter = new DecimalFormat("#0.00");
// Open the Word document.
file = new File(filename);
fis = new FileInputStream(file);
bis = new BufferedInputStream(fis);
document = new XWPFDocument(bis);
// Get a List of the pargraphs the document contains and, from
that,
// an Iterator to step through the document one paragraph as a
time.
paraList = document.getParagraphs();
paraListIter = paraList.iterator();
while(paraListIter.hasNext()) {
// Print the pargraph text to illustrate how the section
// information is bound to a specific paragraph object.
paragraph = paraListIter.next();
System.out.println("Pargraph text: " +
paragraph.getParagraphText());
// The section information will only be bound to the final
// paragraph in the section. If the section information
// is missing then the call to getPPr() will retunr a null
// value. I suspect that it is not just the section
information
// that determines whether or not the CTPPr object will be
// created for the paragraph but only testing on more
complex
// documents will prove or disprove this conclusion.
ctPPr = paragraph.getCTP().getPPr();
if(ctPPr != null) {
// Get the CTSectPr object that contains the information
// about the document section and strip (some of) the
// information from it.
sectPr = ctPPr.getSectPr();
this.discoverSectionInfo(sectPr, formatter);
}
}
// Get the CTSectPr from the document here. This will contain
the
// information for the last or only section within the document.
sectPr = document.getDocument().getBody().getSectPr();
this.discoverSectionInfo(sectPr, formatter);
}
finally {
if(bis != null) {
bis.close();
bis = null;
}
}
}
/**
* Interrogates the various openxml4j objects in order to discover some
of
* the information about a specific section. Currently, all it does is
to
* disocver how many columns there are in a section along with the width
* of each column and the size of the inter-column gap (if any). There
is
* considerably more information avaliable.
*
* @param sectPr An instance of a class that implements the CTSectPr
* interface and which encapsulates information about a specific
* section within a Word document.
* @param formatter An instance of the DecimalFormat class that is
simply
* used to prepare numeric values for diaply to the user.
*/
private void discoverSectionInfo(CTSectPr sectPr, DecimalFormat
formatter) {
List<CTColumn> columnList = null;
CTColumns columns = null;
CTColumn column = null;
BigInteger bigInteger = null;
long widthPage = 0L;
long widthRightMargin = 0L;
long widthLeftMargin = 0L;
long widthColumn = 0L;
long widthColumnSpacing = 0L;
long totalColumnSpacing = 0L;
System.out.println("\n****************** Section Information.
******************");
// Recover the width of the page along with the widths of the
// right and left hand margins.
widthPage = sectPr.getPgSz().getW().longValue();
widthRightMargin = sectPr.getPgMar().getRight().longValue();
widthLeftMargin = sectPr.getPgMar().getLeft().longValue();
// ...and print them out.
System.out.println("Width of page: " +
this.convertSize(widthPage, formatter));
System.out.println("Width right margin: " +
this.convertSize(widthRightMargin, formatter));
System.out.println("Width left margin: " +
this.convertSize(widthLeftMargin, formatter));
// If the text in the section is organised into a single
// column - whether or not the user has explicitly set the
// number of columns to one when they created the section -
// the call to getCols() will return a null value. In this
// case, the width of the column will be calculated by
// subtracting the widths of the right and left margins from
// the width of the page.
columns = sectPr.getCols();
if(columns.getNum() == null) {
System.out.println("The text in this section is "+
"organised into a single column.");
widthColumn = widthPage - (widthRightMargin +
widthLeftMargin);
System.out.println("The width of the column is: " +
this.convertSize(widthColumn, formatter));
}
else {
// The section has been organised into more than one column so
// display how many there are.
System.out.println("The text in this section is organised into "
+
columns.getNum().longValue() +
" columns.");
// Get a List of CTColumn objects from the CTColumns object.
columnList = columns.getColList();
// If the length of this list os zero, then all of the columns
will
// be the same size and separated by an inter-column gap whichis
// liewise the same. In this case, it is safe to caculate the
widths
// of the columns 'manually' so to speak.
if(columnList.size() == 0) {
// Firstly, get the width of the inter-column space
widthColumnSpacing = columns.getSpace().longValue();
// If there are more than two columns, the inter-column
// space must be totalled
totalColumnSpacing = widthColumnSpacing *
(columns.getNum().longValue() - 1);
// Now determine the width of an individual column
// by suntracting the widths of the right and left
// columns along with the total width of the inter-column
// space(s) and then dividing the result by the number
// of columns in the section.
widthColumn = widthPage -
(widthRightMargin + widthLeftMargin +
totalColumnSpacing);
widthColumn = widthColumn / columns.getNum().longValue();
// ...and then print the columns width and gap.
System.out.println("The columns are each " +
this.convertSize(widthColumn, formatter) +
" wide.");
System.out.println("The columns are spaced " +
this.convertSize(widthColumnSpacing, formatter) +
" apart.");
}
else {
// If the columns list actually has CTColumn objects in it,
call
// iterateColumns() to print out the details for each.
this.ierateColumns(columnList, formatter);
}
}
System.out.println("****************** End Of Section Information.
******************\n");
}
/**
* Print out the width of each column and the size of the inter-column
gap.
*
* @param columnList A list of the columns 'contained' within a specific
* section.
* @param formatter An inatnce of the DecimalFormat class that is used
to
* prepare numeric values for display to the user.
*/
private void ierateColumns(List<CTColumn> columnList, DecimalFormat
formatter) {
CTColumn column = null;
Iterator<CTColumn> columnListIter = null;
BigInteger bigInteger = null;
columnListIter = columnList.iterator();
// Simply iterate through the columns and print out the width and
// inter-column gap for each.
while(columnListIter.hasNext()) {
column = columnListIter.next();
// The check for bigInteger being null is actually motivated by
// the call to getSpace(). That method will return null for the
// final column in the section as it does not have a space
following
// it, there is no record in the mark-up and so a null value is
// returned. To date, I have not seen this happen with the call
to
// getW() but it might.
bigInteger = column.getW();
if(bigInteger != null) {
System.out.println("Column width: " +
this.convertSize(bigInteger.longValue(), formatter));
}
bigInteger = column.getSpace();
if(bigInteger != null) {
System.out.println("Inter-column space: " +
this.convertSize(bigInteger.longValue(), formatter));
}
}
}
/**
* Microsoft use a standard unit - 1/20th of a point I think - to
* record the dimensions of various features of the document within the
* xml markup. This method converts them into more familiar units -
inches
* and centimetres.
*
* @param longValue A primitive long that stores the value to be
converted.
* @param formatter An instance of the DecimalFormat class that is used
to
* determine how the value should appear to the user.
* @return An instance of the String class that encpsulates a message
* containing the converted value.
*/
private String convertSize(long longValue, DecimalFormat formatter) {
return(this.convertSize((double)longValue, formatter));
}
/**
* Microsoft use a standrd unit - 1/20th of a point - to record the
dimensions
* of various features of the document - tabs, page size, margins etc -
* within the xml markup. This method converts them into more familiar
* units - inches and centimetres.
*
* @param doubleValue A primitive double that stores the value to be
* converted.
* @param formatter An instance of the DecimalFormat class that is used
to
* determine how the value should appear to the user.
* @return An instance of the String class that encpsulates a message
* containing the converted value.
*/
private String convertSize(double doubleValue, DecimalFormat formatter)
{
StringBuffer buffer = new StringBuffer();
double sizePoints = doubleValue / TO_POINTS_DIVISOR;
double sizeInches = sizePoints / TO_INCHES_DIVISOR;
double sizeCM = sizeInches * TO_CM_MULTIPLIER;
buffer.append(formatter.format(sizeInches));
buffer.append("inches or ");
buffer.append(formatter.format(sizeCM));
buffer.append("cm.");
return(buffer.toString());
}
public static void main(String[] args) {
if(args.length != 1) {
System.out.println("Usage new XWPFSectionTest(new
String[]{\"filename\"})");
System.out.println("where the filename parameter is an instance
of the");
System.out.println("String class that encapsulates the path to
and name");
System.out.println("a valid Word (docx) document.");
}
else {
try {
new XWPFSectionTest(args[0]);
}
catch(IOException ioEx) {
System.out.println("Caught an: " +
ioEx.getClass().getName());
System.out.println("Message: " + ioEx.getMessage());
System.out.println("Stacktrace follows:.....");
ioEx.printStackTrace(System.out);
}
}
}
}
--
View this message in context:
http://apache-poi.1045710.n5.nabble.com/XWPF-Sections-tp4313854p4334320.html
Sent from the POI - User mailing list archive at Nabble.com.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]