DO NOT REPLY TO THIS EMAIL, BUT PLEASE POST YOUR BUG-RELATED COMMENTS THROUGH THE WEB INTERFACE AVAILABLE AT <http://issues.apache.org/bugzilla/show_bug.cgi?id=38616>. ANY REPLY MADE TO THIS MESSAGE WILL NOT BE COLLECTED AND INSERTED IN THE BUG DATABASE.
http://issues.apache.org/bugzilla/show_bug.cgi?id=38616 ------- Additional Comments From [EMAIL PROTECTED] 2006-02-23 14:19 ------- (From update of attachment 17657) Index: D:/java/svn-apache/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java =================================================================== --- D:/java/svn-apache/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java (revision 376982) +++ D:/java/svn-apache/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java (working copy) @@ -89,6 +89,9 @@ /** Hold list tables */ protected ListTables _lt; + /** Holds pictures table */ + protected PicturesTable _pictures; + protected HWPFDocument() { @@ -152,6 +155,7 @@ _dataStream = new byte[0]; } + _pictures = new PicturesTable(_dataStream); // get the start of text in the main stream int fcMin = _fib.getFcMin(); @@ -237,7 +241,16 @@ { return _lt; } + /** + * @return PicturesTable object, that is able to extract images from this document + */ + public PicturesTable getPicturesTable() + { + return _pictures; + } + + /** * Writes out the word file that is represented by an instance of this class. * * @param out The OutputStream to write to. Index: D:/java/svn-apache/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.j ava =================================================================== --- D:/java/svn-apache/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.j ava (revision 0) +++ D:/java/svn-apache/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.j ava (revision 0) @@ -0,0 +1,114 @@ +/* ==================================================================== + Copyright 2002-2006 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +package org.apache.poi.hwpf.model; + +import org.apache.poi.util.LittleEndian; +import org.apache.poi.hwpf.usermodel.CharacterRun; +import org.apache.poi.hwpf.usermodel.Picture; + +import java.util.List; +import java.util.ArrayList; + + +/** + * Holds information about all pictures embedded in Word Document either via "Insert -> Picture -> From File" or via + * clipboard. Responsible for images extraction and determining whether some document's piece contains embedded image. + * Analyzes raw data bytestream "Data" (where Word stores all embedded objects) provided by HWPFDocument. + * + * Word stores images as is within so called "Data stream" - the stream within a Word docfile containing various data + * that hang off of characters in the main stream. For example, binary data describing in-line pictures and/or + * formfields and also embedded objects-native data. Word picture structures are concatenated one after the other in + * the data stream if the document contains pictures. + * Data stream is easily reachable via HWPFDocument._dataStream property. + * A picture is represented in the document text stream as a special character, an Unicode \u0001 whose + * CharacterRun.isSpecial() returns true. The file location of the picture in the Word binary file is accessed + * via CharacterRun.getPicOffset(). The CharacterRun.getPicOffset() is a byte offset into the data stream. + * Beginning at the position recorded in picOffset, a header data structure, will be stored. 
+ * + * @author Dmitry Romanov + */ +public class PicturesTable +{ + static final int TYPE_IMAGE = 0x08; + static final int TYPE_IMAGE_PASTED_FROM_CLIPBOARD = 0xA; + static final int BLOCK_TYPE_OFFSET = 0xE; + + private byte[] _dataStream; + + /** @link dependency + * @stereotype instantiate*/ + /*# Picture lnkPicture; */ + + /** + * + * @param _dataStream + */ + public PicturesTable(byte[] _dataStream) + { + this._dataStream = _dataStream; + } + + /** + * determines whether specified CharacterRun contains reference to a picture + * @param run + */ + public boolean hasPicture(CharacterRun run) { + if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData() && "\u0001".equals(run.text())) { + short blockType = getBlockType(_dataStream, run.getPicOffset()); + return (blockType == TYPE_IMAGE || blockType == TYPE_IMAGE_PASTED_FROM_CLIPBOARD); + } + return false; + } + + private static short getBlockType(byte[] dataStream, int pictOffset) { + return LittleEndian.getShort(dataStream, pictOffset + BLOCK_TYPE_OFFSET); + } + + /** + * Returns picture object tied to specified CharacterRun + * @param run + * @param fillBytes if true, Picture will be returned with filled byte array that represent picture's contents. If you don't want + * to have that byte array in memory but only write picture's contents to stream, pass false and then use Picture.writeImageContent + * @see Picture#writeImageContent(java.io.OutputStream) + * @return a Picture object if picture exists for specified CharacterRun, null otherwise. PicturesTable.hasPicture is used to determine this. 
+ * @see #hasPicture(org.apache.poi.hwpf.usermodel.CharacterRun) + */ + public Picture extractPicture(CharacterRun run, boolean fillBytes) { + if (hasPicture(run)) { + return new Picture(run.getPicOffset(), _dataStream, fillBytes); + } + return null; + } + + /** + * @return a list of Picture objects found in current document + */ + public List getAllPictures() { + int i = 0; + ArrayList pictures = new ArrayList(); + while(i<_dataStream.length) { + short blockType = getBlockType(_dataStream, i); + if (blockType == TYPE_IMAGE || blockType==TYPE_IMAGE_PASTED_FROM_CLIPBOARD) { + pictures.add(new Picture(i, _dataStream, false)); + } + i += LittleEndian.getInt(_dataStream, i); + } + return pictures; + } + +} Index: D:/java/svn-apache/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.jav a =================================================================== --- D:/java/svn-apache/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.jav a (revision 0) +++ D:/java/svn-apache/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.jav a (revision 0) @@ -0,0 +1,341 @@ +/* ==================================================================== + Copyright 2002-2006 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + + +package org.apache.poi.hwpf.usermodel; + +import org.apache.poi.util.LittleEndian; + +import java.io.OutputStream; +import java.io.IOException; + +/** + * Represents embedded picture extracted from Word Document + * @author Dmitry Romanov + */ +public class Picture +{ +// public static final int FILENAME_OFFSET = 0x7C; +// public static final int FILENAME_SIZE_OFFSET = 0x6C; + static final int BLOCK_TYPE_OFFSET = 0xE; + static final int PICT_HEADER_OFFSET = 0x4; + static final int UNKNOWN_HEADER_SIZE = 0x49; + + public static final byte[] GIF = new byte[]{'G', 'I', 'F'}; + public static final byte[] PNG = new byte[]{ (byte)0x89, 0x50, 0x4E, 0x47,0x0D,0x0A,0x1A,0x0A}; + public static final byte[] JPG = new byte[]{(byte)0xFF, (byte)0xD8}; + public static final byte[] BMP = new byte[]{'B', 'M'}; + public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00}; + public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A}; + + public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'}; + + private int dataBlockStartOfsset; + private int pictureBytesStartOffset; + private int dataBlockSize; + private int size; +// private String fileName; + private byte[] content; + private byte[] _dataStream; + private int aspectRatioX; + private int aspectRatioY; + private int height = -1; + private int width = -1; + + + public Picture(int dataBlockStartOfsset, byte[] _dataStream, boolean fillBytes) + { + this._dataStream = _dataStream; + this.dataBlockStartOfsset = dataBlockStartOfsset; + this.pictureBytesStartOffset = getPictureBytesStartOffset(dataBlockStartOfsset, _dataStream); + this.dataBlockSize = LittleEndian.getInt(_dataStream, dataBlockStartOfsset); + this.size = dataBlockSize - (pictureBytesStartOffset - dataBlockStartOfsset); + + this.aspectRatioX = extractAspectRatioX(_dataStream, dataBlockStartOfsset); + this.aspectRatioY = extractAspectRatioY(_dataStream, 
dataBlockStartOfsset); +// this.fileName = extractFileName(dataBlockStartOfsset, _dataStream); +// if (fileName==null || fileName.length()==0) { +// fileName = "clipboard"; +// } + + if (fillBytes) + { + fillImageContent(_dataStream); + } + + String ext = suggestFileExtension(); + // trying to extract width and height from pictures content: + if ("jpg".equalsIgnoreCase(ext)) { + fillJPGWidthHeight(); + } else if ("png".equalsIgnoreCase(ext)) { + fillPNGWidthHeight(); + } + } + + private static int extractAspectRatioX(byte[] _dataStream, int dataBlockStartOffset) + { + return LittleEndian.getShort(_dataStream, dataBlockStartOffset+0x20)/10; + } + + private static int extractAspectRatioY(byte[] _dataStream, int dataBlockStartOffset) + { + return LittleEndian.getShort(_dataStream, dataBlockStartOffset+0x22)/10; + } + + /** + * Tries to suggest a filename: hex representation of picture structure offset in "Data" stream plus extension that + * is tried to determine from first byte of picture's content. + * + * @return suggested file name + */ + public String suggestFullFileName() + { + String fileExt = suggestFileExtension(); + return Integer.toHexString(dataBlockStartOfsset) + (fileExt.length()>0 ? "."+fileExt : ""); + } + + /** + * Writes Picture's content bytes to specified OutputStream. + * Is useful when there is need to write picture bytes directly to stream, omitting its representation in + * memory as distinct byte array. 
+ * + * @param out a stream to write to + * @throws IOException if some exception is occured while writing to specified out + */ + public void writeImageContent(OutputStream out) throws IOException + { + if (content!=null && content.length>0) { + out.write(content, 0, size); + } else { + out.write(_dataStream, pictureBytesStartOffset, size); + } + } + + /** + * @return picture's content as byte array + */ + public byte[] getContent() + { + if (content == null || content.length<=0) + { + fillImageContent(this._dataStream); + } + return content; + } + + /** + * + * @return size in bytes of the picture + */ + public int getSize() + { + return size; + } + + /** + * returns horizontal aspect ratio for picture provided by user + */ + public int getAspectRatioX() + { + return aspectRatioX; + } + /** + * returns vertical aspect ratio for picture provided by user + */ + public int getAspectRatioY() + { + return aspectRatioY; + } + + /** + * tries to suggest extension for picture's file by matching signatures of popular image formats to first bytes + * of picture's contents + * @return suggested file extension + */ + public String suggestFileExtension() + { + if (content!=null && content.length>0) { + return suggestFileExtension(content, 0); + } + return suggestFileExtension(_dataStream, pictureBytesStartOffset); + } + + + private String suggestFileExtension(byte[] _dataStream, int pictureBytesStartOffset) + { + if (matchSignature(_dataStream, JPG, pictureBytesStartOffset)) { + return "jpg"; + } else if (matchSignature(_dataStream, PNG, pictureBytesStartOffset)) { + return "png"; + } else if (matchSignature(_dataStream, GIF, pictureBytesStartOffset)) { + return "gif"; + } else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) { + return "bmp"; + } else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset)) { + return "tiff"; + } else if (matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) { + return "tiff"; + } + return ""; + } + + private 
static boolean matchSignature(byte[] dataStream, byte[] signature, int pictureBytesOffset) + { + boolean matched = true; + for (int i = 0; i < dataStream.length && i< signature.length; i++) + { + if (dataStream[i+pictureBytesOffset] != signature[i]) + { + matched = false; + break; + } + } + return matched; + } + +// public String getFileName() +// { +// return fileName; +// } + +// private static String extractFileName(int blockStartIndex, byte[] dataStream) { +// int fileNameStartOffset = blockStartIndex + 0x7C; +// int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET; +// int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset); +// +// int fileNameIndex = fileNameStartOffset; +// char[] fileNameChars = new char[(fileNameSize-1)/2]; +// int charIndex = 0; +// while(charIndex<fileNameChars.length) { +// short aChar = LittleEndian.getShort(dataStream, fileNameIndex); +// fileNameChars[charIndex] = (char)aChar; +// charIndex++; +// fileNameIndex += 2; +// } +// String fileName = new String(fileNameChars); +// return fileName.trim(); +// } + + private void fillImageContent(byte[] dataStream) + { + this.content = new byte[size]; + System.arraycopy(dataStream, pictureBytesStartOffset, content, 0, size); + } + + private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream) + { + int realPicoffset = dataBlockStartOffset; + + int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET); + int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET; + int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset); + int unknownHeaderOffset = PICTF1BlockSize + PICTF1BlockOffset; + realPicoffset += (unknownHeaderOffset + UNKNOWN_HEADER_SIZE); + return realPicoffset; + } + + private void fillJPGWidthHeight() { + /* + http://www.codecomments.com/archive281-2004-3-158083.html + + Algorhitm proposed by Patrick TJ McPhee: + + read 2 bytes + make sure 
they are 'ffd8'x + repeatedly: + read 2 bytes + make sure the first one is 'ff'x + if the second one is 'd9'x stop + else if the second one is c0 or c2 (or possibly other values ...) + skip 2 bytes + read one byte into depth + read two bytes into height + read two bytes into width + else + read two bytes into length + skip forward length-2 bytes + + Also used Ruby code snippet from: http://www.bigbold.com/snippets/posts/show/805 for reference + */ + int pointer = pictureBytesStartOffset+2; + int firstByte = _dataStream[pointer]; + int secondByte = _dataStream[pointer+1]; + + int endOfPicture = pictureBytesStartOffset + size; + while(pointer<endOfPicture-1) { + do { + firstByte = _dataStream[pointer]; + secondByte = _dataStream[pointer+1]; + } while (!(firstByte==(byte)0xFF) && pointer<endOfPicture-1); + + if (firstByte==((byte)0xFF) && pointer<endOfPicture-1) { + if (secondByte==(byte)0xD9 || secondByte==(byte)0xDA) { + break; + } else if ( (secondByte & 0xF0) == 0xC0 && secondByte!=(byte)0xC4 && secondByte!=(byte)0xC8 && secondByte!=(byte)0xCC) { + pointer += 5; + this.height = getBigEndianShort(_dataStream, pointer); + this.width = getBigEndianShort(_dataStream, pointer+2); + break; + } else { + pointer++; + pointer++; + int length = getBigEndianShort(_dataStream, pointer); + pointer+=length; + } + } else { + pointer++; + } + } + } + + private void fillPNGWidthHeight() + { + /* + Used PNG file format description from http://www.wotsit.org/download.asp?f=png + */ + int HEADER_START = pictureBytesStartOffset + PNG.length + 4; + if (matchSignature(_dataStream, IHDR, HEADER_START)) { + int IHDR_CHUNK_WIDTH = HEADER_START + 4; + this.width = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH); + this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4); + } + } + /** + * returns pixel width of the picture or -1 if dimensions determining was failed + */ + public int getWidth() + { + return width; + } + /** + * returns pixel height of the picture or -1 if dimensions 
determining was failed + */ + public int getHeight() + { + return height; + } + + private static int getBigEndianInt(byte[] data, int offset) + { + return (((data[offset] & 0xFF)<< 24) + ((data[offset +1] & 0xFF) << 16) + ((data[offset + 2] & 0xFF) << 8) + (data[offset +3] & 0xFF)); + } + + private static int getBigEndianShort(byte[] data, int offset) + { + return (((data[offset] & 0xFF)<< 8) + (data[offset +1] & 0xFF)); + } + +} Index: D:/java/svn-apache/src/scratchpad/testcases/org/apache/poi/hwpf/data/testPictur es.doc =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Property changes on: D:\java\svn-apache\src\scratchpad\testcases\org\apache\poi\hwpf\data\testPictur es.doc ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Index: D:/java/svn-apache/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPictu resTable.java =================================================================== --- D:/java/svn-apache/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPictu resTable.java (revision 0) +++ D:/java/svn-apache/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPictu resTable.java (revision 0) @@ -0,0 +1,71 @@ +/* ==================================================================== + Copyright 2002-2006 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hwpf.model; + +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.usermodel.Picture; +import junit.framework.TestCase; + +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.util.List; + +/** + * @author Dmitry Romanov + * @version $Id: $ + */ +public class TestPicturesTable + extends TestCase +{ + private HWPFDocument document; + private String testPath; + + + public TestPicturesTable(String string) + { + super(string); + } + + protected void setUp() throws Exception + { + testPath = System.getProperty("HWPF.testdata.path"); + if (testPath == null) + { + testPath = "c:"; + } + String testFile = testPath + "/testPictures.doc"; + document = new HWPFDocument(new FileInputStream(testFile)); + } + + public void testGetAllPictures() throws Exception { + PicturesTable picturesTable = document.getPicturesTable(); + List allPictures = picturesTable.getAllPictures(); + assertNotNull(allPictures); + assertTrue(allPictures.size() >= 5 ); + for (int i = 0; i < allPictures.size(); i++) + { + Picture picture = (Picture) allPictures.get(i); + System.out.println(picture.suggestFullFileName()+": "+picture.getSize()+" bytes"+" width: "+picture.getWidth()+"("+picture.getAspectRatioX()+ + "%) height: "+picture.getHeight()+"("+picture.getAspectRatioY()+"%)"); + + FileOutputStream out = new FileOutputStream(testPath+"/"+picture.suggestFullFileName()); + out.write(picture.getContent()); + out.close(); + } + } + +} -- Configure bugmail: http://issues.apache.org/bugzilla/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] Mailing List: http://jakarta.apache.org/site/mail2.html#poi The Apache Jakarta POI Project: http://jakarta.apache.org/poi/