Author: fanningpj
Date: Mon Dec 23 09:18:38 2019
New Revision: 1871921

URL: http://svn.apache.org/viewvc?rev=1871921&view=rev
Log:
convert some tabs to spaces

Modified:
    poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java?rev=1871921&r1=1871920&r2=1871921&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java Mon Dec 23 09:18:38 2019
@@ -34,49 +34,49 @@ import org.apache.poi.poifs.filesystem.P
  *  file format.
  */
 public final class HPBFDocument extends POIReadOnlyDocument {
-       private MainContents mainContents;
-       private QuillContents quillContents;
-       private EscherStm escherStm;
-       private EscherDelayStm escherDelayStm;
-
-       /**
-        * Opens a new publisher document
-        */
-       public HPBFDocument(POIFSFileSystem fs) throws IOException {
-          this(fs.getRoot());
-       }
-
-       public HPBFDocument(InputStream inp) throws IOException {
-          this(new POIFSFileSystem(inp));
-       }
-
-       /**
-        * Opens an embedded publisher document,
-        *  at the given directory.
-        */
-       public HPBFDocument(DirectoryNode dir) throws IOException {
-          super(dir);
-
-          // Go looking for our interesting child
-          //  streams
-          mainContents = new MainContents(dir);
-          quillContents = new QuillContents(dir);
-
-          // Now the Escher bits
-          escherStm = new EscherStm(dir);
-          escherDelayStm = new EscherDelayStm(dir);
-       }
-
-       public MainContents getMainContents() {
-               return mainContents;
-       }
-       public QuillContents getQuillContents() {
-               return quillContents;
-       }
-       public EscherStm getEscherStm() {
-               return escherStm;
-       }
-       public EscherDelayStm getEscherDelayStm() {
-               return escherDelayStm;
-       }
+    private MainContents mainContents;
+    private QuillContents quillContents;
+    private EscherStm escherStm;
+    private EscherDelayStm escherDelayStm;
+
+    /**
+     * Opens a new publisher document
+     */
+    public HPBFDocument(POIFSFileSystem fs) throws IOException {
+       this(fs.getRoot());
+    }
+
+    public HPBFDocument(InputStream inp) throws IOException {
+       this(new POIFSFileSystem(inp));
+    }
+
+    /**
+     * Opens an embedded publisher document,
+     *  at the given directory.
+     */
+    public HPBFDocument(DirectoryNode dir) throws IOException {
+       super(dir);
+
+       // Go looking for our interesting child
+       //  streams
+       mainContents = new MainContents(dir);
+       quillContents = new QuillContents(dir);
+
+       // Now the Escher bits
+       escherStm = new EscherStm(dir);
+       escherDelayStm = new EscherDelayStm(dir);
+    }
+
+    public MainContents getMainContents() {
+        return mainContents;
+    }
+    public QuillContents getQuillContents() {
+        return quillContents;
+    }
+    public EscherStm getEscherStm() {
+        return escherStm;
+    }
+    public EscherDelayStm getEscherDelayStm() {
+        return escherDelayStm;
+    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java?rev=1871921&r1=1871920&r2=1871921&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java Mon Dec 23 09:18:38 2019
@@ -36,319 +36,319 @@ import org.apache.poi.util.StringUtil;
  *  constructed.
  */
 public final class HPBFDumper {
-       private POIFSFileSystem fs;
-       public HPBFDumper(POIFSFileSystem fs) {
-               this.fs = fs;
-       }
-       
-       @SuppressWarnings("resource")
+    private POIFSFileSystem fs;
+    public HPBFDumper(POIFSFileSystem fs) {
+        this.fs = fs;
+    }
+
+    @SuppressWarnings("resource")
     public HPBFDumper(InputStream inp) throws IOException {
-               this(new POIFSFileSystem(inp));
-       }
+        this(new POIFSFileSystem(inp));
+    }
 
-       private static byte[] getData(DirectoryNode dir, String name) throws IOException {
-               // Grab the document stream
-               InputStream is = dir.createDocumentInputStream(name);
-               byte[] d = IOUtils.toByteArray(is);
-               is.close();
-
-               // All done
-               return d;
-       }
-
-       /**
-        * Dumps out the given number of bytes as hex,
-        *  two chars
-        */
-       private String dumpBytes(byte[] data, int offset, int len) {
-               StringBuilder ret = new StringBuilder();
-               for(int i=0; i<len; i++) {
-                       int j = i + offset;
-                       int b = data[j];
-                       if(b < 0) { b += 256; }
-
-                       String bs = Integer.toHexString(b);
-                       if(bs.length() == 1)
-                               ret.append('0');
-                       ret.append(bs);
-                       ret.append(' ');
-               }
-               return ret.toString();
-       }
+    private static byte[] getData(DirectoryNode dir, String name) throws IOException {
+        // Grab the document stream
+        InputStream is = dir.createDocumentInputStream(name);
+        byte[] d = IOUtils.toByteArray(is);
+        is.close();
+
+        // All done
+        return d;
+    }
+
+    /**
+     * Dumps out the given number of bytes as hex,
+     *  two chars
+     */
+    private String dumpBytes(byte[] data, int offset, int len) {
+        StringBuilder ret = new StringBuilder();
+        for(int i=0; i<len; i++) {
+            int j = i + offset;
+            int b = data[j];
+            if(b < 0) { b += 256; }
+
+            String bs = Integer.toHexString(b);
+            if(bs.length() == 1)
+                ret.append('0');
+            ret.append(bs);
+            ret.append(' ');
+        }
+        return ret.toString();
+    }
 
-       @SuppressWarnings("resource")
+    @SuppressWarnings("resource")
     public static void main(String[] args) throws Exception {
-               if(args.length < 1) {
-                       System.err.println("Use:");
-                       System.err.println("  HPBFDumper <filename>");
-                       System.exit(1);
-               }
-               HPBFDumper dump = new HPBFDumper(new POIFSFileSystem(new File(args[0])));
-
-               System.out.println("Dumping " + args[0]);
-               dump.dumpContents();
-               dump.dumpEnvelope();
-               dump.dumpEscher();
-               dump.dump001CompObj(dump.fs.getRoot());
-               dump.dumpQuill();
-
-               // Still to go:
-               //  (0x03)Internal
-               //  Objects
-       }
-
-       /**
-        * Dump out the escher parts of the file.
-        * Escher -> EscherStm and EscherDelayStm
-        */
-       public void dumpEscher() throws IOException {
-               DirectoryNode escherDir = (DirectoryNode)
-                       fs.getRoot().getEntry("Escher");
-
-               dumpEscherStm(escherDir);
-               dumpEscherDelayStm(escherDir);
-       }
-       private void dumpEscherStream(byte[] data) {
-               DefaultEscherRecordFactory erf =
-                       new DefaultEscherRecordFactory();
-
-               // Dump
-               int left = data.length;
-               while(left > 0) {
-                       EscherRecord er = erf.createRecord(data, 0);
-                       er.fillFields(data, 0, erf);
-                       left -= er.getRecordSize();
-
-                       System.out.println(er);
-               }
-       }
-       protected void dumpEscherStm(DirectoryNode escherDir) throws IOException {
-               byte[] data = getData(escherDir, "EscherStm");
-               System.out.println();
-               System.out.println("EscherStm - " + data.length + " bytes long:");
-               if(data.length > 0)
-                       dumpEscherStream(data);
-       }
-       protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException {
-               byte[] data = getData(escherDir, "EscherDelayStm");
-               System.out.println();
-               System.out.println("EscherDelayStm - " + data.length + " bytes long:");
-               if(data.length > 0)
-                       dumpEscherStream(data);
-       }
-
-       public void dumpEnvelope() throws IOException {
-               byte[] data = getData(fs.getRoot(), "Envelope");
-
-               System.out.println();
-               System.out.println("Envelope - " + data.length + " bytes long:");
-       }
-
-       public void dumpContents() throws IOException {
-               byte[] data = getData(fs.getRoot(), "Contents");
-
-               System.out.println();
-               System.out.println("Contents - " + data.length + " bytes long:");
-
-               // 8 bytes, always seems to be
-               // E8 AC 2C 00 E8 03 05 01
-               // E8 AC 2C 00 E8 03 05 01
-
-               // 4 bytes - size of contents
-               // 13/15 00 00 01
-
-               // ....
-
-           // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... ..........
-
-           // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... ..
-
-               // 01 18 30 00 03 20 00 00
-               // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
-               // 00 00 00 88 1E 00 00 00
-
-               // 01 18 31 00 03 20 00 00
-               // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
-               // 00 00 00 88 1E 00 00 00
-
-               // 01 18 32 00 03 20 00 00
-               // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
-               // 00 00 00 88 1E 00 00 00
-       }
-
-       public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
-               byte[] data = getData(dir, "CONTENTS");
-
-               System.out.println();
-               System.out.println("CONTENTS - " + data.length + " bytes long:");
-
-               // Between the start and 0x200 we have
-               //  CHNKINK(space) + 24 bytes
-               //  0x1800
-               //  TEXT + 6 bytes
-               //  TEXT + 8 bytes
-               //  0x1800
-               //  STSH + 6 bytes
-               //  STSH + 8 bytes
-               //  0x1800
-               //  STSH + 6 bytes
-               //  STSH + 8 bytes
-               // but towards 0x200 the pattern may
-               //  break down a little bit
-
-               // After the second of a given type,
-               //  it seems to be 4 bytes giving the start,
-               //  then 4 bytes giving the length, then
-               //  18 00
-               System.out.println(
-                               new String(data, 0, 8, LocaleUtil.CHARSET_1252) +
-                               dumpBytes(data, 8, 0x20-8)
-               );
-
-               int pos = 0x20;
-               boolean sixNotEight = true;
-               while(pos < 0x200) {
-                       if(sixNotEight) {
-                               System.out.println(
-                                               dumpBytes(data, pos, 2)
-                               );
-                               pos += 2;
-                       }
-                       String text = new String(data, pos, 4, LocaleUtil.CHARSET_1252);
-                       int blen = 8;
-                       if(sixNotEight)
-                               blen = 6;
-                       System.out.println(
-                                       text + " " + dumpBytes(data, pos+4, blen)
-                       );
-
-                       pos += 4 + blen;
-                       sixNotEight = ! sixNotEight;
-               }
-
-               // Text from 0x200 onwards until we get
-               //  to \r(00)\n(00)(00)(00)
-               int textStop = -1;
-               for(int i=0x200; i<data.length-2 && textStop == -1; i++) {
-                       if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) {
-                               textStop = i;
-                       }
-               }
-               if(textStop > 0) {
-                       int len = (textStop - 0x200) / 2;
-                       System.out.println();
-                       System.out.println(
-                                       StringUtil.getFromUnicodeLE(data, 0x200, len)
-                       );
-               }
-
-               // The font list comes slightly later
-
-               // The hyperlinks may come before the fonts,
-               //  or slightly in front
-       }
-       public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
-               byte[] data = getData(dir, "CONTENTS");
-
-               System.out.println();
-               System.out.println("CONTENTS - " + data.length + " bytes long:");
-
-               String[] startType = new String[20];
-               String[] endType = new String[20];
-               int[] optA = new int[20];
-               int[] optB = new int[20];
-               int[] optC = new int[20];
-               int[] from = new int[20];
-               int[] len = new int[20];
-
-               for(int i=0; i<20; i++) {
-                       int offset = 0x20 + i*24;
-                       if(data[offset] == 0x18 && data[offset+1] == 0x00) {
-                               // Has data
-                               startType[i] = new String(data, offset+2, 4, LocaleUtil.CHARSET_1252);
-                               optA[i] = LittleEndian.getUShort(data, offset+6);
-                               optB[i] = LittleEndian.getUShort(data, offset+8);
-                               optC[i] = LittleEndian.getUShort(data, offset+10);
-                               endType[i] = new String(data, offset+12, 4, LocaleUtil.CHARSET_1252);
-                               from[i] = (int)LittleEndian.getUInt(data, offset+16);
-                               len[i] = (int)LittleEndian.getUInt(data, offset+20);
-                       } else {
-                               // Doesn't have data
-                       }
-               }
-
-               String text = StringUtil.getFromUnicodeLE(
-                               data, from[0], len[0]/2
-               );
-
-               // Dump
-               for(int i=0; i<20; i++) {
-                       String num = Integer.toString(i);
-                       if(i < 10) {
-                               num = "0" + i;
-                       }
-                       System.out.print(num + " ");
-
-                       if(startType[i] == null) {
-                               System.out.println("(not present)");
-                       } else {
-                               System.out.println(
-                                               "\t" +
-                                               startType[i] + " " +
-                                               optA[i] + " " +
-                                               optB[i] + " " +
-                                               optC[i]
-                               );
-                               System.out.println(
-                                               "\t" +
-                                               endType[i] + " " +
-                                               "from: " +
-                                               Integer.toHexString(from[i]) +
-                                               " (" + from[i] + ")" +
-                                               ", len: " +
-                                               Integer.toHexString(len[i]) +
-                                               " (" + len[i] + ")"
-                               );
-                       }
-               }
-
-               // Text
-               System.out.println();
-               System.out.println("TEXT:");
-               System.out.println(text);
-               System.out.println();
-
-               // All the others
-               for(int i=0; i<20; i++) {
-                       if(startType[i] == null) {
-                               continue;
-                       }
-                       int start = from[i];
-
-                       System.out.println(
-                                       startType[i] + " -> " + endType[i] +
-                                       " @ " + Integer.toHexString(start) +
-                                       " (" + start + ")"
-                       );
-                       System.out.println("\t" + dumpBytes(data, start, 4));
-                       System.out.println("\t" + dumpBytes(data, start+4, 4));
-                       System.out.println("\t" + dumpBytes(data, start+8, 4));
-                       System.out.println("\t(etc)");
-               }
-       }
-
-       protected void dump001CompObj(DirectoryNode dir) {
-               // TODO
-       }
-
-       public void dumpQuill() throws IOException {
-               DirectoryNode quillDir = (DirectoryNode)
-                       fs.getRoot().getEntry("Quill");
-               DirectoryNode quillSubDir = (DirectoryNode)
-                       quillDir.getEntry("QuillSub");
-
-               dump001CompObj(quillSubDir);
-               dumpCONTENTSraw(quillSubDir);
-               dumpCONTENTSguessed(quillSubDir);
-       }
+        if(args.length < 1) {
+            System.err.println("Use:");
+            System.err.println("  HPBFDumper <filename>");
+            System.exit(1);
+        }
+        HPBFDumper dump = new HPBFDumper(new POIFSFileSystem(new File(args[0])));
+
+        System.out.println("Dumping " + args[0]);
+        dump.dumpContents();
+        dump.dumpEnvelope();
+        dump.dumpEscher();
+        dump.dump001CompObj(dump.fs.getRoot());
+        dump.dumpQuill();
+
+        // Still to go:
+        //  (0x03)Internal
+        //  Objects
+    }
+
+    /**
+     * Dump out the escher parts of the file.
+     * Escher -> EscherStm and EscherDelayStm
+     */
+    public void dumpEscher() throws IOException {
+        DirectoryNode escherDir = (DirectoryNode)
+            fs.getRoot().getEntry("Escher");
+
+        dumpEscherStm(escherDir);
+        dumpEscherDelayStm(escherDir);
+    }
+    private void dumpEscherStream(byte[] data) {
+        DefaultEscherRecordFactory erf =
+            new DefaultEscherRecordFactory();
+
+        // Dump
+        int left = data.length;
+        while(left > 0) {
+            EscherRecord er = erf.createRecord(data, 0);
+            er.fillFields(data, 0, erf);
+            left -= er.getRecordSize();
+
+            System.out.println(er);
+        }
+    }
+    protected void dumpEscherStm(DirectoryNode escherDir) throws IOException {
+        byte[] data = getData(escherDir, "EscherStm");
+        System.out.println();
+        System.out.println("EscherStm - " + data.length + " bytes long:");
+        if(data.length > 0)
+            dumpEscherStream(data);
+    }
+    protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException {
+        byte[] data = getData(escherDir, "EscherDelayStm");
+        System.out.println();
+        System.out.println("EscherDelayStm - " + data.length + " bytes long:");
+        if(data.length > 0)
+            dumpEscherStream(data);
+    }
+
+    public void dumpEnvelope() throws IOException {
+        byte[] data = getData(fs.getRoot(), "Envelope");
+
+        System.out.println();
+        System.out.println("Envelope - " + data.length + " bytes long:");
+    }
+
+    public void dumpContents() throws IOException {
+        byte[] data = getData(fs.getRoot(), "Contents");
+
+        System.out.println();
+        System.out.println("Contents - " + data.length + " bytes long:");
+
+        // 8 bytes, always seems to be
+        // E8 AC 2C 00 E8 03 05 01
+        // E8 AC 2C 00 E8 03 05 01
+
+        // 4 bytes - size of contents
+        // 13/15 00 00 01
+
+        // ....
+
+        // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... ..........
+
+        // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... ..
+
+        // 01 18 30 00 03 20 00 00
+        // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+        // 00 00 00 88 1E 00 00 00
+
+        // 01 18 31 00 03 20 00 00
+        // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+        // 00 00 00 88 1E 00 00 00
+
+        // 01 18 32 00 03 20 00 00
+        // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+        // 00 00 00 88 1E 00 00 00
+    }
+
+    public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
+        byte[] data = getData(dir, "CONTENTS");
+
+        System.out.println();
+        System.out.println("CONTENTS - " + data.length + " bytes long:");
+
+        // Between the start and 0x200 we have
+        //  CHNKINK(space) + 24 bytes
+        //  0x1800
+        //  TEXT + 6 bytes
+        //  TEXT + 8 bytes
+        //  0x1800
+        //  STSH + 6 bytes
+        //  STSH + 8 bytes
+        //  0x1800
+        //  STSH + 6 bytes
+        //  STSH + 8 bytes
+        // but towards 0x200 the pattern may
+        //  break down a little bit
+
+        // After the second of a given type,
+        //  it seems to be 4 bytes giving the start,
+        //  then 4 bytes giving the length, then
+        //  18 00
+        System.out.println(
+                new String(data, 0, 8, LocaleUtil.CHARSET_1252) +
+                dumpBytes(data, 8, 0x20-8)
+        );
+
+        int pos = 0x20;
+        boolean sixNotEight = true;
+        while(pos < 0x200) {
+            if(sixNotEight) {
+                System.out.println(
+                        dumpBytes(data, pos, 2)
+                );
+                pos += 2;
+            }
+            String text = new String(data, pos, 4, LocaleUtil.CHARSET_1252);
+            int blen = 8;
+            if(sixNotEight)
+                blen = 6;
+            System.out.println(
+                    text + " " + dumpBytes(data, pos+4, blen)
+            );
+
+            pos += 4 + blen;
+            sixNotEight = ! sixNotEight;
+        }
+
+        // Text from 0x200 onwards until we get
+        //  to \r(00)\n(00)(00)(00)
+        int textStop = -1;
+        for(int i=0x200; i<data.length-2 && textStop == -1; i++) {
+            if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) {
+                textStop = i;
+            }
+        }
+        if(textStop > 0) {
+            int len = (textStop - 0x200) / 2;
+            System.out.println();
+            System.out.println(
+                    StringUtil.getFromUnicodeLE(data, 0x200, len)
+            );
+        }
+
+        // The font list comes slightly later
+
+        // The hyperlinks may come before the fonts,
+        //  or slightly in front
+    }
+    public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
+        byte[] data = getData(dir, "CONTENTS");
+
+        System.out.println();
+        System.out.println("CONTENTS - " + data.length + " bytes long:");
+
+        String[] startType = new String[20];
+        String[] endType = new String[20];
+        int[] optA = new int[20];
+        int[] optB = new int[20];
+        int[] optC = new int[20];
+        int[] from = new int[20];
+        int[] len = new int[20];
+
+        for(int i=0; i<20; i++) {
+            int offset = 0x20 + i*24;
+            if(data[offset] == 0x18 && data[offset+1] == 0x00) {
+                // Has data
+                startType[i] = new String(data, offset+2, 4, LocaleUtil.CHARSET_1252);
+                optA[i] = LittleEndian.getUShort(data, offset+6);
+                optB[i] = LittleEndian.getUShort(data, offset+8);
+                optC[i] = LittleEndian.getUShort(data, offset+10);
+                endType[i] = new String(data, offset+12, 4, LocaleUtil.CHARSET_1252);
+                from[i] = (int)LittleEndian.getUInt(data, offset+16);
+                len[i] = (int)LittleEndian.getUInt(data, offset+20);
+            } else {
+                // Doesn't have data
+            }
+        }
+
+        String text = StringUtil.getFromUnicodeLE(
+                data, from[0], len[0]/2
+        );
+
+        // Dump
+        for(int i=0; i<20; i++) {
+            String num = Integer.toString(i);
+            if(i < 10) {
+                num = "0" + i;
+            }
+            System.out.print(num + " ");
+
+            if(startType[i] == null) {
+                System.out.println("(not present)");
+            } else {
+                System.out.println(
+                        "\t" +
+                        startType[i] + " " +
+                        optA[i] + " " +
+                        optB[i] + " " +
+                        optC[i]
+                );
+                System.out.println(
+                        "\t" +
+                        endType[i] + " " +
+                        "from: " +
+                        Integer.toHexString(from[i]) +
+                        " (" + from[i] + ")" +
+                        ", len: " +
+                        Integer.toHexString(len[i]) +
+                        " (" + len[i] + ")"
+                );
+            }
+        }
+
+        // Text
+        System.out.println();
+        System.out.println("TEXT:");
+        System.out.println(text);
+        System.out.println();
+
+        // All the others
+        for(int i=0; i<20; i++) {
+            if(startType[i] == null) {
+                continue;
+            }
+            int start = from[i];
+
+            System.out.println(
+                    startType[i] + " -> " + endType[i] +
+                    " @ " + Integer.toHexString(start) +
+                    " (" + start + ")"
+            );
+            System.out.println("\t" + dumpBytes(data, start, 4));
+            System.out.println("\t" + dumpBytes(data, start+4, 4));
+            System.out.println("\t" + dumpBytes(data, start+8, 4));
+            System.out.println("\t(etc)");
+        }
+    }
+
+    protected void dump001CompObj(DirectoryNode dir) {
+        // TODO
+    }
+
+    public void dumpQuill() throws IOException {
+        DirectoryNode quillDir = (DirectoryNode)
+            fs.getRoot().getEntry("Quill");
+        DirectoryNode quillSubDir = (DirectoryNode)
+            quillDir.getEntry("QuillSub");
+
+        dump001CompObj(quillSubDir);
+        dumpCONTENTSraw(quillSubDir);
+        dumpCONTENTSguessed(quillSubDir);
+    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java?rev=1871921&r1=1871920&r2=1871921&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java Mon Dec 23 09:18:38 2019
@@ -33,53 +33,53 @@ import org.apache.poi.util.HexDump;
  *  what the format of them is.
  */
 public final class PLCDumper {
-       private HPBFDocument doc;
-       private QuillContents qc;
+    private HPBFDocument doc;
+    private QuillContents qc;
 
-       public PLCDumper(HPBFDocument hpbfDoc) {
-               doc = hpbfDoc;
-               qc = doc.getQuillContents();
-       }
-       public PLCDumper(POIFSFileSystem fs) throws IOException {
-               this(new HPBFDocument(fs));
-       }
-       public PLCDumper(InputStream inp) throws IOException {
-               this(new POIFSFileSystem(inp));
-       }
-
-       public static void main(String[] args) throws Exception {
-               if(args.length < 1) {
-                       System.err.println("Use:");
-                       System.err.println("  PLCDumper <filename>");
-                       System.exit(1);
-               }
-
-               try (FileInputStream fis = new FileInputStream(args[0])) {
-                       PLCDumper dump = new PLCDumper(fis);
-
-                       System.out.println("Dumping " + args[0]);
-                       dump.dumpPLC();
-               }
-       }
-
-       private void dumpPLC() {
-               QCBit[] bits = qc.getBits();
-
-               for(int i=0; i<bits.length; i++) {
-                       if(bits[i] == null) continue;
-                       if(bits[i].getBitType().equals("PLC ")) {
-                               dumpBit(bits[i], i);
-                       }
-               }
-       }
-
-       private void dumpBit(QCBit bit, int index) {
-               System.out.println();
-               System.out.println("Dumping " + bit.getBitType() + " bit at " + index);
-               System.out.println("  Is a " + bit.getThingType() + ", number is " + bit.getOptA());
-               System.out.println("  Starts at " + bit.getDataOffset() + " (0x" + Integer.toHexString(bit.getDataOffset()) + ")");
-               System.out.println("  Runs for  " + bit.getLength() + " (0x" + Integer.toHexString(bit.getLength()) + ")");
+    public PLCDumper(HPBFDocument hpbfDoc) {
+        doc = hpbfDoc;
+        qc = doc.getQuillContents();
+    }
+    public PLCDumper(POIFSFileSystem fs) throws IOException {
+        this(new HPBFDocument(fs));
+    }
+    public PLCDumper(InputStream inp) throws IOException {
+        this(new POIFSFileSystem(inp));
+    }
+
+    public static void main(String[] args) throws Exception {
+        if(args.length < 1) {
+            System.err.println("Use:");
+            System.err.println("  PLCDumper <filename>");
+            System.exit(1);
+        }
+
+        try (FileInputStream fis = new FileInputStream(args[0])) {
+            PLCDumper dump = new PLCDumper(fis);
+
+            System.out.println("Dumping " + args[0]);
+            dump.dumpPLC();
+        }
+    }
+
+    private void dumpPLC() {
+        QCBit[] bits = qc.getBits();
+
+        for(int i=0; i<bits.length; i++) {
+            if(bits[i] == null) continue;
+            if(bits[i].getBitType().equals("PLC ")) {
+                dumpBit(bits[i], i);
+            }
+        }
+    }
+
+    private void dumpBit(QCBit bit, int index) {
+        System.out.println();
+        System.out.println("Dumping " + bit.getBitType() + " bit at " + index);
+        System.out.println("  Is a " + bit.getThingType() + ", number is " + bit.getOptA());
+        System.out.println("  Starts at " + bit.getDataOffset() + " (0x" + Integer.toHexString(bit.getDataOffset()) + ")");
+        System.out.println("  Runs for  " + bit.getLength() + " (0x" + Integer.toHexString(bit.getLength()) + ")");
 
-               System.out.println(HexDump.dump(bit.getData(), 0, 0));
-       }
+        System.out.println(HexDump.dump(bit.getData(), 0, 0));
+    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java?rev=1871921&r1=1871920&r2=1871921&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java Mon Dec 23 09:18:38 2019
@@ -50,65 +50,65 @@ public final class PublisherTextExtracto
       this(new POIFSFileSystem(is));
    }
 
-       /**
-        * Should a call to getText() return hyperlinks inline
-        *  with the text?
-        * Default is no
-        */
-       public void setHyperlinksByDefault(boolean hyperlinksByDefault) {
-               this.hyperlinksByDefault = hyperlinksByDefault;
-       }
-
-
-       public String getText() {
-               StringBuilder text = new StringBuilder();
-
-               // Get the text from the Quill Contents
-               QCBit[] bits = doc.getQuillContents().getBits();
-               for (QCBit bit1 : bits) {
-                       if (bit1 != null && bit1 instanceof QCTextBit) {
-                               QCTextBit t = (QCTextBit) bit1;
-                               text.append(t.getText().replace('\r', '\n'));
-                       }
-               }
-
-               // If requested, add in the hyperlinks
-               // Ideally, we'd do these inline, but the hyperlink
-               //  positions are relative to the text area the
-               //  hyperlink is in, and we have yet to figure out
-               //  how to tie that together.
-               if(hyperlinksByDefault) {
-                       for (QCBit bit : bits) {
-                               if (bit != null && bit instanceof Type12) {
-                                       Type12 hyperlinks = (Type12) bit;
-                                       for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) {
-                                               text.append("<");
-                                               text.append(hyperlinks.getHyperlink(j));
-                                               text.append(">\n");
-                                       }
-                               }
-                       }
-               }
-
-               // Get more text
-               // TODO
-
-               return text.toString();
-       }
-
-
-       public static void main(String[] args) throws Exception {
-               if(args.length == 0) {
-                       System.err.println("Use:");
-                       System.err.println("  PublisherTextExtractor <file.pub>");
-               }
-
-               for (String arg : args) {
-                       try (FileInputStream fis = new FileInputStream(arg)) {
-                               PublisherTextExtractor te = new PublisherTextExtractor(fis);
-                               System.out.println(te.getText());
-                               te.close();
-                       }
-               }
-       }
+    /**
+     * Should a call to getText() return hyperlinks inline
+     *  with the text?
+     * Default is no
+     */
+    public void setHyperlinksByDefault(boolean hyperlinksByDefault) {
+        this.hyperlinksByDefault = hyperlinksByDefault;
+    }
+
+
+    public String getText() {
+        StringBuilder text = new StringBuilder();
+
+        // Get the text from the Quill Contents
+        QCBit[] bits = doc.getQuillContents().getBits();
+        for (QCBit bit1 : bits) {
+            if (bit1 != null && bit1 instanceof QCTextBit) {
+                QCTextBit t = (QCTextBit) bit1;
+                text.append(t.getText().replace('\r', '\n'));
+            }
+        }
+
+        // If requested, add in the hyperlinks
+        // Ideally, we'd do these inline, but the hyperlink
+        //  positions are relative to the text area the
+        //  hyperlink is in, and we have yet to figure out
+        //  how to tie that together.
+        if(hyperlinksByDefault) {
+            for (QCBit bit : bits) {
+                if (bit != null && bit instanceof Type12) {
+                    Type12 hyperlinks = (Type12) bit;
+                    for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) {
+                        text.append("<");
+                        text.append(hyperlinks.getHyperlink(j));
+                        text.append(">\n");
+                    }
+                }
+            }
+        }
+
+        // Get more text
+        // TODO
+
+        return text.toString();
+    }
+
+
+    public static void main(String[] args) throws Exception {
+        if(args.length == 0) {
+            System.err.println("Use:");
+            System.err.println("  PublisherTextExtractor <file.pub>");
+        }
+
+        for (String arg : args) {
+            try (FileInputStream fis = new FileInputStream(arg)) {
+                PublisherTextExtractor te = new PublisherTextExtractor(fis);
+                System.out.println(te.getText());
+                te.close();
+            }
+        }
+    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java?rev=1871921&r1=1871920&r2=1871921&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java Mon Dec 23 09:18:38 2019
@@ -53,158 +53,158 @@ import org.apache.poi.util.LittleEndian;
  *  lucene indexers) that would ever want to use this!
  */
 public final class QuickButCruddyTextExtractor {
-       private POIFSFileSystem fs;
-       private InputStream is;
-       private byte[] pptContents;
-
-       /**
-        * Really basic text extractor, that will also return lots of crud text.
-        * Takes a single argument, the file to extract from
-        */
-       public static void main(String[] args) throws IOException
-       {
-               if(args.length < 1) {
-                       System.err.println("Useage:");
-                       System.err.println("\tQuickButCruddyTextExtractor <file>");
-                       System.exit(1);
-               }
-
-               String file = args[0];
-
-               QuickButCruddyTextExtractor ppe = new QuickButCruddyTextExtractor(file);
-               System.out.println(ppe.getTextAsString());
-               ppe.close();
-       }
-
-       /**
-        * Creates an extractor from a given file name
-        * @param fileName
-        */
-       @SuppressWarnings("resource")
+    private POIFSFileSystem fs;
+    private InputStream is;
+    private byte[] pptContents;
+
+    /**
+     * Really basic text extractor, that will also return lots of crud text.
+     * Takes a single argument, the file to extract from
+     */
+    public static void main(String[] args) throws IOException
+    {
+        if(args.length < 1) {
+            System.err.println("Useage:");
+            System.err.println("\tQuickButCruddyTextExtractor <file>");
+            System.exit(1);
+        }
+
+        String file = args[0];
+
+        QuickButCruddyTextExtractor ppe = new QuickButCruddyTextExtractor(file);
+        System.out.println(ppe.getTextAsString());
+        ppe.close();
+    }
+
+    /**
+     * Creates an extractor from a given file name
+     * @param fileName
+     */
+    @SuppressWarnings("resource")
     public QuickButCruddyTextExtractor(String fileName) throws IOException {
-               this(new POIFSFileSystem(new File(fileName)));
-       }
+        this(new POIFSFileSystem(new File(fileName)));
+    }
 
-       /**
-        * Creates an extractor from a given input stream
-        * @param iStream
-        */
+    /**
+     * Creates an extractor from a given input stream
+     * @param iStream
+     */
     @SuppressWarnings("resource")
-       public QuickButCruddyTextExtractor(InputStream iStream) throws IOException {
-               this(new POIFSFileSystem(iStream));
-               is = iStream;
-       }
-
-       /**
-        * Creates an extractor from a POIFS Filesystem
-        * @param poifs
-        */
-       public QuickButCruddyTextExtractor(POIFSFileSystem poifs) throws IOException {
-               fs = poifs;
-
-               // Find the PowerPoint bit, and get out the bytes
-               InputStream pptIs = fs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT);
-               pptContents = IOUtils.toByteArray(pptIs);
-               pptIs.close();
-       }
-
-
-       /**
-        * Shuts down the underlying streams
-        */
-       public void close() throws IOException {
-               if(is != null) { is.close(); }
-               fs = null;
-       }
-
-       /**
-        * Fetches the ALL the text of the powerpoint file, as a single string
-        */
-       public String getTextAsString() {
-               StringBuilder ret = new StringBuilder();
-               List<String> textV = getTextAsVector();
-               for(String text : textV) {
-                       ret.append(text);
-                       if(! text.endsWith("\n")) {
-                               ret.append('\n');
-                       }
-               }
-               return ret.toString();
-       }
-
-       /**
-        * Fetches the ALL the text of the powerpoint file, in a List of
-        *  strings, one per text record
-        */
-       public List<String> getTextAsVector() {
-           List<String> textV = new ArrayList<>();
+    public QuickButCruddyTextExtractor(InputStream iStream) throws IOException {
+        this(new POIFSFileSystem(iStream));
+        is = iStream;
+    }
+
+    /**
+     * Creates an extractor from a POIFS Filesystem
+     * @param poifs
+     */
+    public QuickButCruddyTextExtractor(POIFSFileSystem poifs) throws IOException {
+        fs = poifs;
+
+        // Find the PowerPoint bit, and get out the bytes
+        InputStream pptIs = fs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT);
+        pptContents = IOUtils.toByteArray(pptIs);
+        pptIs.close();
+    }
+
+
+    /**
+     * Shuts down the underlying streams
+     */
+    public void close() throws IOException {
+        if(is != null) { is.close(); }
+        fs = null;
+    }
+
+    /**
+     * Fetches the ALL the text of the powerpoint file, as a single string
+     */
+    public String getTextAsString() {
+        StringBuilder ret = new StringBuilder();
+        List<String> textV = getTextAsVector();
+        for(String text : textV) {
+            ret.append(text);
+            if(! text.endsWith("\n")) {
+                ret.append('\n');
+            }
+        }
+        return ret.toString();
+    }
+
+    /**
+     * Fetches the ALL the text of the powerpoint file, in a List of
+     *  strings, one per text record
+     */
+    public List<String> getTextAsVector() {
+        List<String> textV = new ArrayList<>();
 
-               // Set to the start of the file
-               int walkPos = 0;
+        // Set to the start of the file
+        int walkPos = 0;
 
-               // Start walking the file, looking for the records
-               while(walkPos != -1) {
+        // Start walking the file, looking for the records
+        while(walkPos != -1) {
             walkPos = findTextRecords(walkPos,textV);
-               }
+        }
 
-               // Return what we find
-               return textV;
-       }
-
-       /**
-        * For the given position, look if the record is a text record, and wind
-        *  on after.
-        * If it is a text record, grabs out the text. Whatever happens, returns
-        *  the position of the next record, or -1 if no more.
-        */
-       public int findTextRecords(int startPos, List<String> textV) {
-               // Grab the length, and the first option byte
-               // Note that the length doesn't include the 8 byte atom header
-               int len = (int)LittleEndian.getUInt(pptContents,startPos+4);
-               byte opt = pptContents[startPos];
-
-               // If it's a container, step into it and return
-               // (If it's a container, option byte 1 BINARY_AND 0x0f will be 0x0f)
-               int container = opt & 0x0f;
-               if(container == 0x0f) {
-                       return (startPos+8);
-               }
-
-               // Otherwise, check the type to see if it's text
-               int type = LittleEndian.getUShort(pptContents,startPos+2);
-
-               // TextBytesAtom
-               if(type == RecordTypes.TextBytesAtom.typeID) {
-                       TextBytesAtom tba = (TextBytesAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
-                       String text = HSLFTextParagraph.toExternalString(tba.getText(), -1);
-                       textV.add(text);
-               }
-               // TextCharsAtom
-               if(type == RecordTypes.TextCharsAtom.typeID) {
-                       TextCharsAtom tca = (TextCharsAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
+        // Return what we find
+        return textV;
+    }
+
+    /**
+     * For the given position, look if the record is a text record, and wind
+     *  on after.
+     * If it is a text record, grabs out the text. Whatever happens, returns
+     *  the position of the next record, or -1 if no more.
+     */
+    public int findTextRecords(int startPos, List<String> textV) {
+        // Grab the length, and the first option byte
+        // Note that the length doesn't include the 8 byte atom header
+        int len = (int)LittleEndian.getUInt(pptContents,startPos+4);
+        byte opt = pptContents[startPos];
+
+        // If it's a container, step into it and return
+        // (If it's a container, option byte 1 BINARY_AND 0x0f will be 0x0f)
+        int container = opt & 0x0f;
+        if(container == 0x0f) {
+            return (startPos+8);
+        }
+
+        // Otherwise, check the type to see if it's text
+        int type = LittleEndian.getUShort(pptContents,startPos+2);
+
+        // TextBytesAtom
+        if(type == RecordTypes.TextBytesAtom.typeID) {
+            TextBytesAtom tba = (TextBytesAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
+            String text = HSLFTextParagraph.toExternalString(tba.getText(), -1);
+            textV.add(text);
+        }
+        // TextCharsAtom
+        if(type == RecordTypes.TextCharsAtom.typeID) {
+            TextCharsAtom tca = (TextCharsAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
             String text = HSLFTextParagraph.toExternalString(tca.getText(), -1);
             textV.add(text);
-               }
+        }
 
-               // CString (doesn't go via a TextRun)
-               if(type == RecordTypes.CString.typeID) {
-                       CString cs = (CString)Record.createRecordForType(type, pptContents, startPos, len+8);
-                       String text = cs.getText();
-
-                       // Ignore the ones we know to be rubbish
-                       if(text.equals("___PPT10")) {
-                       } else if(text.equals("Default Design")) {
-                       } else {
-                               textV.add(text);
-                       }
-               }
-
-
-               // Wind on by the atom length, and check we're not at the end
-               int newPos = (startPos + 8 + len);
-               if(newPos > (pptContents.length - 8)) {
-                       newPos = -1;
-               }
-               return newPos;
-       }
+        // CString (doesn't go via a TextRun)
+        if(type == RecordTypes.CString.typeID) {
+            CString cs = (CString)Record.createRecordForType(type, pptContents, startPos, len+8);
+            String text = cs.getText();
+
+            // Ignore the ones we know to be rubbish
+            if(text.equals("___PPT10")) {
+            } else if(text.equals("Default Design")) {
+            } else {
+                textV.add(text);
+            }
+        }
+
+
+        // Wind on by the atom length, and check we're not at the end
+        int newPos = (startPos + 8 + len);
+        if(newPos > (pptContents.length - 8)) {
+            newPos = -1;
+        }
+        return newPos;
+    }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@poi.apache.org
For additional commands, e-mail: commits-h...@poi.apache.org

Reply via email to