Author: nick
Date: Fri Oct  7 21:05:22 2011
New Revision: 1180243

URL: http://svn.apache.org/viewvc?rev=1180243&view=rev
Log:
TIKA-749 Convert the DWG and PRT parsers to use the Tika endian util, rather 
than the POI one

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java?rev=1180243&r1=1180242&r2=1180243&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java 
(original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/EndianUtils.java Fri 
Oct  7 21:05:22 2011
@@ -169,8 +169,204 @@ public class EndianUtils {
       (ch7 <<  8) +
       (ch8 <<  0);
    }
+   
+   
+   /**
+    * Get a LE short value from the beginning of a byte array
+    *
+    *@param  data  the byte array
+    *@return       the short (16-bit) value
+    */
+   public static short getShortLE(byte[] data) {
+      return getShortLE(data, 0);
+   }
+   /**
+    * Get a LE short value from a byte array
+    *
+    *@param  data    the byte array
+    *@param  offset  a starting offset into the byte array
+    *@return         the short (16-bit) value
+    */
+   public static short getShortLE(byte[] data, int offset) {
+      return (short)getUShortLE(data, offset);
+   }
+
+   /**
+    * Get a LE unsigned short value from the beginning of a byte array
+    *
+    *@param  data  the byte array
+    *@return       the unsigned short (16-bit) value in an int
+    */
+   public static int getUShortLE(byte[] data) {
+      return getUShortLE(data, 0);
+   }
+   /**
+    * Get a LE unsigned short value from a byte array
+    *
+    *@param  data    the byte array
+    *@param  offset  a starting offset into the byte array
+    *@return         the unsigned short (16-bit) value in an integer
+    */
+   public static int getUShortLE(byte[] data, int offset) {
+      int b0 = data[offset] & 0xFF;
+      int b1 = data[offset+1] & 0xFF;
+      return (b1 << 8) + (b0 << 0);
+   }
+   
+   /**
+    * Get a BE short value from the beginning of a byte array
+    *
+    *@param  data  the byte array
+    *@return       the short (16-bit) value
+    */
+   public static short getShortBE(byte[] data) {
+      return getShortBE(data, 0);
+   }
+   /**
+    * Get a BE short value from a byte array
+    *
+    *@param  data    the byte array
+    *@param  offset  a starting offset into the byte array
+    *@return         the short (16-bit) value
+    */
+   public static short getShortBE(byte[] data, int offset) {
+      return (short)getUShortBE(data, offset);
+   }
+
+   /**
+    * Get a BE unsigned short value from the beginning of a byte array
+    *
+    *@param  data  the byte array
+    *@return       the unsigned short (16-bit) value in an int
+    */
+   public static int getUShortBE(byte[] data) {
+      return getUShortBE(data, 0);
+   }
+   /**
+    * Get a BE unsigned short value from a byte array
+    *
+    *@param  data    the byte array
+    *@param  offset  a starting offset into the byte array
+    *@return         the unsigned short (16-bit) value in an integer
+    */
+   public static int getUShortBE(byte[] data, int offset) {
+      int b0 = data[offset] & 0xFF;
+      int b1 = data[offset+1] & 0xFF;
+      return (b0 << 8) + (b1 << 0);
+   }
+
+   /**
+    * Get a LE int value from the beginning of a byte array
+    *
+    *@param  data  the byte array
+    *@return the int (32-bit) value
+    */
+   public static int getIntLE(byte[] data) {
+       return getIntLE(data, 0);
+   }
+   /**
+    * Get a LE int value from a byte array
+    *
+    *@param  data    the byte array
+    *@param  offset  a starting offset into the byte array
+    *@return         the int (32-bit) value
+    */
+   public static int getIntLE(byte[] data, int offset) {
+       int i=offset;
+       int b0 = data[i++] & 0xFF;
+       int b1 = data[i++] & 0xFF;
+       int b2 = data[i++] & 0xFF;
+       int b3 = data[i++] & 0xFF;
+       return (b3 << 24) + (b2 << 16) + (b1 << 8) + (b0 << 0);
+   }
 
    /**
+    * Get a BE int value from the beginning of a byte array
+    *
+    *@param  data  the byte array
+    *@return the int (32-bit) value
+    */
+   public static int getIntBE(byte[] data) {
+       return getIntBE(data, 0);
+   }
+   /**
+    * Get a BE int value from a byte array
+    *
+    *@param  data    the byte array
+    *@param  offset  a starting offset into the byte array
+    *@return         the int (32-bit) value
+    */
+   public static int getIntBE(byte[] data, int offset) {
+       int i=offset;
+       int b0 = data[i++] & 0xFF;
+       int b1 = data[i++] & 0xFF;
+       int b2 = data[i++] & 0xFF;
+       int b3 = data[i++] & 0xFF;
+       return (b0 << 24) + (b1 << 16) + (b2 << 8) + (b3 << 0);
+   }
+
+   /**
+    * Get a LE unsigned int value from the beginning of a byte array
+    *
+    *@param  data    the byte array
+    *@return         the unsigned int (32-bit) value in a long
+    */
+   public static long getUIntLE(byte[] data) {
+       return getUIntLE(data,0);
+   }
+   /**
+    * Get a LE unsigned int value from a byte array
+    *
+    *@param  data    the byte array
+    *@param  offset  a starting offset into the byte array
+    *@return         the unsigned int (32-bit) value in a long
+    */
+   public static long getUIntLE(byte[] data, int offset) {
+       long retNum = getIntLE(data, offset);
+       return retNum & 0x00FFFFFFFFl;
+   }
+
+   /**
+    * Get a BE unsigned int value from the beginning of a byte array
+    *
+    *@param  data    the byte array
+    *@return         the unsigned int (32-bit) value in a long
+    */
+   public static long getUIntBE(byte[] data) {
+       return getUIntBE(data,0);
+   }
+   /**
+    * Get a BE unsigned int value from a byte array
+    *
+    *@param  data    the byte array
+    *@param  offset  a starting offset into the byte array
+    *@return         the unsigned int (32-bit) value in a long
+    */
+   public static long getUIntBE(byte[] data, int offset) {
+       long retNum = getIntBE(data, offset);
+       return retNum & 0x00FFFFFFFFl;
+   }
+
+   /**
+    * Get a LE long value from a byte array
+    *
+    *@param  data    the byte array
+    *@param  offset  a starting offset into the byte array
+    *@return         the long (64-bit) value
+    */
+   public static long getLongLE(byte[] data, int offset) {
+      long result = 0;
+
+      for (int j = offset + LONG_SIZE - 1; j >= offset; j--) {
+         result <<= 8;
+         result |= 0xff & data[j];
+      }
+      return result;
+   }
+   private static final int LONG_SIZE = 8;
+
+   
+   /**
     *  Convert an 'unsigned' byte to an integer. ie, don't carry across the
     *  sign.
     *
@@ -195,7 +391,9 @@ public class EndianUtils {
       return (short) ( data[offset] & 0xFF );
    }
    
+   
    public static class BufferUnderrunException extends TikaException {
+      private static final long serialVersionUID = 8358288231138076276L;
       public BufferUnderrunException() {
          super("Insufficient data left in stream for required read");
       }

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java?rev=1180243&r1=1180242&r2=1180243&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java 
(original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java 
Fri Oct  7 21:05:22 2011
@@ -22,9 +22,9 @@ import java.util.Collections;
 import java.util.Set;
 
 import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.LittleEndian;
 import org.apache.poi.util.StringUtil;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.EndianUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
@@ -130,7 +130,7 @@ public class DWGParser extends AbstractP
      */
     private void get2004Props(
             InputStream stream, Metadata metadata, XHTMLContentHandler xhtml)
-            throws IOException, SAXException {
+            throws IOException, TikaException, SAXException {
        // Standard properties
         for (int i = 0; i < HEADER_PROPERTIES_ENTRIES.length; i++) {
             String headerValue = read2004String(stream);
@@ -148,8 +148,8 @@ public class DWGParser extends AbstractP
         }
     }
 
-    private String read2004String(InputStream stream) throws IOException {
-       int stringLen = LittleEndian.readUShort(stream);
+    private String read2004String(InputStream stream) throws IOException, 
TikaException {
+       int stringLen = EndianUtils.readUShortLE(stream);
 
        byte[] stringData = new byte[stringLen];
        IOUtils.readFully(stream, stringData);
@@ -167,7 +167,7 @@ public class DWGParser extends AbstractP
      */
     private void get2007and2010Props(
             InputStream stream, Metadata metadata, XHTMLContentHandler xhtml)
-            throws IOException, SAXException {
+            throws IOException, TikaException, SAXException {
         // Standard properties
         for (int i = 0; i < HEADER_PROPERTIES_ENTRIES.length; i++) {
             String headerValue = read2007and2010String(stream);
@@ -185,8 +185,8 @@ public class DWGParser extends AbstractP
         }
     }
 
-    private String read2007and2010String(InputStream stream) throws 
IOException {
-       int stringLen = LittleEndian.readUShort(stream);
+    private String read2007and2010String(InputStream stream) throws 
IOException, TikaException {
+       int stringLen = EndianUtils.readUShortLE(stream);
 
        byte[] stringData = new byte[stringLen * 2];
        IOUtils.readFully(stream, stringData);
@@ -202,11 +202,11 @@ public class DWGParser extends AbstractP
 
     private void get2000Props(
             InputStream stream, Metadata metadata, XHTMLContentHandler xhtml)
-            throws IOException, SAXException {
+            throws IOException, TikaException, SAXException {
         int propCount = 0;
         while(propCount < 30) {
-            int propIdx = LittleEndian.readUShort(stream);
-            int length = LittleEndian.readUShort(stream);
+            int propIdx = EndianUtils.readUShortLE(stream);
+            int length = EndianUtils.readUShortLE(stream);
             int valueType = stream.read();
             
             if(propIdx == 0x28) {
@@ -262,9 +262,9 @@ public class DWGParser extends AbstractP
      * Grab the offset, then skip there
      */
     private boolean skipToPropertyInfoSection(InputStream stream, byte[] 
header)
-            throws IOException {
+            throws IOException, TikaException {
         // The offset is stored in the header from 0x20 onwards
-        long offsetToSection = LittleEndian.getLong(header, 0x20);
+        long offsetToSection = EndianUtils.getLongLE(header, 0x20);
         long toSkip = offsetToSection - header.length;
         if(offsetToSection == 0){
             return false;
@@ -301,7 +301,7 @@ public class DWGParser extends AbstractP
     }
 
     private int skipToCustomProperties(InputStream stream) 
-            throws IOException {
+            throws IOException, TikaException {
        // There should be 4 zero bytes next
        byte[] padding = new byte[4];
        IOUtils.readFully(stream, padding);
@@ -312,7 +312,7 @@ public class DWGParser extends AbstractP
           IOUtils.readFully(stream, padding);
           
           // We should now have the count
-          int count = LittleEndian.readUShort(stream);
+          int count = EndianUtils.readUShortLE(stream);
           
           // Sanity check it
           if(count > 0 && count < 0x7f) {

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java?rev=1180243&r1=1180242&r2=1180243&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java 
(original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java 
Fri Oct  7 21:05:22 2011
@@ -23,8 +23,8 @@ import java.util.Collections;
 import java.util.Set;
 
 import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.LittleEndian;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.EndianUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
@@ -135,7 +135,7 @@ public class PRTParser extends AbstractP
           return;
        }
        
-       int length = LittleEndian.readUShort(stream);
+       int length = EndianUtils.readUShortLE(stream);
        if(length <= MAX_SANE_TEXT_LENGTH) {
           // Length sanity check passed
           handleText(length, stream, xhtml);
@@ -146,7 +146,7 @@ public class PRTParser extends AbstractP
           XHTMLContentHandler xhtml, Last5 l5) 
     throws IOException, SAXException, TikaException {
        // Is it 8 byte zero padded?
-       int maybeLength = LittleEndian.readUShort(stream);
+       int maybeLength = EndianUtils.readUShortLE(stream);
        if(maybeLength == 0) {
           // Check the next 6 bytes too
           for(int i=0; i<6; i++) {
@@ -161,7 +161,7 @@ public class PRTParser extends AbstractP
           
           byte[] b2 = new byte[2];
           IOUtils.readFully(stream, b2);
-          int length = LittleEndian.getUShort(b2);
+          int length = EndianUtils.getUShortLE(b2);
           if(length > 1 && length <= MAX_SANE_TEXT_LENGTH) {
              // Length sanity check passed
              handleText(length, stream, xhtml);


Reply via email to