Author: tgraves
Date: Tue Aug 21 22:38:46 2012
New Revision: 1375834

URL: http://svn.apache.org/viewvc?rev=1375834&view=rev
Log:
HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file checksum 
with CRC32C.  (Kihwal Lee via szetszwo)

Added:
    
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java
    
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java
Modified:
    
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt
    
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java
    
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java

Modified: 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1375834&r1=1375833&r2=1375834&view=diff
==============================================================================
--- 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt
 (original)
+++ 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt
 Tue Aug 21 22:38:46 2012
@@ -36,6 +36,9 @@ Release 0.23.3 - UNRELEASED
     HADOOP-8240. Add a new API to allow users to specify a checksum type
     on FileSystem.create(..).  (Kihwal Lee via szetszwo)
 
+    HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file
+    checksum with CRC32C.  (Kihwal Lee via szetszwo)
+
   OPTIMIZATIONS
 
   BUG FIXES

Added: 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java?rev=1375834&view=auto
==============================================================================
--- 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java
 (added)
+++ 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java
 Tue Aug 21 22:38:46 2012
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import org.apache.hadoop.io.MD5Hash;
+import org.apache.hadoop.util.DataChecksum;
+
+/** For CRC32 with the Castagnoli polynomial */
+public class MD5MD5CRC32CastagnoliFileChecksum extends MD5MD5CRC32FileChecksum 
{
+  /** Same as this(0, 0, null) */
+  public MD5MD5CRC32CastagnoliFileChecksum() {
+    this(0, 0, null);
+  }
+
+  /** Create a MD5FileChecksum */
+  public MD5MD5CRC32CastagnoliFileChecksum(int bytesPerCRC, long crcPerBlock, 
MD5Hash md5) {
+    super(bytesPerCRC, crcPerBlock, md5);
+  }
+
+  @Override
+  public DataChecksum.Type getCrcType() {
+    // default to the one that is understood by all releases.
+    return DataChecksum.Type.CRC32C;
+  }
+}

Modified: 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java?rev=1375834&r1=1375833&r2=1375834&view=diff
==============================================================================
--- 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java
 (original)
+++ 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java
 Tue Aug 21 22:38:46 2012
@@ -23,12 +23,17 @@ import java.io.IOException;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.Options.ChecksumOpt;
 import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.util.DataChecksum;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.znerd.xmlenc.XMLOutputter;
 
+import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
+import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
+
 /** MD5 of MD5 of CRC32. */
 @InterfaceAudience.LimitedPrivate({"HDFS"})
 @InterfaceStability.Unstable
@@ -54,7 +59,19 @@ public class MD5MD5CRC32FileChecksum ext
   
   /** {@inheritDoc} */ 
   public String getAlgorithmName() {
-    return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32";
+    return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC +
+        getCrcType().name();
+  }
+
+  public static DataChecksum.Type getCrcTypeFromAlgorithmName(String algorithm)
+      throws IOException {
+    if (algorithm.endsWith(DataChecksum.Type.CRC32.name())) {
+      return DataChecksum.Type.CRC32;
+    } else if (algorithm.endsWith(DataChecksum.Type.CRC32C.name())) {
+      return DataChecksum.Type.CRC32C;
+    }
+
+    throw new IOException("Unknown checksum type in " + algorithm);
   }
 
   /** {@inheritDoc} */ 
@@ -65,6 +82,16 @@ public class MD5MD5CRC32FileChecksum ext
     return WritableUtils.toByteArray(this);
   }
 
+  /** returns the CRC type */
+  public DataChecksum.Type getCrcType() {
+    // default to the one that is understood by all releases.
+    return DataChecksum.Type.CRC32;
+  }
+
+  public ChecksumOpt getChecksumOpt() {
+    return new ChecksumOpt(getCrcType(), bytesPerCRC);
+  }
+
   /** {@inheritDoc} */ 
   public void readFields(DataInput in) throws IOException {
     bytesPerCRC = in.readInt();
@@ -86,6 +113,7 @@ public class MD5MD5CRC32FileChecksum ext
     if (that != null) {
       xml.attribute("bytesPerCRC", "" + that.bytesPerCRC);
       xml.attribute("crcPerBlock", "" + that.crcPerBlock);
+      xml.attribute("crcType", ""+ that.getCrcType().name());
       xml.attribute("md5", "" + that.md5);
     }
     xml.endTag();
@@ -97,16 +125,40 @@ public class MD5MD5CRC32FileChecksum ext
     final String bytesPerCRC = attrs.getValue("bytesPerCRC");
     final String crcPerBlock = attrs.getValue("crcPerBlock");
     final String md5 = attrs.getValue("md5");
+    String crcType = attrs.getValue("crcType");
+    DataChecksum.Type finalCrcType;
     if (bytesPerCRC == null || crcPerBlock == null || md5 == null) {
       return null;
     }
 
     try {
-      return new MD5MD5CRC32FileChecksum(Integer.valueOf(bytesPerCRC),
-          Integer.valueOf(crcPerBlock), new MD5Hash(md5));
-    } catch(Exception e) {
+      // old versions don't support crcType.
+      if (crcType == null || crcType == "") {
+        finalCrcType = DataChecksum.Type.CRC32;
+      } else {
+        finalCrcType = DataChecksum.Type.valueOf(crcType);
+      }
+
+      switch (finalCrcType) {
+        case CRC32:
+          return new MD5MD5CRC32GzipFileChecksum(
+              Integer.valueOf(bytesPerCRC),
+              Integer.valueOf(crcPerBlock),
+              new MD5Hash(md5));
+        case CRC32C:
+          return new MD5MD5CRC32CastagnoliFileChecksum(
+              Integer.valueOf(bytesPerCRC),
+              Integer.valueOf(crcPerBlock),
+              new MD5Hash(md5));
+        default:
+          // we should never get here since finalCrcType will
+          // hold a valid type or we should have got an exception.
+          return null;
+      }
+    } catch (Exception e) {
       throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC
-          + ", crcPerBlock=" + crcPerBlock + ", md5=" + md5, e);
+          + ", crcPerBlock=" + crcPerBlock + ", crcType=" + crcType 
+          + ", md5=" + md5, e);
     }
   }
 
@@ -114,4 +166,4 @@ public class MD5MD5CRC32FileChecksum ext
   public String toString() {
     return getAlgorithmName() + ":" + md5;
   }
-}
\ No newline at end of file
+}

Added: 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java?rev=1375834&view=auto
==============================================================================
--- 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java
 (added)
+++ 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java
 Tue Aug 21 22:38:46 2012
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import org.apache.hadoop.io.MD5Hash;
+import org.apache.hadoop.util.DataChecksum;
+
+/** For CRC32 with the Gzip polynomial */
+public class MD5MD5CRC32GzipFileChecksum extends MD5MD5CRC32FileChecksum {
+  /** Same as this(0, 0, null) */
+  public MD5MD5CRC32GzipFileChecksum() {
+    this(0, 0, null);
+  }
+
+  /** Create a MD5FileChecksum */
+  public MD5MD5CRC32GzipFileChecksum(int bytesPerCRC, long crcPerBlock, 
MD5Hash md5) {
+    super(bytesPerCRC, crcPerBlock, md5);
+  }
+  @Override
+  public DataChecksum.Type getCrcType() {
+    // default to the one that is understood by all releases.
+    return DataChecksum.Type.CRC32;
+  }
+}

Modified: 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java?rev=1375834&r1=1375833&r2=1375834&view=diff
==============================================================================
--- 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java
 (original)
+++ 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java
 Tue Aug 21 22:38:46 2012
@@ -44,13 +44,15 @@ public class DataChecksum implements Che
   public static final int CHECKSUM_CRC32   = 1;
   public static final int CHECKSUM_CRC32C  = 2;
   public static final int CHECKSUM_DEFAULT = 3; 
+  public static final int CHECKSUM_MIXED   = 4;
 
   /** The checksum types */
   public static enum Type {
     NULL  (CHECKSUM_NULL, 0),
     CRC32 (CHECKSUM_CRC32, 4),
     CRC32C(CHECKSUM_CRC32C, 4),
-    DEFAULT(CHECKSUM_DEFAULT, 0); // This cannot be used to create DataChecksum
+    DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
+    MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
 
     public final int id;
     public final int size;


Reply via email to