Author: yanz
Date: Wed Mar 10 17:38:15 2010
New Revision: 921468

URL: http://svn.apache.org/viewvc?rev=921468&view=rev
Log:
PIG-1207 Data sanity check should be performed at the end of writing instead of later at query time (yanz)

Modified:
    hadoop/pig/trunk/contrib/zebra/CHANGES.txt
    hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java
    hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java

Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=921468&r1=921467&r2=921468&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Wed Mar 10 17:38:15 2010
@@ -14,6 +14,8 @@ Trunk (unreleased changes)
 
   IMPROVEMENTS
 
+    PIG-1207 Data sanity check should be performed at the end of writing instead of later at query time (yanz)
+
     PIG-1206 Storing descendingly sorted PIG table as unsorted table (yanz)
 
     PIG-1240 zebra manifest file enhancement (gauravj via yanz)

Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java?rev=921468&r1=921467&r2=921468&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java Wed Mar 10 17:38:15 2010
@@ -1459,9 +1459,33 @@ public class BasicTable {
       if (!finished)
         finish();
       try {
+        ColumnGroup.CGIndex firstCGIndex = null, cgIndex;
+        int first = -1;
         for (int nx = 0; nx < colGroups.length; nx++) {
           if (colGroups[nx] != null) {
             colGroups[nx].close();
+            if (first == -1)
+            {
+              first = nx;
+              firstCGIndex = colGroups[nx].index;
+            } else {
+              cgIndex = colGroups[nx].index;
+              if (cgIndex.size() != firstCGIndex.size())
+                throw new IOException("Column Group 
"+colGroups[nx].path.getName()+
+                    " has different number of files than in column group " + 
colGroups[first].path.getName());
+              int size = firstCGIndex.size();
+              for (int i = 0; i < size; i++)
+              {
+                if (!cgIndex.get(i).name.equals(firstCGIndex.get(i).name))
+                  throw new IOException("File["+i+"] in Column Group 
"+colGroups[nx].path.getName()+
+                      " has a different name: "+cgIndex.get(i).name+" than " + 
+                      firstCGIndex.get(i).name + " in column group " + 
colGroups[first].path.getName());
+                if (cgIndex.get(i).rows != firstCGIndex.get(i).rows)
+                  throw new IOException("File "+cgIndex.get(i).name+"Column 
Group "+colGroups[nx].path.getName()+
+                      " has a different number of rows, " + 
cgIndex.get(i).rows + ", than " +
+                      firstCGIndex.get(i).rows + " in column group " + 
colGroups[first].path.getName());
+              }
+            }
           }
         }
         metaWriter.close();

Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java?rev=921468&r1=921467&r2=921468&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java Wed Mar 10 17:38:15 2010
@@ -1519,6 +1519,7 @@ class ColumnGroup {
     FileSystem fs;
     CGSchema cgschema;
     private boolean finished, closed;
+    CGIndex index;
 
     /**
      * Create a ColumnGroup writer. The semantics are as follows:
@@ -1700,8 +1701,8 @@ class ColumnGroup {
 
     private void createIndex() throws IOException {
       MetaFile.Writer metaFile =
-      MetaFile.createWriter(makeMetaFilePath(finalOutputPath), conf);
-      CGIndex index = buildIndex(fs, finalOutputPath, false, conf);
+        MetaFile.createWriter(makeMetaFilePath(finalOutputPath), conf);
+      index = buildIndex(fs, finalOutputPath, false, conf);
       DataOutputStream dos = metaFile.createMetaBlock(BLOCK_NAME_INDEX);
       try {
         index.write(dos);


Reply via email to